Commit d5d7427a authored by iker_martin's avatar iker_martin
Browse files

Fixed major bug related to MKL, now using private implementation

parent c769c32d
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mkl_blas.h>
#include <mkl_spblas.h>
//#include <mkl_blas.h>
//#include <mkl_spblas.h>
#include "mymkl.h"
#include "ScalarVectors.h"
#include "SparseMatrices.h"
#include <mpi.h>
......@@ -401,17 +402,13 @@ int compute(Compute_data *computeData, struct Dist_data *dist_data, int sm) {
int state = MALL_NOT_STARTED;
int ended_loop = 1;
int cnt = 0;
int reconfigure = 0;
int reconfigure = 0, rec_iter = 500;
computeData->maxiter = 1000;
// dumb(computeData,dist_data);
while ((computeData->iter < computeData->maxiter) && (computeData->tol > computeData->umbral)) {
//while (computeData->tol > computeData->umbral) {
// if(dist_data->myId == ROOT) printf ("(%d,%20.10e)\n", computeData->iter, computeData->tol);
//TODO Hacer un estudio de los valores escalares para la iteración 501
// COMPUTATION
#ifdef ONLY_SYM
ProdSymSparseMatrixVector (computeData->subm, computeData->d_full, computeData->z); // z += A * d_full
......@@ -430,18 +427,18 @@ int compute(Compute_data *computeData, struct Dist_data *dist_data, int sm) {
computeData->alpha = computeData->beta / computeData->alpha; // alpha = beta / alpha
dscal (&(dist_data->tamBl), &computeData->alpha, computeData->d, &IONE); // d = alpha * d
daxpy (&(dist_data->tamBl), &DONE, computeData->res, &IONE, computeData->d, &IONE); // d += res
MPI_Allgatherv(computeData->d, dist_data->tamBl, MPI_DOUBLE, computeData->d_full,
computeData->dist_rows, computeData->displs_rows, MPI_DOUBLE, dist_data->comm); // d_full = Gather(d)
computeData->tol = sqrt (computeData->beta); // tol = sqrt(beta) = norm (res)
computeData->iter++;
if (computeData->iter == 3) reconfigure = 1;
if (computeData->iter == rec_iter) reconfigure = 1;
if (reconfigure) {
state = malleability_checkpoint();
if ((state == MALL_COMPLETED && sm == 0) || state == MALL_ZOMBIE) { ended_loop = 0; break; }
else if(state == MALL_COMPLETED) {
reconfigure = 0;
//dumb(computeData,dist_data);
free_computeData(computeData, 0);
update_dist_data(dist_data);
dist_new(dist_data, computeData);
......@@ -458,17 +455,85 @@ int compute(Compute_data *computeData, struct Dist_data *dist_data, int sm) {
/* Debug dump: each MPI rank prints selected fields of its local Compute_data.
 * The recurring pattern is: rank 0 prints a label, then every rank prints its
 * own value in rank order, with MPI_Barrier + fflush calls between turns so
 * the output of different ranks is not interleaved.
 *
 * NOTE(review): this text appears to be diff residue from a web scrape — the
 * for-loop opened at "Vlen=" below has no matching closing brace, so the
 * braces in this function are unbalanced as shown (old and new diff lines
 * were interleaved without +/- markers). The real file presumably differs;
 * confirm against the repository before relying on this listing.
 */
void dumb(Compute_data *computeData, struct Dist_data *dist_data) {
int i;
/* Stagger start-up per rank so the initial prints do not collide. */
sleep(dist_data->myId+dist_data->numP);
printf("P%d -tamBl=%d", dist_data->myId, dist_data->tamBl);
/*for(i=0; i<dist_data->tamBl; i++) {
printf("%lf ", computeData->d[i]);
}*/
printf("\n");
printf("Vlen=");
/* NOTE(review): this loop's opening brace is never closed below — likely an
 * old diff line (L62-63) interleaved with the new label line that follows. */
for(i=0; i<dist_data->tamBl; i++) {
printf("%d ", computeData->vlen[i]);
if(dist_data->myId == 0) printf("Vptr=");
fflush(stdout); MPI_Barrier(dist_data->comm);
for(i=0; i<dist_data->numP; i++) {
if(dist_data->myId == i) {
/* vptr[tamBl]: presumably the end of the local row-pointer array — TODO confirm. */
printf("%d, ", computeData->subm.vptr[dist_data->tamBl]);
fflush(stdout);
}
MPI_Barrier(dist_data->comm);
}
if(dist_data->myId == 0) printf("\n");
fflush(stdout); MPI_Barrier(dist_data->comm);
/* Current residual tolerance, one value per rank. */
if(dist_data->myId == 0) printf("Tol=");
fflush(stdout); MPI_Barrier(dist_data->comm);
for(i=0; i<dist_data->numP; i++) {
if(dist_data->myId == i) {
printf("%lf, ", computeData->tol);
fflush(stdout);
}
MPI_Barrier(dist_data->comm);
}
if(dist_data->myId == 0) printf("\n");
fflush(stdout); MPI_Barrier(dist_data->comm);
/* Last local element of each work vector (z, d, res, vec), rank by rank. */
if(dist_data->myId == 0) printf("Z[last]=");
fflush(stdout); MPI_Barrier(dist_data->comm);
for(i=0; i<dist_data->numP; i++) {
if(dist_data->myId == i) {
printf("%lf, ", computeData->z[dist_data->tamBl-1]);
fflush(stdout);
}
MPI_Barrier(dist_data->comm);
}
if(dist_data->myId == 0) printf("\n");
fflush(stdout); MPI_Barrier(dist_data->comm);
if(dist_data->myId == 0) printf("D[last]=");
fflush(stdout); MPI_Barrier(dist_data->comm);
for(i=0; i<dist_data->numP; i++) {
if(dist_data->myId == i) {
printf("%lf, ", computeData->d[dist_data->tamBl-1]);
fflush(stdout);
}
MPI_Barrier(dist_data->comm);
}
if(dist_data->myId == 0) printf("\n");
fflush(stdout); MPI_Barrier(dist_data->comm);
if(dist_data->myId == 0) printf("res[last]=");
fflush(stdout); MPI_Barrier(dist_data->comm);
for(i=0; i<dist_data->numP; i++) {
if(dist_data->myId == i) {
printf("%lf, ", computeData->res[dist_data->tamBl-1]);
fflush(stdout);
}
MPI_Barrier(dist_data->comm);
}
if(dist_data->myId == 0) printf("\n");
fflush(stdout); MPI_Barrier(dist_data->comm);
if(dist_data->myId == 0) printf("Vec[last]=");
fflush(stdout); MPI_Barrier(dist_data->comm);
for(i=0; i<dist_data->numP; i++) {
if(dist_data->myId == i) {
printf("%lf, ", computeData->vec[dist_data->tamBl-1]);
fflush(stdout);
}
MPI_Barrier(dist_data->comm);
}
/* NOTE(review): the next two printf lines look like an old/new diff pair
 * (unconditional vs. rank-0-only newline) — only one is likely real. */
printf("\n");
if(dist_data->myId == 0) printf("\n");
fflush(stdout); MPI_Barrier(dist_data->comm);
}
......
......@@ -2,7 +2,7 @@
#SBATCH -N 1
#SBATCH -p P1
#SBATCH -t 00:15:00
#SBATCH -t 01:00:00
dirCG="/home/martini/malleable_cg"
......@@ -19,8 +19,12 @@ msm=$4
mss=$5
mrm=$6
is_sync=$8
qty=$9
initial_nodelist=$(bash $dirCG/BashScripts/createInitialNodelist.sh $numP $cores $nodelist)
echo "Test numP=$numP numC=$numC Meths=$msm $mrm $mss -- Is_synch=$is_sync"
mpirun -hosts $initial_nodelist -np $numP $dirCG/build/a.out $matrix $numC $msm $mss $mrm $mss $is_sync $nodelist $nodes
echo "Test numP=$numP numC=$numC Meths=$msm $mrm $mss -- Is_synch=$is_sync qty=$qty"
for ((i=0; i<qty; i++))
do
mpirun -hosts $initial_nodelist -np $numP $dirCG/build/a.out $matrix $numC $msm $mss $mrm $mss $is_sync $nodelist $nodes
done
echo "End"
......@@ -117,6 +117,9 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex
dist_s_data->entries = 0;
dist_a_data->entries = 0;
mall_conf->spawn_time = 0; mall_conf->sync_time = 0; mall_conf->async_time = 0; mall_conf->malleability_time = 0;
mall_conf->spawn_start = 0; mall_conf->sync_end = 0; mall_conf->async_end = 0; mall_conf->malleability_end = 0;
state = MALL_NOT_STARTED;
zombies_service_init();
......
......@@ -4,6 +4,12 @@
#SBATCH -p P1
#SBATCH -t 00:10:00
dirM="/home/martini/SparseMatrix/"
dirCG="/home/martini/malleable_cg"
matrix="Queen_4147.rb"
#matrix="audikw_1.rb"
#matrix="bcsstk01.rsa"
module load /home/martini/MODULES/modulefiles/mpich-4.0.3-ofi
module load /home/martini/MODULES/modulefiles/intel64Only.module
......@@ -11,8 +17,8 @@ nodelist=$SLURM_JOB_NODELIST
nodes=$SLURM_JOB_NUM_NODES
cores=20
numP=$1
numC=2
msm=1
numC=40
msm=0
mss=1
mrm=0
send_sync=1
......@@ -20,7 +26,7 @@ send_sync=1
initial_nodelist=$(bash BashScripts/createInitialNodelist.sh $numP $cores $nodelist)
echo "Test"
mpirun -hosts $initial_nodelist -np $numP ./build/a.out bcsstk01.rsa $numC $msm $mss $mrm $mss $send_sync $nodelist $nodes
mpirun -hosts $initial_nodelist -np $numP ./build/a.out $dirM/$matrix $numC $msm $mss $mrm $mss $send_sync $nodelist $nodes
#mpirun -np 4 ./ConjugateGradient bcsstk17.rsa
#mpirun -np 8 ./ConjugateGradient bcsstk01.rsa
#mpirun -np 12 ./ConjugateGradient bcsstk01.rsa
......
#!/bin/bash
#SBATCH -N 8
#SBATCH -p P1
#SBATCH --exclude=c01,c00,c02
dirM="/home/martini/SparseMatrix/"
dirCG="/home/martini/malleable_cg"
#matrix="Queen_4147.rb"
matrix="Queen_4147.rb"
#matrix="audikw_1.rb"
matrix="bcsstk01.rsa"
#matrix="bcsstk01.rsa"
#procs=(2 10 20 40 80 120 160)
procs=(2 4)
procs=(2 10 20 40 80 120 160)
#procs=(2 4)
msm=(0 1)
mss=(1 2)
mrm=(0 1)
is_syncs=(1 0)
qty=1
if [ $# -ge 1 ]
then
qty=$1
fi
echo $matrix
for proc in "${procs[@]}"
......@@ -46,11 +46,11 @@ do
do
if [ $is_sync -eq 1 ] # Matrix is send syncrhonously
then
sbatch -p P1 -N $node_qty $dirCG/generalRun.sh $proc $dirM$matrix $proc_c $sm_type 1 $rm_type 1 $is_sync
sbatch -p P1 -N $node_qty $dirCG/generalRun.sh $proc $dirM$matrix $proc_c $sm_type 1 $rm_type 1 $is_sync $qty
else # Matrix is send asyncrhonously
for ss_type in "${mss[@]}"
do
sbatch -p P1 -N $node_qty $dirCG/generalRun.sh $proc $dirM$matrix $proc_c $sm_type $ss_type $rm_type $ss_type $is_sync
sbatch -p P1 -N $node_qty $dirCG/generalRun.sh $proc $dirM$matrix $proc_c $sm_type $ss_type $rm_type $ss_type $is_sync $qty
done
fi
done
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment