Commit d5d7427a authored by iker_martin

Fixed a major bug related to MKL; now using a private implementation

parent c769c32d
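The change below replaces the MKL BLAS headers with a private "mymkl.h", whose contents are not part of this diff. As a rough, hypothetical sketch only, drop-in replacements compatible with the pointer-style dscal/daxpy calls visible in compute() could look like this:

```c
/* Hypothetical sketch of "mymkl.h" (the real header is not shown in this
 * commit): serial replacements matching the Fortran-style BLAS signatures
 * used in compute(), where every argument is passed by pointer. */
#ifndef MYMKL_H
#define MYMKL_H

/* x = alpha * x over n elements with stride incx */
static inline void dscal(const int *n, const double *alpha,
                         double *x, const int *incx) {
    int i;
    for (i = 0; i < *n; i++) x[i * (*incx)] *= *alpha;
}

/* y = alpha * x + y over n elements with strides incx, incy */
static inline void daxpy(const int *n, const double *alpha,
                         const double *x, const int *incx,
                         double *y, const int *incy) {
    int i;
    for (i = 0; i < *n; i++) y[i * (*incy)] += (*alpha) * x[i * (*incx)];
}

#endif /* MYMKL_H */
```

Because the call sites pass every argument by pointer (e.g. &IONE for the stride), such a header can stand in for mkl_blas.h without touching the solver code.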
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
-#include <mkl_blas.h>
-#include <mkl_spblas.h>
+//#include <mkl_blas.h>
+//#include <mkl_spblas.h>
+#include "mymkl.h"
 #include "ScalarVectors.h"
 #include "SparseMatrices.h"
 #include <mpi.h>
@@ -401,17 +402,13 @@ int compute(Compute_data *computeData, struct Dist_data *dist_data, int sm) {
 	int state = MALL_NOT_STARTED;
 	int ended_loop = 1;
 	int cnt = 0;
-	int reconfigure = 0;
+	int reconfigure = 0, rec_iter = 500;
 	computeData->maxiter = 1000;
-	// dumb(computeData,dist_data);
 	while ((computeData->iter < computeData->maxiter) && (computeData->tol > computeData->umbral)) {
-	//while (computeData->tol > computeData->umbral) {
-	//	if(dist_data->myId == ROOT) printf ("(%d,%20.10e)\n", computeData->iter, computeData->tol);
-	//TODO Study the scalar values for iteration 501
 		// COMPUTATION
 #ifdef ONLY_SYM
 		ProdSymSparseMatrixVector (computeData->subm, computeData->d_full, computeData->z);    // z += A * d_full
@@ -430,18 +427,18 @@ int compute(Compute_data *computeData, struct Dist_data *dist_data, int sm) {
 		computeData->alpha = computeData->beta / computeData->alpha;                           // alpha = beta / alpha
 		dscal (&(dist_data->tamBl), &computeData->alpha, computeData->d, &IONE);               // d = alpha * d
 		daxpy (&(dist_data->tamBl), &DONE, computeData->res, &IONE, computeData->d, &IONE);    // d += res
 		MPI_Allgatherv(computeData->d, dist_data->tamBl, MPI_DOUBLE, computeData->d_full,
 			computeData->dist_rows, computeData->displs_rows, MPI_DOUBLE, dist_data->comm);    // d_full = Gather(d)
 		computeData->tol = sqrt (computeData->beta);                                           // tol = sqrt(beta) = norm (res)
 		computeData->iter++;
-		if (computeData->iter == 3) reconfigure = 1;
+		if (computeData->iter == rec_iter) reconfigure = 1;
 		if (reconfigure) {
 			state = malleability_checkpoint();
 			if ((state == MALL_COMPLETED && sm == 0) || state == MALL_ZOMBIE) { ended_loop = 0; break; }
 			else if(state == MALL_COMPLETED) {
 				reconfigure = 0;
+				//dumb(computeData,dist_data);
 				free_computeData(computeData, 0);
 				update_dist_data(dist_data);
 				dist_new(dist_data, computeData);
@@ -458,17 +455,85 @@ int compute(Compute_data *computeData, struct Dist_data *dist_data, int sm) {
 void dumb(Compute_data *computeData, struct Dist_data *dist_data) {
 	int i;
-	sleep(dist_data->myId+dist_data->numP);
-	printf("P%d -tamBl=%d", dist_data->myId, dist_data->tamBl);
-	/*for(i=0; i<dist_data->tamBl; i++) {
-		printf("%lf ", computeData->d[i]);
-	}*/
-	printf("\n");
-	printf("Vlen=");
-	for(i=0; i<dist_data->tamBl; i++) {
-		printf("%d ", computeData->vlen[i]);
-	}
-	printf("\n");
+	if(dist_data->myId == 0) printf("Vptr=");
+	fflush(stdout); MPI_Barrier(dist_data->comm);
+	for(i=0; i<dist_data->numP; i++) {
+		if(dist_data->myId == i) {
+			printf("%d, ", computeData->subm.vptr[dist_data->tamBl]);
+			fflush(stdout);
+		}
+		MPI_Barrier(dist_data->comm);
+	}
+	if(dist_data->myId == 0) printf("\n");
+	fflush(stdout); MPI_Barrier(dist_data->comm);
+	if(dist_data->myId == 0) printf("Tol=");
+	fflush(stdout); MPI_Barrier(dist_data->comm);
+	for(i=0; i<dist_data->numP; i++) {
+		if(dist_data->myId == i) {
+			printf("%lf, ", computeData->tol);
+			fflush(stdout);
+		}
+		MPI_Barrier(dist_data->comm);
+	}
+	if(dist_data->myId == 0) printf("\n");
+	fflush(stdout); MPI_Barrier(dist_data->comm);
+	if(dist_data->myId == 0) printf("Z[last]=");
+	fflush(stdout); MPI_Barrier(dist_data->comm);
+	for(i=0; i<dist_data->numP; i++) {
+		if(dist_data->myId == i) {
+			printf("%lf, ", computeData->z[dist_data->tamBl-1]);
+			fflush(stdout);
+		}
+		MPI_Barrier(dist_data->comm);
+	}
+	if(dist_data->myId == 0) printf("\n");
+	fflush(stdout); MPI_Barrier(dist_data->comm);
+	if(dist_data->myId == 0) printf("D[last]=");
+	fflush(stdout); MPI_Barrier(dist_data->comm);
+	for(i=0; i<dist_data->numP; i++) {
+		if(dist_data->myId == i) {
+			printf("%lf, ", computeData->d[dist_data->tamBl-1]);
+			fflush(stdout);
+		}
+		MPI_Barrier(dist_data->comm);
+	}
+	if(dist_data->myId == 0) printf("\n");
+	fflush(stdout); MPI_Barrier(dist_data->comm);
+	if(dist_data->myId == 0) printf("res[last]=");
+	fflush(stdout); MPI_Barrier(dist_data->comm);
+	for(i=0; i<dist_data->numP; i++) {
+		if(dist_data->myId == i) {
+			printf("%lf, ", computeData->res[dist_data->tamBl-1]);
+			fflush(stdout);
+		}
+		MPI_Barrier(dist_data->comm);
+	}
+	if(dist_data->myId == 0) printf("\n");
+	fflush(stdout); MPI_Barrier(dist_data->comm);
+	if(dist_data->myId == 0) printf("Vec[last]=");
+	fflush(stdout); MPI_Barrier(dist_data->comm);
+	for(i=0; i<dist_data->numP; i++) {
+		if(dist_data->myId == i) {
+			printf("%lf, ", computeData->vec[dist_data->tamBl-1]);
+			fflush(stdout);
+		}
+		MPI_Barrier(dist_data->comm);
+	}
+	if(dist_data->myId == 0) printf("\n");
 	fflush(stdout); MPI_Barrier(dist_data->comm);
 }
...
@@ -2,7 +2,7 @@
 #SBATCH -N 1
 #SBATCH -p P1
-#SBATCH -t 00:15:00
+#SBATCH -t 01:00:00
 dirCG="/home/martini/malleable_cg"
@@ -19,8 +19,12 @@ msm=$4
 mss=$5
 mrm=$6
 is_sync=$8
+qty=$9
 initial_nodelist=$(bash $dirCG/BashScripts/createInitialNodelist.sh $numP $cores $nodelist)
-echo "Test numP=$numP numC=$numC Meths=$msm $mrm $mss -- Is_synch=$is_sync"
-mpirun -hosts $initial_nodelist -np $numP $dirCG/build/a.out $matrix $numC $msm $mss $mrm $mss $is_sync $nodelist $nodes
+echo "Test numP=$numP numC=$numC Meths=$msm $mrm $mss -- Is_synch=$is_sync qty=$qty"
+for ((i=0; i<qty; i++))
+do
+	mpirun -hosts $initial_nodelist -np $numP $dirCG/build/a.out $matrix $numC $msm $mss $mrm $mss $is_sync $nodelist $nodes
+done
 echo "End"
@@ -117,6 +117,9 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex
 	dist_s_data->entries = 0;
 	dist_a_data->entries = 0;
+	mall_conf->spawn_time = 0; mall_conf->sync_time = 0; mall_conf->async_time = 0; mall_conf->malleability_time = 0;
+	mall_conf->spawn_start = 0; mall_conf->sync_end = 0; mall_conf->async_end = 0; mall_conf->malleability_end = 0;
 	state = MALL_NOT_STARTED;
 	zombies_service_init();
...
@@ -4,6 +4,12 @@
 #SBATCH -p P1
 #SBATCH -t 00:10:00
+dirM="/home/martini/SparseMatrix/"
+dirCG="/home/martini/malleable_cg"
+matrix="Queen_4147.rb"
+#matrix="audikw_1.rb"
+#matrix="bcsstk01.rsa"
 module load /home/martini/MODULES/modulefiles/mpich-4.0.3-ofi
 module load /home/martini/MODULES/modulefiles/intel64Only.module
@@ -11,8 +17,8 @@ nodelist=$SLURM_JOB_NODELIST
 nodes=$SLURM_JOB_NUM_NODES
 cores=20
 numP=$1
-numC=2
-msm=1
+numC=40
+msm=0
 mss=1
 mrm=0
 send_sync=1
@@ -20,7 +26,7 @@ send_sync=1
 initial_nodelist=$(bash BashScripts/createInitialNodelist.sh $numP $cores $nodelist)
 echo "Test"
-mpirun -hosts $initial_nodelist -np $numP ./build/a.out bcsstk01.rsa $numC $msm $mss $mrm $mss $send_sync $nodelist $nodes
+mpirun -hosts $initial_nodelist -np $numP ./build/a.out $dirM/$matrix $numC $msm $mss $mrm $mss $send_sync $nodelist $nodes
 #mpirun -np 4 ./ConjugateGradient bcsstk17.rsa
 #mpirun -np 8 ./ConjugateGradient bcsstk01.rsa
 #mpirun -np 12 ./ConjugateGradient bcsstk01.rsa
...
 #!/bin/bash
+#SBATCH -N 8
+#SBATCH -p P1
+#SBATCH --exclude=c01,c00,c02
 dirM="/home/martini/SparseMatrix/"
 dirCG="/home/martini/malleable_cg"
-#matrix="Queen_4147.rb"
+matrix="Queen_4147.rb"
 #matrix="audikw_1.rb"
-matrix="bcsstk01.rsa"
-#procs=(2 10 20 40 80 120 160)
-procs=(2 4)
+#matrix="bcsstk01.rsa"
+procs=(2 10 20 40 80 120 160)
+#procs=(2 4)
 msm=(0 1)
 mss=(1 2)
 mrm=(0 1)
 is_syncs=(1 0)
+qty=1
+if [ $# -ge 1 ]
+then
+	qty=$1
+fi
 echo $matrix
 for proc in "${procs[@]}"
@@ -46,11 +46,11 @@ do
 	do
 		if [ $is_sync -eq 1 ] # Matrix is sent synchronously
 		then
-			sbatch -p P1 -N $node_qty $dirCG/generalRun.sh $proc $dirM$matrix $proc_c $sm_type 1 $rm_type 1 $is_sync
+			sbatch -p P1 -N $node_qty $dirCG/generalRun.sh $proc $dirM$matrix $proc_c $sm_type 1 $rm_type 1 $is_sync $qty
 		else # Matrix is sent asynchronously
 			for ss_type in "${mss[@]}"
 			do
-				sbatch -p P1 -N $node_qty $dirCG/generalRun.sh $proc $dirM$matrix $proc_c $sm_type $ss_type $rm_type $ss_type $is_sync
+				sbatch -p P1 -N $node_qty $dirCG/generalRun.sh $proc $dirM$matrix $proc_c $sm_type $ss_type $rm_type $ss_type $is_sync $qty
			done
 		fi
 	done
...