Commit b1cc8715 authored by iker_martin

Added collection of the thread-creation cost for spawn operations performed in the background

parent a8edcbc0
@@ -60,14 +60,14 @@ void recv_results(results_data *results, int root, int resizes, MPI_Comm intercomm)
  * Specifically, these are three scalars and a vector of size "resizes"
  */
 void def_results_type(results_data *results, int resizes, MPI_Datatype *results_type) {
-    int i, counts = 4;
-    int blocklengths[4] = {1, 1, 1, 1};
+    int i, counts = 5;
+    int blocklengths[] = {1, 1, 1, 1, 1};
     MPI_Aint displs[counts], dir;
     MPI_Datatype types[counts];

     // Fill in the types vector
-    types[0] = types[1] = types[2] = types[3] = MPI_DOUBLE;
-    blocklengths[3] = resizes;
+    types[0] = types[1] = types[2] = types[3] = types[4] = MPI_DOUBLE;
+    blocklengths[3] = blocklengths[4] = resizes;

     // Fill in the displs vector
     MPI_Get_address(results, &dir);

@@ -75,7 +75,8 @@ void def_results_type(results_data *results, int resizes, MPI_Datatype *results_type) {
     MPI_Get_address(&(results->sync_start), &displs[0]);
     MPI_Get_address(&(results->async_start), &displs[1]);
     MPI_Get_address(&(results->exec_start), &displs[2]);
-    MPI_Get_address(&(results->spawn_time[0]), &displs[3]); //TODO Check whether this can be simplified
+    MPI_Get_address(&(results->spawn_thread_time[0]), &displs[3]);
+    MPI_Get_address(&(results->spawn_time[0]), &displs[4]); //TODO Check whether this can be simplified //FIXME Error if there is more than one spawn?

     for(i=0; i<counts; i++) displs[i] -= dir;
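The tail of def_results_type() falls outside this hunk. As a hedged sketch (an assumption about the unshown remainder, not part of this commit), a struct type built from these five members would typically be finalized and committed like this:

    /* Sketch: finalize the derived type from the arrays filled above. */
    MPI_Type_create_struct(counts, blocklengths, displs, types, results_type);
    MPI_Type_commit(results_type);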
@@ -129,6 +130,11 @@ void print_global_results(results_data *results, int resizes) {
     printf("%lf ", results->spawn_time[i]);
   }

+  printf("\nTthread: ");
+  for(i=0; i< resizes - 1; i++) {
+    printf("%lf ", results->spawn_thread_time[i]);
+  }
+
   printf("\nTsync: ");
   for(i=1; i < resizes; i++) {
     printf("%lf ", results->sync_time[i]);

@@ -158,6 +164,7 @@ void init_results_data(results_data **results, int resizes, int iters_size) {
   *results = malloc(1 * sizeof(results_data));

   (*results)->spawn_time = calloc(resizes, sizeof(double));
+  (*results)->spawn_thread_time = calloc(resizes, sizeof(double));
   (*results)->sync_time = calloc(resizes, sizeof(double));
   (*results)->async_time = calloc(resizes, sizeof(double));

@@ -188,6 +195,7 @@ void realloc_results_iters(results_data *results, int needed) {
  */
 void free_results_data(results_data **results) {
   free((*results)->spawn_time);
+  free((*results)->spawn_thread_time);
   free((*results)->sync_time);
   free((*results)->async_time);
...
@@ -7,8 +7,8 @@ typedef struct {
   double *iters_time;
   int *iters_type, iter_index, iters_size;

-  // Spawn, Sync and Async time
-  double spawn_start, *spawn_time;
+  // Spawn, Thread, Sync, Async and Exec time
+  double spawn_start, *spawn_time, *spawn_thread_time;
   double sync_start, *sync_time;
   double async_start, *async_time;
   double exec_start, exec_time;
...
@@ -17,7 +17,7 @@ void Sons_init();
 int checkpoint(int iter, int state, MPI_Request **comm_req);
 int TC(int numS, int comm_type);
-int start_redistribution(int numS, MPI_Request **comm_req);
+int start_redistribution(int iter, int numS, MPI_Request **comm_req);
 int check_redistribution(int iter, MPI_Request **comm_req);
 int end_redistribution(int iter);

@@ -129,6 +129,7 @@ int work() {
   state = MAL_COMM_UNINITIALIZED;
   res = 0;

+  //if(group->myId == ROOT) printf("Iter_start %d\n", group->iter_start);
   for(iter=group->iter_start; iter < maxiter; iter++) {
     iterate(matrix, config_file->matrix_tam, state);
   }

@@ -172,15 +173,15 @@ int checkpoint(int iter, int state, MPI_Request **comm_req) {
     state = TC(group->numS, comm_type);
     if (state == COMM_FINISHED){
-      state = start_redistribution(group->numS, comm_req);
+      state = start_redistribution(0, group->numS, comm_req);
     }

   } else if(state == COMM_IN_PROGRESS) { // Check whether the spawn has finished and begin the redistribution
-    state = check_slurm_comm(group->myId, ROOT, &(group->children));
+    state = check_slurm_comm(group->myId, ROOT, group->numP, &(group->children));
     if (state == COMM_FINISHED) {
       results->spawn_time[group->grp] = MPI_Wtime() - results->spawn_start;
-      state = start_redistribution(group->numS, comm_req);
+      state = start_redistribution(iter, group->numS, comm_req);
     }

   } else if(state == MAL_ASYNC_PENDING) {

@@ -207,6 +208,10 @@ int TC(int numS, int comm_type){
   comm_state = init_slurm_comm(group->argv, group->myId, numS, ROOT, dist, comm_type, MPI_COMM_WORLD, &(group->children));
   if(comm_type == COMM_SPAWN_SERIAL)
     results->spawn_time[group->grp] = MPI_Wtime() - results->spawn_start;
+  else if(comm_type == COMM_SPAWN_PTHREAD) {
+    results->spawn_thread_time[group->grp] = MPI_Wtime() - results->spawn_start;
+    results->spawn_start = MPI_Wtime();
+  }

   return comm_state;
 }
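The new COMM_SPAWN_PTHREAD branch splits the spawn measurement in two: spawn_thread_time keeps only the cost of creating the helper thread (init_slurm_comm returns as soon as the thread exists), and spawn_start is reset so that the later MPI_Wtime() - results->spawn_start in checkpoint() times only the background spawn itself. A minimal self-contained sketch of the pattern; spawn_worker and timed_pthread_spawn are hypothetical names, not the repo's API:

    #include <mpi.h>
    #include <pthread.h>

    /* Hypothetical stand-in for the routine that runs MPI_Comm_spawn in
     * the background. */
    static void *spawn_worker(void *arg) { return NULL; }

    /* Returns the thread-creation cost and resets *spawn_start so the
     * caller can later time only the background spawn. */
    static double timed_pthread_spawn(pthread_t *th, double *spawn_start) {
        double t0 = MPI_Wtime();
        pthread_create(th, NULL, spawn_worker, NULL); /* returns immediately */
        double cost = MPI_Wtime() - t0;               /* thread-creation cost */
        *spawn_start = MPI_Wtime();                   /* spawn timed from here on */
        return cost;
    }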
@@ -224,7 +229,7 @@ int TC(int numS, int comm_type){
  * Finally, results data is sent to the children and both process
  * groups disconnect.
  */
-int start_redistribution(int numS, MPI_Request **comm_req) {
+int start_redistribution(int iter, int numS, MPI_Request **comm_req) {
   int rootBcast = MPI_PROC_NULL;
   if(group->myId == ROOT) rootBcast = MPI_ROOT;

@@ -242,7 +247,7 @@ int start_redistribution(int numS, MPI_Request **comm_req) {
       return MAL_ASYNC_PENDING;
     }
   }
-  return end_redistribution(0);
+  return end_redistribution(iter);
 }
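Threading iter through start_redistribution() into end_redistribution(iter) lets the children learn the iteration the parents had reached after a background spawn, instead of a hard-coded 0; presumably it feeds the MPI_Bcast of group->iter_start that Sons_init() performs below. A hedged sketch of the parents' side of such an intercommunicator broadcast (send_iter_start is a hypothetical helper, not in the repo):

    /* Only the root parent passes MPI_ROOT; the rest pass MPI_PROC_NULL,
     * matching the rootBcast convention used above. */
    static void send_iter_start(int iter, int myId, MPI_Comm children) {
        int rootBcast = (myId == ROOT) ? MPI_ROOT : MPI_PROC_NULL;
        MPI_Bcast(&iter, 1, MPI_INT, rootBcast, children);
    }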
@@ -396,6 +401,7 @@ void Sons_init() {
     results->async_time[group->grp] = MPI_Wtime();
     MPI_Bcast(&(group->iter_start), 1, MPI_INT, ROOT, group->parents);
   }
+  MPI_Bcast(&(group->iter_start), 1, MPI_INT, ROOT, group->parents); //FIXME Remove -- must account for Pthread and async
   if(config_file->sdr) { // Receive synchronous data
     recv_sync(&(group->sync_array), config_file->sdr, group->myId, group->numP, ROOT, group->parents, numP_parents);
     results->sync_time[group->grp] = MPI_Wtime();

@@ -449,7 +455,8 @@ void iterate(double *matrix, int n, int async_comm) {
   }
   actual_time = MPI_Wtime(); // Save timings

-  if(async_comm == MAL_ASYNC_PENDING) { // An asynchronous data redistribution is under way
+  // TODO Differentiate between the two cases in the IO
+  if(async_comm == MAL_ASYNC_PENDING || async_comm == COMM_IN_PROGRESS) { // An asynchronous data redistribution is under way
     operations=0;
   }
...
@@ -109,11 +109,34 @@ int init_slurm_comm(char **argv, int myId, int numP, int root, int type_dist, int type_creation, MPI_Comm comm, MPI_Comm *child)
  * Checks whether a configuration for creating a new group of processes is ready,
  * and if it is, returns the communicator to these new processes.
  */
-int check_slurm_comm(int myId, int root, MPI_Comm *child) {
+int check_slurm_comm(int myId, int root, int numP, MPI_Comm *child) { // TODO Remove numP if it is not used
   int state=-10;

   if(slurm_data->type_creation == COMM_SPAWN_PTHREAD) {
-    MPI_Allreduce(&commSlurm, &state, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+    //MPI_Allreduce(&commSlurm, &state, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+    if(myId == root) {
+      int i, recv_state;
+      state = commSlurm;
+      for(i=0; i<numP; i++) { // Recv states
+        if(i != myId) {
+          MPI_Recv(&recv_state, 1, MPI_INT, i, 120, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+          if(recv_state == COMM_IN_PROGRESS) {
+            state = recv_state;
+          }
+        }
+      }
+      for(i=0; i<numP; i++) { // Send state
+        if(i != myId) {
+          MPI_Send(&state, 1, MPI_INT, i, 120, MPI_COMM_WORLD);
+        }
+      }
+    } else {
+      MPI_Send(&commSlurm, 1, MPI_INT, root, 120, MPI_COMM_WORLD);
+      MPI_Recv(&state, 1, MPI_INT, root, 120, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+
     if(state != COMM_FINISHED) return state; // Continue only if asynchronous process creation has ended
   } else {
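The commented-out one-liner computed the same global state collectively: assuming COMM_IN_PROGRESS compares lower than COMM_FINISHED (an assumption; the constants are defined elsewhere), MPI_MIN made any still-in-progress rank win. The explicit send/receive exchange makes that rule independent of the constants' numeric ordering, at the price of O(numP) point-to-point messages through the root instead of a tree-based collective:

    /* Equivalent collective form of the exchange above (hedged; relies on
     * COMM_IN_PROGRESS < COMM_FINISHED so that MPI_MIN means "any rank
     * still in progress wins"). */
    int state;
    MPI_Allreduce(&commSlurm, &state, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);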
...
@@ -15,4 +15,4 @@
 #define COMM_SPAWN_PTHREAD 1

 int init_slurm_comm(char **argv, int myId, int numP, int root, int type_dist, int type_creation, MPI_Comm comm, MPI_Comm *child);
-int check_slurm_comm(int myId, int root, MPI_Comm *child);
+int check_slurm_comm(int myId, int root, int numP, MPI_Comm *child);
...
@@ -2,6 +2,9 @@
 #SBATCH -N 1

+dir="/home/martini/malleability_benchmark"
+codeDir="/Codes"
+
 echo "MPICH"
 module load mpich-3.4.1-noucx
 #export HYDRA_DEBUG=1

@@ -9,8 +12,8 @@ module load mpich-3.4.1-noucx
 numP=$(bash recordMachinefile.sh $1)

-mpiexec -f hostfile.o$SLURM_JOB_ID ./a.out $1 $2
+#mpirun -f hostfile.o$SLURM_JOB_ID ./a.out $1 $2
+mpirun -f hostfile.o$SLURM_JOB_ID $dir$codeDir/a.out $1 $2

 rm hostfile.o$SLURM_JOB_ID
 echo "END RUN"