Iker Martín Álvarez / Proteo / Commits

Commit 7b214a6c, authored Feb 11, 2022 by iker_martin
The Slurm call has been removed to avoid interference.
Parent: 14574ff0
Changes: 9 files
Codes/Main/Main.c
...
...
@@ -50,6 +50,16 @@ int main(int argc, char *argv[]) {
   int req;
   int im_child; //FIXME El codigo no es capaz de hacer mas de una redistribucion - Arreglar malleabilityTypes.c
+  int num_cpus, num_nodes; //nodelist_len; //FIXME Eliminar cuando se utilice Slurm
+  char *nodelist = NULL;
+  num_cpus = 20; //FIXME NUMERO MAGICO
+  if(argc >= 5) {
+    nodelist = argv[3];
+    //nodelist_len = strlen(nodelist);
+    num_nodes = atoi(argv[4]);
+  }
   MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &req);
   MPI_Comm_size(MPI_COMM_WORLD, &numP);
   MPI_Comm_rank(MPI_COMM_WORLD, &myId);
...
...
@@ -60,7 +70,7 @@ int main(int argc, char *argv[]) {
   }
   init_group_struct(argv, argc, myId, numP);
-  im_child = init_malleability(myId, numP, ROOT, comm, argv[0]);
+  im_child = init_malleability(myId, numP, ROOT, comm, argv[0], nodelist, num_cpus, num_nodes);
   if(!im_child) { //TODO REFACTOR Simplificar inicio
     init_application();
...
...
@@ -96,6 +106,18 @@ int main(int argc, char *argv[]) {
     group->iter_start = *((int *)value);
     free(value);
+    //FIXME Eliminar cuando se utilice SLURM
+    /*
+    malleability_get_data(&value, 4, 1, 1);
+    num_nodes = *((int *)value);
+    free(value);
+    malleability_get_data(&value, 5, 1, 1);
+    nodelist = (char *)value;
+    //free(value);
+    nodelist_len = strlen(nodelist);
+    */
     group->grp = group->grp + 1;
   }
...
...
@@ -116,6 +138,10 @@ int main(int argc, char *argv[]) {
       malleability_add_data(&(group->grp), 1, MAL_INT, 1, 1);
       malleability_add_data(&run_id, 1, MAL_INT, 1, 1);
       malleability_add_data(&(group->iter_start), 1, MAL_INT, 1, 1);
+      //FIXME Eliminar cuando se utilice SLURM
+      //malleability_add_data(&num_nodes, 1, MAL_INT, 1, 1);
+      //malleability_add_data(&nodelist, nodelist_len, MAL_CHAR, 1, 1);
     }
   }
...
...
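With this change Main.c no longer asks Slurm for the node information; the benchmark now receives the nodelist as argv[3] and the node count as argv[4] (the run scripts further down pass $SLURM_JOB_NODELIST and $SLURM_JOB_NUM_NODES) and forwards them to init_malleability. Below is a minimal stand-alone sketch of that calling convention; it is not part of the commit and only prints what it parses.

    /* Sketch only: parses the new command-line convention where argv[3] is the
     * Slurm nodelist string and argv[4] is the number of nodes. */
    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, char *argv[]) {
        char *nodelist = NULL;
        int num_nodes = 0;
        int num_cpus = 20; /* placeholder, mirrors the FIXME "magic number" above */

        if (argc >= 5) {
            nodelist = argv[3];
            num_nodes = atoi(argv[4]);
        }
        printf("nodelist=%s num_nodes=%d num_cpus=%d\n",
               nodelist ? nodelist : "(none)", num_nodes, num_cpus);
        return 0;
    }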
Codes/malleability/ProcessDist.c
...
...
@@ -18,6 +18,8 @@ double end_time; //FIXME REFACTOR
 struct Slurm_data {
   char *cmd; // Executable name
+  char *nodelist;
+  int num_cpus, num_nodes;
   int qty_procs, result_procs;
   MPI_Info info;
   int type_creation;
...
...
@@ -44,9 +46,9 @@ void generic_spawn(int myId, int root, int is_single, MPI_Comm *child, MPI_Comm
 void single_spawn_connection(int myId, int root, MPI_Comm comm, MPI_Comm *child);
 int create_processes(int myId, int root, MPI_Comm *child, MPI_Comm comm);
-void node_dist(slurm_job_info_t job_record, int type, int total_procs, int already_created, int **qty, int *used_nodes);
+void node_dist(int type, int total_procs, int already_created, int **qty, int *used_nodes);
-void fill_str_hostfile(slurm_job_info_t job_record, int *qty, int used_nodes, char **hostfile_str);
+void fill_str_hostfile(int *qty, int used_nodes, char **hostfile_str);
 int write_str_node(char **hostfile_str, int len_og, int qty, char *node_name);
 //@deprecated functions
...
...
@@ -72,7 +74,7 @@ void fill_hostfile(slurm_job_info_t job_record, int ptr, int *qty, int used_node
 * Devuelve el estado de el procedimiento. Si no devuelve "COMM_FINISHED", es necesario llamar a
 * "check_slurm_comm()".
 */
-int init_slurm_comm(char *argv, int myId, int numP, int numC, int root, int type_dist, int type_creation, int spawn_is_single, MPI_Comm comm, MPI_Comm *child) {
+int init_slurm_comm(char *argv, int num_cpus, int num_nodes, char *nodelist, int myId, int numP, int numC, int root, int type_dist, int type_creation, int spawn_is_single, MPI_Comm comm, MPI_Comm *child) {
   int spawn_qty, already_created = 0;
   slurm_data = malloc(sizeof(struct Slurm_data));
...
...
@@ -80,6 +82,9 @@ int init_slurm_comm(char *argv, int myId, int numP, int numC, int root, int type
   slurm_data->type_creation = type_creation;
   slurm_data->spawn_is_single = spawn_is_single;
   slurm_data->result_procs = numC;
+  slurm_data->num_cpus = num_cpus;
+  slurm_data->num_nodes = num_nodes;
+  slurm_data->nodelist = nodelist;
   spawn_qty = numC;
   if(type_creation == COMM_SPAWN_MERGE || type_creation == COMM_SPAWN_MERGE_PTHREAD) {
     if(numP < slurm_data->result_procs) {
...
...
@@ -305,7 +310,7 @@ void generic_spawn(int myId, int root, int spawn_is_single, MPI_Comm *child, MPI
     MPI_Bcast(&spawn_is_single, 1, MPI_INT, rootBcast, *child);
   }
   pthread_mutex_lock(&spawn_mutex);
-  commState = MAL_SPAWN_COMPLETED;
+  commState = MAL_SPAWN_COMPLETED; end_time = MPI_Wtime();
   pthread_mutex_unlock(&spawn_mutex);
 }
...
...
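The generic_spawn hunk above now records end_time with MPI_Wtime() in the same critical section that publishes MAL_SPAWN_COMPLETED, so any thread reading the spawn state sees a consistent state/timestamp pair. A small sketch of that pattern follows; the names state, end_time and state_mutex are illustrative, not the project's.

    /* Sketch: publish a completion flag and its timestamp under one mutex. */
    #include <mpi.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t state_mutex = PTHREAD_MUTEX_INITIALIZER;
    static int state = 0;          /* 0 = pending, 1 = completed */
    static double end_time = 0.0;

    static void mark_completed(void) {
        pthread_mutex_lock(&state_mutex);
        state = 1;
        end_time = MPI_Wtime();    /* timestamp taken while the lock is held */
        pthread_mutex_unlock(&state_mutex);
    }

    int main(int argc, char *argv[]) {
        int req;
        MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &req);
        mark_completed();
        pthread_mutex_lock(&state_mutex);
        printf("state=%d end_time=%f\n", state, end_time);
        pthread_mutex_unlock(&state_mutex);
        MPI_Finalize();
        return 0;
    }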
@@ -406,27 +411,27 @@ void proc_adapt_shrink(int numC, MPI_Comm *comm, int myId) {
 * para los procesos y creando un fichero hostfile.
 */
 void processes_dist(char *argv, int numP_childs, int already_created, int type) {
-  int jobId;
-  char *tmp;
-  job_info_msg_t *j_info;
-  slurm_job_info_t last_record;
+  //int jobId;
+  //char *tmp;
+  //job_info_msg_t *j_info;
+  //slurm_job_info_t last_record;
   int used_nodes = 0;
   int *procs_array;
   char *hostfile;
   // Get Slurm job info
-  tmp = getenv("SLURM_JOB_ID");
-  jobId = atoi(tmp);
-  slurm_load_job(&j_info, jobId, 1);
-  last_record = j_info->job_array[j_info->record_count - 1];
+  //tmp = getenv("SLURM_JOB_ID");
+  //jobId = atoi(tmp);
+  //slurm_load_job(&j_info, jobId, 1);
+  //last_record = j_info->job_array[j_info->record_count - 1];
   //COPY PROGRAM NAME
   slurm_data->cmd = malloc(strlen(argv) * sizeof(char));
   strcpy(slurm_data->cmd, argv);
   // GET NEW DISTRIBUTION
-  node_dist(last_record, type, numP_childs, already_created, &procs_array, &used_nodes);
+  node_dist(type, numP_childs, already_created, &procs_array, &used_nodes); //TODO REFACTOR
   slurm_data->qty_procs = numP_childs;
  /*
...
...
@@ -442,16 +447,17 @@ void processes_dist(char *argv, int numP_childs, int already_created, int type)
   close(ptr);
   */
-  // TEST
-  fill_str_hostfile(last_record, procs_array, used_nodes, &hostfile);
+  // CREATE AND SET STRING HOSTFILE
+  fill_str_hostfile(procs_array, used_nodes, &hostfile); //TODO REFACTOR
   MPI_Info_create(&(slurm_data->info));
   printf("hosts %s\n", hostfile);
   MPI_Info_set(slurm_data->info, "hosts", hostfile);
   free(hostfile);
   free(procs_array);
   // Free JOB INFO
-  slurm_free_job_info_msg(j_info);
+  //slurm_free_job_info_msg(j_info); //TODO REFACTOR
 }
/*
...
...
@@ -466,26 +472,26 @@ void processes_dist(char *argv, int numP_childs, int already_created, int type)
 * COMM_PHY_CPU (2): Orientada a completar la capacidad de un nodo antes de
 * ocupar otro nodo.
 */
-void node_dist(slurm_job_info_t job_record, int type, int total_procs, int already_created, int **qty, int *used_nodes) {
+void node_dist(int type, int total_procs, int already_created, int **qty, int *used_nodes) {
   int i, asigCores;
   int tamBl, remainder;
   int *procs;
-  procs = calloc(job_record.num_nodes, sizeof(int)); // Numero de procesos por nodo
+  procs = calloc(slurm_data->num_nodes, sizeof(int)); // Numero de procesos por nodo
   /* GET NEW DISTRIBUTION */
   if(type == 1) { // DIST NODES
-    *used_nodes = job_record.num_nodes;
-    tamBl = total_procs / job_record.num_nodes;
-    remainder = total_procs % job_record.num_nodes;
+    *used_nodes = slurm_data->num_nodes;
+    tamBl = total_procs / slurm_data->num_nodes;
+    remainder = total_procs % slurm_data->num_nodes;
     for(i=0; i<remainder; i++) {
       procs[i] = tamBl + 1;
     }
-    for(i=remainder; i<job_record.num_nodes; i++) {
+    for(i=remainder; i<slurm_data->num_nodes; i++) {
       procs[i] = tamBl;
     }
   } else if (type == 2) { // DIST CPUs
-    tamBl = job_record.num_cpus / job_record.num_nodes;
+    tamBl = slurm_data->num_cpus / slurm_data->num_nodes;
     asigCores = 0;
     i = already_created / tamBl;
     *used_nodes = already_created / tamBl;
...
...
@@ -493,7 +499,7 @@ void node_dist(slurm_job_info_t job_record, int type, int total_procs, int alrea
     while(asigCores+tamBl <= total_procs) {
       asigCores += tamBl;
       procs[i] += tamBl;
-      i = (i+1) % job_record.num_nodes;
+      i = (i+1) % slurm_data->num_nodes;
       (*used_nodes)++;
     }
...
...
@@ -501,7 +507,7 @@ void node_dist(slurm_job_info_t job_record, int type, int total_procs, int alrea
       procs[i] += total_procs - asigCores;
       (*used_nodes)++;
     }
-    if(*used_nodes > job_record.num_nodes) *used_nodes = job_record.num_nodes; //FIXME Si ocurre esto no es un error?
+    if(*used_nodes > slurm_data->num_nodes) *used_nodes = slurm_data->num_nodes; //FIXME Si ocurre esto no es un error?
   }
   *qty = calloc(*used_nodes, sizeof(int)); // Numero de procesos por nodo
...
...
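node_dist now takes the node and CPU counts from slurm_data instead of a slurm_job_info_t record, but the distribution arithmetic is unchanged: in the node-oriented case (type 1) each node gets total_procs/num_nodes processes and the remainder is spread one extra process per node. A stand-alone sketch of that split follows; the helper name split_per_node is illustrative, not the project's.

    /* Sketch of the node-oriented split performed by node_dist (type 1). */
    #include <stdio.h>
    #include <stdlib.h>

    static int *split_per_node(int total_procs, int num_nodes) {
        int *procs = calloc(num_nodes, sizeof(int));
        int tamBl = total_procs / num_nodes;
        int remainder = total_procs % num_nodes;
        for (int i = 0; i < num_nodes; i++)
            procs[i] = tamBl + (i < remainder ? 1 : 0);
        return procs;
    }

    int main(void) {
        int num_nodes = 3, total_procs = 8;
        int *procs = split_per_node(total_procs, num_nodes);
        for (int i = 0; i < num_nodes; i++)
            printf("node %d -> %d procs\n", i, procs[i]);  /* prints 3, 3, 2 */
        free(procs);
        return 0;
    }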
@@ -516,12 +522,12 @@ void node_dist(slurm_job_info_t job_record, int type, int total_procs, int alrea
 * Crea y devuelve una cadena para ser utilizada por la llave "hosts"
 * al crear procesos e indicar donde tienen que ser creados.
 */
-void fill_str_hostfile(slurm_job_info_t job_record, int *qty, int used_nodes, char **hostfile_str) {
+void fill_str_hostfile(int *qty, int used_nodes, char **hostfile_str) {
   int i = 0, len = 0;
   char *host;
   hostlist_t hostlist;
-  hostlist = slurm_hostlist_create(job_record.nodes);
+  hostlist = slurm_hostlist_create(slurm_data->nodelist);
   while((host = slurm_hostlist_shift(hostlist)) && i < used_nodes) {
     if(qty[i] != 0) {
       len = write_str_node(hostfile_str, len, qty[i], host);
...
...
@@ -530,7 +536,6 @@ void fill_str_hostfile(slurm_job_info_t job_record, int *qty, int used_nodes, ch
     free(host);
   }
   slurm_hostlist_destroy(hostlist);
 }
/*
...
...
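fill_str_hostfile now expands slurm_data->nodelist with slurm_hostlist_create(), and as processes_dist shows above, the resulting string is attached to the spawn MPI_Info object under the "hosts" key. The sketch below shows that flow end to end, assuming a comma-separated host:count value such as "n0:2,n1:2" (the exact format accepted by the "hosts" key is implementation specific; the scripts in this commit use MPICH). The spawn call itself is left commented out so the sketch runs anywhere.

    /* Sketch: build a "host:count" string and hand it to MPI_Comm_spawn via MPI_Info. */
    #include <mpi.h>
    #include <stdio.h>
    #include <string.h>

    int main(int argc, char *argv[]) {
        MPI_Init(&argc, &argv);

        const char *nodes[] = {"n0", "n1"};   /* hypothetical node names */
        int qty[] = {2, 2};
        char hosts[256] = "";
        for (int i = 0; i < 2; i++) {
            char entry[64];
            snprintf(entry, sizeof(entry), "%s%s:%d", i ? "," : "", nodes[i], qty[i]);
            strcat(hosts, entry);
        }
        printf("hosts %s\n", hosts);   /* e.g. "n0:2,n1:2" */

        MPI_Info info;
        MPI_Info_create(&info);
        MPI_Info_set(info, "hosts", hosts);
        /* A spawn would then receive the placement hint, e.g.:
         * MPI_Comm child;
         * MPI_Comm_spawn("./a.out", MPI_ARGV_NULL, 4, info, 0,
         *                MPI_COMM_SELF, &child, MPI_ERRCODES_IGNORE);
         */
        MPI_Info_free(&info);
        MPI_Finalize();
        return 0;
    }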
Codes/malleability/ProcessDist.h
...
...
@@ -5,7 +5,7 @@
 #include <slurm/slurm.h>
 #include "malleabilityStates.h"
-int init_slurm_comm(char *argv, int myId, int numP, int numC, int root, int type_dist, int type_creation, int spawn_is_single, MPI_Comm comm, MPI_Comm *child);
+int init_slurm_comm(char *argv, int num_cpus, int num_nodes, char *nodelist, int myId, int numP, int numC, int root, int type_dist, int type_creation, int spawn_is_single, MPI_Comm comm, MPI_Comm *child);
 int check_slurm_comm(int myId, int root, int numP, MPI_Comm *child, MPI_Comm comm, MPI_Comm comm_thread, double *end_real_time);
 void malleability_establish_connection(int myId, int root, MPI_Comm *intercomm);
...
...
Codes/malleability/malleabilityManager.c
...
...
@@ -45,7 +45,8 @@ typedef struct { //FIXME numC_spawned no se esta usando
   MPI_Comm intercomm;
   MPI_Comm user_comm;
-  char *name_exec;
+  char *name_exec, *nodelist;
+  int num_cpus, num_nodes;
 } malleability_t;
 int state = MAL_UNRESERVED; //FIXME Mover a otro lado
...
...
@@ -68,7 +69,7 @@ malleability_data_t *dist_a_data;
 * la comunicacion los procesos hijo estan preparados para ejecutar la
 * aplicacion.
 */
-int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_exec) {
+int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_exec, char *nodelist, int num_cpus, int num_nodes) {
   MPI_Comm dup_comm, thread_comm;
   mall_conf = (malleability_config_t *) malloc(sizeof(malleability_config_t));
...
...
@@ -87,7 +88,11 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex
   mall->comm = dup_comm;
   mall->thread_comm = thread_comm; // TODO Refactor -- Crear solo si es necesario?
   mall->user_comm = comm;
   mall->name_exec = name_exec;
+  mall->nodelist = nodelist;
+  mall->num_cpus = num_cpus;
+  mall->num_nodes = num_nodes;
   rep_s_data->entries = 0;
   rep_a_data->entries = 0;
...
...
@@ -436,7 +441,13 @@ void Children_init() {
   // TODO Crear funcion especifica y anyadir para Asinc
   // TODO Tener en cuenta el tipo y qty
   for(i=0; i<rep_s_data->entries; i++) {
-    MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], MPI_INT, root_parents, mall->intercomm);
+    MPI_Datatype datatype;
+    if(rep_s_data->types[i] == MAL_INT) {
+      datatype = MPI_INT;
+    } else {
+      datatype = MPI_CHAR;
+    }
+    MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], datatype, root_parents, mall->intercomm);
   }
 }
...
...
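Children_init (and end_redistribution further down) now pick the MPI datatype for each replicated-data broadcast from the entry's malleability type tag instead of hard-coding MPI_INT. A minimal sketch of that selection follows; the MAL_INT/MAL_CHAR values are stand-ins for the constants in malleabilityStates.h, not the project's actual definitions.

    /* Sketch: choose the MPI datatype for a broadcast from a type tag. */
    #include <mpi.h>
    #include <stdio.h>

    enum { MAL_INT = 0, MAL_CHAR = 1 };   /* illustrative values */

    static MPI_Datatype mal_to_mpi(int mal_type) {
        return mal_type == MAL_INT ? MPI_INT : MPI_CHAR;
    }

    int main(int argc, char *argv[]) {
        MPI_Init(&argc, &argv);
        int values[4] = {1, 2, 3, 4};
        /* Every rank receives the array with the tag-selected datatype. */
        MPI_Bcast(values, 4, mal_to_mpi(MAL_INT), 0, MPI_COMM_WORLD);
        printf("got %d %d %d %d\n", values[0], values[1], values[2], values[3]);
        MPI_Finalize();
        return 0;
    }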
@@ -476,7 +487,7 @@ int spawn_step(){
     return state;
   }
-  state = init_slurm_comm(mall->name_exec, mall->myId, mall->numP, mall->numC, mall->root, mall_conf->spawn_dist, mall_conf->spawn_type, mall_conf->spawn_is_single, mall->thread_comm, &(mall->intercomm));
+  state = init_slurm_comm(mall->name_exec, mall->num_cpus, mall->num_nodes, mall->nodelist, mall->myId, mall->numP, mall->numC, mall->root, mall_conf->spawn_dist, mall_conf->spawn_type, mall_conf->spawn_is_single, mall->thread_comm, &(mall->intercomm));
   if(mall_conf->spawn_type == COMM_SPAWN_SERIAL || mall_conf->spawn_type == COMM_SPAWN_MERGE)
     mall_conf->results->spawn_time[mall_conf->grp] = MPI_Wtime() - mall_conf->results->spawn_start;
...
...
@@ -593,9 +604,15 @@ int end_redistribution() {
     send_data(mall->numC, dist_s_data, MALLEABILITY_USE_SYNCHRONOUS);
     // TODO Crear funcion especifica y anyadir para Asinc
-    // TODO Tener en cuenta el tipo y qty
+    // TODO Tener en cuenta el tipo
     for(i=0; i<rep_s_data->entries; i++) {
-      MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], MPI_INT, rootBcast, mall->intercomm);
+      MPI_Datatype datatype;
+      if(rep_s_data->types[i] == MAL_INT) {
+        datatype = MPI_INT;
+      } else {
+        datatype = MPI_CHAR;
+      }
+      MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], datatype, rootBcast, mall->intercomm);
     }
   }
...
...
Codes/malleability/malleabilityManager.h
...
...
@@ -8,7 +8,7 @@
 #include "../IOcodes/results.h"
 #include "malleabilityStates.h"
-int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_exec);
+int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_exec, char *nodelist, int num_cpus, int num_nodes);
 void free_malleability();
 void indicate_ending_malleability(int new_outside_state);
 int malleability_checkpoint();
...
...
Codes/runBase.sh
...
...
@@ -6,6 +6,9 @@
 dir="/home/martini/malleability_benchmark"
 codeDir="/Codes"
+nodelist=$SLURM_JOB_NODELIST
+nodes=$SLURM_JOB_NUM_NODES
 echo "MPICH"
 module load mpich-3.4.1-noucx
 #export HYDRA_DEBUG=1
...
...
@@ -13,8 +16,7 @@ module load mpich-3.4.1-noucx
 numP=$(bash recordMachinefile.sh $1)
 #mpirun -f hostfile.o$SLURM_JOB_ID ./a.out $1 $2
-mpirun -print-all-exitcodes -f hostfile.o$SLURM_JOB_ID $dir$codeDir/a.out $1 $2
+mpirun -print-all-exitcodes -f hostfile.o$SLURM_JOB_ID $dir$codeDir/a.out $1 $2 $nodelist $nodes
 rm hostfile.o$SLURM_JOB_ID
 echo "END RUN"
...
...
Codes/test.ini
 [general]
 resizes=1 ; Numero de redistribuciones
 matrix_tam=1000 ; Tamaño en bytes de la matriz de computo
-comm_tam=10000000 ; Tamaño en bytes de los datos a comunicar en cada iteracion. Una sola vez
-SDR=1000 ; Tamaño en bytes a redistribuir de forma sincrona
-ADR=1000 ; Tamaño en bytes a redistribuir de forma asincrona 1000000000
+comm_tam=1000 ; Tamaño en bytes de los datos a comunicar en cada iteracion. Una sola vez
+SDR=100000000 ; Tamaño en bytes a redistribuir de forma sincrona
+ADR=100000000 ; Tamaño en bytes a redistribuir de forma asincrona 1000000000
 AIB=3 ; Indica si las redistribuciones asíncronas se consideran terminadas para los padres
-      ; cuando terminan de enviar (0) o cuando terminan de recibir los valores (1)
+      ; cuando terminan de enviar (0), cuando terminan de recibir los valores (1)
+      ; o usar comunicaciones punto a punto (2), o utilizar hebras(3)
 CST=0 ; Indica como realizar el spawn. (0) Para el método baseline, (1) para el método
       ; baseline con hilos, (2) para el método merge y (3) para el método merge
       ; con hilos
 CSS=0 ; Indica si el spawn se realiza con todos los procesos (0) o solo participa
       ; el proceso raiz (1)
 time=1 ; Tiempo necesario para realizar una iteracion
 ; end [general]
 [resize0]
 ; Grupo inicial(mpirun)
-iters=1 ; Numero de iteraciones a realizar por este grupo
+iters=10 ; Numero de iteraciones a realizar por este grupo
 procs=2 ; Cantidad de procesos en el grupo
 factor=1 ; Factor de coste
 physical_dist=node ; Tipo de redistribución física de los procesos
...
...
@@ -21,3 +28,4 @@ iters=20
 procs=4
 factor=0.5
 physical_dist=node
+;end [resize1]
Exec/arrayRun.sh
...
...
@@ -5,6 +5,10 @@
 dir="/home/martini/malleability_benchmark"
 codeDir="/Codes"
 ResultsDir="/Results"
+nodelist=$SLURM_JOB_NODELIST
+nodes=$SLURM_JOB_NUM_NODES
 module load mpich-3.4.1-noucx
 name_dir=$1
...
...
@@ -40,7 +44,7 @@ do
   for index in 1 2 3 4 5 6 7 8 9 10
   do
     numP=$(bash $dir$codeDir/recordMachinefile.sh $config_file) # Crea el fichero hostfile
-    mpirun -f hostfile.o$SLURM_JOB_ID $dir$codeDir/./bench.out $config_file $i
+    mpirun -f hostfile.o$SLURM_JOB_ID $dir$codeDir/./bench.out $config_file $i $nodelist $nodes
     rm hostfile.o$SLURM_JOB_ID
   done
...
...
Exec/singleRun.sh
...
...
@@ -6,6 +6,9 @@ dir="/home/martini/malleability_benchmark"
 codeDir="/Codes"
 ResultsDir="/Results"
+nodelist=$SLURM_JOB_NODELIST
+nodes=$SLURM_JOB_NUM_NODES
 module load mpich-3.4.1-noucx
 echo "START TEST"
...
...
@@ -24,7 +27,7 @@ for ((i=0; i<qty; i++))
 do
   echo "Iter $i"
   numP=$(bash $dir$codeDir/recordMachinefile.sh $1)
-  mpirun -f hostfile.o$SLURM_JOB_ID $dir$codeDir/bench.out $1 $2
+  mpirun -f hostfile.o$SLURM_JOB_ID $dir$codeDir/bench.out $1 $2 $nodelist $nodes
  rm hostfile.o$SLURM_JOB_ID
 done
...
...