Commit 24c93265 authored by iker_martin
Browse files

Mejorada la creación de procesos. Ya no es necesario crear un fichero hostfile.

parent e38a0246
...@@ -129,7 +129,6 @@ int work() { ...@@ -129,7 +129,6 @@ int work() {
state = MAL_COMM_UNINITIALIZED; state = MAL_COMM_UNINITIALIZED;
res = 0; res = 0;
//if(group->myId == ROOT) printf("Iter_start %d\n", group->iter_start);
for(iter=group->iter_start; iter < maxiter; iter++) { for(iter=group->iter_start; iter < maxiter; iter++) {
iterate(matrix, config_file->matrix_tam, state); iterate(matrix, config_file->matrix_tam, state);
} }
...@@ -168,7 +167,7 @@ int checkpoint(int iter, int state, MPI_Request **comm_req) { ...@@ -168,7 +167,7 @@ int checkpoint(int iter, int state, MPI_Request **comm_req) {
if(config_file->iters[group->grp] > iter || config_file->resizes == group->grp + 1) {return MAL_COMM_UNINITIALIZED;} if(config_file->iters[group->grp] > iter || config_file->resizes == group->grp + 1) {return MAL_COMM_UNINITIALIZED;}
group->numS = config_file->procs[group->grp +1]; group->numS = config_file->procs[group->grp +1];
int comm_type = COMM_SPAWN_PTHREAD; // TODO Pasar a CONFIG int comm_type = COMM_SPAWN_SERIAL; // TODO Pasar a CONFIG
state = TC(group->numS, comm_type); state = TC(group->numS, comm_type);
...@@ -401,7 +400,7 @@ void Sons_init() { ...@@ -401,7 +400,7 @@ void Sons_init() {
results->async_time[group->grp] = MPI_Wtime(); results->async_time[group->grp] = MPI_Wtime();
MPI_Bcast(&(group->iter_start), 1, MPI_INT, ROOT, group->parents); MPI_Bcast(&(group->iter_start), 1, MPI_INT, ROOT, group->parents);
} }
MPI_Bcast(&(group->iter_start), 1, MPI_INT, ROOT, group->parents); //FIXME Quitar -- Que tenga en cuenta Pthread y async //MPI_Bcast(&(group->iter_start), 1, MPI_INT, ROOT, group->parents); //FIXME Quitar -- Que tenga en cuenta Pthread y async
if(config_file->sdr) { // Recibir datos sincronos if(config_file->sdr) { // Recibir datos sincronos
recv_sync(&(group->sync_array), config_file->sdr, group->myId, group->numP, ROOT, group->parents, numP_parents); recv_sync(&(group->sync_array), config_file->sdr, group->myId, group->numP, ROOT, group->parents, numP_parents);
results->sync_time[group->grp] = MPI_Wtime(); results->sync_time[group->grp] = MPI_Wtime();
......
...@@ -81,7 +81,13 @@ int init_slurm_comm(char **argv, int myId, int numP, int root, int type_dist, in ...@@ -81,7 +81,13 @@ int init_slurm_comm(char **argv, int myId, int numP, int root, int type_dist, in
slurm_data->info = MPI_INFO_NULL; slurm_data->info = MPI_INFO_NULL;
} }
create_processes(myId, root, child, comm); create_processes(myId, root, child, comm);
if(myId == root && slurm_data->info != MPI_INFO_NULL) {
MPI_Info_free(&(slurm_data->info));
}
free(slurm_data->cmd);
free(slurm_data); free(slurm_data);
commSlurm = COMM_FINISHED; commSlurm = COMM_FINISHED;
} else if(type_creation == COMM_SPAWN_PTHREAD) { } else if(type_creation == COMM_SPAWN_PTHREAD) {
...@@ -151,7 +157,7 @@ int check_slurm_comm(int myId, int root, int numP, MPI_Comm *child) { // TODO Bo ...@@ -151,7 +157,7 @@ int check_slurm_comm(int myId, int root, int numP, MPI_Comm *child) { // TODO Bo
*child = *returned_comm; *child = *returned_comm;
if(myId == root) { if(myId == root && slurm_data->info != MPI_INFO_NULL) {
MPI_Info_free(&(slurm_data->info)); MPI_Info_free(&(slurm_data->info));
} }
free(slurm_data->cmd); free(slurm_data->cmd);
...@@ -218,8 +224,8 @@ void processes_dist(char *argv[], int numP_childs, int type) { ...@@ -218,8 +224,8 @@ void processes_dist(char *argv[], int numP_childs, int type) {
node_dist(last_record, type, numP_childs, &procs_array, &used_nodes); node_dist(last_record, type, numP_childs, &procs_array, &used_nodes);
slurm_data->qty_procs = numP_childs; slurm_data->qty_procs = numP_childs;
/*
// CREATE/UPDATE HOSTFILE // CREATE/UPDATE HOSTFILE
ptr = create_hostfile(tmp, &hostfile); ptr = create_hostfile(tmp, &hostfile);
MPI_Info_create(&(slurm_data->info)); MPI_Info_create(&(slurm_data->info));
MPI_Info_set(slurm_data->info, "hostfile", hostfile); MPI_Info_set(slurm_data->info, "hostfile", hostfile);
...@@ -228,14 +234,15 @@ void processes_dist(char *argv[], int numP_childs, int type) { ...@@ -228,14 +234,15 @@ void processes_dist(char *argv[], int numP_childs, int type) {
// SET NEW DISTRIBUTION // SET NEW DISTRIBUTION
fill_hostfile(last_record, ptr, procs_array, used_nodes); fill_hostfile(last_record, ptr, procs_array, used_nodes);
close(ptr); close(ptr);
*/
// TEST // TEST
/*
fill_str_hostfile(last_record, procs_array, used_nodes, &hostfile); fill_str_hostfile(last_record, procs_array, used_nodes, &hostfile);
MPI_Info_create(&(slurm_data->info)); MPI_Info_create(&(slurm_data->info));
MPI_Info_set(slurm_data->info, "hosts", hostfile); MPI_Info_set(slurm_data->info, "hosts", hostfile);
*/ free(hostfile);
// Free JOB INFO // Free JOB INFO
slurm_free_job_info_msg(j_info); slurm_free_job_info_msg(j_info);
...@@ -397,6 +404,7 @@ void fill_str_hostfile(slurm_job_info_t job_record, int *qty, int used_nodes, ch ...@@ -397,6 +404,7 @@ void fill_str_hostfile(slurm_job_info_t job_record, int *qty, int used_nodes, ch
free(host); free(host);
} }
slurm_hostlist_destroy(hostlist); slurm_hostlist_destroy(hostlist);
} }
int write_str_node(char **hostfile_str, int len_og, int qty, char *node_name) { int write_str_node(char **hostfile_str, int len_og, int qty, char *node_name) {
...@@ -407,26 +415,27 @@ int write_str_node(char **hostfile_str, int len_og, int qty, char *node_name) { ...@@ -407,26 +415,27 @@ int write_str_node(char **hostfile_str, int len_og, int qty, char *node_name) {
len = qty * (len_node + 1); len = qty * (len_node + 1);
if(len_og == 0) { // Memoria no reservada if(len_og == 0) { // Memoria no reservada
*hostfile_str = (char *) malloc(len * sizeof(char)); *hostfile_str = (char *) malloc(len * sizeof(char) - (1 * sizeof(char)));
} else { // Cadena ya tiene datos } else { // Cadena ya tiene datos
*hostfile_str = (char *) realloc(*hostfile_str, (len_og + len) * sizeof(char)); *hostfile_str = (char *) realloc(*hostfile_str, (len_og + len) * sizeof(char) - (1 * sizeof(char)));
} }
if(hostfile_str == NULL) return -1; // No ha sido posible alojar la memoria if(hostfile_str == NULL) return -1; // No ha sido posible alojar la memoria
ocurrence = (char *) malloc((len_node+1) * sizeof(char)); ocurrence = (char *) malloc((len_node+1) * sizeof(char));
if(ocurrence == NULL) return -1; // No ha sido posible alojar la memoria if(ocurrence == NULL) return -1; // No ha sido posible alojar la memoria
err = sprintf(ocurrence, "%s,", node_name); err = sprintf(ocurrence, ",%s", node_name);
if(err < 0) return -2; // No ha sido posible escribir sobre la variable auxiliar if(err < 0) return -2; // No ha sido posible escribir sobre la variable auxiliar
i=0; i=0;
if(len_og == 0) { if(len_og == 0) { // Si se inicializa, la primera es una copia
i++; i++;
strcpy(*hostfile_str, ocurrence); strcpy(*hostfile_str, node_name);
} }
for(; i<qty; i++){ for(; i<qty; i++){ // Las siguientes se realizan con concatenan
strcat(*hostfile_str, ocurrence); strcat(*hostfile_str, ocurrence);
} }
free(ocurrence); free(ocurrence);
return len; return len+len_og;
} }
...@@ -24,7 +24,7 @@ for ((i=0; i<qty; i++)) ...@@ -24,7 +24,7 @@ for ((i=0; i<qty; i++))
do do
echo "Iter $i" echo "Iter $i"
numP=$(bash $dir$codeDir/recordMachinefile.sh $1) numP=$(bash $dir$codeDir/recordMachinefile.sh $1)
mpirun -f hostfile.o$SLURM_JOB_ID -np $numP $dir$codeDir/bench.out $1 $2 mpirun -f hostfile.o$SLURM_JOB_ID $dir$codeDir/bench.out $1 $2
rm hostfile.o$SLURM_JOB_ID rm hostfile.o$SLURM_JOB_ID
done done
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment