Commit 550508a0 authored by iker_martin's avatar iker_martin
Browse files

Internode group destruction is postponed as much as possible

parent 8bf3566f
......@@ -245,7 +245,9 @@ void MAM_Check_configuration() {
MPI_Allreduce(&mall->internode_group, &global_internodes, 1, MPI_INT, MPI_MAX, mall->comm);
if((MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_MULTIPLE, NULL)
|| MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_PARALLEL, NULL) )
&& global_internodes) { // Remove internode MPI_COMM_WORLDs
&& global_internodes
&& mall->numC < mall->inter_numP) { //DMR ADDITION
// Remove internode MPI_COMM_WORLDs
MAM_Set_key_configuration(MAM_SPAWN_METHOD, MAM_SPAWN_BASELINE, NULL);
}
......
......@@ -9,7 +9,7 @@ int state = MAM_I_UNRESERVED;
* de MaM.
*/
void MAM_Def_main_datatype() {
int i, counts = 13;
int i, counts = 14;
int blocklengths[counts];
MPI_Aint displs[counts];
MPI_Datatype types[counts];
......@@ -35,9 +35,10 @@ void MAM_Def_main_datatype() {
MPI_Get_address(&(mall->num_parents), &displs[7]); //TODO Add only when Single strat active?
MPI_Get_address(&(mall->numC), &displs[8]); //TODO Add only when MultipleSpawn strat active?
MPI_Get_address(&(mall->gid), &displs[9]); //TODO Add only when ParallelSpawn strat active?
MPI_Get_address(&(mall->num_cpus), &displs[10]);
MPI_Get_address(&(mall->num_nodes), &displs[11]);
MPI_Get_address(&(mall->nodelist_len), &displs[12]);
MPI_Get_address(&(mall->inter_numP), &displs[10]);
MPI_Get_address(&(mall->num_cpus), &displs[11]);
MPI_Get_address(&(mall->num_nodes), &displs[12]);
MPI_Get_address(&(mall->nodelist_len), &displs[13]);
MPI_Type_create_struct(counts, blocklengths, displs, types, &mall->struct_type);
MPI_Type_commit(&mall->struct_type);
......
......@@ -67,6 +67,7 @@ typedef struct {
char *name_exec, *nodelist;
int num_cpus, num_nodes, nodelist_len;
int internode_group;
int inter_numP; //DMR ADDITION - If the first group is internode, how many processes it contains
} malleability_t;
/* --- VARIABLES --- */
......
......@@ -126,6 +126,7 @@ int MAM_Init(int root, MPI_Comm *comm, char *name_exec, void (*user_function)(vo
//TODO Check potential improvement - If check_hosts does not use slurm, internode_group could be obtained there
MAM_check_hosts();
mall->internode_group = MAM_Is_internode_group();
mall->inter_numP = mall->internode_group ? mall->numP : 0;
MAM_Set_initial_configuration();
#if MAM_USE_BARRIERS && MAM_DEBUG
......@@ -275,10 +276,7 @@ void MAM_Commit(int *mam_state) {
#endif
// Get times before commiting
if(mall_conf->spawn_method == MAM_SPAWN_BASELINE) {
// This communication is only needed when the root process will become a zombie
malleability_times_broadcast(mall->root_collectives);
}
malleability_times_broadcast(mall->root_collectives);
// Free unneeded communicators
if(mall->tmp_comm != MPI_COMM_WORLD && mall->tmp_comm != MPI_COMM_NULL) MPI_Comm_disconnect(&(mall->tmp_comm));
......
......@@ -289,7 +289,8 @@ int MAM_I_slurm_getjob_hosts_info() {
last_record = j_info->job_array[j_info->record_count - 1];
mall->num_nodes = last_record.num_nodes;
mall->num_cpus = last_record.num_cpus / last_record.num_nodes;
//mall->num_cpus = last_record.num_cpus / last_record.num_nodes;
mall->num_cpus = 20; //FIXME Previous line considers logical cpus, not physical
mall->nodelist_len = strlen(last_record.nodes)+1;
mall->nodelist = (char *) malloc(mall->nodelist_len * sizeof(char));
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment