Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Iker Martín Álvarez
Proteo
Commits
550508a0
Commit
550508a0
authored
Jan 16, 2025
by
iker_martin
Browse files
Internode groups destruction is postponed as much as possible
parent
8bf3566f
Changes
5
Hide whitespace changes
Inline
Side-by-side
Codes/MaM/MAM_Configuration.c
View file @
550508a0
...
...
@@ -245,7 +245,9 @@ void MAM_Check_configuration() {
MPI_Allreduce
(
&
mall
->
internode_group
,
&
global_internodes
,
1
,
MPI_INT
,
MPI_MAX
,
mall
->
comm
);
if
((
MAM_Contains_strat
(
MAM_SPAWN_STRATEGIES
,
MAM_STRAT_SPAWN_MULTIPLE
,
NULL
)
||
MAM_Contains_strat
(
MAM_SPAWN_STRATEGIES
,
MAM_STRAT_SPAWN_PARALLEL
,
NULL
)
)
&&
global_internodes
)
{
// Remove internode MPI_COMM_WORLDs
&&
global_internodes
&&
mall
->
numC
<
mall
->
inter_numP
)
{
//DMR ADDITION
// Remove internode MPI_COMM_WORLDs
MAM_Set_key_configuration
(
MAM_SPAWN_METHOD
,
MAM_SPAWN_BASELINE
,
NULL
);
}
...
...
Codes/MaM/MAM_DataStructures.c
View file @
550508a0
...
...
@@ -9,7 +9,7 @@ int state = MAM_I_UNRESERVED;
* de MaM.
*/
void
MAM_Def_main_datatype
()
{
int
i
,
counts
=
1
3
;
int
i
,
counts
=
1
4
;
int
blocklengths
[
counts
];
MPI_Aint
displs
[
counts
];
MPI_Datatype
types
[
counts
];
...
...
@@ -35,9 +35,10 @@ void MAM_Def_main_datatype() {
MPI_Get_address
(
&
(
mall
->
num_parents
),
&
displs
[
7
]);
//TODO Add only when Single strat active?
MPI_Get_address
(
&
(
mall
->
numC
),
&
displs
[
8
]);
//TODO Add only when MultipleSpawn strat active?
MPI_Get_address
(
&
(
mall
->
gid
),
&
displs
[
9
]);
//TODO Add only when ParallelSpawn strat active?
MPI_Get_address
(
&
(
mall
->
num_cpus
),
&
displs
[
10
]);
MPI_Get_address
(
&
(
mall
->
num_nodes
),
&
displs
[
11
]);
MPI_Get_address
(
&
(
mall
->
nodelist_len
),
&
displs
[
12
]);
MPI_Get_address
(
&
(
mall
->
inter_numP
),
&
displs
[
10
]);
MPI_Get_address
(
&
(
mall
->
num_cpus
),
&
displs
[
11
]);
MPI_Get_address
(
&
(
mall
->
num_nodes
),
&
displs
[
12
]);
MPI_Get_address
(
&
(
mall
->
nodelist_len
),
&
displs
[
13
]);
MPI_Type_create_struct
(
counts
,
blocklengths
,
displs
,
types
,
&
mall
->
struct_type
);
MPI_Type_commit
(
&
mall
->
struct_type
);
...
...
Codes/MaM/MAM_DataStructures.h
View file @
550508a0
...
...
@@ -67,6 +67,7 @@ typedef struct {
char
*
name_exec
,
*
nodelist
;
int
num_cpus
,
num_nodes
,
nodelist_len
;
int
internode_group
;
int
inter_numP
;
//DMR ADDITION - If first group is internode, how many processes are
}
malleability_t
;
/* --- VARIABLES --- */
...
...
Codes/MaM/MAM_Manager.c
View file @
550508a0
...
...
@@ -126,6 +126,7 @@ int MAM_Init(int root, MPI_Comm *comm, char *name_exec, void (*user_function)(vo
//TODO Check potential improvement - If check_hosts does not use slurm, internode_group could be obtained there
MAM_check_hosts
();
mall
->
internode_group
=
MAM_Is_internode_group
();
mall
->
inter_numP
=
mall
->
internode_group
?
mall
->
numP
:
0
;
MAM_Set_initial_configuration
();
#if MAM_USE_BARRIERS && MAM_DEBUG
...
...
@@ -275,10 +276,7 @@ void MAM_Commit(int *mam_state) {
#endif
// Get times before committing
if
(
mall_conf
->
spawn_method
==
MAM_SPAWN_BASELINE
)
{
// This communication is only needed when the root process will become a zombie
malleability_times_broadcast
(
mall
->
root_collectives
);
}
malleability_times_broadcast
(
mall
->
root_collectives
);
// Free unneeded communicators
if
(
mall
->
tmp_comm
!=
MPI_COMM_WORLD
&&
mall
->
tmp_comm
!=
MPI_COMM_NULL
)
MPI_Comm_disconnect
(
&
(
mall
->
tmp_comm
));
...
...
Codes/MaM/MAM_RMS.c
View file @
550508a0
...
...
@@ -289,7 +289,8 @@ int MAM_I_slurm_getjob_hosts_info() {
last_record
=
j_info
->
job_array
[
j_info
->
record_count
-
1
];
mall
->
num_nodes
=
last_record
.
num_nodes
;
mall
->
num_cpus
=
last_record
.
num_cpus
/
last_record
.
num_nodes
;
//mall->num_cpus = last_record.num_cpus / last_record.num_nodes;
mall
->
num_cpus
=
20
;
//FIXME Previous line considers logical cpus, not physical
mall
->
nodelist_len
=
strlen
(
last_record
.
nodes
)
+
1
;
mall
->
nodelist
=
(
char
*
)
malloc
(
mall
->
nodelist_len
*
sizeof
(
char
));
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment