Proteo (Iker Martín Álvarez)
Commit 3b51a4e4, authored Jan 05, 2024 by iker_martin
Added macros to control Debug, barriers and slurm usage
Parent: f5fe619d
Changes: 3 files
Codes/Makefile
...
...
@@ -3,7 +3,15 @@ MCC = mpicc
#C_FLAGS_ALL = -Wconversion -Wpedantic
C_FLAGS = -Wall -Wextra -Wshadow -Wfatal-errors
LD_FLAGS = -lm -pthread
DEF =

USE_MAL_SLURM ?= 0
USE_MAL_BARRIERS ?= 0
USE_MAL_DEBUG ?= 0

ifeq ($(USE_MAL_SLURM),1)
LD_FLAGS += -lslurm
endif
DEF = -DUSE_MAL_SLURM=$(USE_MAL_SLURM) -DUSE_MAL_BARRIERS=$(USE_MAL_BARRIERS) -DUSE_MAL_DEBUG=$(USE_MAL_DEBUG)

.PHONY : clean clear install install_slurm
...
...
@@ -61,8 +69,3 @@ clear:
install: $(BIN) $(CONFIG)
	echo "Done"
# Builds target with slurm
install_slurm: LD_FLAGS += -lslurm
install_slurm: DEF += -DUSE_SLURM
install_slurm: install
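Note on usage (an observation about the new Makefile, not part of the commit): since the three switches are declared with ?=, they default to 0 and can be overridden on the make command line, for example "make USE_MAL_SLURM=1 USE_MAL_DEBUG=1"; they reach the compiler as numeric value macros through the -D name=value definitions. The standalone sketch below is hypothetical and not taken from the repository; it only illustrates why such macros are tested with #if rather than #ifdef: they are always defined, so their value, not their mere presence, selects the code path.

/* flags_demo.c (hypothetical file name).
 * Build, for example:  cc -DUSE_MAL_DEBUG=0 -DUSE_MAL_BARRIERS=1 flags_demo.c -o flags_demo */
#include <stdio.h>

int main(void) {
#if USE_MAL_DEBUG            /* value test: 0 disables this branch even though the macro is defined */
  printf("debug path compiled in\n");
#else
  printf("debug path compiled out\n");
#endif

#ifdef USE_MAL_DEBUG         /* presence test: true even when the value is 0 */
  printf("USE_MAL_DEBUG is defined with value %d\n", USE_MAL_DEBUG);
#endif
  return 0;
}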
Codes/malleability/malleabilityManager.c
...
...
@@ -96,6 +96,11 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex
    return MALLEABILITY_CHILDREN;
  }

  #if USE_MAL_BARRIERS && USE_MAL_DEBUG
    if(mall->myId == mall->root) printf("MaM: Using barriers to record times.\n");
  #endif

  if(nodelist != NULL) { //TODO To be deprecated by using Slurm or else statement
    mall->nodelist_len = strlen(nodelist);
  } else { // If no nodelist is detected, get it from the actual run
...
...
@@ -160,7 +165,10 @@ int malleability_checkpoint() {
    case MALL_NOT_STARTED:
      reset_malleability_times();
      // Check whether a resize has to be performed
      //MPI_Barrier(mall->comm);
      #if USE_MAL_BARRIERS
        MPI_Barrier(mall->comm);
      #endif
      mall_conf->times->malleability_start = MPI_Wtime();
      //if(CHECK_RMS()) {return MALL_DENIED;}
...
...
@@ -175,7 +183,9 @@ int malleability_checkpoint() {
    case MALL_SPAWN_SINGLE_PENDING:
      state = check_spawn_state(&(mall->intercomm), mall->comm, &end_real_time);
      if (state == MALL_SPAWN_COMPLETED || state == MALL_SPAWN_ADAPTED) {
        //MPI_Barrier(mall->comm);
        #if USE_MAL_BARRIERS
          MPI_Barrier(mall->comm);
        #endif
        mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start;
        malleability_checkpoint();
...
...
@@ -200,13 +210,18 @@ int malleability_checkpoint() {
      break;
    case MALL_SPAWN_ADAPT_PENDING:
      //MPI_Barrier(mall->comm);
      #if USE_MAL_BARRIERS
        MPI_Barrier(mall->comm);
      #endif
      mall_conf->times->spawn_start = MPI_Wtime();
      unset_spawn_postpone_flag(state);
      state = check_spawn_state(&(mall->intercomm), mall->comm, &end_real_time);

      if (!malleability_spawn_contains_strat(mall_conf->spawn_strategies, MALL_SPAWN_PTHREAD, NULL)) {
        //MPI_Barrier(mall->comm);
        #if USE_MAL_BARRIERS
          MPI_Barrier(mall->comm);
        #endif
        mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start;
        malleability_checkpoint();
      }
...
...
@@ -218,7 +233,9 @@ int malleability_checkpoint() {
      break;
    case MALL_DIST_COMPLETED: //TODO Isn't this quite ugly?
      //MPI_Barrier(mall->comm);
      #if USE_MAL_BARRIERS
        MPI_Barrier(mall->comm);
      #endif
      mall_conf->times->malleability_end = MPI_Wtime();
      state = MALL_COMPLETED;
      break;
...
...
@@ -502,7 +519,9 @@ void Children_init() {
  comm_data_info(rep_a_data, dist_a_data, MALLEABILITY_CHILDREN, mall->myId, root_parents, mall->intercomm);
  if(dist_a_data->entries || rep_a_data->entries) { // Receive asynchronous data
    //MPI_Barrier(mall->intercomm);
    #if USE_MAL_BARRIERS
      MPI_Barrier(mall->intercomm);
    #endif
    if(malleability_red_contains_strat(mall_conf->red_strategies, MALL_RED_THREAD, NULL)) {
      recv_data(numP_parents, dist_a_data, MALLEABILITY_USE_SYNCHRONOUS);
...
...
@@ -517,13 +536,17 @@ void Children_init() {
      }
    }

    //MPI_Barrier(mall->intercomm);
    #if USE_MAL_BARRIERS
      MPI_Barrier(mall->intercomm);
    #endif
    mall_conf->times->async_end = MPI_Wtime(); // Take the timestamp of when the asynchronous communication ends
  }

  comm_data_info(rep_s_data, dist_s_data, MALLEABILITY_CHILDREN, mall->myId, root_parents, mall->intercomm);
  if(dist_s_data->entries || rep_s_data->entries) { // Receive synchronous data
    //MPI_Barrier(mall->intercomm);
    #if USE_MAL_BARRIERS
      MPI_Barrier(mall->intercomm);
    #endif
    recv_data(numP_parents, dist_s_data, MALLEABILITY_USE_SYNCHRONOUS);
    // TODO Create a specific function and add it for the asynchronous case
...
...
@@ -537,7 +560,9 @@ void Children_init() {
    }
    MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], datatype, root_parents, mall->intercomm);
  }

  //MPI_Barrier(mall->intercomm);
  #if USE_MAL_BARRIERS
    MPI_Barrier(mall->intercomm);
  #endif
  mall_conf->times->sync_end = MPI_Wtime(); // Take the timestamp of when the synchronous communication ends
}
...
...
@@ -547,7 +572,9 @@ void Children_init() {
    malleability_comms_update(mall->intercomm);
  }

  //MPI_Barrier(mall->comm);
  #if USE_MAL_BARRIERS
    MPI_Barrier(mall->comm);
  #endif
  mall_conf->times->malleability_end = MPI_Wtime(); // Take the timestamp of when malleability ends
  MPI_Comm_disconnect(&(mall->intercomm)); //FIXME Error in OpenMPI + Merge
}
...
...
@@ -563,13 +590,17 @@ void Children_init() {
 * If it is requested in the background, it returns the current state.
 */
int spawn_step(){
  //MPI_Barrier(mall->comm);
  #if USE_MAL_BARRIERS
    MPI_Barrier(mall->comm);
  #endif
  mall_conf->times->spawn_start = MPI_Wtime();

  state = init_spawn(mall->name_exec, mall->num_cpus, mall->num_nodes, mall->nodelist, mall->myId, mall->numP, mall->numC, mall->root, mall_conf->spawn_dist, mall_conf->spawn_method, mall_conf->spawn_strategies, mall->thread_comm, &(mall->intercomm));

  if(!malleability_spawn_contains_strat(mall_conf->spawn_strategies, MALL_SPAWN_PTHREAD, NULL)) {
    //MPI_Barrier(mall->comm);
    #if USE_MAL_BARRIERS
      MPI_Barrier(mall->comm);
    #endif
    mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start;
  }
  return state;
...
...
@@ -616,7 +647,9 @@ int start_redistribution() {
  comm_data_info(rep_a_data, dist_a_data, MALLEABILITY_NOT_CHILDREN, mall->myId, mall->root, mall->intercomm);
  if(dist_a_data->entries || rep_a_data->entries) { // Send asynchronous data
    //FIXME The replicated data (rep_a_data) is not sent
    //MPI_Barrier(mall->intercomm);
    #if USE_MAL_BARRIERS
      MPI_Barrier(mall->intercomm);
    #endif
    mall_conf->times->async_start = MPI_Wtime();
    if(malleability_red_contains_strat(mall_conf->red_strategies, MALL_RED_THREAD, NULL)) {
      return thread_creation();
...
...
@@ -668,7 +701,9 @@ int check_redistribution() {
  }
  MPI_Comm_test_inter(mall->intercomm, &is_intercomm);

  //MPI_Barrier(mall->intercomm);
  #if USE_MAL_BARRIERS
    MPI_Barrier(mall->intercomm);
  #endif
  if(!is_intercomm) mall_conf->times->async_end = MPI_Wtime(); // Merge method only
  return end_redistribution();
}
...
...
@@ -695,7 +730,9 @@ int end_redistribution() {
  comm_data_info(rep_s_data, dist_s_data, MALLEABILITY_NOT_CHILDREN, mall->myId, mall->root, mall->intercomm);
  if(dist_s_data->entries || rep_s_data->entries) { // Send synchronous data
    //MPI_Barrier(mall->intercomm);
    #if USE_MAL_BARRIERS
      MPI_Barrier(mall->intercomm);
    #endif
    mall_conf->times->sync_start = MPI_Wtime();
    send_data(mall->numC, dist_s_data, MALLEABILITY_USE_SYNCHRONOUS);
...
...
@@ -710,7 +747,9 @@ int end_redistribution() {
    }
    MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], datatype, rootBcast, mall->intercomm);
  }

  //MPI_Barrier(mall->intercomm);
  #if USE_MAL_BARRIERS
    MPI_Barrier(mall->intercomm);
  #endif
  if(!is_intercomm) mall_conf->times->sync_end = MPI_Wtime(); // Merge method only
}
...
...
@@ -738,7 +777,9 @@ int end_redistribution() {
///=============================================
//TODO Add comment
int shrink_redistribution() {
  //MPI_Barrier(mall->comm);
  #if USE_MAL_BARRIERS
    MPI_Barrier(mall->comm);
  #endif
  double time_extra = MPI_Wtime();

  //TODO Create Commit function. Processes can perform tasks before that. Then call again Malleability to commit the change
...
...
@@ -758,7 +799,10 @@ int shrink_redistribution() {
    MPI_Comm_free(&(mall->intercomm));

    //MPI_Barrier(mall->comm);
    #if USE_MAL_BARRIERS
      MPI_Barrier(mall->comm);
    #endif
    mall_conf->times->spawn_time += MPI_Wtime() - time_extra;
    return MALL_DIST_COMPLETED;
  } else {
...
...
@@ -852,7 +896,10 @@ int thread_check() {
    return -2;
  }
  MPI_Comm_test_inter(mall->intercomm, &is_intercomm);

  //MPI_Barrier(mall->intercomm);
  #if USE_MAL_BARRIERS
    MPI_Barrier(mall->intercomm);
  #endif
  if(!is_intercomm) mall_conf->times->async_end = MPI_Wtime(); // Merge method only
  return end_redistribution();
}
...
...
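For context, below is a hypothetical standalone sketch, not part of the repository, of the timing idiom that the USE_MAL_BARRIERS guards wrap throughout this file: synchronizing the communicator right before an MPI_Wtime() timestamp makes the recorded interval reflect the slowest rank rather than per-rank arrival skew, at the cost of extra collectives in production runs, which is why the barriers are compile-time optional. The helper name timed_section and the sleep-based workload are invented for the example.

/* barrier_timing_demo.c (hypothetical file name).
 * Build:  mpicc -DUSE_MAL_BARRIERS=1 barrier_timing_demo.c -o demo
 * Run:    mpirun -n 4 ./demo */
#include <stdio.h>
#include <unistd.h>
#include <mpi.h>

/* Measure a code region; the optional barriers align all ranks so the
 * measured interval is comparable across processes. */
static double timed_section(MPI_Comm comm, int rank) {
#if USE_MAL_BARRIERS
  MPI_Barrier(comm);            /* align ranks before starting the clock */
#endif
  double start = MPI_Wtime();

  usleep(1000 * (rank + 1));    /* stand-in for real, rank-dependent work */

#if USE_MAL_BARRIERS
  MPI_Barrier(comm);            /* align ranks again before stopping it */
#endif
  return MPI_Wtime() - start;
}

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  double elapsed = timed_section(MPI_COMM_WORLD, rank);
  printf("Rank %d measured %f s\n", rank, elapsed);
  MPI_Finalize();
  return 0;
}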
Codes/malleability/spawn_methods/ProcessDist.c
...
...
@@ -7,8 +7,6 @@
#include <mpi.h>
#include "ProcessDist.h"
//#define USE_SLURM

//--------------PRIVATE DECLARATIONS---------------//
void node_dist(struct physical_dist dist, int **qty, int *used_nodes);
...
...
@@ -17,7 +15,7 @@ void compact_dist(struct physical_dist dist, int *used_nodes, int *procs);
void generate_info_string(int target_qty, MPI_Info *info);

//--------------------------------SLURM USAGE-------------------------------------//
#ifdef USE_SLURM
#if USE_MAL_SLURM
#include <slurm/slurm.h>
void generate_info_string_slurm(char *nodelist, int *procs_array, size_t nodes, MPI_Info *info);
void fill_str_hosts_slurm(char *nodelist, int *qty, size_t used_nodes, char **hostfile_str);
...
...
@@ -77,7 +75,7 @@ int physical_struct_create(int target_qty, int already_created, int num_cpus, in
 * to be used when creating the processes.
 */
void processes_dist(struct physical_dist dist, MPI_Info *info_spawn) {
#ifdef USE_SLURM
#if USE_MAL_SLURM
  int used_nodes = 0;
  int *procs_array;
  // GET NEW DISTRIBUTION
...
...
@@ -230,7 +228,7 @@ void generate_info_string(int target_qty, MPI_Info *info){
}

//--------------------------------SLURM USAGE-------------------------------------//
#ifdef USE_SLURM
#if USE_MAL_SLURM
/*
 * Creates and returns an MPI_Info object with a hosts/mapping pair
 * indicating the mapping to be used for the new
...
...
@@ -314,7 +312,7 @@ int write_str_node(char **hostfile_str, size_t len_og, size_t qty, char *node_na
//====================================================
//--------------------------------SLURM USAGE-------------------------------------//
#ifdef USE_SLURM
#if USE_MAL_SLURM
/* FIXME To be reviewed
 * @deprecated
 * Generates a hostfile and adds it to an object
...
...
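To close, a hypothetical standalone sketch, not taken from the repository, of the compile-time dispatch pattern that this file's #if USE_MAL_SLURM guards implement: Slurm-specific code only exists in the binary when USE_MAL_SLURM is 1, so builds without libslurm still compile and link, which is also why the Makefile only adds -lslurm under that same condition. The helper name report and the fallback logic are invented for the example; slurm_api_version() is assumed from the public Slurm C API in slurm/slurm.h.

/* slurm_guard_demo.c (hypothetical file name).
 * Build without Slurm:  cc slurm_guard_demo.c -o demo
 * Build with Slurm:     cc -DUSE_MAL_SLURM=1 slurm_guard_demo.c -o demo -lslurm */
#include <stdio.h>
#include <stdlib.h>

#if USE_MAL_SLURM
#include <slurm/slurm.h>
/* Slurm path: only compiled and linked when USE_MAL_SLURM is 1. */
static void report(const char *nodelist) {
  printf("Slurm API version %ld, nodelist: %s\n", slurm_api_version(), nodelist);
}
#else
/* Generic path: no Slurm dependency at all. */
static void report(const char *nodelist) {
  printf("No Slurm support compiled in, nodelist: %s\n", nodelist);
}
#endif

int main(void) {
  const char *nodelist = getenv("SLURM_JOB_NODELIST"); /* set by Slurm inside a job */
  report(nodelist != NULL ? nodelist : "localhost");
  return 0;
}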