Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Iker Martín Álvarez
Proteo
Commits
142780d4
Commit
142780d4
authored
Jan 18, 2024
by
iker_martin
Browse files
Reduced the number of arguments needed in MaM. Reduced the tasks performed by users of MaM.
parent
65641141
Changes
3
Hide whitespace changes
Inline
Side-by-side
Codes/Main/Main.c
View file @
142780d4
...
...
@@ -66,7 +66,7 @@ int main(int argc, char *argv[]) {
}
init_group_struct
(
argv
,
argc
,
myId
,
numP
);
im_child
=
MAM_Init
(
myId
,
numP
,
ROOT
,
comm
,
argv
[
0
],
nodelist
,
num_cpus
,
num_nodes
);
im_child
=
MAM_Init
(
ROOT
,
comm
,
argv
[
0
],
nodelist
,
num_cpus
,
num_nodes
);
if
(
!
im_child
)
{
//TODO REFACTOR Simplificar inicio
init_application
();
...
...
@@ -82,8 +82,6 @@ int main(int argc, char *argv[]) {
// EMPIEZA LA EJECUCION-------------------------------
//
do
{
MPI_Comm_size
(
comm
,
&
(
group
->
numP
));
MPI_Comm_rank
(
comm
,
&
(
group
->
myId
));
if
(
group
->
grp
!=
0
)
{
obtain_op_times
(
0
);
//Obtener los nuevos valores de tiempo para el computo
...
...
@@ -107,15 +105,11 @@ int main(int argc, char *argv[]) {
if
(
res
==
1
)
{
// Se ha llegado al final de la aplicacion
MPI_Barrier
(
comm
);
results
->
exec_time
=
MPI_Wtime
()
-
results
->
exec_start
-
results
->
wasted_time
;
print_local_results
();
}
print_local_results
();
reset_results_index
(
results
);
if
(
res
!=
1
)
{
if
(
comm
!=
MPI_COMM_WORLD
)
MPI_Comm_free
(
&
comm
);
comm
=
new_comm
;
}
group
->
grp
=
group
->
grp
+
1
;
}
while
(
config_file
->
n_groups
>
group
->
grp
&&
config_file
->
groups
[
group
->
grp
].
sm
==
MALL_SPAWN_MERGE
);
...
...
@@ -154,7 +148,7 @@ int main(int argc, char *argv[]) {
* de procesos. En caso contrario se devuelve 0.
*/
int
work
()
{
int
iter
,
maxiter
,
state
,
res
,
commited
,
targets_qty
;
int
iter
,
maxiter
,
state
,
res
,
commited
;
int
wait_completed
=
MAM_CHECK_COMPLETION
;
maxiter
=
config_file
->
groups
[
group
->
grp
].
iters
;
...
...
@@ -182,11 +176,13 @@ int work() {
compute_results_iter
(
results
,
group
->
myId
,
group
->
numP
,
ROOT
,
config_file
->
n_stages
,
config_file
->
capture_method
,
comm
);
if
(
config_file
->
n_groups
==
group
->
grp
+
1
)
{
res
=
1
;
}
else
{
MAM_Get_comm
(
&
new_comm
,
&
targets_qty
);
MAM_Get_comm
(
&
new_comm
);
send_config_file
(
config_file
,
ROOT
,
new_comm
);
results_comm
(
results
,
ROOT
,
config_file
->
n_resizes
,
new_comm
);
MPI_Comm_free
(
&
new_comm
);
MAM_Commit
(
&
commited
,
&
new_comm
);
print_local_results
();
MAM_Commit
(
&
commited
,
&
comm
);
MPI_Comm_size
(
comm
,
&
(
group
->
numP
));
MPI_Comm_rank
(
comm
,
&
(
group
->
myId
));
}
if
(
state
==
MAM_ZOMBIE
)
res
=
state
;
return
res
;
...
...
@@ -220,7 +216,7 @@ double iterate(int async_comm) {
// Se esta realizando una redistribucion de datos asincrona
if
(
async_comm
==
MAM_PENDING
)
{
// TODO Que diferencie entre ambas en el IO
// TODO Que diferencie entre ambas en el IO
results
->
iters_async
+=
1
;
}
...
...
@@ -315,7 +311,6 @@ int print_local_results() {
int
ptr_local
,
ptr_out
,
err
;
char
*
file_name
;
//compute_results_iter(results, group->myId, group->numP, ROOT, config_file->n_stages, config_file->capture_method, comm);
if
(
group
->
myId
==
ROOT
)
{
ptr_out
=
dup
(
1
);
...
...
@@ -550,11 +545,11 @@ void init_originals() {
}
void
init_targets
()
{
int
commited
,
targets_qty
;
int
commited
;
size_t
i
,
entries
;
void
*
value
=
NULL
;
MAM_Get_comm
(
&
new_comm
,
&
targets_qty
);
MAM_Get_comm
(
&
new_comm
);
malleability_get_data
(
&
value
,
0
,
1
,
0
);
group
->
grp
=
*
((
int
*
)
value
);
...
...
@@ -564,10 +559,11 @@ void init_targets() {
results
=
malloc
(
sizeof
(
results_data
));
init_results_data
(
results
,
config_file
->
n_resizes
,
config_file
->
n_stages
,
config_file
->
groups
[
group
->
grp
].
iters
);
results_comm
(
results
,
ROOT
,
config_file
->
n_resizes
,
new_comm
);
MPI_Comm_free
(
&
new_comm
);
MAM_Commit
(
&
commited
,
&
comm
);
MPI_Comm_size
(
comm
,
&
(
group
->
numP
));
MPI_Comm_rank
(
comm
,
&
(
group
->
myId
));
// TODO Refactor - Que sea una unica funcion
// Obtiene las variables que van a utilizar los hijos
...
...
Codes/malleability/malleabilityManager.c
View file @
142780d4
...
...
@@ -47,7 +47,7 @@ malleability_data_t *dist_a_data;
* la comunicacion los procesos hijo estan preparados para ejecutar la
* aplicacion.
*/
int
MAM_Init
(
int
myId
,
int
numP
,
int
root
,
MPI_Comm
comm
,
char
*
name_exec
,
char
*
nodelist
,
int
num_cpus
,
int
num_nodes
)
{
int
MAM_Init
(
int
root
,
MPI_Comm
comm
,
char
*
name_exec
,
char
*
nodelist
,
int
num_cpus
,
int
num_nodes
)
{
MPI_Comm
dup_comm
,
thread_comm
;
#if USE_MAL_DEBUG
...
...
@@ -67,8 +67,8 @@ int MAM_Init(int myId, int numP, int root, MPI_Comm comm, char *name_exec, char
MPI_Comm_set_name
(
dup_comm
,
"MPI_COMM_MAM"
);
MPI_Comm_set_name
(
thread_comm
,
"MPI_COMM_MAM_THREAD"
);
mall
->
myId
=
myId
;
mall
->
numP
=
numP
;
MPI_Comm_rank
(
comm
,
&
(
mall
->
myId
))
;
MPI_Comm_size
(
comm
,
&
(
mall
->
numP
))
;
mall
->
root
=
root
;
mall
->
root_parents
=
-
1
;
mall
->
comm
=
dup_comm
;
...
...
@@ -136,8 +136,8 @@ void MAM_Finalize() {
MAM_Free_main_datatype
();
free_malleability_times
();
if
(
mall
->
comm
!=
MPI_COMM_WORLD
)
MPI_Comm_free
(
&
(
mall
->
comm
));
if
(
mall
->
thread_comm
!=
MPI_COMM_WORLD
)
MPI_Comm_free
(
&
(
mall
->
thread_comm
));
if
(
mall
->
comm
!=
MPI_COMM_WORLD
&&
mall
->
comm
!=
MPI_COMM_NULL
)
MPI_Comm_free
(
&
(
mall
->
comm
));
if
(
mall
->
thread_comm
!=
MPI_COMM_WORLD
&&
mall
->
thread_comm
!=
MPI_COMM_NULL
)
MPI_Comm_free
(
&
(
mall
->
thread_comm
));
free
(
mall
);
free
(
mall_conf
);
...
...
@@ -265,31 +265,26 @@ int MAM_Checkpoint(int *mam_state, int wait_completed) {
/*
* Returns an intracommunicator to allow users to perform their
* own redistributions. The user must free this communicator
* when is not longer needed.
* own redistributions. The user may free this communicator
* when it is no longer needed. It will be freed by MaM when
* committing the reconfiguration.
*
* This is a blocking function, must be called by all processes involved in the
* reconfiguration.
* TODO Hacer en otro sitio la creacion del comunicador y borrar en commit.
*
* The communicator is only returned if the state of reconfiguration
* is completed (MALL_COMPLETED / MAM_COMPLETED). Otherwise MALL_DENIED is obtained.
*/
int
MAM_Get_comm
(
MPI_Comm
*
comm
,
int
*
targets_qty
)
{
int
MAM_Get_comm
(
MPI_Comm
*
comm
)
{
if
(
!
(
state
==
MALL_COMPLETED
||
state
==
MALL_ZOMBIE
))
{
return
MALL_DENIED
;
}
MPI_Comm_dup
(
mall
->
user_comm
,
comm
);
MPI_Comm_set_name
(
*
comm
,
"MPI_MAM_DUP"
);
*
targets_qty
=
mall
->
numC
;
*
comm
=
mall
->
user_comm
;
return
0
;
}
/*
* TODO
*/
void
MAM_Commit
(
int
*
mam_state
,
MPI_Comm
*
new
_comm
)
{
void
MAM_Commit
(
int
*
mam_state
,
MPI_Comm
*
updated
_comm
)
{
if
(
!
(
state
==
MALL_COMPLETED
||
state
==
MALL_ZOMBIE
))
{
*
mam_state
=
MALL_DENIED
;
return
;
...
...
@@ -307,13 +302,25 @@ void MAM_Commit(int *mam_state, MPI_Comm *new_comm) {
zombies_collect_suspended
(
mall
->
comm
,
mall
->
myId
,
mall
->
numP
,
mall
->
numC
,
mall
->
root
);
}
}
// Reset/Free unneeded communicators
if
(
mall
->
user_comm
!=
MPI_COMM_WORLD
)
MPI_Comm_free
(
&
(
mall
->
user_comm
));
if
(
*
updated_comm
!=
MPI_COMM_WORLD
&&
*
updated_comm
!=
MPI_COMM_NULL
)
MPI_Comm_free
(
updated_comm
);
if
(
mall
->
user_comm
!=
MPI_COMM_WORLD
&&
mall
->
user_comm
!=
MPI_COMM_NULL
)
MPI_Comm_free
(
&
(
mall
->
user_comm
));
if
(
mall_conf
->
spawn_method
==
MALL_SPAWN_MERGE
)
{
malleability_comms_update
(
mall
->
intercomm
);
}
if
(
mall
->
intercomm
!=
MPI_COMM_NULL
&&
mall
->
intercomm
!=
MPI_COMM_WORLD
)
{
MPI_Comm_disconnect
(
&
(
mall
->
intercomm
));
//FIXME Error en OpenMPI + Merge
}
// Zombies KILL
if
(
state
==
MALL_ZOMBIE
)
{
MAM_Finalize
();
MPI_Finalize
();
#if USE_MAL_DEBUG
DEBUG_FUNC
(
"Is terminating as zombie"
,
mall
->
myId
,
mall
->
numP
);
fflush
(
stdout
);
#endif
exit
(
0
);
}
MPI_Comm_rank
(
mall
->
comm
,
&
(
mall
->
myId
));
MPI_Comm_size
(
mall
->
comm
,
&
(
mall
->
numP
));
mall
->
root
=
mall
->
root_parents
==
-
1
?
mall
->
root
:
mall
->
root_parents
;
...
...
@@ -322,8 +329,8 @@ void MAM_Commit(int *mam_state, MPI_Comm *new_comm) {
*
mam_state
=
MAM_COMMITED
;
// Set new communicator
if
(
mall_conf
->
spawn_method
==
MALL_SPAWN_BASELINE
)
{
*
new
_comm
=
MPI_COMM_WORLD
;
}
else
if
(
mall_conf
->
spawn_method
==
MALL_SPAWN_MERGE
)
{
MPI_Comm_dup
(
mall
->
comm
,
new
_comm
);
}
if
(
mall_conf
->
spawn_method
==
MALL_SPAWN_BASELINE
)
{
*
updated
_comm
=
MPI_COMM_WORLD
;
}
else
if
(
mall_conf
->
spawn_method
==
MALL_SPAWN_MERGE
)
{
MPI_Comm_dup
(
mall
->
comm
,
updated
_comm
);
}
#if USE_MAL_DEBUG
if
(
mall
->
myId
==
mall
->
root
)
DEBUG_FUNC
(
"Reconfiguration has been commited"
,
mall
->
myId
,
mall
->
numP
);
fflush
(
stdout
);
MPI_Barrier
(
MPI_COMM_WORLD
);
#endif
...
...
Codes/malleability/malleabilityManager.h
View file @
142780d4
...
...
@@ -9,11 +9,11 @@
#include <mpi.h>
#include "malleabilityStates.h"
int
MAM_Init
(
int
myId
,
int
numP
,
int
root
,
MPI_Comm
comm
,
char
*
name_exec
,
char
*
nodelist
,
int
num_cpus
,
int
num_nodes
);
int
MAM_Init
(
int
root
,
MPI_Comm
comm
,
char
*
name_exec
,
char
*
nodelist
,
int
num_cpus
,
int
num_nodes
);
void
MAM_Finalize
();
int
MAM_Checkpoint
(
int
*
mam_state
,
int
wait_completed
);
int
MAM_Get_comm
(
MPI_Comm
*
comm
,
int
*
targets_qty
);
void
MAM_Commit
(
int
*
mam_state
,
MPI_Comm
*
new
_comm
);
int
MAM_Get_comm
(
MPI_Comm
*
comm
);
void
MAM_Commit
(
int
*
mam_state
,
MPI_Comm
*
updated
_comm
);
void
MAM_Set_configuration
(
int
spawn_method
,
int
spawn_strategies
,
int
spawn_dist
,
int
red_method
,
int
red_strategies
);
void
MAM_Set_target_number
(
int
numC
);
// TODO TO BE DEPRECATED
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment