Commit 142780d4 authored by iker_martin
Browse files

Reduced the number of arguments needed in MaM. Reduced the tasks performed by users of MaM.

parent 65641141
......@@ -66,7 +66,7 @@ int main(int argc, char *argv[]) {
}
init_group_struct(argv, argc, myId, numP);
im_child = MAM_Init(myId, numP, ROOT, comm, argv[0], nodelist, num_cpus, num_nodes);
im_child = MAM_Init(ROOT, comm, argv[0], nodelist, num_cpus, num_nodes);
if(!im_child) { //TODO REFACTOR Simplificar inicio
init_application();
......@@ -82,8 +82,6 @@ int main(int argc, char *argv[]) {
// EMPIEZA LA EJECUCION-------------------------------
//
do {
MPI_Comm_size(comm, &(group->numP));
MPI_Comm_rank(comm, &(group->myId));
if(group->grp != 0) {
obtain_op_times(0); //Obtener los nuevos valores de tiempo para el computo
......@@ -107,15 +105,11 @@ int main(int argc, char *argv[]) {
if(res==1) { // Se ha llegado al final de la aplicacion
MPI_Barrier(comm);
results->exec_time = MPI_Wtime() - results->exec_start - results->wasted_time;
print_local_results();
}
print_local_results();
reset_results_index(results);
if(res!=1) {
if(comm != MPI_COMM_WORLD) MPI_Comm_free(&comm);
comm = new_comm;
}
group->grp = group->grp + 1;
} while(config_file->n_groups > group->grp && config_file->groups[group->grp].sm == MALL_SPAWN_MERGE);
......@@ -154,7 +148,7 @@ int main(int argc, char *argv[]) {
* de procesos. En caso contrario se devuelve 0.
*/
int work() {
int iter, maxiter, state, res, commited, targets_qty;
int iter, maxiter, state, res, commited;
int wait_completed = MAM_CHECK_COMPLETION;
maxiter = config_file->groups[group->grp].iters;
......@@ -182,11 +176,13 @@ int work() {
compute_results_iter(results, group->myId, group->numP, ROOT, config_file->n_stages, config_file->capture_method, comm);
if(config_file->n_groups == group->grp + 1) { res=1; }
else {
MAM_Get_comm(&new_comm, &targets_qty);
MAM_Get_comm(&new_comm);
send_config_file(config_file, ROOT, new_comm);
results_comm(results, ROOT, config_file->n_resizes, new_comm);
MPI_Comm_free(&new_comm);
MAM_Commit(&commited, &new_comm);
print_local_results();
MAM_Commit(&commited, &comm);
MPI_Comm_size(comm, &(group->numP));
MPI_Comm_rank(comm, &(group->myId));
}
if(state == MAM_ZOMBIE) res=state;
return res;
......@@ -220,7 +216,7 @@ double iterate(int async_comm) {
// Se esta realizando una redistribucion de datos asincrona
if(async_comm == MAM_PENDING) {
// TODO Que diferencie entre ambas en el IO
// TODO Que diferencie entre ambas en el IO
results->iters_async += 1;
}
......@@ -315,7 +311,6 @@ int print_local_results() {
int ptr_local, ptr_out, err;
char *file_name;
//compute_results_iter(results, group->myId, group->numP, ROOT, config_file->n_stages, config_file->capture_method, comm);
if(group->myId == ROOT) {
ptr_out = dup(1);
......@@ -550,11 +545,11 @@ void init_originals() {
}
void init_targets() {
int commited, targets_qty;
int commited;
size_t i, entries;
void *value = NULL;
MAM_Get_comm(&new_comm, &targets_qty);
MAM_Get_comm(&new_comm);
malleability_get_data(&value, 0, 1, 0);
group->grp = *((int *)value);
......@@ -564,10 +559,11 @@ void init_targets() {
results = malloc(sizeof(results_data));
init_results_data(results, config_file->n_resizes, config_file->n_stages, config_file->groups[group->grp].iters);
results_comm(results, ROOT, config_file->n_resizes, new_comm);
MPI_Comm_free(&new_comm);
MAM_Commit(&commited, &comm);
MPI_Comm_size(comm, &(group->numP));
MPI_Comm_rank(comm, &(group->myId));
// TODO Refactor - Que sea una unica funcion
// Obtiene las variables que van a utilizar los hijos
......
......@@ -47,7 +47,7 @@ malleability_data_t *dist_a_data;
* la comunicacion los procesos hijo estan preparados para ejecutar la
* aplicacion.
*/
int MAM_Init(int myId, int numP, int root, MPI_Comm comm, char *name_exec, char *nodelist, int num_cpus, int num_nodes) {
int MAM_Init(int root, MPI_Comm comm, char *name_exec, char *nodelist, int num_cpus, int num_nodes) {
MPI_Comm dup_comm, thread_comm;
#if USE_MAL_DEBUG
......@@ -67,8 +67,8 @@ int MAM_Init(int myId, int numP, int root, MPI_Comm comm, char *name_exec, char
MPI_Comm_set_name(dup_comm, "MPI_COMM_MAM");
MPI_Comm_set_name(thread_comm, "MPI_COMM_MAM_THREAD");
mall->myId = myId;
mall->numP = numP;
MPI_Comm_rank(comm, &(mall->myId));
MPI_Comm_size(comm, &(mall->numP));
mall->root = root;
mall->root_parents = -1;
mall->comm = dup_comm;
......@@ -136,8 +136,8 @@ void MAM_Finalize() {
MAM_Free_main_datatype();
free_malleability_times();
if(mall->comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->comm));
if(mall->thread_comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->thread_comm));
if(mall->comm != MPI_COMM_WORLD && mall->comm != MPI_COMM_NULL) MPI_Comm_free(&(mall->comm));
if(mall->thread_comm != MPI_COMM_WORLD && mall->thread_comm != MPI_COMM_NULL) MPI_Comm_free(&(mall->thread_comm));
free(mall);
free(mall_conf);
......@@ -265,31 +265,26 @@ int MAM_Checkpoint(int *mam_state, int wait_completed) {
/*
* Returns an intracommunicator to allow users to perform their
* own redistributions. The user must free this communicator
* when is not longer needed.
* own redistributions. The user may free this communicator
* when it is no longer needed. It will be freed by MaM when
* committing the reconfiguration.
*
* This is a blocking function, must be called by all processes involved in the
* reconfiguration.
* TODO Hacer en otro sitio la creacion del comunicador y borrar en commit.
*
* The communicator is only returned if the state of reconfiguration
* is completed (MALL_COMPLETED / MAM_COMPLETED). Otherwise MALL_DENIED is obtained.
*/
int MAM_Get_comm(MPI_Comm *comm, int *targets_qty) {
int MAM_Get_comm(MPI_Comm *comm) {
if(!(state == MALL_COMPLETED || state == MALL_ZOMBIE)) {
return MALL_DENIED;
}
MPI_Comm_dup(mall->user_comm, comm);
MPI_Comm_set_name(*comm, "MPI_MAM_DUP");
*targets_qty = mall->numC;
*comm = mall->user_comm;
return 0;
}
/*
* TODO
*/
void MAM_Commit(int *mam_state, MPI_Comm *new_comm) {
void MAM_Commit(int *mam_state, MPI_Comm *updated_comm) {
if(!(state == MALL_COMPLETED || state == MALL_ZOMBIE)) {
*mam_state = MALL_DENIED;
return;
......@@ -307,13 +302,25 @@ void MAM_Commit(int *mam_state, MPI_Comm *new_comm) {
zombies_collect_suspended(mall->comm, mall->myId, mall->numP, mall->numC, mall->root);
}
}
// Reset/Free unneeded communicators
if(mall->user_comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->user_comm));
if(*updated_comm != MPI_COMM_WORLD && *updated_comm != MPI_COMM_NULL) MPI_Comm_free(updated_comm);
if(mall->user_comm != MPI_COMM_WORLD && mall->user_comm != MPI_COMM_NULL) MPI_Comm_free(&(mall->user_comm));
if(mall_conf->spawn_method == MALL_SPAWN_MERGE) { malleability_comms_update(mall->intercomm); }
if(mall->intercomm != MPI_COMM_NULL && mall->intercomm != MPI_COMM_WORLD) {
MPI_Comm_disconnect(&(mall->intercomm)); //FIXME Error en OpenMPI + Merge
}
// Zombies KILL
if(state == MALL_ZOMBIE) {
MAM_Finalize();
MPI_Finalize();
#if USE_MAL_DEBUG
DEBUG_FUNC("Is terminating as zombie", mall->myId, mall->numP); fflush(stdout);
#endif
exit(0);
}
MPI_Comm_rank(mall->comm, &(mall->myId));
MPI_Comm_size(mall->comm, &(mall->numP));
mall->root = mall->root_parents == -1 ? mall->root : mall->root_parents;
......@@ -322,8 +329,8 @@ void MAM_Commit(int *mam_state, MPI_Comm *new_comm) {
*mam_state = MAM_COMMITED;
// Set new communicator
if(mall_conf->spawn_method == MALL_SPAWN_BASELINE) { *new_comm = MPI_COMM_WORLD; }
else if(mall_conf->spawn_method == MALL_SPAWN_MERGE) { MPI_Comm_dup(mall->comm, new_comm); }
if(mall_conf->spawn_method == MALL_SPAWN_BASELINE) { *updated_comm = MPI_COMM_WORLD; }
else if(mall_conf->spawn_method == MALL_SPAWN_MERGE) { MPI_Comm_dup(mall->comm, updated_comm); }
#if USE_MAL_DEBUG
if(mall->myId == mall->root) DEBUG_FUNC("Reconfiguration has been commited", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
#endif
......
......@@ -9,11 +9,11 @@
#include <mpi.h>
#include "malleabilityStates.h"
int MAM_Init(int myId, int numP, int root, MPI_Comm comm, char *name_exec, char *nodelist, int num_cpus, int num_nodes);
int MAM_Init(int root, MPI_Comm comm, char *name_exec, char *nodelist, int num_cpus, int num_nodes);
void MAM_Finalize();
int MAM_Checkpoint(int *mam_state, int wait_completed);
int MAM_Get_comm(MPI_Comm *comm, int *targets_qty);
void MAM_Commit(int *mam_state, MPI_Comm *new_comm);
int MAM_Get_comm(MPI_Comm *comm);
void MAM_Commit(int *mam_state, MPI_Comm *updated_comm);
void MAM_Set_configuration(int spawn_method, int spawn_strategies, int spawn_dist, int red_method, int red_strategies);
void MAM_Set_target_number(int numC); // TODO TO BE DEPRECATED
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment