Commit 69193a34 authored by iker_martin
Browse files

Change order of MaM execution. Deleted unneeded code and minor fixes

parent d75645d6
......@@ -23,6 +23,7 @@ void init_group_struct(char *argv[], int argc, int myId, int numP);
void init_application();
void obtain_op_times();
void free_application_data();
void free_zombie_process();
void print_general_info(int myId, int grp, int numP);
int print_local_results();
......@@ -32,6 +33,7 @@ int create_out_file(char *nombre, int *ptr, int newstdout);
void init_originals();
void init_targets();
void update_targets();
void user_redistribution(void *args);
configuration *config_file;
......@@ -44,6 +46,7 @@ int main(int argc, char *argv[]) {
int numP, myId, res;
int req;
int im_child;
int abort_needed = 0;
int num_cpus, num_nodes;
char *nodelist = NULL;
......@@ -69,7 +72,9 @@ int main(int argc, char *argv[]) {
init_group_struct(argv, argc, myId, numP);
im_child = MAM_Init(ROOT, &comm, argv[0], nodelist, num_cpus, num_nodes, user_redistribution, NULL);
if(!im_child) { //TODO REFACTOR Simplificar inicio
if(im_child) {
update_targets();
} else {
init_application();
init_originals();
......@@ -95,8 +100,8 @@ int main(int argc, char *argv[]) {
MAM_Set_target_number(config_file->groups[group->grp+1].procs); // TODO TO BE DEPRECATED
if(group->grp != 0) {
malleability_modify_data(&(group->grp), 0, 1, MPI_INT, 1, 0);
malleability_modify_data(&(group->iter_start), 2, 1, MPI_INT, 1, 0);
malleability_modify_data(&(group->grp), 0, 1, MPI_INT, 1, 1);
malleability_modify_data(&(group->iter_start), 0, 1, MPI_INT, 1, 0);
}
}
......@@ -124,11 +129,12 @@ int main(int argc, char *argv[]) {
MPI_Comm_free(&comm);
}
if(group->myId == ROOT && config_file->groups[group->grp].sm == MALL_SPAWN_MERGE) {
MPI_Abort(MPI_COMM_WORLD, -100);
if(group->myId == ROOT && config_file->groups[group->grp-1].sm == MALL_SPAWN_MERGE) {
abort_needed = 1;
}
free_application_data();
if(abort_needed) { MPI_Abort(MPI_COMM_WORLD, -100); }
MPI_Finalize();
return 0;
}
......@@ -154,8 +160,8 @@ int work() {
maxiter = config_file->groups[group->grp].iters;
state = MAM_NOT_STARTED;
res = 0;
for(iter=group->iter_start; iter < maxiter; iter++) {
iterate(state);
}
......@@ -174,7 +180,6 @@ int work() {
}
if(config_file->n_groups == group->grp + 1) { res=1; }
if(state == MAM_ZOMBIE) res=state;
return res;
}
......@@ -206,7 +211,7 @@ double iterate(int async_comm) {
// Se esta realizando una redistribucion de datos asincrona
if(async_comm == MAM_PENDING) {
// TODO Que diferencie entre ambas en el IO
// TODO Que diferencie entre tipo de partes asincronas?
results->iters_async += 1;
}
......@@ -470,7 +475,14 @@ void free_application_data() {
group->async_array = NULL;
}
MAM_Finalize();
free_zombie_process();
}
/*
* Libera la memoria asociada a un proceso Zombie
*/
void free_zombie_process() {
free_results_data(results, config_file->n_stages);
free(results);
......@@ -519,8 +531,8 @@ void init_originals() {
config_file->groups[group->grp+1].phy_dist, config_file->groups[group->grp+1].rm, config_file->groups[group->grp+1].rs);
MAM_Set_target_number(config_file->groups[group->grp+1].procs);
malleability_add_data(&(group->grp), 1, MPI_INT, 1, 0);
malleability_add_data(&run_id, 1, MPI_INT, 1, 0);
malleability_add_data(&(group->grp), 1, MPI_INT, 1, 1);
malleability_add_data(&run_id, 1, MPI_INT, 1, 1);
malleability_add_data(&(group->iter_start), 1, MPI_INT, 1, 0);
if(config_file->sdr) {
......@@ -540,7 +552,7 @@ void init_targets() {
size_t i, entries;
void *value = NULL;
malleability_get_data(&value, 0, 1, 0);
malleability_get_data(&value, 0, 1, 1);
group->grp = *((int *)value);
group->grp = group->grp + 1;
......@@ -549,13 +561,28 @@ void init_targets() {
init_results_data(results, config_file->n_resizes, config_file->n_stages, config_file->groups[group->grp].iters);
results_comm(results, ROOT, config_file->n_resizes, new_comm);
// TODO Refactor - Que sea una unica funcion
// Obtiene las variables que van a utilizar los hijos
malleability_get_data(&value, 1, 1, 0);
malleability_get_data(&value, 1, 1, 1);
run_id = *((int *)value);
malleability_get_data(&value, 2, 1, 0);
if(config_file->adr) {
malleability_get_entries(&entries, 0, 1);
group->async_qty = (int *) malloc(entries * sizeof(int));
group->async_array = (char **) malloc(entries * sizeof(char *));
for(i=0; i<entries; i++) {
malleability_get_data(&value, i, 0, 1);
group->async_array[i] = (char *)value;
group->async_qty[i] = DR_MAX_SIZE;
}
group->async_qty[entries-1] = config_file->adr % DR_MAX_SIZE ? config_file->adr % DR_MAX_SIZE : DR_MAX_SIZE;
group->async_data_groups = entries;
}
}
void update_targets() { //FIXME Should not be needed after redist -- Declarar antes
size_t i, entries;
void *value = NULL;
malleability_get_data(&value, 0, 1, 0);
group->iter_start = *((int *)value);
if(config_file->sdr) {
......@@ -570,35 +597,25 @@ void init_targets() {
group->sync_qty[entries-1] = config_file->sdr % DR_MAX_SIZE ? config_file->sdr % DR_MAX_SIZE : DR_MAX_SIZE;
group->sync_data_groups = entries;
}
if(config_file->adr) {
malleability_get_entries(&entries, 0, 1);
group->async_qty = (int *) malloc(entries * sizeof(int));
group->async_array = (char **) malloc(entries * sizeof(char *));
for(i=0; i<entries; i++) {
malleability_get_data(&value, i, 0, 1);
group->async_array[i] = (char *)value;
group->async_qty[i] = DR_MAX_SIZE;
}
group->async_qty[entries-1] = config_file->adr % DR_MAX_SIZE ? config_file->adr % DR_MAX_SIZE : DR_MAX_SIZE;
group->async_data_groups = entries;
}
}
void user_redistribution(void *args) {
int commited;
mam_user_reconf_t user_reconf;
MAM_Get_Reconf_Info(&user_reconf);
new_comm = user_reconf.comm;
if(user_reconf.rank_state == 1) { //FIXME Crear MAM_NEW_RANK?
if(user_reconf.rank_state == MAM_PROC_NEW_RANK) {
init_targets();
} else {
send_config_file(config_file, ROOT, new_comm);
results_comm(results, ROOT, config_file->n_resizes, new_comm);
print_local_results();
if(user_reconf.rank_state == MAM_PROC_ZOMBIE) {
free_zombie_process();
}
}
MAM_Commit(&commited);
MAM_Resume_redistribution(&commited);
}
......@@ -59,6 +59,7 @@ typedef struct {
typedef struct { //FIXME numC_spawned no se esta usando
int myId, numP, numC, numC_spawned, root, root_parents, zombie;
int is_intercomm;
pthread_t async_thread;
MPI_Comm comm, thread_comm;
MPI_Comm intercomm, tmp_comm;
......
This diff is collapsed.
......@@ -18,7 +18,8 @@ typedef struct {
int MAM_Init(int root, MPI_Comm *comm, char *name_exec, char *nodelist, int num_cpus, int num_nodes, void (*user_function)(void *), void *user_args);
void MAM_Finalize();
int MAM_Checkpoint(int *mam_state, int wait_completed, void (*user_function)(void *), void *user_args);
void MAM_Commit(int *mam_state);
void MAM_Resume_redistribution(int *mam_state);
int MAM_Get_Reconf_Info(mam_user_reconf_t *reconf_info);
void MAM_Set_configuration(int spawn_method, int spawn_strategies, int spawn_dist, int red_method, int red_strategies);
......
......@@ -6,10 +6,11 @@
//States
#define MALL_DENIED -1
enum mall_inner_states{MALL_UNRESERVED, MALL_NOT_STARTED, MALL_ZOMBIE, MALL_SPAWN_PENDING, MALL_SPAWN_SINGLE_PENDING,
enum mall_inner_states{MALL_UNRESERVED, MALL_NOT_STARTED, MALL_SPAWN_PENDING, MALL_SPAWN_SINGLE_PENDING,
MALL_SPAWN_SINGLE_COMPLETED, MALL_SPAWN_ADAPT_POSTPONE, MALL_SPAWN_COMPLETED, MALL_DIST_PENDING, MALL_DIST_COMPLETED,
MALL_SPAWN_ADAPT_PENDING, MALL_SPAWN_ADAPTED, MALL_COMPLETED, MALL_USER_PENDING};
enum mam_states{MAM_UNRESERVED, MAM_NOT_STARTED, MAM_ZOMBIE, MAM_PENDING, MAM_COMPLETED, MAM_USER_PENDING, MAM_COMMITED};
MALL_SPAWN_ADAPT_PENDING, MALL_USER_PENDING, MALL_USER_COMPLETED, MALL_SPAWN_ADAPTED, MALL_COMPLETED};
enum mam_states{MAM_UNRESERVED, MAM_NOT_STARTED, MAM_PENDING, MAM_USER_PENDING, MAM_COMPLETED};
enum mam_proc_states{MAM_PROC_CONTINUE, MAM_PROC_NEW_RANK, MAM_PROC_ZOMBIE};
enum mall_spawn_methods{MALL_SPAWN_BASELINE, MALL_SPAWN_MERGE};
#define MALL_SPAWN_PTHREAD 2
#define MALL_SPAWN_SINGLE 3
......
......@@ -31,7 +31,7 @@ void reset_malleability_times() {
void free_malleability_times() {
#if USE_MAL_DEBUG
DEBUG_FUNC("Freeing recording structure", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(mall->comm);
DEBUG_FUNC("Freeing recording structure", mall->myId, mall->numP); fflush(stdout);
#endif
if(mall_conf->times != NULL) {
if(mall_conf->times->times_type != MPI_DATATYPE_NULL) {
......@@ -41,7 +41,7 @@ void free_malleability_times() {
free(mall_conf->times);
}
#if USE_MAL_DEBUG
DEBUG_FUNC("Freed recording structure", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(mall->comm);
DEBUG_FUNC("Freed recording structure", mall->myId, mall->numP); fflush(stdout);
#endif
}
......
......@@ -16,30 +16,34 @@ int offset_pids, *pids = NULL;
void gestor_usr2() {}
void zombies_collect_suspended(MPI_Comm comm, int myId, int numP, int numC, int root) {
void zombies_collect_suspended(MPI_Comm comm) {
int pid = getpid();
int *pids_counts = malloc(numP * sizeof(int));
int *pids_displs = malloc(numP * sizeof(int));
int *pids_counts = malloc(mall->numP * sizeof(int));
int *pids_displs = malloc(mall->numP * sizeof(int));
int i, count=1;
if(myId < numC) {
#if USE_MAL_DEBUG > 2
if(mall->myId == mall->root){ DEBUG_FUNC("Collecting zombies", mall->myId, mall->numP); } fflush(stdout);
#endif
if(mall->myId < mall->numC) {
count = 0;
if(myId == root) {
for(i=0; i < numC; i++) {
if(mall->myId == mall->root) {
for(i=0; i < mall->numC; i++) {
pids_counts[i] = 0;
}
for(i=numC; i<numP; i++) {
for(i=mall->numC; i<mall->numP; i++) {
pids_counts[i] = 1;
pids_displs[i] = (i + offset_pids) - numC;
pids_displs[i] = (i - mall->numC) + offset_pids;
}
offset_pids += numP - numC;
offset_pids += mall->numP - mall->numC;
}
}
MPI_Gatherv(&pid, count, MPI_INT, pids, pids_counts, pids_displs, MPI_INT, root, comm);
MPI_Gatherv(&pid, count, MPI_INT, pids, pids_counts, pids_displs, MPI_INT, mall->root, comm);
free(pids_counts);
free(pids_displs);
if(myId >= numC) {
if(mall->myId >= mall->numC) {
zombies_suspend();
}
}
......
......@@ -8,8 +8,9 @@
#include <unistd.h>
#include <mpi.h>
#include <signal.h>
#include "malleabilityDataStructures.h"
void zombies_collect_suspended(MPI_Comm comm, int myId, int numP, int numC, int root);
void zombies_collect_suspended(MPI_Comm comm);
void zombies_service_init();
void zombies_service_free();
void zombies_awake();
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment