Commit 5cf7c034 authored by iker_martin's avatar iker_martin
Browse files

WIP. Arreglando merge shrink. Zombies no se tratan de forma correcta

parent 43ca7aca
......@@ -109,7 +109,7 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex
if(mall->intercomm != MPI_COMM_NULL ) {
Children_init();
return MALLEABILITY_CHILDREN;
} else {printf("P%d/%d MI comm padres es nulo (%d)\n", mall->myId, mall->numP, MPI_COMM_NULL);}
}
zombies_service_init();
return MALLEABILITY_NOT_CHILDREN;
......@@ -161,34 +161,14 @@ void free_malleability() {
int malleability_checkpoint() {
double end_real_time;
char * test = malloc(MPI_MAX_OBJECT_NAME * sizeof(char));
int tester;
//printf("P%d -- Estado %d\n", mall->myId, state);
switch(state) {
case MALL_UNRESERVED:
break;
case MALL_NOT_STARTED:
// Comprobar si se tiene que realizar un redimensionado
//if(CHECK_RMS()) {return MALL_DENIED;}
MPI_Comm_get_name(mall->thread_comm, test, &tester);
printf("TEST 1 P%d Comm=%d name=%s\n", mall->myId, mall->thread_comm, test);
MPI_Comm_get_name(mall->comm, test, &tester);
printf("TEST 2 P%d Comm=%d name=%s\n", mall->myId, mall->comm, test);
if(mall->intercomm != MPI_COMM_NULL) {
MPI_Comm_get_name(mall->intercomm, test, &tester);
printf("TEST 3 P%d Comm=%d name=%s\n", mall->myId, mall->intercomm, test);
}
state = spawn_step();
MPI_Comm_get_name(mall->thread_comm, test, &tester);
printf("TEST 1 P%d Comm=%d name=%s\n", mall->myId, mall->thread_comm, test);
MPI_Comm_get_name(mall->comm, test, &tester);
printf("TEST 2 P%d Comm=%d name=%s\n", mall->myId, mall->comm, test);
if(mall->intercomm != MPI_COMM_NULL) {
MPI_Comm_get_name(mall->intercomm, test, &tester);
printf("TEST 3 P%d Comm=%d name=%s\n", mall->myId, mall->intercomm, test);
}
if (state == MALL_SPAWN_COMPLETED || state == MALL_SPAWN_ADAPT_POSTPONE){
malleability_checkpoint();
......@@ -227,14 +207,14 @@ int malleability_checkpoint() {
mall_conf->results->spawn_start = MPI_Wtime();
unset_spawn_postpone_flag(state);
state = check_spawn_state(&(mall->intercomm), mall->comm, &end_real_time);
printf("TEST END state=%d\n", state);
if(!malleability_spawn_contains_strat(mall_conf->spawn_strategies, MALL_SPAWN_PTHREAD, NULL)) {
mall_conf->results->spawn_time[mall_conf->grp] = MPI_Wtime() - mall_conf->results->spawn_start;
}
break;
case MALL_SPAWN_ADAPTED:
shrink_redistribution();
state = shrink_redistribution();
break;
case MALL_DIST_COMPLETED: //TODO No es esto muy feo?
......@@ -554,8 +534,7 @@ int start_redistribution() {
} else {
// Si no tiene comunicador creado, se debe a que se ha pospuesto el Spawn
// y se trata del spawn Merge Shrink
mall->intercomm = mall->comm;
if(mall->comm == MPI_COMM_NULL) { printf("COMM nulo?\n");}
MPI_Comm_dup(mall->comm, &(mall->intercomm));
}
if(is_intercomm) {
......@@ -696,15 +675,11 @@ int end_redistribution() {
/*FIXMENOW En algun momento P0 cambia tanto su comm como intercomm respecto al resto...*/
MPI_Barrier(mall->comm); //FIXMENOW Por alguna razon da error en Comm
if(mall->intercomm != MPI_COMM_NULL) {
if(mall->intercomm != MPI_COMM_NULL && mall->intercomm != MPI_COMM_WORLD) {
//FIXMENOW Intercomm se borra, pero no es COMM WORLD ni COMM NULL
//MPI_Comm_disconnect(&(mall->intercomm));
MPI_Comm_disconnect(&(mall->intercomm));
}
MPI_Barrier(mall->intercomm); //FIXMENOW Por alguna razon da error en Comm
printf("TEST 5 P%d Comm=%d intercomm=%d\n", mall->myId, mall->comm, mall->intercomm);
MPI_Barrier(mall->comm); //FIXMENOW Por alguna razon da error en Comm
return local_state;
}
......@@ -719,6 +694,7 @@ int shrink_redistribution() {
//TODO REFACTOR -- Que solo la llamada de collect iters este fuera de los hilos
zombies_collect_suspended(mall->comm, mall->myId, mall->numP, mall->numC, mall->root, (void *) mall_conf->results, mall->intercomm);
printf("HELLO THERE\n");
if(mall->myId < mall->numC) {
if(mall->thread_comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->thread_comm));
if(mall->comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->comm));
......@@ -738,6 +714,7 @@ int shrink_redistribution() {
}
return MALL_DIST_COMPLETED;
} else {
printf("P%d is a zombie\n", mall->myId);
return MALL_ZOMBIE;
}
}
......
......@@ -90,12 +90,12 @@ int check_spawn_state(MPI_Comm *child, MPI_Comm comm, double *real_time) {
if(spawn_data->spawn_is_async) { // Async
local_state = get_spawn_state(spawn_data->spawn_is_async);
printf("Test 3.5 local=%d\n",local_state);
//printf("Test 3.5 local=%d\n",local_state);
if(local_state == MALL_SPAWN_SINGLE_PENDING || local_state == MALL_SPAWN_SINGLE_COMPLETED) { // Single
global_state = check_single_state(comm, local_state);
} else if(local_state == MALL_SPAWN_PENDING || local_state == MALL_SPAWN_COMPLETED) { // Baseline
} else if(local_state == MALL_SPAWN_PENDING || local_state == MALL_SPAWN_COMPLETED || local_state == MALL_SPAWN_ADAPTED) { // Baseline
global_state = check_generic_state(comm, child, local_state, real_time);
} else if(local_state == MALL_SPAWN_ADAPT_POSTPONE) {
......@@ -129,6 +129,7 @@ void unset_spawn_postpone_flag(int outside_state) {
int local_state = get_spawn_state(spawn_data->spawn_is_async);
if(local_state == MALL_SPAWN_ADAPT_POSTPONE && outside_state == MALL_SPAWN_ADAPT_PENDING && spawn_data->spawn_is_async) {
set_spawn_state(MALL_SPAWN_PENDING, MALL_SPAWN_PTHREAD);
wakeup();
}
}
......@@ -339,7 +340,7 @@ int allocate_thread_spawn() {
* se avisa al hilo maestro.
*/
void* thread_work(void* arg) {
int local_state, repeat = 0;
int local_state;
returned_comm = (MPI_Comm *) malloc(sizeof(MPI_Comm));
generic_spawn(returned_comm, MALL_NOT_STARTED);
......@@ -347,11 +348,10 @@ void* thread_work(void* arg) {
local_state = get_spawn_state(MALL_SPAWN_PTHREAD);
if(local_state == MALL_SPAWN_ADAPT_POSTPONE) {
// El grupo de procesos se terminara de juntar tras la redistribucion de datos
repeat = 1;
local_state = wait_wakeup();
printf("Hilos despiertan\n");
generic_spawn(returned_comm, MALL_DIST_COMPLETED);
}
if (repeat) generic_spawn(returned_comm, MALL_DIST_COMPLETED);
pthread_exit(NULL);
}
......
......@@ -75,8 +75,4 @@ void merge_adapt_shrink(int numC, MPI_Comm *child, MPI_Comm comm, int myId) {
color = 1;
}
MPI_Comm_split(comm, color, myId, child);
//TODO REFACTOR Llevar a otra parte -- Hacer solo si MALL_SPAWN_ADAPTED
//if(*comm != MPI_COMM_WORLD && *comm != MPI_COMM_NULL)
// MPI_Comm_free(comm); //POSIBLE ERROR?
}
......@@ -39,10 +39,14 @@ void set_spawn_state(int value, int is_async) {
}
/*
 * Wake-up request flag shared by wakeup() and wait_wakeup().
 * It is only read or written while spawn_mutex is held.
 *
 * A condition variable carries no memory of past signals, so without this
 * flag a wakeup() issued before the waiter reaches pthread_cond_wait() would
 * be lost, and a spurious return from pthread_cond_wait() would release the
 * waiter too early.
 */
static int spawn_wakeup_pending = 0;

/*
 * Blocks the calling thread until wakeup() has been called, then returns the
 * value reported by get_spawn_state(1).
 *
 * If wakeup() was already called before this function is entered, the call
 * returns without blocking. The pending request is consumed, so a later call
 * blocks again until the next wakeup().
 */
int wait_wakeup() {
  pthread_mutex_lock(&spawn_mutex);
  // Loop on the predicate: pthread_cond_wait() may return spuriously.
  while (!spawn_wakeup_pending) {
    pthread_cond_wait(&spawn_cond, &spawn_mutex);
  }
  spawn_wakeup_pending = 0;
  pthread_mutex_unlock(&spawn_mutex);

  // Queried after releasing spawn_mutex, exactly as before.
  // NOTE(review): get_spawn_state() may take a lock itself -- confirm.
  return get_spawn_state(1);
}

/*
 * Requests that the thread blocked (or about to block) in wait_wakeup()
 * continues. The request is recorded under spawn_mutex before signalling, so
 * it is not lost when the waiter has not reached pthread_cond_wait() yet.
 */
void wakeup() {
  pthread_mutex_lock(&spawn_mutex);
  spawn_wakeup_pending = 1;
  pthread_cond_signal(&spawn_cond);
  pthread_mutex_unlock(&spawn_mutex);
}
......@@ -6,7 +6,7 @@ SDR=0.0
ADR=0.0
AT=0
SM=1
SS=1
SS=2
; end [general]
[stage0]
PT=0
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment