Commit 5cf7c034 authored by iker_martin
Browse files

WIP. Arreglando merge shrink. Zombies no se tratan de forma correcta

parent 43ca7aca
...@@ -109,7 +109,7 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex ...@@ -109,7 +109,7 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex
if(mall->intercomm != MPI_COMM_NULL ) { if(mall->intercomm != MPI_COMM_NULL ) {
Children_init(); Children_init();
return MALLEABILITY_CHILDREN; return MALLEABILITY_CHILDREN;
} else {printf("P%d/%d MI comm padres es nulo (%d)\n", mall->myId, mall->numP, MPI_COMM_NULL);} }
zombies_service_init(); zombies_service_init();
return MALLEABILITY_NOT_CHILDREN; return MALLEABILITY_NOT_CHILDREN;
...@@ -161,34 +161,14 @@ void free_malleability() { ...@@ -161,34 +161,14 @@ void free_malleability() {
int malleability_checkpoint() { int malleability_checkpoint() {
double end_real_time; double end_real_time;
char * test = malloc(MPI_MAX_OBJECT_NAME * sizeof(char));
int tester;
//printf("P%d -- Estado %d\n", mall->myId, state);
switch(state) { switch(state) {
case MALL_UNRESERVED: case MALL_UNRESERVED:
break; break;
case MALL_NOT_STARTED: case MALL_NOT_STARTED:
// Comprobar si se tiene que realizar un redimensionado // Comprobar si se tiene que realizar un redimensionado
//if(CHECK_RMS()) {return MALL_DENIED;} //if(CHECK_RMS()) {return MALL_DENIED;}
MPI_Comm_get_name(mall->thread_comm, test, &tester);
printf("TEST 1 P%d Comm=%d name=%s\n", mall->myId, mall->thread_comm, test);
MPI_Comm_get_name(mall->comm, test, &tester);
printf("TEST 2 P%d Comm=%d name=%s\n", mall->myId, mall->comm, test);
if(mall->intercomm != MPI_COMM_NULL) {
MPI_Comm_get_name(mall->intercomm, test, &tester);
printf("TEST 3 P%d Comm=%d name=%s\n", mall->myId, mall->intercomm, test);
}
state = spawn_step(); state = spawn_step();
MPI_Comm_get_name(mall->thread_comm, test, &tester);
printf("TEST 1 P%d Comm=%d name=%s\n", mall->myId, mall->thread_comm, test);
MPI_Comm_get_name(mall->comm, test, &tester);
printf("TEST 2 P%d Comm=%d name=%s\n", mall->myId, mall->comm, test);
if(mall->intercomm != MPI_COMM_NULL) {
MPI_Comm_get_name(mall->intercomm, test, &tester);
printf("TEST 3 P%d Comm=%d name=%s\n", mall->myId, mall->intercomm, test);
}
if (state == MALL_SPAWN_COMPLETED || state == MALL_SPAWN_ADAPT_POSTPONE){ if (state == MALL_SPAWN_COMPLETED || state == MALL_SPAWN_ADAPT_POSTPONE){
malleability_checkpoint(); malleability_checkpoint();
...@@ -227,14 +207,14 @@ int malleability_checkpoint() { ...@@ -227,14 +207,14 @@ int malleability_checkpoint() {
mall_conf->results->spawn_start = MPI_Wtime(); mall_conf->results->spawn_start = MPI_Wtime();
unset_spawn_postpone_flag(state); unset_spawn_postpone_flag(state);
state = check_spawn_state(&(mall->intercomm), mall->comm, &end_real_time); state = check_spawn_state(&(mall->intercomm), mall->comm, &end_real_time);
printf("TEST END state=%d\n", state);
if(!malleability_spawn_contains_strat(mall_conf->spawn_strategies, MALL_SPAWN_PTHREAD, NULL)) { if(!malleability_spawn_contains_strat(mall_conf->spawn_strategies, MALL_SPAWN_PTHREAD, NULL)) {
mall_conf->results->spawn_time[mall_conf->grp] = MPI_Wtime() - mall_conf->results->spawn_start; mall_conf->results->spawn_time[mall_conf->grp] = MPI_Wtime() - mall_conf->results->spawn_start;
} }
break; break;
case MALL_SPAWN_ADAPTED: case MALL_SPAWN_ADAPTED:
shrink_redistribution(); state = shrink_redistribution();
break; break;
case MALL_DIST_COMPLETED: //TODO No es esto muy feo? case MALL_DIST_COMPLETED: //TODO No es esto muy feo?
...@@ -554,8 +534,7 @@ int start_redistribution() { ...@@ -554,8 +534,7 @@ int start_redistribution() {
} else { } else {
// Si no tiene comunicador creado, se debe a que se ha pospuesto el Spawn // Si no tiene comunicador creado, se debe a que se ha pospuesto el Spawn
// y se trata del spawn Merge Shrink // y se trata del spawn Merge Shrink
mall->intercomm = mall->comm; MPI_Comm_dup(mall->comm, &(mall->intercomm));
if(mall->comm == MPI_COMM_NULL) { printf("COMM nulo?\n");}
} }
if(is_intercomm) { if(is_intercomm) {
...@@ -696,15 +675,11 @@ int end_redistribution() { ...@@ -696,15 +675,11 @@ int end_redistribution() {
/*FIXMENOW En algun momento P0 cambia tanto su comm como intercomm respecto al resto...*/ /*FIXMENOW En algun momento P0 cambia tanto su comm como intercomm respecto al resto...*/
MPI_Barrier(mall->comm); //FIXMENOW Por alguna razon da error en Comm MPI_Barrier(mall->comm); //FIXMENOW Por alguna razon da error en Comm
if(mall->intercomm != MPI_COMM_NULL) { if(mall->intercomm != MPI_COMM_NULL && mall->intercomm != MPI_COMM_WORLD) {
//FIXMENOW Intercomm se borra, pero no es COMM WORLD ni COMM NULL //FIXMENOW Intercomm se borra, pero no es COMM WORLD ni COMM NULL
//MPI_Comm_disconnect(&(mall->intercomm)); MPI_Comm_disconnect(&(mall->intercomm));
} }
MPI_Barrier(mall->intercomm); //FIXMENOW Por alguna razon da error en Comm
printf("TEST 5 P%d Comm=%d intercomm=%d\n", mall->myId, mall->comm, mall->intercomm);
MPI_Barrier(mall->comm); //FIXMENOW Por alguna razon da error en Comm
return local_state; return local_state;
} }
...@@ -719,6 +694,7 @@ int shrink_redistribution() { ...@@ -719,6 +694,7 @@ int shrink_redistribution() {
//TODO REFACTOR -- Que solo la llamada de collect iters este fuera de los hilos //TODO REFACTOR -- Que solo la llamada de collect iters este fuera de los hilos
zombies_collect_suspended(mall->comm, mall->myId, mall->numP, mall->numC, mall->root, (void *) mall_conf->results, mall->intercomm); zombies_collect_suspended(mall->comm, mall->myId, mall->numP, mall->numC, mall->root, (void *) mall_conf->results, mall->intercomm);
printf("HELLO THERE\n");
if(mall->myId < mall->numC) { if(mall->myId < mall->numC) {
if(mall->thread_comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->thread_comm)); if(mall->thread_comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->thread_comm));
if(mall->comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->comm)); if(mall->comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->comm));
...@@ -738,6 +714,7 @@ int shrink_redistribution() { ...@@ -738,6 +714,7 @@ int shrink_redistribution() {
} }
return MALL_DIST_COMPLETED; return MALL_DIST_COMPLETED;
} else { } else {
printf("P%d is a zombie\n", mall->myId);
return MALL_ZOMBIE; return MALL_ZOMBIE;
} }
} }
......
...@@ -90,12 +90,12 @@ int check_spawn_state(MPI_Comm *child, MPI_Comm comm, double *real_time) { ...@@ -90,12 +90,12 @@ int check_spawn_state(MPI_Comm *child, MPI_Comm comm, double *real_time) {
if(spawn_data->spawn_is_async) { // Async if(spawn_data->spawn_is_async) { // Async
local_state = get_spawn_state(spawn_data->spawn_is_async); local_state = get_spawn_state(spawn_data->spawn_is_async);
printf("Test 3.5 local=%d\n",local_state); //printf("Test 3.5 local=%d\n",local_state);
if(local_state == MALL_SPAWN_SINGLE_PENDING || local_state == MALL_SPAWN_SINGLE_COMPLETED) { // Single if(local_state == MALL_SPAWN_SINGLE_PENDING || local_state == MALL_SPAWN_SINGLE_COMPLETED) { // Single
global_state = check_single_state(comm, local_state); global_state = check_single_state(comm, local_state);
} else if(local_state == MALL_SPAWN_PENDING || local_state == MALL_SPAWN_COMPLETED) { // Baseline } else if(local_state == MALL_SPAWN_PENDING || local_state == MALL_SPAWN_COMPLETED || local_state == MALL_SPAWN_ADAPTED) { // Baseline
global_state = check_generic_state(comm, child, local_state, real_time); global_state = check_generic_state(comm, child, local_state, real_time);
} else if(local_state == MALL_SPAWN_ADAPT_POSTPONE) { } else if(local_state == MALL_SPAWN_ADAPT_POSTPONE) {
...@@ -129,6 +129,7 @@ void unset_spawn_postpone_flag(int outside_state) { ...@@ -129,6 +129,7 @@ void unset_spawn_postpone_flag(int outside_state) {
int local_state = get_spawn_state(spawn_data->spawn_is_async); int local_state = get_spawn_state(spawn_data->spawn_is_async);
if(local_state == MALL_SPAWN_ADAPT_POSTPONE && outside_state == MALL_SPAWN_ADAPT_PENDING && spawn_data->spawn_is_async) { if(local_state == MALL_SPAWN_ADAPT_POSTPONE && outside_state == MALL_SPAWN_ADAPT_PENDING && spawn_data->spawn_is_async) {
set_spawn_state(MALL_SPAWN_PENDING, MALL_SPAWN_PTHREAD); set_spawn_state(MALL_SPAWN_PENDING, MALL_SPAWN_PTHREAD);
wakeup();
} }
} }
...@@ -339,7 +340,7 @@ int allocate_thread_spawn() { ...@@ -339,7 +340,7 @@ int allocate_thread_spawn() {
* se avisa al hilo maestro. * se avisa al hilo maestro.
*/ */
void* thread_work(void* arg) { void* thread_work(void* arg) {
int local_state, repeat = 0; int local_state;
returned_comm = (MPI_Comm *) malloc(sizeof(MPI_Comm)); returned_comm = (MPI_Comm *) malloc(sizeof(MPI_Comm));
generic_spawn(returned_comm, MALL_NOT_STARTED); generic_spawn(returned_comm, MALL_NOT_STARTED);
...@@ -347,11 +348,10 @@ void* thread_work(void* arg) { ...@@ -347,11 +348,10 @@ void* thread_work(void* arg) {
local_state = get_spawn_state(MALL_SPAWN_PTHREAD); local_state = get_spawn_state(MALL_SPAWN_PTHREAD);
if(local_state == MALL_SPAWN_ADAPT_POSTPONE) { if(local_state == MALL_SPAWN_ADAPT_POSTPONE) {
// El grupo de procesos se terminara de juntar tras la redistribucion de datos // El grupo de procesos se terminara de juntar tras la redistribucion de datos
repeat = 1;
local_state = wait_wakeup(); local_state = wait_wakeup();
printf("Hilos despiertan\n"); generic_spawn(returned_comm, MALL_DIST_COMPLETED);
} }
if (repeat) generic_spawn(returned_comm, MALL_DIST_COMPLETED);
pthread_exit(NULL); pthread_exit(NULL);
} }
......
...@@ -75,8 +75,4 @@ void merge_adapt_shrink(int numC, MPI_Comm *child, MPI_Comm comm, int myId) { ...@@ -75,8 +75,4 @@ void merge_adapt_shrink(int numC, MPI_Comm *child, MPI_Comm comm, int myId) {
color = 1; color = 1;
} }
MPI_Comm_split(comm, color, myId, child); MPI_Comm_split(comm, color, myId, child);
//TODO REFACTOR Llevar a otra parte -- Hacer solo si MALL_SPAWN_ADAPTED
//if(*comm != MPI_COMM_WORLD && *comm != MPI_COMM_NULL)
// MPI_Comm_free(comm); //POSIBLE ERROR?
} }
...@@ -39,10 +39,14 @@ void set_spawn_state(int value, int is_async) { ...@@ -39,10 +39,14 @@ void set_spawn_state(int value, int is_async) {
} }
int wait_wakeup() { int wait_wakeup() {
pthread_mutex_lock(&spawn_mutex);
pthread_cond_wait(&spawn_cond, &spawn_mutex); pthread_cond_wait(&spawn_cond, &spawn_mutex);
pthread_mutex_unlock(&spawn_mutex);
return get_spawn_state(1); return get_spawn_state(1);
} }
void wakeup() { void wakeup() {
pthread_mutex_lock(&spawn_mutex);
pthread_cond_signal(&spawn_cond); pthread_cond_signal(&spawn_cond);
pthread_mutex_unlock(&spawn_mutex);
} }
...@@ -6,7 +6,7 @@ SDR=0.0 ...@@ -6,7 +6,7 @@ SDR=0.0
ADR=0.0 ADR=0.0
AT=0 AT=0
SM=1 SM=1
SS=1 SS=2
; end [general] ; end [general]
[stage0] [stage0]
PT=0 PT=0
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment