Commit 09e30d3b authored by iker_martin's avatar iker_martin
Browse files

Hotfix&&WIP. Fixed errors when reconfiguring after the first group of...

Hotfix&&WIP. Fixed errors when reconfiguring after the first group of processes. There is still an error related to initialization of stages
parent e094a77d
......@@ -333,7 +333,7 @@ void def_struct_groups(group_config_t *groups, size_t n_resizes, MPI_Datatype *c
} else { // Si hay mas de una fase(estructura), el "extent" se modifica.
MPI_Type_create_struct(counts, blocklengths, displs, types, &aux);
// Tipo derivado para enviar N elementos de la estructura
MPI_Type_create_resized(aux, 0, sizeof(iter_stage_t), config_type);
MPI_Type_create_resized(aux, 0, sizeof(group_config_t), config_type);
}
MPI_Type_commit(config_type);
}
......
#include <pthread.h>
#include <string.h>
#include "malleabilityManager.h"
#include "malleabilityStates.h"
#include "malleabilityDataStructures.h"
......@@ -21,6 +22,9 @@ int check_redistribution();
int end_redistribution();
int shrink_redistribution();
void comm_node_data(int rootBcast, int is_child_group);
void def_nodeinfo_type(MPI_Datatype *node_type);
int thread_creation();
int thread_check();
void* thread_async_work();
......@@ -49,7 +53,7 @@ typedef struct { //FIXME numC_spawned no se esta usando
MPI_Comm user_comm;
char *name_exec, *nodelist;
int num_cpus, num_nodes;
int num_cpus, num_nodes, nodelist_len;
} malleability_t;
int state = MALL_UNRESERVED; //FIXME Mover a otro lado
......@@ -113,6 +117,8 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex
return MALLEABILITY_CHILDREN;
}
mall->nodelist_len = strlen(nodelist);
zombies_service_init();
return MALLEABILITY_NOT_CHILDREN;
}
......@@ -443,6 +449,7 @@ void Children_init() {
// TODO A partir de este punto tener en cuenta si es BASELINE o MERGE
recv_config_file(mall->root, mall->intercomm, &(mall_conf->config_file));
comm_node_data(root_parents, MALLEABILITY_CHILDREN);
mall_conf->results = (results_data *) malloc(sizeof(results_data));
init_results_data(mall_conf->results, mall_conf->config_file->n_resizes, mall_conf->config_file->n_stages, RESULTS_INIT_DATA_QTY);
......@@ -490,6 +497,7 @@ void Children_init() {
MPI_Comm_dup(mall->intercomm, &(mall->user_comm));
}
MPI_Comm_disconnect(&(mall->intercomm));
}
......@@ -548,6 +556,7 @@ int start_redistribution() {
}
send_config_file(mall_conf->config_file, rootBcast, mall->intercomm);
comm_node_data(rootBcast, MALLEABILITY_NOT_CHILDREN);
if(dist_a_data->entries || rep_a_data->entries) { // Enviar datos asincronos
mall_conf->results->async_start = MPI_Wtime();
......@@ -639,8 +648,8 @@ int end_redistribution() {
}
comm_data_info(rep_s_data, dist_s_data, MALLEABILITY_NOT_CHILDREN, mall->myId, mall->root, mall->intercomm);
if(dist_s_data->entries || rep_s_data->entries) { // Enviar datos sincronos
comm_data_info(rep_s_data, dist_s_data, MALLEABILITY_NOT_CHILDREN, mall->myId, mall->root, mall->intercomm);
send_data(mall->numC, dist_s_data, MALLEABILITY_USE_SYNCHRONOUS);
// TODO Crear funcion especifica y anyadir para Asinc
......@@ -678,10 +687,7 @@ int end_redistribution() {
}
/*FIXMENOW En algun momento P0 cambia tanto su comm como intercomm respecto al resto...*/
MPI_Barrier(mall->comm); //FIXMENOW Por alguna razon da error en Comm
if(mall->intercomm != MPI_COMM_NULL && mall->intercomm != MPI_COMM_WORLD) {
//FIXMENOW Intercomm se borra, pero no es COMM WORLD ni COMM NULL
MPI_Comm_disconnect(&(mall->intercomm));
}
......@@ -692,7 +698,7 @@ int end_redistribution() {
///=============================================
///=============================================
///=============================================
//TODO Add comment
int shrink_redistribution() {
double time_extra = MPI_Wtime();
......@@ -724,6 +730,50 @@ int shrink_redistribution() {
}
}
//======================================================||
//================PRIVATE FUNCTIONS=====================||
//=================COMM NODE INFO ======================||
//======================================================||
//======================================================||
//TODO Add comment
void comm_node_data(int rootBcast, int is_child_group) {
MPI_Datatype node_type;
def_nodeinfo_type(&node_type);
MPI_Bcast(mall, 1, node_type, rootBcast, mall->intercomm);
if(is_child_group) {
mall->nodelist = malloc((mall->nodelist_len+1) * sizeof(char));
mall->nodelist[mall->nodelist_len] = '\0';
}
MPI_Bcast(mall->nodelist, mall->nodelist_len, MPI_CHAR, rootBcast, mall->intercomm);
MPI_Type_free(&node_type);
}
//TODO Add comment
void def_nodeinfo_type(MPI_Datatype *node_type) {
int i, counts = 3;
int blocklengths[3] = {1, 1, 1};
MPI_Aint displs[counts], dir;
MPI_Datatype types[counts];
// Rellenar vector types
types[0] = types[1] = types[2] = MPI_INT;
// Rellenar vector displs
MPI_Get_address(mall, &dir);
MPI_Get_address(&(mall->num_cpus), &displs[0]);
MPI_Get_address(&(mall->num_nodes), &displs[1]);
MPI_Get_address(&(mall->nodelist_len), &displs[2]);
for(i=0;i<counts;i++) displs[i] -= dir;
MPI_Type_create_struct(counts, blocklengths, displs, types, node_type);
MPI_Type_commit(node_type);
}
// TODO MOVER A OTRO LADO??
//======================================================||
//================PRIVATE FUNCTIONS=====================||
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment