Commit c706c00b authored by iker_martin

SAM config now accepts strategies as a list; strategy output modified accordingly

SAM config now accepts strategies as a list, and the output format for strategies has been modified. Bug fixes (single strategy and strtok handling). The single-spawn strategy now requires sending the number of source processes.
parent db1ffc32
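For context, this is roughly what a resize section of the configuration file can now contain (a hypothetical snippet: the key names come from the handler below, while the section name and the numeric strategy values are placeholders):

[resize1]
Spawn_Method=0
Spawn_Strategy=1,3
Redistribution_Method=0
Redistribution_Strategy=1,2

Previously Spawn_Strategy and Redistribution_Strategy each took a single integer; they now accept a comma-separated list.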
......@@ -7,6 +7,7 @@
ext_functions_t *user_functions;
void get_numbers_from_string(const char *input, size_t *res_len, int **res);
/*
* Function used to read the configuration file
......@@ -18,6 +19,8 @@ ext_functions_t *user_functions;
static int handler(void* user, const char* section, const char* name,
const char* value) {
int ret_value=1;
int *aux;
size_t aux_len;
configuration* pconfig = (configuration*)user;
if(pconfig->actual_group >= pconfig->n_groups && pconfig->actual_stage >= pconfig->n_stages) {
......@@ -79,11 +82,15 @@ static int handler(void* user, const char* section, const char* name,
} else if (MATCH(resize_name, "Redistribution_Method") && LAST(pconfig->actual_group, pconfig->n_groups)) {
pconfig->groups[pconfig->actual_group].rm = atoi(value);
} else if (MATCH(resize_name, "Redistribution_Strategy") && LAST(pconfig->actual_group, pconfig->n_groups)) {
pconfig->groups[pconfig->actual_group].rs = atoi(value);
get_numbers_from_string(value, &aux_len, &aux);
pconfig->groups[pconfig->actual_group].rs = aux;
pconfig->groups[pconfig->actual_group].rs_len = aux_len;
} else if (MATCH(resize_name, "Spawn_Method") && LAST(pconfig->actual_group, pconfig->n_groups)) {
pconfig->groups[pconfig->actual_group].sm = atoi(value);
} else if (MATCH(resize_name, "Spawn_Strategy") && LAST(pconfig->actual_group, pconfig->n_groups)) {
pconfig->groups[pconfig->actual_group].ss = atoi(value);
get_numbers_from_string(value, &aux_len, &aux);
pconfig->groups[pconfig->actual_group].ss = aux;
pconfig->groups[pconfig->actual_group].ss_len = aux_len;
pconfig->actual_group = pconfig->actual_group+1; // Last element of the structure
// Unknown case
......@@ -96,6 +103,50 @@ static int handler(void* user, const char* section, const char* name,
return ret_value;
}
/**
* @brief Extracts numbers from a comma-separated string and stores them in an array.
*
* This function takes a string containing a sequence of numbers separated by commas,
* converts each number to an integer, and stores them in a dynamically allocated array.
*
* @param input The input string containing comma-separated numbers.
* @param res_len Pointer to a size_t that will hold the length of the resulting array.
* Note: NULL may be passed if the caller does not need it.
* @param res Pointer to an integer array where the extracted numbers will be stored.
* Note: The memory for this array is dynamically allocated and should be freed by the caller.
*/
void get_numbers_from_string(const char *input, size_t *res_len, int **res) {
char *aux, *token;
int num;
size_t len, malloc_len;
len = 0;
malloc_len = 10;
*res = (int *) malloc(malloc_len * sizeof(int));
aux = (char *) malloc((strlen(input)+1) * sizeof(char));
strcpy(aux, input);
token = strtok(aux, ",");
while (token != NULL) {
num = atoi(token);
if(len == malloc_len) {
malloc_len += 10;
*res = (int *) realloc(*res, malloc_len * sizeof(int));
}
(*res)[len] = num;
len++;
token = strtok(NULL, ",");
}
if(res_len != NULL) *res_len = len;
if(len != malloc_len) {
*res = (int *) realloc(*res, len * sizeof(int));
}
free(aux);
}
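A minimal usage sketch of this helper (the input string is a made-up example):

size_t len;
int *values;
get_numbers_from_string("1,3", &len, &values); // len == 2, values[0] == 1, values[1] == 3
/* ... use values ... */
free(values); // the array is heap-allocated; the caller must free it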
/*
* Creates and returns a configuration structure
* from a given file name.
......
......@@ -46,6 +46,7 @@ int main(int argc, char *argv[]) {
int req;
int im_child;
int abort_needed = 0;
size_t i;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &req);
MPI_Comm_rank(MPI_COMM_WORLD, &myId);
......@@ -85,8 +86,14 @@ int main(int argc, char *argv[]) {
}
if(config_file->n_groups != group->grp + 1) { //TODO Move to another function
MAM_Set_configuration(config_file->groups[group->grp+1].sm, config_file->groups[group->grp+1].ss,
config_file->groups[group->grp+1].phy_dist, config_file->groups[group->grp+1].rm, config_file->groups[group->grp+1].rs);
MAM_Set_configuration(config_file->groups[group->grp+1].sm, MAM_STRAT_SPAWN_CLEAR,
config_file->groups[group->grp+1].phy_dist, config_file->groups[group->grp+1].rm, MAM_STRAT_RED_CLEAR);
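// Assumption: the *_CLEAR values reset the default strategies, so each strategy parsed from the config can be enabled key by key below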
for(i=0; i<config_file->groups[group->grp+1].ss_len; i++) {
MAM_Set_key_configuration(MAM_SPAWN_STRATEGIES, config_file->groups[group->grp+1].ss[i], &req);
}
for(i=0; i<config_file->groups[group->grp+1].rs_len; i++) {
MAM_Set_key_configuration(MAM_RED_STRATEGIES, config_file->groups[group->grp+1].rs[i], &req);
}
MAM_Set_target_number(config_file->groups[group->grp+1].procs); // TODO TO BE DEPRECATED
if(group->grp != 0) {
......
......@@ -53,7 +53,9 @@ typedef struct
typedef struct
{
int iters, procs;
int sm, ss, phy_dist, rm, rs;
int sm, phy_dist, rm;
int *ss, *rs;
size_t ss_len, rs_len;
float factor;
} group_config_t;
......@@ -65,7 +67,7 @@ typedef struct
int granularity;
size_t sdr, adr;
MPI_Datatype config_type, group_type, iter_stage_type;
MPI_Datatype config_type, group_type, group_strats_type, iter_stage_type;
iter_stage_t *stages;
group_config_t *groups;
} configuration;
......
......@@ -13,6 +13,7 @@ void free_config_stage(iter_stage_t *stage, int *freed_ids, size_t *found_ids);
void def_struct_config_file(configuration *config_file);
void def_struct_groups(configuration *config_file);
void def_struct_groups_strategies(configuration *config_file);
void def_struct_iter_stage(configuration *config_file);
/*
......@@ -49,6 +50,7 @@ void init_config(char *file_name, configuration **user_config) {
*user_config=config;
}
def_struct_config_file(*user_config);
def_struct_groups_strategies(*user_config);
}
/*
......@@ -70,10 +72,12 @@ void malloc_config_resizes(configuration *user_config) {
user_config->groups[i].iters = 0;
user_config->groups[i].procs = 1;
user_config->groups[i].sm = 0;
user_config->groups[i].ss = 1;
user_config->groups[i].ss = NULL;
user_config->groups[i].ss_len = 0;
user_config->groups[i].phy_dist = 0;
user_config->groups[i].rm = 0;
user_config->groups[i].rs = 1;
user_config->groups[i].rs = NULL;
user_config->groups[i].rs_len = 0;
user_config->groups[i].factor = 1;
}
def_struct_groups(user_config);
......@@ -126,6 +130,11 @@ void free_config(configuration *user_config) {
for(i=0; i < user_config->n_stages; i++) {
free_config_stage(&(user_config->stages[i]), freed_ids, &found_ids);
}
for(i=0; i < user_config->n_groups; i++) {
free(user_config->groups[i].ss);
free(user_config->groups[i].rs);
}
//Free derived types
MPI_Type_free(&(user_config->config_type));
user_config->config_type = MPI_DATATYPE_NULL;
......@@ -133,6 +142,9 @@ void free_config(configuration *user_config) {
MPI_Type_free(&(user_config->group_type));
user_config->group_type = MPI_DATATYPE_NULL;
MPI_Type_free(&(user_config->group_strats_type));
user_config->group_strats_type = MPI_DATATYPE_NULL;
MPI_Type_free(&(user_config->iter_stage_type));
user_config->iter_stage_type = MPI_DATATYPE_NULL;
......@@ -198,7 +210,7 @@ void free_config_stage(iter_stage_t *stage, int *freed_ids, size_t *found_ids) {
*/
void print_config(configuration *user_config) {
if(user_config != NULL) {
size_t i;
size_t i, j;
printf("Config loaded: R=%zu, S=%zu, granularity=%d, SDR=%zu, ADR=%zu, Rigid=%d, Capture_Method=%d\n",
user_config->n_resizes, user_config->n_stages, user_config->granularity, user_config->sdr, user_config->adr, user_config->rigid_times, user_config->capture_method);
for(i=0; i<user_config->n_stages; i++) {
......@@ -206,10 +218,19 @@ void print_config(configuration *user_config) {
i, user_config->stages[i].pt, user_config->stages[i].t_stage, user_config->stages[i].real_bytes, user_config->stages[i].t_capped);
}
for(i=0; i<user_config->n_groups; i++) {
printf("Group %zu: Iters=%d, Procs=%d, Factors=%f, Dist=%d, RM=%d, RS=%d, SM=%d, SS=%d\n",
printf("Group %zu: Iters=%d, Procs=%d, Factors=%f, Dist=%d, RM=%d, SM=%d",
i, user_config->groups[i].iters, user_config->groups[i].procs, user_config->groups[i].factor,
user_config->groups[i].phy_dist, user_config->groups[i].rm, user_config->groups[i].rs,
user_config->groups[i].sm, user_config->groups[i].ss);
user_config->groups[i].phy_dist, user_config->groups[i].rm, user_config->groups[i].sm);
printf(", RS=%d", user_config->groups[i].rs[0]);
for(j=1; j<user_config->groups[i].rs_len; j++) {
printf("/%d", user_config->groups[i].rs[j]);
}
printf(", SS=%d", user_config->groups[i].ss[0]);
for(j=1; j<user_config->groups[i].ss_len; j++) {
printf("/%d", user_config->groups[i].ss[j]);
}
printf("\n");
}
}
}
......@@ -237,10 +258,18 @@ void print_config_group(configuration *user_config, size_t grp) {
printf("Stage %zu: PT=%d, T_stage=%lf, bytes=%d, T_capped=%d\n",
i, user_config->stages[i].pt, user_config->stages[i].t_stage, user_config->stages[i].real_bytes, user_config->stages[i].t_capped);
}
printf("Group %zu: Iters=%d, Procs=%d, Factors=%f, Dist=%d, RM=%d, RS=%d, SM=%d, SS=%d, parents=%d, children=%d\n",
grp, user_config->groups[grp].iters, user_config->groups[grp].procs, user_config->groups[grp].factor,
user_config->groups[grp].phy_dist, user_config->groups[grp].rm, user_config->groups[grp].rs,
user_config->groups[grp].sm, user_config->groups[grp].ss, parents, sons);
printf("Group %zu: Iters=%d, Procs=%d, Factors=%f, Dist=%d, RM=%d, SM=%d", grp, user_config->groups[grp].iters, user_config->groups[grp].procs, user_config->groups[grp].factor,
user_config->groups[grp].phy_dist, user_config->groups[grp].rm, user_config->groups[grp].sm);
printf(", RS=%d", user_config->groups[grp].rs[0]);
for(i=1; i<user_config->groups[grp].rs_len; i++) {
printf("/%d", user_config->groups[grp].rs[i]);
}
printf(", SS=%d", user_config->groups[grp].ss[0]);
for(i=1; i<user_config->groups[grp].ss_len; i++) {
printf("/%d", user_config->groups[grp].ss[i]);
}
printf(", parents=%d, children=%d\n", parents, sons);
}
}
......@@ -261,8 +290,9 @@ void print_config_group(configuration *user_config, size_t grp) {
*/
void send_config_file(configuration *config_file, int root, MPI_Comm intercomm) {
MPI_Bcast(config_file, 1, config_file->config_type, root, intercomm);
MPI_Bcast(config_file->groups, config_file->n_groups, config_file->group_type, root, intercomm);
MPI_Bcast(config_file->stages, config_file->n_stages, config_file->iter_stage_type, root, intercomm);
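// group_type carries ss_len/rs_len and is now broadcast before the strategies; group_strats_type then packs every strategy array into a single message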
MPI_Bcast(config_file->groups, config_file->n_groups, config_file->group_type, root, intercomm);
MPI_Bcast(config_file->groups, 1, config_file->group_strats_type, root, intercomm);
}
......@@ -279,6 +309,7 @@ void send_config_file(configuration *config_file, int root, MPI_Comm intercomm)
* la funcion "free_config".
*/
void recv_config_file(int root, MPI_Comm intercomm, configuration **config_file_out) {
size_t i;
configuration *config_file = malloc(sizeof(configuration));
def_struct_config_file(config_file);
......@@ -286,11 +317,17 @@ void recv_config_file(int root, MPI_Comm intercomm, configuration **config_file_
//Initialization of internal structures
config_file->n_resizes = config_file->n_groups-1;
malloc_config_resizes(config_file); // Initialize group values
malloc_config_stages(config_file); // Initialize stage vectors to NULL
MPI_Bcast(config_file->groups, config_file->n_groups, config_file->group_type, root, intercomm);
malloc_config_resizes(config_file); // Initialize group values
MPI_Bcast(config_file->stages, config_file->n_stages, config_file->iter_stage_type, root, intercomm);
MPI_Bcast(config_file->groups, config_file->n_groups, config_file->group_type, root, intercomm);
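// The group_type broadcast above delivered ss_len and rs_len, so the strategy arrays can be sized before the final broadcast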
for(i=0; i<config_file->n_groups; i++) {
config_file->groups[i].ss = (int *) malloc(config_file->groups[i].ss_len * sizeof(int));
config_file->groups[i].rs = (int *) malloc(config_file->groups[i].rs_len * sizeof(int));
}
def_struct_groups_strategies(config_file); // Initialize group vectors
MPI_Bcast(config_file->groups, 1, config_file->group_strats_type, root, intercomm);
*config_file_out = config_file;
}
......@@ -340,7 +377,8 @@ void def_struct_groups(configuration *config_file) {
group_config_t *groups = config_file->groups;
// Fill in the types vector
types[0] = types[1] = types[2] = types[3] = types[4] = types[5] = types[6] = MPI_INT;
types[0] = types[1] = types[2] = types[4] = types[5] = MPI_INT;
types[3] = types[6] = MPI_UNSIGNED_LONG;
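// ss_len and rs_len (indices 3 and 6) are size_t; MPI_UNSIGNED_LONG is assumed to match its width on this platform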
types[7] = MPI_FLOAT;
// Fill in the displs vector
......@@ -349,10 +387,10 @@ void def_struct_groups(configuration *config_file) {
MPI_Get_address(&(groups->iters), &displs[0]);
MPI_Get_address(&(groups->procs), &displs[1]);
MPI_Get_address(&(groups->sm), &displs[2]);
MPI_Get_address(&(groups->ss), &displs[3]);
MPI_Get_address(&(groups->ss_len), &displs[3]);
MPI_Get_address(&(groups->phy_dist), &displs[4]);
MPI_Get_address(&(groups->rm), &displs[5]);
MPI_Get_address(&(groups->rs), &displs[6]);
MPI_Get_address(&(groups->rs_len), &displs[6]);
MPI_Get_address(&(groups->factor), &displs[7]);
for(i=0;i<counts;i++) displs[i] -= dir;
......@@ -369,6 +407,42 @@ void def_struct_groups(configuration *config_file) {
}
}
/*
* Derived type for sending the strategies
* of each group in a single communication.
*/
void def_struct_groups_strategies(configuration *config_file) {
int i, counts = config_file->n_groups*2;
int *blocklengths;
MPI_Aint *displs, dir;
MPI_Datatype *types;
group_config_t *group;
blocklengths = (int *) malloc(counts * sizeof(int));
displs = (MPI_Aint *) malloc(counts * sizeof(MPI_Aint));
types = (MPI_Datatype *) malloc(counts * sizeof(MPI_Datatype));
MPI_Get_address(config_file->groups, &dir);
for(i = 0; i < counts; i+=2) {
group = &config_file->groups[i/2];
MPI_Get_address(group->ss, &displs[i]);
MPI_Get_address(group->rs, &displs[i+1]);
displs[i] -= dir;
displs[i+1] -= dir;
types[i] = types[i+1] = MPI_INT;
blocklengths[i] = group->ss_len;
blocklengths[i+1] = group->rs_len;
}
MPI_Type_create_struct(counts, blocklengths, displs, types, &config_file->group_strats_type);
MPI_Type_commit(&config_file->group_strats_type);
free(blocklengths);
free(displs);
free(types);
}
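For intuition, the single collective enabled by group_strats_type replaces what would otherwise be one broadcast per array; a sketch of the naive alternative, assuming the same root and communicator used in send_config_file:

size_t i;
for(i = 0; i < config_file->n_groups; i++) {
MPI_Bcast(config_file->groups[i].ss, (int) config_file->groups[i].ss_len, MPI_INT, root, intercomm);
MPI_Bcast(config_file->groups[i].rs, (int) config_file->groups[i].rs_len, MPI_INT, root, intercomm);
}

Packing the arrays into one derived type trades 2*n_groups collective calls for a single one.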
/*
* Derived type for sending specific elements
* of the iteration-stage structures in a single communication.
......
......@@ -7,7 +7,7 @@
#include "MAM_Configuration.h"
#include "malleabilityDataStructures.h"
void prepare_redistribution(int qty, MPI_Datatype datatype, int numP, int numO, int is_children_group, void **recv, struct Counts *s_counts, struct Counts *r_counts); //FIXME Choose name for is_sync
void prepare_redistribution(int qty, MPI_Datatype datatype, int numP, int numO, int is_children_group, void **recv, struct Counts *s_counts, struct Counts *r_counts);
void check_requests(struct Counts s_counts, struct Counts r_counts, MPI_Request **requests, size_t *request_qty);
void sync_point2point(void *send, void *recv, MPI_Datatype datatype, struct Counts s_counts, struct Counts r_counts, MPI_Comm comm);
......
......@@ -7,7 +7,7 @@ int state = MALL_UNRESERVED;
* of MaM.
*/
void MAM_Def_main_datatype() {
int i, counts = 9;
int i, counts = 10;
int blocklengths[counts];
MPI_Aint displs[counts];
MPI_Datatype types[counts];
......@@ -24,9 +24,10 @@ void MAM_Def_main_datatype() {
MPI_Get_address(&(mall_conf->red_method), &displs[3]);
MPI_Get_address(&(mall_conf->red_strategies), &displs[4]);
MPI_Get_address(&(mall->root_parents), &displs[5]);
MPI_Get_address(&(mall->num_cpus), &displs[6]);
MPI_Get_address(&(mall->num_nodes), &displs[7]);
MPI_Get_address(&(mall->nodelist_len), &displs[8]);
MPI_Get_address(&(mall->num_parents), &displs[6]); //TODO Add only when Intercomm strat active?
MPI_Get_address(&(mall->num_cpus), &displs[7]);
MPI_Get_address(&(mall->num_nodes), &displs[8]);
MPI_Get_address(&(mall->nodelist_len), &displs[9]);
MPI_Type_create_struct(counts, blocklengths, displs, types, &mall->struct_type);
MPI_Type_commit(&mall->struct_type);
......
......@@ -152,6 +152,7 @@ void MAM_Finalize() {
free(rep_a_data);
free(dist_s_data);
free(dist_a_data);
if(mall->nodelist != NULL) free(mall->nodelist);
MAM_Free_main_datatype();
free_malleability_times();
......@@ -517,6 +518,7 @@ int MAM_St_rms(int *mam_state) {
}
int MAM_St_spawn_start() {
mall->num_parents = mall->numP;
state = spawn_step();
//FIXME This is necessary but ugly
if(mall_conf->spawn_method == MALL_SPAWN_MERGE && mall->myId >= mall->numC){ mall->zombie = 1; }
......@@ -529,9 +531,6 @@ int MAM_St_spawn_start() {
}
int MAM_St_spawn_pending(int wait_completed) {
fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
if(mall->myId == 0)printf("TEST END\n");
fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
state = check_spawn_state(&(mall->intercomm), mall->comm, wait_completed);
if (state == MALL_SPAWN_COMPLETED || state == MALL_SPAWN_ADAPTED) {
#if USE_MAL_BARRIERS
......@@ -909,7 +908,6 @@ int check_redistribution(int wait_completed) {
return MALL_USER_PENDING;
}
/*
* Finishes the data redistribution with the children, checking
* whether iterations with background communications have been performed
......
......@@ -152,7 +152,7 @@ int GetCPUCount() {
* TODO
*/
int MAM_I_slurm_getenv_hosts_info() {
char *tmp = NULL, *token;
char *tmp = NULL, *tmp_copy, *token;
int cpus, count;
//int i, *cpus_counts, *nodes_counts, *aux;
......@@ -171,7 +171,10 @@ int MAM_I_slurm_getenv_hosts_info() {
tmp = getenv("SLURM_JOB_CPUS_PER_NODE");
if(tmp == NULL) return 1;
token = strtok(tmp, ",");
tmp_copy = (char *) malloc((strlen(tmp)+1) * sizeof(char));
strcpy(tmp_copy, tmp);
token = strtok(tmp_copy, ",");
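// strtok writes into its argument, and the buffer returned by getenv must not be modified, hence the copy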
//TODO When MaM considers heterogeneous allocations, these will be needed instead of num_cpus.
//cpus_counts = (int *) malloc(mall->num_nodes * sizeof(int));
//nodes_counts = (int *) malloc(mall->num_nodes * sizeof(int));
......@@ -200,6 +203,7 @@ int MAM_I_slurm_getenv_hosts_info() {
}
*/
free(tmp_copy);
return 0;
}
......
......@@ -144,7 +144,8 @@ void malleability_connect_children(MPI_Comm comm, MPI_Comm *parents) {
spawn_data->comm = comm;
MAM_Comm_main_structures(MALLEABILITY_ROOT); //FIXME What if the root id differs from 0? Send from spawn to root id?
MPI_Comm_remote_size(*parents, &spawn_data->initial_qty);
//MPI_Comm_remote_size(*parents, &spawn_data->initial_qty);
spawn_data->initial_qty = mall->num_parents;
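// Assumption: the parent count now arrives through the main-structure broadcast (see MAM_Def_main_datatype), avoiding a remote-size query that is not valid for every spawn strategy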
MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_SINGLE, &(spawn_data->spawn_is_single));
MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_PTHREAD, &(spawn_data->spawn_is_async));
MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_INTERCOMM, &(spawn_data->spawn_is_intercomm));
......@@ -161,9 +162,7 @@ void malleability_connect_children(MPI_Comm comm, MPI_Comm *parents) {
merge(*spawn_data, parents, MALL_NOT_STARTED);
break;
}
mall->num_parents = spawn_data->initial_qty;
//mall->num_parents = spawn_data->initial_qty;
free(spawn_data);
}
......
......@@ -191,10 +191,12 @@ void generate_info_string(char *nodelist, int *procs_array, size_t nodes, MPI_In
* when creating processes and indicating where they must be created.
*/
void fill_str_hosts(char *nodelist, int *qty, size_t used_nodes, char **host_str) {
char *host;
char *host, *aux, *token;
size_t i=0,len=0;
char *token = strtok(nodelist, ",");
aux = (char *) malloc((strlen(nodelist)+1) * sizeof(char));
strcpy(aux, nodelist);
token = strtok(aux, ",");
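// Tokenize a copy so strtok does not mutate the caller's nodelist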
while (token != NULL && i < used_nodes) {
host = strdup(token);
if (qty[i] != 0) {
......@@ -204,6 +206,7 @@ void fill_str_hosts(char *nodelist, int *qty, size_t used_nodes, char **host_str
free(host);
token = strtok(NULL, ",");
}
free(aux);
}
/*
......
......@@ -28,7 +28,7 @@ numP=$(bash $dir$execDir/BashScripts/getNumPNeeded.sh $configFile 0)
initial_nodelist=$(bash $dir$execDir/BashScripts/createInitialNodelist.sh $numP $cores $nodelist)
echo $initial_nodelist
echo "Test PreRUN $numP $nodelist"
mpirun -hosts $initial_nodelist -np $numP $dir$codeDir/build/a.out $configFile $outIndex $nodelist $nodes
mpirun -hosts $initial_nodelist -np $numP $dir$codeDir/build/a.out $configFile $outIndex
echo "END RUN"
sed -i 's/application called MPI_Abort(MPI_COMM_WORLD, -100) - process/shrink cleaning/g' slurm-$SLURM_JOB_ID.out
......
......@@ -18,8 +18,9 @@ echo "MPICH"
#module load mpich-3.4.1-noucx
#export HYDRA_DEBUG=1
mpirun --version
numP=$(bash $dir$execDir/BashScripts/getNumPNeeded.sh $configFile 0)
mpirun -np $numP valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --trace-children=yes --log-file=nc.vg.%p $dir$codeDir/build/a.out $configFile $outIndex $nodelist $nodes
mpirun -np $numP valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --trace-children=yes --log-file=nc.vg.%p $dir$codeDir/build/a.out $configFile $outIndex
echo "END RUN"
sed -i 's/application called MPI_Abort(MPI_COMM_WORLD, -100) - process/shrink cleaning/g' slurm-$SLURM_JOB_ID.out
......