Commit e1b825e9 authored by iker_martin
Browse files

Hotfix -- La distribución de procesos creaba situaciones de oversubscription no deseadas

parent 4e3ecd1f
......@@ -223,6 +223,7 @@ void init_results_data(results_data *results, int resizes, int iters_size) {
results->iters_time = calloc(iters_size + 100, sizeof(double)); //FIXME Numero magico
results->iters_type = calloc(iters_size + 100, sizeof(int));
results->iter_index = 0;
}
void realloc_results_iters(results_data *results, int needed) {
......
......@@ -14,7 +14,7 @@ typedef struct {
double sync_start, sync_end, *sync_time;
double async_start, async_end, *async_time;
double exec_start, exec_time;
//Overcharge time is time spent in malleability that is from IO modules
//Overcharge time is time spent in malleability that is from IO modules
} results_data;
void send_results(results_data *results, int root, int resizes, MPI_Comm intercomm);
......
......@@ -122,6 +122,9 @@ int main(int argc, char *argv[]) {
group->grp = group->grp + 1;
}
//
// EMPIEZA LA EJECUCION-------------------------------
//
group->grp = group->grp - 1; // TODO REFACTOR???
do {
......@@ -153,6 +156,10 @@ int main(int argc, char *argv[]) {
reset_results_index(results);
} while((config_file->resizes > group->grp + 1) && (config_file->cst == COMM_SPAWN_MERGE || config_file->cst == COMM_SPAWN_MERGE_PTHREAD));
//
// TERMINA LA EJECUCION ----------------------------------------------------------
//
if(res==1) { // Se he llegado al final de la aplicacion
MPI_Barrier(comm); // TODO Posible error al utilizar SHRINK
......@@ -242,9 +249,21 @@ void iterate(double *matrix, int n, int async_comm, int iter) {
operations = time / Top; //FIXME Calcular una sola vez
for(i=0; i < operations; i++) {
aux += computePiSerial(n);
}
/*
if(time >= 1) {
sleep(time);
}
else {
unsigned int sleep_time = time * 1000000;
usleep(sleep_time);
}
*/
if(config_file->comm_tam) {
MPI_Bcast(group->compute_comm_array, config_file->comm_tam, MPI_CHAR, ROOT, comm);
......
......@@ -492,9 +492,17 @@ void node_dist(int type, int total_procs, int already_created, int **qty, int *u
} else if (type == 2) { // DIST CPUs
tamBl = slurm_data->num_cpus / slurm_data->num_nodes;
asigCores = 0;
i = already_created / tamBl;
*used_nodes = already_created / tamBl;
i = *used_nodes = already_created / tamBl;
remainder = already_created % tamBl;
//First node could already have existing procs
if (remainder) {
procs[i] = asigCores = tamBl - remainder;
i = (i+1) % slurm_data->num_nodes;
(*used_nodes)++;
}
//Assign tamBl to each node
while(asigCores+tamBl <= total_procs) {
asigCores += tamBl;
procs[i] += tamBl;
......@@ -502,7 +510,8 @@ void node_dist(int type, int total_procs, int already_created, int **qty, int *u
(*used_nodes)++;
}
if(asigCores < total_procs) {
//Last node could have less procs than tamBl
if(asigCores < total_procs) {
procs[i] += total_procs - asigCores;
(*used_nodes)++;
}
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment