Commit 2639ab13 authored by Iker Martín Álvarez

Merge branch 'dev' into 'master'

New version of Proteo

See merge request martini/malleability_benchmark!6
parents 26305fac e83b5922
@@ -3,45 +3,31 @@
#include <mpi.h>
#include "results.h"
#define RESULTS_EXTRA_SIZE 100
void def_results_type(results_data *results, int resizes, MPI_Datatype *results_type);
void compute_max(results_data *results, double *computed_array, int myId, int root, MPI_Comm comm);
void compute_mean(results_data *results, double *computed_array, int myId, int numP, int root, MPI_Comm comm);
void compute_median(results_data *results, double *computed_array, size_t *used_ids, int myId, int numP, int root, MPI_Comm comm);
void match_median(results_data *results, double *computed_array, size_t *used_ids, int myId, int numP, int root, MPI_Comm comm);
//======================================================||
//======================================================||
//================MPI RESULTS FUNCTIONS=================||
//======================================================||
//======================================================||
//TODO Generalize both functions into a single one
/*
* Sends a results structure to the group of processes this group is
* linked to through the intercommunicator passed as argument.
*
* This function has to be called by every process of the same group,
* indicating which process is the root in charge of sending the
* results to the other group.
*/
void send_results(results_data *results, int root, int resizes, MPI_Comm intercomm) {
MPI_Datatype results_type;
// Obtain a derived datatype to send all the
// scalar data with a single communication
def_results_type(results, resizes, &results_type);
MPI_Bcast(results, 1, results_type, root, intercomm);
//Free derived datatypes
MPI_Type_free(&results_type);
}
/*
* Receives a results structure from another group of processes
* and returns it. The memory of the structure has to be allocated beforehand
* with the function "init_results_data".
* Communicates a results structure to every process of the communicator
* through a derived datatype.
*
* This function has to be called by every process of the same group,
* indicating which process is the root of the other group in charge of
* sending the results to this group.
* If it is called with an intercommunicator, the group of processes that sends
* the data has to pass the value "MPI_ROOT" as "root" on its root process and
* "MPI_PROC_NULL" on the rest of that group. The processes of the other group
* have to pass the Id of the root process that used "MPI_ROOT".
*/
void recv_results(results_data *results, int root, int resizes, MPI_Comm intercomm) {
void results_comm(results_data *results, int root, size_t resizes, MPI_Comm intercomm) {
MPI_Datatype results_type;
// Obtain a derived datatype to send all the
@@ -60,53 +46,38 @@ void recv_results(results_data *results, int root, int resizes, MPI_Comm interco
* Specifically, they are three scalars and two vectors of size "resizes"
*/
void def_results_type(results_data *results, int resizes, MPI_Datatype *results_type) {
int i, counts = 5;
int blocklengths[] = {1, 1, 1, 1, 1};
int i, counts = 7;
int blocklengths[] = {1, 1, 1, 1, 1, 1, 1, 1};
MPI_Aint displs[counts], dir;
MPI_Datatype types[counts];
// Fill in the types vector
types[0] = types[1] = types[2] = types[3] = types[4] = MPI_DOUBLE;
blocklengths[3] = blocklengths[4] = resizes;
types[0] = types[1] = types[2] = types[3] = types[4] = types[5] = types[6] = MPI_DOUBLE;
blocklengths[2] = blocklengths[3] = blocklengths[4] = blocklengths[5] = blocklengths[6] = resizes;
// Fill in the displs vector
MPI_Get_address(results, &dir);
MPI_Get_address(&(results->sync_start), &displs[0]);
MPI_Get_address(&(results->async_start), &displs[1]);
MPI_Get_address(&(results->exec_start), &displs[2]);
MPI_Get_address(&(results->spawn_real_time[0]), &displs[3]);
MPI_Get_address(&(results->spawn_time[0]), &displs[4]); //TODO Check whether this can be simplified //FIXME Error if there is more than one spawn?
MPI_Get_address(&(results->exec_start), &displs[0]);
MPI_Get_address(&(results->wasted_time), &displs[1]);
MPI_Get_address(results->sync_time, &displs[2]);
MPI_Get_address(results->async_time, &displs[3]);
MPI_Get_address(results->user_time, &displs[4]);
MPI_Get_address(results->spawn_time, &displs[5]);
MPI_Get_address(results->malleability_time, &displs[6]);
for(i=0;i<counts;i++) displs[i] -= dir;
MPI_Type_create_struct(counts, blocklengths, displs, types, results_type);
MPI_Type_commit(results_type);
}
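/*
 * Illustrative usage sketch (assumptions: an already created intercommunicator
 * "intercomm" and a "results" structure initialized on both sides). It shows
 * the calling convention described above: the sending group passes MPI_ROOT on
 * its root and MPI_PROC_NULL on the rest, while the receiving group passes the
 * rank of the remote root. The function name is hypothetical.
 */
void results_comm_example(results_data *results, size_t resizes, int im_sender,
                          int myId, int remote_root, MPI_Comm intercomm) {
  int root;
  if(im_sender) {
    root = (myId == 0) ? MPI_ROOT : MPI_PROC_NULL; // Roles inside the sending group
  } else {
    root = remote_root;                            // Rank of the root in the other group
  }
  results_comm(results, root, resizes, intercomm); // Same call on both groups
}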
//======================================================||
//======================================================||
//================SET RESULTS FUNCTIONS=================||
//======================================================||
//======================================================||
/*
* Stores the results related to the data redistribution
* after a reconfiguration. To be called by the children once
* the redistribution has finished and the configuration has been obtained.
*/
void set_results_post_reconfig(results_data *results, int grp, int sdr, int adr) {
if(sdr) { // If there is no synchronous data, the time is 0
results->sync_time[grp] = results->sync_end - results->sync_start;
} else {
results->sync_time[grp] = 0;
}
if(adr) { // If there is no asynchronous data, the time is 0
results->async_time[grp] = results->async_end - results->async_start;
} else {
results->async_time[grp] = 0;
}
}
/*
* Resets to 0 the index of the next element to be written for the vectors
* related to the iterations.
@@ -118,9 +89,9 @@ void set_results_post_reconfig(results_data *results, int grp, int sdr, int adr)
*/
void reset_results_index(results_data *results) {
results->iter_index = 0;
results->iters_async = 0;
}
/*
* Obtains, for each iteration, the maximum time among all the processes
* that have taken part.
@@ -128,11 +99,116 @@ void reset_results_index(results_data *results) {
* The maximum has to be obtained, since it is the one that represents
* the real time that has been spent.
*/
void compute_results_iter(results_data *results, int myId, int root, MPI_Comm comm) {
if(myId == root)
MPI_Reduce(MPI_IN_PLACE, results->iters_time, results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
else
MPI_Reduce(results->iters_time, NULL, results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
void compute_results_iter(results_data *results, int myId, int numP, int root, size_t stages, int capture_method, MPI_Comm comm) {
size_t i, *used_ids;
switch(capture_method) {
case RESULTS_MAX:
compute_max(results, results->iters_time, myId, root, comm);
for(i=0; i<stages; i++) {
compute_max(results, results->stage_times[i], myId, root, comm);
}
break;
case RESULTS_MEAN:
compute_mean(results, results->iters_time, myId, numP, root, comm);
for(i=0; i<stages; i++) {
compute_mean(results, results->stage_times[i], myId, numP, root, comm);
}
break;
case RESULTS_MEDIAN:
used_ids = malloc(results->iter_index * sizeof(size_t));
compute_median(results, results->iters_time, used_ids, myId, numP, root, comm);
for(i=0; i<stages; i++) {
//compute_median(results, results->stage_times[i], myId, numP, root, comm);
match_median(results, results->stage_times[i], used_ids, myId, numP, root, comm);
}
free(used_ids);
break;
}
}
void compute_max(results_data *results, double *computed_array, int myId, int root, MPI_Comm comm) {
if(myId == root) {
MPI_Reduce(MPI_IN_PLACE, computed_array, results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
} else {
MPI_Reduce(computed_array, NULL, results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
}
}
void compute_mean(results_data *results, double *computed_array, int myId, int numP, int root, MPI_Comm comm) {
if(myId == root) {
MPI_Reduce(MPI_IN_PLACE, computed_array, results->iter_index, MPI_DOUBLE, MPI_SUM, root, comm);
for(size_t i=0; i<results->iter_index; i++) {
computed_array[i] = computed_array[i] / numP;
}
} else {
MPI_Reduce(computed_array, NULL, results->iter_index, MPI_DOUBLE, MPI_SUM, root, comm);
}
}
struct TimeWithIndex {
double time;
size_t index;
};
int compare(const void *a, const void *b) {
double diff = ((struct TimeWithIndex *)a)->time - ((struct TimeWithIndex *)b)->time;
return (diff > 0) - (diff < 0); // Avoid truncating sub-second differences to zero
}
/*
* Computes the median of a times vector replicated among "numP" processes.
* The median is computed for each element of the final vector, which is returned.
*
* In addition, the vector "used_ids" returns which process the median of each element was obtained from.
*/
void compute_median(results_data *results, double *computed_array, size_t *used_ids, int myId, int numP, int root, MPI_Comm comm) {
double *aux_all_iters, median;
struct TimeWithIndex *aux_id_iters;
if(myId == root) {
aux_all_iters = malloc(numP *results->iter_index * sizeof(double));
aux_id_iters = malloc(numP * sizeof(struct TimeWithIndex));
}
MPI_Gather(computed_array, results->iter_index, MPI_DOUBLE, aux_all_iters, results->iter_index, MPI_DOUBLE, root, comm);
if(myId == root) {
for(size_t i=0; i<results->iter_index; i++) {
for(int j=0; j<numP; j++) {
aux_id_iters[j].time = aux_all_iters[i+(results->iter_index*j)];
aux_id_iters[j].index = (size_t) j;
}
// Get Median
qsort(aux_id_iters, numP, sizeof(struct TimeWithIndex), &compare);
median = aux_id_iters[numP/2].time;
if (numP % 2 == 0) median = (aux_id_iters[numP/2 - 1].time + aux_id_iters[numP/2].time) / 2;
computed_array[i] = median;
used_ids[i] = aux_id_iters[numP/2].index; //FIXME What should be the index when numP is even?
}
free(aux_all_iters);
free(aux_id_iters);
}
}
/*
* Obtains the medians of a times vector replicated among "numP" processes.
* The median of each element is obtained by checking the vector "used_ids",
* which holds which process has the median.
*
* As a result it returns a vector with the computed medians.
*/
void match_median(results_data *results, double *computed_array, size_t *used_ids, int myId, int numP, int root, MPI_Comm comm) {
double *aux_all_iters;
size_t matched_id;
if(myId == root) {
aux_all_iters = malloc(numP * results->iter_index * sizeof(double));
}
MPI_Gather(computed_array, results->iter_index, MPI_DOUBLE, aux_all_iters, results->iter_index, MPI_DOUBLE, root, comm);
if(myId == root) {
for(size_t i=0; i<results->iter_index; i++) {
matched_id = used_ids[i];
computed_array[i] = aux_all_iters[i+(results->iter_index*matched_id)];
}
free(aux_all_iters);
}
}
//======================================================||
@@ -144,28 +220,32 @@ void compute_results_iter(results_data *results, int myId, int root, MPI_Comm co
/*
* Prints the local results to standard output.
* These are the ones related to the iterations: the time
* per iteration, the type (normal or during asynchronous communication)
* and how many internal operations have been performed in each iteration.
* per iteration and the type (normal or during asynchronous communication).
*/
void print_iter_results(results_data results, int last_normal_iter_index) {
int i, aux;
void print_iter_results(results_data results) {
size_t i;
printf("Titer: ");
printf("Async_Iters: %ld\n", results.iters_async);
printf("T_iter: ");
for(i=0; i< results.iter_index; i++) {
printf("%lf ", results.iters_time[i]);
}
printf("\n");
}
printf("\nTtype: "); //FIXME modificar a imprimir solo la cantidad de asincronas
for(i=0; i< results.iter_index; i++) {
printf("%d ", results.iters_type[i] == 0);
}
/*
* Prints the local results of a stage to standard output.
*/
void print_stage_results(results_data results, size_t n_stages) {
size_t i, j;
printf("\nTop: "); //TODO modificar a imprimir solo cuantas operaciones cuestan una iteracion?
for(i=0; i< results.iter_index; i++) {
aux = results.iters_type[i] == 0 ? results.iters_type[last_normal_iter_index] : results.iters_type[i];
printf("%d ", aux);
for(i=0; i < n_stages; i++) {
printf("T_stage %ld: ", i);
for(j=0; j < results.iter_index; j++) {
printf("%lf ", results.stage_times[i][j]);
}
printf("\n");
}
}
/*
@@ -173,30 +253,35 @@ void print_iter_results(results_data results, int last_normal_iter_index) {
* These are the process creation time, the asynchronous and synchronous
* communication times and the total execution time.
*/
void print_global_results(results_data results, int resizes) {
int i;
void print_global_results(results_data results, size_t resizes) {
size_t i;
printf("Tspawn: "); // FIXME REFACTOR Cambiar nombre a T_resize_real
for(i=0; i< resizes - 1; i++) {
printf("T_spawn: ");
for(i=0; i < resizes; i++) {
printf("%lf ", results.spawn_time[i]);
}
printf("\nTspawn_real: "); // FIXME REFACTOR Cambiar nombre a T_resize
for(i=0; i< resizes - 1; i++) {
printf("%lf ", results.spawn_real_time[i]);
}
printf("\nTsync: ");
for(i=1; i < resizes; i++) {
printf("\nT_SR: ");
for(i=0; i < resizes; i++) {
printf("%lf ", results.sync_time[i]);
}
printf("\nTasync: ");
for(i=1; i < resizes; i++) {
printf("\nT_AR: ");
for(i=0; i < resizes; i++) {
printf("%lf ", results.async_time[i]);
}
printf("\nTex: %lf\n", results.exec_time);
printf("\nT_US: ");
for(i=0; i < resizes; i++) {
printf("%lf ", results.user_time[i]);
}
printf("\nT_Malleability: ");
for(i=0; i < resizes; i++) {
printf("%lf ", results.malleability_time[i]);
}
printf("\nT_total: %lf\n", results.exec_time);
}
//======================================================||
@@ -211,50 +296,92 @@ void print_global_results(results_data results, int resizes) {
* The arguments "resizes" and "iters_size" are needed to obtain the size
* of the results vectors.
*/
void init_results_data(results_data *results, int resizes, int iters_size) {
//*results = malloc(1 * sizeof(results_data)); FIXME Remove
void init_results_data(results_data *results, size_t resizes, size_t stages, size_t iters_size) {
size_t i;
results->spawn_time = calloc(resizes, sizeof(double));
results->spawn_real_time = calloc(resizes, sizeof(double));
results->sync_time = calloc(resizes, sizeof(double));
results->async_time = calloc(resizes, sizeof(double));
results->user_time = calloc(resizes, sizeof(double));
results->malleability_time = calloc(resizes, sizeof(double));
results->wasted_time = 0;
results->iters_size = iters_size + RESULTS_EXTRA_SIZE;
results->iters_time = calloc(results->iters_size, sizeof(double));
results->stage_times = malloc(stages * sizeof(double*));
for(i=0; i<stages; i++) {
results->stage_times[i] = calloc(results->iters_size, sizeof(double));
}
results->iters_size = iters_size + 100;
results->iters_time = calloc(iters_size + 100, sizeof(double)); //FIXME Magic number
results->iters_type = calloc(iters_size + 100, sizeof(int));
results->iters_async = 0;
results->iter_index = 0;
}
void realloc_results_iters(results_data *results, int needed) {
void realloc_results_iters(results_data *results, size_t stages, size_t needed) {
int error = 0;
double *time_aux;
int *type_aux;
size_t i;
if(results->iters_size >= needed) return;
time_aux = (double *) realloc(results->iters_time, needed * sizeof(double));
type_aux = (int *) realloc(results->iters_type, needed * sizeof(int));
if(time_aux == NULL) error = 1;
if(time_aux == NULL || type_aux == NULL) {
for(i=0; i<stages; i++) { //TODO Check that the realloc does not fail
results->stage_times[i] = (double *) realloc(results->stage_times[i], needed * sizeof(double));
if(results->stage_times[i] == NULL) error = 1;
}
if(error) {
fprintf(stderr, "Fatal error - Could not reallocate the results memory\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
results->iters_time = time_aux;
results->iters_type = type_aux;
results->iters_size = needed;
}
/*
* Frees all the memory associated with a results structure.
* TODO Make sure it has been initialized?
*/
void free_results_data(results_data *results) {
void free_results_data(results_data *results, size_t stages) {
size_t i;
if(results != NULL) {
if(results->spawn_time != NULL) {
free(results->spawn_time);
free(results->spawn_real_time);
results->spawn_time = NULL;
}
if(results->sync_time != NULL) {
free(results->sync_time);
results->sync_time = NULL;
}
if(results->async_time != NULL) {
free(results->async_time);
results->async_time = NULL;
}
if(results->user_time != NULL) {
free(results->user_time);
results->user_time = NULL;
}
if(results->malleability_time != NULL) {
free(results->malleability_time);
results->malleability_time = NULL;
}
if(results->iters_time != NULL) {
free(results->iters_time);
free(results->iters_type);
results->iters_time = NULL;
}
for(i=0; i<stages; i++) {
if(results->stage_times[i] != NULL) {
free(results->stage_times[i]);
results->stage_times[i] = NULL;
}
}
if(results->stage_times != NULL) {
free(results->stage_times);
results->stage_times = NULL;
}
}
//free(*results); FIXME Remove
}
#ifndef RESULTS_H
#define RESULTS_H
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#define RESULTS_INIT_DATA_QTY 100
enum capture_methods{RESULTS_MAX, RESULTS_MEAN, RESULTS_MEDIAN};
typedef struct {
// Iters data
double *iters_time, **stage_times;
size_t iters_async, iter_index, iters_size;
// Spawn, Thread, Sync, Async and Exec time
double spawn_start, *spawn_time;
double sync_end, *sync_time;
double async_end, *async_time;
double user_end, *user_time;
double malleability_end, *malleability_time;
double exec_start, exec_time;
double wasted_time; // Time spent recalculating iter stages
} results_data;
void results_comm(results_data *results, int root, size_t resizes, MPI_Comm intercomm);
void reset_results_index(results_data *results);
void compute_results_iter(results_data *results, int myId, int numP, int root, size_t n_stages, int capture_method, MPI_Comm comm);
void print_iter_results(results_data results);
void print_stage_results(results_data results, size_t n_stages);
void print_global_results(results_data results, size_t resizes);
void init_results_data(results_data *results, size_t resizes, size_t stages, size_t iters_size);
void realloc_results_iters(results_data *results, size_t stages, size_t needed);
void free_results_data(results_data *results, size_t stages);
#endif
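/*
 * Illustrative lifecycle sketch for this API (assumptions: 2 resizes, 1 stage
 * and 10 iterations, with the per-iteration times filled in by the caller;
 * the function name and the constant 0.1 are placeholders for the example).
 */
#include "results.h"

void results_usage_example(int myId, int numP, MPI_Comm comm) {
  results_data res;
  size_t i, resizes = 2, stages = 1, iters = 10;

  init_results_data(&res, resizes, stages, iters);
  for(i = 0; i < iters; i++) {
    res.iters_time[res.iter_index] = 0.1;      // Time of this iteration
    res.stage_times[0][res.iter_index] = 0.1;  // Time of its only stage
    res.iter_index++;
  }
  // Reduce the per-iteration times onto the root with the chosen capture method
  compute_results_iter(&res, myId, numP, 0, stages, RESULTS_MEDIAN, comm);
  if(myId == 0) {
    print_iter_results(res);
    print_stage_results(res, stages);
  }
  free_results_data(&res, stages);
}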
@@ -4,180 +4,132 @@
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include "computing_func.h"
#include "../malleability/CommDist.h"
#include "../malleability/malleabilityManager.h"
#include "../malleability/malleabilityStates.h"
#include "process_stage.h"
#include "Main_datatypes.h"
#include "configuration.h"
#include "../IOcodes/results.h"
#include "../MaM/distribution_methods/Distributed_CommDist.h"
#include "../MaM/MAM.h"
#define ROOT 0
#define DR_MAX_SIZE 1000000000
int work();
void iterate(double *matrix, int n, int async_comm, int iter);
double iterate(int async_comm);
double iterate_relaxed(double *time, double *times_stages);
double iterate_rigid(double *time, double *times_stages);
void init_group_struct(char *argv[], int argc, int myId, int numP);
void init_application();
void obtain_op_times();
void free_application_data();
void free_zombie_process();
void print_general_info(int myId, int grp, int numP);
int print_local_results();
int print_final_results();
int create_out_file(char *nombre, int *ptr, int newstdout);
typedef struct {
int myId;
int numP;
int grp;
int iter_start;
int argc;
int numS; // Number of child processes
MPI_Comm children, parents;
char *compute_comm_array;
char **argv;
char *sync_array, *async_array;
} group_data;
void init_originals();
void init_targets();
void update_targets();
void user_redistribution(void *args);
configuration *config_file;
group_data *group;
results_data *results;
MPI_Comm comm;
MPI_Comm comm, new_comm;
int run_id = 0; // Used to more easily tell executions apart during the analysis
int main(int argc, char *argv[]) {
int numP, myId, res;
int req;
int im_child;
//FIXME The code is not able to perform more than one redistribution - Fix malleabilityTypes.c
int num_cpus, num_nodes; //nodelist_len; //FIXME Remove once Slurm is used
char *nodelist = NULL;
num_cpus = 20; //FIXME MAGIC NUMBER
if (argc >= 5) {
nodelist = argv[3];
//nodelist_len = strlen(nodelist);
num_nodes = atoi(argv[4]);
num_cpus = num_nodes * num_cpus;
}
size_t i;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &req);
MPI_Comm_size(MPI_COMM_WORLD, &numP);
MPI_Comm_rank(MPI_COMM_WORLD, &myId);
MPI_Comm_size(MPI_COMM_WORLD, &numP);
comm = MPI_COMM_WORLD;
new_comm = MPI_COMM_NULL;
if(req != MPI_THREAD_MULTIPLE) {
printf("No se ha obtenido la configuración de hilos necesaria\nSolicitada %d -- Devuelta %d\n", req, MPI_THREAD_MULTIPLE);
fflush(stdout);
MPI_Abort(MPI_COMM_WORLD, -50);
}
init_group_struct(argv, argc, myId, numP);
//FIXME Does not work with OpenMPI
im_child = init_malleability(myId, numP, ROOT, comm, argv[0], nodelist, num_cpus, num_nodes);
im_child = MAM_Init(ROOT, &comm, argv[0], user_redistribution, NULL);
if(!im_child) { //TODO REFACTOR Simplify the startup
init_application();
//MAM_Use_valgrind(1);
set_benchmark_grp(group->grp);
set_benchmark_configuration(config_file);
set_benchmark_results(results);
if(im_child) {
update_targets();
} else {
init_application();
init_originals();
MPI_Barrier(comm);
results->exec_start = MPI_Wtime();
} else { //Init children
get_malleability_user_comm(&comm);
get_benchmark_configuration(&config_file);
get_benchmark_results(&results);
set_results_post_reconfig(results, group->grp, config_file->sdr, config_file->adr); //TODO Changes when adding the new redistribution
if(config_file->comm_tam) {
group->compute_comm_array = malloc(config_file->comm_tam * sizeof(char));
}
// TODO Refactor - Make it a single function
// Obtains the variables the children are going to use
void *value = NULL;
malleability_get_data(&value, 0, 1, 1);
group->grp = *((int *)value);
free(value);
malleability_get_data(&value, 1, 1, 1);
run_id = *((int *)value);
free(value);
malleability_get_data(&value, 2, 1, 1);
group->iter_start = *((int *)value);
free(value);
//FIXME Remove once SLURM is used
/*
malleability_get_data(&value, 4, 1, 1);
num_nodes = *((int *)value);
free(value);
malleability_get_data(&value, 5, 1, 1);
nodelist = (char *)value;
//free(value);
nodelist_len = strlen(nodelist);
*/
group->grp = group->grp + 1;
}
//
// EXECUTION STARTS-------------------------------
//
group->grp = group->grp - 1; // TODO REFACTOR???
do {
group->grp = group->grp + 1;
set_benchmark_grp(group->grp);
get_malleability_user_comm(&comm);
MPI_Comm_size(comm, &(group->numP));
MPI_Comm_rank(comm, &(group->myId));
if(config_file->resizes != group->grp + 1) {
set_malleability_configuration(config_file->cst, config_file->css, config_file->phy_dist[group->grp+1], -1, config_file->aib, -1);
set_children_number(config_file->procs[group->grp+1]); // TODO TO BE DEPRECATED
if(group->grp != 0) {
obtain_op_times(0); //Obtain the new timing values for the computation
MAM_Retrieve_times(&results->spawn_time[group->grp - 1], &results->sync_time[group->grp - 1], &results->async_time[group->grp - 1], &results->user_time[group->grp - 1], &results->malleability_time[group->grp - 1]);
}
if(group->grp == 0) {
malleability_add_data(&(group->grp), 1, MAL_INT, 1, 1);
malleability_add_data(&run_id, 1, MAL_INT, 1, 1);
malleability_add_data(&(group->iter_start), 1, MAL_INT, 1, 1);
if(config_file->n_groups != group->grp + 1) { //TODO Move to another function
MAM_Set_configuration(config_file->groups[group->grp+1].sm, MAM_STRAT_SPAWN_CLEAR,
config_file->groups[group->grp+1].phy_dist, config_file->groups[group->grp+1].rm, MAM_STRAT_RED_CLEAR);
for(i=0; i<config_file->groups[group->grp+1].ss_len; i++) {
MAM_Set_key_configuration(MAM_SPAWN_STRATEGIES, config_file->groups[group->grp+1].ss[i], &req);
}
for(i=0; i<config_file->groups[group->grp+1].rs_len; i++) {
MAM_Set_key_configuration(MAM_RED_STRATEGIES, config_file->groups[group->grp+1].rs[i], &req);
}
MAM_Set_target_number(config_file->groups[group->grp+1].procs); // TODO TO BE DEPRECATED
//FIXME Remove once SLURM is used
//malleability_add_data(&num_nodes, 1, MAL_INT, 1, 1);
//malleability_add_data(&nodelist, nodelist_len, MAL_CHAR, 1, 1);
if(group->grp != 0) {
MAM_Data_modify(&(group->grp), 0, 1, MPI_INT, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT);
MAM_Data_modify(&(group->iter_start), 0, 1, MPI_INT, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE);
}
}
res = work();
if(res == MAL_ZOMBIE) break;
if(res==1) { // The end of the application has been reached
MPI_Barrier(comm);
results->exec_time = MPI_Wtime() - results->exec_start - results->wasted_time;
print_local_results();
}
reset_results_index(results);
} while((config_file->resizes > group->grp + 1) && (config_file->cst == COMM_SPAWN_MERGE || config_file->cst == COMM_SPAWN_MERGE_PTHREAD));
group->grp = group->grp + 1;
} while(config_file->n_groups > group->grp);
//
// EXECUTION ENDS ----------------------------------------------------------
//
if(res==1) { // The end of the application has been reached
MPI_Barrier(comm); // TODO Possible error when using SHRINK
results->exec_time = MPI_Wtime() - results->exec_start;
}
print_final_results(); // Past this point the processes can no longer write output
MPI_Barrier(comm);
if(comm != MPI_COMM_WORLD && comm != MPI_COMM_NULL) {
MPI_Comm_free(&comm);
}
if(group->myId == ROOT && (config_file->cst == COMM_SPAWN_MERGE || config_file->cst == COMM_SPAWN_MERGE_PTHREAD)) {
MPI_Abort(MPI_COMM_WORLD, -100);
}
free_application_data();
MPI_Finalize();
MPI_Finalize();
return 0;
}
@@ -198,32 +150,31 @@ int main(int argc, char *argv[]) {
*/
int work() {
int iter, maxiter, state, res;
double *matrix = NULL;
maxiter = config_file->iters[group->grp];
//initMatrix(&matrix, config_file->matrix_tam);
state = MAL_NOT_STARTED;
int wait_completed = MAM_CHECK_COMPLETION;
maxiter = config_file->groups[group->grp].iters;
state = MAM_NOT_STARTED;
res = 0;
for(iter=group->iter_start; iter < maxiter; iter++) {
iterate(matrix, config_file->matrix_tam, state, iter);
iterate(state);
}
if(config_file->resizes != group->grp + 1)
state = malleability_checkpoint();
if(config_file->n_groups != group->grp + 1)
MAM_Checkpoint(&state, wait_completed, user_redistribution, NULL);
iter = 0;
while(state == MAL_DIST_PENDING || state == MAL_SPAWN_PENDING || state == MAL_SPAWN_SINGLE_PENDING) {
if(iter < config_file->iters[group->grp+1]) {
iterate(matrix, config_file->matrix_tam, state, iter);
while(state == MAM_PENDING || state == MAM_USER_PENDING) {
if(group->grp+1 < config_file->n_groups && iter < config_file->groups[group->grp+1].iters) {
iterate(state);
iter++;
group->iter_start = iter;
}
state = malleability_checkpoint();
} else { wait_completed = MAM_WAIT_COMPLETION; }
MAM_Checkpoint(&state, wait_completed, user_redistribution, NULL);
}
if(config_file->resizes - 1 == group->grp) res=1;
if(state == MAL_ZOMBIE) res=state;
//if(state == MAM_COMPLETED) {}
if(config_file->n_groups == group->grp + 1) { res=1; }
return res;
}
@@ -237,50 +188,87 @@ int work() {
/*
* Simulates the execution of one compute iteration of the application,
* which lasts at least "time" seconds.
* which lasts at least a time determined by the sum of all the
* stages defined in the configuration.
*/
void iterate(double *matrix, int n, int async_comm, int iter) {
double start_time, actual_time;
double time = config_file->general_time * config_file->factors[group->grp];
double Top = config_file->Top;
int i, operations = 0;
double iterate(int async_comm) {
double time, *times_stages_aux;
size_t i;
double aux = 0;
start_time = MPI_Wtime();
times_stages_aux = malloc(config_file->n_stages * sizeof(double));
operations = time / Top; //FIXME Compute only once
if(config_file->rigid_times) {
aux = iterate_rigid(&time, times_stages_aux);
} else {
aux = iterate_relaxed(&time, times_stages_aux);
}
for(i=0; i < operations; i++) {
aux += computePiSerial(n);
// An asynchronous data redistribution is in progress
if(async_comm == MAM_PENDING || async_comm == MAM_USER_PENDING) {
// TODO Distinguish between the different kinds of asynchronous parts?
results->iters_async += 1;
}
/*
if(time >= 1) {
sleep(time);
// TODO Move the rest of this code to results.c
if(results->iter_index == results->iters_size) { // Grow both results vectors
realloc_results_iters(results, config_file->n_stages, results->iters_size + 100);
}
else {
unsigned int sleep_time = time * 1000000;
usleep(sleep_time);
results->iters_time[results->iter_index] = time;
for(i=0; i < config_file->n_stages; i++) {
results->stage_times[i][results->iter_index] = times_stages_aux[i];
}
*/
results->iter_index = results->iter_index + 1;
// TODO Move up to here
free(times_stages_aux);
if(config_file->comm_tam) {
MPI_Bcast(group->compute_comm_array, config_file->comm_tam, MPI_CHAR, ROOT, comm);
}
return aux;
}
/*
* Performs an iteration. The gathered times for iterations
* and stages could be IMPRECISE in order to ensure the
* global execution time is precise.
*/
double iterate_relaxed(double *time, double *times_stages) {
size_t i;
double start_time, start_time_stage, aux=0;
start_time = MPI_Wtime(); // Imprecise timings
actual_time = MPI_Wtime(); // Save timings
// TODO Distinguish between both in the IO
if(async_comm == MAL_DIST_PENDING || async_comm == MAL_SPAWN_PENDING || async_comm == MAL_SPAWN_SINGLE_PENDING) { // An asynchronous data redistribution is in progress
operations=0;
for(i=0; i < config_file->n_stages; i++) {
start_time_stage = MPI_Wtime();
aux+= process_stage(*config_file, config_file->stages[i], *group, comm);
times_stages[i] = MPI_Wtime() - start_time_stage;
}
if(results->iter_index == results->iters_size) { // Grow both results vectors
realloc_results_iters(results, results->iters_size + 100);
*time = MPI_Wtime() - start_time; // Save timings
return aux;
}
/*
* Performs an iteration. The gathered times for iterations
* and stages are ensured to be precise but the global
* execution time could be imprecise.
*/
double iterate_rigid(double *time, double *times_stages) {
size_t i;
double start_time, start_time_stage, aux=0;
MPI_Barrier(comm);
start_time = MPI_Wtime();
for(i=0; i < config_file->n_stages; i++) {
start_time_stage = MPI_Wtime();
aux+= process_stage(*config_file, config_file->stages[i], *group, comm);
MPI_Barrier(comm);
times_stages[i] = MPI_Wtime() - start_time_stage;
}
results->iters_time[results->iter_index] = actual_time - start_time;
results->iters_type[results->iter_index] = operations;
results->iter_index = results->iter_index + 1;
MPI_Barrier(comm);
*time = MPI_Wtime() - start_time; // Save timings
return aux;
}
//======================================================||
@@ -299,7 +287,8 @@ void print_general_info(int myId, int grp, int numP) {
char *version = malloc(MPI_MAX_LIBRARY_VERSION_STRING * sizeof(char));
MPI_Get_processor_name(name, &len);
MPI_Get_library_version(version, &len);
printf("P%d Nuevo GRUPO %d de %d procs en nodo %s con %s\n", myId, grp, numP, name, version);
//printf("P%d Nuevo GRUPO %d de %d procs en nodo %s con %s\n", myId, grp, numP, name, version);
printf("P%d Nuevo GRUPO %d de %d procs en nodo %s -- PID=%d\n", myId, grp, numP, name, getpid());
free(name);
free(version);
@@ -313,7 +302,8 @@ int print_local_results() {
int ptr_local, ptr_out, err;
char *file_name;
compute_results_iter(results, group->myId, ROOT, comm);
// This function causes an overhead in the recorded time for the last group
compute_results_iter(results, group->myId, group->numP, ROOT, config_file->n_stages, config_file->capture_method, comm);
if(group->myId == ROOT) {
ptr_out = dup(1);
@@ -325,12 +315,14 @@ int print_local_results() {
create_out_file(file_name, &ptr_local, 1);
print_config_group(config_file, group->grp);
print_iter_results(*results, config_file->iters[group->grp] - 1);
print_iter_results(*results);
print_stage_results(*results, config_file->n_stages);
free(file_name);
fflush(stdout);
close(1);
dup(ptr_out);
close(ptr_out);
}
return 0;
}
@@ -340,24 +332,27 @@ int print_local_results() {
* and the communications.
*/
int print_final_results() {
int ptr_global, err;
int ptr_global, err, ptr_out;
char *file_name;
if(group->myId == ROOT) {
if(group->grp == config_file->resizes -1) {
if(config_file->n_groups == group->grp) {
file_name = NULL;
file_name = malloc(20 * sizeof(char));
if(file_name == NULL) return -1; // Could not allocate the memory
err = snprintf(file_name, 20, "R%d_Global.out", run_id);
if(err < 0) return -2; // Could not build the file name
ptr_out = dup(1);
create_out_file(file_name, &ptr_global, 1);
print_config(config_file, group->grp);
print_global_results(*results, config_file->resizes);
print_config(config_file);
print_global_results(*results, config_file->n_resizes);
fflush(stdout);
free(file_name);
close(1);
dup(ptr_out);
}
}
return 0;
@@ -367,7 +362,7 @@ int print_final_results() {
* Initializes the group structure
*/
void init_group_struct(char *argv[], int argc, int myId, int numP) {
group = malloc(1 * sizeof(group_data));
group = malloc(sizeof(group_data)); // Valgrind not freed
group->myId = myId;
group->numP = numP;
group->grp = 0;
@@ -386,6 +381,8 @@ void init_group_struct(char *argv[], int argc, int myId, int numP) {
* they communicate with the parents to initialize their data.
*/
void init_application() {
int i, last_index;
if(group->argc < 2) {
printf("Falta el fichero de configuracion. Uso:\n./programa config.ini id\nEl argumento numerico id es opcional\n");
MPI_Abort(MPI_COMM_WORLD, -1);
@@ -394,61 +391,113 @@ void init_application() {
run_id = atoi(group->argv[2]);
}
config_file = read_ini_file(group->argv[1]);
init_config(group->argv[1], &config_file);
results = malloc(sizeof(results_data));
init_results_data(results, config_file->resizes, config_file->iters[group->grp]);
if(config_file->comm_tam) {
group->compute_comm_array = malloc(config_file->comm_tam * sizeof(char));
}
init_results_data(results, config_file->n_resizes, config_file->n_stages, config_file->groups[group->grp].iters);
if(config_file->sdr) {
malloc_comm_array(&(group->sync_array), config_file->sdr , group->myId, group->numP);
group->sync_data_groups = config_file->sdr % DR_MAX_SIZE ? config_file->sdr/DR_MAX_SIZE+1 : config_file->sdr/DR_MAX_SIZE;
group->sync_qty = (int *) malloc(group->sync_data_groups * sizeof(int)); // FIXME Valgrind not freed
group->sync_array = (char **) malloc(group->sync_data_groups * sizeof(char *)); // Valgrind not freed
last_index = group->sync_data_groups-1;
for(i=0; i<last_index; i++) {
group->sync_qty[i] = DR_MAX_SIZE;
malloc_comm_array(&(group->sync_array[i]), group->sync_qty[i], group->myId, group->numP);
}
group->sync_qty[last_index] = config_file->sdr % DR_MAX_SIZE ? config_file->sdr % DR_MAX_SIZE : DR_MAX_SIZE;
malloc_comm_array(&(group->sync_array[last_index]), group->sync_qty[last_index], group->myId, group->numP); // Valgrind not freed
}
if(config_file->adr) {
malloc_comm_array(&(group->async_array), config_file->adr , group->myId, group->numP);
group->async_data_groups = config_file->adr % DR_MAX_SIZE ? config_file->adr/DR_MAX_SIZE+1 : config_file->adr/DR_MAX_SIZE;
group->async_qty = (int *) malloc(group->async_data_groups * sizeof(int));
group->async_array = (char **) malloc(group->async_data_groups * sizeof(char *));
last_index = group->async_data_groups-1;
for(i=0; i<last_index; i++) {
group->async_qty[i] = DR_MAX_SIZE;
malloc_comm_array(&(group->async_array[i]), group->async_qty[i], group->myId, group->numP);
}
group->async_qty[last_index] = config_file->adr % DR_MAX_SIZE ? config_file->adr % DR_MAX_SIZE : DR_MAX_SIZE;
malloc_comm_array(&(group->async_array[last_index]), group->async_qty[last_index], group->myId, group->numP);
}
obtain_op_times();
obtain_op_times(1);
}
/*
* Obtains how much time is needed to perform one PI operation
*
* If compute is 1, the environment is considered to be initializing
* and extra work will be performed.
*
* If compute is 0, the environment is considered initialized and only some
* memory allocation changes have to be made. If anything has to be recomputed,
* the total time spent on those tasks is obtained and subtracted
* from the total execution time.
*/
void obtain_op_times() {
double result, start_time = MPI_Wtime();
int i, qty = 20000;
result = 0;
for(i=0; i<qty; i++) {
result += computePiSerial(config_file->matrix_tam);
}
//printf("Creado Top con valor %lf\n", result);
//fflush(stdout);
config_file->Top = (MPI_Wtime() - start_time) / qty; //Tiempo de una operacion
MPI_Bcast(&(config_file->Top), 1, MPI_DOUBLE, ROOT, comm);
void obtain_op_times(int compute) {
size_t i;
double time = 0;
for(i=0; i<config_file->n_stages; i++) {
time+=init_stage(config_file, i, *group, comm, compute);
}
if(!compute) {results->wasted_time += time;}
}
/*
* Frees all the memory associated with the application
*/
void free_application_data() {
if(config_file->comm_tam) {
free(group->compute_comm_array);
int abort_needed;
size_t i;
if(config_file->sdr && group->sync_array != NULL) {
for(i=0; i<group->sync_data_groups; i++) {
free(group->sync_array[i]);
group->sync_array[i] = NULL;
}
if(config_file->sdr) {
free(group->sync_qty);
group->sync_qty = NULL;
free(group->sync_array);
group->sync_array = NULL;
}
if(config_file->adr) {
if(config_file->adr && group->async_array != NULL) {
for(i=0; i<group->async_data_groups; i++) {
free(group->async_array[i]);
group->async_array[i] = NULL;
}
free(group->async_qty);
group->async_qty = NULL;
free(group->async_array);
group->async_array = NULL;
}
abort_needed = MAM_Finalize();
free_zombie_process();
free(group);
if(abort_needed) { MPI_Abort(MPI_COMM_WORLD, -100); }
}
free_malleability();
free_config(config_file);
if(group->grp == 0) { //FIXME Check why it does not work when it is different from 0
free_results_data(results);
/*
* Frees the memory associated with a Zombie process
*/
void free_zombie_process() {
free_results_data(results, config_file->n_stages);
free(results);
size_t i;
if(config_file->adr && group->async_array != NULL) {
for(i=0; i<group->async_data_groups; i++) {
free(group->async_array[i]);
group->async_array[i] = NULL;
}
free(group);
free(group->async_qty);
group->async_qty = NULL;
free(group->async_array);
group->async_array = NULL;
}
free_config(config_file);
}
@@ -475,3 +524,106 @@ int create_out_file(char *nombre, int *ptr, int newstdout) {
return 0;
}
//======================================================||
//======================================================||
//================ INIT MALLEABILITY ===================||
//======================================================||
//======================================================||
//FIXME KEEP IN MIND THAT ADR CAN BE 0
void init_originals() {
size_t i;
if(config_file->n_groups > 1) {
MAM_Data_add(&(group->grp), NULL, 1, MPI_INT, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT);
MAM_Data_add(&(group->iter_start), NULL, 1, MPI_INT, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE);
MAM_Data_add(&run_id, NULL, 1, MPI_INT, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE);
if(config_file->sdr) {
for(i=0; i<group->sync_data_groups; i++) {
MAM_Data_add(group->sync_array[i], NULL, group->sync_qty[i], MPI_CHAR, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE);
}
}
if(config_file->adr) {
for(i=0; i<group->async_data_groups; i++) {
MAM_Data_add(group->async_array[i], NULL, group->async_qty[i], MPI_CHAR, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT);
}
}
}
}
void init_targets() {
size_t total_qty;
void *value = NULL;
MPI_Datatype type;
MAM_Data_get_pointer(&value, 0, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT);
group->grp = *((int *)value);
group->grp = group->grp + 1;
recv_config_file(ROOT, new_comm, &config_file);
results = malloc(sizeof(results_data));
init_results_data(results, config_file->n_resizes, config_file->n_stages, config_file->groups[group->grp].iters);
results_comm(results, ROOT, config_file->n_resizes, new_comm);
}
void update_targets() { //FIXME Should not be needed after redist -- Declare earlier
size_t i, entries, total_qty;
void *value = NULL;
MPI_Datatype type;
MAM_Data_get_pointer(&value, 0, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE);
group->iter_start = *((int *)value);
MAM_Data_get_pointer(&value, 1, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE);
run_id = *((int *)value);
if(config_file->sdr) {
MAM_Data_get_entries(MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE, &entries);
group->sync_qty = (int *) malloc(entries * sizeof(int));
group->sync_array = (char **) malloc(entries * sizeof(char *));
for(i=0; i<entries; i++) {
MAM_Data_get_pointer(&value, i, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE);
group->sync_array[i] = (char *)value;
group->sync_qty[i] = DR_MAX_SIZE;
}
group->sync_qty[entries-1] = config_file->sdr % DR_MAX_SIZE ? config_file->sdr % DR_MAX_SIZE : DR_MAX_SIZE;
group->sync_data_groups = entries;
}
if(config_file->adr) {
MAM_Data_get_entries(MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT, &entries);
group->async_qty = (int *) malloc(entries * sizeof(int));
group->async_array = (char **) malloc(entries * sizeof(char *));
for(i=0; i<entries; i++) {
MAM_Data_get_pointer(&value, i, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT);
group->async_array[i] = (char *)value;
group->async_qty[i] = DR_MAX_SIZE;
}
group->async_qty[entries-1] = config_file->adr % DR_MAX_SIZE ? config_file->adr % DR_MAX_SIZE : DR_MAX_SIZE;
group->async_data_groups = entries;
}
}
void user_redistribution(void *args) {
int commited;
mam_user_reconf_t user_reconf;
MAM_Get_Reconf_Info(&user_reconf);
new_comm = user_reconf.comm;
if(user_reconf.rank_state == MAM_PROC_NEW_RANK) {
init_targets();
} else {
send_config_file(config_file, ROOT, new_comm);
results_comm(results, ROOT, config_file->n_resizes, new_comm);
print_local_results();
if(user_reconf.rank_state == MAM_PROC_ZOMBIE) {
free_zombie_process();
}
}
MAM_Resume_redistribution(&commited);
}
#ifndef MAIN_DATATYPES_H
#define MAIN_DATATYPES_H
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
#include "../MaM/distribution_methods/block_distribution.h"
#define ROOT 0
typedef struct {
int myId;
int numP;
unsigned int grp;
int iter_start;
int argc;
size_t sync_data_groups, async_data_groups;
MPI_Comm children, parents;
char *compute_comm_array, *compute_comm_recv;
char **argv;
char **sync_array, **async_array;
int *sync_qty, *async_qty;
} group_data;
typedef struct
{
int pt; // Procedure type to execute
int id; // Stage identifier
// Whether the stage completes after "operations" iterations (0)
// or after "t_stage" time has passed (1).
int t_capped;
double t_stage; // Time to complete the stage
double t_op;
int operations;
int bytes, real_bytes, my_bytes;
// Arrays to communicate data;
char* array, *full_array;
double* double_array;
int req_count;
MPI_Request *reqs;
// Arrays to indicate how many bytes are received from each rank
struct Counts counts;
} iter_stage_t;
typedef struct
{
int iters, procs;
int sm, phy_dist, rm;
int *ss, *rs;
size_t ss_len, rs_len;
float factor;
} group_config_t;
typedef struct
{
size_t n_groups, n_resizes, n_stages; // n_groups==n_resizes+1
size_t actual_group, actual_stage;
int rigid_times, capture_method;
int granularity;
size_t sdr, adr;
MPI_Datatype config_type, group_type, group_strats_type, iter_stage_type;
iter_stage_t *stages;
group_config_t *groups;
} configuration;
#endif
@@ -7,16 +7,17 @@
/*
* Performs a multiplication of matrices of size n
*/
double computeMatrix(double *matrix, int n) { //FIXME No da tiempos repetibles
double computeMatrix(double *matrix, int n) {
int row, col;
double aux;
aux=0;
for(row=0; row<n; row++) {
for(col=0; col<n; col++) {
aux += ( (int)(matrix[row*n + col] + exp(sqrt(row*col))) % n);
aux += (int)(matrix[row*n + col] * matrix[row*n + col]);
}
}
return aux;
}
@@ -40,17 +41,31 @@ double computePiSerial(int n) {
/*
* Init matrix
*/
void initMatrix(double **matrix, int n) {
int i, j;
void initMatrix(double **matrix, size_t n) {
size_t i, j;
double *aux = NULL;
freeMatrix(matrix);
// Init matrix
if(matrix != NULL) {
*matrix = malloc(n * n * sizeof(double));
if(*matrix == NULL) { MPI_Abort(MPI_COMM_WORLD, -1);}
aux = (double *) malloc(n * n * sizeof(double));
if(aux == NULL) { perror("Computing matrix could not be allocated"); MPI_Abort(MPI_COMM_WORLD, -1);}
for(i=0; i < n; i++) {
for(j=0; j < n; j++) {
(*matrix)[i*n + j] = i+j;
aux[i*n + j] = (i+j) * 1.1;
}
}
*matrix = aux;
}
void freeMatrix(double **matrix) {
// Free matrix
if(*matrix != NULL) {
free(*matrix);
*matrix = NULL;
}
}
#ifndef COMPUTING_FUNC_H
#define COMPUTING_FUNC_H
double computeMatrix(double *matrix, int n);
double computePiSerial(int n);
void initMatrix(double **matrix, int n);
void initMatrix(double **matrix, size_t n);
void freeMatrix(double **matrix);
#endif
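/*
 * Illustrative usage sketch for these helpers: allocate a matrix, run the
 * compute kernel on it and release it again. Assumes MPI has already been
 * initialized, since initMatrix() aborts through MPI on allocation failure;
 * n = 1000 and the function name are arbitrary choices for the example.
 */
#include "computing_func.h"

double matrix_kernel_example(void) {
  double *matrix = NULL, res;
  size_t n = 1000;

  initMatrix(&matrix, n);               // Allocates and fills the n x n matrix
  res = computeMatrix(matrix, (int) n); // Runs the kernel over the whole matrix
  freeMatrix(&matrix);                  // Frees it and resets the pointer to NULL
  return res;
}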
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
#include "comunication_func.h"
/*
* Performs an ordered point-to-point communication
* Process X sends to X+1 and receives from X-1
*/
void point_to_point(int myId, int numP, int root, MPI_Comm comm, char *array, int qty) {
int prev, next;
next = (myId+1) % numP;
prev = (myId == 0 ? numP-1 : myId-1);
if(myId == root) {
MPI_Send(array, qty, MPI_CHAR, next, 99, comm);
MPI_Recv(array, qty, MPI_CHAR, prev, 99, comm, MPI_STATUS_IGNORE);
} else {
MPI_Recv(array, qty, MPI_CHAR, prev, 99, comm, MPI_STATUS_IGNORE);
MPI_Send(array, qty, MPI_CHAR, next, 99, comm);
}
}
void point_to_point_inter(int myId, int numP, MPI_Comm comm, char *array, char *r_array, int qty) {
int target;
target = (myId + numP/2)%numP;
MPI_Sendrecv(array, qty, MPI_CHAR, target, 99, r_array, qty, MPI_CHAR, target, 99, comm, MPI_STATUS_IGNORE);
}
void point_to_point_asynch_inter(int myId, int numP, MPI_Comm comm, char *array, char *r_array, int qty, MPI_Request *reqs) {
int target;
target = (myId + numP/2)%numP;
if(myId < numP/2) {
MPI_Isend(array, qty, MPI_CHAR, target, 99, comm, &(reqs[0]));
MPI_Irecv(r_array, qty, MPI_CHAR, target, 99, comm, &(reqs[1]));
} else {
MPI_Irecv(r_array, qty, MPI_CHAR, target, 99, comm, &(reqs[0]));
MPI_Isend(array, qty, MPI_CHAR, target, 99, comm, &(reqs[1]));
}
}
#ifndef COMUNICATION_FUNC_H
#define COMUNICATION_FUNC_H
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
void point_to_point(int myId, int numP, int root, MPI_Comm comm, char *array, int qty);
void point_to_point_inter(int myId, int numP, MPI_Comm comm, char *array, char *r_array, int qty);
void point_to_point_asynch_inter(int myId, int numP, MPI_Comm comm, char *array, char *r_array, int qty, MPI_Request *reqs);
#endif
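/*
 * Illustrative usage sketch: start the non-blocking pairwise exchange declared
 * above and wait for both requests. Assumes an even number of processes and
 * send/receive buffers of "qty" bytes on every rank; the function name is a
 * placeholder for the example.
 */
#include "comunication_func.h"

void asynch_exchange_example(int myId, int numP, MPI_Comm comm,
                             char *send_buf, char *recv_buf, int qty) {
  MPI_Request reqs[2];

  point_to_point_asynch_inter(myId, numP, comm, send_buf, recv_buf, qty, reqs);
  // Useful computation could overlap the communication here
  MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE); // Complete both the send and the receive
}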
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include "../IOcodes/read_ini.h"
#include "configuration.h"
#include "../MaM/distribution_methods/block_distribution.h"
void malloc_config_resizes(configuration *user_config);
void malloc_config_stages(configuration *user_config);
void free_config_stage(iter_stage_t *stage, int *freed_ids, size_t *found_ids);
void def_struct_config_file(configuration *config_file);
void def_struct_groups(configuration *config_file);
void def_struct_groups_strategies(configuration *config_file);
void def_struct_iter_stage(configuration *config_file);
/*
* Initializes a configuration structure
*
* If the parameter "file_name" is not null,
* the configuration is obtained from
* an .ini file
*
* If it is null, it is the user
* who has to choose the values
* to use.
*/
void init_config(char *file_name, configuration **user_config) {
if(file_name != NULL) {
ext_functions_t mallocs;
mallocs.resizes_f = malloc_config_resizes;
mallocs.stages_f = malloc_config_stages;
*user_config = read_ini_file(file_name, mallocs);
} else {
configuration *config = NULL;
config = malloc(sizeof(configuration));
config->n_resizes=0;
config->n_groups=1;
malloc_config_resizes(config);
config->n_stages=1;
malloc_config_stages(config);
if(config == NULL) {
perror("Error when reserving configuration structure\n");
MPI_Abort(MPI_COMM_WORLD, -3);
return;
}
*user_config=config;
}
def_struct_config_file(*user_config);
def_struct_groups_strategies(*user_config);
}
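/*
 * Illustrative usage sketch: load a configuration from an INI file, print it
 * and free it. "test.ini" and the function name are placeholders; the
 * declarations come from configuration.h, which this file already includes.
 */
void config_usage_example(void) {
  configuration *config = NULL;

  init_config("test.ini", &config); // Reads the file and builds the MPI derived types
  print_config(config);
  free_config(config);
}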
/*
* Allocates memory for the vectors of the configuration structure
*
* If it is called from outside this file, the memory of the structure
* has to be allocated with the following line:
* "configuration *config = malloc(sizeof(configuration));"
*
* However, it can be obtained through the functions
* - init_config
* - recv_config_file
*/
void malloc_config_resizes(configuration *user_config) {
size_t i;
if(user_config != NULL) {
user_config->groups = malloc(sizeof(group_config_t) * user_config->n_groups);
for(i=0; i<user_config->n_groups; i++) {
user_config->groups[i].iters = 0;
user_config->groups[i].procs = 1;
user_config->groups[i].sm = 0;
user_config->groups[i].ss = NULL;
user_config->groups[i].ss_len = 0;
user_config->groups[i].phy_dist = 0;
user_config->groups[i].rm = 0;
user_config->groups[i].rs = NULL;
user_config->groups[i].rs_len = 0;
user_config->groups[i].factor = 1;
}
def_struct_groups(user_config);
}
}
/*
* Initializes the memory for the iteration stages.
* No memory is reserved for the inner arrays, but they are set to NULL
* so that each stage can be freed correctly.
*
* It can be obtained through the functions
* - init_config
* - recv_config_file
*/
void malloc_config_stages(configuration *user_config) {
size_t i;
if(user_config != NULL) {
user_config->stages = malloc(sizeof(iter_stage_t) * user_config->n_stages);
for(i=0; i<user_config->n_stages; i++) {
user_config->stages[i].array = NULL;
user_config->stages[i].full_array = NULL;
user_config->stages[i].double_array = NULL;
user_config->stages[i].reqs = NULL;
user_config->stages[i].counts.counts = NULL;
user_config->stages[i].bytes = 0;
user_config->stages[i].my_bytes = 0;
user_config->stages[i].real_bytes = 0;
user_config->stages[i].operations = 0;
user_config->stages[i].pt = 0;
user_config->stages[i].id = -1;
user_config->stages[i].t_op = 0;
user_config->stages[i].t_stage = 0;
user_config->stages[i].t_capped = 0;
}
def_struct_iter_stage(user_config);
}
}
/*
* Frees all the memory of a configuration structure
*/
void free_config(configuration *user_config) {
size_t i, found_ids;
int *freed_ids;
found_ids = 0;
if(user_config != NULL) {
freed_ids = (int *) malloc(user_config->n_stages * sizeof(int));
for(i=0; i < user_config->n_stages; i++) {
free_config_stage(&(user_config->stages[i]), freed_ids, &found_ids);
}
for(i=0; i < user_config->n_groups; i++) {
free(user_config->groups[i].ss);
free(user_config->groups[i].rs);
}
//Free derived datatypes
MPI_Type_free(&(user_config->config_type));
user_config->config_type = MPI_DATATYPE_NULL;
MPI_Type_free(&(user_config->group_type));
user_config->group_type = MPI_DATATYPE_NULL;
MPI_Type_free(&(user_config->group_strats_type));
user_config->group_strats_type = MPI_DATATYPE_NULL;
MPI_Type_free(&(user_config->iter_stage_type));
user_config->iter_stage_type = MPI_DATATYPE_NULL;
free(user_config->groups);
free(user_config->stages);
free(user_config);
free(freed_ids);
}
}
/*
* Frees all the memory of a stage
*/
void free_config_stage(iter_stage_t *stage, int *freed_ids, size_t *found_ids) {
size_t i;
int mpi_index, free_reqs;
free_reqs = 1;
if(stage->id > -1) {
for(i=0; i<*found_ids; i++) {
if(stage->id == freed_ids[i]) {
free_reqs = 0;
break;
}
}
if(free_reqs) {
freed_ids[*found_ids] = stage->id;
*found_ids=*found_ids + 1;
}
}
if(stage->array != NULL) {
free(stage->array);
stage->array = NULL;
}
if(stage->full_array != NULL) {
free(stage->full_array);
stage->full_array = NULL;
}
if(stage->double_array != NULL) {
free(stage->double_array);
stage->double_array = NULL;
}
if(stage->reqs != NULL && free_reqs) {
for(mpi_index=0; mpi_index<stage->req_count; mpi_index++) {
if(stage->reqs[mpi_index] != MPI_REQUEST_NULL) {
MPI_Request_free(&(stage->reqs[mpi_index]));
stage->reqs[mpi_index] = MPI_REQUEST_NULL;
}
}
free(stage->reqs);
stage->reqs = NULL;
}
if(stage->counts.counts != NULL) {
freeCounts(&(stage->counts));
}
}
/*
* Prints to standard output all the information contained in
* the configuration passed as argument
*/
void print_config(configuration *user_config) {
if(user_config != NULL) {
size_t i, j;
printf("Config loaded: R=%zu, S=%zu, granularity=%d, SDR=%zu, ADR=%zu, Rigid=%d, Capture_Method=%d\n",
user_config->n_resizes, user_config->n_stages, user_config->granularity, user_config->sdr, user_config->adr, user_config->rigid_times, user_config->capture_method);
for(i=0; i<user_config->n_stages; i++) {
printf("Stage %zu: PT=%d, T_stage=%lf, bytes=%d, T_capped=%d\n",
i, user_config->stages[i].pt, user_config->stages[i].t_stage, user_config->stages[i].real_bytes, user_config->stages[i].t_capped);
}
for(i=0; i<user_config->n_groups; i++) {
printf("Group %zu: Iters=%d, Procs=%d, Factors=%f, Dist=%d, RM=%d, SM=%d",
i, user_config->groups[i].iters, user_config->groups[i].procs, user_config->groups[i].factor,
user_config->groups[i].phy_dist, user_config->groups[i].rm, user_config->groups[i].sm);
printf(", RS=%d", user_config->groups[i].rs[0]);
for(j=1; j<user_config->groups[i].rs_len; j++) {
printf("/%d", user_config->groups[i].rs[j]);
}
printf(", SS=%d", user_config->groups[i].ss[0]);
for(j=1; j<user_config->groups[i].ss_len; j++) {
printf("/%d", user_config->groups[i].ss[j]);
}
printf("\n");
}
}
}
/*
* Prints to standard output the information related to a
* single group of processes in its configuration.
*/
void print_config_group(configuration *user_config, size_t grp) {
size_t i;
if(user_config != NULL) {
int parents, sons;
parents = sons = 0;
if(grp > 0) {
parents = user_config->groups[grp-1].procs;
}
if(grp < user_config->n_groups - 1) {
sons = user_config->groups[grp+1].procs;
}
printf("Config: granularity=%d, SDR=%zu, ADR=%zu, Rigid=%d, Capture_Method=%d\n",
user_config->granularity, user_config->sdr, user_config->adr, user_config->rigid_times, user_config->capture_method);
for(i=0; i<user_config->n_stages; i++) {
printf("Stage %zu: PT=%d, T_stage=%lf, bytes=%d, T_capped=%d\n",
i, user_config->stages[i].pt, user_config->stages[i].t_stage, user_config->stages[i].real_bytes, user_config->stages[i].t_capped);
}
printf("Group %zu: Iters=%d, Procs=%d, Factors=%f, Dist=%d, RM=%d, SM=%d", grp, user_config->groups[grp].iters, user_config->groups[grp].procs, user_config->groups[grp].factor,
user_config->groups[grp].phy_dist, user_config->groups[grp].rm, user_config->groups[grp].sm);
printf(", RS=%d", user_config->groups[grp].rs[0]);
for(i=1; i<user_config->groups[grp].rs_len; i++) {
printf("/%d", user_config->groups[grp].rs[i]);
}
printf(", SS=%d", user_config->groups[grp].ss[0]);
for(i=1; i<user_config->groups[grp].ss_len; i++) {
printf("/%d", user_config->groups[grp].ss[i]);
}
printf(", parents=%d, children=%d\n", parents, sons);
}
}
//||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ||
//||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ||
//| CONFIGURATION STRUCTURE INTERCOMMUNICATION FUNCTIONS           ||
//||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ||
//||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| |/
/*
* Sends a configuration structure to the group of processes this group is
* linked to through the intercommunicator passed as argument.
*
* This function has to be called by every process of the same group,
* indicating which process is the root in charge of sending the
* configuration to the other group.
*/
void send_config_file(configuration *config_file, int root, MPI_Comm intercomm) {
MPI_Bcast(config_file, 1, config_file->config_type, root, intercomm);
MPI_Bcast(config_file->stages, config_file->n_stages, config_file->iter_stage_type, root, intercomm);
MPI_Bcast(config_file->groups, config_file->n_groups, config_file->group_type, root, intercomm);
MPI_Bcast(config_file->groups, 1, config_file->group_strats_type, root, intercomm);
}
/*
* Receives a configuration structure from another group of processes
* and returns it. The memory of the structure is allocated in this function.
*
* This function has to be called by every process of the same group,
* indicating which process is the root of the other group in charge of
* sending the configuration to this group.
*
* The memory of the returned configuration has to be freed with
* the function "free_config".
*/
void recv_config_file(int root, MPI_Comm intercomm, configuration **config_file_out) {
size_t i;
configuration *config_file = malloc(sizeof(configuration));
def_struct_config_file(config_file);
MPI_Bcast(config_file, 1, config_file->config_type, root, intercomm);
//Initialization of internal structures
config_file->n_resizes = config_file->n_groups-1;
malloc_config_stages(config_file); // Initialize stage vectors to NULL
malloc_config_resizes(config_file); // Initialize group values
MPI_Bcast(config_file->stages, config_file->n_stages, config_file->iter_stage_type, root, intercomm);
MPI_Bcast(config_file->groups, config_file->n_groups, config_file->group_type, root, intercomm);
for(i=0; i<config_file->n_groups; i++) {
config_file->groups[i].ss = (int *) malloc(config_file->groups[i].ss_len * sizeof(int));
config_file->groups[i].rs = (int *) malloc(config_file->groups[i].rs_len * sizeof(int));
}
def_struct_groups_strategies(config_file); // Initialise the group strategy vectors
MPI_Bcast(config_file->groups, 1, config_file->group_strats_type, root, intercomm);
*config_file_out = config_file;
}
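/*
 * Usage sketch (illustrative only, not part of the build): a possible pairing
 * of both sides over an intercommunicator. The names "parents_root" and
 * "intercomm" are assumptions for the example; the meaning of the "root"
 * argument is the one described in the comments of each function.
 *
 *   // Sending group (already holds a valid config_file):
 *   send_config_file(config_file, root, intercomm);
 *
 *   // Receiving group (the structure is allocated by the call):
 *   configuration *config_file = NULL;
 *   recv_config_file(parents_root, intercomm, &config_file);
 *   ...
 *   free_config(config_file); // Release it when no longer needed
 */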
/*
 * Derived datatype to send 7 specific elements
 * of the configuration structure in a single communication.
 */
void def_struct_config_file(configuration *config_file) {
int i, counts = 7;
int blocklengths[7] = {1, 1, 1, 1, 1, 1, 1};
MPI_Aint displs[counts], dir;
MPI_Datatype types[counts], type_size_t;
MPI_Type_match_size(MPI_TYPECLASS_INTEGER, sizeof(size_t), &type_size_t);
// Fill the types vector
types[0] = types[1] = types[2] = types[3] = type_size_t;
types[4] = types[5] = types[6] = MPI_INT;
// Fill the displacements vector
MPI_Get_address(config_file, &dir);
MPI_Get_address(&(config_file->n_groups), &displs[0]);
MPI_Get_address(&(config_file->n_stages), &displs[1]);
MPI_Get_address(&(config_file->sdr), &displs[2]);
MPI_Get_address(&(config_file->adr), &displs[3]);
MPI_Get_address(&(config_file->granularity), &displs[4]);
MPI_Get_address(&(config_file->rigid_times), &displs[5]);
MPI_Get_address(&(config_file->capture_method), &displs[6]);
for(i=0;i<counts;i++) displs[i] -= dir;
MPI_Type_create_struct(counts, blocklengths, displs, types, &(config_file->config_type));
MPI_Type_commit(&(config_file->config_type));
}
/*
 * Derived datatype to send specific elements
 * of the configuration structures of each group
 * in a single communication.
 */
void def_struct_groups(configuration *config_file) {
int i, counts = 8;
int blocklengths[8] = {1, 1, 1, 1, 1, 1, 1, 1};
MPI_Aint displs[counts], dir;
MPI_Datatype types[counts], type_size_t, aux;
group_config_t *groups = config_file->groups;
MPI_Type_match_size(MPI_TYPECLASS_INTEGER, sizeof(size_t), &type_size_t);
// Fill the types vector
types[0] = types[1] = types[2] = types[4] = types[5] = MPI_INT;
types[3] = types[6] = type_size_t;
types[7] = MPI_FLOAT;
// Fill the displacements vector
MPI_Get_address(groups, &dir);
MPI_Get_address(&(groups->iters), &displs[0]);
MPI_Get_address(&(groups->procs), &displs[1]);
MPI_Get_address(&(groups->sm), &displs[2]);
MPI_Get_address(&(groups->ss_len), &displs[3]);
MPI_Get_address(&(groups->phy_dist), &displs[4]);
MPI_Get_address(&(groups->rm), &displs[5]);
MPI_Get_address(&(groups->rs_len), &displs[6]);
MPI_Get_address(&(groups->factor), &displs[7]);
for(i=0;i<counts;i++) displs[i] -= dir;
if (config_file->n_groups == 1) {
MPI_Type_create_struct(counts, blocklengths, displs, types, &(config_file->group_type));
MPI_Type_commit(&(config_file->group_type));
} else { // If there is more than one structure, the "extent" has to be adjusted.
MPI_Type_create_struct(counts, blocklengths, displs, types, &aux);
// Derived datatype to send N elements of the structure
MPI_Type_create_resized(aux, 0, sizeof(group_config_t), &(config_file->group_type));
MPI_Type_commit(&(config_file->group_type));
MPI_Type_free(&aux);
}
}
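/*
 * Note on the resized datatype above: MPI derives the default extent from the
 * selected members only, so broadcasting "n_groups" consecutive structures
 * would step through the array with the wrong stride. Resizing the extent to
 * sizeof(group_config_t) makes element i of the datatype start exactly at
 * &groups[i], which is what the broadcast of several elements relies on.
 */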
/*
 * Derived datatype to send the strategies
 * of every group in a single communication.
 */
void def_struct_groups_strategies(configuration *config_file) {
int i, counts = config_file->n_groups*2;
int *blocklengths;
MPI_Aint *displs, dir;
MPI_Datatype *types;
group_config_t *group;
blocklengths = (int *) malloc(counts * sizeof(int));
displs = (MPI_Aint *) malloc(counts * sizeof(MPI_Aint));
types = (MPI_Datatype *) malloc(counts * sizeof(MPI_Datatype));
MPI_Get_address(config_file->groups, &dir);
for(i = 0; i < counts; i+=2) {
group = &(config_file->groups[i/2]);
MPI_Get_address(group->ss, &displs[i]);
MPI_Get_address(group->rs, &displs[i+1]);
displs[i] -= dir;
displs[i+1] -= dir;
types[i] = types[i+1] = MPI_INT;
blocklengths[i] = group->ss_len;
blocklengths[i+1] = group->rs_len;
}
MPI_Type_create_struct(counts, blocklengths, displs, types, &config_file->group_strats_type);
MPI_Type_commit(&config_file->group_strats_type);
free(blocklengths);
free(displs);
free(types);
}
/*
 * Derived datatype to send specific elements
 * of the iteration stage structures in a single communication.
 */
void def_struct_iter_stage(configuration *config_file) {
int i, counts = 6;
int blocklengths[6] = {1, 1, 1, 1, 1, 1};
MPI_Aint displs[counts], dir;
MPI_Datatype aux, types[counts];
iter_stage_t *stages = config_file->stages;
// Fill the types vector
types[0] = types[1] = types[2] = types[3] = MPI_INT;
types[4] = types[5] = MPI_DOUBLE;
// Fill the displacements vector
MPI_Get_address(stages, &dir);
MPI_Get_address(&(stages->pt), &displs[0]);
MPI_Get_address(&(stages->id), &displs[1]);
MPI_Get_address(&(stages->bytes), &displs[2]);
MPI_Get_address(&(stages->t_capped), &displs[3]);
MPI_Get_address(&(stages->t_stage), &displs[4]);
MPI_Get_address(&(stages->t_op), &displs[5]);
for(i=0;i<counts;i++) displs[i] -= dir;
if (config_file->n_stages == 1) {
MPI_Type_create_struct(counts, blocklengths, displs, types, &(config_file->iter_stage_type));
MPI_Type_commit(&(config_file->iter_stage_type));
} else { // If there is more than one stage (structure), the "extent" has to be adjusted.
MPI_Type_create_struct(counts, blocklengths, displs, types, &aux);
// Derived datatype to send N elements of the structure
MPI_Type_create_resized(aux, 0, sizeof(iter_stage_t), &(config_file->iter_stage_type));
MPI_Type_commit(&(config_file->iter_stage_type));
MPI_Type_free(&aux);
}
}
#ifndef CONFIGURATION_H
#define CONFIGURATION_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include "../Main/Main_datatypes.h"
typedef struct
{
int resizes;
int actual_resize;
int matrix_tam, comm_tam, sdr, adr;
int css, cst;
int aib;
float general_time;
double Top;
int *iters, *procs, *phy_dist;
float *factors;
} configuration;
configuration *read_ini_file(char *file_name);
void init_config(char *file_name, configuration **user_config);
void free_config(configuration *user_config);
void print_config(configuration *user_config);
void print_config_group(configuration *user_config, size_t grp);
// MPI Intercomm functions
void send_config_file(configuration *config_file, int root, MPI_Comm intercomm);
void recv_config_file(int root, MPI_Comm intercomm, configuration **config_file_out);
#endif
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <mpi.h>
#include "computing_func.h"
#include "comunication_func.h"
#include "Main_datatypes.h"
#include "process_stage.h"
#include "../MaM/distribution_methods/block_distribution.h"
double init_emulation_comm_time(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm);
double init_emulation_icomm_time(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm);
double init_matrix_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute);
double init_pi_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute);
double init_comm_ptop_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute);
double init_comm_iptop_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute);
double init_comm_bcast_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute);
double init_comm_allgatherv_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute);
double init_comm_reduce_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute);
double init_comm_wait_pt(configuration *config_file, iter_stage_t *stage);
/*
 * Computes the time per operation, or the total number of bytes to send,
 * for each iteration stage, so that the iterations can later be
 * performed correctly.
 *
 * It is only computed by the ROOT process, which afterwards sends it
 * to the rest of the processes.
 *
 * If the "compute" flag is set, the operations are performed to
 * recompute the times from scratch. If it is false, only the required
 * memory is allocated and the values obtained in previous calls are
 * reused. Every process has to pass the same value for this flag.
 *
 * TODO Split the work among the processes.
 * TODO Does not take into account changes between heterogeneous machines.
 */
double init_stage(configuration *config_file, int stage_i, group_data group, MPI_Comm comm, int compute) {
double result = 0;
int qty = 5000;
iter_stage_t *stage = &(config_file->stages[stage_i]);
stage->operations = qty;
switch(stage->pt) {
//Computation
case COMP_MATRIX:
result = init_matrix_pt(group, config_file, stage, comm, compute);
break;
case COMP_PI:
result = init_pi_pt(group, config_file, stage, comm, compute);
break;
//Communication
case COMP_POINT:
result = init_comm_ptop_pt(group, config_file, stage, comm, compute);
break;
case COMP_IPOINT:
result = init_comm_iptop_pt(group, config_file, stage, comm, compute);
break;
case COMP_BCAST:
result = init_comm_bcast_pt(group, config_file, stage, comm, compute);
break;
case COMP_ALLGATHER:
result = init_comm_allgatherv_pt(group, config_file, stage, comm, compute);
break;
case COMP_REDUCE:
case COMP_ALLREDUCE:
result = init_comm_reduce_pt(group, config_file, stage, comm, compute);
break;
case COMP_WAIT:
result = init_comm_wait_pt(config_file, stage);
break;
}
return result;
}
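/*
 * Usage sketch (illustrative only): every stage is typically initialised once
 * before the iterations start. The loop shown here and the value of the
 * "compute" flag are assumptions for the example.
 *
 *   for(int s = 0; s < (int) config_file->n_stages; s++) {
 *     init_stage(config_file, s, group, comm, 1); // 1 = recompute timings
 *   }
 */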
/*
 * Processes one stage of the iteration, determining the type
 * of operation to perform and calling the function that
 * carries it out.
 */
double process_stage(configuration config_file, iter_stage_t stage, group_data group, MPI_Comm comm) {
int i=0;
double result, t_start, t_total;
result = 0; // Must be initialised: the computation stages accumulate into it
t_start = MPI_Wtime();
t_total = 0;
switch(stage.pt) {
//Computation
case COMP_PI:
for(i=0; i < stage.operations; i++) {
result += computePiSerial(config_file.granularity);
}
break;
case COMP_MATRIX:
for(i=0; i < stage.operations; i++) {
result += computeMatrix(stage.double_array, config_file.granularity);
}
break;
//Communications
case COMP_POINT:
if(stage.t_capped) {
while(t_total < stage.t_stage) {
point_to_point_inter(group.myId, group.numP, comm, stage.array, stage.full_array, stage.real_bytes);
t_total = MPI_Wtime() - t_start;
MPI_Bcast(&t_total, 1, MPI_DOUBLE, ROOT, comm);
}
} else {
for(i=0; i < stage.operations; i++) {
point_to_point_inter(group.myId, group.numP, comm, stage.array, stage.full_array, stage.real_bytes);
}
}
break;
case COMP_IPOINT:
for(i=0; i < stage.operations; i++) {
point_to_point_asynch_inter(group.myId, group.numP, comm, stage.array, stage.full_array, stage.real_bytes, &(stage.reqs[i*2])); //FIXME Magical number
}
break;
case COMP_BCAST:
if(stage.t_capped) {
while(t_total < stage.t_stage) {
MPI_Bcast(stage.array, stage.real_bytes, MPI_CHAR, ROOT, comm);
t_total = MPI_Wtime() - t_start;
MPI_Bcast(&t_total, 1, MPI_DOUBLE, ROOT, comm);
}
} else {
for(i=0; i < stage.operations; i++) {
MPI_Bcast(stage.array, stage.real_bytes, MPI_CHAR, ROOT, comm);
}
}
break;
case COMP_ALLGATHER:
if(stage.t_capped) {
while(t_total < stage.t_stage) {
MPI_Allgatherv(stage.array, stage.my_bytes, MPI_CHAR, stage.full_array, stage.counts.counts, stage.counts.displs, MPI_CHAR, comm);
t_total = MPI_Wtime() - t_start;
MPI_Bcast(&t_total, 1, MPI_DOUBLE, ROOT, comm);
}
} else {
for(i=0; i < stage.operations; i++) {
MPI_Allgatherv(stage.array, stage.my_bytes, MPI_CHAR, stage.full_array, stage.counts.counts, stage.counts.displs, MPI_CHAR, comm);
}
}
break;
case COMP_REDUCE:
if(stage.t_capped) {
while(t_total < stage.t_stage) {
MPI_Reduce(stage.array, stage.full_array, stage.real_bytes, MPI_CHAR, MPI_MAX, ROOT, comm);
t_total = MPI_Wtime() - t_start;
MPI_Bcast(&t_total, 1, MPI_DOUBLE, ROOT, comm);
}
} else {
for(i=0; i < stage.operations; i++) {
MPI_Reduce(stage.array, stage.full_array, stage.real_bytes, MPI_CHAR, MPI_MAX, ROOT, comm);
}
}
break;
case COMP_ALLREDUCE:
if(stage.t_capped) {
while(t_total < stage.t_stage) {
MPI_Allreduce(stage.array, stage.full_array, stage.real_bytes, MPI_CHAR, MPI_MAX, comm);
t_total = MPI_Wtime() - t_start;
MPI_Bcast(&t_total, 1, MPI_DOUBLE, ROOT, comm);
}
} else {
for(i=0; i < stage.operations; i++) {
MPI_Allreduce(stage.array, stage.full_array, stage.real_bytes, MPI_CHAR, MPI_MAX, comm);
}
}
break;
case COMP_WAIT:
if(stage.t_capped) { //FIXME Right now, COMP_WAIT with t_capped only works for P2P comms
int remaining;
i = 0;
// Wait until t_stage time has passed
while(t_total < stage.t_stage) {
MPI_Waitall(2, &(stage.reqs[i*2]), MPI_STATUSES_IGNORE); //FIXME Magical number
t_total = MPI_Wtime() - t_start;
i++;
MPI_Bcast(&t_total, 1, MPI_DOUBLE, ROOT, comm);
}
remaining = stage.operations - i;
// If there are operations remaining, terminate them
if (remaining) {
for(; i < stage.operations; i++) {
MPI_Cancel(&(stage.reqs[i*2])); //FIXME Magical number
MPI_Cancel(&(stage.reqs[i*2+1])); //FIXME Magical number
}
MPI_Waitall(remaining*2, &(stage.reqs[(stage.operations-remaining)*2]), MPI_STATUSES_IGNORE); //FIXME Magical number
}
} else {
MPI_Waitall(stage.req_count, stage.reqs, MPI_STATUSES_IGNORE);
}
break;
}
return result;
}
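/*
 * Usage sketch (illustrative only): a plausible inner loop of the benchmark,
 * running every configured stage once per iteration. The loop bounds shown
 * here are assumptions for the example.
 *
 *   for(int it = 0; it < config_file->groups[group.grp].iters; it++) {
 *     for(int s = 0; s < (int) config_file->n_stages; s++) {
 *       process_stage(*config_file, config_file->stages[s], group, comm);
 *     }
 *   }
 */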
/*
* ========================================================================================
* ========================================================================================
* =================================INIT STAGE FUNCTIONS===================================
* ========================================================================================
* ========================================================================================
*/
double init_emulation_comm_time(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm) {
double start_time, end_time, time = 0;
double t_stage;
MPI_Barrier(comm);
start_time = MPI_Wtime();
process_stage(*config_file, *stage, group, comm);
MPI_Barrier(comm);
end_time = MPI_Wtime();
stage->t_op = (end_time - start_time) / stage->operations; //Time per operation
t_stage = stage->t_stage * config_file->groups[group.grp].factor;
stage->operations = ceil(t_stage / stage->t_op);
MPI_Bcast(&(stage->operations), 1, MPI_INT, ROOT, comm);
return time;
}
double init_emulation_icomm_time(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm) {
double start_time, end_time, time = 0;
double t_stage;
iter_stage_t wait_stage;
wait_stage.pt = COMP_WAIT;
wait_stage.id = stage->id;
wait_stage.operations = stage->operations;
wait_stage.req_count = stage->req_count;
wait_stage.reqs = stage->reqs;
MPI_Barrier(comm);
start_time = MPI_Wtime();
process_stage(*config_file, *stage, group, comm);
process_stage(*config_file, wait_stage, group, comm);
MPI_Barrier(comm);
end_time = MPI_Wtime();
stage->t_op = (end_time - start_time) / stage->operations; //Time per operation
t_stage = stage->t_stage * config_file->groups[group.grp].factor;
stage->operations = ceil(t_stage / stage->t_op);
MPI_Bcast(&(stage->operations), 1, MPI_INT, ROOT, comm);
return time;
}
double init_matrix_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute) {
double result, t_stage, start_time;
result = 0;
t_stage = stage->t_stage * config_file->groups[group.grp].factor;
initMatrix(&(stage->double_array), config_file->granularity);
if(compute) {
if(group.myId == ROOT) {
start_time = MPI_Wtime();
result+= process_stage(*config_file, *stage, group, comm);
stage->t_op = (MPI_Wtime() - start_time) / stage->operations; //Time per operation
stage->operations = ceil(t_stage / stage->t_op);
}
MPI_Bcast(&(stage->operations), 1, MPI_INT, ROOT, comm);
} else {
stage->operations = ceil(t_stage / stage->t_op);
}
return result;
}
double init_pi_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute) {
double result, t_stage, start_time;
result = 0;
t_stage = stage->t_stage * config_file->groups[group.grp].factor;
if(compute) {
if(group.myId == ROOT) {
start_time = MPI_Wtime();
result+= process_stage(*config_file, *stage, group, comm);
stage->t_op = (MPI_Wtime() - start_time) / stage->operations; //Time per operation
stage->operations = ceil(t_stage / stage->t_op);
}
MPI_Bcast(&(stage->operations), 1, MPI_INT, ROOT, comm);
} else {
stage->operations = ceil(t_stage / stage->t_op);
}
return result;
}
double init_comm_ptop_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute) {
double time = 0;
if(stage->array != NULL)
free(stage->array);
if(stage->full_array != NULL)
free(stage->full_array);
stage->real_bytes = (stage->bytes && !stage->t_capped) ? stage->bytes : config_file->granularity;
stage->array = calloc(stage->real_bytes, sizeof(char));
stage->full_array = calloc(stage->real_bytes, sizeof(char));
if(compute && !stage->bytes && !stage->t_capped) {
time = init_emulation_comm_time(group, config_file, stage, comm);
} else {
stage->operations = 1;
}
return time;
}
double init_comm_iptop_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute) {
int i;
double time = 0;
if(stage->array != NULL)
free(stage->array);
if(stage->full_array != NULL)
free(stage->full_array);
if(stage->reqs != NULL) //FIXME May be erroneous if requests are still active...
free(stage->reqs);
stage->real_bytes = (stage->bytes && !stage->t_capped) ? stage->bytes : config_file->granularity;
stage->array = calloc(stage->real_bytes, sizeof(char));
stage->full_array = calloc(stage->real_bytes, sizeof(char));
if(compute && !stage->bytes) { // t_capped is not considered in this case
stage->req_count = 2 * stage->operations; //FIXME Magical number
stage->reqs = (MPI_Request *) malloc(stage->req_count * sizeof(MPI_Request));
time = init_emulation_icomm_time(group, config_file, stage, comm);
free(stage->reqs);
} else {
stage->operations = 1;
}
stage->req_count = 2 * stage->operations; //FIXME Magical number
stage->reqs = (MPI_Request *) malloc(stage->req_count * sizeof(MPI_Request));
for(i=0; i < stage->req_count; i++) {
stage->reqs[i] = MPI_REQUEST_NULL;
}
return time;
}
// TODO Compute should always be 1 if the number of processes is different
double init_comm_bcast_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute) {
double time = 0;
if(stage->array != NULL)
free(stage->array);
stage->real_bytes = (stage->bytes && !stage->t_capped) ? stage->bytes : config_file->granularity;
stage->array = calloc(stage->real_bytes, sizeof(char)); //FIXME Valgrind reports uninitialised
if(compute && !stage->bytes && !stage->t_capped) {
time = init_emulation_comm_time(group, config_file, stage, comm);
} else {
stage->operations = 1;
}
return time;
}
// TODO Compute should always be 1 if the number of processes is different
double init_comm_allgatherv_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute) {
double time=0;
struct Dist_data dist_data;
if(stage->array != NULL)
free(stage->array);
if(stage->counts.counts != NULL)
freeCounts(&(stage->counts));
if(stage->full_array != NULL)
free(stage->full_array);
stage->real_bytes = (stage->bytes && !stage->t_capped) ? stage->bytes : config_file->granularity;
prepare_comm_allgatherv(group.numP, stage->real_bytes, &(stage->counts));
get_block_dist(stage->real_bytes, group.myId, group.numP, &dist_data);
stage->my_bytes = dist_data.tamBl;
stage->array = calloc(stage->my_bytes, sizeof(char));
stage->full_array = calloc(stage->real_bytes, sizeof(char));
if(compute && !stage->bytes && !stage->t_capped) {
time = init_emulation_comm_time(group, config_file, stage, comm);
} else {
stage->operations = 1;
}
return time;
}
// TODO Compute should always be 1 if the number of processes is different
double init_comm_reduce_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute) {
double time = 0;
if(stage->array != NULL)
free(stage->array);
if(stage->full_array != NULL)
free(stage->full_array);
stage->real_bytes = (stage->bytes && !stage->t_capped) ? stage->bytes : config_file->granularity;
stage->array = calloc(stage->real_bytes, sizeof(char));
//The full array for the reduce needs the same size
stage->full_array = calloc(stage->real_bytes, sizeof(char));
if(compute && !stage->bytes && !stage->t_capped) {
time = init_emulation_comm_time(group, config_file, stage, comm);
} else {
stage->operations = 1;
}
return time;
}
double init_comm_wait_pt(configuration *config_file, iter_stage_t *stage) {
size_t i;
double time = 0;
iter_stage_t aux_stage;
if(stage->id < 0) {
printf("Error when initializing wait stage. Id is negative\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -1;
}
for(i=0; i<config_file->n_stages; i++) {
aux_stage = config_file->stages[i];
if(aux_stage.id == stage->id) { break; }
}
if(i == config_file->n_stages) {
printf("Error when initializing wait stage. Not found a corresponding id\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -1;
}
stage->req_count = aux_stage.req_count;
stage->reqs = aux_stage.reqs;
return time;
}
#ifndef PROCESS_STAGE_H
#define PROCESS_STAGE_H
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
#include "Main_datatypes.h"
// 0 1 2 3 4 5 6 7 8
enum compute_methods{COMP_PI, COMP_MATRIX, COMP_POINT, COMP_IPOINT, COMP_WAIT, COMP_BCAST, COMP_ALLGATHER, COMP_REDUCE, COMP_ALLREDUCE};
double init_stage(configuration *config_file, int stage_i, group_data group, MPI_Comm comm, int compute);
//double stage_init_all();
double process_stage(configuration config_file, iter_stage_t stage, group_data group, MPI_Comm comm);
#endif
CC = gcc
MCC = mpicc
#C_FLAGS_ALL = -Wconversion -Wpedantic
C_FLAGS = -Wall -Wextra -Wshadow -Wfatal-errors
LD_FLAGS = -lm -pthread -lmam
MAM_HOME = ../MaM
MAM_FLAGS = -I$(MAM_HOME) -L$(MAM_HOME)/build
ifeq ($(shell test $(MAM_DEBUG) -gt 0; echo $$?),0)
C_FLAGS += -g
endif
# Final binary
BIN = a.out
# Put all auto generated stuff to this build dir.
BUILD_DIR = ./build
# List of all directories where source files are located
SRCDIRS = IOcodes Main
# List of all .c source files.
C_FILES = $(foreach dire, $(SRCDIRS), $(wildcard $(dire)/*.c))
# All .o files go to build dir.
OBJ = $(C_FILES:%.c=$(BUILD_DIR)/%.o)
# Gcc will create these .d files containing dependencies.
DEP = $(OBJ:%.o=%.d)
# BASIC RULES
.PHONY : clean clear install
all: install
clean:
-rm $(BUILD_DIR)/$(BIN) $(BUILD_DIR)/$(CONFIG) $(OBJ) $(DEP)
clear:
-rm -rf $(BUILD_DIR)
install: $(BIN) $(CONFIG)
echo "Done"
# SPECIFIC RULES
# Default target named after the binary.
$(BIN) : $(BUILD_DIR)/$(BIN)
# Actual target of the binary - depends on all .o files.
$(BUILD_DIR)/$(BIN) : $(OBJ)
$(MCC) $(C_FLAGS) $(MAM_FLAGS) $^ -o $@ $(LD_FLAGS)
# Include all .d files
# .d files are used for knowing the dependencies of each source file
-include $(DEP)
# Build target for every single object file.
# The potential dependency on header files is covered
# by calling `-include $(DEP)`.
# The -MMD flags additionaly creates a .d file with
# the same name as the .o file.
$(BUILD_DIR)/%.o : %.c
@mkdir -p $(@D)
$(MCC) $(C_FLAGS) $(MAM_FLAGS) $(DEF) -MMD -c $< -o $@
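# Example invocations (illustrative): "make" builds $(BUILD_DIR)/$(BIN);
# "make MAM_DEBUG=1" additionally compiles with -g (see the test above);
# "make clear" removes the whole build directory.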
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h> // close(), write()
#include <string.h>
#include <slurm/slurm.h>
//--------------PRIVATE DECLARATIONS---------------//
void node_dist(slurm_job_info_t job_record, int type, int total_procs, int **qty, int *used_nodes);
int create_hostfile(char *jobId, char **file_name);
int write_hostfile_node(int ptr, int qty, char *node_name);
void fill_hostfile(slurm_job_info_t job_record, int ptr, int *qty, int used_nodes);
int main(int argc, char *argv[]) {
int jobId, ptr, numP, dist;
char *tmp;
job_info_msg_t *j_info;
slurm_job_info_t last_record;
int used_nodes=0;
int *procs_array;
char *hostfile_name;
if(argc < 3) {
printf("Uso ./a.out numP physical_dist");
exit(-1);
}
numP = atoi(argv[1]);
dist = atoi(argv[2]);
// Get Slurm job info
tmp = getenv("SLURM_JOB_ID");
jobId = atoi(tmp);
slurm_load_job(&j_info, jobId, 1);
last_record = j_info->job_array[j_info->record_count - 1];
// GET NEW DISTRIBUTION
node_dist(last_record, dist, numP, &procs_array, &used_nodes);
// CREATE/UPDATE HOSTFILE
ptr = create_hostfile(tmp, &hostfile_name);
free(hostfile_name);
// SET NEW DISTRIBUTION
fill_hostfile(last_record, ptr, procs_array, used_nodes);
close(ptr);
// Free JOB INFO
slurm_free_job_info_msg(j_info);
return 0;
}
/*
 * Obtains the physical distribution of the group of processes to create,
 * returning how many nodes will be used and how many processes each node
 * will host.
 *
 * Two kinds of physical distribution are allowed depending on "type":
 *
 * (1): Aimed at balancing the number of processes among
 * all available nodes.
 * (2): Aimed at filling the capacity of one node before
 * occupying another node.
 */
void node_dist(slurm_job_info_t job_record, int type, int total_procs, int **qty, int *used_nodes) {
int i, asigCores;
int tamBl, remainder;
int *procs;
procs = calloc(job_record.num_nodes, sizeof(int)); // Number of processes per node
/* GET NEW DISTRIBUTION */
if(type == 1) { // DIST NODES
*used_nodes = job_record.num_nodes;
tamBl = total_procs / job_record.num_nodes;
remainder = total_procs % job_record.num_nodes;
for(i=0; i<remainder; i++) {
procs[i] = tamBl + 1;
}
for(i=remainder; i<job_record.num_nodes; i++) {
procs[i] = tamBl;
}
} else if (type == 2) { // DIST CPUs
tamBl = job_record.num_cpus / job_record.num_nodes;
asigCores = 0;
i = 0;
*used_nodes = 0;
while(asigCores+tamBl <= total_procs) {
asigCores += tamBl;
procs[i] += tamBl;
i = (i+1) % job_record.num_nodes;
(*used_nodes)++;
}
if(asigCores < total_procs) {
procs[i] += total_procs - asigCores;
(*used_nodes)++;
}
if(*used_nodes > job_record.num_nodes) *used_nodes = job_record.num_nodes; //FIXME If this happens, is it not an error?
}
*used_nodes=job_record.num_nodes;
// Previously, every node without cpus was set to 1 here
*qty = procs;
}
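/*
 * Worked example (illustrative, numbers are assumptions): with num_nodes=3
 * and total_procs=10, type (1) balances the processes as procs = {4, 3, 3};
 * with num_cpus=24 (8 cpus per node), type (2) fills the nodes in order and
 * yields procs = {8, 2, 0}.
 */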
/*
 * Creates a file that will be used as a hostfile
 * for a new group of processes.
 *
 * The name is returned in the argument "file_name",
 * which has to be an empty pointer.
 *
 * A file descriptor is also returned in order to
 * modify the file.
 */
int create_hostfile(char *jobId, char **file_name) {
int ptr, err, len;
len = strlen(jobId) + 11;
*file_name = NULL;
*file_name = malloc( len * sizeof(char));
if(*file_name == NULL) return -1; // Could not allocate memory
err = snprintf(*file_name, len, "hostfile.o%s", jobId);
if(err < 0) return -2; // Could not build the file name
ptr = open(*file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
if(ptr < 0) return -3; // Could not create the file
return ptr; // Return the file descriptor
}
/*
 * Fills the hostfile pointed to by ptr with the names of the nodes
 * to use, indicated by "job_record", and the number of processes
 * each node will host, indicated by "qty".
 */
void fill_hostfile(slurm_job_info_t job_record, int ptr, int *qty, int used_nodes) {
int i=0;
char *host;
hostlist_t hostlist;
hostlist = slurm_hostlist_create(job_record.nodes);
while ( (host = slurm_hostlist_shift(hostlist)) && i < used_nodes) {
if(qty[i] != 0)
write_hostfile_node(ptr, qty[i], host);
i++;
free(host);
}
slurm_hostlist_destroy(hostlist);
}
/*
 * Writes a new line in the hostfile pointed to by ptr.
 *
 * The line indicates the name of a node and the number of processes
 * to host on that node.
 */
int write_hostfile_node(int ptr, int qty, char *node_name) {
int err, len_node, len_int, len;
char *line;
len_node = strlen(node_name);
len_int = snprintf(NULL, 0, "%d", qty);
len = len_node + len_int + 3;
line = malloc(len * sizeof(char));
if(line == NULL) return -1; // Could not allocate memory
err = snprintf(line, len, "%s:%d\n", node_name, qty);
if(err < 0) return -2; // Could not write to the file
write(ptr, line, len-1);
free(line);
return 0;
}
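/*
 * Example of the resulting hostfile content (hypothetical node names), one
 * "node:processes" pair per line as written above:
 *
 *   n00:4
 *   n01:3
 *   n02:3
 */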
module load mpich-3.4.1-noucx
#mpicc -Wall Main/Main.c Main/computing_func.c IOcodes/results.c IOcodes/read_ini.c IOcodes/ini.c malleability/ProcessDist.c malleability/CommDist.c -pthread -lslurm -lm
mpicc -Wall Main/Main.c Main/computing_func.c IOcodes/results.c IOcodes/read_ini.c IOcodes/ini.c malleability/malleabilityManager.c malleability/malleabilityTypes.c malleability/malleabilityZombies.c malleability/ProcessDist.c malleability/CommDist.c -pthread -lslurm -lm
if [ $# -gt 0 ]
then
if [ $1 = "-e" ]
then
echo "Creado ejecutable para ejecuciones"
cp a.out bench.out
fi
fi
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <string.h>
#include "CommDist.h"
struct Dist_data {
int ini; //First element to send
int fin; //Last element to send
int tamBl; // Total number of elements
int qty; // Total number of rows of the full disperse matrix
int myId;
int numP;
MPI_Comm intercomm;
};
struct Counts {
int *counts;
int *displs;
int *zero_arr;
};
void send_sync_arrays(struct Dist_data dist_data, char *array, int root, int numP_child, int idI, int idE, struct Counts counts);
void recv_sync_arrays(struct Dist_data dist_data, char *array, int root, int numP_parents, int idI, int idE, struct Counts counts);
void send_async_arrays(struct Dist_data dist_data, char *array, int root, int numP_child, int idI, int idE, struct Counts counts, MPI_Request *comm_req);
void recv_async_arrays(struct Dist_data dist_data, char *array, int root, int numP_parents, int idI, int idE, struct Counts counts, MPI_Request *comm_req);
void send_async_point_arrays(struct Dist_data dist_data, char *array, int rootBcast, int numP_child, int idI, int idE, struct Counts counts, MPI_Request *comm_req);
void recv_async_point_arrays(struct Dist_data dist_data, char *array, int root, int numP_parents, int idI, int idE, struct Counts counts, MPI_Request *comm_req);
// DIST FUNCTIONS
void get_dist(int qty, int id, int numP, struct Dist_data *dist_data);
void set_counts(int id, int numP, struct Dist_data data_dist, int *sendcounts);
void getIds_intercomm(struct Dist_data dist_data, int numP_other, int **idS);
void mallocCounts(struct Counts *counts, int numP);
void freeCounts(struct Counts *counts);
void print_counts(struct Dist_data data_dist, int *xcounts, int *xdispls, int size, const char* name);
/*
 * Allocates memory for a vector of up to "qty" elements.
 * The "qty" elements are distributed among the "numP" processes
 * that call this function.
 */
void malloc_comm_array(char **array, int qty, int myId, int numP) {
struct Dist_data dist_data;
get_dist(qty, myId, numP, &dist_data);
if( (*array = malloc(dist_data.tamBl * sizeof(char))) == NULL) {
printf("Memory Error (Malloc Arrays(%d))\n", dist_data.tamBl);
exit(1);
}
/*
int i;
for(i=0; i<dist_data.tamBl; i++) {
(*array)[i] = '!' + i + dist_data.ini;
}
printf("P%d Tam %d String: %s\n", myId, dist_data.tamBl, *array);
*/
}
//================================================================================
//================================================================================
//========================SYNCHRONOUS FUNCTIONS===================================
//================================================================================
//================================================================================
/*
 * Performs a synchronous send of the vector "array" from this group of
 * processes to the group linked through the intercommunicator "intercomm".
 *
 * The vector "array" is not modified by this function.
 */
int send_sync(char *array, int qty, int myId, int numP, int root, MPI_Comm intercomm, int numP_child) {
int rootBcast = MPI_PROC_NULL;
int *idS = NULL;
struct Counts counts;
struct Dist_data dist_data;
if(myId == root) rootBcast = MPI_ROOT;
get_dist(qty, myId, numP, &dist_data); // Distribution of this process within its group
dist_data.intercomm = intercomm;
// Create arrays which contain info about how many elements will be sent to each created process
mallocCounts(&counts, numP_child);
getIds_intercomm(dist_data, numP_child, &idS); // Obtain the range of child Ids this process sends data to
send_sync_arrays(dist_data, array, rootBcast, numP_child, idS[0], idS[1], counts);
freeCounts(&counts);
free(idS);
return 1;
}
/*
 * Performs a synchronous reception of the vector "array" into this group of
 * processes from the group linked through the intercommunicator "intercomm".
 *
 * The vector "array" is allocated inside the function and returned through the
 * same argument. It has to be freed later by the user.
 */
void recv_sync(char **array, int qty, int myId, int numP, int root, MPI_Comm intercomm, int numP_parents) {
int *idS = NULL;
struct Counts counts;
struct Dist_data dist_data;
// Obtain the distribution for this child
get_dist(qty, myId, numP, &dist_data);
*array = malloc(dist_data.tamBl * sizeof(char));
//(*array)[dist_data.tamBl] = '\0';
dist_data.intercomm = intercomm;
/* PREPARE RECEPTION DATA FOR THE VECTOR */
mallocCounts(&counts, numP_parents);
getIds_intercomm(dist_data, numP_parents, &idS); // Obtain the range of parent Ids this process will receive data from
recv_sync_arrays(dist_data, *array, root, numP_parents, idS[0], idS[1], counts);
//printf("S%d Tam %d String: %s END\n", myId, dist_data.tamBl, *array);
freeCounts(&counts);
free(idS);
}
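/*
 * Usage sketch (illustrative only): a synchronous redistribution of "qty"
 * chars from a parent group of numP processes to a child group of numC
 * processes over the intercommunicator "intercomm". The variable names are
 * assumptions for the example.
 *
 *   // Parent group:
 *   send_sync(array, qty, myId, numP, root, intercomm, numC);
 *
 *   // Child group:
 *   char *array = NULL;
 *   recv_sync(&array, qty, myId, numC, root, intercomm, numP);
 *   ...
 *   free(array);
 */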
/*
 * Sends to the children a vector that is redistributed among the child
 * processes. Before the communication, each parent process computes to which
 * processes of the other group it has to transmit elements.
 */
void send_sync_arrays(struct Dist_data dist_data, char *array, int rootBcast, int numP_child, int idI, int idE, struct Counts counts) {
int i;
// PREPARE THE SEND OF THE VECTOR
if(idI == 0) {
set_counts(0, numP_child, dist_data, counts.counts);
idI++;
}
for(i=idI; i<idE; i++) {
set_counts(i, numP_child, dist_data, counts.counts);
counts.displs[i] = counts.displs[i-1] + counts.counts[i-1];
}
//print_counts(dist_data, counts.counts, counts.displs, numP_child, "Padres");
/* DATA COMMUNICATION */
MPI_Alltoallv(array, counts.counts, counts.displs, MPI_CHAR, NULL, counts.zero_arr, counts.zero_arr, MPI_CHAR, dist_data.intercomm);
}
/*
 * Receives from the parents a vector that is redistributed among the
 * processes of this group. Before the communication, each child computes to
 * which processes of the other group it has to transmit elements.
 */
void recv_sync_arrays(struct Dist_data dist_data, char *array, int root, int numP_parents, int idI, int idE, struct Counts counts) {
int i;
char aux;
// Adjust the reception values
if(idI == 0) {
set_counts(0, numP_parents, dist_data, counts.counts);
idI++;
}
for(i=idI; i<idE; i++) {
set_counts(i, numP_parents, dist_data, counts.counts);
counts.displs[i] = counts.displs[i-1] + counts.counts[i-1];
}
//print_counts(dist_data, counts.counts, counts.displs, numP_parents, "Hijos");
/* DATA COMMUNICATION */
MPI_Alltoallv(&aux, counts.zero_arr, counts.zero_arr, MPI_CHAR, array, counts.counts, counts.displs, MPI_CHAR, dist_data.intercomm);
}
//================================================================================
//================================================================================
//========================ASYNCHRONOUS FUNCTIONS==================================
//================================================================================
//================================================================================
/*
 * Performs an asynchronous send of the vector "array" from this group of
 * processes to the group linked through the intercommunicator "intercomm".
 *
 * The MPI_Request object is returned as a handle to check whether the
 * communication has finished.
 *
 * The vector "array" is not modified by this function.
 */
int send_async(char *array, int qty, int myId, int numP, int root, MPI_Comm intercomm, int numP_child, MPI_Request **comm_req, int parents_wait) {
int i, rootBcast = MPI_PROC_NULL;
int *idS = NULL;
struct Counts counts;
struct Dist_data dist_data;
if(myId == root) rootBcast = MPI_ROOT;
get_dist(qty, myId, numP, &dist_data); // Distribution of this process within its group
dist_data.intercomm = intercomm;
// Create arrays which contain info about how many elements will be sent to each created process
mallocCounts(&counts, numP_child);
getIds_intercomm(dist_data, numP_child, &idS); // Obtain the range of child Ids this process sends data to
// MAL_USE_THREAD follows the synchronous path
if(parents_wait == MAL_USE_NORMAL) {
//*comm_req = (MPI_Request *) malloc(sizeof(MPI_Request));
*comm_req[0] = MPI_REQUEST_NULL;
send_async_arrays(dist_data, array, rootBcast, numP_child, idS[0], idS[1], counts, &(*comm_req[0]));
} else if (parents_wait == MAL_USE_IBARRIER){
//*comm_req = (MPI_Request *) malloc(2 * sizeof(MPI_Request));
*comm_req[0] = MPI_REQUEST_NULL;
*comm_req[1] = MPI_REQUEST_NULL;
send_async_arrays(dist_data, array, rootBcast, numP_child, idS[0], idS[1], counts, &((*comm_req)[1]));
MPI_Ibarrier(intercomm, &((*comm_req)[0]) );
} else if (parents_wait == MAL_USE_POINT){
//*comm_req = (MPI_Request *) malloc(numP_child * sizeof(MPI_Request));
for(i=0; i<numP_child; i++){
(*comm_req)[i] = MPI_REQUEST_NULL;
}
send_async_point_arrays(dist_data, array, rootBcast, numP_child, idS[0], idS[1], counts, *comm_req);
} else if (parents_wait == MAL_USE_THREAD) { //TODO
}
freeCounts(&counts);
free(idS);
return 1;
}
/*
 * Performs an asynchronous reception of the vector "array" into this group of
 * processes from the group linked through the intercommunicator "intercomm".
 *
 * The vector "array" is allocated inside the function and returned through the
 * same argument. It has to be freed later by the user.
 *
 * The argument "parents_wait" indicates whether the version in which the parents
 * wait until they finish sending is used, or the one in which they wait until
 * the children finish receiving.
 */
void recv_async(char **array, int qty, int myId, int numP, int root, MPI_Comm intercomm, int numP_parents, int parents_wait) {
int *idS = NULL;
int wait_err = MPI_SUCCESS, i; // Initialised so the unfinished MAL_USE_THREAD path does not use garbage values
struct Counts counts;
struct Dist_data dist_data;
MPI_Request *comm_req = NULL, aux;
// Obtain the distribution for this child
get_dist(qty, myId, numP, &dist_data);
*array = malloc(dist_data.tamBl * sizeof(char));
dist_data.intercomm = intercomm;
/* PREPARE RECEPTION DATA FOR THE VECTOR */
mallocCounts(&counts, numP_parents);
getIds_intercomm(dist_data, numP_parents, &idS); // Obtain the range of parent Ids this process will receive data from
// MAL_USE_THREAD follows the synchronous path
if(parents_wait == MAL_USE_POINT) {
comm_req = (MPI_Request *) malloc(numP_parents * sizeof(MPI_Request));
for(i=0; i<numP_parents; i++){
comm_req[i] = MPI_REQUEST_NULL;
}
recv_async_point_arrays(dist_data, *array, root, numP_parents, idS[0], idS[1], counts, comm_req);
wait_err = MPI_Waitall(numP_parents, comm_req, MPI_STATUSES_IGNORE);
} else if (parents_wait == MAL_USE_NORMAL || parents_wait == MAL_USE_IBARRIER) {
comm_req = (MPI_Request *) malloc(sizeof(MPI_Request));
*comm_req = MPI_REQUEST_NULL;
recv_async_arrays(dist_data, *array, root, numP_parents, idS[0], idS[1], counts, comm_req);
wait_err = MPI_Wait(comm_req, MPI_STATUS_IGNORE);
} else if (parents_wait == MAL_USE_THREAD) { //TODO
}
if(wait_err != MPI_SUCCESS) {
MPI_Abort(MPI_COMM_WORLD, wait_err);
}
if(parents_wait == MAL_USE_IBARRIER) { //MAL USE IBARRIER END
MPI_Ibarrier(intercomm, &aux);
MPI_Wait(&aux, MPI_STATUS_IGNORE); //It is necessary to check that the communication has finished before disconnecting the groups of processes
}
//printf("S%d Tam %d String: %s END\n", myId, dist_data.tamBl, *array);
freeCounts(&counts);
free(idS);
free(comm_req);
}
/*
 * Sends to the children a vector that is redistributed among the child
 * processes. Before the communication, each parent process computes to which
 * processes of the other group it has to transmit elements.
 *
 * The send is performed through a collective communication.
 */
void send_async_arrays(struct Dist_data dist_data, char *array, int rootBcast, int numP_child, int idI, int idE, struct Counts counts, MPI_Request *comm_req) {
int i;
// PREPARE THE SEND OF THE VECTOR
if(idI == 0) {
set_counts(0, numP_child, dist_data, counts.counts);
idI++;
}
for(i=idI; i<idE; i++) {
set_counts(i, numP_child, dist_data, counts.counts);
counts.displs[i] = counts.displs[i-1] + counts.counts[i-1];
}
//print_counts(dist_data, counts.counts, counts.displs, numP_child, "Padres");
/* DATA COMMUNICATION */
MPI_Ialltoallv(array, counts.counts, counts.displs, MPI_CHAR, NULL, counts.zero_arr, counts.zero_arr, MPI_CHAR, dist_data.intercomm, comm_req);
}
/*
 * Sends to the children a vector that is redistributed among the child
 * processes. Before the communication, each parent process computes to which
 * processes of the other group it has to transmit elements.
 *
 * The send is performed through several point-to-point communications.
 */
void send_async_point_arrays(struct Dist_data dist_data, char *array, int rootBcast, int numP_child, int idI, int idE, struct Counts counts, MPI_Request *comm_req) {
int i;
// PREPARE THE SEND OF THE VECTOR
if(idI == 0) {
set_counts(0, numP_child, dist_data, counts.counts);
idI++;
MPI_Isend(array, counts.counts[0], MPI_CHAR, 0, 99, dist_data.intercomm, &(comm_req[0]));
}
for(i=idI; i<idE; i++) {
set_counts(i, numP_child, dist_data, counts.counts);
counts.displs[i] = counts.displs[i-1] + counts.counts[i-1];
MPI_Isend(array+counts.displs[i], counts.counts[i], MPI_CHAR, i, 99, dist_data.intercomm, &(comm_req[i]));
}
//print_counts(dist_data, counts.counts, counts.displs, numP_child, "Padres");
}
/*
 * Receives from the parents a vector that is redistributed among the
 * processes of this group. Before the communication, each child computes to
 * which processes of the other group it has to transmit elements.
 *
 * The reception is performed through a collective communication.
 */
void recv_async_arrays(struct Dist_data dist_data, char *array, int root, int numP_parents, int idI, int idE, struct Counts counts, MPI_Request *comm_req) {
int i;
char *aux = malloc(1);
// Adjust the reception values
if(idI == 0) {
set_counts(0, numP_parents, dist_data, counts.counts);
idI++;
}
for(i=idI; i<idE; i++) {
set_counts(i, numP_parents, dist_data, counts.counts);
counts.displs[i] = counts.displs[i-1] + counts.counts[i-1];
}
//print_counts(dist_data, counts.counts, counts.displs, numP_parents, "Hijos");
/* DATA COMMUNICATION */
MPI_Ialltoallv(aux, counts.zero_arr, counts.zero_arr, MPI_CHAR, array, counts.counts, counts.displs, MPI_CHAR, dist_data.intercomm, comm_req);
free(aux);
}
/*
 * Receives from the parents a vector that is redistributed among the
 * processes of this group. Before the communication, each child computes to
 * which processes of the other group it has to transmit elements.
 *
 * The reception is performed through several point-to-point communications.
 */
void recv_async_point_arrays(struct Dist_data dist_data, char *array, int root, int numP_parents, int idI, int idE, struct Counts counts, MPI_Request *comm_req) {
int i;
// Adjust the reception values
if(idI == 0) {
set_counts(0, numP_parents, dist_data, counts.counts);
idI++;
MPI_Irecv(array, counts.counts[0], MPI_CHAR, 0, 99, dist_data.intercomm, &(comm_req[0])); //FIXME Buffer recv
}
for(i=idI; i<idE; i++) {
set_counts(i, numP_parents, dist_data, counts.counts);
counts.displs[i] = counts.displs[i-1] + counts.counts[i-1];
MPI_Irecv(array+counts.displs[i], counts.counts[i], MPI_CHAR, i, 99, dist_data.intercomm, &(comm_req[i])); //FIXME Buffer recv
}
//print_counts(dist_data, counts.counts, counts.displs, numP_parents, "Hijos");
}
/*
* ========================================================================================
* ========================================================================================
* ================================DISTRIBUTION FUNCTIONS==================================
* ========================================================================================
* ========================================================================================
*/
/*
 * Obtains, for "id" and "numP", how many
 * rows and elements per row process "id" will have,
 * and fills the results into a Dist_data struct.
 */
void get_dist(int qty, int id, int numP, struct Dist_data *dist_data) {
int rem;
dist_data->myId = id;
dist_data->numP = numP;
dist_data->qty = qty;
dist_data->tamBl = qty / numP;
rem = qty % numP;
if(id < rem) { // First subgroup
dist_data->ini = id * dist_data->tamBl + id;
dist_data->fin = (id+1) * dist_data->tamBl + (id+1);
} else { // Second subgroup
dist_data->ini = id * dist_data->tamBl + rem;
dist_data->fin = (id+1) * dist_data->tamBl + rem;
}
if(dist_data->fin > qty) {
dist_data->fin = qty;
}
if(dist_data->ini > dist_data->fin) {
dist_data->ini = dist_data->fin;
}
dist_data->tamBl = dist_data->fin - dist_data->ini;
}
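/*
 * Worked example (illustrative): get_dist(10, id, 3, &d) splits 10 elements
 * among 3 processes as the ranges [0,4), [4,7) and [7,10), i.e. tamBl is
 * 4, 3 and 3 for id = 0, 1 and 2 respectively.
 */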
/*
 * Obtains, for the Id of a given process, how many elements it will
 * send to or receive from the process indicated in Dist_data.
 */
void set_counts(int id, int numP, struct Dist_data data_dist, int *sendcounts) {
struct Dist_data other;
int biggest_ini, smallest_end;
get_dist(data_dist.qty, id, numP, &other);
// If the value ranges do not overlap, move on to the next process
if(data_dist.ini >= other.fin || data_dist.fin <= other.ini) {
return;
}
// Take the largest "ini" of the two processes
if(data_dist.ini > other.ini) {
biggest_ini = data_dist.ini;
} else {
biggest_ini = other.ini;
}
// Take the smallest "fin" of the two processes
if(data_dist.fin < other.fin) {
smallest_end = data_dist.fin;
} else {
smallest_end = other.fin;
}
sendcounts[id] = smallest_end - biggest_ini; // Number of elements to send to/receive from process Id
}
/*
 * Obtains, for a process of one group, the range of processes of the
 * other group it has to send data to or receive data from.
 *
 * Returns the first and the last (excluded) identifier to
 * communicate with.
 */
void getIds_intercomm(struct Dist_data dist_data, int numP_other, int **idS) {
int idI, idE;
int tamOther = dist_data.qty / numP_other;
int remOther = dist_data.qty % numP_other;
// Cut-off point of the external group of processes that
// separates the processes with
// size tamOther + 1 from those with size tamOther
int middle = (tamOther + 1) * remOther;
// Compute idI taking into account whether it communicates with a
// process of size tamOther or tamOther+1
if(middle > dist_data.ini) { // First subgroup (tamOther+1)
idI = dist_data.ini / (tamOther + 1);
} else { // Second subgroup (tamOther)
idI = ((dist_data.ini - middle) / tamOther) + remOther;
}
// Compute idE taking into account whether it communicates with a
// process of size tamOther or tamOther+1
if(middle >= dist_data.fin) { // First subgroup (tamOther +1)
idE = dist_data.fin / (tamOther + 1);
idE = (dist_data.fin % (tamOther + 1) > 0 && idE+1 <= numP_other) ? idE+1 : idE;
} else { // Second subgroup (tamOther)
idE = ((dist_data.fin - middle) / tamOther) + remOther;
idE = ((dist_data.fin - middle) % tamOther > 0 && idE+1 <= numP_other) ? idE+1 : idE;
}
*idS = malloc(2 * sizeof(int));
(*idS)[0] = idI;
(*idS)[1] = idE;
}
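/*
 * Worked example (illustrative): with qty=10, a local group of 2 processes
 * (ranges [0,5) and [5,10)) and numP_other=3 (ranges [0,4), [4,7), [7,10)),
 * local rank 0 obtains idS = {0, 2} (it exchanges data with remote ranks 0
 * and 1) and local rank 1 obtains idS = {1, 3} (remote ranks 1 and 2).
 */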
/*
 * Allocates memory for the counts/displs vectors used by the
 * MPI_Alltoallv function. All the vectors have a size of numP, which is
 * the number of processes in the other group of processes.
 *
 * The counts vector indicates how many elements are communicated from this
 * process to process "i" of the other group.
 *
 * The displs vector indicates the displacements needed for each
 * communication with process "i" of the other group.
 *
 * The zero_arr vector is used when a vector initialised to 0 in all its
 * elements is needed. It is used to indicate that there is no communication.
 */
void mallocCounts(struct Counts *counts, int numP) {
counts->counts = calloc(numP, sizeof(int));
if(counts->counts == NULL) { MPI_Abort(MPI_COMM_WORLD, -2);}
counts->displs = calloc(numP, sizeof(int));
if(counts->displs == NULL) { MPI_Abort(MPI_COMM_WORLD, -2);}
counts->zero_arr = calloc(numP, sizeof(int));
if(counts->zero_arr == NULL) { MPI_Abort(MPI_COMM_WORLD, -2);}
}
/*
 * Frees the internal memory of a Counts structure.
 *
 * It does not free the memory of the Counts structure itself if it
 * was allocated dynamically.
 */
void freeCounts(struct Counts *counts) {
free(counts->counts);
free(counts->displs);
free(counts->zero_arr);
}
void print_counts(struct Dist_data data_dist, int *xcounts, int *xdispls, int size, const char* name) {
int i;
for(i=0; i < size; i++) {
//if(xcounts[i] != 0) {
printf("P%d of %d | %scounts[%d]=%d disp=%d\n", data_dist.myId, data_dist.numP, name, i, xcounts[i], xdispls[i]);
//}
}
}
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <string.h>
#include "malleabilityStates.h"
//#define MAL_COMM_COMPLETED 0
//#define MAL_COMM_UNINITIALIZED 2
//#define MAL_ASYNC_PENDING 1
//#define MAL_USE_NORMAL 0
//#define MAL_USE_IBARRIER 1
//#define MAL_USE_POINT 2
//#define MAL_USE_THREAD 3
int send_sync(char *array, int qty, int myId, int numP, int root, MPI_Comm intercomm, int numP_child);
void recv_sync(char **array, int qty, int myId, int numP, int root, MPI_Comm intercomm, int numP_parents);
int send_async(char *array, int qty, int myId, int numP, int root, MPI_Comm intercomm, int numP_child, MPI_Request **comm_req, int parents_wait);
void recv_async(char **array, int qty, int myId, int numP, int root, MPI_Comm intercomm, int numP_parents, int parents_wait);
void malloc_comm_array(char **array, int qty, int myId, int numP);
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
#include <mpi.h>
#include <string.h>
#include <slurm/slurm.h>
#include "ProcessDist.h"
int commState = MAL_NOT_STARTED;
struct Slurm_data *slurm_data;
pthread_t spawn_thread;
pthread_mutex_t spawn_mutex;
MPI_Comm *returned_comm;
double end_time; //FIXME REFACTOR
struct Slurm_data {
char *cmd; // Executable name
char *nodelist;
int num_cpus, num_nodes;
int qty_procs, result_procs;
MPI_Info info;
int type_creation;
int spawn_is_single;
};
typedef struct {
char *argv;
int numP_childs, myId, root, already_created;
int type_dist;
int spawn_is_single;
int spawn_method;
MPI_Comm comm;
}Creation_data;
//--------------PRIVATE SPAWN TYPE DECLARATIONS---------------//
void* thread_work(void* creation_data_arg);
//--------------PRIVATE DECLARATIONS---------------//
void processes_dist(char *argv, int numP_childs, int already_created, int type_dist);
void generic_spawn(int myId, int root, int is_single, MPI_Comm *child, MPI_Comm comm);
void single_spawn_connection(int myId, int root, MPI_Comm comm, MPI_Comm *child);
int create_processes(int myId, int root, MPI_Comm *child, MPI_Comm comm);
void node_dist(int type, int total_procs, int already_created, int **qty, int *used_nodes);
void fill_str_hostfile(int *qty, int used_nodes, char **hostfile_str);
int write_str_node(char **hostfile_str, int len_og, int qty, char *node_name);
//@deprecated functions
int create_hostfile(char *jobId, char **file_name);
int write_hostfile_node(int ptr, int qty, char *node_name);
void fill_hostfile(slurm_job_info_t job_record, int ptr, int *qty, int used_nodes);
//--------------PUBLIC FUNCTIONS---------------//
/*
 * Requests the creation of a new group of "numP" processes with a physical
 * distribution "type_dist".
 *
 * It can be requested in the foreground, in which case the process that calls this
 * function takes care of it, or in the background, where a thread configures the creation.
 *
 * If requested in the foreground, once it finishes it is possible to call
 * "check_slurm_comm()" to create the processes.
 *
 * If requested in the background, calling "check_slurm_comm()" checks whether the
 * configuration to create them is ready and, if so, creates them.
 *
 * Returns the state of the procedure. If it does not return "COMM_FINISHED", it is
 * necessary to call "check_slurm_comm()".
 */
int init_slurm_comm(char *argv, int num_cpus, int num_nodes, char *nodelist, int myId, int numP, int numC, int root, int type_dist, int type_creation, int spawn_is_single, MPI_Comm comm, MPI_Comm *child) {
int spawn_qty, already_created = 0;
slurm_data = malloc(sizeof(struct Slurm_data));
spawn_thread = pthread_self();
slurm_data->type_creation = type_creation;
slurm_data->spawn_is_single = spawn_is_single;
slurm_data->result_procs = numC;
slurm_data->num_cpus = num_cpus;
slurm_data->num_nodes = num_nodes;
slurm_data->nodelist = nodelist;
spawn_qty = numC;
if(type_creation == COMM_SPAWN_MERGE || type_creation == COMM_SPAWN_MERGE_PTHREAD) {
if (numP < slurm_data->result_procs) {
spawn_qty = slurm_data->result_procs - numP;
already_created = numP;
}
}
pthread_mutex_init(&spawn_mutex,NULL);
if(type_creation == COMM_SPAWN_SERIAL || slurm_data->type_creation == COMM_SPAWN_MERGE) {
if(myId == root) {
processes_dist(argv, spawn_qty, already_created, type_dist);
} else {
slurm_data->cmd = malloc(1 * sizeof(char));
slurm_data->info = MPI_INFO_NULL;
}
// WORK
generic_spawn(myId, root, slurm_data->spawn_is_single, child, comm);
// END WORK
if(myId == root && slurm_data->info != MPI_INFO_NULL) {
MPI_Info_free(&(slurm_data->info));
}
pthread_mutex_destroy(&spawn_mutex);
free(slurm_data->cmd);
free(slurm_data);
} else if(type_creation == COMM_SPAWN_PTHREAD || slurm_data->type_creation == COMM_SPAWN_MERGE_PTHREAD) {
commState = MAL_SPAWN_PENDING;
if((spawn_is_single && myId == root) || !spawn_is_single || (slurm_data->type_creation == COMM_SPAWN_MERGE_PTHREAD && numP > slurm_data->result_procs)) {
Creation_data *creation_data = (Creation_data *) malloc(sizeof(Creation_data));
creation_data->argv = argv;
creation_data->numP_childs = spawn_qty;
creation_data->already_created = already_created;
creation_data->myId = myId;
creation_data->root = root;
creation_data->type_dist = type_dist;
creation_data->comm = comm;
if(pthread_create(&spawn_thread, NULL, thread_work, (void *)creation_data)) {
printf("Error al crear el hilo de contacto con SLURM\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -1;
}
}
}
return commState;
}
/*
 * Checks whether a configuration to create a new group of processes is ready
 * and, if it is, returns the communicator to these new processes.
 */
int check_slurm_comm(int myId, int root, int numP, MPI_Comm *child, MPI_Comm comm, MPI_Comm comm_thread, double *real_time) {
if(slurm_data->type_creation == COMM_SPAWN_PTHREAD || slurm_data->type_creation == COMM_SPAWN_MERGE_PTHREAD) {
if (slurm_data->type_creation == COMM_SPAWN_MERGE_PTHREAD && numP > slurm_data->result_procs) { //TODO REFACTOR
printf("Error Check spawn: Configuracion invalida\nSe intenta usar el método Spawn junto a un Shrink merge\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -10;
}
if(!slurm_data->spawn_is_single || commState == MAL_SPAWN_SINGLE_PENDING || commState == MAL_SPAWN_COMPLETED) {
int state=-10;
//printf("[%d][3] Test min\n", myId); fflush(stdout);
//pthread_mutex_lock(&spawn_mutex); // TODO Uncomment
MPI_Allreduce(&commState, &state, 1, MPI_INT, MPI_MIN, comm);
//pthread_mutex_unlock(&spawn_mutex);
if(state != MAL_SPAWN_COMPLETED) return state; // Continue only if asynchronous process creation has ended
//printf("[%d][5] Test Passed-----------\n", myId); fflush(stdout);
if(pthread_join(spawn_thread, NULL)) {
printf("Error al esperar al hilo\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -10;
}
*child = *returned_comm;
} else if (slurm_data->spawn_is_single) {
//pthread_mutex_lock(&spawn_mutex); // TODO Uncomment
MPI_Bcast(&commState, 1, MPI_INT, root, comm);
//pthread_mutex_unlock(&spawn_mutex);
int threads_not_spawned = pthread_equal(pthread_self(), spawn_thread);
// Non-root processes join root to finalize the spawn
// They also must join if the application has ended its work
if(commState == MAL_SPAWN_SINGLE_START) {
commState = MAL_SPAWN_SINGLE_PENDING;
if(myId != root && threads_not_spawned) {
Creation_data *creation_data = (Creation_data *) malloc(sizeof(Creation_data));
creation_data->argv = NULL;
creation_data->numP_childs = -1;
creation_data->already_created = -1;
creation_data->myId = myId;
creation_data->root = root;
creation_data->type_dist = -1;
creation_data->comm = comm_thread;
if(pthread_create(&spawn_thread, NULL, thread_work, (void *)creation_data)) {
printf("Error al crear el hilo de apoyo\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -1;
}
}
}
// Continue only if asynchronous process creation has ended or application does not have more work
if(commState != MAL_SPAWN_COMPLETED) return commState;
//printf("[%d][4] Test Passed-----------\n", myId); fflush(stdout);
//Make sure the threads have finished
if(pthread_join(spawn_thread, NULL)) {
printf("Error al esperar al hilo\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -10;
}
*child = *returned_comm;
} else {
printf("Error Check spawn: Configuracion invalida\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -10;
}
} else {
return commState;
}
//Free memory
if(myId == root && slurm_data->info != MPI_INFO_NULL) {
MPI_Info_free(&(slurm_data->info));
}
free(slurm_data->cmd);
free(slurm_data);
pthread_mutex_destroy(&spawn_mutex);
spawn_thread = pthread_self();
*real_time=end_time;
return commState;
}
/*
 * Connects the group of children with the group of parents.
 * Returns an intercommunicator to talk to the parents.
 *
 * It is only used when the creation of the processes has been
 * performed by a single parent process.
 */
*/
void malleability_establish_connection(int myId, int root, MPI_Comm *intercomm) {
char *port_name;
MPI_Comm newintercomm;
if(myId == root) {
port_name = (char *) malloc(MPI_MAX_PORT_NAME * sizeof(char));
MPI_Open_port(MPI_INFO_NULL, port_name);
MPI_Send(port_name, MPI_MAX_PORT_NAME, MPI_CHAR, root, 130, *intercomm);
} else {
port_name = malloc(1);
}
MPI_Comm_accept(port_name, MPI_INFO_NULL, root, MPI_COMM_WORLD, &newintercomm);
if(myId == root) {
MPI_Close_port(port_name);
}
free(port_name);
MPI_Comm_free(intercomm);
*intercomm = newintercomm;
}
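/*
 * Illustrative sketch added for clarity (not part of the original code):
 * how a spawned children group could use malleability_establish_connection
 * right after start-up, paired with the MPI_Recv/MPI_Comm_connect side in
 * single_spawn_connection. The helper name and the use of rank 0 as root
 * are assumptions made only for this example.
 */
void example_child_connects_to_parents(void) {
  int myId;
  MPI_Comm parents;
  MPI_Comm_rank(MPI_COMM_WORLD, &myId);
  MPI_Comm_get_parent(&parents); // Intercommunicator created by MPI_Comm_spawn
  malleability_establish_connection(myId, 0, &parents); // Replaced by a port-based intercommunicator
  // "parents" now refers to the intercommunicator accepted through the opened port
}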
//--------------PRIVATE THREAD FUNCTIONS---------------//
/*
 * Function executed by an auxiliary thread so that it takes care of
 * configuring the creation of a new group of processes.
 *
 * Once the configuration is ready and the processes can be created,
 * the master thread is notified.
 */
void* thread_work(void* creation_data_arg) {
Creation_data *creation_data = (Creation_data*) creation_data_arg;
returned_comm = (MPI_Comm *) malloc(sizeof(MPI_Comm));
if(creation_data->myId == creation_data->root) {
processes_dist(creation_data->argv, creation_data->numP_childs, creation_data->already_created, creation_data->type_dist);
} else {
slurm_data->cmd = malloc(1 * sizeof(char));
slurm_data->info = MPI_INFO_NULL;
}
generic_spawn(creation_data->myId, creation_data->root, slurm_data->spawn_is_single, returned_comm, creation_data->comm);
free(creation_data);
pthread_exit(NULL);
}
//--------------PRIVATE SPAWN CREATION FUNCTIONS---------------//
/*
 * Generic function for process creation. It reads the configuration and,
 * based on it, chooses how the processes should be created.
 *
 * When it finishes, it updates the global state variable to signal the change.
 */
void generic_spawn(int myId, int root, int spawn_is_single, MPI_Comm *child, MPI_Comm comm) {
if(spawn_is_single) {
single_spawn_connection(myId, root, comm, child);
} else {
int rootBcast = MPI_PROC_NULL;
if(myId == root) rootBcast = MPI_ROOT;
create_processes(myId, root, child, comm);
MPI_Bcast(&spawn_is_single, 1, MPI_INT, rootBcast, *child);
}
pthread_mutex_lock(&spawn_mutex);
commState = MAL_SPAWN_COMPLETED;
end_time = MPI_Wtime();
pthread_mutex_unlock(&spawn_mutex);
}
/*
 * Creates a group of processes according to the configuration set up by
 * "processes_dist()".
 */
int create_processes(int myId, int root, MPI_Comm *child, MPI_Comm comm) {
int spawn_err = MPI_Comm_spawn(slurm_data->cmd, MPI_ARGV_NULL, slurm_data->qty_procs, slurm_data->info, root, comm, child, MPI_ERRCODES_IGNORE);
if(spawn_err != MPI_SUCCESS) {
printf("Error creating new set of %d procs.\n", slurm_data->qty_procs);
}
return spawn_err;
}
/*
 * Spawns the new group of processes with the participation of the root
 * process only (through MPI_COMM_SELF) and then connects the remaining
 * parent processes to the children via a port-based intercommunicator.
 */
void single_spawn_connection(int myId, int root, MPI_Comm comm, MPI_Comm *child){
char *port_name;
int auxiliar_conf = COMM_SPAWN_SINGLE;
MPI_Comm newintercomm;
if (myId == root) {
create_processes(myId, root, child, MPI_COMM_SELF);
MPI_Bcast(&auxiliar_conf, 1, MPI_INT, MPI_ROOT, *child);
port_name = (char *) malloc(MPI_MAX_PORT_NAME * sizeof(char));
MPI_Recv(port_name, MPI_MAX_PORT_NAME, MPI_CHAR, root, 130, *child, MPI_STATUS_IGNORE);
commState = MAL_SPAWN_SINGLE_START; // Indicate other processes to join root to end spawn procedure
} else {
port_name = malloc(1);
}
MPI_Comm_connect(port_name, MPI_INFO_NULL, root, comm, &newintercomm);
if(myId == root)
MPI_Comm_free(child);
free(port_name);
*child = newintercomm;
}
//--------------PRIVATE MERGE TYPE FUNCTIONS---------------//
/*
 * Makes sure that the resulting group of processes ends up in a single
 * intracommunicator, merging parents and children into one communicator.
 *
 * Called before the data redistribution.
 *
 * TODO REFACTOR
 */
void proc_adapt_expand(int *numP, int numC, MPI_Comm intercomm, MPI_Comm *comm, int is_children_group) {
MPI_Comm new_comm = MPI_COMM_NULL;
MPI_Intercomm_merge(intercomm, is_children_group, &new_comm); //The group that passes 0 as "high" is ordered first
//MPI_Comm_free(intercomm); TODO New redistribution for these cases and free the intercommunicator here
// *intercomm = MPI_COMM_NULL;
*numP = numC;
if(*comm != MPI_COMM_WORLD && *comm != MPI_COMM_NULL) {
MPI_Comm_free(comm);
}
*comm=new_comm;
}
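/*
 * Illustrative sketch added for clarity (not part of the original code):
 * merging both groups after a spawn with proc_adapt_expand. The group that
 * passes 0 as the "high" argument of MPI_Intercomm_merge (the parents here)
 * is ordered first in the merged intracommunicator, so parents keep the
 * lowest ranks and children follow. The parameter names are assumptions
 * made only for this example.
 */
void example_merge_after_spawn(int numP, int numC, MPI_Comm intercomm, int is_children_group) {
  MPI_Comm comm = MPI_COMM_WORLD; // Current intracommunicator of this group
  proc_adapt_expand(&numP, numC, intercomm, &comm, is_children_group);
  // "comm" now holds the merged intracommunicator and numP has been updated to numC
}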
/*
 * Removes from the resulting group of processes those processes that are
 * no longer needed. The removed processes remain alive as zombies.
 *
 * Called once the data redistribution has finished.
 */
void proc_adapt_shrink(int numC, MPI_Comm *comm, int myId) {
int color = MPI_UNDEFINED;
MPI_Comm new_comm = MPI_COMM_NULL;
if(myId < numC) {
color = 1;
}
MPI_Comm_split(*comm, color, myId, &new_comm);
if(*comm != MPI_COMM_WORLD && *comm != MPI_COMM_NULL) {
//MPI_Comm_free(comm); FIXME
}
*comm=new_comm; // Excluded processes receive MPI_COMM_NULL from the split
}
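/*
 * Illustrative sketch added for clarity (not part of the original code):
 * shrinking from 6 to 4 processes. Ranks 0..3 obtain color 1 and receive the
 * new, smaller communicator; ranks 4 and 5 pass MPI_UNDEFINED to
 * MPI_Comm_split and end up with MPI_COMM_NULL, staying alive as zombies.
 * The duplicated communicator is an assumption made only for this example.
 */
void example_shrink_to_four(int myId) {
  MPI_Comm comm;
  MPI_Comm_dup(MPI_COMM_WORLD, &comm);
  proc_adapt_shrink(4, &comm, myId);
  if(comm == MPI_COMM_NULL) {
    // This process was excluded and remains only as a zombie
  }
}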
/*
 * Configures the creation of a new group of processes: reserves the memory
 * needed for a call to MPI_Comm_spawn, obtains a physical distribution for
 * the processes and creates a hostfile.
 */
void processes_dist(char *argv, int numP_childs, int already_created, int type) {
//int jobId;
//char *tmp;
//job_info_msg_t *j_info;
//slurm_job_info_t last_record;
int used_nodes=0;
int *procs_array;
char *hostfile;
// Get Slurm job info
//tmp = getenv("SLURM_JOB_ID");
//jobId = atoi(tmp);
//slurm_load_job(&j_info, jobId, 1);
//last_record = j_info->job_array[j_info->record_count - 1];
//COPY PROGRAM NAME
slurm_data->cmd = malloc((strlen(argv) + 1) * sizeof(char)); // +1 for the terminating null byte
strcpy(slurm_data->cmd, argv);
// GET NEW DISTRIBUTION
node_dist(type, numP_childs, already_created, &procs_array, &used_nodes);
slurm_data->qty_procs = numP_childs;
/*
// CREATE/UPDATE HOSTFILE
int ptr;
ptr = create_hostfile(tmp, &hostfile);
MPI_Info_create(&(slurm_data->info));
MPI_Info_set(slurm_data->info, "hostfile", hostfile);
free(hostfile);
// SET NEW DISTRIBUTION
fill_hostfile(last_record, ptr, procs_array, used_nodes);
close(ptr);
*/
// CREATE AND SET STRING HOSTFILE
fill_str_hostfile(procs_array, used_nodes, &hostfile);
MPI_Info_create(&(slurm_data->info));
MPI_Info_set(slurm_data->info, "hosts", hostfile);
free(hostfile);
free(procs_array);
// Free JOB INFO
//slurm_free_job_info_msg(j_info);
}
/*
 * Obtains the physical distribution of the group of processes to create,
 * returning how many nodes will be used and how many processes each node
 * will host. A worked example is given after the function.
 *
 * Two kinds of physical distribution are allowed depending on "type":
 *
 * COMM_PHY_NODES (1): Aims to balance the number of processes across all
 * the available nodes.
 * COMM_PHY_CPU (2): Aims to fill the capacity of one node before
 * occupying the next one.
 */
void node_dist(int type, int total_procs, int already_created, int **qty, int *used_nodes) {
int i, asigCores;
int tamBl, remainder;
int *procs;
procs = calloc(slurm_data->num_nodes, sizeof(int)); // Number of processes per node
/* GET NEW DISTRIBUTION */
if(type == 1) { // DIST NODES
*used_nodes = slurm_data->num_nodes;
tamBl = total_procs / slurm_data->num_nodes;
remainder = total_procs % slurm_data->num_nodes;
for(i=0; i<remainder; i++) {
procs[i] = tamBl + 1;
}
for(i=remainder; i<slurm_data->num_nodes; i++) {
procs[i] = tamBl;
}
} else if (type == 2) { // DIST CPUs
tamBl = slurm_data->num_cpus / slurm_data->num_nodes;
asigCores = 0;
i = *used_nodes = already_created / tamBl;
remainder = already_created % tamBl;
//First node could already have existing procs
if (remainder) {
procs[i] = asigCores = tamBl - remainder;
i = (i+1) % slurm_data->num_nodes;
(*used_nodes)++;
}
//Assign tamBl processes to each node
while(asigCores+tamBl <= total_procs) {
asigCores += tamBl;
procs[i] += tamBl;
i = (i+1) % slurm_data->num_nodes;
(*used_nodes)++;
}
//Last node could have fewer procs than tamBl
if(asigCores < total_procs) {
procs[i] += total_procs - asigCores;
(*used_nodes)++;
}
if(*used_nodes > slurm_data->num_nodes) *used_nodes = slurm_data->num_nodes; //FIXME Shouldn't this be an error if it happens?
}
*qty = calloc(*used_nodes, sizeof(int)); // Number of processes per node
for(i=0; i< *used_nodes; i++) {
(*qty)[i] = procs[i];
}
free(procs);
}
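/*
 * Worked example added for clarity (not part of the original code), assuming
 * 3 nodes with 4 CPUs each (tamBl = 4):
 *  - Type 1 (COMM_PHY_NODES) with 7 new processes: tamBl = 7/3 = 2 and
 *    remainder = 1, so the per-node counts are {3, 2, 2} over 3 nodes.
 *  - Type 2 (COMM_PHY_CPU) with 7 new processes and 2 already created:
 *    node 0 is completed with 2 more processes, node 1 receives a full
 *    block of 4 and node 2 takes the remaining 1, giving {2, 4, 1}.
 */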
/*
 * Creates and returns a string to be used as the value of the "hosts"
 * key when creating processes, indicating where they have to be created.
 */
void fill_str_hostfile(int *qty, int used_nodes, char **hostfile_str) {
int i=0, len=0;
char *host;
hostlist_t hostlist;
hostlist = slurm_hostlist_create(slurm_data->nodelist);
while ( (host = slurm_hostlist_shift(hostlist)) && i < used_nodes) {
if(qty[i] != 0) {
len = write_str_node(hostfile_str, len, qty[i], host);
}
i++;
free(host);
}
slurm_hostlist_destroy(hostlist);
}
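/*
 * Illustrative sketch added for clarity (not part of the original code):
 * assuming slurm_data has already been filled and its nodelist is "n[0-2]",
 * the per-node counts {2, 4, 1} would expand to the string
 * "n0,n0,n1,n1,n1,n1,n2". Both the nodelist value and the counts are
 * assumptions made only for this example.
 */
void example_fill_hosts_key(void) {
  int qty[3] = {2, 4, 1};
  char *hosts = NULL;
  fill_str_hostfile(qty, 3, &hosts); // Expands slurm_data->nodelist and repeats each host qty[i] times
  // "hosts" could now be passed as the value of the "hosts" MPI_Info key
  free(hosts);
}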
/*
 * Appends "qty" entries of "node_name" to a string.
 * Allocates the memory and reallocates it if needed.
 */
int write_str_node(char **hostfile_str, int len_og, int qty, char *node_name) {
int err, len_node, len, i;
char *ocurrence;
len_node = strlen(node_name);
len = qty * (len_node + 1);
if(len_og == 0) { // Memory not reserved yet
*hostfile_str = (char *) malloc(len * sizeof(char)); // len-1 characters plus the terminating null byte
} else { // The string already holds data
*hostfile_str = (char *) realloc(*hostfile_str, (len_og + len) * sizeof(char));
}
if(*hostfile_str == NULL) return -1; // Could not allocate the memory
ocurrence = (char *) malloc((len_node+2) * sizeof(char)); // ",%s" needs len_node+1 characters plus the null byte
if(ocurrence == NULL) return -1; // Could not allocate the memory
err = sprintf(ocurrence, ",%s", node_name);
if(err < 0) return -2; // Could not write to the auxiliary variable
i=0;
if(len_og == 0) { // When initializing, the first entry is a plain copy
i++;
strcpy(*hostfile_str, node_name);
}
for(; i<qty; i++){ // The remaining entries are concatenated
strcat(*hostfile_str, ocurrence);
}
free(ocurrence);
return len+len_og;
}
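/*
 * Illustrative usage added for clarity (not part of the original code):
 * building the comma-separated "hosts" value by hand. Two processes on node
 * "n0" followed by one on "n1" produce "n0,n0,n1"; the returned length is
 * passed back as "len_og" on the next call. The node names are assumptions
 * made only for this example.
 */
void example_build_hosts_string(void) {
  char *hosts = NULL;
  int len = 0;
  len = write_str_node(&hosts, len, 2, "n0"); // hosts = "n0,n0"
  len = write_str_node(&hosts, len, 1, "n1"); // hosts = "n0,n0,n1"
  free(hosts);
}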
//====================================================
//====================================================
//============DEPRECATED FUNCTIONS====================
//====================================================
//====================================================
/*
 * @deprecated
 * Creates a file to be used as the hostfile
 * for a new group of processes.
 *
 * The name is returned in the "file_name" argument,
 * which has to be an empty pointer.
 *
 * A file descriptor is also returned so the
 * file can be modified.
 */
int create_hostfile(char *jobId, char **file_name) {
int ptr, err, len;
len = strlen(jobId) + 11;
*file_name = NULL;
*file_name = malloc( len * sizeof(char));
if(*file_name == NULL) return -1; // Could not allocate the memory
err = snprintf(*file_name, len, "hostfile.o%s", jobId);
if(err < 0) return -2; // Could not build the file name
ptr = open(*file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
if(ptr < 0) return -3; // Could not create the file
return ptr; // Return the file descriptor
}
/*
 * @deprecated
 * Fills the hostfile referred to by ptr with the names of the nodes
 * to use, taken from "job_record", and the number of processes each
 * node will host, taken from "qty".
 */
void fill_hostfile(slurm_job_info_t job_record, int ptr, int *qty, int used_nodes) {
int i=0;
char *host;
hostlist_t hostlist;
hostlist = slurm_hostlist_create(job_record.nodes);
while ( (host = slurm_hostlist_shift(hostlist)) && i < used_nodes) {
write_hostfile_node(ptr, qty[i], host);
i++;
free(host);
}
slurm_hostlist_destroy(hostlist);
}
/*
 * @deprecated
 * Writes a new line into the hostfile referred to by ptr.
 *
 * The line states the name of a node and the number of processes
 * to host on that node.
 */
int write_hostfile_node(int ptr, int qty, char *node_name) {
int err, len_node, len_int, len;
char *line;
len_node = strlen(node_name);
len_int = snprintf(NULL, 0, "%d", qty);
len = len_node + len_int + 3;
line = malloc(len * sizeof(char));
if(line == NULL) return -1; // Could not allocate the memory
err = snprintf(line, len, "%s:%d\n", node_name, qty);
if(err < 0) return -2; // Could not format the line
write(ptr, line, len-1);
free(line);
return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <string.h>
#include <slurm/slurm.h>
#include "malleabilityStates.h"
int init_slurm_comm(char *argv, int num_cpus, int num_nodes, char *nodelist, int myId, int numP, int numC, int root, int type_dist, int type_creation, int spawn_is_single, MPI_Comm comm, MPI_Comm *child);
int check_slurm_comm(int myId, int root, int numP, MPI_Comm *child, MPI_Comm comm, MPI_Comm comm_thread, double *end_real_time);
void malleability_establish_connection(int myId, int root, MPI_Comm *intercomm);
void proc_adapt_expand(int *numP, int numC, MPI_Comm intercomm, MPI_Comm *comm, int is_children_group);
void proc_adapt_shrink(int numC, MPI_Comm *comm, int myId);