Commit 2639ab13 authored by Iker Martín Álvarez

Merge branch 'dev' into 'master'

New version of Proteo

See merge request martini/malleability_benchmark!6
parents 26305fac e83b5922
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sched.h>
#include <mpi.h>
#include "MAM_RMS.h"
#include "MAM_DataStructures.h"
#if MAM_USE_SLURM
#include <slurm/slurm.h>
int MAM_I_slurm_getenv_hosts_info();
int MAM_I_slurm_getjob_hosts_info();
#endif
int MAM_I_get_hosts_info();
int GetCPUCount();
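/*
 * Obtains the nodelist, the number of nodes and the number of CPUs per node of the
 * current job and stores them in the MaM global structure. When compiled with Slurm
 * support, the Slurm job API is tried first and the Slurm environment variables
 * second; otherwise the information is rebuilt from MPI_Get_processor_name. The
 * application is aborted if no method succeeds.
 */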
void MAM_check_hosts() {
int not_filled = 1;
#if MAM_USE_SLURM
not_filled = MAM_I_slurm_getjob_hosts_info();
if(not_filled) {
#if MAM_DEBUG >= 2
DEBUG_FUNC("WARNING - RMS info retriever failed with slurm functions. Trying with ENV variables", mall->myId, mall->numP);
#endif
if(mall->nodelist != NULL) {
free(mall->nodelist);
mall->nodelist = NULL;
}
not_filled = MAM_I_slurm_getenv_hosts_info();
}
#endif
if(not_filled) {
if(mall->nodelist != NULL) {
free(mall->nodelist);
mall->nodelist = NULL;
}
not_filled = MAM_I_get_hosts_info();
}
if(not_filled) {
if(mall->myId == mall->root) printf("MAM FATAL ERROR: It has not been possible to obtain the nodelist\n");
fflush(stdout);
MPI_Abort(mall->comm, -50);
}
#if MAM_DEBUG >= 2
if(mall->myId == mall->root) {
DEBUG_FUNC("Obtained Nodelist", mall->myId, mall->numP);
printf("NODELIST: %s\nNODE_COUNT: %d NUM_CPUS_PER_NODE: %d\n", mall->nodelist, mall->num_nodes, mall->num_cpus);
fflush(stdout);
}
#endif
}
/*
 * @brief Determine whether a group of processes uses an internode communicator
*
* This function checks the physical distribution of all ranks in the
* original communicator passed to MaM. If all of them reside in the
* same host, false is returned. True is returned otherwise.
*
* @return Integer indicating if more than one node is used by the
* original communicator (>0) or only one (0).
*/
int MAM_Is_internode_group() {
int i, name_len, max_name_len, unique_count;
int myId, numP;
char *my_host, *all_hosts, *tested_host;
MPI_Comm_rank(mall->original_comm, &myId);
MPI_Comm_size(mall->original_comm, &numP);
unique_count = 0; //First node is not counted
if(numP == 1) return unique_count;
all_hosts = NULL;
my_host = (char *) malloc(MPI_MAX_PROCESSOR_NAME * sizeof(char));
MPI_Get_processor_name(my_host, &name_len);
MPI_Allreduce(&name_len, &max_name_len, 1, MPI_INT, MPI_MAX, mall->original_comm);
my_host[max_name_len] = '\0';
max_name_len++; // Len does not consider terminating character
if(myId == MAM_ROOT) {
all_hosts = (char *) malloc(numP * max_name_len * sizeof(char));
}
//FIXME Should be a Gatherv as each host could have uninitialised chars between name_len and max_name_len
MPI_Gather(my_host, max_name_len, MPI_CHAR, all_hosts, max_name_len, MPI_CHAR, MAM_ROOT, mall->original_comm);
if(myId == MAM_ROOT) {
for (i = 1; i < numP; i++) {
tested_host = all_hosts + (i * max_name_len);
if (strcmp(my_host, tested_host) != 0) {
unique_count++;
break;
}
}
free(all_hosts);
}
MPI_Bcast(&unique_count, 1, MPI_INT, MAM_ROOT, mall->original_comm);
free(my_host);
return unique_count;
}
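/*
 * Illustrative usage (hypothetical caller, not part of this file):
 *
 *   if(MAM_Is_internode_group()) {
 *     // The original communicator spans more than one node
 *   }
 */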
/*
* TODO
 * FIXME Does not consider heterogeneous machines for num_cpus
* FIXME Always returns 0... -- Perform error checking?
*/
int MAM_I_get_hosts_info() {
int i, j, name_len, max_name_len, unique_count, *unique_hosts;
char *my_host, *all_hosts, *confirmed_host, *tested_host;
all_hosts = NULL;
my_host = (char *) malloc(MPI_MAX_PROCESSOR_NAME * sizeof(char));
MPI_Get_processor_name(my_host, &name_len);
MPI_Allreduce(&name_len, &max_name_len, 1, MPI_INT, MPI_MAX, mall->comm);
my_host[max_name_len] = '\0';
max_name_len++; // Len does not consider terminating character
if(mall->myId == mall->root) {
all_hosts = (char *) malloc(mall->numP * max_name_len * sizeof(char));
unique_hosts = (int *) malloc(mall->numP * sizeof(int));
unique_hosts[0] = 0; //First host will always be unique
unique_count = 1;
}
//FIXME Should be a Gatherv as each host could have uninitialised chars between name_len and max_name_len
MPI_Gather(my_host, max_name_len, MPI_CHAR, all_hosts, max_name_len, MPI_CHAR, mall->root, mall->comm);
if(mall->myId == mall->root) {
for (i = 1; i < mall->numP; i++) {
tested_host = all_hosts + (i * max_name_len);
for (j = 0; j < unique_count; j++) {
confirmed_host = all_hosts + (unique_hosts[j] * max_name_len);
if (strcmp(tested_host, confirmed_host) == 0) break; // Host already confirmed
}
if (j == unique_count) { // The host did not match any confirmed host, so it is unique
unique_hosts[unique_count] = i;
unique_count++;
}
}
mall->num_nodes = unique_count;
mall->num_cpus = GetCPUCount();
mall->nodelist_len = unique_count*max_name_len;
mall->nodelist = (char *) malloc(mall->nodelist_len * sizeof(char));
strcpy(mall->nodelist, ""); //FIXME Strcat can be very inneficient...
for (i = 0; i < unique_count; i++) {
confirmed_host = all_hosts + (unique_hosts[i] * max_name_len);
strcat(mall->nodelist, confirmed_host);
if (i < unique_count - 1) {
strcat(mall->nodelist, ",");
}
}
free(all_hosts);
free(unique_hosts);
}
free(my_host);
return 0;
}
/*
* @brief Get the total number of CPUs available to the process.
*
* This function uses sched_getaffinity to obtain the CPU affinity of the current process
* and counts the number of CPUs in the affinity set. It adjusts the loop based on the
* maximum number of CPUs allowed on the system.
*
* @return The total number of CPUs available to the process.
*
* Code obtained from: https://stackoverflow.com/questions/4586405/how-to-get-the-number-of-cpus-in-linux-using-c
* The code has been slightly modified.
*/
int GetCPUCount() {
cpu_set_t cs;
CPU_ZERO(&cs);
sched_getaffinity(0, sizeof(cs), &cs);
int count = 0;
int max_cpus = sysconf(_SC_NPROCESSORS_ONLN);
for (int i = 0; i < max_cpus; i++) {
if (CPU_ISSET(i, &cs)) {
count++;
} else {
break;
}
}
return count;
}
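/*
 * Illustrative alternative (not the code path used above): with _GNU_SOURCE, glibc
 * also provides the CPU_COUNT() macro, which counts every bit set in the affinity
 * mask and therefore also handles non-contiguous masks:
 *
 *   int count = CPU_COUNT(&cs);
 */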
#if MAM_USE_SLURM
/*
* TODO
*/
int MAM_I_slurm_getenv_hosts_info() {
char *tmp = NULL, *tmp_copy, *token;
int cpus, count;
//int i, *cpus_counts, *nodes_counts, *aux;
tmp = getenv("SLURM_JOB_NUM_NODES");
if(tmp == NULL) return 1;
mall->num_nodes = atoi(tmp);
tmp = NULL;
tmp = getenv("SLURM_JOB_NODELIST");
if(tmp == NULL) return 1;
mall->nodelist_len = strlen(tmp)+1;
mall->nodelist = (char *) malloc(mall->nodelist_len * sizeof(char));
strcpy(mall->nodelist, tmp);
tmp = NULL;
//EXAMPLE - SLURM_JOB_CPUS_PER_NODE='72(x2),36'
//It indicates that two nodes have 72 CPUs each and the third has 36 CPUs
tmp = getenv("SLURM_JOB_CPUS_PER_NODE");
if(tmp == NULL) return 1;
tmp_copy = (char *) malloc((strlen(tmp)+1) * sizeof(char));
strcpy(tmp_copy, tmp);
token = strtok(tmp_copy, ",");
//TODO When MaM considers heteregenous allocations, these will be needed instead of num_cpus.
//cpus_counts = (int *) malloc(mall->num_nodes * sizeof(int));
//nodes_counts = (int *) malloc(mall->num_nodes * sizeof(int));
//i = 0;
mall->num_cpus = 0;
while (token != NULL) {
// If the current token refers to only one node, the second portion
// does not appear and sscanf does not modify "count"
// First portion --> "%d"
// Second portion -> "(x%d)"
count = 1;
if (sscanf(token, "%d(x%d)", &cpus, &count) >= 1) {
mall->num_cpus = cpus; // num_cpus stores the number of cores per node
//cpus_per_node[i] = cpus;
//nodes_count[i] = count;
//i++;
}
token = strtok(NULL, ",");
}
/*
if(i < mall->num_nodes) {
aux = (int *) realloc(cpus_per_node, i * sizeof(int));
if(cpus_per_node != aux && cpus_per_node != NULL) free(cpus_per_node);
cpus_per_node = aux;
aux = (int *) realloc(nodes_counts, i * sizeof(int));
if(nodes_count != aux && nodes_count != NULL) free(nodes_count);
nodes_count = aux;
}
*/
free(tmp_copy);
return 0;
}
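/*
 * Worked example (illustrative): with SLURM_JOB_CPUS_PER_NODE="72(x2),36" the loop
 * above processes two tokens. "72(x2)" matches both portions of the format, giving
 * cpus=72 and count=2; "36" matches only the first portion, so cpus=36 and count
 * keeps its default of 1. As num_cpus is a single value, the last token wins.
 */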
/*
* TODO
* FIXME Does not consider heterogenous machines
*/
int MAM_I_slurm_getjob_hosts_info() {
int jobId, err;
char *tmp = NULL;
job_info_msg_t *j_info;
slurm_job_info_t last_record;
tmp = getenv("SLURM_JOB_ID");
if(tmp == NULL) return 1;
jobId = atoi(tmp);
err = slurm_load_job(&j_info, jobId, 1); // FIXME Valgrind Not freed
if(err) return err;
last_record = j_info->job_array[j_info->record_count - 1];
mall->num_nodes = last_record.num_nodes;
mall->num_cpus = last_record.num_cpus / last_record.num_nodes;
mall->nodelist_len = strlen(last_record.nodes)+1;
mall->nodelist = (char *) malloc(mall->nodelist_len * sizeof(char));
strcpy(mall->nodelist, last_record.nodes);
slurm_free_job_info_msg(j_info);
return 0;
}
#endif
//TODO Refactor for when MaM communicates with the RMS
// Get Slurm job info
//int jobId;
//char *tmp;
//job_info_msg_t *j_info;
//slurm_job_info_t last_record;
//tmp = getenv("SLURM_JOB_ID");
//jobId = atoi(tmp);
//slurm_load_job(&j_info, jobId, 1);
//last_record = j_info->job_array[j_info->record_count - 1];
// Free JOB INFO
//slurm_free_job_info_msg(j_info);
#ifndef MAM_RMS_H
#define MAM_RMS_H
void MAM_check_hosts();
int MAM_Is_internode_group();
#endif
#include "MAM_Times.h"
#include "MAM_DataStructures.h"
void def_malleability_times(MPI_Datatype *new_type);
void init_malleability_times() {
#if MAM_DEBUG >= 4
DEBUG_FUNC("Initializing recording structure", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(mall->comm);
#endif
mall_conf->times = (malleability_times_t *) malloc(sizeof(malleability_times_t));
if(mall_conf->times == NULL) {
perror("Error al crear la estructura de tiempos interna para maleabilidad\n");
MPI_Abort(MPI_COMM_WORLD, -5);
}
reset_malleability_times();
def_malleability_times(&mall_conf->times->times_type);
#if MAM_DEBUG >= 4
DEBUG_FUNC("Initialized recording structure", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(mall->comm);
#endif
}
void reset_malleability_times() {
malleability_times_t *times = mall_conf->times;
times->spawn_start = 0; times->sync_start = 0; times->async_start = 0; times->user_start = 0; times->malleability_start = 0;
times->sync_end = 0; times->async_end = 0; times->user_end = 0; times->malleability_end = 0;
times->spawn_time = 0;
}
void free_malleability_times() {
#if MAM_DEBUG >= 4
DEBUG_FUNC("Freeing recording structure", mall->myId, mall->numP); fflush(stdout);
#endif
if(mall_conf->times != NULL) {
if(mall_conf->times->times_type != MPI_DATATYPE_NULL) {
MPI_Type_free(&mall_conf->times->times_type);
mall_conf->times->times_type = MPI_DATATYPE_NULL;
}
free(mall_conf->times);
}
#if MAM_DEBUG >= 4
DEBUG_FUNC("Freed recording structure", mall->myId, mall->numP); fflush(stdout);
#endif
}
/*
 * @brief Returns the times used for the different steps of the last reconfiguration.
*
* This function is intended to be called when a reconfiguration has ended.
* It is designed to provide the necessary information for the user to perform data redistribution.
*
 * Null values can be passed if there is no interest in retrieving particular times
*
* Parameters:
* - double *sp_time: A pointer where the spawn time will be saved.
 * - double *sy_time: A pointer where the synchronous data redistribution time will be saved.
 * - double *asy_time: A pointer where the asynchronous data redistribution time will be saved.
* - double *user_time: A pointer where the user data redistribution time will be saved.
* - double *mall_time: A pointer where the malleability time will be saved.
*/
void MAM_Retrieve_times(double *sp_time, double *sy_time, double *asy_time, double *user_time, double *mall_time) {
malleability_times_t *times = mall_conf->times;
if(sp_time != NULL) *sp_time = times->spawn_time;
if(sy_time != NULL) *sy_time = times->sync_end - times->sync_start;
if(asy_time != NULL) *asy_time = times->async_end - times->async_start;
if(user_time != NULL) *user_time = times->user_end - times->user_start;
if(mall_time != NULL) *mall_time = times->malleability_end - times->malleability_start;
}
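/*
 * Illustrative usage (hypothetical caller, not part of this file): after a
 * reconfiguration has finished, retrieve only the timings of interest and pass
 * NULL for the rest.
 *
 *   double spawn_t, mall_t;
 *   MAM_Retrieve_times(&spawn_t, NULL, NULL, NULL, &mall_t);
 *   printf("Spawn: %lf s -- Malleability total: %lf s\n", spawn_t, mall_t);
 */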
void malleability_times_broadcast(int root) {
MPI_Bcast(mall_conf->times, 1, mall_conf->times->times_type, root, mall->intercomm);
}
void def_malleability_times(MPI_Datatype *new_type) {
int i, counts = 5;
int blocklengths[counts];
MPI_Aint displs[counts], dir;
MPI_Datatype types[counts];
blocklengths[0] = blocklengths[1] = blocklengths[2] = blocklengths[3] = blocklengths[4] = 1;
types[0] = types[1] = types[2] = types[3] = types[4] = MPI_DOUBLE;
// The times vector is passed through the address of "mall_conf"
// Fill the displs vector
MPI_Get_address(mall_conf->times, &dir);
// Get the base address
MPI_Get_address(&(mall_conf->times->spawn_time), &displs[0]);
MPI_Get_address(&(mall_conf->times->sync_start), &displs[1]);
MPI_Get_address(&(mall_conf->times->async_start), &displs[2]);
MPI_Get_address(&(mall_conf->times->user_start), &displs[3]);
MPI_Get_address(&(mall_conf->times->malleability_start), &displs[4]);
for(i=0;i<counts;i++) displs[i] -= dir;
MPI_Type_create_struct(counts, blocklengths, displs, types, new_type);
MPI_Type_commit(new_type);
}
#ifndef MAM_TIMES_H
#define MAM_TIMES_H
#include <mpi.h>
void init_malleability_times();
void reset_malleability_times();
void free_malleability_times();
void malleability_times_broadcast(int root);
void MAM_I_retrieve_times(double *sp_time, double *sy_time, double *asy_time, double *user_time, double *mall_time);
#endif
#ifndef MAM_TIMES_RETRIEVE_H
#define MAM_TIMES_RETRIEVE_H
void MAM_Retrieve_times(double *sp_time, double *sy_time, double *asy_time, double *user_time, double *mall_time);
#endif
#include "malleabilityTypes.h" #include "MAM_Types.h"
#include "MAM_DataStructures.h"
#include "MAM_Configuration.h"
void init_malleability_data_struct(malleability_data_t *data_struct, int size); void init_malleability_data_struct(malleability_data_t *data_struct, size_t size);
void realloc_malleability_data_struct(malleability_data_t *data_struct, int qty_to_add); void realloc_malleability_data_struct(malleability_data_t *data_struct, size_t qty_to_add);
void def_malleability_entries(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, MPI_Datatype *new_type); void def_malleability_entries(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, MPI_Datatype *new_type);
void def_malleability_qty_type(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, MPI_Datatype *new_type); void def_malleability_qty_type(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, MPI_Datatype *new_type);
@@ -20,26 +22,58 @@ void def_malleability_qty_type(malleability_data_t *data_struct_rep, malleabilit
 * all the parents. The new "data" array only represents the data
 * held by this parent.
 */
void add_data(void *data, size_t total_qty, MPI_Datatype type, size_t request_qty, malleability_data_t *data_struct) {
  size_t i;

  if(data_struct->entries == 0) {
    init_malleability_data_struct(data_struct, MAM_TYPES_INIT_DATA_QTY);
  } else if(data_struct->entries == data_struct->max_entries) {
    realloc_malleability_data_struct(data_struct, MAM_TYPES_INIT_DATA_QTY);
  }

  data_struct->qty[data_struct->entries] = total_qty;
  data_struct->types[data_struct->entries] = type;
  data_struct->arrays[data_struct->entries] = data;
  data_struct->request_qty[data_struct->entries] = request_qty;

  if(request_qty) {
    data_struct->requests[data_struct->entries] = (MPI_Request *) malloc(request_qty * sizeof(MPI_Request));
    for(i=0; i < request_qty; i++) {
      data_struct->requests[data_struct->entries][i] = MPI_REQUEST_NULL;
    }
  }

  data_struct->entries += 1;
}
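/*
 * Illustrative usage (hypothetical caller, not part of this file): register a data
 * set of N elements in total, of which this process holds the local part, reserving
 * one request slot for its asynchronous redistribution. "dist_data" is a hypothetical
 * malleability_data_t variable.
 *
 *   double *local_part = ...;  // local portion of the distributed array
 *   add_data(local_part, N, MPI_DOUBLE, 1, &dist_data);
 */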
/*
 * Modifies, in the data structure to be communicated to the children, an already
 * registered data set with a total of "total_qty" elements distributed among all
 * the parents. The new "data" array only represents the data held by this parent.
 */
void modify_data(void *data, size_t index, size_t total_qty, MPI_Datatype type, size_t request_qty, malleability_data_t *data_struct) {
  size_t i;

  if(index >= data_struct->entries) { // Index does not exist
    return;
  }

  if(data_struct->requests[index] != NULL) {
    //free(data_struct->requests[index]); TODO Error when trying to free
    data_struct->requests[index] = NULL;
  }

  data_struct->qty[index] = total_qty;
  data_struct->types[index] = type;
  data_struct->arrays[index] = data;
  data_struct->request_qty[index] = request_qty;

  if(request_qty) {
    data_struct->requests[index] = (MPI_Request *) malloc(request_qty * sizeof(MPI_Request));
    for(i=0; i < request_qty; i++) {
      data_struct->requests[index][i] = MPI_REQUEST_NULL;
    }
  }
}
/*
 * Communicate the synchronous or asynchronous data structures from the parents to the children
@@ -48,37 +82,42 @@ void add_data(void *data, int total_qty, int type, int request_qty, malleability
 * In the "root" argument, every process must indicate which one is the root
 * process of the parents only.
 */
void comm_data_info(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, int is_children_group) {
  int type_size;
  size_t i, j;
  MPI_Datatype entries_type, struct_type;

  // Send the number of entries first
  def_malleability_entries(data_struct_dist, data_struct_rep, &entries_type);
  MPI_Bcast(MPI_BOTTOM, 1, entries_type, mall->root_collectives, mall->intercomm);

  if(is_children_group) {
    if(data_struct_rep->entries != 0) { init_malleability_data_struct(data_struct_rep, data_struct_rep->entries); }
    if(data_struct_dist->entries != 0) { init_malleability_data_struct(data_struct_dist, data_struct_dist->entries); } //FIXME Valgrind not freed
  }

  def_malleability_qty_type(data_struct_dist, data_struct_rep, &struct_type);
  MPI_Bcast(MPI_BOTTOM, 1, struct_type, mall->root_collectives, mall->intercomm);

  if(is_children_group) {
    for(i=0; i < data_struct_rep->entries; i++) {
      MPI_Type_size(data_struct_rep->types[i], &type_size);
      data_struct_rep->arrays[i] = (void *) malloc(data_struct_rep->qty[i] * (size_t) type_size); //FIXME This memory is not freed -- How should it be done?
      if(data_struct_rep->request_qty[i]) {
        data_struct_rep->requests[i] = (MPI_Request *) malloc(data_struct_rep->request_qty[i] * sizeof(MPI_Request));
        for(j=0; j < data_struct_rep->request_qty[i]; j++) {
          data_struct_rep->requests[i][j] = MPI_REQUEST_NULL;
        }
      }
    }
    for(i=0; i < data_struct_dist->entries; i++) {
      data_struct_dist->arrays[i] = (void *) NULL; // TODO Could it be initialised here?
      if(data_struct_dist->request_qty[i]) {
        data_struct_dist->requests[i] = (MPI_Request *) malloc(data_struct_dist->request_qty[i] * sizeof(MPI_Request));
        for(j=0; j < data_struct_dist->request_qty[i]; j++) {
          data_struct_dist->requests[i][j] = MPI_REQUEST_NULL;
        }
      }
    }
  }
@@ -97,14 +136,22 @@ void comm_data_info(malleability_data_t *data_struct_rep, malleability_data_t *d
 * characteristics of location and usage. It is initialised to hold up to
 * "size" elements.
 */
void init_malleability_data_struct(malleability_data_t *data_struct, size_t size) {
  size_t i;

  data_struct->max_entries = size;
  data_struct->qty = (size_t *) malloc(size * sizeof(size_t));
  data_struct->types = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype));
  data_struct->request_qty = (size_t *) malloc(size * sizeof(size_t));
  data_struct->requests = (MPI_Request **) malloc(size * sizeof(MPI_Request *));
  data_struct->windows = (MPI_Win *) malloc(size * sizeof(MPI_Win));
  data_struct->arrays = (void **) malloc(size * sizeof(void *));

  for(i=0; i<size; i++) { // calloc and memset do not guarantee a NULL pointer value
    data_struct->requests[i] = NULL;
    data_struct->windows[i] = MPI_WIN_NULL;
    data_struct->arrays[i] = NULL;
  }
}
/*
@@ -112,43 +159,83 @@ void init_malleability_data_struct(malleability_data_t *data_struct, int size) {
 * characteristics of location and usage. "size" new entries are added
 * to the existing ones.
 */
void realloc_malleability_data_struct(malleability_data_t *data_struct, size_t qty_to_add) {
  size_t i, needed, *qty_aux, *request_qty_aux;
  MPI_Datatype *types_aux;
  MPI_Win *windows_aux;
  MPI_Request **requests_aux;
  void **arrays_aux;

  needed = data_struct->max_entries + qty_to_add;

  qty_aux = (size_t *) realloc(data_struct->qty, needed * sizeof(size_t));
  types_aux = (MPI_Datatype *) realloc(data_struct->types, needed * sizeof(MPI_Datatype));
  request_qty_aux = (size_t *) realloc(data_struct->request_qty, needed * sizeof(size_t));
  requests_aux = (MPI_Request **) realloc(data_struct->requests, needed * sizeof(MPI_Request *));
  windows_aux = (MPI_Win *) realloc(data_struct->windows, needed * sizeof(MPI_Win));
  arrays_aux = (void **) realloc(data_struct->arrays, needed * sizeof(void *));

  if(qty_aux == NULL || arrays_aux == NULL || requests_aux == NULL || types_aux == NULL || request_qty_aux == NULL || windows_aux == NULL) {
    fprintf(stderr, "Fatal error - could not reallocate the memory for the data to redistribute/communicate\n");
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  for(i=data_struct->max_entries; i<needed; i++) { // realloc does not guarantee a NULL pointer value
    requests_aux[i] = NULL;
    windows_aux[i] = MPI_WIN_NULL;
    arrays_aux[i] = NULL;
  }

  // Check if the old arrays can be freed
  // FIXME After a successful realloc the old pointer must not be freed again; these frees are only safe when the pointer was not moved or reallocated
  if(data_struct->qty != qty_aux && data_struct->qty != NULL) free(data_struct->qty);
  if(data_struct->types != types_aux && data_struct->types != NULL) free(data_struct->types);
  if(data_struct->request_qty != request_qty_aux && data_struct->request_qty != NULL) free(data_struct->request_qty);
  if(data_struct->requests != requests_aux && data_struct->requests != NULL) free(data_struct->requests);
  if(data_struct->windows != windows_aux && data_struct->windows != NULL) free(data_struct->windows);
  if(data_struct->arrays != arrays_aux && data_struct->arrays != NULL) free(data_struct->arrays);

  data_struct->qty = qty_aux;
  data_struct->types = types_aux;
  data_struct->request_qty = request_qty_aux;
  data_struct->requests = requests_aux;
  data_struct->windows = windows_aux;
  data_struct->arrays = arrays_aux;
  data_struct->max_entries = needed;
}
void free_malleability_data_struct(malleability_data_t *data_struct) {
  size_t i, j, max;

  max = data_struct->entries;
  if(max != 0) {
    if(data_struct->qty != NULL) {
      free(data_struct->qty);
    }
    if(data_struct->types != NULL) {
      free(data_struct->types);
    }

    if(data_struct->requests != NULL && data_struct->request_qty != NULL) {
      for(i=0; i<max; i++) {
        if(data_struct->requests[i] != NULL) {
          for(j=0; j<data_struct->request_qty[i]; j++) {
            if(data_struct->requests[i][j] != MPI_REQUEST_NULL) {
              MPI_Request_free(&(data_struct->requests[i][j]));
              data_struct->requests[i][j] = MPI_REQUEST_NULL;
            }
          }
          free(data_struct->requests[i]);
        }
      }
      free(data_struct->request_qty);
      free(data_struct->requests);
    }

    if(data_struct->windows != NULL) {
      free(data_struct->windows);
    }
    if(data_struct->arrays != NULL) {
      free(data_struct->arrays);
    }
  }
}
@@ -166,10 +253,11 @@ void def_malleability_entries(malleability_data_t *data_struct_rep, malleability
  int counts = 2;
  int blocklengths[counts];
  MPI_Aint displs[counts];
  MPI_Datatype types[counts], type_size_t;
  MPI_Type_match_size(MPI_TYPECLASS_INTEGER, sizeof(size_t), &type_size_t);

  blocklengths[0] = blocklengths[1] = 1;
  types[0] = types[1] = type_size_t;

  // Get the base address
  MPI_Get_address(&(data_struct_rep->entries), &displs[0]);
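/*
 * Illustrative note (not part of this file): MPI_Type_match_size returns a predefined
 * MPI datatype of the requested type class whose size equals the given byte count, so
 * type_size_t above is an integer MPI type with the same size as size_t. A hypothetical
 * sanity check:
 *
 *   int matched_size;
 *   MPI_Type_size(type_size_t, &matched_size);
 *   // matched_size == (int) sizeof(size_t)
 */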
@@ -187,20 +275,27 @@
 * TODO Refactor?
 */
void def_malleability_qty_type(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, MPI_Datatype *new_type) {
  int counts = 6;
  int blocklengths[counts];
  MPI_Aint displs[counts];
  MPI_Datatype types[counts], type_size_t;
  MPI_Type_match_size(MPI_TYPECLASS_INTEGER, sizeof(size_t), &type_size_t);

  types[0] = types[1] = types[3] = types[4] = type_size_t;
  types[2] = types[5] = MPI_INT;
  blocklengths[0] = blocklengths[1] = blocklengths[2] = data_struct_rep->entries;
  blocklengths[3] = blocklengths[4] = blocklengths[5] = data_struct_dist->entries;

  MPI_Get_address((data_struct_rep->qty), &displs[0]);
  MPI_Get_address((data_struct_rep->request_qty), &displs[1]);
  MPI_Get_address((data_struct_rep->types), &displs[2]); // MPI_Datatype is declared as a typedef of int
  MPI_Get_address((data_struct_dist->qty), &displs[3]);
  MPI_Get_address((data_struct_dist->request_qty), &displs[4]);
  MPI_Get_address((data_struct_dist->types), &displs[5]); // MPI_Datatype is declared as a typedef of int

  MPI_Type_create_struct(counts, blocklengths, displs, types, new_type);
  MPI_Type_commit(new_type);
}
#ifndef MAM_TYPES_H
#define MAM_TYPES_H

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <fcntl.h>
#include <sys/stat.h>
#include "MAM_Constants.h"

#define MAM_TYPES_INIT_DATA_QTY 100

typedef struct {
  size_t entries;      // Number of arrays to communicate (replicated data)
  size_t max_entries;
  size_t *qty;         // Number of elements in each sub-array of sync_array
  MPI_Datatype *types;

  // Array of request vectors. Each top-level element lists the requests that have to be
  // checked to consider the communication of that data item finished.
  size_t *request_qty;
  MPI_Request **requests;
  MPI_Win *windows;
  void **arrays;       // Each sub-array is a series of data to communicate
} malleability_data_t;

void add_data(void *data, size_t total_qty, MPI_Datatype type, size_t request_qty, malleability_data_t *data_struct);
void modify_data(void *data, size_t index, size_t total_qty, MPI_Datatype type, size_t request_qty, malleability_data_t *data_struct);
void comm_data_info(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, int is_children_group);
void free_malleability_data_struct(malleability_data_t *data_struct);
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <mpi.h>
#include <signal.h>
#include "MAM_Zombies.h"
#include "MAM_DataStructures.h"
#define PIDS_QTY 320
//TODO Add an option to enable or disable the usage of signal USR2.
//This code assumes the ROOT of each group will be the last to be zombified
//
void MAM_I_zombies_collect(int new_zombies);
void MAM_I_zombies_split();
void MAM_I_zombies_suspend();
int MAM_I_zombies_awake();
void zombies_handler_usr2() {}
int *pids = NULL;
int zombies_qty = 0;
void MAM_Zombies_service_init() {
zombies_qty = 0;
pids = malloc(PIDS_QTY * sizeof(int));
for(int i=0; i<PIDS_QTY; i++) {
pids[i] = 0;
}
}
int MAM_Zombies_service_free() {
int request_abort = MAM_I_zombies_awake();
free(pids);
return request_abort;
}
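/*
 * Checks how many processes of the original communicator have been flagged as
 * zombies. If some, but not all, of them are zombies, their PIDs are collected
 * on the root, the zombies are split out of the original communicator and then
 * suspended until they are awakened or the application ends.
 */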
void MAM_Zombies_update() {
int myId, numP, new_zombies;
MPI_Comm_rank(mall->original_comm, &myId);
MPI_Comm_size(mall->original_comm, &numP);
MPI_Allreduce(&mall->zombie, &new_zombies, 1, MPI_INT, MPI_SUM, mall->original_comm);
if(new_zombies && new_zombies < numP) {
MAM_I_zombies_collect(new_zombies);
MAM_I_zombies_split();
MAM_I_zombies_suspend();
if(myId == MAM_ROOT) zombies_qty += new_zombies;
}
}
void MAM_I_zombies_collect(int new_zombies) {
int pid = getpid();
int *pids_counts, *pids_displs;
int i, count, active;
int myId, numP;
MPI_Comm_rank(mall->original_comm, &myId);
MPI_Comm_size(mall->original_comm, &numP);
pids_counts = (int *) malloc(numP * sizeof(int));
pids_displs = (int *) malloc(numP * sizeof(int));
#if MAM_DEBUG > 2
if(myId == MAM_ROOT){ DEBUG_FUNC("Collecting zombies", mall->myId, mall->numP); } fflush(stdout);
#endif
count = mall->zombie;
if(myId == MAM_ROOT) {
active = numP - new_zombies;
for(i=0; i < active; i++) {
pids_counts[i] = 0;
}
pids_displs[i-1] = -1;
for(; i< active+new_zombies; i++) {
pids_counts[i] = 1;
pids_displs[i] = (pids_displs[i-1] + 1) + zombies_qty;
}
}
MPI_Gatherv(&pid, count, MPI_INT, pids, pids_counts, pids_displs, MPI_INT, MAM_ROOT, mall->original_comm);
free(pids_counts);
free(pids_displs);
}
void MAM_I_zombies_split() {
int myId, color;
MPI_Comm new_original_comm;
MPI_Comm_rank(mall->original_comm, &myId);
color = mall->zombie ? MPI_UNDEFINED : 1;
MPI_Comm_split(mall->original_comm, color, myId, &new_original_comm);
if(mall->original_comm != MPI_COMM_WORLD) MPI_Comm_free(&mall->original_comm);
if(new_original_comm != MPI_COMM_NULL) MPI_Comm_set_name(new_original_comm, "MAM_ORIGINAL");
mall->original_comm = new_original_comm;
}
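/*
 * Installs an empty handler for SIGUSR2 and then blocks in sigsuspend until that
 * signal arrives. Only processes marked as zombies reach the sigsuspend call; they
 * remain suspended here until MAM_I_zombies_awake sends them SIGUSR2.
 */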
void MAM_I_zombies_suspend() {
struct sigaction act;
if(!mall->zombie) return;
sigemptyset(&act.sa_mask);
act.sa_flags=0;
act.sa_handler=zombies_handler_usr2;
sigaction(SIGUSR2, &act, NULL);
sigset_t set;
sigprocmask(SIG_SETMASK,NULL,&set);
sigsuspend(&set);
}
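/*
 * Wakes up the previously suspended zombie processes by sending them SIGUSR2.
 * If the group spans more than one node and there are zombies, they cannot be
 * signalled with kill across nodes, so an abort is requested instead (return 1).
 */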
int MAM_I_zombies_awake() {
if(mall->internode_group && zombies_qty) return 1; //Request Abort
for(int i=0; i < zombies_qty; i++) { // Wake up the zombies
kill(pids[i], SIGUSR2);
}
zombies_qty = 0;
return 0; //Normal termination
}
#ifndef MAM_ZOMBIES_H
#define MAM_ZOMBIES_H
void MAM_Zombies_service_init();
int MAM_Zombies_service_free();
void MAM_Zombies_update();
#endif
CC = gcc
MCC = mpicc
#C_FLAGS_ALL = -Wconversion -Wpedantic
C_FLAGS = -Wall -Wextra -Wshadow -Wfatal-errors
LD_FLAGS = -lm -pthread
MAM_USE_SLURM ?= 0
MAM_USE_BARRIERS ?= 0
MAM_DEBUG ?= 0
DEF = -DMAM_USE_SLURM=$(MAM_USE_SLURM) -DMAM_USE_BARRIERS=$(MAM_USE_BARRIERS) -DMAM_DEBUG=$(MAM_DEBUG)
ifeq ($(MAM_USE_SLURM),1)
LD_FLAGS += -lslurm
endif
ifeq ($(shell test $(MAM_DEBUG) -gt 0; echo $$?),0)
C_FLAGS += -g
endif
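# Example invocation (illustrative): the variables above use ?= and can therefore be
# overridden from the command line, e.g. to build with Slurm support and debug output:
#   make MAM_USE_SLURM=1 MAM_DEBUG=2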
# Final library
LIB = libmam.so
# Put all auto generated stuff to this build dir.
BUILD_DIR = ./build
# List of all directories where source files are located
SRCDIRS = . spawn_methods distribution_methods
# List of all .c source files.
C_FILES = $(foreach dire, $(SRCDIRS), $(wildcard $(dire)/*.c))
# All .o files go to build dir.
OBJ = $(C_FILES:%.c=$(BUILD_DIR)/%.o)
# Gcc will create these .d files containing dependencies.
DEP = $(OBJ:%.o=%.d)
# BASIC RULES
.PHONY : clean clear install
all: install
clean:
-rm $(BUILD_DIR)/$(LIB) $(OBJ) $(DEP)
clear:
-rm -rf $(BUILD_DIR)
install: $(LIB)
echo "Done"
# SPECIFIC RULES
# Default target named after the binary.
$(LIB) : $(BUILD_DIR)/$(LIB)
# Actual target of the binary - depends on all .o files.
$(BUILD_DIR)/$(LIB) : $(OBJ)
$(MCC) $(C_FLAGS) $^ -shared -o $@ $(LD_FLAGS)
# Include all .d files
# .d files are used for knowing the dependencies of each source file
-include $(DEP)
# Build target for every single object file.
# The potential dependency on header files is covered
# by calling `-include $(DEP)`.
# The -MMD flag additionally creates a .d file with
# the same name as the .o file.
$(BUILD_DIR)/%.o : %.c
@mkdir -p $(@D)
$(MCC) $(C_FLAGS) $(DEF) -fpic -MMD -c $< -o $@