Commit 3b51a4e4 authored by iker_martin
Browse files

Added macros to control Debug, barriers and slurm usage

parent f5fe619d
......@@ -3,7 +3,15 @@ MCC = mpicc
#C_FLAGS_ALL = -Wconversion -Wpedantic
C_FLAGS = -Wall -Wextra -Wshadow -Wfatal-errors
LD_FLAGS = -lm -pthread
DEF =
USE_MAL_SLURM ?= 0
USE_MAL_BARRIERS ?= 0
USE_MAL_DEBUG ?= 0
ifeq ($(USE_MAL_SLURM),1)
LD_FLAGS += -lslurm
endif
DEF = -DUSE_MAL_SLURM=$(USE_MAL_SLURM) -DUSE_MAL_BARRIERS=$(USE_MAL_BARRIERS) -DUSE_MAL_DEBUG=$(USE_MAL_DEBUG)
.PHONY : clean clear install install_slurm
......@@ -61,8 +69,3 @@ clear:
install: $(BIN) $(CONFIG)
echo "Done"
# Builds target with slurm
install_slurm: LD_FLAGS += -lslurm
install_slurm: DEF += -DUSE_SLURM
install_slurm: install
......@@ -96,6 +96,11 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex
return MALLEABILITY_CHILDREN;
}
#if USE_MAL_BARRIERS && USE_MAL_DEBUG
if(mall->myId == mall->root)
printf("MaM: Using barriers to record times.\n");
#endif
if(nodelist != NULL) { //TODO To be deprecated by using Slurm or else statement
mall->nodelist_len = strlen(nodelist);
} else { // If no nodelist is detected, get it from the actual run
......@@ -160,7 +165,10 @@ int malleability_checkpoint() {
case MALL_NOT_STARTED:
reset_malleability_times();
// Comprobar si se tiene que realizar un redimensionado
//MPI_Barrier(mall->comm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->malleability_start = MPI_Wtime();
//if(CHECK_RMS()) {return MALL_DENIED;}
......@@ -175,7 +183,9 @@ int malleability_checkpoint() {
case MALL_SPAWN_SINGLE_PENDING:
state = check_spawn_state(&(mall->intercomm), mall->comm, &end_real_time);
if (state == MALL_SPAWN_COMPLETED || state == MALL_SPAWN_ADAPTED) {
//MPI_Barrier(mall->comm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start;
malleability_checkpoint();
......@@ -200,13 +210,18 @@ int malleability_checkpoint() {
break;
case MALL_SPAWN_ADAPT_PENDING:
//MPI_Barrier(mall->comm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_start = MPI_Wtime();
unset_spawn_postpone_flag(state);
state = check_spawn_state(&(mall->intercomm), mall->comm, &end_real_time);
if(!malleability_spawn_contains_strat(mall_conf->spawn_strategies, MALL_SPAWN_PTHREAD, NULL)) {
//MPI_Barrier(mall->comm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start;
malleability_checkpoint();
}
......@@ -218,7 +233,9 @@ int malleability_checkpoint() {
break;
case MALL_DIST_COMPLETED: //TODO No es esto muy feo?
//MPI_Barrier(mall->comm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->malleability_end = MPI_Wtime();
state = MALL_COMPLETED;
break;
......@@ -502,7 +519,9 @@ void Children_init() {
comm_data_info(rep_a_data, dist_a_data, MALLEABILITY_CHILDREN, mall->myId, root_parents, mall->intercomm);
if(dist_a_data->entries || rep_a_data->entries) { // Recibir datos asincronos
//MPI_Barrier(mall->intercomm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
if(malleability_red_contains_strat(mall_conf->red_strategies, MALL_RED_THREAD, NULL)) {
recv_data(numP_parents, dist_a_data, MALLEABILITY_USE_SYNCHRONOUS);
......@@ -517,13 +536,17 @@ void Children_init() {
}
}
//MPI_Barrier(mall->intercomm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
mall_conf->times->async_end= MPI_Wtime(); // Obtener timestamp de cuando termina comm asincrona
}
comm_data_info(rep_s_data, dist_s_data, MALLEABILITY_CHILDREN, mall->myId, root_parents, mall->intercomm);
if(dist_s_data->entries || rep_s_data->entries) { // Recibir datos sincronos
//MPI_Barrier(mall->intercomm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
recv_data(numP_parents, dist_s_data, MALLEABILITY_USE_SYNCHRONOUS);
// TODO Crear funcion especifica y anyadir para Asinc
......@@ -537,7 +560,9 @@ void Children_init() {
}
MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], datatype, root_parents, mall->intercomm);
}
//MPI_Barrier(mall->intercomm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
mall_conf->times->sync_end = MPI_Wtime(); // Obtener timestamp de cuando termina comm sincrona
}
......@@ -547,7 +572,9 @@ void Children_init() {
malleability_comms_update(mall->intercomm);
}
//MPI_Barrier(mall->comm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->malleability_end = MPI_Wtime(); // Obtener timestamp de cuando termina maleabilidad
MPI_Comm_disconnect(&(mall->intercomm)); //FIXME Error en OpenMPI + Merge
}
......@@ -563,13 +590,17 @@ void Children_init() {
* Si se pide en segundo plano devuelve el estado actual.
*/
int spawn_step(){
//MPI_Barrier(mall->comm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_start = MPI_Wtime();
state = init_spawn(mall->name_exec, mall->num_cpus, mall->num_nodes, mall->nodelist, mall->myId, mall->numP, mall->numC, mall->root, mall_conf->spawn_dist, mall_conf->spawn_method, mall_conf->spawn_strategies, mall->thread_comm, &(mall->intercomm));
if(!malleability_spawn_contains_strat(mall_conf->spawn_strategies, MALL_SPAWN_PTHREAD, NULL)) {
//MPI_Barrier(mall->comm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start;
}
return state;
......@@ -616,7 +647,9 @@ int start_redistribution() {
comm_data_info(rep_a_data, dist_a_data, MALLEABILITY_NOT_CHILDREN, mall->myId, mall->root, mall->intercomm);
if(dist_a_data->entries || rep_a_data->entries) { // Enviar datos asincronos
//FIXME No se envian los datos replicados (rep_a_data)
//MPI_Barrier(mall->intercomm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
mall_conf->times->async_start = MPI_Wtime();
if(malleability_red_contains_strat(mall_conf->red_strategies, MALL_RED_THREAD, NULL)) {
return thread_creation();
......@@ -668,7 +701,9 @@ int check_redistribution() {
}
MPI_Comm_test_inter(mall->intercomm, &is_intercomm);
//MPI_Barrier(mall->intercomm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
if(!is_intercomm) mall_conf->times->async_end = MPI_Wtime(); // Merge method only
return end_redistribution();
}
......@@ -695,7 +730,9 @@ int end_redistribution() {
comm_data_info(rep_s_data, dist_s_data, MALLEABILITY_NOT_CHILDREN, mall->myId, mall->root, mall->intercomm);
if(dist_s_data->entries || rep_s_data->entries) { // Enviar datos sincronos
//MPI_Barrier(mall->intercomm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
mall_conf->times->sync_start = MPI_Wtime();
send_data(mall->numC, dist_s_data, MALLEABILITY_USE_SYNCHRONOUS);
......@@ -710,7 +747,9 @@ int end_redistribution() {
}
MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], datatype, rootBcast, mall->intercomm);
}
//MPI_Barrier(mall->intercomm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
if(!is_intercomm) mall_conf->times->sync_end = MPI_Wtime(); // Merge method only
}
......@@ -738,7 +777,9 @@ int end_redistribution() {
///=============================================
//TODO Add comment
int shrink_redistribution() {
//MPI_Barrier(mall->comm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
double time_extra = MPI_Wtime();
//TODO Create Commit function. Processes can perform tasks before that. Then call again Malleability to commit the change
......@@ -758,7 +799,10 @@ int shrink_redistribution() {
MPI_Comm_free(&(mall->intercomm));
//MPI_Barrier(mall->comm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_time += MPI_Wtime() - time_extra;
return MALL_DIST_COMPLETED;
} else {
......@@ -852,7 +896,10 @@ int thread_check() {
return -2;
}
MPI_Comm_test_inter(mall->intercomm, &is_intercomm);
//MPI_Barrier(mall->intercomm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
if(!is_intercomm) mall_conf->times->async_end = MPI_Wtime(); // Merge method only
return end_redistribution();
}
......
......@@ -7,8 +7,6 @@
#include <mpi.h>
#include "ProcessDist.h"
//#define USE_SLURM
//--------------PRIVATE DECLARATIONS---------------//
void node_dist( struct physical_dist dist, int **qty, int *used_nodes);
......@@ -17,7 +15,7 @@ void compact_dist(struct physical_dist dist, int *used_nodes, int *procs);
void generate_info_string(int target_qty, MPI_Info *info);
//--------------------------------SLURM USAGE-------------------------------------//
#ifdef USE_SLURM
#if USE_MAL_SLURM
#include <slurm/slurm.h>
void generate_info_string_slurm(char *nodelist, int *procs_array, size_t nodes, MPI_Info *info);
void fill_str_hosts_slurm(char *nodelist, int *qty, size_t used_nodes, char **hostfile_str);
......@@ -77,7 +75,7 @@ int physical_struct_create(int target_qty, int already_created, int num_cpus, in
* a usar al crear los procesos.
*/
void processes_dist(struct physical_dist dist, MPI_Info *info_spawn) {
#ifdef USE_SLURM
#if USE_MAL_SLURM
int used_nodes=0;
int *procs_array;
// GET NEW DISTRIBUTION
......@@ -230,7 +228,7 @@ void generate_info_string(int target_qty, MPI_Info *info){
}
//--------------------------------SLURM USAGE-------------------------------------//
#ifdef USE_SLURM
#if USE_MAL_SLURM
/*
* Crea y devuelve un objeto MPI_Info con un par hosts/mapping
* en el que se indica el mappeado a utilizar en los nuevos
......@@ -314,7 +312,7 @@ int write_str_node(char **hostfile_str, size_t len_og, size_t qty, char *node_na
//====================================================
//--------------------------------SLURM USAGE-------------------------------------//
#ifdef USE_SLURM
#if USE_MAL_SLURM
/* FIXME Por revisar
* @deprecated
* Genera un fichero hostfile y lo anyade a un objeto
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment