Commit 3b51a4e4 authored by iker_martin
Browse files

Added macros to control debug output, barriers, and Slurm usage

parent f5fe619d
...@@ -3,7 +3,15 @@ MCC = mpicc ...@@ -3,7 +3,15 @@ MCC = mpicc
#C_FLAGS_ALL = -Wconversion -Wpedantic #C_FLAGS_ALL = -Wconversion -Wpedantic
C_FLAGS = -Wall -Wextra -Wshadow -Wfatal-errors C_FLAGS = -Wall -Wextra -Wshadow -Wfatal-errors
LD_FLAGS = -lm -pthread LD_FLAGS = -lm -pthread
DEF =
USE_MAL_SLURM ?= 0
USE_MAL_BARRIERS ?= 0
USE_MAL_DEBUG ?= 0
ifeq ($(USE_MAL_SLURM),1)
LD_FLAGS += -lslurm
endif
DEF = -DUSE_MAL_SLURM=$(USE_MAL_SLURM) -DUSE_MAL_BARRIERS=$(USE_MAL_BARRIERS) -DUSE_MAL_DEBUG=$(USE_MAL_DEBUG)
.PHONY : clean clear install install_slurm .PHONY : clean clear install install_slurm
...@@ -61,8 +69,3 @@ clear: ...@@ -61,8 +69,3 @@ clear:
install: $(BIN) $(CONFIG) install: $(BIN) $(CONFIG)
echo "Done" echo "Done"
# Builds target with slurm
install_slurm: LD_FLAGS += -lslurm
install_slurm: DEF += -DUSE_SLURM
install_slurm: install
...@@ -96,6 +96,11 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex ...@@ -96,6 +96,11 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex
return MALLEABILITY_CHILDREN; return MALLEABILITY_CHILDREN;
} }
#if USE_MAL_BARRIERS && USE_MAL_DEBUG
if(mall->myId == mall->root)
printf("MaM: Using barriers to record times.\n");
#endif
if(nodelist != NULL) { //TODO To be deprecated by using Slurm or else statement if(nodelist != NULL) { //TODO To be deprecated by using Slurm or else statement
mall->nodelist_len = strlen(nodelist); mall->nodelist_len = strlen(nodelist);
} else { // If no nodelist is detected, get it from the actual run } else { // If no nodelist is detected, get it from the actual run
...@@ -160,7 +165,10 @@ int malleability_checkpoint() { ...@@ -160,7 +165,10 @@ int malleability_checkpoint() {
case MALL_NOT_STARTED: case MALL_NOT_STARTED:
reset_malleability_times(); reset_malleability_times();
// Comprobar si se tiene que realizar un redimensionado // Comprobar si se tiene que realizar un redimensionado
//MPI_Barrier(mall->comm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->malleability_start = MPI_Wtime(); mall_conf->times->malleability_start = MPI_Wtime();
//if(CHECK_RMS()) {return MALL_DENIED;} //if(CHECK_RMS()) {return MALL_DENIED;}
...@@ -175,7 +183,9 @@ int malleability_checkpoint() { ...@@ -175,7 +183,9 @@ int malleability_checkpoint() {
case MALL_SPAWN_SINGLE_PENDING: case MALL_SPAWN_SINGLE_PENDING:
state = check_spawn_state(&(mall->intercomm), mall->comm, &end_real_time); state = check_spawn_state(&(mall->intercomm), mall->comm, &end_real_time);
if (state == MALL_SPAWN_COMPLETED || state == MALL_SPAWN_ADAPTED) { if (state == MALL_SPAWN_COMPLETED || state == MALL_SPAWN_ADAPTED) {
//MPI_Barrier(mall->comm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start; mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start;
malleability_checkpoint(); malleability_checkpoint();
...@@ -200,13 +210,18 @@ int malleability_checkpoint() { ...@@ -200,13 +210,18 @@ int malleability_checkpoint() {
break; break;
case MALL_SPAWN_ADAPT_PENDING: case MALL_SPAWN_ADAPT_PENDING:
//MPI_Barrier(mall->comm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_start = MPI_Wtime(); mall_conf->times->spawn_start = MPI_Wtime();
unset_spawn_postpone_flag(state); unset_spawn_postpone_flag(state);
state = check_spawn_state(&(mall->intercomm), mall->comm, &end_real_time); state = check_spawn_state(&(mall->intercomm), mall->comm, &end_real_time);
if(!malleability_spawn_contains_strat(mall_conf->spawn_strategies, MALL_SPAWN_PTHREAD, NULL)) { if(!malleability_spawn_contains_strat(mall_conf->spawn_strategies, MALL_SPAWN_PTHREAD, NULL)) {
//MPI_Barrier(mall->comm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start; mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start;
malleability_checkpoint(); malleability_checkpoint();
} }
...@@ -218,7 +233,9 @@ int malleability_checkpoint() { ...@@ -218,7 +233,9 @@ int malleability_checkpoint() {
break; break;
case MALL_DIST_COMPLETED: //TODO No es esto muy feo? case MALL_DIST_COMPLETED: //TODO No es esto muy feo?
//MPI_Barrier(mall->comm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->malleability_end = MPI_Wtime(); mall_conf->times->malleability_end = MPI_Wtime();
state = MALL_COMPLETED; state = MALL_COMPLETED;
break; break;
...@@ -502,7 +519,9 @@ void Children_init() { ...@@ -502,7 +519,9 @@ void Children_init() {
comm_data_info(rep_a_data, dist_a_data, MALLEABILITY_CHILDREN, mall->myId, root_parents, mall->intercomm); comm_data_info(rep_a_data, dist_a_data, MALLEABILITY_CHILDREN, mall->myId, root_parents, mall->intercomm);
if(dist_a_data->entries || rep_a_data->entries) { // Recibir datos asincronos if(dist_a_data->entries || rep_a_data->entries) { // Recibir datos asincronos
//MPI_Barrier(mall->intercomm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
if(malleability_red_contains_strat(mall_conf->red_strategies, MALL_RED_THREAD, NULL)) { if(malleability_red_contains_strat(mall_conf->red_strategies, MALL_RED_THREAD, NULL)) {
recv_data(numP_parents, dist_a_data, MALLEABILITY_USE_SYNCHRONOUS); recv_data(numP_parents, dist_a_data, MALLEABILITY_USE_SYNCHRONOUS);
...@@ -517,13 +536,17 @@ void Children_init() { ...@@ -517,13 +536,17 @@ void Children_init() {
} }
} }
//MPI_Barrier(mall->intercomm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
mall_conf->times->async_end= MPI_Wtime(); // Obtener timestamp de cuando termina comm asincrona mall_conf->times->async_end= MPI_Wtime(); // Obtener timestamp de cuando termina comm asincrona
} }
comm_data_info(rep_s_data, dist_s_data, MALLEABILITY_CHILDREN, mall->myId, root_parents, mall->intercomm); comm_data_info(rep_s_data, dist_s_data, MALLEABILITY_CHILDREN, mall->myId, root_parents, mall->intercomm);
if(dist_s_data->entries || rep_s_data->entries) { // Recibir datos sincronos if(dist_s_data->entries || rep_s_data->entries) { // Recibir datos sincronos
//MPI_Barrier(mall->intercomm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
recv_data(numP_parents, dist_s_data, MALLEABILITY_USE_SYNCHRONOUS); recv_data(numP_parents, dist_s_data, MALLEABILITY_USE_SYNCHRONOUS);
// TODO Crear funcion especifica y anyadir para Asinc // TODO Crear funcion especifica y anyadir para Asinc
...@@ -537,7 +560,9 @@ void Children_init() { ...@@ -537,7 +560,9 @@ void Children_init() {
} }
MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], datatype, root_parents, mall->intercomm); MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], datatype, root_parents, mall->intercomm);
} }
//MPI_Barrier(mall->intercomm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
mall_conf->times->sync_end = MPI_Wtime(); // Obtener timestamp de cuando termina comm sincrona mall_conf->times->sync_end = MPI_Wtime(); // Obtener timestamp de cuando termina comm sincrona
} }
...@@ -547,7 +572,9 @@ void Children_init() { ...@@ -547,7 +572,9 @@ void Children_init() {
malleability_comms_update(mall->intercomm); malleability_comms_update(mall->intercomm);
} }
//MPI_Barrier(mall->comm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->malleability_end = MPI_Wtime(); // Obtener timestamp de cuando termina maleabilidad mall_conf->times->malleability_end = MPI_Wtime(); // Obtener timestamp de cuando termina maleabilidad
MPI_Comm_disconnect(&(mall->intercomm)); //FIXME Error en OpenMPI + Merge MPI_Comm_disconnect(&(mall->intercomm)); //FIXME Error en OpenMPI + Merge
} }
...@@ -563,13 +590,17 @@ void Children_init() { ...@@ -563,13 +590,17 @@ void Children_init() {
* Si se pide en segundo plano devuelve el estado actual. * Si se pide en segundo plano devuelve el estado actual.
*/ */
int spawn_step(){ int spawn_step(){
//MPI_Barrier(mall->comm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_start = MPI_Wtime(); mall_conf->times->spawn_start = MPI_Wtime();
state = init_spawn(mall->name_exec, mall->num_cpus, mall->num_nodes, mall->nodelist, mall->myId, mall->numP, mall->numC, mall->root, mall_conf->spawn_dist, mall_conf->spawn_method, mall_conf->spawn_strategies, mall->thread_comm, &(mall->intercomm)); state = init_spawn(mall->name_exec, mall->num_cpus, mall->num_nodes, mall->nodelist, mall->myId, mall->numP, mall->numC, mall->root, mall_conf->spawn_dist, mall_conf->spawn_method, mall_conf->spawn_strategies, mall->thread_comm, &(mall->intercomm));
if(!malleability_spawn_contains_strat(mall_conf->spawn_strategies, MALL_SPAWN_PTHREAD, NULL)) { if(!malleability_spawn_contains_strat(mall_conf->spawn_strategies, MALL_SPAWN_PTHREAD, NULL)) {
//MPI_Barrier(mall->comm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start; mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start;
} }
return state; return state;
...@@ -616,7 +647,9 @@ int start_redistribution() { ...@@ -616,7 +647,9 @@ int start_redistribution() {
comm_data_info(rep_a_data, dist_a_data, MALLEABILITY_NOT_CHILDREN, mall->myId, mall->root, mall->intercomm); comm_data_info(rep_a_data, dist_a_data, MALLEABILITY_NOT_CHILDREN, mall->myId, mall->root, mall->intercomm);
if(dist_a_data->entries || rep_a_data->entries) { // Enviar datos asincronos if(dist_a_data->entries || rep_a_data->entries) { // Enviar datos asincronos
//FIXME No se envian los datos replicados (rep_a_data) //FIXME No se envian los datos replicados (rep_a_data)
//MPI_Barrier(mall->intercomm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
mall_conf->times->async_start = MPI_Wtime(); mall_conf->times->async_start = MPI_Wtime();
if(malleability_red_contains_strat(mall_conf->red_strategies, MALL_RED_THREAD, NULL)) { if(malleability_red_contains_strat(mall_conf->red_strategies, MALL_RED_THREAD, NULL)) {
return thread_creation(); return thread_creation();
...@@ -668,7 +701,9 @@ int check_redistribution() { ...@@ -668,7 +701,9 @@ int check_redistribution() {
} }
MPI_Comm_test_inter(mall->intercomm, &is_intercomm); MPI_Comm_test_inter(mall->intercomm, &is_intercomm);
//MPI_Barrier(mall->intercomm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
if(!is_intercomm) mall_conf->times->async_end = MPI_Wtime(); // Merge method only if(!is_intercomm) mall_conf->times->async_end = MPI_Wtime(); // Merge method only
return end_redistribution(); return end_redistribution();
} }
...@@ -695,7 +730,9 @@ int end_redistribution() { ...@@ -695,7 +730,9 @@ int end_redistribution() {
comm_data_info(rep_s_data, dist_s_data, MALLEABILITY_NOT_CHILDREN, mall->myId, mall->root, mall->intercomm); comm_data_info(rep_s_data, dist_s_data, MALLEABILITY_NOT_CHILDREN, mall->myId, mall->root, mall->intercomm);
if(dist_s_data->entries || rep_s_data->entries) { // Enviar datos sincronos if(dist_s_data->entries || rep_s_data->entries) { // Enviar datos sincronos
//MPI_Barrier(mall->intercomm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
mall_conf->times->sync_start = MPI_Wtime(); mall_conf->times->sync_start = MPI_Wtime();
send_data(mall->numC, dist_s_data, MALLEABILITY_USE_SYNCHRONOUS); send_data(mall->numC, dist_s_data, MALLEABILITY_USE_SYNCHRONOUS);
...@@ -710,7 +747,9 @@ int end_redistribution() { ...@@ -710,7 +747,9 @@ int end_redistribution() {
} }
MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], datatype, rootBcast, mall->intercomm); MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], datatype, rootBcast, mall->intercomm);
} }
//MPI_Barrier(mall->intercomm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
if(!is_intercomm) mall_conf->times->sync_end = MPI_Wtime(); // Merge method only if(!is_intercomm) mall_conf->times->sync_end = MPI_Wtime(); // Merge method only
} }
...@@ -738,7 +777,9 @@ int end_redistribution() { ...@@ -738,7 +777,9 @@ int end_redistribution() {
///============================================= ///=============================================
//TODO Add comment //TODO Add comment
int shrink_redistribution() { int shrink_redistribution() {
//MPI_Barrier(mall->comm); #if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
double time_extra = MPI_Wtime(); double time_extra = MPI_Wtime();
//TODO Create Commit function. Processes can perform tasks before that. Then call again Malleability to commit the change //TODO Create Commit function. Processes can perform tasks before that. Then call again Malleability to commit the change
...@@ -758,7 +799,10 @@ int shrink_redistribution() { ...@@ -758,7 +799,10 @@ int shrink_redistribution() {
MPI_Comm_free(&(mall->intercomm)); MPI_Comm_free(&(mall->intercomm));
//MPI_Barrier(mall->comm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_time += MPI_Wtime() - time_extra; mall_conf->times->spawn_time += MPI_Wtime() - time_extra;
return MALL_DIST_COMPLETED; return MALL_DIST_COMPLETED;
} else { } else {
...@@ -852,7 +896,10 @@ int thread_check() { ...@@ -852,7 +896,10 @@ int thread_check() {
return -2; return -2;
} }
MPI_Comm_test_inter(mall->intercomm, &is_intercomm); MPI_Comm_test_inter(mall->intercomm, &is_intercomm);
//MPI_Barrier(mall->intercomm);
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
if(!is_intercomm) mall_conf->times->async_end = MPI_Wtime(); // Merge method only if(!is_intercomm) mall_conf->times->async_end = MPI_Wtime(); // Merge method only
return end_redistribution(); return end_redistribution();
} }
......
...@@ -7,8 +7,6 @@ ...@@ -7,8 +7,6 @@
#include <mpi.h> #include <mpi.h>
#include "ProcessDist.h" #include "ProcessDist.h"
//#define USE_SLURM
//--------------PRIVATE DECLARATIONS---------------// //--------------PRIVATE DECLARATIONS---------------//
void node_dist( struct physical_dist dist, int **qty, int *used_nodes); void node_dist( struct physical_dist dist, int **qty, int *used_nodes);
...@@ -17,7 +15,7 @@ void compact_dist(struct physical_dist dist, int *used_nodes, int *procs); ...@@ -17,7 +15,7 @@ void compact_dist(struct physical_dist dist, int *used_nodes, int *procs);
void generate_info_string(int target_qty, MPI_Info *info); void generate_info_string(int target_qty, MPI_Info *info);
//--------------------------------SLURM USAGE-------------------------------------// //--------------------------------SLURM USAGE-------------------------------------//
#ifdef USE_SLURM #if USE_MAL_SLURM
#include <slurm/slurm.h> #include <slurm/slurm.h>
void generate_info_string_slurm(char *nodelist, int *procs_array, size_t nodes, MPI_Info *info); void generate_info_string_slurm(char *nodelist, int *procs_array, size_t nodes, MPI_Info *info);
void fill_str_hosts_slurm(char *nodelist, int *qty, size_t used_nodes, char **hostfile_str); void fill_str_hosts_slurm(char *nodelist, int *qty, size_t used_nodes, char **hostfile_str);
...@@ -77,7 +75,7 @@ int physical_struct_create(int target_qty, int already_created, int num_cpus, in ...@@ -77,7 +75,7 @@ int physical_struct_create(int target_qty, int already_created, int num_cpus, in
* a usar al crear los procesos. * a usar al crear los procesos.
*/ */
void processes_dist(struct physical_dist dist, MPI_Info *info_spawn) { void processes_dist(struct physical_dist dist, MPI_Info *info_spawn) {
#ifdef USE_SLURM #if USE_MAL_SLURM
int used_nodes=0; int used_nodes=0;
int *procs_array; int *procs_array;
// GET NEW DISTRIBUTION // GET NEW DISTRIBUTION
...@@ -230,7 +228,7 @@ void generate_info_string(int target_qty, MPI_Info *info){ ...@@ -230,7 +228,7 @@ void generate_info_string(int target_qty, MPI_Info *info){
} }
//--------------------------------SLURM USAGE-------------------------------------// //--------------------------------SLURM USAGE-------------------------------------//
#ifdef USE_SLURM #if USE_MAL_SLURM
/* /*
* Crea y devuelve un objeto MPI_Info con un par hosts/mapping * Crea y devuelve un objeto MPI_Info con un par hosts/mapping
* en el que se indica el mappeado a utilizar en los nuevos * en el que se indica el mappeado a utilizar en los nuevos
...@@ -314,7 +312,7 @@ int write_str_node(char **hostfile_str, size_t len_og, size_t qty, char *node_na ...@@ -314,7 +312,7 @@ int write_str_node(char **hostfile_str, size_t len_og, size_t qty, char *node_na
//==================================================== //====================================================
//--------------------------------SLURM USAGE-------------------------------------// //--------------------------------SLURM USAGE-------------------------------------//
#ifdef USE_SLURM #if USE_MAL_SLURM
/* FIXME Por revisar /* FIXME Por revisar
* @deprecated * @deprecated
* Genera un fichero hostfile y lo anyade a un objeto * Genera un fichero hostfile y lo anyade a un objeto
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment