Commit e466e997 authored by iker_martin's avatar iker_martin
Browse files

Updated maleable CG to use the new MaM interface

parent b7bcaffe
......@@ -12,17 +12,12 @@ cores=20
numP=$1
matrix=$2
numC=$3
msm=$4
mss=$5
mrm=$6
is_sync=$8
qty=$9
qty=$4
initial_nodelist=$(bash $dirCG/BashScripts/createInitialNodelist.sh $numP $cores $nodelist)
echo "Test numP=$numP numC=$numC Meths=$msm $mrm $mss -- Is_synch=$is_sync qty=$qty"
echo "Test numP=$numP numC=$numC -- qty=$qty"
for ((i=0; i<qty; i++))
do
mpirun -hosts $initial_nodelist -np $numP $dirCG/build/a.out $matrix $numC $msm $mss $mrm $mss $is_sync $nodelist $nodes
mpirun -hosts $initial_nodelist -np $numP $dirCG/build/a.out $matrix $numC
done
echo "End"
......
......@@ -6,34 +6,22 @@
scriptDir="$(dirname "$0")"
source $scriptDir/config.txt
dirM="${dirCG}"
dirM="${dirCG}../SparseMatrix"
export dirCG
matrix="Queen_4147.rb"
#matrix="Queen_4147.rb"
#matrix="audikw_1.rb"
#matrix="bcsstk01.rsa"
matrix="bcsstk01.rsa"
module load /home/martini/MODULES/modulefiles/mpich-4.0.3-ofi
nodelist=$SLURM_JOB_NODELIST
nodes=$SLURM_JOB_NUM_NODES
cores=20
numP=$1
numC=$2
msm=$3
mrm=$4
mss=$5
send_sync=$6
qty=1
if [ $# -gt 6 ]
if [ $# -gt 2 ]
then
qty=$7
qty=$3
fi
initial_nodelist=$(bash $dirCG/BashScripts/createInitialNodelist.sh $numP $cores $nodelist)
echo "Test numP=$numP numC=$numC Meths=$msm $mrm $mss -- Is_synch=$send_sync qty=$qty"
for ((i=0; i<qty; i++))
do
mpirun -hosts $initial_nodelist -np $numP $dirCG/build/a.out $dirM/$matrix $numC $msm $mss $mrm $mss $send_sync $nodelist $nodes
done
sbatch -p P1 -N 1 $dirCG/Exec/generalRun.sh $numP $dirM/$matrix $numC $qty
echo "End"
This diff is collapsed.
......@@ -5,6 +5,14 @@ C_FLAGS =
LD_FLAGS = -lm -pthread
DEF =
USE_MAL_SLURM ?= 1
USE_MAL_BARRIERS ?= 0
USE_MAL_DEBUG ?= 0
ifeq ($(USE_MAL_SLURM),1)
LD_FLAGS += -lslurm
endif
DEF = -DUSE_MAL_SLURM=$(USE_MAL_SLURM) -DUSE_MAL_BARRIERS=$(USE_MAL_BARRIERS) -DUSE_MAL_DEBUG=$(USE_MAL_DEBUG)
.PHONY : clean clear install install_slurm
......@@ -59,8 +67,3 @@ clear:
install: $(BIN)
echo "Done"
# Builds target with slurm
install_slurm: LD_FLAGS += -lslurm
install_slurm: DEF += -DUSE_SLURM
install_slurm: install
This diff is collapsed.
......@@ -7,22 +7,13 @@
#include <string.h>
#include "malleabilityStates.h"
//#define MAL_COMM_COMPLETED 0
//#define MAL_COMM_UNINITIALIZED 2
//#define MAL_ASYNC_PENDING 1
void sync_communication(void *send, void **recv, int qty, MPI_Datatype datatype, int numP, int numO, int is_children_group, MPI_Comm comm);
//#define MAL_USE_NORMAL 0
//#define MAL_USE_IBARRIER 1
//#define MAL_USE_POINT 2
//#define MAL_USE_THREAD 3
int sync_communication(void *send, void **recv, int qty, int mal_type, int dependency, int myId, int numP, int numO, int is_children_group, int comm_type, MPI_Comm comm);
int async_communication(void *send, void **recv, int qty, int mal_type, int dependency, int myId, int numP, int numO, int is_children_group, int red_method, int red_strategies, MPI_Comm comm, MPI_Request **requests, size_t *request_qty);
int send_async(char *array, int qty, int mal_type, int myId, int numP, MPI_Comm intercomm, int numP_child, MPI_Request **comm_req, int red_method, int red_strategies);
void recv_async(char **array, int qty, int mal_type, int myId, int numP, MPI_Comm intercomm, int numP_parents, int red_method, int red_strategies);
void async_communication_start(void *send, void **recv, int qty, MPI_Datatype datatype, int numP, int numO, int is_children_group, MPI_Comm comm, MPI_Request **requests, size_t *request_qty, MPI_Win *win);
int async_communication_check(int is_children_group, MPI_Request *requests, size_t request_qty);
void async_communication_wait(MPI_Request *requests, size_t request_qty);
void async_communication_end(MPI_Request *requests, size_t request_qty, MPI_Win *win);
void malloc_comm_array(char **array, int qty, int myId, int numP);
int malleability_red_contains_strat(int comm_strategies, int strategy, int *result);
#endif
#ifndef MAM_H
#define MAM_H
#include "malleabilityStates.h"
#include "malleabilityManager.h"
#include "MAM_Configuration.h"
#endif
#include "MAM_Configuration.h"
#include "MAM_Init_Configuration.h"
#include "malleabilityDataStructures.h"
#include <limits.h>
typedef struct {
unsigned int *value, default_value;
int config_max_length;
union {
int (*set_config_simple)(unsigned int, unsigned int *);
int (*set_config_complex)(unsigned int);
};
char *env_name;
} mam_config_setting_t;
int MAM_I_set_method(unsigned int new_method, unsigned int *method);
int MAM_I_set_spawn_strat(unsigned int strategy, unsigned int *strategies);
int MAM_I_set_red_strat(unsigned int strategy, unsigned int *strategies);
int MAM_I_set_target_number(unsigned int new_numC);
int MAM_I_configuration_get_defaults();
int MAM_I_contains_strat(unsigned int comm_strategies, unsigned int strategy);
int MAM_I_add_strat(unsigned int *comm_strategies, unsigned int strategy);
int MAM_I_remove_strat(unsigned int *comm_strategies, unsigned int strategy);
mam_config_setting_t configSettings[] = {
{NULL, MALL_SPAWN_MERGE, MAM_METHODS_SPAWN_LEN, {.set_config_simple = MAM_I_set_method }, MAM_SPAWN_METHOD_ENV},
{NULL, MAM_STRAT_SPAWN_CLEAR, MAM_STRATS_SPAWN_LEN, {.set_config_simple = MAM_I_set_spawn_strat }, MAM_SPAWN_STRATS_ENV},
{NULL, MALL_DIST_COMPACT, MAM_METHODS_PHYSICAL_DISTRIBUTION_LEN, {.set_config_simple = MAM_I_set_method }, MAM_PHYSICAL_DISTRIBUTION_METHOD_ENV},
{NULL, MALL_RED_BASELINE, MAM_METHODS_RED_LEN, {.set_config_simple = MAM_I_set_method }, MAM_RED_METHOD_ENV},
{NULL, MAM_STRAT_RED_CLEAR, MAM_STRATS_RED_LEN, {.set_config_simple = MAM_I_set_red_strat }, MAM_RED_STRATS_ENV},
{NULL, 1, INT_MAX, {.set_config_complex = MAM_I_set_target_number }, MAM_NUM_TARGETS_ENV}
};
unsigned int masks_spawn[] = {MAM_STRAT_CLEAR_VALUE, MAM_MASK_PTHREAD, MAM_MASK_SPAWN_SINGLE, MAM_MASK_SPAWN_INTERCOMM};
unsigned int masks_red[] = {MAM_STRAT_CLEAR_VALUE, MAM_MASK_PTHREAD, MAM_MASK_RED_WAIT_SOURCES, MAM_MASK_RED_WAIT_TARGETS};
/**
* @brief Set configuration parameters for MAM.
*
* This function allows setting various configuration parameters for MAM
* such as spawn method, spawn strategies, spawn physical distribution,
* redistribution method, and red strategies.
*
* @param spawn_method The spawn method reconfiguration.
* @param spawn_strategies The spawn strategies reconfiguration.
* @param spawn_dist The spawn physical distribution method reconfiguration.
* @param red_method The redistribution method reconfiguration.
* @param red_strategies The redesitribution strategy for reconfiguration.
*/
void MAM_Set_configuration(int spawn_method, int spawn_strategies, int spawn_dist, int red_method, int red_strategies) {
int i, aux;
int aux_array[] = {spawn_method, spawn_strategies, spawn_dist, red_method, red_strategies};
if(state > MALL_NOT_STARTED) return;
mam_config_setting_t *config = NULL;
for (i = 0; i < MAM_KEY_COUNT-1; i++) { //FIXME Numero magico para no cambiar num_targets
aux = aux_array[i];
config = &configSettings[i];
if (0 <= aux && aux < config->config_max_length) {
if(i == MAM_NUM_TARGETS) {
config->set_config_complex(aux);
} else {
config->set_config_simple(aux, config->value);
}
}
}
}
/*
* @brief Set the configuration value for a specific key in MAM.
*
* Modifies the configuration value associated with the given key
* to the specified "required" value. The final value set is returned in the
* "provided" parameter.
*
* @param key The key for which the configuration value is to be modified.
* @param required The required value to set for the specified key.
* @param provided Pointer to an integer where the final value set will be stored.
* This parameter is updated with the actual value after modification.
* For strategy keys the value is "MAM_STRATS_ADDED" if "required" has
* been added, or "MAM_STRATS_MODIFIED" if multiple strategies of the
* key have been modified.
*/
void MAM_Set_key_configuration(int key, int required, int *provided) {
int i, aux;
if(provided == NULL) provided = &aux;
*provided = MALL_DENIED;
if(required < 0 || state > MALL_NOT_STARTED) return;
mam_config_setting_t *config = NULL;
for (i = 0; i < MAM_KEY_COUNT; i++) {
if (key == i) {
config = &configSettings[i];
break;
}
}
if (config != NULL) {
if (required < config->config_max_length) {
if(i == MAM_NUM_TARGETS) {
*provided = config->set_config_complex(required);
} else {
*provided = config->set_config_simple(required, config->value);
}
} else {*provided = *(config->value); }
} else { printf("MAM: Key %d does not exist\n", key); }
}
/*
* Retorna si una estrategia aparece o no
*/
int MAM_Contains_strat(int key, unsigned int strategy, int *result) {
int strategies, aux = MAM_OK;
unsigned int len = 0, mask;
switch(key) {
case MAM_SPAWN_STRATEGIES:
strategies = mall_conf->spawn_strategies;
mask = masks_spawn[strategy];
len = MAM_STRATS_SPAWN_LEN;
break;
case MAM_RED_STRATEGIES:
strategies = mall_conf->red_strategies;
mask = masks_red[strategy];
len = MAM_STRATS_RED_LEN;
break;
default:
aux = MALL_DENIED;
break;
}
if(aux == MAM_OK && strategy < len) {
aux = MAM_I_contains_strat(strategies, mask);
} else {
aux = 0;
}
if(result != NULL) *result = aux;
return aux;
}
/*
* //TODO
* Tiene que ser llamado despues de setear la config
*/
int MAM_Set_target_number(unsigned int numC){
return MAM_I_set_target_number(numC);
}
//======================================================||
//===============MAM_INIT FUNCTIONS=====================||
//======================================================||
//======================================================||
void MAM_Init_configuration() {
if(mall == NULL || mall_conf == NULL) {
printf("MAM FATAL ERROR: Setting initial config without previous mallocs\n");
fflush(stdout);
MPI_Abort(MPI_COMM_WORLD, -50);
}
configSettings[MAM_SPAWN_METHOD].value = &mall_conf->spawn_method;
configSettings[MAM_SPAWN_STRATEGIES].value = &mall_conf->spawn_strategies;
configSettings[MAM_PHYSICAL_DISTRIBUTION].value = &mall_conf->spawn_dist;
configSettings[MAM_RED_METHOD].value = &mall_conf->red_method;
configSettings[MAM_RED_STRATEGIES].value = &mall_conf->red_strategies;
}
void MAM_Set_initial_configuration() {
int not_filled = 1;
not_filled = MAM_I_configuration_get_defaults();
if(not_filled) {
if(mall->myId == mall->root) printf("MAM WARNING: Starting configuration not set\n");
fflush(stdout);
MPI_Abort(mall->comm, -50);
}
#if USE_MAL_DEBUG >= 2
if(mall->myId == mall->root) {
DEBUG_FUNC("Initial configuration settled", mall->myId, mall->numP);
fflush(stdout);
}
#endif
}
void MAM_Check_configuration() {
if(mall->numC == mall->numP) { // Migrate
MAM_Set_key_configuration(MAM_SPAWN_METHOD, MALL_SPAWN_BASELINE, NULL);
}
if(mall_conf->spawn_method == MALL_SPAWN_MERGE) {
if(MAM_I_contains_strat(mall_conf->spawn_strategies, MAM_MASK_SPAWN_INTERCOMM)) {
MAM_I_remove_strat(&mall_conf->spawn_strategies, MAM_MASK_SPAWN_INTERCOMM);
}
if(mall->numP > mall->numC && MAM_I_contains_strat(mall_conf->spawn_strategies, MAM_MASK_SPAWN_SINGLE)) {
MAM_I_remove_strat(&mall_conf->spawn_strategies, MAM_MASK_SPAWN_SINGLE);
}
}
if(mall_conf->red_method == MALL_RED_RMA_LOCK || mall_conf->red_method == MALL_RED_RMA_LOCKALL) {
if(MAM_I_contains_strat(mall_conf->spawn_strategies, MAM_MASK_SPAWN_INTERCOMM)) {
MAM_I_remove_strat(&mall_conf->spawn_strategies, MAM_MASK_SPAWN_INTERCOMM);
}
if(!MAM_I_contains_strat(mall_conf->red_strategies, MAM_MASK_RED_WAIT_TARGETS) &&
!MAM_I_contains_strat(mall_conf->red_strategies, MAM_MASK_PTHREAD)) {
MAM_I_set_red_strat(MAM_STRAT_RED_WAIT_TARGETS, &mall_conf->red_strategies);
}
}
}
//======================================================||
//================PRIVATE FUNCTIONS=====================||
//======================================================||
//======================================================||
int MAM_I_configuration_get_defaults() {
size_t i;
int set_value;
char *tmp = NULL;
mam_config_setting_t *config = NULL;
for (i = 0; i < MAM_KEY_COUNT; i++) {
config = &configSettings[i];
tmp = getenv(config->env_name);
if(tmp != NULL) {
set_value = atoi(tmp);
} else {
set_value = config->default_value;
}
if (0 <= set_value && set_value < config->config_max_length) {
if(i == MAM_NUM_TARGETS) {
config->set_config_complex(set_value);
} else {
config->set_config_simple(set_value, config->value);
}
}
tmp = NULL;
}
return 0;
}
int MAM_I_set_method(unsigned int new_method, unsigned int *method) {
*method = new_method;
return *method;
}
int MAM_I_set_spawn_strat(unsigned int strategy, unsigned int *strategies) {
int result = 0;
int strat_removed = 0;
switch(strategy) {
case MAM_STRAT_SPAWN_CLEAR:
*strategies = MAM_STRAT_CLEAR_VALUE;
result = MAM_STRATS_MODIFIED;
break;
case MAM_STRAT_SPAWN_PTHREAD:
result = MAM_I_add_strat(strategies, MAM_MASK_PTHREAD);
break;
case MAM_STRAT_SPAWN_SINGLE:
result = MAM_I_add_strat(strategies, MAM_MASK_SPAWN_SINGLE);
break;
case MAM_STRAT_SPAWN_INTERCOMM:
result = MAM_I_add_strat(strategies, MAM_MASK_SPAWN_INTERCOMM);
break;
default:
//Unkown strategy
result = MALL_DENIED;
break;
}
if(strat_removed) {
result = MAM_STRATS_MODIFIED;
}
return result;
}
int MAM_I_set_red_strat(unsigned int strategy, unsigned int *strategies) {
int result = 0;
int strat_removed = 0;
switch(strategy) {
case MAM_STRAT_RED_CLEAR:
*strategies = MAM_STRAT_CLEAR_VALUE;
result = MAM_STRATS_MODIFIED;
break;
case MAM_STRAT_RED_PTHREAD: //TODO - IMPROVEMENT - This could be done with a single operation instead of 3.
result = MAM_I_add_strat(strategies, MAM_MASK_PTHREAD);
if(result == MAM_STRATS_ADDED) {
strat_removed += MAM_I_remove_strat(strategies, MAM_MASK_RED_WAIT_SOURCES);
strat_removed += MAM_I_remove_strat(strategies, MAM_MASK_RED_WAIT_TARGETS);
}
break;
case MAM_STRAT_RED_WAIT_SOURCES:
result = MAM_I_add_strat(strategies, MAM_MASK_RED_WAIT_SOURCES);
if(result == MAM_STRATS_ADDED) {
strat_removed += MAM_I_remove_strat(strategies, MAM_MASK_RED_WAIT_TARGETS);
strat_removed += MAM_I_remove_strat(strategies, MAM_MASK_PTHREAD);
}
break;
case MAM_STRAT_RED_WAIT_TARGETS:
result = MAM_I_add_strat(strategies, MAM_MASK_RED_WAIT_TARGETS);
if(result == MAM_STRATS_ADDED) {
strat_removed += MAM_I_remove_strat(strategies, MAM_MASK_RED_WAIT_SOURCES);
strat_removed += MAM_I_remove_strat(strategies, MAM_MASK_PTHREAD);
}
break;
default:
//Unkown strategy
result = MALL_DENIED;
break;
}
if(strat_removed) {
result = MAM_STRATS_MODIFIED;
}
return result;
}
int MAM_I_set_target_number(unsigned int new_numC) {
if(state > MALL_NOT_STARTED || new_numC == 0) return MALL_DENIED;
mall->numC = (int) new_numC;
return new_numC;
}
/*
* Returns 1 if strategy is applied, 0 otherwise
*/
int MAM_I_contains_strat(unsigned int comm_strategies, unsigned int strategy) {
return comm_strategies & strategy;
}
int MAM_I_add_strat(unsigned int *comm_strategies, unsigned int strategy) {
if(MAM_I_contains_strat(*comm_strategies, strategy)) return MAM_OK;
*comm_strategies |= strategy;
return MAM_STRATS_ADDED;
}
int MAM_I_remove_strat(unsigned int *comm_strategies, unsigned int strategy) {
if(!MAM_I_contains_strat(*comm_strategies, strategy)) return MAM_OK;
*comm_strategies &= ~strategy;
return MAM_STRATS_MODIFIED;
}
#ifndef MAM_CONFIGURATION_H
#define MAM_CONFIGURATION_H
#include <mpi.h>
#include "malleabilityStates.h"
#define MAM_STRAT_CLEAR_VALUE 0
#define MAM_STRATS_ADDED 1
#define MAM_STRATS_MODIFIED 2
#define MAM_MASK_PTHREAD 0x01
#define MAM_MASK_SPAWN_SINGLE 0x02
#define MAM_MASK_SPAWN_INTERCOMM 0x04
#define MAM_MASK_RED_WAIT_SOURCES 0x02
#define MAM_MASK_RED_WAIT_TARGETS 0x04
int MAM_Contains_strat(int key, unsigned int strategy, int *result);
void MAM_Set_configuration(int spawn_method, int spawn_strategies, int spawn_dist, int red_method, int red_strategies);
void MAM_Set_key_configuration(int key, int required, int *provided);
int MAM_Set_target_number(unsigned int numC);
#endif
#ifndef MAM_INIT_CONFIGURATION_H
#define MAM_INIT_CONFIGURATION_H
#include <mpi.h>
#include "malleabilityStates.h"
void MAM_Init_configuration();
void MAM_Set_initial_configuration();
void MAM_Check_configuration();
#endif
......@@ -104,12 +104,8 @@ void get_block_dist(int qty, int id, int numP, struct Dist_data *dist_data) {
dist_data->fin = (id+1) * dist_data->tamBl + rem;
}
if(dist_data->fin > qty) {
dist_data->fin = qty;
}
if(dist_data->ini > dist_data->fin) {
dist_data->ini = dist_data->fin;
}
if(dist_data->fin > qty) { dist_data->fin = qty; }
if(dist_data->ini > dist_data->fin) { dist_data->ini = dist_data->fin; }
dist_data->tamBl = dist_data->fin - dist_data->ini;
}
......@@ -131,18 +127,10 @@ void set_interblock_counts(int id, int numP, struct Dist_data data_dist, int off
}
// Obtiene el proceso con mayor ini entre los dos procesos
if(data_dist.ini > other.ini) {
biggest_ini = data_dist.ini;
} else {
biggest_ini = other.ini;
}
biggest_ini = (data_dist.ini > other.ini) ? data_dist.ini : other.ini;
// Obtiene el proceso con menor fin entre los dos procesos
if(data_dist.fin < other.fin) {
smallest_end = data_dist.fin;
} else {
smallest_end = other.fin;
}
smallest_end = (data_dist.fin < other.fin) ? data_dist.fin : other.fin;
sendcounts[id] = smallest_end - biggest_ini; // Numero de elementos a enviar/recibir del proceso Id
}
......
#include "malleabilityDataStructures.h"
int state = MALL_UNRESERVED;
/*
* Crea un tipo derivado para mandar las dos estructuras principales
* de MaM.
*/
void MAM_Def_main_datatype() {
int i, counts = 10;
int blocklengths[counts];
MPI_Aint displs[counts];
MPI_Datatype types[counts];
for(i=0; i<5; i++) {
blocklengths[i] = 1;
types[i] = MPI_UNSIGNED;
}
for(i=5; i<counts; i++) {
blocklengths[i] = 1;
types[i] = MPI_INT;
}
// Obtain base direction
MPI_Get_address(&(mall_conf->spawn_method), &displs[0]);
MPI_Get_address(&(mall_conf->spawn_strategies), &displs[1]);
MPI_Get_address(&(mall_conf->spawn_dist), &displs[2]);
MPI_Get_address(&(mall_conf->red_method), &displs[3]);
MPI_Get_address(&(mall_conf->red_strategies), &displs[4]);
MPI_Get_address(&(mall->root_parents), &displs[5]);
MPI_Get_address(&(mall->num_parents), &displs[6]); //TODO Add only when Intercomm strat active?
MPI_Get_address(&(mall->num_cpus), &displs[7]);
MPI_Get_address(&(mall->num_nodes), &displs[8]);
MPI_Get_address(&(mall->nodelist_len), &displs[9]);
MPI_Type_create_struct(counts, blocklengths, displs, types, &mall->struct_type);
MPI_Type_commit(&mall->struct_type);
}
void MAM_Free_main_datatype() {
if(mall->struct_type != MPI_DATATYPE_NULL) {
MPI_Type_free(&mall->struct_type);
}
}
/*
* Comunica datos necesarios de las estructuras
* principales de MAM de sources a targets.
*/
void MAM_Comm_main_structures(int rootBcast) {
MPI_Bcast(MPI_BOTTOM, 1, mall->struct_type, rootBcast, mall->intercomm);
if(mall->nodelist == NULL) {
mall->nodelist = calloc(mall->nodelist_len+1, sizeof(char));
mall->nodelist[mall->nodelist_len] = '\0';
}
MPI_Bcast(mall->nodelist, mall->nodelist_len, MPI_CHAR, rootBcast, mall->intercomm);
}
......@@ -4,27 +4,63 @@
/*
* Shows available data structures for inner ussage.
*/
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
#include <pthread.h>
#include "malleabilityStates.h"
/* --- SPAWN STRUCTURES --- */
struct physical_dist {
int num_cpus, num_nodes;
char *nodelist;
int target_qty, already_created;
int dist_type, info_type;
};
#define DEBUG_FUNC(debug_string, rank, numP) printf("MaM [P%d/%d]: %s -- %s:%s:%d\n", rank, numP, debug_string, __FILE__, __func__, __LINE__)
/* --- TIME CAPTURE STRUCTURE --- */
typedef struct {
int myId, root, root_parents;
int spawn_qty, initial_qty, target_qty;
int already_created;
int spawn_method, spawn_is_single, spawn_is_async;
char *cmd; //Executable name
MPI_Info mapping;
MPI_Datatype dtype;
struct physical_dist dist; // Used to create mapping var
MPI_Comm comm, returned_comm;
} Spawn_data;
// Spawn, Sync and Async time
double spawn_start, spawn_time;
double sync_start, sync_end;
double async_start, async_end;
double malleability_start, malleability_end;
MPI_Datatype times_type;
} malleability_times_t;
/* --- GLOBAL STRUCTURES --- */
typedef struct {
unsigned int spawn_method;
unsigned int spawn_dist;
unsigned int spawn_strategies;
unsigned int red_method;
unsigned int red_strategies;
malleability_times_t *times;
} malleability_config_t;
typedef struct {
int myId, numP, numC, zombie;
int root, root_collectives;
int num_parents, root_parents;
pthread_t async_thread;
MPI_Comm comm, thread_comm;
MPI_Comm intercomm, tmp_comm;
MPI_Comm *user_comm;
MPI_Datatype struct_type;
// Specific vars for Wait_targets strat
int wait_targets_posted;
MPI_Request wait_targets;
char *name_exec, *nodelist;
int num_cpus, num_nodes, nodelist_len;
} malleability_t;
/* --- VARIABLES --- */
malleability_config_t *mall_conf;
malleability_t *mall;
extern int state;
/* --- FUNCTIONS --- */
void MAM_Def_main_datatype();
void MAM_Free_main_datatype();
void MAM_Comm_main_structures(int rootBcast);
#endif
This diff is collapsed.
......@@ -9,21 +9,25 @@
#include <mpi.h>
#include "malleabilityStates.h"
int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_exec, char *nodelist, int num_cpus, int num_nodes);
void free_malleability();
int malleability_checkpoint();
void set_benchmark_grp(int grp);
typedef struct {
int numS, numT;
int rank_state;
MPI_Comm comm;
} mam_user_reconf_t;
void set_malleability_configuration(int spawn_method, int spawn_strategies, int spawn_dist, int red_method, int red_strategies);
void set_children_number(int numC); // TODO TO BE DEPRECATED
void get_malleability_user_comm(MPI_Comm *comm);
int MAM_Init(int root, MPI_Comm *comm, char *name_exec, void (*user_function)(void *), void *user_args);
void MAM_Finalize();
int MAM_Checkpoint(int *mam_state, int wait_completed, void (*user_function)(void *), void *user_args);
void MAM_Resume_redistribution(int *mam_state);
void malleability_add_data(void *data, size_t total_qty, int type, int dependency, int is_replicated, int is_constant);
void malleability_modify_data(void *data, size_t index, size_t total_qty, int type, int dependency, int is_replicated, int is_constant);
void malleability_get_entries(size_t *entries, int is_replicated, int is_constant);
void malleability_get_data(void **data, size_t index, int is_replicated, int is_constant);
int MAM_Get_Reconf_Info(mam_user_reconf_t *reconf_info);
void MAM_Data_add(void *data, size_t *index, size_t total_qty, MPI_Datatype type, int is_replicated, int is_constant);
void MAM_Data_modify(void *data, size_t index, size_t total_qty, MPI_Datatype type, int is_replicated, int is_constant);
void MAM_Data_get_entries(int is_replicated, int is_constant, size_t *entries);
void MAM_Data_get_pointer(void **data, size_t index, size_t *total_qty, MPI_Datatype *type, int is_replicated, int is_constant);
void MAM_Retrieve_times(double *sp_time, double *sy_time, double *asy_time, double *mall_time);
void retrieve_results(double *sp_time, double *sy_time, double *asy_time, double *mall_time, double *ex_start); //FIXME BORRAR
void set_global_time(double ex_start); //FIXME BORRAR
#endif
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sched.h>
#include <mpi.h>
#include "malleabilityRMS.h"
#include "malleabilityDataStructures.h"
#if USE_MAL_SLURM
#include <slurm/slurm.h>
int MAM_I_slurm_getenv_hosts_info();
int MAM_I_slurm_getjob_hosts_info();
#endif
int MAM_I_get_hosts_info();
int GetCPUCount();
void MAM_check_hosts() {
int not_filled = 1;
#if USE_MAL_SLURM
not_filled = MAM_I_slurm_getenv_hosts_info();
if(not_filled) {
if(mall->nodelist != NULL) {
free(mall->nodelist);
mall->nodelist = NULL;
}
not_filled = MAM_I_slurm_getjob_hosts_info();
}
#endif
if(not_filled) {
if(mall->nodelist != NULL) {
free(mall->nodelist);
mall->nodelist = NULL;
}
not_filled = MAM_I_get_hosts_info();
}
if(not_filled) {
if(mall->myId == mall->root) printf("MAM FATAL ERROR: It has not been possible to obtain the nodelist\n");
fflush(stdout);
MPI_Abort(mall->comm, -50);
}
#if USE_MAL_DEBUG >= 2
if(mall->myId == mall->root) {
DEBUG_FUNC("Obtained Nodelist", mall->myId, mall->numP);
printf("NODELIST: %s\nNODE_COUNT: %d NUM_CPUS_PER_NODE: %d\n", mall->nodelist, mall->num_nodes, mall->num_cpus);
fflush(stdout);
}
#endif
}
/*
* TODO
* FIXME Does not consider heterogenous machines for num_cpus
* FIXME Always returns 0... -- Perform error checking?
*/
int MAM_I_get_hosts_info() {
int i, j, name_len, max_name_len, unique_count, *unique_hosts;
char *my_host, *all_hosts, *confirmed_host, *tested_host;
all_hosts = NULL;
my_host = (char *) malloc(MPI_MAX_PROCESSOR_NAME * sizeof(char));
MPI_Get_processor_name(my_host, &name_len);
MPI_Allreduce(&name_len, &max_name_len, 1, MPI_INT, MPI_MAX, mall->comm);
my_host[max_name_len] = '\0';
max_name_len++; // Len does not consider terminating character
if(mall->myId == mall->root) {
all_hosts = (char *) malloc(mall->numP * max_name_len * sizeof(char));
unique_hosts = (int *) malloc(mall->numP * sizeof(int));
unique_hosts[0] = 0; //First host will always be unique
unique_count = 1;
}
MPI_Gather(my_host, max_name_len, MPI_CHAR, all_hosts, max_name_len, MPI_CHAR, mall->root, mall->comm);
if(mall->myId == mall->root) {
for (i = 1; i < mall->numP; i++) {
for (j = 0; j < unique_count; j++) {
tested_host = all_hosts + (i * max_name_len);
confirmed_host = all_hosts + (unique_hosts[j] * max_name_len);
if (strcmp(tested_host, confirmed_host) != 0) {
unique_hosts[unique_count] = i;
unique_count++;
break;
}
}
}
mall->num_nodes = unique_count;
mall->num_cpus = GetCPUCount();
mall->nodelist_len = unique_count*max_name_len;
mall->nodelist = (char *) malloc(mall->nodelist_len * sizeof(char));
strcpy(mall->nodelist, ""); //FIXME Strcat can be very inneficient...
for (i = 0; i < unique_count; i++) {
confirmed_host = all_hosts + (unique_hosts[i] * max_name_len);
strcat(mall->nodelist, confirmed_host);
if (i < unique_count - 1) {
strcat(mall->nodelist, ",");
}
}
free(all_hosts);
free(unique_hosts);
}
free(my_host);
return 0;
}
/*
* @brief Get the total number of CPUs available to the process.
*
* This function uses sched_getaffinity to obtain the CPU affinity of the current process
* and counts the number of CPUs in the affinity set. It adjusts the loop based on the
* maximum number of CPUs allowed on the system.
*
* @return The total number of CPUs available to the process.
*
* Code obtained from: https://stackoverflow.com/questions/4586405/how-to-get-the-number-of-cpus-in-linux-using-c
* The code has been slightly modified.
*/
int GetCPUCount() {
cpu_set_t cs;
CPU_ZERO(&cs);
sched_getaffinity(0, sizeof(cs), &cs);
int count = 0;
int max_cpus = sysconf(_SC_NPROCESSORS_ONLN);
for (int i = 0; i < max_cpus; i++) {
if (CPU_ISSET(i, &cs)) {
count++;
} else {
break;
}
}
return count;
}
#if USE_MAL_SLURM
/*
* TODO
*/
int MAM_I_slurm_getenv_hosts_info() {
char *tmp = NULL, *tmp_copy, *token;
int cpus, count;
//int i, *cpus_counts, *nodes_counts, *aux;
tmp = getenv("SLURM_JOB_NUM_NODES");
if(tmp == NULL) return 1;
mall->num_nodes = atoi(tmp);
tmp = NULL;
tmp = getenv("SLURM_JOB_NODELIST");
if(tmp == NULL) return 1;
mall->nodelist_len = strlen(tmp)+1;
mall->nodelist = (char *) malloc(mall->nodelist_len * sizeof(char));
strcpy(mall->nodelist, tmp);
tmp = NULL;
tmp = getenv("SLURM_JOB_CPUS_PER_NODE");
if(tmp == NULL) return 1;
tmp_copy = (char *) malloc((strlen(tmp)+1) * sizeof(char));
strcpy(tmp_copy, tmp);
token = strtok(tmp_copy, ",");
//TODO When MaM considers heteregenous allocations, these will be needed instead of num_cpus.
//cpus_counts = (int *) malloc(mall->num_nodes * sizeof(int));
//nodes_counts = (int *) malloc(mall->num_nodes * sizeof(int));
//i = 0;
mall->num_cpus = 0;
while (token != NULL) {
count = 1; // The count is not present when is 1 node.
if (sscanf(token, "%d(x%d)", &cpus, &count) >= 1) {
mall->num_cpus = cpus; // num_cpus stores the amount of cores per cpu
//cpus_per_node[i] = cpus;
//nodes_count[i] = count;
//i++;
}
token = strtok(NULL, ",");
}
/*
if(i < mall->num_nodes) {
aux = (int *) realloc(cpus_per_node, i * sizeof(int));
if(cpus_per_node != aux && cpus_per_node != NULL) free(cpus_per_node);
cpus_per_node = aux;
aux = (int *) realloc(nodes_counts, i * sizeof(int));
if(nodes_count != aux && nodes_count != NULL) free(nodes_count);
nodes_count = aux;
}
*/
free(tmp_copy);
return 0;
}
/*
* TODO
* FIXME Does not consider heterogenous machines
*/
int MAM_I_slurm_getjob_hosts_info() {
int jobId, err;
char *tmp = NULL;
job_info_msg_t *j_info;
slurm_job_info_t last_record;
tmp = getenv("SLURM_JOB_ID");
if(tmp == NULL) return 1;
jobId = atoi(tmp);
err = slurm_load_job(&j_info, jobId, 1);
if(err) return err;
last_record = j_info->job_array[j_info->record_count - 1];
mall->num_nodes = last_record.num_nodes;
mall->num_cpus = last_record.num_cpus;
mall->nodelist_len = strlen(last_record.nodes)+1;
mall->nodelist = (char *) malloc(mall->nodelist_len * sizeof(char));
strcpy(mall->nodelist, last_record.nodes);
slurm_free_job_info_msg(j_info);
return 0;
}
#endif
//TODO REFACTOR PARA CUANDO SE COMUNIQUE CON RMS
// Get Slurm job info
//int jobId;
//char *tmp;
//job_info_msg_t *j_info;
//slurm_job_info_t last_record;
//tmp = getenv("SLURM_JOB_ID");
//jobId = atoi(tmp);
//slurm_load_job(&j_info, jobId, 1);
//last_record = j_info->job_array[j_info->record_count - 1];
// Free JOB INFO
//slurm_free_job_info_msg(j_info);
#ifndef MALLEABILITY_RMS_H
#define MALLEABILITY_RMS_H
void MAM_check_hosts();
#endif
......@@ -6,32 +6,46 @@
//States
#define MALL_DENIED -1
enum mall_states{MALL_UNRESERVED, MALL_NOT_STARTED, MALL_ZOMBIE, MALL_SPAWN_PENDING, MALL_SPAWN_SINGLE_PENDING,
#define MAM_OK 0
enum mall_inner_states{MALL_UNRESERVED, MALL_NOT_STARTED, MALL_RMS_COMPLETED, MALL_SPAWN_PENDING, MALL_SPAWN_SINGLE_PENDING,
MALL_SPAWN_SINGLE_COMPLETED, MALL_SPAWN_ADAPT_POSTPONE, MALL_SPAWN_COMPLETED, MALL_DIST_PENDING, MALL_DIST_COMPLETED,
MALL_SPAWN_ADAPT_PENDING, MALL_SPAWN_ADAPTED, MALL_COMPLETED};
enum mall_spawn_methods{MALL_SPAWN_BASELINE, MALL_SPAWN_MERGE};
#define MALL_SPAWN_PTHREAD 2
#define MALL_SPAWN_SINGLE 3
MALL_SPAWN_ADAPT_PENDING, MALL_USER_PENDING, MALL_USER_COMPLETED, MALL_SPAWN_ADAPTED, MALL_COMPLETED};
enum mam_states{MAM_UNRESERVED, MAM_NOT_STARTED, MAM_PENDING, MAM_USER_PENDING, MAM_COMPLETED};
enum mam_proc_states{MAM_PROC_CONTINUE, MAM_PROC_NEW_RANK, MAM_PROC_ZOMBIE};
enum mall_redistribution_methods{MALL_RED_BASELINE, MALL_RED_POINT, MALL_RED_RMA_LOCK, MALL_RED_RMA_LOCKALL, MALL_RED_IBARRIER};
#define MALL_RED_THREAD 2
//#define MALL_RED_IBARRIER 3 Agregar como estrategia y eliminar como método
enum mall_spawn_methods{MALL_SPAWN_BASELINE, MALL_SPAWN_MERGE, MAM_METHODS_SPAWN_LEN};
enum mam_spawn_strategies{MAM_STRAT_SPAWN_CLEAR, MAM_STRAT_SPAWN_PTHREAD, MAM_STRAT_SPAWN_SINGLE, MAM_STRAT_SPAWN_INTERCOMM, MAM_STRATS_SPAWN_LEN};
enum mam_phy_dist_methods{MALL_DIST_SPREAD = 1, MALL_DIST_COMPACT, MAM_METHODS_PHYSICAL_DISTRIBUTION_LEN}; //FIXME Cambiar nombres a PHY_DIST?
enum mam_phy_info_methods{MALL_DIST_STRING = 1, MALL_DIST_HOSTFILE}; //FIXME Cambiar nombres a PHY_DIST?
enum mall_redistribution_methods{MALL_RED_BASELINE, MALL_RED_POINT, MALL_RED_RMA_LOCK, MALL_RED_RMA_LOCKALL, MAM_METHODS_RED_LEN};
enum mam_red_strategies{MAM_STRAT_RED_CLEAR, MAM_STRAT_RED_PTHREAD, MAM_STRAT_RED_WAIT_SOURCES, MAM_STRAT_RED_WAIT_TARGETS, MAM_STRATS_RED_LEN};
/* KEYS & VALUES for config*/
enum mam_key_values{MAM_SPAWN_METHOD=0, MAM_SPAWN_STRATEGIES, MAM_PHYSICAL_DISTRIBUTION, MAM_RED_METHOD, MAM_RED_STRATEGIES, MAM_NUM_TARGETS, MAM_KEY_COUNT};
#define MAM_SPAWN_METHOD_ENV "MAM_SPAWN_METHOD"
#define MAM_SPAWN_STRATS_ENV "MAM_SPAWN_STRATS"
#define MAM_PHYSICAL_DISTRIBUTION_METHOD_ENV "MAM_PHYSICAL_DISTRIBUTION_METHOD"
#define MAM_RED_METHOD_ENV "MAM_RED_METHOD"
#define MAM_RED_STRATS_ENV "MAM_RED_STRATS"
#define MAM_NUM_TARGETS_ENV "MAM_NUM_TARGETS"
#define MALLEABILITY_ROOT 0
#define MAL_APP_EXECUTING 0
#define MAL_APP_ENDED 1
#define MAL_INT 0
#define MAL_CHAR 1
#define MAL_DOUBLE 2
#define MAL_DATA_ALONE -1
#define MAL_DATA_INDEPENDENT 0
#define MAL_DATA_DEPENDENT 1
////////////////
#define MAM_CHECK_COMPLETION 0
#define MAM_WAIT_COMPLETION 1
#define MALLEABILITY_CHILDREN 1
#define MALLEABILITY_NOT_CHILDREN 0
#define MAM_DATA_DISTRIBUTED 0
#define MAM_DATA_REPLICATED 1
#define MAM_DATA_VARIABLE 0
#define MAM_DATA_CONSTANT 1
#endif
#include "malleabilityTimes.h"
#include "malleabilityDataStructures.h"
void def_malleability_times(MPI_Datatype *new_type);
void init_malleability_times() {
#if USE_MAL_DEBUG
DEBUG_FUNC("Initializing recording structure", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(mall->comm);
#endif
mall_conf->times = (malleability_times_t *) malloc(sizeof(malleability_times_t));
if(mall_conf->times == NULL) {
perror("Error al crear la estructura de tiempos interna para maleabilidad\n");
MPI_Abort(MPI_COMM_WORLD, -5);
}
reset_malleability_times();
def_malleability_times(&mall_conf->times->times_type);
#if USE_MAL_DEBUG
DEBUG_FUNC("Initialized recording structure", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(mall->comm);
#endif
}
void reset_malleability_times() {
malleability_times_t *times = mall_conf->times;
times->spawn_start = 0; times->sync_start = 0; times->async_start = 0; times->malleability_start = 0;
times->sync_end = 0; times->async_end = 0; times->malleability_end = 0;
times->spawn_time = 0;
}
void free_malleability_times() {
#if USE_MAL_DEBUG
DEBUG_FUNC("Freeing recording structure", mall->myId, mall->numP); fflush(stdout);
#endif
if(mall_conf->times != NULL) {
if(mall_conf->times->times_type != MPI_DATATYPE_NULL) {
MPI_Type_free(&mall_conf->times->times_type);
mall_conf->times->times_type = MPI_DATATYPE_NULL;
}
free(mall_conf->times);
}
#if USE_MAL_DEBUG
DEBUG_FUNC("Freed recording structure", mall->myId, mall->numP); fflush(stdout);
#endif
}
void malleability_times_broadcast(int root) {
MPI_Bcast(mall_conf->times, 1, mall_conf->times->times_type, root, mall->intercomm);
}
void MAM_I_retrieve_times(double *sp_time, double *sy_time, double *asy_time, double *mall_time) {
malleability_times_t *times = mall_conf->times;
*sp_time = times->spawn_time;
*sy_time = times->sync_end - times->sync_start;
*asy_time = times->async_end - times->async_start;
*mall_time = times->malleability_end - times->malleability_start;
}
void def_malleability_times(MPI_Datatype *new_type) {
int i, counts = 4;
int blocklengths[counts];
MPI_Aint displs[counts], dir;
MPI_Datatype types[counts];
blocklengths[0] = blocklengths[1] = blocklengths[2] = blocklengths[3] = 1;
types[0] = types[1] = types[2] = types[3] = MPI_DOUBLE;
// Se pasa el vector a traves de la direccion de "mall_conf"
// Rellenar vector displs
MPI_Get_address(mall_conf->times, &dir);
// Obtener direccion base
MPI_Get_address(&(mall_conf->times->spawn_time), &displs[0]);
MPI_Get_address(&(mall_conf->times->sync_start), &displs[1]);
MPI_Get_address(&(mall_conf->times->async_start), &displs[2]);
MPI_Get_address(&(mall_conf->times->malleability_start), &displs[3]);
for(i=0;i<counts;i++) displs[i] -= dir;
MPI_Type_create_struct(counts, blocklengths, displs, types, new_type);
MPI_Type_commit(new_type);
}
#ifndef MALLEABILITY_TIMES_H
#define MALLEABILITY_TIMES_H
#include <mpi.h>
void init_malleability_times();
void reset_malleability_times();
void free_malleability_times();
void malleability_times_broadcast(int root);
void MAM_I_retrieve_times(double *sp_time, double *sy_time, double *asy_time, double *mall_time);
#endif
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment