Commit 7836c7b6 authored by iker_martin's avatar iker_martin
Browse files
parents 352fcd52 07aa4fe0
objects := ini.o read_ini.o results.o
DISTRI_LOC = $(TOP)/malleability/distribution_methods/block_distribution.h
CC := gcc
MCC := mpicc
CFLAGS := -Wall -Wextra
all: $(objects)
ini.o: ini.c ini.h
echo $(BUILDDIR)
$(CC) $(CFLAGS) -c -o $(BUILDDIR)/$@ $<
read_ini.o: read_ini.c read_ini.h ini.h $(DISTRI_LOC)
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@ $<
results.o: results.c results.h
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@ $<
#$@ --> Objeto
#$< --> Source
......@@ -3,7 +3,7 @@
#include <string.h>
#include <mpi.h>
#include "read_ini.h"
#include "../malleability/ProcessDist.h"
#include "../malleability/spawn_methods/ProcessDist.h"
#include "../malleability/distribution_methods/block_distribution.h"
#include "ini.h"
......@@ -79,9 +79,9 @@ static int handler(void* user, const char* section, const char* name,
if(pconfig->actual_resize < pconfig->n_resizes) {
char *aux = strdup(value);
if (strcmp(aux, "spread") == 0) {
pconfig->phy_dist[pconfig->actual_resize] = COMM_PHY_SPREAD;
pconfig->phy_dist[pconfig->actual_resize] = MALL_DIST_SPREAD;
} else {
pconfig->phy_dist[pconfig->actual_resize] = COMM_PHY_COMPACT;
pconfig->phy_dist[pconfig->actual_resize] = MALL_DIST_COMPACT;
}
free(aux);
pconfig->actual_resize = pconfig->actual_resize+1; // Ultimo elemento del grupo
......@@ -265,7 +265,6 @@ void print_config_group(configuration *user_config, int grp) {
* configuracion al otro grupo.
*/
void send_config_file(configuration *config_file, int root, MPI_Comm intercomm) {
MPI_Datatype config_type, config_type_array, iter_stage_type;
// Obtener un tipo derivado para enviar todos los
......@@ -304,10 +303,7 @@ void send_config_file(configuration *config_file, int root, MPI_Comm intercomm)
* la funcion "free_config".
*/
void recv_config_file(int root, MPI_Comm intercomm, configuration **config_file_out) {
MPI_Datatype config_type, config_type_array, iter_stage_type;
configuration *config_file = malloc(sizeof(configuration) * 1);
// Obtener un tipo derivado para recibir todos los
......
......@@ -112,14 +112,13 @@ int main(int argc, char *argv[]) {
do {
group->grp = group->grp + 1;
if(group->grp != 0) obtain_op_times(0); //Obtener los nuevos valores de tiempo para el computo
set_benchmark_grp(group->grp);
get_malleability_user_comm(&comm);
MPI_Comm_size(comm, &(group->numP));
MPI_Comm_rank(comm, &(group->myId));
if(group->grp != 0) {
obtain_op_times(0); //Obtener los nuevos valores de tiempo para el computo
}
if(config_file->n_resizes != group->grp + 1) {
set_malleability_configuration(config_file->sm, config_file->ss, config_file->phy_dist[group->grp+1], -1, config_file->at, -1);
if(config_file->n_resizes != group->grp + 1) { //TODO Llevar a otra funcion
set_malleability_configuration(config_file->sm, config_file->ss, config_file->phy_dist[group->grp+1], config_file->at, -1);
set_children_number(config_file->procs[group->grp+1]); // TODO TO BE DEPRECATED
if(group->grp == 0) {
......@@ -134,11 +133,15 @@ int main(int argc, char *argv[]) {
}
res = work();
if(res == MAL_ZOMBIE) break;
if(res == MALL_ZOMBIE) break;
get_malleability_user_comm(&comm);
MPI_Comm_size(comm, &(group->numP));
MPI_Comm_rank(comm, &(group->myId));
print_local_results();
reset_results_index(results);
} while((config_file->n_resizes > group->grp + 1) && (config_file->sm == COMM_SPAWN_MERGE || config_file->sm == COMM_SPAWN_MERGE_PTHREAD));
} while(config_file->n_resizes > group->grp + 1 && config_file->sm == MALL_SPAWN_MERGE);
//
// TERMINA LA EJECUCION ----------------------------------------------------------
......@@ -155,7 +158,7 @@ int main(int argc, char *argv[]) {
MPI_Comm_free(&comm);
}
if(group->myId == ROOT && (config_file->sm == COMM_SPAWN_MERGE || config_file->sm == COMM_SPAWN_MERGE_PTHREAD)) {
if(group->myId == ROOT && config_file->sm == MALL_SPAWN_MERGE) {
MPI_Abort(MPI_COMM_WORLD, -100);
}
free_application_data();
......@@ -185,7 +188,7 @@ int work() {
double *matrix = NULL;
maxiter = config_file->iters[group->grp];
state = MAL_NOT_STARTED;
state = MALL_NOT_STARTED;
res = 0;
for(iter=group->iter_start; iter < maxiter; iter++) {
......@@ -196,7 +199,7 @@ int work() {
state = malleability_checkpoint();
iter = 0;
while(state == MAL_DIST_PENDING || state == MAL_SPAWN_PENDING || state == MAL_SPAWN_SINGLE_PENDING) {
while(state == MALL_DIST_PENDING || state == MALL_SPAWN_PENDING || state == MALL_SPAWN_SINGLE_PENDING || state == MALL_SPAWN_ADAPT_POSTPONE) {
if(iter < config_file->iters[group->grp+1]) {
iterate(matrix, config_file->granularity, state, iter);
iter++;
......@@ -207,7 +210,7 @@ int work() {
if(config_file->n_resizes - 1 == group->grp) res=1;
if(state == MAL_ZOMBIE) res=state;
if(state == MALL_ZOMBIE) res=state;
return res;
}
......@@ -239,7 +242,7 @@ double iterate(double *matrix, int n, int async_comm, int iter) {
actual_time = MPI_Wtime(); // Guardar tiempos
// TODO Que diferencie entre ambas en el IO
if(async_comm == MAL_DIST_PENDING || async_comm == MAL_SPAWN_PENDING || async_comm == MAL_SPAWN_SINGLE_PENDING) { // Se esta realizando una redistribucion de datos asincrona
if(async_comm == MALL_DIST_PENDING || async_comm == MALL_SPAWN_PENDING || async_comm == MALL_SPAWN_SINGLE_PENDING) { // Se esta realizando una redistribucion de datos asincrona
cnt_async=1;
}
......@@ -409,7 +412,7 @@ void obtain_op_times(int compute) {
for(i=0; i<config_file->n_stages; i++) {
time+=init_stage(config_file, i, *group, comm, compute);
}
if(!compute) results->wasted_time += time;
if(!compute) {results->wasted_time += time;}
}
/*
......
objects1 := computing_func comunication_func linear_reg
objects2 := process_stage
objects3 := Main
#Pasar nombre a level -- Los objects/headers/source añadir sufijo
DISTRI_LOC = $(TOP)/malleability/distribution_methods/block_distribution.h
MALLEABILITY_DEPENDS = $(TOP)/malleability/CommDist.h $(TOP)/malleability/malleabilityStates.h $(TOP)/malleability/malleabilityManager.h
DEPENDS := Main_datatypes.h
CC := gcc
MCC := mpicc
CFLAGS := -Wall -Wextra
all: $(objects1) $(objects2) $(objects3)
$(objects1): %: %.c %.h $(DEPENDS)
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
$(objects2): %: %.c %.h $(objects1).h $(DEPENDS) $(DISTRI_LOC)
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
$(objects3): %: %.c $(objects2).h $(DEPENDS) $(TOP)/IOcodes/read_ini.h $(TOP)/IOcodes/results.h $(MALLEABILITY_DEPENDS)
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
......@@ -267,19 +267,18 @@ void linear_regression_stage(iter_stage_t *stage, group_data group, MPI_Comm com
*/
double init_matrix_pt(group_data group, configuration *config_file, iter_stage_t *stage, MPI_Comm comm, int compute) {
double result, t_stage;
double result, t_stage, start_time;
result = 0;
t_stage = stage->t_stage * config_file->factors[group.grp];
initMatrix(&(stage->double_array), config_file->granularity);
double start_time = MPI_Wtime();
if(group.myId == ROOT && compute) {
result+= process_stage(*config_file, *stage, group, comm);
}
if(compute) {
start_time = MPI_Wtime();
if(group.myId == ROOT) {
result+= process_stage(*config_file, *stage, group, comm);
stage->t_op = (MPI_Wtime() - start_time) / stage->operations; //Tiempo de una operacion
}
MPI_Bcast(&(stage->t_op), 1, MPI_DOUBLE, ROOT, comm);
}
stage->operations = t_stage / stage->t_op;
......@@ -292,13 +291,12 @@ double init_pi_pt(group_data group, configuration *config_file, iter_stage_t *st
result = 0;
t_stage = stage->t_stage * config_file->factors[group.grp];
if(compute) {
start_time = MPI_Wtime();
if(group.myId == ROOT && compute) {
if(group.myId == ROOT) {
result+= process_stage(*config_file, *stage, group, comm);
}
if(compute) {
stage->t_op = (MPI_Wtime() - start_time) / stage->operations; //Tiempo de una operacion
}
MPI_Bcast(&(stage->t_op), 1, MPI_DOUBLE, ROOT, comm);
}
stage->operations = t_stage / stage->t_op;
......@@ -360,13 +358,13 @@ double init_comm_allgatherv_pt(group_data group, configuration *config_file, ite
MPI_Reduce(&time, NULL, 1, MPI_DOUBLE, MPI_MAX, ROOT, comm);
}
}
if(stage->counts.counts != NULL)
freeCounts(&(stage->counts));
prepare_comm_allgatherv(group.numP, stage->real_bytes, &(stage->counts));
get_block_dist(stage->real_bytes, group.myId, group.numP, &dist_data);
stage->my_bytes = dist_data.tamBl;
if(stage->array != NULL)
free(stage->array);
stage->array = malloc(sizeof(char) * stage->my_bytes);
......
......@@ -8,11 +8,12 @@
enum compute_methods{COMP_PI, COMP_MATRIX, COMP_POINT, COMP_BCAST, COMP_ALLGATHER, COMP_REDUCE, COMP_ALLREDUCE};
//FIXME Refactor el void
double init_stage(configuration *config_file, int stage_i, group_data group, MPI_Comm comm, int compute);
//double stage_init_all();
double process_stage(configuration config_file, iter_stage_t stage, group_data group, MPI_Comm comm);
double latency(int myId, int numP, MPI_Comm comm);
double bandwidth(int myId, int numP, MPI_Comm comm, double latency, int n);
//FIXME Refactor el void??
void linear_regression_stage(iter_stage_t *stage, group_data group, MPI_Comm comm);
#endif
export TOP := $(dir $(CURDIR)/$(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
BUILD := build
EXEC := exec
EXECDIR := $(addprefix $(TOP),$(EXEC))
export BUILDDIR = $(addprefix $(TOP),$(BUILD))
SUBDIRS := IOcodes Main malleability
.PHONY: subdirs $(SUBDIRS) build all clean clear
#
#
#
#
CC := gcc
MCC := mpicc
CFLAGS := -Wall -Wextra
LIBFLAGS := -lm -lslurm -pthread
#
#
#
#
all: subdirs exec
install:
#runTests
exec: subdirs
mkdir -p $(EXECDIR) all
$(MCC) $(CFLAGS) -o $(EXECDIR)/a.out $(wildcard $(BUILDDIR)/*.o) $(LIBFLAGS)
subdirs: $(SUBDIRS)
$(SUBDIRS): | $(BUILD)
$(MAKE) -C $@
# Carpeta en la que almacenar los compilados y los ejecutables
$(BUILD):
mkdir -p $(BUILDDIR)
CC = gcc
MCC = mpicc
C_FLAGS_ALL = -Wfatal-errors -Wall -Wextra -Wpedantic -Wconversion -Wshadow
C_FLAGS = -Wall
LD_FLAGS = -lm -lslurm -pthread
.PHONY : clean clear
# Final binary
BIN = a.out
# Put all auto generated stuff to this build dir.
BUILD_DIR = ./build
# List of all directories where source files are located
SRCDIRS = IOcodes Main malleability malleability/spawn_methods malleability/distribution_methods
# List of all .c source files.
C_FILES = $(foreach dire, $(SRCDIRS), $(wildcard $(dire)/*.c))
# All .o files go to build dir.
OBJ = $(C_FILES:%.c=$(BUILD_DIR)/%.o)
# Gcc will create these .d files containing dependencies.
DEP = $(OBJ:%.o=%.d)
# Default target named after the binary.
$(BIN) : $(BUILD_DIR)/$(BIN)
# Actual target of the binary - depends on all .o files.
$(BUILD_DIR)/$(BIN) : $(OBJ)
$(MCC) $(C_FLAGS) $^ -o $@ $(LD_FLAGS)
# Include all .d files
# .d files are used for knowing the dependencies of each source file
-include $(DEP)
# Build target for every single object file.
# The potential dependency on header files is covered
# by calling `-include $(DEP)`.
# The -MMD flags additionaly creates a .d file with
# the same name as the .o file.
$(BUILD_DIR)/%.o : %.c
mkdir -p $(@D)
$(MCC) $(C_FLAGS) -MMD -c $< -o $@
clean:
-rm $(BUILDDIR)/*.o
-rm $(BUILD_DIR)/$(BIN) $(OBJ) $(DEP)
clear:
-rm -rf $(BUILDDIR)
-rm -rf $(EXECDIR)
install:
mpicc -Wall Main/Main.c Main/computing_func.c Main/comunication_func.c Main/linear_reg.c Main/process_stage.c IOcodes/results.c IOcodes/read_ini.c IOcodes/ini.c malleability/malleabilityManager.c malleability/malleabilityTypes.c malleability/malleabilityZombies.c malleability/ProcessDist.c malleability/CommDist.c malleability/distribution_methods/block_distribution.c -pthread -lslurm -lm
if [ $# -gt 0 ]
then
if [ $1 = "-e" ]
then
echo "Creado ejecutable para ejecuciones"
cp a.out bench.out
fi
fi
dir_targets := distribution_methods
objects1 := CommDist
objects2 := malleabilityTypes ProcessDist
objects3 := malleabilityZombies
objects4 := malleabilityManager
DEPENDS := $(addsuffix .h, malleabilityDataStructures malleabilityStates)
CC := gcc
MCC := mpicc
CFLAGS := -Wall -Wextra
.PHONY: $(dir_targets) subdir
all: subdir $(objects1) $(objects2) $(objects3) $(objects4)
subdir: $(dir_targets)
$(dir_targets): %:
$(MAKE) -C $@
$(objects1): %: %.c %.h $(DEPENDS) $(dir_targets)/block_distribution.h
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
$(objects2): %: %.c %.h $(DEPENDS)
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
$(objects3): %: %.c %.h $(DEPENDS) $(TOP)/IOcodes/results.h
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
$(objects4): %: %.c %.h $(objects1).h $(objects2).h $(objects3).h $(DEPENDS) \
$(TOP)/IOcodes/read_ini.h $(TOP)/IOcodes/results.h $(TOP)/Main/Main_datatypes.h
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
#include <mpi.h>
#include <string.h>
#include <slurm/slurm.h>
#include "ProcessDist.h"
int commState = MAL_NOT_STARTED;
struct Slurm_data *slurm_data;
pthread_t spawn_thread;
pthread_mutex_t spawn_mutex;
MPI_Comm *returned_comm;
double end_time; //FIXME REFACTOR
struct Slurm_data {
char *cmd; // Executable name
char *nodelist;
int num_cpus, num_nodes;
int qty_procs, result_procs;
MPI_Info info;
int type_creation;
int spawn_is_single;
};
typedef struct {
char *argv;
int numP_childs, myId, root, already_created;
int type_dist;
int spawn_is_single;
int spawn_method;
MPI_Comm comm;
}Creation_data;
//--------------PRIVATE SPAWN TYPE DECLARATIONS---------------//
void* thread_work(void* creation_data_arg);
//--------------PRIVATE DECLARATIONS---------------//
void processes_dist(char *argv, int numP_childs, int already_created, int type_dist);
void generic_spawn(int myId, int root, int is_single, MPI_Comm *child, MPI_Comm comm);
void single_spawn_connection(int myId, int root, MPI_Comm comm, MPI_Comm *child);
int create_processes(int myId, int root, MPI_Comm *child, MPI_Comm comm);
void node_dist(int type, int total_procs, int already_created, int **qty, int *used_nodes);
void fill_str_hostfile(int *qty, int used_nodes, char **hostfile_str);
int write_str_node(char **hostfile_str, int len_og, int qty, char *node_name);
//@deprecated functions
int create_hostfile(char *jobId, char **file_name);
int write_hostfile_node(int ptr, int qty, char *node_name);
void fill_hostfile(slurm_job_info_t job_record, int ptr, int *qty, int used_nodes);
//--------------PUBLIC FUNCTIONS---------------//
/*
* Se solicita la creacion de un nuevo grupo de "numP" procesos con una distribucion
* fisica "type_dist".
*
* Se puede solicitar en primer plano, encargandose por tanto el proceso que llama a esta funcion,
* o en segundo plano, donde un hilo se encarga de configurar esta creacion.
*
* Si se pide en primer plano, al terminarla es posible llamar a "check_slurm_comm()" para crear
* los procesos.
*
* Si se pide en segundo plano, llamar a "check_slurm_comm()" comprobara si la configuracion para
* crearlos esta lista, y si es asi, los crea.
*
* Devuelve el estado de el procedimiento. Si no devuelve "COMM_FINISHED", es necesario llamar a
* "check_slurm_comm()".
*/
int init_slurm_comm(char *argv, int num_cpus, int num_nodes, char *nodelist, int myId, int numP, int numC, int root, int type_dist, int type_creation, int spawn_is_single, MPI_Comm comm, MPI_Comm *child) {
int spawn_qty, already_created = 0;
slurm_data = malloc(sizeof(struct Slurm_data));
spawn_thread = pthread_self();
slurm_data->type_creation = type_creation;
slurm_data->spawn_is_single = spawn_is_single;
slurm_data->result_procs = numC;
slurm_data->num_cpus = num_cpus;
slurm_data->num_nodes = num_nodes;
slurm_data->nodelist = nodelist;
spawn_qty = numC;
if(type_creation == COMM_SPAWN_MERGE || type_creation == COMM_SPAWN_MERGE_PTHREAD) {
if (numP < slurm_data->result_procs) {
spawn_qty = slurm_data->result_procs - numP;
already_created = numP;
}
}
pthread_mutex_init(&spawn_mutex,NULL);
if(type_creation == COMM_SPAWN_SERIAL || slurm_data->type_creation == COMM_SPAWN_MERGE) {
if(myId == root) {
processes_dist(argv, spawn_qty, already_created, type_dist);
} else {
slurm_data->cmd = malloc(1 * sizeof(char));
slurm_data->info = MPI_INFO_NULL;
}
// WORK
generic_spawn(myId, root, slurm_data->spawn_is_single, child, comm);
// END WORK
if(myId == root && slurm_data->info != MPI_INFO_NULL) {
MPI_Info_free(&(slurm_data->info));
}
pthread_mutex_destroy(&spawn_mutex);
free(slurm_data->cmd);
free(slurm_data);
} else if(type_creation == COMM_SPAWN_PTHREAD || slurm_data->type_creation == COMM_SPAWN_MERGE_PTHREAD) {
commState = MAL_SPAWN_PENDING;
if((spawn_is_single && myId == root) || !spawn_is_single || (slurm_data->type_creation == COMM_SPAWN_MERGE_PTHREAD && numP > slurm_data->result_procs)) {
Creation_data *creation_data = (Creation_data *) malloc(sizeof(Creation_data));
creation_data->argv = argv;
creation_data->numP_childs = spawn_qty;
creation_data->already_created = already_created;
creation_data->myId = myId;
creation_data->root = root;
creation_data->type_dist = type_dist;
creation_data->comm = comm;
if(pthread_create(&spawn_thread, NULL, thread_work, (void *)creation_data)) {
printf("Error al crear el hilo de contacto con SLURM\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -1;
}
}
}
return commState;
}
/*
* Comprueba si una configuracion para crear un nuevo grupo de procesos esta lista,
* y en caso de que lo este, se devuelve el communicador a estos nuevos procesos.
*/
int check_slurm_comm(int myId, int root, int numP, MPI_Comm *child, MPI_Comm comm, MPI_Comm comm_thread, double *real_time) {
if(slurm_data->type_creation == COMM_SPAWN_PTHREAD || slurm_data->type_creation == COMM_SPAWN_MERGE_PTHREAD) {
if (slurm_data->type_creation == COMM_SPAWN_MERGE_PTHREAD && numP > slurm_data->result_procs) { //TODO REFACTOR
printf("Error Check spawn: Configuracion invalida\nSe intenta usar el método Spawn junto a un Shrink merge\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -10;
}
if(!slurm_data->spawn_is_single || commState == MAL_SPAWN_SINGLE_PENDING || commState == MAL_SPAWN_COMPLETED) {
int state=-10;
//printf("[%d][3] Test min\n", myId); fflush(stdout);
//pthread_mutex_lock(&spawn_mutex); // TODO Descomentar
MPI_Allreduce(&commState, &state, 1, MPI_INT, MPI_MIN, comm);
//pthread_mutex_unlock(&spawn_mutex);
if(state != MAL_SPAWN_COMPLETED) return state; // Continue only if asynchronous process creation has ended
//printf("[%d][5] Test Passed-----------\n", myId); fflush(stdout);
if(pthread_join(spawn_thread, NULL)) {
printf("Error al esperar al hilo\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -10;
}
*child = *returned_comm;
} else if (slurm_data->spawn_is_single) {
//pthread_mutex_lock(&spawn_mutex); // TODO Descomentar
MPI_Bcast(&commState, 1, MPI_INT, root, comm);
//pthread_mutex_unlock(&spawn_mutex);
int threads_not_spawned = pthread_equal(pthread_self(), spawn_thread);
// Non-root processes join root to finalize the spawn
// They also must join if the application has ended its work
if(commState == MAL_SPAWN_SINGLE_START) {
commState = MAL_SPAWN_SINGLE_PENDING;
if(myId != root && threads_not_spawned) {
Creation_data *creation_data = (Creation_data *) malloc(sizeof(Creation_data));
creation_data->argv = NULL;
creation_data->numP_childs = -1;
creation_data->already_created = -1;
creation_data->myId = myId;
creation_data->root = root;
creation_data->type_dist = -1;
creation_data->comm = comm_thread;
if(pthread_create(&spawn_thread, NULL, thread_work, (void *)creation_data)) {
printf("Error al crear el hilo de apoyo\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -1;
}
}
}
// Continue only if asynchronous process creation has ended or application does not have more work
if(commState != MAL_SPAWN_COMPLETED) return commState;
//printf("[%d][4] Test Passed-----------\n", myId); fflush(stdout);
//Asegurar que los hilos han terminado
if(pthread_join(spawn_thread, NULL)) {
printf("Error al esperar al hilo\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -10;
}
*child = *returned_comm;
} else {
printf("Error Check spawn: Configuracion invalida\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -10;
}
} else {
return commState;
}
//Free memory
if(myId == root && slurm_data->info != MPI_INFO_NULL) {
MPI_Info_free(&(slurm_data->info));
}
free(slurm_data->cmd);
free(slurm_data);
pthread_mutex_destroy(&spawn_mutex);
spawn_thread = pthread_self();
*real_time=end_time;
return commState;
}
/*
* Conectar grupo de hijos con grupo de padres
* Devuelve un intercomunicador para hablar con los padres
*
* Solo se utiliza cuando la creación de los procesos ha sido
* realizada por un solo proceso padre
*/
void malleability_establish_connection(int myId, int root, MPI_Comm *intercomm) {
char *port_name;
MPI_Comm newintercomm;
if(myId == root) {
port_name = (char *) malloc(MPI_MAX_PORT_NAME * sizeof(char));
MPI_Open_port(MPI_INFO_NULL, port_name);
MPI_Send(port_name, MPI_MAX_PORT_NAME, MPI_CHAR, root, 130, *intercomm);
} else {
port_name = malloc(1);
}
MPI_Comm_accept(port_name, MPI_INFO_NULL, root, MPI_COMM_WORLD, &newintercomm);
if(myId == root) {
MPI_Close_port(port_name);
}
free(port_name);
MPI_Comm_free(intercomm);
*intercomm = newintercomm;
}
//--------------PRIVATE THREAD FUNCTIONS---------------//
/*
* Funcion llamada por un hilo para que este se encarge
* de configurar la creacion de un nuevo grupo de procesos.
*
* Una vez esta lista la configuracion y es posible crear los procesos
* se avisa al hilo maestro.
*/
void* thread_work(void* creation_data_arg) {
Creation_data *creation_data = (Creation_data*) creation_data_arg;
returned_comm = (MPI_Comm *) malloc(sizeof(MPI_Comm));
if(creation_data->myId == creation_data->root) {
processes_dist(creation_data->argv, creation_data->numP_childs, creation_data->already_created, creation_data->type_dist);
} else {
slurm_data->cmd = malloc(1 * sizeof(char));
slurm_data->info = MPI_INFO_NULL;
}
generic_spawn(creation_data->myId, creation_data->root, slurm_data->spawn_is_single, returned_comm, creation_data->comm);
free(creation_data);
pthread_exit(NULL);
}
//--------------PRIVATE SPAWN CREATION FUNCTIONS---------------//
/*
* Funcion generica para la creacion de procesos. Obtiene la configuracion
* y segun esta, elige como deberian crearse los procesos.
*
* Cuando termina, modifica la variable global para indicar este cambio
*/
void generic_spawn(int myId, int root, int spawn_is_single, MPI_Comm *child, MPI_Comm comm) {
if(spawn_is_single) {
single_spawn_connection(myId, root, comm, child);
} else {
int rootBcast = MPI_PROC_NULL;
if(myId == root) rootBcast = MPI_ROOT;
create_processes(myId, root, child, comm);
MPI_Bcast(&spawn_is_single, 1, MPI_INT, rootBcast, *child);
}
pthread_mutex_lock(&spawn_mutex);
commState = MAL_SPAWN_COMPLETED;
end_time = MPI_Wtime();
pthread_mutex_unlock(&spawn_mutex);
}
/*
* Crea un grupo de procesos segun la configuracion indicada por la funcion
* "processes_dist()".
*/
int create_processes(int myId, int root, MPI_Comm *child, MPI_Comm comm) {
int spawn_err = MPI_Comm_spawn(slurm_data->cmd, MPI_ARGV_NULL, slurm_data->qty_procs, slurm_data->info, root, comm, child, MPI_ERRCODES_IGNORE);
if(spawn_err != MPI_SUCCESS) {
printf("Error creating new set of %d procs.\n", slurm_data->qty_procs);
}
return spawn_err;
}
/*
* Si la variable "type" es 1, la creación es con la participación de todo el grupo de padres
* Si el valor es diferente, la creación es solo con la participación del proceso root
*/
void single_spawn_connection(int myId, int root, MPI_Comm comm, MPI_Comm *child){
char *port_name;
int auxiliar_conf = COMM_SPAWN_SINGLE;
MPI_Comm newintercomm;
if (myId == root) {
create_processes(myId, root, child, MPI_COMM_SELF);
MPI_Bcast(&auxiliar_conf, 1, MPI_INT, MPI_ROOT, *child);
port_name = (char *) malloc(MPI_MAX_PORT_NAME * sizeof(char));
MPI_Recv(port_name, MPI_MAX_PORT_NAME, MPI_CHAR, root, 130, *child, MPI_STATUS_IGNORE);
commState = MAL_SPAWN_SINGLE_START; // Indicate other processes to join root to end spawn procedure
} else {
port_name = malloc(1);
}
MPI_Comm_connect(port_name, MPI_INFO_NULL, root, comm, &newintercomm);
if(myId == root)
MPI_Comm_free(child);
free(port_name);
*child = newintercomm;
}
//--------------PRIVATE MERGE TYPE FUNCTIONS---------------//
/*
* Se encarga de que el grupo de procesos resultante se
* encuentren todos en un intra comunicador, uniendo a
* padres e hijos en un solo comunicador.
*
* Se llama antes de la redistribución de datos.
*
* TODO REFACTOR
*/
void proc_adapt_expand(int *numP, int numC, MPI_Comm intercomm, MPI_Comm *comm, int is_children_group) {
MPI_Comm new_comm = MPI_COMM_NULL;
MPI_Intercomm_merge(intercomm, is_children_group, &new_comm); //El que pone 0 va primero
//MPI_Comm_free(intercomm); TODO Nueva redistribucion para estos casos y liberar aqui
// *intercomm = MPI_COMM_NULL;
*numP = numC;
if(*comm != MPI_COMM_WORLD && *comm != MPI_COMM_NULL) {
MPI_Comm_free(comm);
}
*comm=new_comm;
}
/*
* Se encarga de que el grupo de procesos resultante se
* eliminen aquellos procesos que ya no son necesarios.
* Los procesos eliminados se quedaran como zombies.
*
* Se llama una vez ha terminado la redistribución de datos.
*/
void proc_adapt_shrink(int numC, MPI_Comm *comm, int myId) {
int color = MPI_UNDEFINED;
MPI_Comm new_comm = MPI_COMM_NULL;
if(myId < numC) {
color = 1;
}
MPI_Comm_split(*comm, color, myId, &new_comm);
if(*comm != MPI_COMM_WORLD && *comm != MPI_COMM_NULL)
//MPI_Comm_free(comm); FIXME
*comm=new_comm;
}
/*
* Configura la creacion de un nuevo grupo de procesos, reservando la memoria
* para una llamada a MPI_Comm_spawn, obteniendo una distribucion fisica
* para los procesos y creando un fichero hostfile.
*/
void processes_dist(char *argv, int numP_childs, int already_created, int type) {
//int jobId;
//char *tmp;
//job_info_msg_t *j_info;
//slurm_job_info_t last_record;
int used_nodes=0;
int *procs_array;
char *hostfile;
// Get Slurm job info
//tmp = getenv("SLURM_JOB_ID");
//jobId = atoi(tmp);
//slurm_load_job(&j_info, jobId, 1);
//last_record = j_info->job_array[j_info->record_count - 1];
//COPY PROGRAM NAME
slurm_data->cmd = malloc(strlen(argv) * sizeof(char));
strcpy(slurm_data->cmd, argv);
// GET NEW DISTRIBUTION
node_dist(type, numP_childs, already_created, &procs_array, &used_nodes);
slurm_data->qty_procs = numP_childs;
/*
// CREATE/UPDATE HOSTFILE
int ptr;
ptr = create_hostfile(tmp, &hostfile);
MPI_Info_create(&(slurm_data->info));
MPI_Info_set(slurm_data->info, "hostfile", hostfile);
free(hostfile);
// SET NEW DISTRIBUTION
fill_hostfile(last_record, ptr, procs_array, used_nodes);
close(ptr);
*/
// CREATE AND SET STRING HOSTFILE
fill_str_hostfile(procs_array, used_nodes, &hostfile);
MPI_Info_create(&(slurm_data->info));
MPI_Info_set(slurm_data->info, "hosts", hostfile);
free(hostfile);
free(procs_array);
// Free JOB INFO
//slurm_free_job_info_msg(j_info);
}
/*
* Obtiene la distribucion fisica del grupo de procesos a crear, devolviendo
* cuantos nodos se van a utilizar y la cantidad de procesos que alojara cada
* nodo.
*
* Se permiten dos tipos de distribuciones fisicas segun el valor de "type":
*
* COMM_PHY_NODES (1): Orientada a equilibrar el numero de procesos entre
* todos los nodos disponibles.
* COMM_PHY_CPU (2): Orientada a completar la capacidad de un nodo antes de
* ocupar otro nodo.
*/
void node_dist(int type, int total_procs, int already_created, int **qty, int *used_nodes) {
int i, asigCores;
int tamBl, remainder;
int *procs;
procs = calloc(slurm_data->num_nodes, sizeof(int)); // Numero de procesos por nodo
/* GET NEW DISTRIBUTION */
if(type == 1) { // DIST NODES
*used_nodes = slurm_data->num_nodes;
tamBl = total_procs / slurm_data->num_nodes;
remainder = total_procs % slurm_data->num_nodes;
for(i=0; i<remainder; i++) {
procs[i] = tamBl + 1;
}
for(i=remainder; i<slurm_data->num_nodes; i++) {
procs[i] = tamBl;
}
} else if (type == 2) { // DIST CPUs
tamBl = slurm_data->num_cpus / slurm_data->num_nodes;
asigCores = 0;
i = *used_nodes = already_created / tamBl;
remainder = already_created % tamBl;
//First node could already have existing procs
if (remainder) {
procs[i] = asigCores = tamBl - remainder;
i = (i+1) % slurm_data->num_nodes;
(*used_nodes)++;
}
//Assing tamBl to each node
while(asigCores+tamBl <= total_procs) {
asigCores += tamBl;
procs[i] += tamBl;
i = (i+1) % slurm_data->num_nodes;
(*used_nodes)++;
}
//Last node could have less procs than tamBl
if(asigCores < total_procs) {
procs[i] += total_procs - asigCores;
(*used_nodes)++;
}
if(*used_nodes > slurm_data->num_nodes) *used_nodes = slurm_data->num_nodes; //FIXME Si ocurre esto no es un error?
}
*qty = calloc(*used_nodes, sizeof(int)); // Numero de procesos por nodo
for(i=0; i< *used_nodes; i++) {
(*qty)[i] = procs[i];
}
free(procs);
}
/*
* Crea y devuelve una cadena para ser utilizada por la llave "hosts"
* al crear procesos e indicar donde tienen que ser creados.
*/
void fill_str_hostfile(int *qty, int used_nodes, char **hostfile_str) {
int i=0, len=0;
char *host;
hostlist_t hostlist;
hostlist = slurm_hostlist_create(slurm_data->nodelist);
while ( (host = slurm_hostlist_shift(hostlist)) && i < used_nodes) {
if(qty[i] != 0) {
len = write_str_node(hostfile_str, len, qty[i], host);
}
i++;
free(host);
}
slurm_hostlist_destroy(hostlist);
}
/*
* Añade en una cadena "qty" entradas de "node_name".
* Realiza la reserva de memoria y la realoja si es necesario.
*/
int write_str_node(char **hostfile_str, int len_og, int qty, char *node_name) {
int err, len_node, len, i;
char *ocurrence;
len_node = strlen(node_name);
len = qty * (len_node + 1);
if(len_og == 0) { // Memoria no reservada
*hostfile_str = (char *) malloc(len * sizeof(char) - (1 * sizeof(char)));
} else { // Cadena ya tiene datos
*hostfile_str = (char *) realloc(*hostfile_str, (len_og + len) * sizeof(char) - (1 * sizeof(char)));
}
if(hostfile_str == NULL) return -1; // No ha sido posible alojar la memoria
ocurrence = (char *) malloc((len_node+1) * sizeof(char));
if(ocurrence == NULL) return -1; // No ha sido posible alojar la memoria
err = sprintf(ocurrence, ",%s", node_name);
if(err < 0) return -2; // No ha sido posible escribir sobre la variable auxiliar
i=0;
if(len_og == 0) { // Si se inicializa, la primera es una copia
i++;
strcpy(*hostfile_str, node_name);
}
for(; i<qty; i++){ // Las siguientes se conctanenan
strcat(*hostfile_str, ocurrence);
}
free(ocurrence);
return len+len_og;
}
//====================================================
//====================================================
//============DEPRECATED FUNCTIONS====================
//====================================================
//====================================================
/*
* @deprecated
* Crea un fichero que se utilizara como hostfile
* para un nuevo grupo de procesos.
*
* El nombre es devuelto en el argumento "file_name",
* que tiene que ser un puntero vacio.
*
* Ademas se devuelve un descriptor de fichero para
* modificar el fichero.
*/
int create_hostfile(char *jobId, char **file_name) {
int ptr, err, len;
len = strlen(jobId) + 11;
*file_name = NULL;
*file_name = malloc( len * sizeof(char));
if(*file_name == NULL) return -1; // No ha sido posible alojar la memoria
err = snprintf(*file_name, len, "hostfile.o%s", jobId);
if(err < 0) return -2; // No ha sido posible obtener el nombre de fichero
ptr = open(*file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
if(ptr < 0) return -3; // No ha sido posible crear el fichero
return ptr; // Devolver puntero a fichero
}
/*
* @deprecated
* Rellena un fichero hostfile indicado por ptr con los nombres
* de los nodos a utilizar indicados por "job_record" y la cantidad
* de procesos que alojara cada nodo indicado por "qty".
*/
void fill_hostfile(slurm_job_info_t job_record, int ptr, int *qty, int used_nodes) {
int i=0;
char *host;
hostlist_t hostlist;
hostlist = slurm_hostlist_create(job_record.nodes);
while ( (host = slurm_hostlist_shift(hostlist)) && i < used_nodes) {
write_hostfile_node(ptr, qty[i], host);
i++;
free(host);
}
slurm_hostlist_destroy(hostlist);
}
/*
* @deprecated
* Escribe en el fichero hostfile indicado por ptr una nueva linea.
*
* Esta linea indica el nombre de un nodo y la cantidad de procesos a
* alojar en ese nodo.
*/
int write_hostfile_node(int ptr, int qty, char *node_name) {
int err, len_node, len_int, len;
char *line;
len_node = strlen(node_name);
len_int = snprintf(NULL, 0, "%d", qty);
len = len_node + len_int + 3;
line = malloc(len * sizeof(char));
if(line == NULL) return -1; // No ha sido posible alojar la memoria
err = snprintf(line, len, "%s:%d\n", node_name, qty);
if(err < 0) return -2; // No ha sido posible escribir en el fichero
write(ptr, line, len-1);
free(line);
return 0;
}
#ifndef PROCESS_DIST_H
#define PROCESS_DIST_H
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <string.h>
#include <slurm/slurm.h>
#include "malleabilityStates.h"
int init_slurm_comm(char *argv, int num_cpus, int num_nodes, char *nodelist, int myId, int numP, int numC, int root, int type_dist, int type_creation, int spawn_is_single, MPI_Comm comm, MPI_Comm *child);
int check_slurm_comm(int myId, int root, int numP, MPI_Comm *child, MPI_Comm comm, MPI_Comm comm_thread, double *end_real_time);
void malleability_establish_connection(int myId, int root, MPI_Comm *intercomm);
void proc_adapt_expand(int *numP, int numC, MPI_Comm intercomm, MPI_Comm *comm, int is_children_group);
void proc_adapt_shrink(int numC, MPI_Comm *comm, int myId);
#endif
objects1 := block_distribution
CC := gcc
MCC := mpicc
CFLAGS := -Wall -Wextra
all: $(objects1)
$(objects1): %: %.c %.h
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
......@@ -11,7 +11,7 @@ struct physical_dist {
int num_cpus, num_nodes;
char *nodelist;
int target_qty, already_created;
int dist_type;
int dist_type, info_type;
};
typedef struct {
......@@ -25,10 +25,6 @@ typedef struct {
struct physical_dist dist; // Used to create mapping var
MPI_Comm comm, returned_comm;
// To control the spawn state
pthread_mutex_t spawn_mutex;
pthread_cond_t cond_adapt_rdy;
} Spawn_data;
#endif
#include <pthread.h>
#include "malleabilityManager.h"
#include "malleabilityStates.h"
#include "malleabilityDataStructures.h"
#include "malleabilityTypes.h"
#include "malleabilityZombies.h"
#include "ProcessDist.h"
#include "spawn_methods/GenericSpawn.h"
#include "CommDist.h"
#define MALLEABILITY_ROOT 0
#define MALLEABILITY_USE_SYNCHRONOUS 0
#define MALLEABILITY_USE_ASYNCHRONOUS 1
......@@ -25,11 +25,14 @@ int thread_creation();
int thread_check();
void* thread_async_work();
void print_comms_state();
typedef struct {
int spawn_type;
int spawn_method;
int spawn_dist;
int spawn_is_single;
int spawn_threaded;
int spawn_strategies;
//int spawn_is_single;
//int spawn_threaded;
int comm_type;
int comm_threaded;
......@@ -49,7 +52,7 @@ typedef struct { //FIXME numC_spawned no se esta usando
int num_cpus, num_nodes;
} malleability_t;
int state = MAL_UNRESERVED; //FIXME Mover a otro lado
int state = MALL_UNRESERVED; //FIXME Mover a otro lado
malleability_config_t *mall_conf;
malleability_t *mall;
......@@ -81,6 +84,8 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex
MPI_Comm_dup(comm, &dup_comm);
MPI_Comm_dup(comm, &thread_comm);
MPI_Comm_set_name(dup_comm, "MPI_COMM_MALL");
MPI_Comm_set_name(thread_comm, "MPI_COMM_MALL_THREAD");
mall->myId = myId;
mall->numP = numP;
......@@ -99,7 +104,7 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex
dist_s_data->entries = 0;
dist_a_data->entries = 0;
state = MAL_NOT_STARTED;
state = MALL_NOT_STARTED;
// Si son el primer grupo de procesos, obtienen los datos de los padres
MPI_Comm_get_parent(&(mall->intercomm));
......@@ -136,10 +141,11 @@ void free_malleability() {
zombies_awake();
zombies_service_free();
state = MAL_UNRESERVED;
state = MALL_UNRESERVED;
}
/*
* TODO Reescribir
* Se realiza el redimensionado de procesos por parte de los padres.
*
* Se crean los nuevos procesos con la distribucion fisica elegida y
......@@ -155,45 +161,69 @@ void free_malleability() {
* y finalmente se desconectan los dos grupos de procesos.
*/
int malleability_checkpoint() {
double end_real_time;
if(state == MAL_UNRESERVED) return MAL_UNRESERVED;
if(state == MAL_NOT_STARTED) {
switch(state) {
case MALL_UNRESERVED:
break;
case MALL_NOT_STARTED:
// Comprobar si se tiene que realizar un redimensionado
//if(CHECK_RMS()) {return MAL_DENIED;}
//if(CHECK_RMS()) {return MALL_DENIED;}
state = spawn_step();
if (state == MAL_SPAWN_COMPLETED){
state = start_redistribution();
if (state == MALL_SPAWN_COMPLETED || state == MALL_SPAWN_ADAPT_POSTPONE){
malleability_checkpoint();
}
break;
} else if(state == MAL_SPAWN_PENDING || state == MAL_SPAWN_SINGLE_PENDING) { // Comprueba si el spawn ha terminado y comienza la redistribucion
double end_real_time;
if(mall_conf->spawn_type == COMM_SPAWN_MERGE_PTHREAD && mall->numP > mall->numC) {
state = shrink_redistribution(); //TODO REFACTOR
} else {
state = check_slurm_comm(mall->myId, mall->root, mall->numP, &(mall->intercomm), mall->comm, mall->thread_comm, &end_real_time);
if (state == MAL_SPAWN_COMPLETED) {
case MALL_SPAWN_PENDING: // Comprueba si el spawn ha terminado y comienza la redistribucion
case MALL_SPAWN_SINGLE_PENDING:
state = check_spawn_state(&(mall->intercomm), mall->comm, &end_real_time);
if (state == MALL_SPAWN_COMPLETED || state == MALL_SPAWN_ADAPTED) {
mall_conf->results->spawn_time[mall_conf->grp] = MPI_Wtime() - mall_conf->results->spawn_start;
if(mall_conf->spawn_type == COMM_SPAWN_PTHREAD || mall_conf->spawn_type == COMM_SPAWN_MERGE_PTHREAD) {
mall_conf->results->spawn_real_time[mall_conf->grp] = end_real_time - mall_conf->results->spawn_start;
malleability_checkpoint();
}
//TODO Si es MERGE SHRINK, metodo diferente de redistribucion de datos
break;
case MALL_SPAWN_ADAPT_POSTPONE:
case MALL_SPAWN_COMPLETED:
state = start_redistribution();
}
}
malleability_checkpoint();
break;
} else if(state == MAL_DIST_PENDING) {
case MALL_DIST_PENDING:
if(mall_conf->comm_type == MAL_USE_THREAD) {
state = thread_check();
} else {
state = check_redistribution();
}
if(state != MALL_DIST_PENDING) {
malleability_checkpoint();
}
break;
case MALL_SPAWN_ADAPT_PENDING:
mall_conf->results->spawn_start = MPI_Wtime();
unset_spawn_postpone_flag(state);
state = check_spawn_state(&(mall->intercomm), mall->comm, &end_real_time);
if(!malleability_spawn_contains_strat(mall_conf->spawn_strategies, MALL_SPAWN_PTHREAD, NULL)) {
mall_conf->results->spawn_time[mall_conf->grp] = MPI_Wtime() - mall_conf->results->spawn_start;
}
break;
case MALL_SPAWN_ADAPTED:
state = shrink_redistribution();
malleability_checkpoint();
break;
case MALL_DIST_COMPLETED: //TODO No es esto muy feo?
state = MALL_COMPLETED;
break;
}
return state;
}
......@@ -220,11 +250,10 @@ void get_benchmark_results(results_data **results) {
}
//-------------------------------------------------------------------------------------------------------------
void set_malleability_configuration(int spawn_type, int spawn_is_single, int spawn_dist, int spawn_threaded, int comm_type, int comm_threaded) {
mall_conf->spawn_type = spawn_type;
mall_conf->spawn_is_single = spawn_is_single;
void set_malleability_configuration(int spawn_method, int spawn_strategies, int spawn_dist, int comm_type, int comm_threaded) {
mall_conf->spawn_method = spawn_method;
mall_conf->spawn_strategies = spawn_strategies;
mall_conf->spawn_dist = spawn_dist;
mall_conf->spawn_threaded = spawn_threaded;
mall_conf->comm_type = comm_type;
mall_conf->comm_threaded = comm_threaded;
}
......@@ -234,16 +263,13 @@ void set_malleability_configuration(int spawn_type, int spawn_is_single, int spa
* Tiene que ser llamado despues de setear la config
*/
void set_children_number(int numC){
if((mall_conf->spawn_type == COMM_SPAWN_MERGE || mall_conf->spawn_type == COMM_SPAWN_MERGE_PTHREAD) && (numC - mall->numP >= 0)) {
if((mall_conf->spawn_method == MALL_SPAWN_MERGE) && (numC >= mall->numP)) {
mall->numC = numC;
mall->numC_spawned = numC - mall->numP;
if(numC == mall->numP) { // Migrar
mall->numC_spawned = numC;
if(mall_conf->spawn_type == COMM_SPAWN_MERGE)
mall_conf->spawn_type = COMM_SPAWN_SERIAL;
else
mall_conf->spawn_type = COMM_SPAWN_PTHREAD;
mall_conf->spawn_method = MALL_SPAWN_BASELINE;
}
} else {
mall->numC = numC;
......@@ -409,17 +435,11 @@ void recv_data(int numP_parents, malleability_data_t *data_struct, int is_asynch
*/
void Children_init() {
int numP_parents, root_parents, i;
int spawn_is_single;
MPI_Comm aux;
MPI_Bcast(&spawn_is_single, 1, MPI_INT, MALLEABILITY_ROOT, mall->intercomm);
if(spawn_is_single) {
malleability_establish_connection(mall->myId, MALLEABILITY_ROOT, &(mall->intercomm));
}
MPI_Bcast(&(mall_conf->spawn_type), 1, MPI_INT, MALLEABILITY_ROOT, mall->intercomm);
int is_intercomm;
MPI_Bcast(&root_parents, 1, MPI_INT, MALLEABILITY_ROOT, mall->intercomm);
MPI_Bcast(&numP_parents, 1, MPI_INT, root_parents, mall->intercomm);
malleability_connect_children(mall->myId, mall->numP, mall->root, mall->comm, &numP_parents, &root_parents, &(mall->intercomm));
MPI_Comm_test_inter(mall->intercomm, &is_intercomm);
// TODO A partir de este punto tener en cuenta si es BASELINE o MERGE
recv_config_file(mall->root, mall->intercomm, &(mall_conf->config_file));
......@@ -457,22 +477,19 @@ void Children_init() {
}
}
if(mall_conf->spawn_type == COMM_SPAWN_MERGE || mall_conf->spawn_type == COMM_SPAWN_MERGE_PTHREAD) {
proc_adapt_expand(&(mall->numP), mall->numP+numP_parents, mall->intercomm, &(mall->comm), MALLEABILITY_CHILDREN);
if(mall->thread_comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->thread_comm));
MPI_Comm_dup(mall->comm, &aux);
mall->thread_comm = aux;
MPI_Comm_dup(mall->comm, &aux);
mall->user_comm = aux;
}
// Guardar los resultados de esta transmision
recv_results(mall_conf->results, mall->root, mall_conf->config_file->n_resizes, mall->intercomm);
if(!is_intercomm) {
if(mall->thread_comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->thread_comm));
if(mall->comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->comm));
if(mall->user_comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->user_comm)); //TODO No es peligroso?
MPI_Comm_disconnect(&(mall->intercomm));
MPI_Comm_dup(mall->intercomm, &(mall->thread_comm));
MPI_Comm_dup(mall->intercomm, &(mall->comm));
MPI_Comm_dup(mall->intercomm, &(mall->user_comm));
}
MPI_Comm_disconnect(&(mall->intercomm));
}
//======================================================||
......@@ -488,18 +505,10 @@ void Children_init() {
int spawn_step(){
mall_conf->results->spawn_start = MPI_Wtime();
if((mall_conf->spawn_type == COMM_SPAWN_MERGE || mall_conf->spawn_type == COMM_SPAWN_MERGE_PTHREAD) && mall->numP > mall->numC) {
state = shrink_redistribution();
return state;
}
state = init_slurm_comm(mall->name_exec, mall->num_cpus, mall->num_nodes, mall->nodelist, mall->myId, mall->numP, mall->numC, mall->root, mall_conf->spawn_dist, mall_conf->spawn_type, mall_conf->spawn_is_single, mall->thread_comm, &(mall->intercomm));
state = init_spawn(mall->name_exec, mall->num_cpus, mall->num_nodes, mall->nodelist, mall->myId, mall->numP, mall->numC, mall->root, mall_conf->spawn_dist, mall_conf->spawn_method, mall_conf->spawn_strategies, mall->thread_comm, &(mall->intercomm));
if(mall_conf->spawn_type == COMM_SPAWN_SERIAL || mall_conf->spawn_type == COMM_SPAWN_MERGE)
if(!malleability_spawn_contains_strat(mall_conf->spawn_strategies, MALL_SPAWN_PTHREAD, NULL)) {
mall_conf->results->spawn_time[mall_conf->grp] = MPI_Wtime() - mall_conf->results->spawn_start;
else if(mall_conf->spawn_type == COMM_SPAWN_PTHREAD || mall_conf->spawn_type == COMM_SPAWN_MERGE_PTHREAD) {
//mall_conf->results->spawn_thread_time[mall_conf->grp] = MPI_Wtime() - mall_conf->results->spawn_start;
//mall_conf->results->spawn_start = MPI_Wtime();
}
return state;
}
......@@ -520,23 +529,33 @@ int spawn_step(){
* grupos de procesos.
*/
int start_redistribution() {
int rootBcast = MPI_PROC_NULL;
if(mall->myId == mall->root) rootBcast = MPI_ROOT;
int rootBcast, is_intercomm;
is_intercomm = 0;
if(mall->intercomm != MPI_COMM_NULL) {
MPI_Comm_test_inter(mall->intercomm, &is_intercomm);
} else {
// Si no tiene comunicador creado, se debe a que se ha pospuesto el Spawn
// y se trata del spawn Merge Shrink
MPI_Comm_dup(mall->comm, &(mall->intercomm));
}
MPI_Bcast(&(mall_conf->spawn_type), 1, MPI_INT, rootBcast, mall->intercomm);
MPI_Bcast(&(mall->root), 1, MPI_INT, rootBcast, mall->intercomm);
MPI_Bcast(&(mall->numP), 1, MPI_INT, rootBcast, mall->intercomm);
if(is_intercomm) {
rootBcast = mall->myId == mall->root ? MPI_ROOT : MPI_PROC_NULL;
} else {
rootBcast = mall->root;
}
send_config_file(mall_conf->config_file, rootBcast, mall->intercomm);
if(dist_a_data->entries || rep_a_data->entries) { // Recibir datos asincronos
if(dist_a_data->entries || rep_a_data->entries) { // Enviar datos asincronos
mall_conf->results->async_start = MPI_Wtime();
comm_data_info(rep_a_data, dist_a_data, MALLEABILITY_NOT_CHILDREN, mall->myId, mall->root, mall->intercomm);
if(mall_conf->comm_type == MAL_USE_THREAD) {
return thread_creation();
} else {
send_data(mall->numC, dist_a_data, MALLEABILITY_USE_ASYNCHRONOUS);
return MAL_DIST_PENDING;
return MALL_DIST_PENDING;
}
}
return end_redistribution();
......@@ -580,7 +599,7 @@ int check_redistribution() {
}
MPI_Allreduce(&completed, &all_completed, 1, MPI_INT, MPI_MIN, mall->comm);
if(!all_completed) return MAL_DIST_PENDING; // Continue only if asynchronous send has ended
if(!all_completed) return MALL_DIST_PENDING; // Continue only if asynchronous send has ended
if(mall_conf->comm_type == MAL_USE_IBARRIER) {
......@@ -601,9 +620,22 @@ int check_redistribution() {
* Finalmente termina enviando los datos temporales a los hijos.
*/
int end_redistribution() {
int result, i, rootBcast = MPI_PROC_NULL;
MPI_Comm aux;
if(mall->myId == mall->root) rootBcast = MPI_ROOT;
int i, is_intercomm, rootBcast, local_state;
is_intercomm = 0;
if(mall->intercomm != MPI_COMM_NULL) {
MPI_Comm_test_inter(mall->intercomm, &is_intercomm);
} else {
// Si no tiene comunicador creado, se debe a que se ha pospuesto el Spawn
// y se trata del spawn Merge Shrink
mall->intercomm = mall->comm;
}
if(is_intercomm) {
rootBcast = mall->myId == mall->root ? MPI_ROOT : MPI_PROC_NULL;
} else {
rootBcast = mall->root;
}
if(dist_s_data->entries || rep_s_data->entries) { // Enviar datos sincronos
comm_data_info(rep_s_data, dist_s_data, MALLEABILITY_NOT_CHILDREN, mall->myId, mall->root, mall->intercomm);
......@@ -622,111 +654,71 @@ int end_redistribution() {
}
}
if(mall_conf->spawn_type == COMM_SPAWN_MERGE || mall_conf->spawn_type == COMM_SPAWN_MERGE_PTHREAD) {
double time_adapt = MPI_Wtime();
proc_adapt_expand(&(mall->numP), mall->numC, mall->intercomm, &(mall->comm), MALLEABILITY_NOT_CHILDREN);
send_results(mall_conf->results, rootBcast, mall_conf->config_file->n_resizes, mall->intercomm);
local_state = MALL_DIST_COMPLETED;
if(!is_intercomm) { // Merge Spawn
if(mall->numP < mall->numC) { // Expand
if(mall->thread_comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->thread_comm));
if(mall->comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->comm));
if(mall->user_comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->user_comm)); //TODO No es peligroso?
MPI_Comm_dup(mall->comm, &aux);
mall->thread_comm = aux;
MPI_Comm_dup(mall->comm, &aux);
mall->user_comm = aux;
mall_conf->results->spawn_time[mall_conf->grp] += MPI_Wtime() - time_adapt;
MPI_Comm_dup(mall->intercomm, &(mall->thread_comm));
MPI_Comm_dup(mall->intercomm, &(mall->comm));
MPI_Comm_dup(mall->intercomm, &(mall->user_comm));
// result = MAL_DIST_ADAPTED;
MPI_Comm_set_name(mall->thread_comm, "MPI_COMM_MALL_THREAD");
MPI_Comm_set_name(mall->comm, "MPI_COMM_MALL");
MPI_Comm_set_name(mall->user_comm, "MPI_COMM_MALL_USER");
} else { // Shrink || Merge Shrink requiere de mas tareas
local_state = MALL_SPAWN_ADAPT_PENDING;
}
}
send_results(mall_conf->results, rootBcast, mall_conf->config_file->n_resizes, mall->intercomm);
result = MAL_DIST_COMPLETED;
/*FIXMENOW En algun momento P0 cambia tanto su comm como intercomm respecto al resto...*/
MPI_Barrier(mall->comm); //FIXMENOW Por alguna razon da error en Comm
if(mall->intercomm != MPI_COMM_NULL && mall->intercomm != MPI_COMM_WORLD) {
//FIXMENOW Intercomm se borra, pero no es COMM WORLD ni COMM NULL
MPI_Comm_disconnect(&(mall->intercomm));
state = MAL_NOT_STARTED;
return result;
}
return local_state;
}
///=============================================
///=============================================
///=============================================
double time_adapt, time_adapt_end;
int state_shrink=0; //TODO Refactor
pthread_t thread_shrink;
MPI_Comm comm_shrink;
int thread_shrink_creation();
void *thread_shrink_work();
/*
* Crea una hebra para ejecutar una comunicación en segundo plano.
*/
int thread_shrink_creation() {
if(pthread_create(&thread_shrink, NULL, thread_shrink_work, NULL)) {
printf("Error al crear el hilo\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -1;
}
return MAL_SPAWN_PENDING;
}
void* thread_shrink_work() {
proc_adapt_shrink(mall->numC, &comm_shrink, mall->myId);
time_adapt_end = MPI_Wtime();
state_shrink=2;
pthread_exit(NULL);
}
///=============================================
///=============================================
///=============================================
int shrink_redistribution() {
int global_state;
double time_aux;
MPI_Comm aux_comm;
if(mall_conf->spawn_type == COMM_SPAWN_MERGE_PTHREAD) {
if(state_shrink == 0) {
time_adapt = MPI_Wtime();
state_shrink = 1;
MPI_Comm_dup(mall->comm, &comm_shrink);
thread_shrink_creation();
return MAL_SPAWN_PENDING;
} else if(state_shrink>0) {
MPI_Allreduce(&state_shrink, &global_state, 1, MPI_INT, MPI_MIN, mall->comm);
if(global_state < 2) return MAL_SPAWN_PENDING;
time_aux = MPI_Wtime();
if(pthread_join(thread_shrink, NULL)) {
printf("Error al esperar al hilo\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -10;
}
MPI_Comm_dup(mall->comm, &aux_comm);
mall->comm = comm_shrink;
}
} else {
time_adapt = MPI_Wtime();
MPI_Comm_dup(mall->comm, &aux_comm);
proc_adapt_shrink( mall->numC, &(mall->comm), mall->myId);
}
double time_extra = MPI_Wtime();
//TODO REFACTOR -- Que solo la llamada de collect iters este fuera de los hilos
zombies_collect_suspended(aux_comm, mall->myId, mall->numP, mall->numC, mall->root, (void *) mall_conf->results, mall->user_comm);
zombies_collect_suspended(mall->comm, mall->myId, mall->numP, mall->numC, mall->root, (void *) mall_conf->results);
if(mall->myId < mall->numC) {
MPI_Comm_free(&aux_comm);
MPI_Comm_dup(mall->comm, &aux_comm);
mall->thread_comm = aux_comm;
MPI_Comm_dup(mall->comm, &aux_comm);
mall->user_comm = aux_comm;
if(mall->thread_comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->thread_comm));
if(mall->comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->comm));
if(mall->user_comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->user_comm)); //TODO No es peligroso?
MPI_Comm_dup(mall->intercomm, &(mall->thread_comm));
MPI_Comm_dup(mall->intercomm, &(mall->comm));
MPI_Comm_dup(mall->intercomm, &(mall->user_comm));
MPI_Comm_set_name(mall->thread_comm, "MPI_COMM_MALL_THREAD");
MPI_Comm_set_name(mall->comm, "MPI_COMM_MALL");
MPI_Comm_set_name(mall->user_comm, "MPI_COMM_MALL_USER");
MPI_Comm_free(&(mall->intercomm));
mall_conf->results->spawn_time[mall_conf->grp] = MPI_Wtime() - time_adapt;
if(mall_conf->spawn_type == COMM_SPAWN_MERGE_PTHREAD) {
mall_conf->results->spawn_real_time[mall_conf->grp] = time_adapt_end - time_adapt + MPI_Wtime() - time_aux;
mall_conf->results->spawn_time[mall_conf->grp] += MPI_Wtime() - time_extra;
if(malleability_spawn_contains_strat(mall_conf->spawn_strategies,MALL_SPAWN_PTHREAD, NULL)) {
mall_conf->results->spawn_real_time[mall_conf->grp] += MPI_Wtime() - time_extra;
}
return MAL_DIST_COMPLETED; //FIXME Refactor Poner a SPAWN_COMPLETED
return MALL_DIST_COMPLETED;
} else {
return MAL_ZOMBIE;
return MALL_ZOMBIE;
}
}
......@@ -746,7 +738,7 @@ int thread_creation() {
MPI_Abort(MPI_COMM_WORLD, -1);
return -1;
}
return MAL_DIST_PENDING;
return MALL_DIST_PENDING;
}
/*
......@@ -760,8 +752,8 @@ int thread_check() {
// Comprueba que todos los hilos han terminado la distribucion (Mismo valor en commAsync)
MPI_Allreduce(&state, &all_completed, 1, MPI_INT, MPI_MAX, mall->comm);
if(all_completed != MAL_DIST_COMPLETED) return MAL_DIST_PENDING; // Continue only if asynchronous send has ended
//FIXME No se tiene en cuenta el estado MAL_APP_ENDED
if(all_completed != MALL_DIST_COMPLETED) return MALL_DIST_PENDING; // Continue only if asynchronous send has ended
//FIXME No se tiene en cuenta el estado MALL_APP_ENDED
if(pthread_join(mall->async_thread, NULL)) {
printf("Error al esperar al hilo\n");
......@@ -782,6 +774,26 @@ int thread_check() {
*/
void* thread_async_work() {
send_data(mall->numC, dist_a_data, MALLEABILITY_USE_SYNCHRONOUS);
state = MAL_DIST_COMPLETED;
state = MALL_DIST_COMPLETED;
pthread_exit(NULL);
}
//==============================================================================
/*
* Muestra por pantalla el estado actual de todos los comunicadores
*/
void print_comms_state() {
int tester;
char *test = malloc(MPI_MAX_OBJECT_NAME * sizeof(char));
MPI_Comm_get_name(mall->comm, test, &tester);
printf("P%d Comm=%d Name=%s\n", mall->myId, mall->comm, test);
MPI_Comm_get_name(mall->user_comm, test, &tester);
printf("P%d Comm=%d Name=%s\n", mall->myId, mall->user_comm, test);
if(mall->intercomm != MPI_COMM_NULL) {
MPI_Comm_get_name(mall->intercomm, test, &tester);
printf("P%d Comm=%d Name=%s\n", mall->myId, mall->intercomm, test);
}
free(test);
}
......@@ -18,7 +18,7 @@ void indicate_ending_malleability(int new_outside_state);
int malleability_checkpoint();
void set_benchmark_grp(int grp);
void set_malleability_configuration(int spawn_type, int spawn_is_single, int spawn_dist, int spawn_threaded, int comm_type, int comm_threaded);
void set_malleability_configuration(int spawn_method, int spawn_strategies, int spawn_dist, int comm_type, int comm_threaded);
void set_children_number(int numC); // TODO TO BE DEPRECATED
void get_malleability_user_comm(MPI_Comm *comm);
......
......@@ -5,6 +5,7 @@
#include <stdlib.h>
//States
/*
#define MAL_UNRESERVED -1
#define MAL_DENIED -2
#define MAL_ZOMBIE -3
......@@ -17,20 +18,27 @@
#define MAL_DIST_PENDING 6
#define MAL_DIST_COMPLETED 7
#define MAL_DIST_ADAPTED 8
*/
//enum mall_states{MALL_UNRESERVED, MALL_NOT_STARTED, MALL_ZOMBIE, MALL_SPAWN_PENDING,
// MALL_SPAWN_SINGLE_PENDING, MALL_SPAWN_SINGLE_COMPLETED, MALL_SPAWN_ADAPT_POSTPONE,
// MALL_SPAWN_COMPLETED, MALL_DIST_PENDING, MALL_DIST_COMPLETED, MALL_DIST_ADAPTED};
//enum mall_spawn_methods{MALL_SPAWN_BASELINE, MALL_SPAWN_MERGE};
//enum mall_spawn_strategies{MALL_SPAWN_SERIAL, MALL_SPAWN_PTHREAD, MALL_SPAWN_MULTIPLE, MALL_SPAWN_SINGLE};
enum mall_states{MALL_UNRESERVED, MALL_NOT_STARTED, MALL_ZOMBIE, MALL_SPAWN_PENDING, MALL_SPAWN_SINGLE_PENDING,
MALL_SPAWN_SINGLE_COMPLETED, MALL_SPAWN_ADAPT_POSTPONE, MALL_SPAWN_COMPLETED, MALL_DIST_PENDING, MALL_DIST_COMPLETED,
MALL_SPAWN_ADAPT_PENDING, MALL_SPAWN_ADAPTED, MALL_COMPLETED};
enum mall_spawn_methods{MALL_SPAWN_BASELINE, MALL_SPAWN_MERGE};
#define MALL_SPAWN_PTHREAD 2
#define MALL_SPAWN_SINGLE 3
#define MALLEABILITY_ROOT 0
#define MAL_APP_EXECUTING 0
#define MAL_APP_ENDED 1
// TODO Refactor
/*
#define COMM_PHY_SPREAD 1
#define COMM_PHY_COMPACT 2
*/
/*
// SPAWN METHODS
#define COMM_SPAWN_SERIAL 0
#define COMM_SPAWN_PTHREAD 1
......@@ -43,7 +51,7 @@
#define COMM_SPAWN_SINGLE 1
//#define COMM_SPAWN_PTHREAD 2
//#define COMM_SPAWN_SINGLE 3
*/
#define MAL_USE_NORMAL 0
#define MAL_USE_IBARRIER 1
#define MAL_USE_POINT 2
......
......@@ -49,13 +49,15 @@ void add_data(void *data, int total_qty, int type, int request_qty, malleability
* unicamente.
*/
void comm_data_info(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, int is_children_group, int myId, int root, MPI_Comm intercomm) {
int i, rootBcast = MPI_PROC_NULL;
int i, is_intercomm, rootBcast = MPI_PROC_NULL;
MPI_Datatype entries_type, struct_type;
if(is_children_group) {
rootBcast = root;
MPI_Comm_test_inter(intercomm, &is_intercomm);
if(is_intercomm && !is_children_group) {
rootBcast = myId == root ? MPI_ROOT : MPI_PROC_NULL;
} else {
if(myId == root) rootBcast = MPI_ROOT;
rootBcast = root;
}
// Mandar primero numero de entradas
......
......@@ -18,7 +18,7 @@ int offset_pids, *pids = NULL;
void gestor_usr2() {}
void zombies_collect_suspended(MPI_Comm comm, int myId, int numP, int numC, int root, void *results_void, MPI_Comm user_comm) {
void zombies_collect_suspended(MPI_Comm comm, int myId, int numP, int numC, int root, void *results_void) {
int pid = getpid();
int *pids_counts = malloc(numP * sizeof(int));
int *pids_displs = malloc(numP * sizeof(int));
......@@ -41,10 +41,12 @@ void zombies_collect_suspended(MPI_Comm comm, int myId, int numP, int numC, int
free(pids_counts);
free(pids_displs);
if(myId >= numC) {
// FIXME No deberia estar aqui
// Needed to ensure iteration times are collected before suspending these processes
results_data *results = (results_data *) results_void;
compute_results_iter(results, myId, root, user_comm);
compute_results_iter(results, myId, root, comm);
if(myId >= numC) {
zombies_suspend();
}
}
......
......@@ -10,7 +10,7 @@
//#include <slurm/slurm.h>
#include <signal.h>
void zombies_collect_suspended(MPI_Comm comm, int myId, int numP, int numC, int root, void *results_void, MPI_Comm user_comm);
void zombies_collect_suspended(MPI_Comm comm, int myId, int numP, int numC, int root, void *results_void);
void zombies_service_init();
void zombies_service_free();
void zombies_awake();
......
......@@ -4,6 +4,7 @@
#include <pthread.h>
#include "../malleabilityStates.h"
#include "Baseline.h"
#include "Spawn_state.h"
//--------------PRIVATE DECLARATIONS---------------//
int baseline_spawn(Spawn_data spawn_data, MPI_Comm comm, MPI_Comm *child);
......@@ -24,10 +25,10 @@ void baseline_establish_connection(int myId, int root, MPI_Comm *parents);
* nada los hijos.
*/
int baseline(Spawn_data spawn_data, MPI_Comm *child) { //TODO Tratamiento de errores
int numRanks;
MPI_Comm_size(spawn_data.comm, &numRanks);
MPI_Comm intercomm;
MPI_Comm_get_parent(&intercomm);
if (spawn_data.initial_qty == numRanks) { // Parents path
if (intercomm == MPI_COMM_NULL) { // Parents path
if(spawn_data.spawn_is_single) {
baseline_single_spawn(spawn_data, child);
} else {
......@@ -78,11 +79,8 @@ int baseline_single_spawn(Spawn_data spawn_data, MPI_Comm *child) {
port_name = (char *) malloc(MPI_MAX_PORT_NAME * sizeof(char));
MPI_Recv(port_name, MPI_MAX_PORT_NAME, MPI_CHAR, spawn_data.root, 130, *child, MPI_STATUS_IGNORE);
if(spawn_data.spawn_is_async) {
pthread_mutex_lock(&(spawn_data.spawn_mutex));
commState = MALL_SPAWN_SINGLE_COMPLETED; // Indicate other processes to join root to end spawn procedure
pthread_mutex_unlock(&(spawn_data.spawn_mutex));
}
set_spawn_state(MALL_SPAWN_SINGLE_COMPLETED, spawn_data.spawn_is_async); // Indicate other processes to join root to end spawn procedure
} else {
port_name = malloc(1);
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment