Commit 30685e95 authored by iker_martin

Modified the results output: the times for each stage of an iteration are now also reported as local results.
parent 9a0629d9
objects1 := ini
objects2 := results read_ini
objects := ini.o read_ini.o results.o
DISTRI_LOC = $(TOP)/malleability/distribution_methods/block_distribution.h
CC := gcc
MCC := mpicc
CFLAGS := -Wall
CFLAGS := -Wall -Wextra
all: $(objects1) $(objects2)
all: $(objects)
$(objects1): %: %.c %.h
$(CC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
$(objects2): %: %.c %.h $(objects1).h $(TOP)/malleability/distribution_methods/block_distribution.h
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
ini.o: ini.c ini.h
echo $(BUILDDIR)
$(CC) $(CFLAGS) -c -o $(BUILDDIR)/$@ $<
read_ini.o: read_ini.c read_ini.h ini.h $(DISTRI_LOC)
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@ $<
results.o: results.c results.h
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@ $<
#$@ --> Target (object file)
#$< --> First prerequisite (source file)
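# Illustrative example of the automatic variables above (not part of this commit):
# in the rule "results.o: results.c results.h", $@ expands to "results.o" (the target)
# and $< to "results.c" (the first prerequisite), so the recipe compiles results.c
# into $(BUILDDIR)/results.o.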
......@@ -9,7 +9,7 @@
void malloc_config_resizes(configuration *user_config, int resizes);
void init_config_stages(configuration *user_config, int stages);
void init_config_stages(configuration *user_config);
void def_struct_config_file(configuration *config_file, MPI_Datatype *config_type);
void def_struct_config_file_array(configuration *config_file, MPI_Datatype *config_type);
void def_struct_iter_stage(iter_stage_t *stages, int n_stages, MPI_Datatype *config_type);
......@@ -38,7 +38,7 @@ static int handler(void* user, const char* section, const char* name,
} else if (MATCH("general", "S")) {
pconfig->n_stages = atoi(value);
pconfig->stages = malloc(sizeof(iter_stage_t) * pconfig->n_stages);
init_config_stages(pconfig, pconfig->n_stages);
init_config_stages(pconfig);
} else if (MATCH("general", "Granularity")) {
pconfig->granularity = atoi(value);
} else if (MATCH("general", "SDR")) {
......@@ -150,7 +150,7 @@ void malloc_config_resizes(configuration *user_config, int resizes) {
* - read_ini_file
* - recv_config_file
*/
void init_config_stages(configuration *user_config, int stages) {
void init_config_stages(configuration *user_config) {
int i;
if(user_config != NULL) {
for(i=0; i<user_config->n_stages; i++) {
......@@ -159,6 +159,8 @@ void init_config_stages(configuration *user_config, int stages) {
user_config->stages[i].double_array = NULL;
user_config->stages[i].counts.counts = NULL;
user_config->stages[i].real_bytes = 0;
user_config->stages[i].intercept = 0;
user_config->stages[i].slope = 0;
}
}
}
......@@ -206,7 +208,7 @@ void free_config(configuration *user_config) {
void print_config(configuration *user_config, int grp) {
if(user_config != NULL) {
int i;
printf("Config loaded: resizes=%d, stages=%d, granularity=%d, sdr=%d, adr=%d, at=%d, sm=%d, ss=%d, latency=%lf, bw=%lf || grp=%d\n",
printf("Config loaded: R=%d, S=%d, granularity=%d, SDR=%d, ADR=%d, AT=%d, SM=%d, SS=%d, latency=%2.8f, bw=%lf || grp=%d\n",
user_config->n_resizes, user_config->n_stages, user_config->granularity, user_config->sdr, user_config->adr,
user_config->at, user_config->sm, user_config->ss, user_config->latency_m, user_config->bw_m, grp);
for(i=0; i<user_config->n_stages; i++) {
......@@ -237,7 +239,7 @@ void print_config_group(configuration *user_config, int grp) {
sons = user_config->procs[grp+1];
}
printf("Config: granularity=%d, sdr=%d, adr=%d, at=%d, sm=%d, ss=%d, latency=%lf, bw=%lf\n",
printf("Config: granularity=%d, SDR=%d, ADR=%d, AT=%d, SM=%d, SS=%d, latency=%2.8f, bw=%lf\n",
user_config->granularity, user_config->sdr, user_config->adr, user_config->at, user_config->sm, user_config->ss, user_config->latency_m, user_config->bw_m);
for(i=0; i<user_config->n_stages; i++) {
printf("Stage %d: PT=%d, T_stage=%lf, bytes=%d, Intercept=%lf, Slope=%lf\n",
......@@ -331,7 +333,7 @@ void recv_config_file(int root, MPI_Comm intercomm, configuration **config_file_
MPI_Type_free(&config_type_array);
MPI_Type_free(&iter_stage_type);
init_config_stages(config_file, config_file->n_stages); // Initialize vectors to NULL
init_config_stages(config_file); // Initialize vectors to NULL
*config_file_out = config_file;
}
......
......@@ -5,45 +5,7 @@
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include "../malleability/distribution_methods/block_distribution.h"
typedef struct
{
int pt; // Procedure type
float t_stage; // Time to complete the stage
double t_op;
int operations;
int bytes, real_bytes, my_bytes;
// Variables to represent the linear regression
// for collective calls.
double slope, intercept;
// Arrays to communicate data;
char* array, *full_array;
double* double_array;
// Arrays to indicate how many bytes are received from each rank
//int *counts, *displs;
struct Counts counts;
} iter_stage_t;
typedef struct
{
int n_resizes, n_stages;
int actual_resize, actual_stage;
int granularity, sdr, adr;
int sm, ss;
int at;
double latency_m, bw_m;
int *iters, *procs, *phy_dist;
float *factors;
iter_stage_t *stages;
} configuration;
#include "../Main/Main_datatypes.h"
configuration *read_ini_file(char *file_name);
void free_config(configuration *user_config);
......
......@@ -136,6 +136,28 @@ void compute_results_iter(results_data *results, int myId, int root, MPI_Comm co
MPI_Reduce(results->iters_time, NULL, results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
}
/*
* For each stage of each iteration, obtains the maximum time across all the
* processes that took part.
*
* The maximum is needed because it is the one that represents the real time
* that was spent.
*/
void compute_results_stages(results_data *results, int myId, int root, int stages, MPI_Comm comm) {
int i;
if(myId == root) {
for(i=0; i<stages; i++) {
MPI_Reduce(MPI_IN_PLACE, results->stage_times[i], results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
}
}
else {
for(i=0; i<stages; i++) {
MPI_Reduce(results->stage_times[i], NULL, results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
}
}
}
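/*
* Minimal usage sketch (hypothetical caller, not part of this commit): every
* process calls the reductions with the same root, so only the root ends up
* holding the per-iteration and per-stage maximum times.
*
*   compute_results_iter(results, myId, ROOT, comm);
*   compute_results_stages(results, myId, ROOT, config_file->n_stages, comm);
*/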
//======================================================||
//======================================================||
//===============PRINT RESULTS FUNCTIONS================||
......@@ -147,15 +169,30 @@ void compute_results_iter(results_data *results, int myId, int root, MPI_Comm co
* These are the ones related to the iterations: the time
* per iteration and its type (normal or during asynchronous communication).
*/
void print_iter_results(results_data results, int last_normal_iter_index) {
void print_iter_results(results_data results) {
int i;
printf("Titer: ");
printf("T_iter: ");
for(i=0; i< results.iter_index; i++) {
printf("%lf ", results.iters_time[i]);
}
printf("\nTtype: %d\n", results.iters_async);
printf("\nAsync_Iters: %d\n", results.iters_async);
}
/*
* Prints the local results of each stage to standard output.
*/
void print_stage_results(results_data results, int n_stages) {
int i, j;
for(i=0; i<n_stages; i++) {
printf("T_stage %d: ", i);
for(j=0; j< results.iter_index; j++) {
printf("%lf ", results.stage_times[i][j]);
}
printf("\n");
}
}
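/*
* Illustrative output (values are made up), assuming 2 stages and 3 recorded
* iterations:
*   T_stage 0: 0.001021 0.000998 0.001005
*   T_stage 1: 0.014532 0.014498 0.014511
* printed alongside the T_iter and Async_Iters lines from print_iter_results.
*/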
/*
......@@ -166,27 +203,27 @@ void print_iter_results(results_data results, int last_normal_iter_index) {
void print_global_results(results_data results, int resizes) {
int i;
printf("Tspawn: "); // FIXME REFACTOR Cambiar nombre a T_resize_real
printf("T_spawn: "); // FIXME REFACTOR Cambiar nombre a T_resize_real
for(i=0; i< resizes - 1; i++) {
printf("%lf ", results.spawn_time[i]);
}
printf("\nTspawn_real: "); // FIXME REFACTOR Cambiar nombre a T_resize
printf("\nT_spawn_real: "); // FIXME REFACTOR Cambiar nombre a T_resize
for(i=0; i< resizes - 1; i++) {
printf("%lf ", results.spawn_real_time[i]);
}
printf("\nTsync: ");
printf("\nT_SR: ");
for(i=1; i < resizes; i++) {
printf("%lf ", results.sync_time[i]);
}
printf("\nTasync: ");
printf("\nT_AR: ");
for(i=1; i < resizes; i++) {
printf("%lf ", results.async_time[i]);
}
printf("\nTex: %lf\n", results.exec_time);
printf("\nT_total: %lf\n", results.exec_time);
}
//======================================================||
......@@ -201,7 +238,8 @@ void print_global_results(results_data results, int resizes) {
* The "resizes" and "iters_size" arguments are needed to determine the size
* of the result vectors.
*/
void init_results_data(results_data *results, int resizes, int iters_size) {
void init_results_data(results_data *results, int resizes, int stages, int iters_size) {
int i;
results->spawn_time = calloc(resizes, sizeof(double));
results->spawn_real_time = calloc(resizes, sizeof(double));
......@@ -211,15 +249,25 @@ void init_results_data(results_data *results, int resizes, int iters_size) {
results->iters_size = iters_size + 100;
results->iters_time = calloc(iters_size + 100, sizeof(double)); //FIXME Magic number
results->stage_times = malloc(stages * sizeof(double*)); //FIXME Magic number
for(i=0; i<stages; i++) {
results->stage_times[i] = calloc(iters_size + 100, sizeof(double)); //FIXME Magic number
}
results->iters_async = 0;
results->iter_index = 0;
}
void realloc_results_iters(results_data *results, int needed) {
void realloc_results_iters(results_data *results, int stages, int needed) {
int i;
double *time_aux;
time_aux = (double *) realloc(results->iters_time, needed * sizeof(double));
for(i=0; i<stages; i++) { //TODO Check that this realloc does not fail
results->stage_times[i] = (double *) realloc(results->stage_times[i], needed * sizeof(double));
}
if(time_aux == NULL) {
fprintf(stderr, "Fatal error - No se ha podido realojar la memoria de resultados\n");
MPI_Abort(MPI_COMM_WORLD, 1);
......@@ -231,7 +279,8 @@ void realloc_results_iters(results_data *results, int needed) {
/*
* Frees all the memory associated with a results structure.
*/
void free_results_data(results_data *results) {
void free_results_data(results_data *results, int stages) {
int i;
if(results != NULL) {
free(results->spawn_time);
free(results->spawn_real_time);
......@@ -239,5 +288,9 @@ void free_results_data(results_data *results) {
free(results->async_time);
free(results->iters_time);
for(i=0; i<stages; i++) {
free(results->stage_times[i]);
}
free(results->stage_times);
}
}
......@@ -9,7 +9,7 @@
typedef struct {
// Iters data
double *iters_time;
double *iters_time, **stage_times;
int iters_async, iter_index, iters_size;
// Spawn, Thread, Sync, Async and Exec time
......@@ -27,11 +27,14 @@ void set_results_post_reconfig(results_data *results, int grp, int sdr, int adr)
void reset_results_index(results_data *results);
void compute_results_iter(results_data *results, int myId, int root, MPI_Comm comm);
void compute_results_stages(results_data *results, int myId, int root, int n_stages, MPI_Comm comm);
void print_iter_results(results_data results, int last_normal_iter_index);
void print_iter_results(results_data results);
void print_stage_results(results_data results, int n_stages);
void print_global_results(results_data results, int resizes);
void init_results_data(results_data *results, int resizes, int iters_size);
void realloc_results_iters(results_data *results, int needed);
void free_results_data(results_data *results);
void init_results_data(results_data *results, int resizes, int stages, int iters_size);
void realloc_results_iters(results_data *results, int stages, int needed);
void free_results_data(results_data *results, int stages);
#endif
......@@ -224,14 +224,17 @@ int work() {
* that lasts for at least "time" seconds.
*/
double iterate(double *matrix, int n, int async_comm, int iter) {
double start_time, actual_time;
double start_time, start_time_stage, actual_time, *times_stages;
int i, cnt_async = 0;
double aux = 0;
times_stages = malloc(config_file->n_stages * sizeof(double));
start_time = MPI_Wtime();
for(i=0; i < config_file->n_stages; i++) {
start_time_stage = MPI_Wtime();
aux+= process_stage(*config_file, config_file->stages[i], *group, comm);
times_stages[i] = MPI_Wtime() - start_time_stage;
}
actual_time = MPI_Wtime(); // Record times
......@@ -241,12 +244,17 @@ double iterate(double *matrix, int n, int async_comm, int iter) {
}
if(results->iter_index == results->iters_size) { // Grow both result vectors
realloc_results_iters(results, results->iters_size + 100);
realloc_results_iters(results, config_file->n_stages, results->iters_size + 100);
}
results->iters_time[results->iter_index] = actual_time - start_time;
for(i=0; i < config_file->n_stages; i++) {
results->stage_times[i][results->iter_index] = times_stages[i];
}
results->iters_async += cnt_async;
results->iter_index = results->iter_index + 1;
free(times_stages);
return aux;
}
......@@ -292,7 +300,8 @@ int print_local_results() {
create_out_file(file_name, &ptr_local, 1);
print_config_group(config_file, group->grp);
print_iter_results(*results, config_file->iters[group->grp] - 1);
print_iter_results(*results);
print_stage_results(*results, config_file->n_stages);
free(file_name);
fflush(stdout);
......@@ -366,7 +375,7 @@ void init_application() {
config_file = read_ini_file(group->argv[1]);
results = malloc(sizeof(results_data));
init_results_data(results, config_file->n_resizes, config_file->iters[group->grp]);
init_results_data(results, config_file->n_resizes, config_file->n_stages, config_file->iters[group->grp]);
if(config_file->sdr) {
malloc_comm_array(&(group->sync_array), config_file->sdr , group->myId, group->numP);
}
......@@ -375,12 +384,11 @@ void init_application() {
}
int message_tam = 100000000;
message_tam = 10240000;
//for(int i=0; i<10; i++) {
for(int i=0; i<3; i++) {
config_file->latency_m = latency(group->myId, group->numP, comm);
config_file->bw_m = bandwidth(group->myId, group->numP, comm, config_file->latency_m, message_tam);
//if(group->myId == ROOT) printf("numP=%d Lat=%lf Bw=%lf\n", group->numP, config_file->latency_m, config_file->bw_m);
//}
}
obtain_op_times(1);
}
......@@ -416,12 +424,12 @@ void free_application_data() {
}
free_malleability();
free_config(config_file);
if(group->grp == 0) { //FIXME Investigate why this does not work when grp is different from 0
free_results_data(results);
free_results_data(results, config_file->n_stages);
free(results);
}
free_config(config_file);
free(group);
}
......
......@@ -4,6 +4,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
#include "../malleability/distribution_methods/block_distribution.h"
#define ROOT 0
......@@ -23,4 +24,42 @@ typedef struct {
char *sync_array, *async_array;
} group_data;
typedef struct
{
int pt; // Procedure type
float t_stage; // Time to complete the stage
double t_op;
int operations;
int bytes, real_bytes, my_bytes;
// Variables to represent the linear regression
// for collective calls.
double slope, intercept;
// Arrays to communicate data;
char* array, *full_array;
double* double_array;
// Arrays to indicate how many bytes are received from each rank
//int *counts, *displs;
struct Counts counts;
} iter_stage_t;
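/*
* Reading note (an interpretation of the fields, not code from this commit):
* the slope/intercept pair describes the linear model fitted for collective
* stages (see linear_regression_stage); presumably the time for a message of
* "bytes" bytes would be estimated as:
*
*   double t_estimated = stage->intercept + stage->slope * (double) bytes;
*/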
typedef struct
{
int n_resizes, n_stages;
int actual_resize, actual_stage;
int granularity, sdr, adr;
int sm, ss;
int at;
double latency_m, bw_m;
int *iters, *procs, *phy_dist;
float *factors;
iter_stage_t *stages;
} configuration;
#endif
objects1 := computing_func comunication_func linear_reg
objects2 := process_stage
objects3 := Main
depends := Main_datatypes
#Rename to "level" -- add a suffix to the objects/headers/source files
DISTRI_LOC = $(TOP)/malleability/distribution_methods/block_distribution.h
MALLEABILITY_DEPENDS = $(TOP)/malleability/CommDist.h $(TOP)/malleability/malleabilityStates.h $(TOP)/malleability/malleabilityManager.h
DEPENDS := Main_datatypes.h
CC := gcc
MCC := mpicc
CFLAGS := -Wall
CFLAGS := -Wall -Wextra
all: $(objects1) $(objects2) $(objects3)
$(objects1): %: %.c %.h $(depends).h
$(objects1): %: %.c %.h $(DEPENDS)
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
$(objects2): %: %.c %.h $(objects1).h $(depends).h $(TOP)/IOcodes/read_ini.h \
$(TOP)/malleability/distribution_methods/block_distribution.h
$(objects2): %: %.c %.h $(objects1).h $(DEPENDS) $(DISTRI_LOC)
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
$(objects3): %: %.c $(objects2).h $(depends).h $(TOP)/IOcodes/read_ini.h $(TOP)/IOcodes/results.h \
$(TOP)/malleability/CommDist.h $(TOP)/malleability/malleabilityStates.h \
$(TOP)/malleability/malleabilityManager.h
$(objects3): %: %.c $(objects2).h $(DEPENDS) $(TOP)/IOcodes/read_ini.h $(TOP)/IOcodes/results.h $(MALLEABILITY_DEPENDS)
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
......@@ -7,7 +7,6 @@
#include "linear_reg.h"
#include "Main_datatypes.h"
#include "process_stage.h"
//#include "../malleability/malleabilityManager.h" //FIXME Refactor
#include "../malleability/distribution_methods/block_distribution.h"
void linear_regression_stage(iter_stage_t *stage, group_data group, MPI_Comm comm);
......@@ -129,16 +128,16 @@ double latency(int myId, int numP, MPI_Comm comm) {
MPI_Barrier(comm);
start_time = MPI_Wtime();
if(myId == 0) {
if(myId == ROOT) {
for(i=0; i<loop_count; i++){
MPI_Send(&aux, 0, MPI_CHAR, numP-1, 99, comm);
}
MPI_Recv(&aux, 0, MPI_CHAR, numP-1, 99, comm, MPI_STATUS_IGNORE);
} else if(myId+1 == numP) {
for(i=0; i<loop_count; i++){
MPI_Recv(&aux, 0, MPI_CHAR, 0, 99, comm, MPI_STATUS_IGNORE);
MPI_Recv(&aux, 0, MPI_CHAR, ROOT, 99, comm, MPI_STATUS_IGNORE);
}
MPI_Send(&aux, 0, MPI_CHAR, 0, 99, comm);
MPI_Send(&aux, 0, MPI_CHAR, ROOT, 99, comm);
}
MPI_Barrier(comm);
stop_time = MPI_Wtime();
......@@ -167,16 +166,16 @@ double bandwidth(int myId, int numP, MPI_Comm comm, double latency, int n) {
MPI_Barrier(comm);
start_time = MPI_Wtime();
if(myId == 0) {
if(myId == ROOT) {
for(i=0; i<loop_count; i++){
MPI_Send(aux, n, MPI_CHAR, numP-1, 99, comm);
}
MPI_Recv(aux, 0, MPI_CHAR, numP-1, 99, comm, MPI_STATUS_IGNORE);
} else if(myId+1 == numP) {
for(i=0; i<loop_count; i++){
MPI_Recv(aux, n, MPI_CHAR, 0, 99, comm, MPI_STATUS_IGNORE);
MPI_Recv(aux, n, MPI_CHAR, ROOT, 99, comm, MPI_STATUS_IGNORE);
}
MPI_Send(aux, 0, MPI_CHAR, 0, 99, comm);
MPI_Send(aux, 0, MPI_CHAR, ROOT, 99, comm);
}
MPI_Barrier(comm);
stop_time = MPI_Wtime();
......@@ -206,6 +205,9 @@ void linear_regression_stage(iter_stage_t *stage, group_data group, MPI_Comm com
}
}
// TODO Compute only if it has not already been computed for another stage.
// If it was computed before, copy those values
switch(stage->pt) {
//Communications
case COMP_BCAST:
......@@ -239,7 +241,11 @@ void linear_regression_stage(iter_stage_t *stage, group_data group, MPI_Comm com
}
printf("\n");
*/
//if(stage->t_stage < 0.1) {
//lr_compute(8*loop_iters, bytes, times, &(stage->slope), &(stage->intercept));
//} else {
lr_compute(tam, bytes, times, &(stage->slope), &(stage->intercept));
//}
} else {
MPI_Reduce(times, NULL, LR_ARRAY_TAM * loop_iters, MPI_DOUBLE, MPI_MAX, ROOT, comm);
}
......
......@@ -5,8 +5,6 @@
#include <stdio.h>
#include <mpi.h>
#include "Main_datatypes.h"
//#include "../malleability/malleabilityManager.h" //FIXME Refactor
#include "../IOcodes/read_ini.h"
enum compute_methods{COMP_PI, COMP_MATRIX, COMP_POINT, COMP_BCAST, COMP_ALLGATHER, COMP_REDUCE, COMP_ALLREDUCE};
......
export TOP := $(dir $(CURDIR)/$(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
BUILD := build
EXEC := exec
EXECDIR := $(addprefix $(TOP),$(EXEC))
export BUILDDIR = $(addprefix $(TOP),$(BUILD))
SUBDIRS := IOcodes Main malleability
.PHONY: subdirs $(SUBDIRS) build all
.PHONY: subdirs $(SUBDIRS) build all clean clear
#
#
#
#
CC := gcc
MCC := mpicc
CFLAGS := -Wall
CFLAGS := -Wall -Wextra
LIBFLAGS := -lm -lslurm -pthread
#
#
......@@ -18,19 +21,25 @@ LIBFLAGS := -lm -lslurm -pthread
#
all: subdirs exec
install:
#runTests
exec: subdirs
$(MCC) $(CFLAGS) -o test.out $(wildcard $(BUILDDIR)/*.o) $(LIBFLAGS)
mkdir -p $(EXECDIR) all
$(MCC) $(CFLAGS) -o $(EXECDIR)/a.out $(wildcard $(BUILDDIR)/*.o) $(LIBFLAGS)
subdirs: $(SUBDIRS)
$(SUBDIRS): | $(BUILD)
$(MAKE) -C $@
# Build order for the directories
# Directory where the compiled objects are stored
# Directory where the compiled objects and the executables are stored
$(BUILD):
mkdir -p $(BUILD)
mkdir -p $(BUILDDIR)
clean:
-rm -rf $(BUILD)
-rm $(BUILDDIR)/*.o
clear:
-rm -rf $(BUILDDIR)
-rm -rf $(EXECDIR)
......@@ -5,26 +5,6 @@
#include "distribution_methods/block_distribution.h"
#include "CommDist.h"
/*
struct Dist_data {
int ini; //Primer elemento a enviar
int fin; //Ultimo elemento a enviar
int tamBl; // Total de elementos
int qty; // Total number of rows of the full disperse matrix
int myId;
int numP;
MPI_Comm intercomm;
};
struct Counts {
int *counts;
int *displs;
int *zero_arr;
};
*/
void send_sync_arrays(struct Dist_data dist_data, char *array, int root, int numP_child, int idI, int idE, struct Counts counts);
void recv_sync_arrays(struct Dist_data dist_data, char *array, int root, int numP_parents, int idI, int idE, struct Counts counts);
......@@ -34,16 +14,7 @@ void recv_async_arrays(struct Dist_data dist_data, char *array, int root, int nu
void send_async_point_arrays(struct Dist_data dist_data, char *array, int rootBcast, int numP_child, int idI, int idE, struct Counts counts, MPI_Request *comm_req);
void recv_async_point_arrays(struct Dist_data dist_data, char *array, int root, int numP_parents, int idI, int idE, struct Counts counts, MPI_Request *comm_req);
// DIST FUNCTIONS
void get_dist(int qty, int id, int numP, struct Dist_data *dist_data);
void set_counts(int id, int numP, struct Dist_data data_dist, int *sendcounts);
void getIds_intercomm(struct Dist_data dist_data, int numP_other, int **idS);
/*
void mallocCounts(struct Counts *counts, int numP);
void freeCounts(struct Counts *counts);
void print_counts(struct Dist_data data_dist, int *xcounts, int *xdispls, int size, const char* name);
*/
/*
* Allocates memory for a vector of up to "qty" elements.
* The "qty" elements are distributed among the "numP" processes
......@@ -52,7 +23,7 @@ void print_counts(struct Dist_data data_dist, int *xcounts, int *xdispls, int si
void malloc_comm_array(char **array, int qty, int myId, int numP) {
struct Dist_data dist_data;
get_dist(qty, myId, numP, &dist_data);
get_block_dist(qty, myId, numP, &dist_data);
if( (*array = malloc(dist_data.tamBl * sizeof(char))) == NULL) {
printf("Memory Error (Malloc Arrays(%d))\n", dist_data.tamBl);
exit(1);
......@@ -88,7 +59,7 @@ int send_sync(char *array, int qty, int myId, int numP, int root, MPI_Comm inter
if(myId == root) rootBcast = MPI_ROOT;
get_dist(qty, myId, numP, &dist_data); // Distribution of this process within its group
get_block_dist(qty, myId, numP, &dist_data); // Distribution of this process within its group
dist_data.intercomm = intercomm;
// Create arrays which contain info about how many elements will be sent to each created process
......@@ -118,7 +89,7 @@ void recv_sync(char **array, int qty, int myId, int numP, int root, MPI_Comm int
struct Dist_data dist_data;
// Obtain the distribution for this child
get_dist(qty, myId, numP, &dist_data);
get_block_dist(qty, myId, numP, &dist_data);
*array = malloc(dist_data.tamBl * sizeof(char));
//(*array)[dist_data.tamBl] = '\0';
dist_data.intercomm = intercomm;
......@@ -142,7 +113,8 @@ void recv_sync(char **array, int qty, int myId, int numP, int root, MPI_Comm int
*/
void send_sync_arrays(struct Dist_data dist_data, char *array, int rootBcast, int numP_child, int idI, int idE, struct Counts counts) {
int i;
prepare_comm_alltoall(dist_data.myId, dist_data.numP, numP_child, dist_data.qty, &counts);
/*int i;
// PREPARAR ENVIO DEL VECTOR
if(idI == 0) {
set_counts(0, numP_child, dist_data, counts.counts);
......@@ -151,7 +123,8 @@ void send_sync_arrays(struct Dist_data dist_data, char *array, int rootBcast, in
for(i=idI; i<idE; i++) {
set_counts(i, numP_child, dist_data, counts.counts);
counts.displs[i] = counts.displs[i-1] + counts.counts[i-1];
}
}*/
//print_counts(dist_data, counts.counts, counts.displs, numP_child, "Padres");
/* DATA COMMUNICATION */
MPI_Alltoallv(array, counts.counts, counts.displs, MPI_CHAR, NULL, counts.zero_arr, counts.zero_arr, MPI_CHAR, dist_data.intercomm);
......@@ -164,10 +137,11 @@ void send_sync_arrays(struct Dist_data dist_data, char *array, int rootBcast, in
*/
void recv_sync_arrays(struct Dist_data dist_data, char *array, int root, int numP_parents, int idI, int idE, struct Counts counts) {
int i;
char aux;
prepare_comm_alltoall(dist_data.myId, dist_data.numP, numP_parents, dist_data.qty, &counts);
// Adjust the reception values
/*
if(idI == 0) {
set_counts(0, numP_parents, dist_data, counts.counts);
idI++;
......@@ -175,7 +149,7 @@ void recv_sync_arrays(struct Dist_data dist_data, char *array, int root, int num
for(i=idI; i<idE; i++) {
set_counts(i, numP_parents, dist_data, counts.counts);
counts.displs[i] = counts.displs[i-1] + counts.counts[i-1];
}
}*/
//print_counts(dist_data, counts.counts, counts.displs, numP_parents, "Hijos");
/* DATA COMMUNICATION */
......@@ -206,7 +180,7 @@ int send_async(char *array, int qty, int myId, int numP, int root, MPI_Comm inte
if(myId == root) rootBcast = MPI_ROOT;
get_dist(qty, myId, numP, &dist_data); // Distribution of this process within its group
get_block_dist(qty, myId, numP, &dist_data); // Distribution of this process within its group
dist_data.intercomm = intercomm;
// Create arrays which contain info about how many elements will be sent to each created process
......@@ -259,7 +233,7 @@ void recv_async(char **array, int qty, int myId, int numP, int root, MPI_Comm in
MPI_Request *comm_req, aux;
// Obtain the distribution for this child
get_dist(qty, myId, numP, &dist_data);
get_block_dist(qty, myId, numP, &dist_data);
*array = malloc(dist_data.tamBl * sizeof(char));
dist_data.intercomm = intercomm;
......@@ -308,8 +282,9 @@ void recv_async(char **array, int qty, int myId, int numP, int root, MPI_Comm in
* The send is performed through a collective communication.
*/
void send_async_arrays(struct Dist_data dist_data, char *array, int rootBcast, int numP_child, int idI, int idE, struct Counts counts, MPI_Request *comm_req) {
int i;
prepare_comm_alltoall(dist_data.myId, dist_data.numP, numP_child, dist_data.qty, &counts);
/*
// PREPARAR ENVIO DEL VECTOR
if(idI == 0) {
set_counts(0, numP_child, dist_data, counts.counts);
......@@ -319,6 +294,7 @@ void send_async_arrays(struct Dist_data dist_data, char *array, int rootBcast, i
set_counts(i, numP_child, dist_data, counts.counts);
counts.displs[i] = counts.displs[i-1] + counts.counts[i-1];
}
*/
//print_counts(dist_data, counts.counts, counts.displs, numP_child, "Padres");
/* COMUNICACION DE DATOS */
......@@ -334,8 +310,9 @@ void send_async_arrays(struct Dist_data dist_data, char *array, int rootBcast, i
*/
void send_async_point_arrays(struct Dist_data dist_data, char *array, int rootBcast, int numP_child, int idI, int idE, struct Counts counts, MPI_Request *comm_req) {
int i;
prepare_comm_alltoall(dist_data.myId, dist_data.numP, numP_child, dist_data.qty, &counts);
// PREPARE THE VECTOR TO SEND
/*
if(idI == 0) {
set_counts(0, numP_child, dist_data, counts.counts);
idI++;
......@@ -345,6 +322,11 @@ void send_async_point_arrays(struct Dist_data dist_data, char *array, int rootBc
set_counts(i, numP_child, dist_data, counts.counts);
counts.displs[i] = counts.displs[i-1] + counts.counts[i-1];
MPI_Isend(array+counts.displs[i], counts.counts[i], MPI_CHAR, i, 99, dist_data.intercomm, &(comm_req[i]));
}*/
for(i=0; i<numP_child; i++) { //TODO This approach no longer uses idI and idE
if(counts.counts[0] != 0) {
MPI_Isend(array+counts.displs[i], counts.counts[i], MPI_CHAR, i, 99, dist_data.intercomm, &(comm_req[i]));
}
}
//print_counts(dist_data, counts.counts, counts.displs, numP_child, "Padres");
}
......@@ -357,10 +339,11 @@ void send_async_point_arrays(struct Dist_data dist_data, char *array, int rootBc
* The reception is performed through a collective communication.
*/
void recv_async_arrays(struct Dist_data dist_data, char *array, int root, int numP_parents, int idI, int idE, struct Counts counts, MPI_Request *comm_req) {
int i;
char *aux = malloc(1);
// Adjust the reception values
prepare_comm_alltoall(dist_data.myId, dist_data.numP, numP_parents, dist_data.qty, &counts);
/*
if(idI == 0) {
set_counts(0, numP_parents, dist_data, counts.counts);
idI++;
......@@ -368,7 +351,7 @@ void recv_async_arrays(struct Dist_data dist_data, char *array, int root, int nu
for(i=idI; i<idE; i++) {
set_counts(i, numP_parents, dist_data, counts.counts);
counts.displs[i] = counts.displs[i-1] + counts.counts[i-1];
}
}*/
//print_counts(dist_data, counts.counts, counts.displs, numP_parents, "Hijos");
/* COMUNICACION DE DATOS */
......@@ -387,6 +370,8 @@ void recv_async_point_arrays(struct Dist_data dist_data, char *array, int root,
int i;
// Adjust the reception values
prepare_comm_alltoall(dist_data.myId, dist_data.numP, numP_parents, dist_data.qty, &counts);
/*
if(idI == 0) {
set_counts(0, numP_parents, dist_data, counts.counts);
idI++;
......@@ -396,6 +381,11 @@ void recv_async_point_arrays(struct Dist_data dist_data, char *array, int root,
set_counts(i, numP_parents, dist_data, counts.counts);
counts.displs[i] = counts.displs[i-1] + counts.counts[i-1];
MPI_Irecv(array+counts.displs[i], counts.counts[i], MPI_CHAR, i, 99, dist_data.intercomm, &(comm_req[i])); //FIXME BUffer recv
}*/
for(i=0; i<numP_parents; i++) { //TODO This approach no longer uses idI and idE
if(counts.counts[0] != 0) {
MPI_Irecv(array+counts.displs[i], counts.counts[i], MPI_CHAR, i, 99, dist_data.intercomm, &(comm_req[i])); //FIXME BUffer recv
}
}
//print_counts(dist_data, counts.counts, counts.displs, numP_parents, "Hijos");
}
......@@ -408,71 +398,6 @@ void recv_async_point_arrays(struct Dist_data dist_data, char *array, int root,
* ========================================================================================
*/
/*
* Obtains, for a given "id" and "numP", how many
* rows and elements per row process "id" will have,
* and fills the results into a Dist_data struct
*/
void get_dist(int qty, int id, int numP, struct Dist_data *dist_data) {
int rem;
dist_data->myId = id;
dist_data->numP = numP;
dist_data->qty = qty;
dist_data->tamBl = qty / numP;
rem = qty % numP;
if(id < rem) { // First subgroup
dist_data->ini = id * dist_data->tamBl + id;
dist_data->fin = (id+1) * dist_data->tamBl + (id+1);
} else { // Second subgroup
dist_data->ini = id * dist_data->tamBl + rem;
dist_data->fin = (id+1) * dist_data->tamBl + rem;
}
if(dist_data->fin > qty) {
dist_data->fin = qty;
}
if(dist_data->ini > dist_data->fin) {
dist_data->ini = dist_data->fin;
}
dist_data->tamBl = dist_data->fin - dist_data->ini;
}
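/*
* Worked example of this block distribution (illustrative): with qty=10 and
* numP=3, tamBl=3 and rem=1, so
*   id 0: ini=0, fin=4, tamBl=4
*   id 1: ini=4, fin=7, tamBl=3
*   id 2: ini=7, fin=10, tamBl=3
* i.e. the remainder is given to the first processes, one extra element each.
*/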
/*
* For a given process id, obtains how many elements
* it will send to or receive from the process described in Dist_data.
*/
void set_counts(int id, int numP, struct Dist_data data_dist, int *sendcounts) {
struct Dist_data other;
int biggest_ini, smallest_end;
get_dist(data_dist.qty, id, numP, &other);
// If the value ranges do not overlap, move on to the next process
if(data_dist.ini >= other.fin || data_dist.fin <= other.ini) {
return;
}
// Take the larger ini of the two processes
if(data_dist.ini > other.ini) {
biggest_ini = data_dist.ini;
} else {
biggest_ini = other.ini;
}
// Take the smaller fin of the two processes
if(data_dist.fin < other.fin) {
smallest_end = data_dist.fin;
} else {
smallest_end = other.fin;
}
sendcounts[id] = smallest_end - biggest_ini; // Number of elements to send to / receive from process id
}
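/*
* Continuing the worked example above (illustrative): if the local process owns
* [ini=0, fin=4) and the other group has numP=2 over the same qty=10, then
* process id=0 of that group owns [0, 5), so the overlap is [0, 4) and
* sendcounts[0] = 4; process id=1 owns [5, 10), which does not overlap, so
* the function returns early and that entry keeps its initialised value.
*/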
/*
* For a process in one group, obtains the range of processes of the
* other group it has to send data to or receive data from.
......
dir_targets := distribution_methods
objects1 := CommDist
objects2 := malleabilityTypes malleabilityZombies ProcessDist
objects3 := malleabilityManager
depends := $(addsuffix .h, malleabilityDataStructures malleabilityStates)
objects2 := malleabilityTypes ProcessDist
objects3 := malleabilityZombies
objects4 := malleabilityManager
DEPENDS := $(addsuffix .h, malleabilityDataStructures malleabilityStates)
CC := gcc
MCC := mpicc
CFLAGS := -Wall
CFLAGS := -Wall -Wextra
.PHONY: $(dir_targets) subdir
all: subdir $(objects1) $(objects2) $(objects3)
all: subdir $(objects1) $(objects2) $(objects3) $(objects4)
subdir: $(dir_targets)
$(dir_targets): %:
$(MAKE) -C $@
$(objects1): %: %.c %.h $(depends) $(dir_targets)/block_distribution.h
$(objects1): %: %.c %.h $(DEPENDS) $(dir_targets)/block_distribution.h
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
$(objects2): %: %.c %.h $(depends) $(TOP)/IOcodes/results.h
$(objects2): %: %.c %.h $(DEPENDS)
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
$(objects3): %: %.c %.h $(objects1).h $(objects2).h $(depends) \
$(TOP)/IOcodes/read_ini.h $(TOP)/IOcodes/results.h
$(objects3): %: %.c %.h $(DEPENDS) $(TOP)/IOcodes/results.h
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
#$(objects1) $(objects2) $(objects3)
# CommDist.c
# malleabilityTypes.c malleabilityZombies.c ProcessDist.c
#
# malleabilityManager.c
# malleabilityDataStructures.h malleabilityStates.h
$(objects4): %: %.c %.h $(objects1).h $(objects2).h $(objects3).h $(DEPENDS) \
$(TOP)/IOcodes/read_ini.h $(TOP)/IOcodes/results.h $(TOP)/Main/Main_datatypes.h
$(MCC) $(CFLAGS) -c -o $(BUILDDIR)/$@.o $<
objects1 := block_distribution
CC := gcc
MCC := mpicc
CFLAGS := -Wall
CFLAGS := -Wall -Wextra
all: $(objects1)
......
......@@ -23,7 +23,7 @@ int shrink_redistribution();
int thread_creation();
int thread_check();
void* thread_async_work(void* void_arg);
void* thread_async_work();
typedef struct {
int spawn_type;
......@@ -424,7 +424,7 @@ void Children_init() {
recv_config_file(mall->root, mall->intercomm, &(mall_conf->config_file));
mall_conf->results = (results_data *) malloc(sizeof(results_data));
init_results_data(mall_conf->results, mall_conf->config_file->n_resizes, RESULTS_INIT_DATA_QTY);
init_results_data(mall_conf->results, mall_conf->config_file->n_resizes, mall_conf->config_file->n_stages, RESULTS_INIT_DATA_QTY);
if(dist_a_data->entries || rep_a_data->entries) { // Receive asynchronous data
comm_data_info(rep_a_data, dist_a_data, MALLEABILITY_CHILDREN, mall->myId, root_parents, mall->intercomm);
......@@ -780,7 +780,7 @@ int thread_check() {
* When the communication finishes, the master thread can check it
* through the "commAsync" value.
*/
void* thread_async_work(void* void_arg) {
void* thread_async_work() {
send_data(mall->numC, dist_a_data, MALLEABILITY_USE_SYNCHRONOUS);
state = MAL_DIST_COMPLETED;
pthread_exit(NULL);
......
......@@ -9,6 +9,7 @@
#include <mpi.h>
#include "../IOcodes/read_ini.h"
#include "../IOcodes/results.h"
#include "../Main/Main_datatypes.h"
#include "malleabilityStates.h"
int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_exec, char *nodelist, int num_cpus, int num_nodes);
......