Commit 17c190f7 authored by iker_martin

Modifying the collection of timings. WIP. The project is going to be moved to the HPC&A group's GitLab.
parent afb37003
@@ -110,11 +110,15 @@ void reset_results_index(results_data *results) {
  * It is necessary to obtain the maximum, since it is the one that represents
  * the real time that has been used.
  */
-void compute_results_iter(results_data *results, int myId, int root, MPI_Comm comm) {
-  if(myId == root)
-    MPI_Reduce(MPI_IN_PLACE, results->iters_time, (int) results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
-  else
-    MPI_Reduce(results->iters_time, NULL, (int) results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
+void compute_results_iter(results_data *results, int myId, int numP, int root, MPI_Comm comm) { //TODO Try keeping the MEAN instead of the MAX?
+  if(myId == root) {
+    MPI_Reduce(MPI_IN_PLACE, results->iters_time, results->iter_index, MPI_DOUBLE, MPI_SUM, root, comm);
+    for(size_t i=0; i<results->iter_index; i++) {
+      results->iters_time[i] = results->iters_time[i] / numP;
+    }
+  } else {
+    MPI_Reduce(results->iters_time, NULL, results->iter_index, MPI_DOUBLE, MPI_SUM, root, comm);
+  }
 }
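The change above replaces the MPI_MAX reduction with MPI_SUM followed by a division by numP, so the root now keeps the mean iteration time across ranks instead of the slowest rank's time. A minimal standalone sketch of that pattern (hypothetical values, not taken from the benchmark's sources):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[]) {
    int myId, numP, root = 0;
    double iter_time;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myId);
    MPI_Comm_size(MPI_COMM_WORLD, &numP);

    iter_time = 0.1 * (myId + 1); /* stand-in for a measured iteration time */

    if (myId == root) {
        /* Root sums every rank's value in place, then divides by the
         * number of processes to obtain the mean. */
        MPI_Reduce(MPI_IN_PLACE, &iter_time, 1, MPI_DOUBLE, MPI_SUM, root, MPI_COMM_WORLD);
        iter_time /= numP;
        printf("mean iteration time: %f\n", iter_time);
    } else {
        MPI_Reduce(&iter_time, NULL, 1, MPI_DOUBLE, MPI_SUM, root, MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return 0;
}

The TODO left in the signature flags the trade-off: the maximum represents the real wall-clock cost of a synchronized iteration (the slowest rank dominates), while the mean smooths out stragglers. The same sum-then-divide pattern is applied per stage in compute_results_stages below.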
@@ -125,16 +129,19 @@ void compute_results_iter(results_data *results, int myId, int root, MPI_Comm co
  * It is necessary to obtain the maximum, since it is the one that represents
  * the real time that has been used.
  */
-void compute_results_stages(results_data *results, int myId, int root, int stages, MPI_Comm comm) {
+void compute_results_stages(results_data *results, int myId, int numP, int root, int stages, MPI_Comm comm) { //TODO Try keeping the MEAN instead of the MAX?
   int i;
   if(myId == root) {
     for(i=0; i<stages; i++) {
-      MPI_Reduce(MPI_IN_PLACE, results->stage_times[i], (int) results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
+      MPI_Reduce(MPI_IN_PLACE, results->stage_times[i], results->iter_index, MPI_DOUBLE, MPI_SUM, root, comm);
+      for(size_t j=0; j<results->iter_index; j++) {
+        results->stage_times[i][j] = results->stage_times[i][j] / numP;
+      }
     }
   }
   else {
     for(i=0; i<stages; i++) {
-      MPI_Reduce(results->stage_times[i], NULL, (int) results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
+      MPI_Reduce(results->stage_times[i], NULL, results->iter_index, MPI_DOUBLE, MPI_SUM, root, comm);
     }
   }
 }
@@ -25,8 +25,8 @@ void comm_results(results_data *results, int root, size_t resizes, MPI_Comm inte
 void set_results_post_reconfig(results_data *results, int grp, int sdr, int adr);
 void reset_results_index(results_data *results);
-void compute_results_iter(results_data *results, int myId, int root, MPI_Comm comm);
-void compute_results_stages(results_data *results, int myId, int root, int n_stages, MPI_Comm comm);
+void compute_results_iter(results_data *results, int myId, int numP, int root, MPI_Comm comm);
+void compute_results_stages(results_data *results, int myId, int numP, int root, int n_stages, MPI_Comm comm);
 void print_iter_results(results_data results);
 void print_stage_results(results_data results, size_t n_stages);
@@ -41,7 +41,7 @@ int main(int argc, char *argv[]) {
   //FIXME The code cannot perform more than one redistribution - Fix malleabilityTypes.c
   int num_cpus, num_nodes; //nodelist_len; //FIXME Remove once Slurm is used
   char *nodelist = NULL;
-  num_cpus = 20; //FIXME MAGIC NUMBER
+  num_cpus = 20; //FIXME MAGIC NUMBER //TODO Use OpenMP to obtain the value with a pragma
   if (argc >= 5) {
     nodelist = argv[3];
     //nodelist_len = strlen(nodelist);
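The new TODO proposes querying the runtime instead of hard-coding 20 CPUs. A minimal sketch of what that could look like with OpenMP (omp_get_num_procs is standard OpenMP; wiring it into the benchmark's build is an assumption):

#include <omp.h>
#include <stdio.h>

int main(void) {
    /* Ask the OpenMP runtime how many processors are available
     * instead of using a magic number; compile with -fopenmp. */
    int num_cpus = omp_get_num_procs();
    printf("num_cpus = %d\n", num_cpus);
    return 0;
}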
@@ -236,9 +236,9 @@ double iterate(int async_comm) {
   times_stages_aux = malloc(config_file->n_stages * sizeof(double));
   if(config_file->rigid_times) {
-    iterate_relaxed(&time, times_stages_aux);
+    aux = iterate_rigid(&time, times_stages_aux);
   } else {
-    iterate_rigid(&time, times_stages_aux);
+    aux = iterate_relaxed(&time, times_stages_aux);
   }
   // An asynchronous data redistribution is in progress
@@ -264,16 +264,16 @@ double iterate(int async_comm) {
 /*
  * Performs an iteration. The gathered times for iterations
- * and stages could be imprecise in order to ensure the
+ * and stages could be IMPRECISE in order to ensure the
  * global execution time is precise.
  */
 double iterate_relaxed(double *time, double *times_stages) {
   size_t i;
   double start_time, start_time_stage, aux=0;
-  start_time = MPI_Wtime();
+  start_time = MPI_Wtime(); // Imprecise timings
   for(i=0; i < config_file->n_stages; i++) {
     start_time_stage = MPI_Wtime();
     aux+= process_stage(*config_file, config_file->stages[i], *group, comm);
     times_stages[i] = MPI_Wtime() - start_time_stage;
   }
@@ -295,12 +295,13 @@ double iterate_rigid(double *time, double *times_stages) {
   start_time = MPI_Wtime();
   for(i=0; i < config_file->n_stages; i++) {
+    MPI_Barrier(comm);
     start_time_stage = MPI_Wtime();
     aux+= process_stage(*config_file, config_file->stages[i], *group, comm);
-    MPI_Barrier(comm);
     times_stages[i] = MPI_Wtime() - start_time_stage;
   }
+  MPI_Barrier(comm);
   *time = MPI_Wtime() - start_time; // Save times
   return aux;
 }
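After this reordering, each stage timer starts right after a barrier, so a rank's stage time no longer absorbs the wait for slower ranks leaving the previous stage, and the single barrier after the loop ensures the global time covers every rank's work. A self-contained sketch of the timing pattern (do_stage is a hypothetical stand-in for process_stage):

#include <mpi.h>
#include <stdio.h>
#include <unistd.h>

#define N_STAGES 3

static void do_stage(int rank, int stage) {
    usleep(1000 * (rank + stage + 1)); /* simulated uneven work per rank */
}

int main(int argc, char *argv[]) {
    double start, start_stage, stage_times[N_STAGES], total;
    int i, rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    start = MPI_Wtime();
    for (i = 0; i < N_STAGES; i++) {
        MPI_Barrier(MPI_COMM_WORLD);   /* all ranks enter the stage together */
        start_stage = MPI_Wtime();
        do_stage(rank, i);
        stage_times[i] = MPI_Wtime() - start_stage;
    }
    MPI_Barrier(MPI_COMM_WORLD);       /* every rank has finished all stages */
    total = MPI_Wtime() - start;

    if (rank == 0)
        printf("total: %f, stage 0: %f\n", total, stage_times[0]);

    MPI_Finalize();
    return 0;
}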
@@ -335,7 +336,7 @@ int print_local_results() {
   int ptr_local, ptr_out, err;
   char *file_name;
-  compute_results_iter(results, group->myId, ROOT, comm);
+  compute_results_iter(results, group->myId, group->numP, ROOT, comm);
   if(group->myId == ROOT) {
     ptr_out = dup(1);
@@ -225,8 +225,8 @@ double init_matrix_pt(group_data group, configuration *config_file, iter_stage_t
   initMatrix(&(stage->double_array), config_file->granularity);
   if(compute) {
-    start_time = MPI_Wtime();
     if(group.myId == ROOT) {
+      start_time = MPI_Wtime();
       result+= process_stage(*config_file, *stage, group, comm);
       stage->t_op = (MPI_Wtime() - start_time) / stage->operations; //Time of a single operation
     }
@@ -243,8 +243,8 @@ double init_pi_pt(group_data group, configuration *config_file, iter_stage_t *st
   result = 0;
   t_stage = stage->t_stage * config_file->groups[group.grp].factor;
   if(compute) {
-    start_time = MPI_Wtime();
     if(group.myId == ROOT) {
+      start_time = MPI_Wtime();
       result+= process_stage(*config_file, *stage, group, comm);
       stage->t_op = (MPI_Wtime() - start_time) / stage->operations; //Time of a single operation
     }
@@ -261,6 +261,7 @@ void init_comm_ptop_pt(group_data group, configuration *config_file, iter_stage_
   if(stage->array != NULL)
     free(stage->array);
   if(aux_bytes == 0) {
+    MPI_Barrier(comm);
     //aux_bytes = (stage->t_stage - config_file->latency_m) * config_file->bw_m;
     init_emulation_comm_time(group, config_file, stage, comm);
   }
@@ -277,6 +278,7 @@ double init_comm_bcast_pt(group_data group, configuration *config_file, iter_sta
     stage->real_bytes = stage->bytes;
     stage->array = malloc(stage->real_bytes * sizeof(char));
   } else { // Prepare to emulate Collective as PtoP
+    MPI_Barrier(comm);
     time = init_emulation_comm_time(group, config_file, stage, comm);
   }
   return time;
@@ -304,6 +306,7 @@ double init_comm_allgatherv_pt(group_data group, configuration *config_file, ite
     stage->array = malloc(stage->my_bytes * sizeof(char));
     stage->full_array = malloc(stage->real_bytes * sizeof(char));
   } else {
+    MPI_Barrier(comm);
     time = init_emulation_comm_time(group, config_file, stage, comm);
   }
@@ -323,6 +326,7 @@ double init_comm_reduce_pt(group_data group, configuration *config_file, iter_st
     //The full array for the reduce needs the same size
     stage->full_array = malloc(stage->real_bytes * sizeof(char));
   } else {
+    MPI_Barrier(comm);
     init_emulation_comm_time(group, config_file, stage, comm);
   }
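The four init_comm_*_pt hunks above all add the same line: an MPI_Barrier immediately before init_emulation_comm_time, presumably so that every rank starts the emulated-communication calibration from a common point. The pattern in isolation (timed_calibration and emulate_phase are hypothetical names, not the benchmark's API):

#include <mpi.h>

/* Sketch: synchronize before a timed phase so the measurement does not
 * include ranks arriving late from earlier, uneven work. */
static double timed_calibration(MPI_Comm comm, void (*emulate_phase)(MPI_Comm)) {
    double t0;
    MPI_Barrier(comm);   /* common starting point for all ranks */
    t0 = MPI_Wtime();
    emulate_phase(comm); /* stands in for init_emulation_comm_time's work */
    return MPI_Wtime() - t0;
}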
@@ -43,7 +43,7 @@ void zombies_collect_suspended(MPI_Comm comm, int myId, int numP, int numC, int
   // FIXME This should not be here
   // Needed to ensure iteration times are collected before suspending these processes
   results_data *results = (results_data *) results_void;
-  compute_results_iter(results, myId, root, comm);
+  compute_results_iter(results, myId, numP, root, comm);
   if(myId >= numC) {
     zombies_suspend();
@@ -16,7 +16,10 @@ echo "MPICH"
 #module load mpich-3.4.1-noucx
 #export HYDRA_DEBUG=1
-numP=$(bash recordMachinefile.sh $configFile)
+aux=$(grep "\[resize0\]" -n $1 | cut -d ":" -f1)
+read -r ini fin <<<$(echo $aux)
+diff=$(( fin - ini ))
+numP=$(head -$fin $1 | tail -$diff | cut -d ';' -f1 | grep Procs | cut -d '=' -f2)
 #mpirun -np 4 /home/martini/Instalaciones/valgrind-mpich-3.4.1-noucx/bin/valgrind --leak-check=full --show-leak-kinds=all --log-file=nc.vg.%p $dir$codeDir/a.out $configFile $outIndex $nodelist $nodes
 mpirun -np $numP $dir$codeDir/build/a.out $configFile $outIndex $nodelist $nodes
@@ -60,6 +60,8 @@ for resize in range(resizes):
     if proc_time != 0: # If the proc_time argument is 0, all groups get a factor of 1
         factor = proc_time / float(procs)
+        if proc_time != int(procs):
+            factor = factor/0.85 # To reduce scalability by a percentage
     else:
         factor = 1
@@ -62,6 +62,9 @@ do
   procs_array=(${procs_array[@]} $value)
 done
+i=$(($i + 1))
+procs_array[$i]=120
+
 #Create the results folder
 cd $dir$ResultsDir
 name_res=$node_qty"N-"$(date '+%m-%d')
@@ -4,33 +4,55 @@
 #SBATCH -p P1
 dir="/home/martini/malleability_benchmark"
-codeDir="/Codes"
+codeDir="/Codes/build"
+ResultsDir="/Results"
 nodelist=$SLURM_JOB_NODELIST
 nodes=$SLURM_JOB_NUM_NODES
-module load mpich-3.4.1-noucx
+if [ $# -lt 1 ]
+then
+  echo "Not enough arguments. Usage:"
+  echo "singleRun.sh config.ini [outFileIndex] [Qty] [Output path]"
+  exit 1
+fi
 echo "START TEST"
 #$1 == configFile
 #$2 == outFileIndex
-#$3 == cantidad de ejecuciones
+#$3 == Qty of repetitions
+#$4 == Output path
+configFile=$1
+outFileIndex=$2
+qty=1
 if [ $# -gt 2 ]
 then
   qty=$3
-else
-  qty=1
+  if [ $# -gt 3 ]
+  then
+    output=$4
+  fi
 fi
+aux=$(grep "\[resize0\]" -n $configFile | cut -d ":" -f1)
+read -r ini fin <<<$(echo $aux)
+diff=$(( fin - ini ))
+numP=$(head -$fin $configFile | tail -$diff | cut -d ';' -f1 | grep Procs | cut -d '=' -f2)
 for ((i=0; i<qty; i++))
 do
   echo "Iter $i"
-  numP=$(bash $dir$codeDir/recordMachinefile.sh $1)
-  mpirun -f hostfile.o$SLURM_JOB_ID $dir$codeDir/exec/a.out $1 $2 $nodelist $nodes
-  rm hostfile.o$SLURM_JOB_ID
+  mpirun $dir$codeDir/a.out $configFile $outFileIndex $nodelist $nodes
 done
 echo "END TEST"
 sed -i 's/application called MPI_Abort(MPI_COMM_WORLD, -100) - process/shrink cleaning/g' slurm-$SLURM_JOB_ID.out
+if [ $# -gt 3 ]
+then
+  echo "Moving data to $output\nMoved files:"
+  ls R${outFileIndex}_G*
+  mv R${outFileIndex}_G* $output
+fi