Commit 7b214a6c authored by iker_martin's avatar iker_martin

Removed the Slurm call to avoid interference

parent 14574ff0
@@ -50,6 +50,16 @@ int main(int argc, char *argv[]) {
   int req;
   int im_child;
+  //FIXME The code cannot perform more than one redistribution - Fix malleabilityTypes.c
+  int num_cpus, num_nodes; //nodelist_len; //FIXME Remove once Slurm is used
+  char *nodelist = NULL;
+  num_cpus = 20; //FIXME MAGIC NUMBER
+  if (argc >= 5) {
+    nodelist = argv[3];
+    //nodelist_len = strlen(nodelist);
+    num_nodes = atoi(argv[4]);
+  }
   MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &req);
   MPI_Comm_size(MPI_COMM_WORLD, &numP);
   MPI_Comm_rank(MPI_COMM_WORLD, &myId);
@@ -60,7 +70,7 @@ int main(int argc, char *argv[]) {
   }
   init_group_struct(argv, argc, myId, numP);
-  im_child = init_malleability(myId, numP, ROOT, comm, argv[0]);
+  im_child = init_malleability(myId, numP, ROOT, comm, argv[0], nodelist, num_cpus, num_nodes);
   if(!im_child) { //TODO REFACTOR Simplify startup
     init_application();
@@ -96,6 +106,18 @@ int main(int argc, char *argv[]) {
     group->iter_start = *((int *)value);
     free(value);
+    //FIXME Remove once SLURM is used
+    /*
+    malleability_get_data(&value, 4, 1, 1);
+    num_nodes = *((int *)value);
+    free(value);
+
+    malleability_get_data(&value, 5, 1, 1);
+    nodelist = (char *)value;
+    //free(value);
+    nodelist_len = strlen(nodelist);
+    */
     group->grp = group->grp + 1;
   }
@@ -116,6 +138,10 @@ int main(int argc, char *argv[]) {
   malleability_add_data(&(group->grp), 1, MAL_INT, 1, 1);
   malleability_add_data(&run_id, 1, MAL_INT, 1, 1);
   malleability_add_data(&(group->iter_start), 1, MAL_INT, 1, 1);
+  //FIXME Remove once SLURM is used
+  //malleability_add_data(&num_nodes, 1, MAL_INT, 1, 1);
+  //malleability_add_data(&nodelist, nodelist_len, MAL_CHAR, 1, 1);
 }
 }
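With the Slurm query gone, main() now receives the node list and node count on the command line. A minimal sketch of the new argv contract, with one caveat: in the committed code num_nodes is read uninitialized later (in node_dist) whenever argc < 5, so the fallback values shown here are assumptions, not part of the commit.

/* Sketch of the new argv contract (assumed defaults flagged):
 *   argv[1] = config file, argv[2] = run id (unchanged),
 *   argv[3] = nodelist (e.g. an expanded $SLURM_JOB_NODELIST),
 *   argv[4] = number of nodes. */
int num_cpus = 20;            // same magic number as in the commit
int num_nodes = 1;            // assumed fallback, not in the commit
char *nodelist = "localhost"; // assumed fallback, not in the commit
if (argc >= 5) {
    nodelist = argv[3];
    num_nodes = atoi(argv[4]);
}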
...

@@ -18,6 +18,8 @@ double end_time; //FIXME REFACTOR
 struct Slurm_data {
   char *cmd; // Executable name
+  char *nodelist;
+  int num_cpus, num_nodes;
   int qty_procs, result_procs;
   MPI_Info info;
   int type_creation;
@@ -44,9 +46,9 @@ void generic_spawn(int myId, int root, int is_single, MPI_Comm *child, MPI_Comm
 void single_spawn_connection(int myId, int root, MPI_Comm comm, MPI_Comm *child);
 int create_processes(int myId, int root, MPI_Comm *child, MPI_Comm comm);
-void node_dist(slurm_job_info_t job_record, int type, int total_procs, int already_created, int **qty, int *used_nodes);
-void fill_str_hostfile(slurm_job_info_t job_record, int *qty, int used_nodes, char **hostfile_str);
+void node_dist(int type, int total_procs, int already_created, int **qty, int *used_nodes);
+void fill_str_hostfile(int *qty, int used_nodes, char **hostfile_str);
 int write_str_node(char **hostfile_str, int len_og, int qty, char *node_name);
 //@deprecated functions
@@ -72,7 +74,7 @@ void fill_hostfile(slurm_job_info_t job_record, int ptr, int *qty, int used_node
 * Returns the status of the procedure. If it does not return "COMM_FINISHED",
 * "check_slurm_comm()" must be called.
 */
-int init_slurm_comm(char *argv, int myId, int numP, int numC, int root, int type_dist, int type_creation, int spawn_is_single, MPI_Comm comm, MPI_Comm *child) {
+int init_slurm_comm(char *argv, int num_cpus, int num_nodes, char *nodelist, int myId, int numP, int numC, int root, int type_dist, int type_creation, int spawn_is_single, MPI_Comm comm, MPI_Comm *child) {
   int spawn_qty, already_created = 0;
   slurm_data = malloc(sizeof(struct Slurm_data));
@@ -80,6 +82,9 @@ int init_slurm_comm(char *argv, int myId, int numP, int numC, int root, int type
   slurm_data->type_creation = type_creation;
   slurm_data->spawn_is_single = spawn_is_single;
   slurm_data->result_procs = numC;
+  slurm_data->num_cpus = num_cpus;
+  slurm_data->num_nodes = num_nodes;
+  slurm_data->nodelist = nodelist;
   spawn_qty = numC;
   if(type_creation == COMM_SPAWN_MERGE || type_creation == COMM_SPAWN_MERGE_PTHREAD) {
     if (numP < slurm_data->result_procs) {
@@ -406,27 +411,27 @@ void proc_adapt_shrink(int numC, MPI_Comm *comm, int myId) {
 * for the processes and creating a hostfile.
 */
 void processes_dist(char *argv, int numP_childs, int already_created, int type) {
-  int jobId;
-  char *tmp;
-  job_info_msg_t *j_info;
-  slurm_job_info_t last_record;
+  //int jobId;
+  //char *tmp;
+  //job_info_msg_t *j_info;
+  //slurm_job_info_t last_record;
   int used_nodes=0;
   int *procs_array;
   char *hostfile;

   // Get Slurm job info
-  tmp = getenv("SLURM_JOB_ID");
-  jobId = atoi(tmp);
-  slurm_load_job(&j_info, jobId, 1);
-  last_record = j_info->job_array[j_info->record_count - 1];
+  //tmp = getenv("SLURM_JOB_ID");
+  //jobId = atoi(tmp);
+  //slurm_load_job(&j_info, jobId, 1);
+  //last_record = j_info->job_array[j_info->record_count - 1];

   //COPY PROGRAM NAME
   slurm_data->cmd = malloc(strlen(argv) * sizeof(char));
   strcpy(slurm_data->cmd, argv);

   // GET NEW DISTRIBUTION
-  node_dist(last_record, type, numP_childs, already_created, &procs_array, &used_nodes);
+  node_dist(type, numP_childs, already_created, &procs_array, &used_nodes); //TODO REFACTOR
   slurm_data->qty_procs = numP_childs;
 /*
@@ -442,16 +447,17 @@ void processes_dist(char *argv, int numP_childs, int already_created, int type)
   close(ptr);
 */
-  // TEST
-  fill_str_hostfile(last_record, procs_array, used_nodes, &hostfile);
+  // CREATE AND SET STRING HOSTFILE
+  fill_str_hostfile(procs_array, used_nodes, &hostfile); //TODO REFACTOR
   MPI_Info_create(&(slurm_data->info));
+  printf("hosts %s\n", hostfile);
   MPI_Info_set(slurm_data->info, "hosts", hostfile);
   free(hostfile);
   free(procs_array);

   // Free JOB INFO
-  slurm_free_job_info_msg(j_info);
+  //slurm_free_job_info_msg(j_info); //TODO REFACTOR
 }
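One pre-existing bug passes through this hunk untouched: the program-name copy allocates strlen(argv) bytes, but strcpy() also writes the terminating '\0', a one-byte heap overflow. A corrected sketch of those two lines:

/* Corrected allocation for the program-name copy (sketch): */
slurm_data->cmd = malloc((strlen(argv) + 1) * sizeof(char)); // +1 for '\0'
strcpy(slurm_data->cmd, argv);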
 /*
@@ -466,26 +472,26 @@ void processes_dist(char *argv, int numP_childs, int already_created, int type)
 * COMM_PHY_CPU (2): Aimed at filling one node's capacity before
 * occupying another node.
 */
-void node_dist(slurm_job_info_t job_record, int type, int total_procs, int already_created, int **qty, int *used_nodes) {
+void node_dist(int type, int total_procs, int already_created, int **qty, int *used_nodes) {
   int i, asigCores;
   int tamBl, remainder;
   int *procs;

-  procs = calloc(job_record.num_nodes, sizeof(int)); // Number of processes per node
+  procs = calloc(slurm_data->num_nodes, sizeof(int)); // Number of processes per node

   /* GET NEW DISTRIBUTION */
   if(type == 1) { // DIST NODES
-    *used_nodes = job_record.num_nodes;
-    tamBl = total_procs / job_record.num_nodes;
-    remainder = total_procs % job_record.num_nodes;
+    *used_nodes = slurm_data->num_nodes;
+    tamBl = total_procs / slurm_data->num_nodes;
+    remainder = total_procs % slurm_data->num_nodes;
     for(i=0; i<remainder; i++) {
       procs[i] = tamBl + 1;
     }
-    for(i=remainder; i<job_record.num_nodes; i++) {
+    for(i=remainder; i<slurm_data->num_nodes; i++) {
       procs[i] = tamBl;
     }
   } else if (type == 2) { // DIST CPUs
-    tamBl = job_record.num_cpus / job_record.num_nodes;
+    tamBl = slurm_data->num_cpus / slurm_data->num_nodes;
     asigCores = 0;
     i = already_created / tamBl;
     *used_nodes = already_created / tamBl;
@@ -493,7 +499,7 @@ void node_dist(slurm_job_info_t job_record, int type, int total_procs, int alrea
     while(asigCores+tamBl <= total_procs) {
       asigCores += tamBl;
       procs[i] += tamBl;
-      i = (i+1) % job_record.num_nodes;
+      i = (i+1) % slurm_data->num_nodes;
       (*used_nodes)++;
     }
@@ -501,7 +507,7 @@ void node_dist(slurm_job_info_t job_record, int type, int total_procs, int alrea
       procs[i] += total_procs - asigCores;
       (*used_nodes)++;
     }
-    if(*used_nodes > job_record.num_nodes) *used_nodes = job_record.num_nodes; //FIXME Isn't it an error if this happens?
+    if(*used_nodes > slurm_data->num_nodes) *used_nodes = slurm_data->num_nodes; //FIXME Isn't it an error if this happens?
   }

   *qty = calloc(*used_nodes, sizeof(int)); // Number of processes per node
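To make the two policies concrete, here is a small standalone mimic of the logic above. The values are illustrative only; num_cpus=20 matches the magic number hard-coded in main.

#include <stdio.h>

int main(void) {
    int num_nodes = 2, total_procs = 5;
    /* Type 1, DIST NODES: spread processes evenly across all nodes. */
    int tamBl = total_procs / num_nodes;     /* 2 */
    int remainder = total_procs % num_nodes; /* 1 */
    for (int i = 0; i < num_nodes; i++)
        printf("node%d: %d procs\n", i, i < remainder ? tamBl + 1 : tamBl);
    /* Prints node0: 3 procs, node1: 2 procs. */

    /* Type 2, DIST CPUs, instead packs nodes to capacity first: with the
     * commit's num_cpus=20 and num_nodes=2, tamBl = 20/2 = 10 cores per
     * node, so all 5 processes land on node0 and *used_nodes ends up 1. */
    return 0;
}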
@@ -516,12 +522,12 @@ void node_dist(slurm_job_info_t job_record, int type, int total_procs, int alrea
 * Creates and returns a string to be used by the "hosts" key
 * when creating processes, indicating where they have to be created.
 */
-void fill_str_hostfile(slurm_job_info_t job_record, int *qty, int used_nodes, char **hostfile_str) {
+void fill_str_hostfile(int *qty, int used_nodes, char **hostfile_str) {
   int i=0, len=0;
   char *host;
   hostlist_t hostlist;

-  hostlist = slurm_hostlist_create(job_record.nodes);
+  hostlist = slurm_hostlist_create(slurm_data->nodelist);
   while ( (host = slurm_hostlist_shift(hostlist)) && i < used_nodes) {
     if(qty[i] != 0) {
       len = write_str_node(hostfile_str, len, qty[i], host);
@@ -530,7 +536,6 @@ void fill_str_hostfile(slurm_job_info_t job_record, int *qty, int used_nodes, ch
     free(host);
   }
   slurm_hostlist_destroy(hostlist);
 }
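write_str_node() is not touched by this commit, so its body does not appear in the diff. For context, a minimal sketch of what it plausibly does, appending one host:qty entry and returning the new string length. Both the entry format and the NULL-on-first-call convention are assumptions, not taken from the source.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical reconstruction: assumes *hostfile_str is NULL and len_og
 * is 0 on the first call, and that the "hosts" value is a comma-separated
 * list of host:qty pairs. */
int write_str_node(char **hostfile_str, int len_og, int qty, char *node_name) {
    char entry[256];
    /* Comma-separate entries after the first one. */
    int written = snprintf(entry, sizeof(entry), "%s%s:%d",
                           len_og ? "," : "", node_name, qty);
    *hostfile_str = realloc(*hostfile_str, len_og + written + 1);
    strcpy(*hostfile_str + len_og, entry);
    return len_og + written;
}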
 /*
...

@@ -5,7 +5,7 @@
 #include <slurm/slurm.h>
 #include "malleabilityStates.h"
-int init_slurm_comm(char *argv, int myId, int numP, int numC, int root, int type_dist, int type_creation, int spawn_is_single, MPI_Comm comm, MPI_Comm *child);
+int init_slurm_comm(char *argv, int num_cpus, int num_nodes, char *nodelist, int myId, int numP, int numC, int root, int type_dist, int type_creation, int spawn_is_single, MPI_Comm comm, MPI_Comm *child);
 int check_slurm_comm(int myId, int root, int numP, MPI_Comm *child, MPI_Comm comm, MPI_Comm comm_thread, double *end_real_time);
 void malleability_establish_connection(int myId, int root, MPI_Comm *intercomm);
...
@@ -45,7 +45,8 @@ typedef struct { //FIXME numC_spawned is not being used
   MPI_Comm intercomm;
   MPI_Comm user_comm;
-  char *name_exec;
+  char *name_exec, *nodelist;
+  int num_cpus, num_nodes;
 } malleability_t;

 int state = MAL_UNRESERVED; //FIXME Move somewhere else
@@ -68,7 +69,7 @@ malleability_data_t *dist_a_data;
 * the communication, the child processes are ready to run the
 * application.
 */
-int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_exec) {
+int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_exec, char *nodelist, int num_cpus, int num_nodes) {
   MPI_Comm dup_comm, thread_comm;

   mall_conf = (malleability_config_t *) malloc(sizeof(malleability_config_t));
@@ -87,7 +88,11 @@ int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_ex
   mall->comm = dup_comm;
   mall->thread_comm = thread_comm; // TODO Refactor -- create it only if needed?
   mall->user_comm = comm;
   mall->name_exec = name_exec;
+  mall->nodelist = nodelist;
+  mall->num_cpus = num_cpus;
+  mall->num_nodes = num_nodes;

   rep_s_data->entries = 0;
   rep_a_data->entries = 0;
@@ -436,7 +441,13 @@ void Children_init() {
   // TODO Create a specific function and add it for Async
   // TODO Take type and qty into account
   for(i=0; i<rep_s_data->entries; i++) {
-    MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], MPI_INT, root_parents, mall->intercomm);
+    MPI_Datatype datatype;
+    if(rep_s_data->types[i] == MAL_INT) {
+      datatype = MPI_INT;
+    } else {
+      datatype = MPI_CHAR;
+    }
+    MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], datatype, root_parents, mall->intercomm);
   }
 }
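The same int-or-char switch is repeated in end_redistribution() below; a small helper would keep both broadcast sites in sync. A sketch only, under the assumption that MAL_INT and MAL_CHAR from malleabilityStates.h are the only tags in play; this helper is not part of the commit.

/* Hypothetical helper: maps a MAL_* tag to the MPI datatype used for the
 * replicated-data broadcasts (assumes mpi.h and malleabilityStates.h). */
static MPI_Datatype mal_to_mpi_type(int mal_type) {
    switch (mal_type) {
        case MAL_INT: return MPI_INT;
        default:      return MPI_CHAR; // anything else travels as bytes
    }
}

// Usage in both loops:
// MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i],
//           mal_to_mpi_type(rep_s_data->types[i]), root_parents, mall->intercomm);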
@@ -476,7 +487,7 @@ int spawn_step(){
   return state;
 }
-  state = init_slurm_comm(mall->name_exec, mall->myId, mall->numP, mall->numC, mall->root, mall_conf->spawn_dist, mall_conf->spawn_type, mall_conf->spawn_is_single, mall->thread_comm, &(mall->intercomm));
+  state = init_slurm_comm(mall->name_exec, mall->num_cpus, mall->num_nodes, mall->nodelist, mall->myId, mall->numP, mall->numC, mall->root, mall_conf->spawn_dist, mall_conf->spawn_type, mall_conf->spawn_is_single, mall->thread_comm, &(mall->intercomm));
   if(mall_conf->spawn_type == COMM_SPAWN_SERIAL || mall_conf->spawn_type == COMM_SPAWN_MERGE)
     mall_conf->results->spawn_time[mall_conf->grp] = MPI_Wtime() - mall_conf->results->spawn_start;
@@ -593,9 +604,15 @@ int end_redistribution() {
   send_data(mall->numC, dist_s_data, MALLEABILITY_USE_SYNCHRONOUS);
   // TODO Create a specific function and add it for Async
-  // TODO Take type and qty into account
+  // TODO Take type into account
   for(i=0; i<rep_s_data->entries; i++) {
-    MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], MPI_INT, rootBcast, mall->intercomm);
+    MPI_Datatype datatype;
+    if(rep_s_data->types[i] == MAL_INT) {
+      datatype = MPI_INT;
+    } else {
+      datatype = MPI_CHAR;
+    }
+    MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], datatype, rootBcast, mall->intercomm);
   }
 }
...
@@ -8,7 +8,7 @@
 #include "../IOcodes/results.h"
 #include "malleabilityStates.h"
-int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_exec);
+int init_malleability(int myId, int numP, int root, MPI_Comm comm, char *name_exec, char *nodelist, int num_cpus, int num_nodes);
 void free_malleability();
 void indicate_ending_malleability(int new_outside_state);
 int malleability_checkpoint();
...
@@ -6,6 +6,9 @@
 dir="/home/martini/malleability_benchmark"
 codeDir="/Codes"

+nodelist=$SLURM_JOB_NODELIST
+nodes=$SLURM_JOB_NUM_NODES

 echo "MPICH"
 module load mpich-3.4.1-noucx
 #export HYDRA_DEBUG=1
@@ -13,8 +16,7 @@ module load mpich-3.4.1-noucx
 numP=$(bash recordMachinefile.sh $1)

-#mpirun -f hostfile.o$SLURM_JOB_ID ./a.out $1 $2
-mpirun -print-all-exitcodes -f hostfile.o$SLURM_JOB_ID $dir$codeDir/a.out $1 $2
+mpirun -print-all-exitcodes -f hostfile.o$SLURM_JOB_ID $dir$codeDir/a.out $1 $2 $nodelist $nodes

 rm hostfile.o$SLURM_JOB_ID
 echo "END RUN"
...
 [general]
 resizes=1 ; Number of redistributions
 matrix_tam=1000 ; Size in bytes of the compute matrix
-comm_tam=10000000 ; Size in bytes of the data communicated each iteration. Only once
+comm_tam=1000 ; Size in bytes of the data communicated each iteration. Only once
-SDR=1000 ; Size in bytes to redistribute synchronously
+SDR=100000000 ; Size in bytes to redistribute synchronously
-ADR=1000 ; Size in bytes to redistribute asynchronously 1000000000
+ADR=100000000 ; Size in bytes to redistribute asynchronously 1000000000
 AIB=3 ; Whether asynchronous redistributions are considered finished by the parents
-; when they finish sending (0) or when the values have been received (1)
+; when they finish sending (0), when the values have been received (1),
+; or using point-to-point communications (2), or using threads (3)
+CST=0 ; How the spawn is performed: (0) the baseline method, (1) the baseline
+; method with threads, (2) the merge method and (3) the merge method
+; with threads
+CSS=0 ; Whether the spawn is performed by all processes (0) or only the
+; root process takes part (1)
 time=1 ; Time needed to perform one iteration
+; end [general]

 [resize0] ; Initial group (mpirun)
-iters=1 ; Number of iterations this group performs
+iters=10 ; Number of iterations this group performs
 procs=2 ; Number of processes in the group
 factor=1 ; Cost factor
 physical_dist=node ; Type of physical process distribution
@@ -21,3 +28,4 @@ iters=20
 procs=4
 factor=0.5
 physical_dist=node
+;end [resize1]
@@ -5,6 +5,10 @@
 dir="/home/martini/malleability_benchmark"
 codeDir="/Codes"
 ResultsDir="/Results"

+nodelist=$SLURM_JOB_NODELIST
+nodes=$SLURM_JOB_NUM_NODES

 module load mpich-3.4.1-noucx

 name_dir=$1
@@ -40,7 +44,7 @@ do
   for index in 1 2 3 4 5 6 7 8 9 10
   do
     numP=$(bash $dir$codeDir/recordMachinefile.sh $config_file) # Creates the hostfile
-    mpirun -f hostfile.o$SLURM_JOB_ID $dir$codeDir/./bench.out $config_file $i
+    mpirun -f hostfile.o$SLURM_JOB_ID $dir$codeDir/./bench.out $config_file $i $nodelist $nodes
     rm hostfile.o$SLURM_JOB_ID
   done
...
@@ -6,6 +6,9 @@ dir="/home/martini/malleability_benchmark"
 codeDir="/Codes"
 ResultsDir="/Results"

+nodelist=$SLURM_JOB_NODELIST
+nodes=$SLURM_JOB_NUM_NODES

 module load mpich-3.4.1-noucx

 echo "START TEST"
@@ -24,7 +27,7 @@ for ((i=0; i<qty; i++))
 do
   echo "Iter $i"
   numP=$(bash $dir$codeDir/recordMachinefile.sh $1)
-  mpirun -f hostfile.o$SLURM_JOB_ID $dir$codeDir/bench.out $1 $2
+  mpirun -f hostfile.o$SLURM_JOB_ID $dir$codeDir/bench.out $1 $2 $nodelist $nodes
   rm hostfile.o$SLURM_JOB_ID
 done
...