Commit 023318fb authored by iker_martin's avatar iker_martin
Browse files

Added functions to automatically discover node info. Other minor changes.

parent 69193a34
......@@ -48,15 +48,6 @@ int main(int argc, char *argv[]) {
int im_child;
int abort_needed = 0;
int num_cpus, num_nodes;
char *nodelist = NULL;
num_cpus = 20; //FIXME NUMERO MAGICO //TODO Usar openMP para obtener el valor con un pragma
if (argc >= 5) {
nodelist = argv[3];
num_nodes = atoi(argv[4]);
num_cpus = num_nodes * num_cpus;
}
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &req);
MPI_Comm_rank(MPI_COMM_WORLD, &myId);
MPI_Comm_size(MPI_COMM_WORLD, &numP);
......@@ -70,7 +61,7 @@ int main(int argc, char *argv[]) {
}
init_group_struct(argv, argc, myId, numP);
im_child = MAM_Init(ROOT, &comm, argv[0], nodelist, num_cpus, num_nodes, user_redistribution, NULL);
im_child = MAM_Init(ROOT, &comm, argv[0], user_redistribution, NULL);
if(im_child) {
update_targets();
......
......@@ -6,6 +6,7 @@
#include "malleabilityTypes.h"
#include "malleabilityZombies.h"
#include "malleabilityTimes.h"
#include "malleabilityRMS.h"
#include "spawn_methods/GenericSpawn.h"
#include "CommDist.h"
......@@ -18,7 +19,8 @@ void send_data(int numP_children, malleability_data_t *data_struct, int is_async
void recv_data(int numP_parents, malleability_data_t *data_struct, int is_asynchronous);
int MAM_St_not_started(int *mam_state);
int MAM_St_rms(int *mam_state);
int MAM_St_spawn_start();
int MAM_St_spawn_pending(int wait_completed);
int MAM_St_red_start();
int MAM_St_red_pending(int *mam_state, int wait_completed);
......@@ -66,7 +68,7 @@ mam_user_reconf_t *user_reconf;
* la comunicacion los procesos hijo estan preparados para ejecutar la
* aplicacion.
*/
int MAM_Init(int root, MPI_Comm *comm, char *name_exec, char *nodelist, int num_cpus, int num_nodes, void (*user_function)(void *), void *user_args) {
int MAM_Init(int root, MPI_Comm *comm, char *name_exec, void (*user_function)(void *), void *user_args) {
MPI_Comm dup_comm, thread_comm;
mall_conf = (malleability_config_t *) malloc(sizeof(malleability_config_t));
......@@ -99,9 +101,7 @@ int MAM_Init(int root, MPI_Comm *comm, char *name_exec, char *nodelist, int num_
mall->tmp_comm = MPI_COMM_NULL;
mall->name_exec = name_exec;
mall->nodelist = nodelist;
mall->num_cpus = num_cpus;
mall->num_nodes = num_nodes;
mall->nodelist = NULL;
rep_s_data->entries = 0;
rep_a_data->entries = 0;
......@@ -116,24 +116,18 @@ int MAM_Init(int root, MPI_Comm *comm, char *name_exec, char *nodelist, int num_
// Si son el primer grupo de procesos, obtienen los datos de los padres
MPI_Comm_get_parent(&(mall->intercomm));
if(mall->intercomm != MPI_COMM_NULL ) {
if(mall->intercomm != MPI_COMM_NULL) {
Children_init(user_function, user_args);
return MALLEABILITY_CHILDREN;
}
MAM_check_hosts();
#if USE_MAL_BARRIERS && USE_MAL_DEBUG
if(mall->myId == mall->root)
printf("MaM: Using barriers to record times.\n");
#endif
if(nodelist != NULL) { //TODO To be deprecated by using Slurm or else statement
mall->nodelist_len = strlen(nodelist);
} else { // If no nodelist is detected, get it from the actual run
mall->nodelist = malloc(MPI_MAX_PROCESSOR_NAME * sizeof(char));
MPI_Get_processor_name(mall->nodelist, &mall->nodelist_len);
//TODO Get name of each process and create real nodelist
}
#if USE_MAL_DEBUG
DEBUG_FUNC("MaM has been initialized correctly as parents", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(*comm);
#endif
......@@ -191,7 +185,10 @@ int MAM_Checkpoint(int *mam_state, int wait_completed, void (*user_function)(voi
*mam_state = MAM_UNRESERVED;
break;
case MALL_NOT_STARTED:
call_checkpoint = MAM_St_not_started(mam_state);
call_checkpoint = MAM_St_rms(mam_state);
break;
case MALL_RMS_COMPLETED:
call_checkpoint = MAM_St_spawn_start();
break;
case MALL_SPAWN_PENDING: // Comprueba si el spawn ha terminado
......@@ -567,8 +564,9 @@ void recv_data(int numP_parents, malleability_data_t *data_struct, int is_asynch
//======================================================||
//======================================================||
int MAM_St_not_started(int *mam_state) {
int MAM_St_rms(int *mam_state) {
*mam_state = MAM_NOT_STARTED;
state = MALL_RMS_COMPLETED;
reset_malleability_times();
// Comprobar si se tiene que realizar un redimensionado
......@@ -577,7 +575,10 @@ int MAM_St_not_started(int *mam_state) {
#endif
mall_conf->times->malleability_start = MPI_Wtime();
//if(CHECK_RMS()) {return MALL_DENIED;}
return 1;
}
int MAM_St_spawn_start() {
state = spawn_step();
//FIXME Esto es necesario pero feo
if(mall_conf->spawn_method == MALL_SPAWN_MERGE && mall->myId >= mall->numC){ mall->zombie = 1; }
......
......@@ -15,7 +15,7 @@ typedef struct {
MPI_Comm comm;
} mam_user_reconf_t;
int MAM_Init(int root, MPI_Comm *comm, char *name_exec, char *nodelist, int num_cpus, int num_nodes, void (*user_function)(void *), void *user_args);
int MAM_Init(int root, MPI_Comm *comm, char *name_exec, void (*user_function)(void *), void *user_args);
void MAM_Finalize();
int MAM_Checkpoint(int *mam_state, int wait_completed, void (*user_function)(void *), void *user_args);
void MAM_Resume_redistribution(int *mam_state);
......
#define _GNU_SOURCE
#include "malleabilityRMS.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sched.h>
#if USE_MAL_SLURM
#include <slurm/slurm.h>
int MAM_I_slurm_getenv_hosts_info();
int MAM_I_slurm_getjob_hosts_info();
#endif
int MAM_I_get_hosts_info();
int GetCPUCount();
/*
 * Obtains the host information of the job and stores it in "mall".
 *
 * The sources are tried in order and the first one that succeeds wins:
 *   1. Slurm environment variables   (only if USE_MAL_SLURM is enabled)
 *   2. Slurm job record              (only if USE_MAL_SLURM is enabled)
 *   3. Processor names gathered through MPI
 *
 * If every source fails, the root prints an error and the job is aborted
 * with MPI_Abort.
 */
void MAM_check_hosts() {
  int missing_info = 1; // Stays non-zero until one of the sources succeeds

#if USE_MAL_SLURM
  missing_info = MAM_I_slurm_getenv_hosts_info();
  if(missing_info) {
    // Discard whatever a failed attempt may have left; free(NULL) is a no-op
    free(mall->nodelist);
    mall->nodelist = NULL;
    missing_info = MAM_I_slurm_getjob_hosts_info();
  }
#endif

  if(missing_info) {
    free(mall->nodelist);
    mall->nodelist = NULL;
    missing_info = MAM_I_get_hosts_info();
  }

  if(missing_info) {
    if(mall->myId == mall->root) {
      printf("MAM FATAL ERROR: It has not been possible to obtain the nodelist\n");
    }
    fflush(stdout);
    MPI_Abort(mall->comm, -50);
  }

#if USE_MAL_DEBUG >= 2
  if(mall->myId == mall->root) {
    DEBUG_FUNC("Obtained Nodelist", mall->myId, mall->numP);
    printf("NODELIST: %s\nNODE_COUNT: %d NUM_CPUS_PER_NODE: %d\n", mall->nodelist, mall->num_nodes, mall->num_cpus);
    fflush(stdout);
  }
#endif
}
/*
 * Builds the node list from the processor names reported by MPI. It is the
 * fallback used when no other source of host information is available.
 *
 * Collective over mall->comm: every process contributes its processor name
 * and the root gathers them, removes duplicates (keeping the order of first
 * appearance) and stores the comma separated result in mall->nodelist.
 * Only the root fills mall->nodelist, mall->nodelist_len, mall->num_nodes
 * and mall->num_cpus.
 *
 * mall->nodelist_len is strlen(mall->nodelist)+1, the same convention used
 * by the Slurm based functions of this file.
 *
 * FIXME Does not consider heterogeneous machines for num_cpus
 *
 * Returns 0 on success and 1 if memory could not be allocated. When the
 * failure happens only in some processes the others remain inside a
 * collective; MAM_check_hosts aborts the job on a non-zero return.
 */
int MAM_I_get_hosts_info() {
  int i, j, name_len, max_name_len, unique_count, is_new, *unique_hosts;
  char *my_host, *all_hosts, *confirmed_host, *tested_host, *write_pos;
  size_t host_len;

  all_hosts = NULL;
  unique_hosts = NULL;
  unique_count = 0;

  // Zeroed buffer: the bytes after the name are also sent by MPI_Gather
  // and act as terminating characters.
  my_host = (char *) calloc(MPI_MAX_PROCESSOR_NAME, sizeof(char));
  if(my_host == NULL) return 1;
  MPI_Get_processor_name(my_host, &name_len);
  MPI_Allreduce(&name_len, &max_name_len, 1, MPI_INT, MPI_MAX, mall->comm);
  max_name_len++; // Len does not consider terminating character

  if(mall->myId == mall->root) {
    all_hosts = (char *) malloc((size_t) mall->numP * max_name_len * sizeof(char));
    unique_hosts = (int *) malloc(mall->numP * sizeof(int));
    if(all_hosts == NULL || unique_hosts == NULL) {
      free(all_hosts);
      free(unique_hosts);
      free(my_host);
      return 1;
    }
  }

  MPI_Gather(my_host, max_name_len, MPI_CHAR, all_hosts, max_name_len, MPI_CHAR, mall->root, mall->comm);

  if(mall->myId == mall->root) {
    unique_hosts[0] = 0; //First host will always be unique
    unique_count = 1;

    // A host is new only if it differs from ALL the hosts already confirmed
    for (i = 1; i < mall->numP; i++) {
      tested_host = all_hosts + (i * max_name_len);
      is_new = 1;
      for (j = 0; j < unique_count && is_new; j++) {
        confirmed_host = all_hosts + (unique_hosts[j] * max_name_len);
        if (strcmp(tested_host, confirmed_host) == 0) {
          is_new = 0;
        }
      }
      if (is_new) {
        unique_hosts[unique_count] = i;
        unique_count++;
      }
    }

    // Upper bound: each name needs at most max_name_len-1 characters plus
    // one separator (or the terminating character for the last one).
    mall->nodelist = (char *) malloc((size_t) unique_count * max_name_len * sizeof(char));
    if(mall->nodelist == NULL) {
      free(all_hosts);
      free(unique_hosts);
      free(my_host);
      return 1;
    }

    // Append at a moving position instead of rescanning with strcat
    write_pos = mall->nodelist;
    for (i = 0; i < unique_count; i++) {
      confirmed_host = all_hosts + (unique_hosts[i] * max_name_len);
      host_len = strlen(confirmed_host);
      memcpy(write_pos, confirmed_host, host_len);
      write_pos += host_len;
      if (i < unique_count - 1) {
        *write_pos = ',';
        write_pos++;
      }
    }
    *write_pos = '\0';

    mall->num_nodes = unique_count;
    mall->num_cpus = GetCPUCount();
    mall->nodelist_len = (int) (write_pos - mall->nodelist) + 1;

    free(all_hosts);
    free(unique_hosts);
  }

  free(my_host);
  return 0;
}
/*
 * @brief Get the total number of CPUs available to the process.
 *
 * This function uses sched_getaffinity to obtain the CPU affinity of the current process
 * and counts every CPU present in the affinity set with CPU_COUNT. The set does not need
 * to be contiguous nor to start at CPU 0 (e.g. a process bound to cores 4-7 reports 4).
 *
 * If the affinity cannot be obtained, the number of online processors reported by
 * sysconf is used instead.
 *
 * @return The total number of CPUs available to the process. Always at least 1.
 *
 * Based on: https://stackoverflow.com/questions/4586405/how-to-get-the-number-of-cpus-in-linux-using-c
 */
int GetCPUCount() {
  cpu_set_t cs;
  long online;
  int count;

  CPU_ZERO(&cs);
  if (sched_getaffinity(0, sizeof(cs), &cs) == 0) {
    count = CPU_COUNT(&cs);
    if (count > 0) {
      return count;
    }
  }

  // Affinity not available: fall back to the amount of online processors
  online = sysconf(_SC_NPROCESSORS_ONLN);
  return online > 0 ? (int) online : 1;
}
#if USE_MAL_SLURM
/*
 * Fills the host information of "mall" from the environment variables
 * SLURM_JOB_NUM_NODES, SLURM_JOB_NODELIST and SLURM_JOB_CPUS_PER_NODE.
 *
 * SLURM_JOB_CPUS_PER_NODE is parsed as a comma separated list of groups
 * with the shape "CPUS" or "CPUS(xCOUNT)". Only the CPUS value of the last
 * valid group is kept in mall->num_cpus.
 * TODO When MaM considers heterogeneous allocations, the CPUS and COUNT
 * value of every group will have to be stored instead of a single num_cpus.
 *
 * The environment strings are only read, never modified.
 *
 * On success mall->nodelist (allocated here, with mall->nodelist_len being
 * its length including the terminating character), mall->num_nodes and
 * mall->num_cpus are set. On failure "mall" is left untouched.
 *
 * Returns 0 on success and 1 if a variable is missing, its value cannot
 * be parsed or memory could not be allocated.
 */
int MAM_I_slurm_getenv_hosts_info() {
  const char *env_nodes, *env_nodelist, *env_cpus, *cursor;
  char *end, *list;
  long value;
  int nodes, cpus;
  size_t list_len;

  env_nodes = getenv("SLURM_JOB_NUM_NODES");
  env_nodelist = getenv("SLURM_JOB_NODELIST");
  env_cpus = getenv("SLURM_JOB_CPUS_PER_NODE");
  if(env_nodes == NULL || env_nodelist == NULL || env_cpus == NULL) return 1;

  value = strtol(env_nodes, &end, 10);
  if(end == env_nodes || value <= 0 || value != (long) (int) value) return 1;
  nodes = (int) value;

  // Walk the groups without strtok: strtok would write into the string
  // returned by getenv and it keeps hidden static state.
  cpus = 0;
  cursor = env_cpus;
  while(cursor != NULL && *cursor != '\0') {
    value = strtol(cursor, &end, 10);
    if(end != cursor && value > 0 && value == (long) (int) value) {
      cpus = (int) value; // num_cpus stores the amount of cpus per node
    }
    cursor = strchr(cursor, ','); // A possible "(xCOUNT)" suffix is skipped
    if(cursor != NULL) cursor++;
  }
  if(cpus <= 0) return 1;

  list_len = strlen(env_nodelist) + 1;
  list = (char *) malloc(list_len * sizeof(char));
  if(list == NULL) return 1;
  memcpy(list, env_nodelist, list_len);

  mall->num_nodes = nodes;
  mall->num_cpus = cpus;
  mall->nodelist = list;
  mall->nodelist_len = (int) list_len;
  return 0;
}
/*
 * Fills the host information of "mall" by asking Slurm for the record of
 * the job identified by the environment variable SLURM_JOB_ID.
 *
 * The last record returned by slurm_load_job is the one used.
 * FIXME Does not consider heterogeneous machines
 * NOTE(review): the num_cpus field of the Slurm job record looks like the
 * total for the whole job, while the environment based function of this
 * file stores the cpus of a single node -- confirm which one is expected.
 *
 * On success mall->nodelist (allocated here, with mall->nodelist_len being
 * its length including the terminating character), mall->num_nodes and
 * mall->num_cpus are set. On failure "mall" is left untouched.
 *
 * Returns 0 on success, the error code of slurm_load_job if it fails, and
 * 1 if SLURM_JOB_ID is not defined, the job has no usable record or memory
 * could not be allocated.
 */
int MAM_I_slurm_getjob_hosts_info() {
  int jobId, err;
  size_t list_len;
  char *tmp, *list;
  job_info_msg_t *j_info = NULL;
  slurm_job_info_t *last_record;

  tmp = getenv("SLURM_JOB_ID");
  if(tmp == NULL) return 1;
  jobId = atoi(tmp);

  // Third argument are the show flags; 1 is presumably SHOW_ALL -- confirm
  err = slurm_load_job(&j_info, jobId, 1);
  if(err) return err;
  if(j_info == NULL) return 1;

  err = 1; // Pessimistic until every field has been obtained
  if(j_info->record_count > 0) {
    // Pointer into the message: avoids copying the whole job record
    last_record = &j_info->job_array[j_info->record_count - 1];
    if(last_record->nodes != NULL) {
      list_len = strlen(last_record->nodes) + 1;
      list = (char *) malloc(list_len * sizeof(char));
      if(list != NULL) {
        memcpy(list, last_record->nodes, list_len);
        mall->num_nodes = last_record->num_nodes;
        mall->num_cpus = last_record->num_cpus;
        mall->nodelist = list;
        mall->nodelist_len = (int) list_len;
        err = 0;
      }
    }
  }

  slurm_free_job_info_msg(j_info);
  return err;
}
#endif
/*
 * Public interface of the module that obtains the host information
 * (node list, number of nodes and number of cpus) of the running job.
 */
#ifndef MALLEABILITY_RMS_H
#define MALLEABILITY_RMS_H
#include <mpi.h>
#include "malleabilityDataStructures.h"
/*
 * Obtains the host information of the job and stores it in the "mall"
 * structure. If it cannot be obtained from any source, an error is printed
 * and the job is aborted with MPI_Abort.
 */
void MAM_check_hosts();
#endif
......@@ -6,7 +6,7 @@
//States
#define MALL_DENIED -1
enum mall_inner_states{MALL_UNRESERVED, MALL_NOT_STARTED, MALL_SPAWN_PENDING, MALL_SPAWN_SINGLE_PENDING,
enum mall_inner_states{MALL_UNRESERVED, MALL_NOT_STARTED, MALL_RMS_COMPLETED, MALL_SPAWN_PENDING, MALL_SPAWN_SINGLE_PENDING,
MALL_SPAWN_SINGLE_COMPLETED, MALL_SPAWN_ADAPT_POSTPONE, MALL_SPAWN_COMPLETED, MALL_DIST_PENDING, MALL_DIST_COMPLETED,
MALL_SPAWN_ADAPT_PENDING, MALL_USER_PENDING, MALL_USER_COMPLETED, MALL_SPAWN_ADAPTED, MALL_COMPLETED};
enum mam_states{MAM_UNRESERVED, MAM_NOT_STARTED, MAM_PENDING, MAM_USER_PENDING, MAM_COMPLETED};
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment