Commit 4bcc805a authored by iker_martin's avatar iker_martin
Browse files

Added MaM interface

parent 1cd76849
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <string.h>
#include "distribution_methods/block_distribution.h"
#include "CommDist.h"
#include "MAM_Configuration.h"
#include "malleabilityDataStructures.h"
void prepare_redistribution(int qty, MPI_Datatype datatype, int numP, int numO, int is_children_group, void **recv, struct Counts *s_counts, struct Counts *r_counts);
void check_requests(struct Counts s_counts, struct Counts r_counts, MPI_Request **requests, size_t *request_qty);
void sync_point2point(void *send, void *recv, MPI_Datatype datatype, struct Counts s_counts, struct Counts r_counts, MPI_Comm comm);
void sync_rma(void *send, void *recv, MPI_Datatype datatype, struct Counts r_counts, int tamBl, MPI_Comm comm);
void sync_rma_lock(void *recv, MPI_Datatype datatype, struct Counts r_counts, MPI_Win win);
void sync_rma_lockall(void *recv, MPI_Datatype datatype, struct Counts r_counts, MPI_Win win);
void async_point2point(void *send, void *recv, MPI_Datatype datatype, struct Counts s_counts, struct Counts r_counts, MPI_Comm comm, MPI_Request *requests);
void async_rma(void *send, void *recv, MPI_Datatype datatype, struct Counts r_counts, int tamBl, MPI_Comm comm, MPI_Request *requests, MPI_Win *win);
void async_rma_lock(void *recv, MPI_Datatype datatype, struct Counts r_counts, MPI_Win win, MPI_Request *requests);
void async_rma_lockall(void *recv, MPI_Datatype datatype, struct Counts r_counts, MPI_Win win, MPI_Request *requests);
/*
* Reserva memoria para un vector de hasta "qty" elementos.
* Los "qty" elementos se disitribuyen entre los "numP" procesos
* que llaman a esta funcion.
*/
void malloc_comm_array(char **array, int qty, int myId, int numP) {
struct Dist_data dist_data;
get_block_dist(qty, myId, numP, &dist_data);
if( (*array = calloc(dist_data.tamBl, sizeof(char))) == NULL) {
printf("Memory Error (Malloc Arrays(%d))\n", dist_data.tamBl);
exit(1);
}
/*
int i;
for(i=0; i<dist_data.tamBl; i++) {
(*array)[i] = '!' + i + dist_data.ini;
}
printf("P%d Tam %d String: %s\n", myId, dist_data.tamBl, *array);
*/
}
//================================================================================
//================================================================================
//========================SYNCHRONOUS FUNCTIONS===================================
//================================================================================
//================================================================================
/*
* Performs a communication to redistribute an array in a block distribution.
* In the redistribution is differenciated parent group from the children and the values each group indicates can be
* different.
*
* - send (IN): Array with the data to send. This data can not be null for parents.
* - recv (OUT): Array where data will be written. A NULL value is allowed if the process is not going to receive data.
* If the process receives data and is NULL, the behaviour is undefined.
* - qty (IN): Sum of elements shared by all processes that will send data.
* - numP (IN): Size of the local group. If it is a children group, this parameter must correspond to using
* "MPI_Comm_size(comm)". For the parents is not always the size obtained from "comm".
* - numO (IN): Amount of processes in the remote group. For the parents is the target quantity of processes after the
* resize, while for the children is the amount of parents.
* - is_children_group (IN): Indicates wether this MPI rank is a children(TRUE) or a parent(FALSE).
* - comm (IN): Communicator to use to perform the redistribution.
*
*/
void sync_communication(void *send, void **recv, int qty, MPI_Datatype datatype, int numP, int numO, int is_children_group, MPI_Comm comm) {
struct Counts s_counts, r_counts;
struct Dist_data dist_data;
/* PREPARE COMMUNICATION */
prepare_redistribution(qty, datatype, numP, numO, is_children_group, recv, &s_counts, &r_counts);
/* PERFORM COMMUNICATION */
switch(mall_conf->red_method) {
case MALL_RED_RMA_LOCKALL:
case MALL_RED_RMA_LOCK:
if(is_children_group) {
dist_data.tamBl = 0;
} else {
get_block_dist(qty, mall->myId, numO, &dist_data);
}
sync_rma(send, *recv, datatype, r_counts, dist_data.tamBl, comm);
break;
case MALL_RED_POINT:
sync_point2point(send, *recv, datatype, s_counts, r_counts, comm);
break;
case MALL_RED_BASELINE:
default:
MPI_Alltoallv(send, s_counts.counts, s_counts.displs, datatype, *recv, r_counts.counts, r_counts.displs, datatype, comm);
break;
}
freeCounts(&s_counts);
freeCounts(&r_counts);
}
/*
* Performs a series of blocking point2point communications to redistribute an array in a block distribution.
* It should be called after calculating how data should be redistributed.
*
* - send (IN): Array with the data to send. This value can not be NULL for parents.
* - recv (OUT): Array where data will be written. A NULL value is allowed if the process is not going to
* receive data. If the process receives data and is NULL, the behaviour is undefined.
* - s_counts (IN): Struct which describes how many elements will send this process to each children and
* the displacements.
* - r_counts (IN): Structure which describes how many elements will receive this process from each parent
* and the displacements.
* - comm (IN): Communicator to use to perform the redistribution.
*
*/
void sync_point2point(void *send, void *recv, MPI_Datatype datatype, struct Counts s_counts, struct Counts r_counts, MPI_Comm comm) {
int i, j, init, end, total_sends, datasize;
size_t offset, offset2;
MPI_Request *sends;
MPI_Type_size(datatype, &datasize);
init = s_counts.idI;
end = s_counts.idE;
if(mall_conf->spawn_method == MALL_SPAWN_MERGE && (s_counts.idI == mall->myId || s_counts.idE == mall->myId + 1)) {
offset = s_counts.displs[mall->myId] * datasize;
offset2 = r_counts.displs[mall->myId] * datasize;
memcpy(recv+offset2, send+offset, s_counts.counts[mall->myId]);
if(s_counts.idI == mall->myId) init = s_counts.idI+1;
else end = s_counts.idE-1;
}
total_sends = end - init;
j = 0;
if(total_sends > 0) {
sends = (MPI_Request *) malloc(total_sends * sizeof(MPI_Request));
}
for(i=init; i<end; i++) {
sends[j] = MPI_REQUEST_NULL;
offset = s_counts.displs[i] * datasize;
MPI_Isend(send+offset, s_counts.counts[i], datatype, i, 99, comm, &(sends[j]));
j++;
}
init = r_counts.idI;
end = r_counts.idE;
if(mall_conf->spawn_method == MALL_SPAWN_MERGE) {
if(r_counts.idI == mall->myId) init = r_counts.idI+1;
else if(r_counts.idE == mall->myId + 1) end = r_counts.idE-1;
}
for(i=init; i<end; i++) {
offset = r_counts.displs[i] * datasize;
MPI_Recv(recv+offset, r_counts.counts[i], datatype, i, 99, comm, MPI_STATUS_IGNORE);
}
if(total_sends > 0) {
MPI_Waitall(total_sends, sends, MPI_STATUSES_IGNORE);
free(sends);
}
}
/*
* Performs synchronous MPI-RMA operations to redistribute an array in a block distribution. Is should be called after calculating
* how data should be redistributed
*
* - send (IN): Array with the data to send. This value can be NULL for children.
* - recv (OUT): Array where data will be written. A NULL value is allowed if the process is not going to receive data.
* If the process receives data and is NULL, the behaviour is undefined.
* - r_counts (IN): Structure which describes how many elements will receive this process from each parent and the
* displacements.
* - tamBl (IN): How many elements are stored in the parameter "send".
* - comm (IN): Communicator to use to perform the redistribution. Must be an intracommunicator as MPI-RMA requirements.
*
* FIXME: In libfabric one of these macros defines the maximum amount of BYTES that can be communicated in a SINGLE MPI_Get
* A window can have more bytes than the amount shown in those macros, therefore, if you want to read more than that amount
* you need to perform multiples Gets.
* prov/psm3/psm3/psm_config.h:179:#define MQ_SHM_THRESH_RNDV 16000
* prov/psm3/psm3/ptl_am/am_config.h:62:#define PSMI_MQ_RV_THRESH_CMA 16000
* prov/psm3/psm3/ptl_am/am_config.h:65:#define PSMI_MQ_RV_THRESH_NO_KASSIST 16000
*/
void sync_rma(void *send, void *recv, MPI_Datatype datatype, struct Counts r_counts, int tamBl, MPI_Comm comm) {
int datasize;
MPI_Win win;
MPI_Type_size(datatype, &datasize);
MPI_Win_create(send, (MPI_Aint)tamBl * datasize, datasize, MPI_INFO_NULL, comm, &win);
#if USE_MAL_DEBUG >= 3
DEBUG_FUNC("Created Window for synchronous RMA communication", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(comm);
#endif
switch(mall_conf->red_method) {
case MALL_RED_RMA_LOCKALL:
sync_rma_lockall(recv, datatype, r_counts, win);
break;
case MALL_RED_RMA_LOCK:
sync_rma_lock(recv, datatype, r_counts, win);
break;
}
#if USE_MAL_DEBUG >= 3
DEBUG_FUNC("Completed synchronous RMA communication", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(comm);
#endif
MPI_Win_free(&win);
}
/*
* Performs a passive MPI-RMA data redistribution for a single array using the passive epochs Lock/Unlock.
* - recv (OUT): Array where data will be written. A NULL value is allowed if the process is not going to receive data.
* If the process receives data and is NULL, the behaviour is undefined.
* - r_counts (IN): Structure which describes how many elements will receive this process from each parent and the
* displacements.
* - win (IN): Window to use to perform the redistribution.
*
*/
void sync_rma_lock(void *recv, MPI_Datatype datatype, struct Counts r_counts, MPI_Win win) {
int i, target_displs, datasize;
size_t offset;
MPI_Type_size(datatype, &datasize);
target_displs = r_counts.first_target_displs; //TODO Check that datasize is not needed
for(i=r_counts.idI; i<r_counts.idE; i++) {
offset = r_counts.displs[i] * datasize;
MPI_Win_lock(MPI_LOCK_SHARED, i, MPI_MODE_NOCHECK, win);
MPI_Get(recv+offset, r_counts.counts[i], datatype, i, target_displs, r_counts.counts[i], datatype, win);
MPI_Win_unlock(i, win);
target_displs=0;
}
}
/*
* Performs a passive MPI-RMA data redistribution for a single array using the passive epochs Lockall/Unlockall.
* - recv (OUT): Array where data will be written. A NULL value is allowed if the process is not going to receive data.
* If the process receives data and is NULL, the behaviour is undefined.
* - r_counts (IN): Structure which describes how many elements will receive this process from each parent and the
* displacements.
* - win (IN): Window to use to perform the redistribution.
*
*/
void sync_rma_lockall(void *recv, MPI_Datatype datatype, struct Counts r_counts, MPI_Win win) {
int i, target_displs, datasize;
size_t offset;
MPI_Type_size(datatype, &datasize);
target_displs = r_counts.first_target_displs; //TODO Check that datasize is not needed
MPI_Win_lock_all(MPI_MODE_NOCHECK, win);
for(i=r_counts.idI; i<r_counts.idE; i++) {
offset = r_counts.displs[i] * datasize;
MPI_Get(recv+offset, r_counts.counts[i], datatype, i, target_displs, r_counts.counts[i], datatype, win);
target_displs=0;
}
MPI_Win_unlock_all(win);
}
//================================================================================
//================================================================================
//========================ASYNCHRONOUS FUNCTIONS==================================
//================================================================================
//================================================================================
/*
* Performs a communication to redistribute an array in a block distribution with non-blocking MPI functions.
* In the redistribution is differenciated parent group from the children and the values each group indicates can be
* different.
*
* - send (IN): Array with the data to send. This data can not be null for parents.
* - recv (OUT): Array where data will be written. A NULL value is allowed if the process is not going to receive data.
* If the process receives data and is NULL, the behaviour is undefined.
* - qty (IN): Sum of elements shared by all processes that will send data.
* - numP (IN): Size of the local group. If it is a children group, this parameter must correspond to using
* "MPI_Comm_size(comm)". For the parents is not always the size obtained from "comm".
* - numO (IN): Amount of processes in the remote group. For the parents is the target quantity of processes after the
* resize, while for the children is the amount of parents.
* - is_children_group (IN): Indicates wether this MPI rank is a children(TRUE) or a parent(FALSE).
* - comm (IN): Communicator to use to perform the redistribution.
* - requests (OUT): Pointer to array of requests to be used to determine if the communication has ended. If the pointer
* is null or not enough space has been reserved the pointer is allocated/reallocated.
* - request_qty (OUT): Quantity of requests to be used. If a process sends and receives data, this value will be
* modified to the expected value.
*
*/
void async_communication_start(void *send, void **recv, int qty, MPI_Datatype datatype, int numP, int numO, int is_children_group, MPI_Comm comm, MPI_Request **requests, size_t *request_qty, MPI_Win *win) {
struct Counts s_counts, r_counts;
struct Dist_data dist_data;
/* PREPARE COMMUNICATION */
prepare_redistribution(qty, datatype, numP, numO, is_children_group, recv, &s_counts, &r_counts);
check_requests(s_counts, r_counts, requests, request_qty);
/* PERFORM COMMUNICATION */
switch(mall_conf->red_method) {
case MALL_RED_RMA_LOCKALL:
case MALL_RED_RMA_LOCK:
if(is_children_group) {
dist_data.tamBl = 0;
} else {
get_block_dist(qty, mall->myId, numO, &dist_data);
}
async_rma(send, *recv, datatype, r_counts, dist_data.tamBl, comm, *requests, win);
break;
case MALL_RED_POINT:
async_point2point(send, *recv, datatype, s_counts, r_counts, comm, *requests);
break;
case MALL_RED_BASELINE:
default:
MPI_Ialltoallv(send, s_counts.counts, s_counts.displs, datatype, *recv, r_counts.counts, r_counts.displs, datatype, comm, &((*requests)[0]));
break;
}
freeCounts(&s_counts);
freeCounts(&r_counts);
}
/*
* Checks if a set of requests have been completed (1) or not (0).
*
* - is_children_group (IN): Indicates wether this MPI rank is a children(TRUE) or a parent(FALSE).
* - requests (IN): Pointer to array of requests to be used to determine if the communication has ended.
* - request_qty (IN): Quantity of requests in "requests".
*
* returns: An integer indicating if the operation has been completed(TRUE) or not(FALSE).
*/
int async_communication_check(int is_children_group, MPI_Request *requests, size_t request_qty) {
int completed, req_completed, test_err;
size_t i;
completed = 1;
test_err = MPI_SUCCESS;
if (is_children_group) return 1; //FIXME Deberia devolver un num negativo
for(i=0; i<request_qty; i++) {
test_err = MPI_Test(&(requests[i]), &req_completed, MPI_STATUS_IGNORE);
completed = completed && req_completed;
}
//test_err = MPI_Testall(request_qty, requests, &completed, MPI_STATUSES_IGNORE); //FIXME Some kind of bug with Mpich.
if (test_err != MPI_SUCCESS && test_err != MPI_ERR_PENDING) {
printf("P%d aborting -- Test Async\n", mall->myId);
MPI_Abort(MPI_COMM_WORLD, test_err);
}
return completed;
}
/*
* Waits until the completion of a set of requests. If the Ibarrier strategy
* is being used, the corresponding ibarrier is posted.
*
* - comm (IN): Communicator to use to confirm finalizations of redistribution
* - requests (IN): Pointer to array of requests to be used to determine if the communication has ended.
* - request_qty (IN): Quantity of requests in "requests".
*/
void async_communication_wait(MPI_Request *requests, size_t request_qty) {
MPI_Waitall(request_qty, requests, MPI_STATUSES_IGNORE);
#if USE_MAL_DEBUG >= 3
DEBUG_FUNC("Processes Waitall completed", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
#endif
}
/*
* Frees Requests/Windows associated to a particular redistribution.
* Should be called for each output result of calling "async_communication_start".
*
* - requests (IN): Pointer to array of requests to be used to determine if the communication has ended.
* - request_qty (IN): Quantity of requests in "requests".
* - win (IN): Window to free.
*/
void async_communication_end(MPI_Request *requests, size_t request_qty, MPI_Win *win) {
//Para la desconexión de ambos grupos de procesos es necesario indicar a MPI que esta comm
//ha terminado, aunque solo se pueda llegar a este punto cuando ha terminado
if(MAM_Contains_strat(MAM_RED_STRATEGIES, MAM_STRAT_RED_WAIT_TARGETS, NULL)) { MPI_Waitall(request_qty, requests, MPI_STATUSES_IGNORE); }
if(mall_conf->red_method == MALL_RED_RMA_LOCKALL || mall_conf->red_method == MALL_RED_RMA_LOCK) { MPI_Win_free(win); }
}
/*
* Performs a series of non-blocking point2point communications to redistribute an array in a block distribution.
* It should be called after calculating how data should be redistributed.
*
* - send (IN): Array with the data to send. This value can not be NULL for parents.
* - recv (OUT): Array where data will be written. A NULL value is allowed if the process is not going to
* receive data. If the process receives data and is NULL, the behaviour is undefined.
* - s_counts (IN): Struct which describes how many elements will send this process to each children and
* the displacements.
* - r_counts (IN): Structure which describes how many elements will receive this process from each parent
* and the displacements.
* - comm (IN): Communicator to use to perform the redistribution.
* - requests (OUT): Pointer to array of requests to be used to determine if the communication has ended.
*
*/
void async_point2point(void *send, void *recv, MPI_Datatype datatype, struct Counts s_counts, struct Counts r_counts, MPI_Comm comm, MPI_Request *requests) {
int i, j = 0, datasize;
size_t offset;
MPI_Type_size(datatype, &datasize);
for(i=s_counts.idI; i<s_counts.idE; i++) {
offset = s_counts.displs[i] * datasize;
MPI_Isend(send+offset, s_counts.counts[i], datatype, i, 99, comm, &(requests[j]));
j++;
}
for(i=r_counts.idI; i<r_counts.idE; i++) {
offset = r_counts.displs[i] * datasize;
MPI_Irecv(recv+offset, r_counts.counts[i], datatype, i, 99, comm, &(requests[j]));
j++;
}
}
/*
* Performs asynchronous MPI-RMA operations to redistribute an array in a block distribution. Is should be called after calculating
* how data should be redistributed.
*
* - send (IN): Array with the data to send. This value can be NULL for children.
* - recv (OUT): Array where data will be written. A NULL value is allowed if the process is not going to receive data.
* If the process receives data and is NULL, the behaviour is undefined.
* - r_counts (IN): Structure which describes how many elements will receive this process from each parent and the
* displacements.
* - tamBl (IN): How many elements are stored in the parameter "send".
* - comm (IN): Communicator to use to perform the redistribution. Must be an intracommunicator as MPI-RMA requirements.
* - window (OUT): Pointer to a window object used for the RMA operations.
* - requests (OUT): Pointer to array of requests to be used to determine if the communication has ended.
*
*/
void async_rma(void *send, void *recv, MPI_Datatype datatype, struct Counts r_counts, int tamBl, MPI_Comm comm, MPI_Request *requests, MPI_Win *win) {
int datasize;
MPI_Type_size(datatype, &datasize);
MPI_Win_create(send, (MPI_Aint)tamBl * datasize, datasize, MPI_INFO_NULL, comm, win);
switch(mall_conf->red_method) {
case MALL_RED_RMA_LOCKALL:
async_rma_lockall(recv, datatype, r_counts, *win, requests);
break;
case MALL_RED_RMA_LOCK:
async_rma_lock(recv, datatype, r_counts, *win, requests);
break;
}
}
/*
* Performs an asynchronous and passive MPI-RMA data redistribution for a single array using the passive epochs Lock/Unlock.
* - recv (OUT): Array where data will be written. A NULL value is allowed if the process is not going to receive data.
* If the process receives data and is NULL, the behaviour is undefined.
* - r_counts (IN): Structure which describes how many elements will receive this process from each parent and the
* displacements.
* - win (IN): Window to use to perform the redistribution.
* - requests (OUT): Pointer to array of requests to be used to determine if the communication has ended.
*
*/
void async_rma_lock(void *recv, MPI_Datatype datatype, struct Counts r_counts, MPI_Win win, MPI_Request *requests) {
int i, target_displs, j = 0, datasize;
size_t offset;
MPI_Type_size(datatype, &datasize);
target_displs = r_counts.first_target_displs; //TODO Check that datasize is not needed
for(i=r_counts.idI; i<r_counts.idE; i++) {
offset = r_counts.displs[i] * datasize;
MPI_Win_lock(MPI_LOCK_SHARED, i, MPI_MODE_NOCHECK, win);
MPI_Rget(recv+offset, r_counts.counts[i], datatype, i, target_displs, r_counts.counts[i], datatype, win, &(requests[j]));
MPI_Win_unlock(i, win);
target_displs=0;
j++;
}
}
/*
* Performs an asynchronous and passive MPI-RMA data redistribution for a single array using the passive epochs Lockall/Unlockall.
* - recv (OUT): Array where data will be written. A NULL value is allowed if the process is not going to receive data.
* If the process receives data and is NULL, the behaviour is undefined.
* - r_counts (IN): Structure which describes how many elements will receive this process from each parent and the
* displacements.
* - win (IN): Window to use to perform the redistribution.
* - requests (OUT): Pointer to array of requests to be used to determine if the communication has ended.
*
*/
void async_rma_lockall(void *recv, MPI_Datatype datatype, struct Counts r_counts, MPI_Win win, MPI_Request *requests) {
int i, target_displs, j = 0, datasize;
size_t offset;
MPI_Type_size(datatype, &datasize);
target_displs = r_counts.first_target_displs; //TODO Check that datasize is not needed
MPI_Win_lock_all(MPI_MODE_NOCHECK, win);
for(i=r_counts.idI; i<r_counts.idE; i++) {
offset = r_counts.displs[i] * datasize;
MPI_Rget(recv+offset, r_counts.counts[i], datatype, i, target_displs, r_counts.counts[i], datatype, win, &(requests[j]));
target_displs=0;
j++;
}
MPI_Win_unlock_all(win);
}
/*
* ========================================================================================
* ========================================================================================
* ================================DISTRIBUTION FUNCTIONS==================================
* ========================================================================================
* ========================================================================================
*/
/*
* Performs a communication to redistribute an array in a block distribution. For each process calculates
* how many elements sends/receives to other processes for the new group.
*
* - qty (IN): Sum of elements shared by all processes that will send data.
* - numP (IN): Size of the local group. If it is a children group, this parameter must correspond to using
* "MPI_Comm_size(comm)". For the parents is not always the size obtained from "comm".
* - numO (IN): Amount of processes in the remote group. For the parents is the target quantity of processes after the
* resize, while for the children is the amount of parents.
* - is_children_group (IN): Indicates wether this MPI rank is a children(TRUE) or a parent(FALSE).
* - recv (OUT): Array where data will be written. A NULL value is allowed if the process is not going to receive data.
* process receives data and is NULL, the behaviour is undefined.
* - s_counts (OUT): Struct where is indicated how many elements sends this process to processes in the new group.
* - r_counts (OUT): Struct where is indicated how many elements receives this process from other processes in the previous group.
*
*/
void prepare_redistribution(int qty, MPI_Datatype datatype, int numP, int numO, int is_children_group, void **recv, struct Counts *s_counts, struct Counts *r_counts) {
int array_size = numO;
int offset_ids = 0;
int datasize;
struct Dist_data dist_data;
if(mall_conf->spawn_method == MALL_SPAWN_BASELINE) {
offset_ids = MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_INTERCOMM, NULL) ?
0 : numP;
} else {
array_size = numP > numO ? numP : numO;
}
mallocCounts(s_counts, array_size+offset_ids);
mallocCounts(r_counts, array_size+offset_ids);
MPI_Type_size(datatype, &datasize); //FIXME Right now derived datatypes are not ensured to work
if(is_children_group) {
offset_ids = 0;
prepare_comm_alltoall(mall->myId, numP, numO, qty, offset_ids, r_counts);
// Obtener distribución para este hijo
get_block_dist(qty, mall->myId, numP, &dist_data);
*recv = malloc(dist_data.tamBl * datasize);
#if USE_MAL_DEBUG >= 4
get_block_dist(qty, mall->myId, numP, &dist_data);
print_counts(dist_data, r_counts->counts, r_counts->displs, numO+offset_ids, 0, "Targets Recv");
#endif
} else {
#if USE_MAL_DEBUG >= 4
get_block_dist(qty, mall->myId, numP, &dist_data);
#endif
prepare_comm_alltoall(mall->myId, numP, numO, qty, offset_ids, s_counts);
if(mall_conf->spawn_method == MALL_SPAWN_MERGE && mall->myId < numO) {
prepare_comm_alltoall(mall->myId, numO, numP, qty, offset_ids, r_counts);
// Obtener distribución para este hijo y reservar vector de recibo
get_block_dist(qty, mall->myId, numO, &dist_data);
*recv = malloc(dist_data.tamBl * datasize);
#if USE_MAL_DEBUG >= 4
print_counts(dist_data, r_counts->counts, r_counts->displs, array_size, 0, "Sources&Targets Recv");
#endif
}
#if USE_MAL_DEBUG >= 4
print_counts(dist_data, s_counts->counts, s_counts->displs, numO+offset_ids, 0, "Sources Send");
#endif
}
}
/*
* Ensures that the array of request of a process has an amount of elements equal to the amount of communication
* functions the process will perform. In case the array is not initialized or does not have enough space it is
* allocated/reallocated to the minimum amount of space needed.
*
* - s_counts (IN): Struct where is indicated how many elements sends this process to processes in the new group.
* - r_counts (IN): Struct where is indicated how many elements receives this process from other processes in the previous group.
* - requests (IN/OUT): Pointer to array of requests to be used to determine if the communication has ended. If the pointer
* is null or not enough space has been reserved the pointer is allocated/reallocated.
* - request_qty (IN/OUT): Quantity of requests to be used. If the value is smaller than the amount of communication
* functions to perform, it is modified to the minimum value.
*/
void check_requests(struct Counts s_counts, struct Counts r_counts, MPI_Request **requests, size_t *request_qty) {
size_t i, sum;
MPI_Request *aux;
switch(mall_conf->red_method) {
case MALL_RED_BASELINE:
sum = 1;
break;
case MALL_RED_POINT:
default:
sum = (size_t) s_counts.idE - s_counts.idI;
sum += (size_t) r_counts.idE - r_counts.idI;
break;
}
if (*requests != NULL && sum <= *request_qty) return; // Expected amount of requests
if (*requests == NULL) {
*requests = (MPI_Request *) malloc(sum * sizeof(MPI_Request));
} else { // Array exists, but is too small
aux = (MPI_Request *) realloc(*requests, sum * sizeof(MPI_Request));
*requests = aux;
}
if (*requests == NULL) {
fprintf(stderr, "Fatal error - It was not possible to allocate/reallocate memory for the MPI_Requests before the redistribution\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
for(i=0; i < sum; i++) {
(*requests)[i] = MPI_REQUEST_NULL;
}
*request_qty = sum;
}
#ifndef COMMDIST_H
#define COMMDIST_H
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <string.h>
#include "malleabilityStates.h"
void sync_communication(void *send, void **recv, int qty, MPI_Datatype datatype, int numP, int numO, int is_children_group, MPI_Comm comm);
void async_communication_start(void *send, void **recv, int qty, MPI_Datatype datatype, int numP, int numO, int is_children_group, MPI_Comm comm, MPI_Request **requests, size_t *request_qty, MPI_Win *win);
int async_communication_check(int is_children_group, MPI_Request *requests, size_t request_qty);
void async_communication_wait(MPI_Request *requests, size_t request_qty);
void async_communication_end(MPI_Request *requests, size_t request_qty, MPI_Win *win);
void malloc_comm_array(char **array, int qty, int myId, int numP);
#endif
#ifndef MAM_H
#define MAM_H
#include "malleabilityStates.h"
#include "malleabilityManager.h"
#include "MAM_Configuration.h"
#endif
#include "MAM_Configuration.h"
#include "MAM_Init_Configuration.h"
#include "malleabilityDataStructures.h"
#include <limits.h>
typedef struct {
unsigned int *value, default_value;
int config_max_length;
union {
int (*set_config_simple)(unsigned int, unsigned int *);
int (*set_config_complex)(unsigned int);
};
char *env_name;
} mam_config_setting_t;
int MAM_I_set_method(unsigned int new_method, unsigned int *method);
int MAM_I_set_spawn_strat(unsigned int strategy, unsigned int *strategies);
int MAM_I_set_red_strat(unsigned int strategy, unsigned int *strategies);
int MAM_I_set_target_number(unsigned int new_numC);
int MAM_I_configuration_get_defaults();
int MAM_I_contains_strat(unsigned int comm_strategies, unsigned int strategy);
int MAM_I_add_strat(unsigned int *comm_strategies, unsigned int strategy);
int MAM_I_remove_strat(unsigned int *comm_strategies, unsigned int strategy);
mam_config_setting_t configSettings[] = {
{NULL, MALL_SPAWN_MERGE, MAM_METHODS_SPAWN_LEN, {.set_config_simple = MAM_I_set_method }, MAM_SPAWN_METHOD_ENV},
{NULL, MAM_STRAT_SPAWN_CLEAR, MAM_STRATS_SPAWN_LEN, {.set_config_simple = MAM_I_set_spawn_strat }, MAM_SPAWN_STRATS_ENV},
{NULL, MALL_DIST_COMPACT, MAM_METHODS_PHYSICAL_DISTRIBUTION_LEN, {.set_config_simple = MAM_I_set_method }, MAM_PHYSICAL_DISTRIBUTION_METHOD_ENV},
{NULL, MALL_RED_BASELINE, MAM_METHODS_RED_LEN, {.set_config_simple = MAM_I_set_method }, MAM_RED_METHOD_ENV},
{NULL, MAM_STRAT_RED_CLEAR, MAM_STRATS_RED_LEN, {.set_config_simple = MAM_I_set_red_strat }, MAM_RED_STRATS_ENV},
{NULL, 1, INT_MAX, {.set_config_complex = MAM_I_set_target_number }, MAM_NUM_TARGETS_ENV}
};
unsigned int masks_spawn[] = {MAM_STRAT_CLEAR_VALUE, MAM_MASK_PTHREAD, MAM_MASK_SPAWN_SINGLE, MAM_MASK_SPAWN_INTERCOMM};
unsigned int masks_red[] = {MAM_STRAT_CLEAR_VALUE, MAM_MASK_PTHREAD, MAM_MASK_RED_WAIT_SOURCES, MAM_MASK_RED_WAIT_TARGETS};
/**
* @brief Set configuration parameters for MAM.
*
* This function allows setting various configuration parameters for MAM
* such as spawn method, spawn strategies, spawn physical distribution,
* redistribution method, and red strategies.
*
* @param spawn_method The spawn method reconfiguration.
* @param spawn_strategies The spawn strategies reconfiguration.
* @param spawn_dist The spawn physical distribution method reconfiguration.
* @param red_method The redistribution method reconfiguration.
* @param red_strategies The redesitribution strategy for reconfiguration.
*/
void MAM_Set_configuration(int spawn_method, int spawn_strategies, int spawn_dist, int red_method, int red_strategies) {
int i, aux;
int aux_array[] = {spawn_method, spawn_strategies, spawn_dist, red_method, red_strategies};
if(state > MALL_NOT_STARTED) return;
mam_config_setting_t *config = NULL;
for (i = 0; i < MAM_KEY_COUNT-1; i++) { //FIXME Numero magico para no cambiar num_targets
aux = aux_array[i];
config = &configSettings[i];
if (0 <= aux && aux < config->config_max_length) {
if(i == MAM_NUM_TARGETS) {
config->set_config_complex(aux);
} else {
config->set_config_simple(aux, config->value);
}
}
}
}
/*
* @brief Set the configuration value for a specific key in MAM.
*
* Modifies the configuration value associated with the given key
* to the specified "required" value. The final value set is returned in the
* "provided" parameter.
*
* @param key The key for which the configuration value is to be modified.
* @param required The required value to set for the specified key.
* @param provided Pointer to an integer where the final value set will be stored.
* This parameter is updated with the actual value after modification.
* For strategy keys the value is "MAM_STRATS_ADDED" if "required" has
* been added, or "MAM_STRATS_MODIFIED" if multiple strategies of the
* key have been modified.
*/
void MAM_Set_key_configuration(int key, int required, int *provided) {
int i, aux;
if(provided == NULL) provided = &aux;
*provided = MALL_DENIED;
if(required < 0 || state > MALL_NOT_STARTED) return;
mam_config_setting_t *config = NULL;
for (i = 0; i < MAM_KEY_COUNT; i++) {
if (key == i) {
config = &configSettings[i];
break;
}
}
if (config != NULL) {
if (required < config->config_max_length) {
if(i == MAM_NUM_TARGETS) {
*provided = config->set_config_complex(required);
} else {
*provided = config->set_config_simple(required, config->value);
}
} else {*provided = *(config->value); }
} else { printf("MAM: Key %d does not exist\n", key); }
}
/*
* Retorna si una estrategia aparece o no
*/
int MAM_Contains_strat(int key, unsigned int strategy, int *result) {
int strategies, aux = MAM_OK;
unsigned int len = 0, mask;
switch(key) {
case MAM_SPAWN_STRATEGIES:
strategies = mall_conf->spawn_strategies;
mask = masks_spawn[strategy];
len = MAM_STRATS_SPAWN_LEN;
break;
case MAM_RED_STRATEGIES:
strategies = mall_conf->red_strategies;
mask = masks_red[strategy];
len = MAM_STRATS_RED_LEN;
break;
default:
aux = MALL_DENIED;
break;
}
if(aux == MAM_OK && strategy < len) {
aux = MAM_I_contains_strat(strategies, mask);
} else {
aux = 0;
}
if(result != NULL) *result = aux;
return aux;
}
/*
* //TODO
* Tiene que ser llamado despues de setear la config
*/
int MAM_Set_target_number(unsigned int numC){
return MAM_I_set_target_number(numC);
}
//======================================================||
//===============MAM_INIT FUNCTIONS=====================||
//======================================================||
//======================================================||
void MAM_Init_configuration() {
if(mall == NULL || mall_conf == NULL) {
printf("MAM FATAL ERROR: Setting initial config without previous mallocs\n");
fflush(stdout);
MPI_Abort(MPI_COMM_WORLD, -50);
}
configSettings[MAM_SPAWN_METHOD].value = &mall_conf->spawn_method;
configSettings[MAM_SPAWN_STRATEGIES].value = &mall_conf->spawn_strategies;
configSettings[MAM_PHYSICAL_DISTRIBUTION].value = &mall_conf->spawn_dist;
configSettings[MAM_RED_METHOD].value = &mall_conf->red_method;
configSettings[MAM_RED_STRATEGIES].value = &mall_conf->red_strategies;
}
void MAM_Set_initial_configuration() {
int not_filled = 1;
not_filled = MAM_I_configuration_get_defaults();
if(not_filled) {
if(mall->myId == mall->root) printf("MAM WARNING: Starting configuration not set\n");
fflush(stdout);
MPI_Abort(mall->comm, -50);
}
#if USE_MAL_DEBUG >= 2
if(mall->myId == mall->root) {
DEBUG_FUNC("Initial configuration settled", mall->myId, mall->numP);
fflush(stdout);
}
#endif
}
void MAM_Check_configuration() {
if(mall->numC == mall->numP) { // Migrate
MAM_Set_key_configuration(MAM_SPAWN_METHOD, MALL_SPAWN_BASELINE, NULL);
}
if(mall_conf->spawn_method == MALL_SPAWN_MERGE) {
if(MAM_I_contains_strat(mall_conf->spawn_strategies, MAM_MASK_SPAWN_INTERCOMM)) {
MAM_I_remove_strat(&mall_conf->spawn_strategies, MAM_MASK_SPAWN_INTERCOMM);
}
if(mall->numP > mall->numC && MAM_I_contains_strat(mall_conf->spawn_strategies, MAM_MASK_SPAWN_SINGLE)) {
MAM_I_remove_strat(&mall_conf->spawn_strategies, MAM_MASK_SPAWN_SINGLE);
}
}
if(mall_conf->red_method == MALL_RED_RMA_LOCK || mall_conf->red_method == MALL_RED_RMA_LOCKALL) {
if(MAM_I_contains_strat(mall_conf->spawn_strategies, MAM_MASK_SPAWN_INTERCOMM)) {
MAM_I_remove_strat(&mall_conf->spawn_strategies, MAM_MASK_SPAWN_INTERCOMM);
}
if(!MAM_I_contains_strat(mall_conf->red_strategies, MAM_MASK_RED_WAIT_TARGETS) &&
!MAM_I_contains_strat(mall_conf->red_strategies, MAM_MASK_PTHREAD)) {
MAM_I_set_red_strat(MAM_STRAT_RED_WAIT_TARGETS, &mall_conf->red_strategies);
}
}
}
//======================================================||
//================PRIVATE FUNCTIONS=====================||
//======================================================||
//======================================================||
int MAM_I_configuration_get_defaults() {
size_t i;
int set_value;
char *tmp = NULL;
mam_config_setting_t *config = NULL;
for (i = 0; i < MAM_KEY_COUNT; i++) {
config = &configSettings[i];
tmp = getenv(config->env_name);
if(tmp != NULL) {
set_value = atoi(tmp);
} else {
set_value = config->default_value;
}
if (0 <= set_value && set_value < config->config_max_length) {
if(i == MAM_NUM_TARGETS) {
config->set_config_complex(set_value);
} else {
config->set_config_simple(set_value, config->value);
}
}
tmp = NULL;
}
return 0;
}
int MAM_I_set_method(unsigned int new_method, unsigned int *method) {
*method = new_method;
return *method;
}
int MAM_I_set_spawn_strat(unsigned int strategy, unsigned int *strategies) {
int result = 0;
int strat_removed = 0;
switch(strategy) {
case MAM_STRAT_SPAWN_CLEAR:
*strategies = MAM_STRAT_CLEAR_VALUE;
result = MAM_STRATS_MODIFIED;
break;
case MAM_STRAT_SPAWN_PTHREAD:
result = MAM_I_add_strat(strategies, MAM_MASK_PTHREAD);
break;
case MAM_STRAT_SPAWN_SINGLE:
result = MAM_I_add_strat(strategies, MAM_MASK_SPAWN_SINGLE);
break;
case MAM_STRAT_SPAWN_INTERCOMM:
result = MAM_I_add_strat(strategies, MAM_MASK_SPAWN_INTERCOMM);
break;
default:
//Unkown strategy
result = MALL_DENIED;
break;
}
if(strat_removed) {
result = MAM_STRATS_MODIFIED;
}
return result;
}
int MAM_I_set_red_strat(unsigned int strategy, unsigned int *strategies) {
int result = 0;
int strat_removed = 0;
switch(strategy) {
case MAM_STRAT_RED_CLEAR:
*strategies = MAM_STRAT_CLEAR_VALUE;
result = MAM_STRATS_MODIFIED;
break;
case MAM_STRAT_RED_PTHREAD: //TODO - IMPROVEMENT - This could be done with a single operation instead of 3.
result = MAM_I_add_strat(strategies, MAM_MASK_PTHREAD);
if(result == MAM_STRATS_ADDED) {
strat_removed += MAM_I_remove_strat(strategies, MAM_MASK_RED_WAIT_SOURCES);
strat_removed += MAM_I_remove_strat(strategies, MAM_MASK_RED_WAIT_TARGETS);
}
break;
case MAM_STRAT_RED_WAIT_SOURCES:
result = MAM_I_add_strat(strategies, MAM_MASK_RED_WAIT_SOURCES);
if(result == MAM_STRATS_ADDED) {
strat_removed += MAM_I_remove_strat(strategies, MAM_MASK_RED_WAIT_TARGETS);
strat_removed += MAM_I_remove_strat(strategies, MAM_MASK_PTHREAD);
}
break;
case MAM_STRAT_RED_WAIT_TARGETS:
result = MAM_I_add_strat(strategies, MAM_MASK_RED_WAIT_TARGETS);
if(result == MAM_STRATS_ADDED) {
strat_removed += MAM_I_remove_strat(strategies, MAM_MASK_RED_WAIT_SOURCES);
strat_removed += MAM_I_remove_strat(strategies, MAM_MASK_PTHREAD);
}
break;
default:
//Unkown strategy
result = MALL_DENIED;
break;
}
if(strat_removed) {
result = MAM_STRATS_MODIFIED;
}
return result;
}
int MAM_I_set_target_number(unsigned int new_numC) {
if(state > MALL_NOT_STARTED || new_numC == 0) return MALL_DENIED;
mall->numC = (int) new_numC;
return new_numC;
}
/*
* Returns 1 if strategy is applied, 0 otherwise
*/
int MAM_I_contains_strat(unsigned int comm_strategies, unsigned int strategy) {
return comm_strategies & strategy;
}
int MAM_I_add_strat(unsigned int *comm_strategies, unsigned int strategy) {
if(MAM_I_contains_strat(*comm_strategies, strategy)) return MAM_OK;
*comm_strategies |= strategy;
return MAM_STRATS_ADDED;
}
int MAM_I_remove_strat(unsigned int *comm_strategies, unsigned int strategy) {
if(!MAM_I_contains_strat(*comm_strategies, strategy)) return MAM_OK;
*comm_strategies &= ~strategy;
return MAM_STRATS_MODIFIED;
}
#ifndef MAM_CONFIGURATION_H
#define MAM_CONFIGURATION_H
#include <mpi.h>
#include "malleabilityStates.h"
#define MAM_STRAT_CLEAR_VALUE 0
#define MAM_STRATS_ADDED 1
#define MAM_STRATS_MODIFIED 2
#define MAM_MASK_PTHREAD 0x01
#define MAM_MASK_SPAWN_SINGLE 0x02
#define MAM_MASK_SPAWN_INTERCOMM 0x04
#define MAM_MASK_RED_WAIT_SOURCES 0x02
#define MAM_MASK_RED_WAIT_TARGETS 0x04
int MAM_Contains_strat(int key, unsigned int strategy, int *result);
void MAM_Set_configuration(int spawn_method, int spawn_strategies, int spawn_dist, int red_method, int red_strategies);
void MAM_Set_key_configuration(int key, int required, int *provided);
int MAM_Set_target_number(unsigned int numC);
#endif
#ifndef MAM_INIT_CONFIGURATION_H
#define MAM_INIT_CONFIGURATION_H
#include <mpi.h>
#include "malleabilityStates.h"
void MAM_Init_configuration();
void MAM_Set_initial_configuration();
void MAM_Check_configuration();
#endif
CC = gcc
MCC = mpicc
#C_FLAGS_ALL = -Wall -Wextra -Wshadow -Wfatal-errors -Wconversion -Wpedantic
C_FLAGS =
LD_FLAGS = -lm -pthread
DEF =
USE_MAL_SLURM ?= 1
USE_MAL_BARRIERS ?= 0
USE_MAL_DEBUG ?= 0
ifeq ($(USE_MAL_SLURM),1)
LD_FLAGS += -lslurm
endif
DEF = -DUSE_MAL_SLURM=$(USE_MAL_SLURM) -DUSE_MAL_BARRIERS=$(USE_MAL_BARRIERS) -DUSE_MAL_DEBUG=$(USE_MAL_DEBUG)
.PHONY : clean clear install install_slurm
# Final binary
BIN = libmam.so
# Put all auto generated stuff to this build dir.
BUILD_DIR = ./build
# List of all directories where source files are located
SRCDIRS = . spawn_methods distribution_methods
# List of all .c source files.
C_FILES = $(foreach dire, $(SRCDIRS), $(wildcard $(dire)/*.c))
# All .o files go to build dir.
OBJ = $(C_FILES:%.c=$(BUILD_DIR)/%.o)
# Gcc will create these .d files containing dependencies.
DEP = $(OBJ:%.o=%.d)
# Default target named after the binary.
$(BIN) : $(BUILD_DIR)/$(BIN)
# Actual target of the binary - depends on all .o files.
$(BUILD_DIR)/$(BIN) : $(OBJ)
$(MCC) $(C_FLAGS) -shared $^ -o $@ $(LD_FLAGS)
# Include all .d files
# .d files are used for knowing the dependencies of each source file
-include $(DEP)
# Build target for every single object file.
# The potential dependency on header files is covered
# by calling `-include $(DEP)`.
# The -MMD flags additionaly creates a .d file with
# the same name as the .o file.
$(BUILD_DIR)/%.o : %.c
mkdir -p $(@D)
$(MCC) $(C_FLAGS) $(DEF) -fpic -MMD -c $< -o $@
clean:
-rm $(BUILD_DIR)/$(BIN) $(OBJ) $(DEP)
clear:
-rm -rf $(BUILDDIR)
install: $(BIN)
echo "Done"
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include "block_distribution.h"
void set_interblock_counts(int id, int numP, struct Dist_data data_dist, int offset_ids, int *sendcounts);
void get_util_ids(struct Dist_data dist_data, int numP_other, int **idS);
/*
* Prepares a communication from "numP" processes to "numP_other" processes
* of "n" elements an returns an struct of counts with 3 arrays to perform the
* communications.
*
* The struct should be freed with freeCounts
*/
void prepare_comm_alltoall(int myId, int numP, int numP_other, int n, int offset_ids, struct Counts *counts) {
int i, *idS, first_id = 0;
struct Dist_data dist_data, dist_target;
if(counts == NULL) {
fprintf(stderr, "Counts is NULL for rank %d/%d ", myId, numP);
MPI_Abort(MPI_COMM_WORLD, -3);
}
get_block_dist(n, myId, numP, &dist_data);
get_util_ids(dist_data, numP_other, &idS);
counts->idI = idS[0] + offset_ids;
counts->idE = idS[1] + offset_ids;
get_block_dist(n, idS[0], numP_other, &dist_target); // RMA Specific operation -- uses idS[0], not idI
counts->first_target_displs = dist_data.ini - dist_target.ini; // RMA Specific operation
if(idS[0] == 0) { // Uses idS[0], not idI
set_interblock_counts(counts->idI, numP_other, dist_data, offset_ids, counts->counts);
first_id++;
}
for(i=counts->idI + first_id; i<counts->idE; i++) {
set_interblock_counts(i, numP_other, dist_data, offset_ids, counts->counts);
counts->displs[i] = counts->displs[i-1] + counts->counts[i-1];
}
free(idS);
for(i=0; i<numP_other; i++) {
if(counts->counts[i] < 0) {
fprintf(stderr, "Counts value [i=%d/%d] is negative for rank %d/%d ", i, numP_other, myId, numP);
MPI_Abort(MPI_COMM_WORLD, -3);
}
if(counts->displs[i] < 0) {
fprintf(stderr, "Displs value [i=%d/%d] is negative for rank %d/%d ", i, numP_other, myId, numP);
MPI_Abort(MPI_COMM_WORLD, -3);
}
}
}
/*
* Prepares a communication of "numP" processes of "n" elements an
* returns an struct of counts with 3 arrays to perform the
* communications.
*
* The struct should be freed with freeCounts
*/
void prepare_comm_allgatherv(int numP, int n, struct Counts *counts) {
int i;
struct Dist_data dist_data;
mallocCounts(counts, numP);
get_block_dist(n, 0, numP, &dist_data);
counts->counts[0] = dist_data.tamBl;
for(i=1; i<numP; i++){
get_block_dist(n, i, numP, &dist_data);
counts->counts[i] = dist_data.tamBl;
counts->displs[i] = counts->displs[i-1] + counts->counts[i-1];
}
}
/*
* ========================================================================================
* ========================================================================================
* ================================DISTRIBUTION FUNCTIONS==================================
* ========================================================================================
* ========================================================================================
*/
/*
* Obatains for "Id" and "numP", how many
* elements per row will have process "Id"
* and fills the results in a Dist_data struct
*/
void get_block_dist(int qty, int id, int numP, struct Dist_data *dist_data) {
int rem;
dist_data->myId = id;
dist_data->numP = numP;
dist_data->qty = qty;
dist_data->tamBl = qty / numP;
rem = qty % numP;
if(id < rem) { // First subgroup
dist_data->ini = id * dist_data->tamBl + id;
dist_data->fin = (id+1) * dist_data->tamBl + (id+1);
} else { // Second subgroup
dist_data->ini = id * dist_data->tamBl + rem;
dist_data->fin = (id+1) * dist_data->tamBl + rem;
}
if(dist_data->fin > qty) { dist_data->fin = qty; }
if(dist_data->ini > dist_data->fin) { dist_data->ini = dist_data->fin; }
dist_data->tamBl = dist_data->fin - dist_data->ini;
}
/*
* Obtiene para el Id de un proceso dado, cuantos elementos
* enviara o recibira desde el proceso indicado en Dist_data.
*/
void set_interblock_counts(int id, int numP, struct Dist_data data_dist, int offset_ids, int *sendcounts) {
struct Dist_data other;
int biggest_ini, smallest_end;
get_block_dist(data_dist.qty, id - offset_ids, numP, &other);
// Si el rango de valores no coincide, se pasa al siguiente proceso
if(data_dist.ini >= other.fin || data_dist.fin <= other.ini) {
return;
}
// Obtiene el proceso con mayor ini entre los dos procesos
biggest_ini = (data_dist.ini > other.ini) ? data_dist.ini : other.ini;
// Obtiene el proceso con menor fin entre los dos procesos
smallest_end = (data_dist.fin < other.fin) ? data_dist.fin : other.fin;
sendcounts[id] = smallest_end - biggest_ini; // Numero de elementos a enviar/recibir del proceso Id
}
/*
* Obtiene para un proceso de un grupo a que rango procesos de
* otro grupo tiene que enviar o recibir datos.
*
* Devuelve el primer identificador y el último (Excluido) con el que
* comunicarse.
*/
void get_util_ids(struct Dist_data dist_data, int numP_other, int **idS) {
int idI, idE;
int tamOther = dist_data.qty / numP_other;
int remOther = dist_data.qty % numP_other;
// Indica el punto de corte del grupo de procesos externo que
// divide entre los procesos que tienen
// un tamaño tamOther + 1 y un tamaño tamOther
int middle = (tamOther + 1) * remOther;
// Calcular idI teniendo en cuenta si se comunica con un
// proceso con tamano tamOther o tamOther+1
if(middle > dist_data.ini) { // First subgroup (tamOther+1)
idI = dist_data.ini / (tamOther + 1);
} else { // Second subgroup (tamOther)
idI = ((dist_data.ini - middle) / tamOther) + remOther;
}
// Calcular idR teniendo en cuenta si se comunica con un
// proceso con tamano tamOther o tamOther+1
if(middle >= dist_data.fin) { // First subgroup (tamOther +1)
idE = dist_data.fin / (tamOther + 1);
idE = (dist_data.fin % (tamOther + 1) > 0 && idE+1 <= numP_other) ? idE+1 : idE;
} else { // Second subgroup (tamOther)
idE = ((dist_data.fin - middle) / tamOther) + remOther;
idE = ((dist_data.fin - middle) % tamOther > 0 && idE+1 <= numP_other) ? idE+1 : idE;
}
*idS = malloc(2 * sizeof(int));
(*idS)[0] = idI;
(*idS)[1] = idE;
}
/*
* ========================================================================================
* ========================================================================================
* ==============================INIT/FREE/PRINT FUNCTIONS=================================
* ========================================================================================
* ========================================================================================
*/
/*
* Reserva memoria para los vectores de counts/displs de la funcion
* MPI_Alltoallv. Todos los vectores tienen un tamaño de numP, que es la
* cantidad de procesos en el otro grupo de procesos.
*
* El vector counts indica cuantos elementos se comunican desde este proceso
* al proceso "i" del otro grupo.
*
* El vector displs indica los desplazamientos necesarios para cada comunicacion
* con el proceso "i" del otro grupo.
*
*/
void mallocCounts(struct Counts *counts, size_t numP) {
counts->counts = calloc(numP, sizeof(int));
if(counts->counts == NULL) { MPI_Abort(MPI_COMM_WORLD, -2);}
counts->displs = calloc(numP, sizeof(int));
if(counts->displs == NULL) { MPI_Abort(MPI_COMM_WORLD, -2);}
counts->len = numP;
counts->idI = -1;
counts->idE = -1;
counts->first_target_displs = -1;
}
/*
* Libera la memoria interna de una estructura Counts.
*
* No libera la memoria de la estructura counts si se ha alojado
* de forma dinamica.
*/
void freeCounts(struct Counts *counts) {
if(counts == NULL) {
return;
}
if(counts->counts != NULL) {
free(counts->counts);
counts->counts = NULL;
}
if(counts->displs != NULL) {
free(counts->displs);
counts->displs = NULL;
}
}
/*
* Muestra la informacion de comunicaciones de un proceso
* Si se activa la bandera "include_zero" a verdadero se mostraran para el vector
* xcounts los valores a 0.
*
* En "name" se puede indicar un string con el fin de identificar mejor a que vectores
* se refiere la llamada.
*/
void print_counts(struct Dist_data data_dist, int *xcounts, int *xdispls, int size, int include_zero, const char* name) {
int i;
for(i=0; i < size; i++) {
if(xcounts[i] != 0 || include_zero) {
printf("P%d of %d | %scounts[%d]=%d disp=%d\n", data_dist.myId, data_dist.numP, name, i, xcounts[i], xdispls[i]);
}
}
}
#ifndef mall_block_distribution
#define mall_block_distribution
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
struct Dist_data {
int ini; //Primer elemento a enviar
int fin; //Ultimo elemento a enviar
int tamBl; // Total de elementos
int qty; // Total number of rows of the full disperse matrix
int myId;
int numP;
MPI_Comm intercomm;
};
struct Counts {
int len, idI, idE;
int first_target_displs; // RMA. Indicates displacement for first target when performing a Get.
int *counts;
int *displs;
};
void prepare_comm_alltoall(int myId, int numP, int numP_other, int n, int offset_ids, struct Counts *counts);
void prepare_comm_allgatherv(int numP, int n, struct Counts *counts);
void get_block_dist(int qty, int id, int numP, struct Dist_data *dist_data);
void mallocCounts(struct Counts *counts, size_t numP);
void freeCounts(struct Counts *counts);
void print_counts(struct Dist_data data_dist, int *xcounts, int *xdispls, int size, int include_zero, const char* name);
#endif
#include "malleabilityDataStructures.h"
int state = MALL_UNRESERVED;
/*
* Crea un tipo derivado para mandar las dos estructuras principales
* de MaM.
*/
void MAM_Def_main_datatype() {
int i, counts = 10;
int blocklengths[counts];
MPI_Aint displs[counts];
MPI_Datatype types[counts];
for(i=0; i<5; i++) {
blocklengths[i] = 1;
types[i] = MPI_UNSIGNED;
}
for(i=5; i<counts; i++) {
blocklengths[i] = 1;
types[i] = MPI_INT;
}
// Obtain base direction
MPI_Get_address(&(mall_conf->spawn_method), &displs[0]);
MPI_Get_address(&(mall_conf->spawn_strategies), &displs[1]);
MPI_Get_address(&(mall_conf->spawn_dist), &displs[2]);
MPI_Get_address(&(mall_conf->red_method), &displs[3]);
MPI_Get_address(&(mall_conf->red_strategies), &displs[4]);
MPI_Get_address(&(mall->root_parents), &displs[5]);
MPI_Get_address(&(mall->num_parents), &displs[6]); //TODO Add only when Intercomm strat active?
MPI_Get_address(&(mall->num_cpus), &displs[7]);
MPI_Get_address(&(mall->num_nodes), &displs[8]);
MPI_Get_address(&(mall->nodelist_len), &displs[9]);
MPI_Type_create_struct(counts, blocklengths, displs, types, &mall->struct_type);
MPI_Type_commit(&mall->struct_type);
}
void MAM_Free_main_datatype() {
if(mall->struct_type != MPI_DATATYPE_NULL) {
MPI_Type_free(&mall->struct_type);
}
}
/*
* Comunica datos necesarios de las estructuras
* principales de MAM de sources a targets.
*/
void MAM_Comm_main_structures(int rootBcast) {
MPI_Bcast(MPI_BOTTOM, 1, mall->struct_type, rootBcast, mall->intercomm);
if(mall->nodelist == NULL) {
mall->nodelist = calloc(mall->nodelist_len+1, sizeof(char));
mall->nodelist[mall->nodelist_len] = '\0';
}
MPI_Bcast(mall->nodelist, mall->nodelist_len, MPI_CHAR, rootBcast, mall->intercomm);
}
#ifndef MALLEABILITY_DATA_STRUCTURES_H
#define MALLEABILITY_DATA_STRUCTURES_H
/*
* Shows available data structures for inner ussage.
*/
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
#include <pthread.h>
#include "malleabilityStates.h"
#define DEBUG_FUNC(debug_string, rank, numP) printf("MaM [P%d/%d]: %s -- %s:%s:%d\n", rank, numP, debug_string, __FILE__, __func__, __LINE__)
/* --- TIME CAPTURE STRUCTURE --- */
typedef struct {
// Spawn, Sync and Async time
double spawn_start, spawn_time;
double sync_start, sync_end;
double async_start, async_end;
double malleability_start, malleability_end;
MPI_Datatype times_type;
} malleability_times_t;
/* --- GLOBAL STRUCTURES --- */
typedef struct {
unsigned int spawn_method;
unsigned int spawn_dist;
unsigned int spawn_strategies;
unsigned int red_method;
unsigned int red_strategies;
malleability_times_t *times;
} malleability_config_t;
typedef struct {
int myId, numP, numC, zombie;
int root, root_collectives;
int num_parents, root_parents;
pthread_t async_thread;
MPI_Comm comm, thread_comm;
MPI_Comm intercomm, tmp_comm;
MPI_Comm *user_comm;
MPI_Datatype struct_type;
// Specific vars for Wait_targets strat
int wait_targets_posted;
MPI_Request wait_targets;
char *name_exec, *nodelist;
int num_cpus, num_nodes, nodelist_len;
} malleability_t;
/* --- VARIABLES --- */
malleability_config_t *mall_conf;
malleability_t *mall;
extern int state;
/* --- FUNCTIONS --- */
void MAM_Def_main_datatype();
void MAM_Free_main_datatype();
void MAM_Comm_main_structures(int rootBcast);
#endif
#include <pthread.h>
#include <string.h>
//#include "malleabilityManager.h"
#include "MAM.h"
#include "malleabilityStates.h"
#include "malleabilityDataStructures.h"
#include "malleabilityTypes.h"
#include "malleabilityZombies.h"
#include "malleabilityTimes.h"
#include "malleabilityRMS.h"
#include "MAM_Init_Configuration.h"
#include "spawn_methods/GenericSpawn.h"
#include "CommDist.h"
#define MALLEABILITY_USE_SYNCHRONOUS 0
#define MALLEABILITY_USE_ASYNCHRONOUS 1
void MAM_Commit(int *mam_state);
void send_data(int numP_children, malleability_data_t *data_struct, int is_asynchronous);
void recv_data(int numP_parents, malleability_data_t *data_struct, int is_asynchronous);
int MAM_St_rms(int *mam_state);
int MAM_St_spawn_start();
int MAM_St_spawn_pending(int wait_completed);
int MAM_St_red_start();
int MAM_St_red_pending(int *mam_state, int wait_completed);
int MAM_St_user_start(int *mam_state);
int MAM_St_user_pending(int *mam_state, int wait_completed, void (*user_function)(void *), void *user_args);
int MAM_St_user_completed();
int MAM_St_spawn_adapt_pending(int wait_completed);
int MAM_St_spawn_adapted(int *mam_state);
int MAM_St_red_completed(int *mam_state);
int MAM_St_completed(int *mam_state);
void Children_init(void (*user_function)(void *), void *user_args);
int spawn_step();
int start_redistribution();
int check_redistribution(int wait_completed);
int end_redistribution();
int shrink_redistribution();
int thread_creation();
int thread_check(int wait_completed);
void* thread_async_work();
void print_comms_state();
void malleability_comms_update(MPI_Comm comm);
int MAM_I_convert_key(char *key);
void MAM_I_create_user_struct(int is_children_group);
malleability_data_t *rep_s_data;
malleability_data_t *dist_s_data;
malleability_data_t *rep_a_data;
malleability_data_t *dist_a_data;
mam_user_reconf_t *user_reconf;
/*
* Inicializa la reserva de memoria para el modulo de maleabilidad
* creando todas las estructuras necesarias y copias de comunicadores
* para no interferir en la aplicación.
*
* Si es llamada por un grupo de procesos creados de forma dinámica,
* inicializan la comunicacion con sus padres. En este caso, al terminar
* la comunicacion los procesos hijo estan preparados para ejecutar la
* aplicacion.
*/
int MAM_Init(int root, MPI_Comm *comm, char *name_exec, void (*user_function)(void *), void *user_args) {
MPI_Comm dup_comm, thread_comm;
mall_conf = (malleability_config_t *) malloc(sizeof(malleability_config_t));
mall = (malleability_t *) malloc(sizeof(malleability_t));
user_reconf = (mam_user_reconf_t *) malloc(sizeof(mam_user_reconf_t));
MPI_Comm_rank(*comm, &(mall->myId));
MPI_Comm_size(*comm, &(mall->numP));
#if USE_MAL_DEBUG
DEBUG_FUNC("Initializing MaM", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(*comm);
#endif
rep_s_data = (malleability_data_t *) malloc(sizeof(malleability_data_t));
dist_s_data = (malleability_data_t *) malloc(sizeof(malleability_data_t));
rep_a_data = (malleability_data_t *) malloc(sizeof(malleability_data_t));
dist_a_data = (malleability_data_t *) malloc(sizeof(malleability_data_t));
MPI_Comm_dup(*comm, &dup_comm);
MPI_Comm_dup(*comm, &thread_comm);
MPI_Comm_set_name(dup_comm, "MAM_MAIN");
MPI_Comm_set_name(thread_comm, "MAM_THREAD");
mall->root = root;
mall->root_parents = root;
mall->zombie = 0;
mall->comm = dup_comm;
mall->thread_comm = thread_comm;
mall->user_comm = comm;
mall->tmp_comm = MPI_COMM_NULL;
mall->name_exec = name_exec;
mall->nodelist = NULL;
mall->nodelist_len = 0;
rep_s_data->entries = 0;
rep_a_data->entries = 0;
dist_s_data->entries = 0;
dist_a_data->entries = 0;
state = MALL_NOT_STARTED;
MAM_Init_configuration();
zombies_service_init();
init_malleability_times();
MAM_Def_main_datatype();
// Si son el primer grupo de procesos, obtienen los datos de los padres
MPI_Comm_get_parent(&(mall->intercomm));
if(mall->intercomm != MPI_COMM_NULL) {
Children_init(user_function, user_args);
return MALLEABILITY_CHILDREN;
}
MAM_check_hosts();
MAM_Set_initial_configuration();
#if USE_MAL_BARRIERS && USE_MAL_DEBUG
if(mall->myId == mall->root)
printf("MaM: Using barriers to record times.\n");
#endif
#if USE_MAL_DEBUG
DEBUG_FUNC("MaM has been initialized correctly as parents", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(*comm);
#endif
return MALLEABILITY_NOT_CHILDREN;
}
/*
* Elimina toda la memoria reservado por el modulo
* de maleabilidad y asegura que los zombies
* despierten si los hubiese.
*/
void MAM_Finalize() {
free_malleability_data_struct(rep_s_data);
free_malleability_data_struct(rep_a_data);
free_malleability_data_struct(dist_s_data);
free_malleability_data_struct(dist_a_data);
free(rep_s_data);
free(rep_a_data);
free(dist_s_data);
free(dist_a_data);
if(mall->nodelist != NULL) free(mall->nodelist);
MAM_Free_main_datatype();
free_malleability_times();
if(mall->comm != MPI_COMM_WORLD && mall->comm != MPI_COMM_NULL) MPI_Comm_free(&(mall->comm));
if(mall->thread_comm != MPI_COMM_WORLD && mall->thread_comm != MPI_COMM_NULL) MPI_Comm_free(&(mall->thread_comm));
if(mall->intercomm != MPI_COMM_WORLD && mall->intercomm != MPI_COMM_NULL) { MPI_Comm_disconnect(&(mall->intercomm)); } //FIXME Error en OpenMPI + Merge
free(mall);
free(mall_conf);
free(user_reconf);
zombies_awake();
zombies_service_free();
state = MALL_UNRESERVED;
}
/*
* TODO Reescribir
* Comprueba el estado de la maleabilidad. Intenta avanzar en la misma
* si es posible. Funciona como una máquina de estados.
* Retorna el estado de la maleabilidad concreto y modifica el argumento
* "mam_state" a uno generico.
*
* El argumento "wait_completed" se utiliza para esperar a la finalización de
* las tareas llevadas a cabo por parte de MAM.
*
*/
int MAM_Checkpoint(int *mam_state, int wait_completed, void (*user_function)(void *), void *user_args) {
int call_checkpoint = 0;
//TODO This could be changed to an array with the functions to call in each case
switch(state) {
case MALL_UNRESERVED:
*mam_state = MAM_UNRESERVED;
break;
case MALL_NOT_STARTED:
call_checkpoint = MAM_St_rms(mam_state);
break;
case MALL_RMS_COMPLETED:
call_checkpoint = MAM_St_spawn_start();
break;
case MALL_SPAWN_PENDING: // Comprueba si el spawn ha terminado
case MALL_SPAWN_SINGLE_PENDING:
call_checkpoint = MAM_St_spawn_pending(wait_completed);
break;
case MALL_SPAWN_ADAPT_POSTPONE:
case MALL_SPAWN_COMPLETED:
call_checkpoint = MAM_St_red_start();
break;
case MALL_DIST_PENDING:
call_checkpoint = MAM_St_red_pending(mam_state, wait_completed);
break;
case MALL_USER_START:
call_checkpoint = MAM_St_user_start(mam_state);
break;
case MALL_USER_PENDING:
call_checkpoint = MAM_St_user_pending(mam_state, wait_completed, user_function, user_args);
break;
case MALL_USER_COMPLETED:
call_checkpoint = MAM_St_user_completed();
break;
case MALL_SPAWN_ADAPT_PENDING:
call_checkpoint = MAM_St_spawn_adapt_pending(wait_completed);
break;
case MALL_SPAWN_ADAPTED:
case MALL_DIST_COMPLETED:
call_checkpoint = MAM_St_completed(mam_state);
break;
}
if(call_checkpoint) { MAM_Checkpoint(mam_state, wait_completed, user_function, user_args); }
if(state > MALL_NOT_STARTED && state < MALL_COMPLETED) *mam_state = MAM_PENDING;
return state;
}
/*
* TODO
*/
void MAM_Resume_redistribution(int *mam_state) {
state = MALL_USER_COMPLETED;
if(mam_state != NULL) *mam_state = MAM_PENDING;
}
/*
* TODO
*/
void MAM_Commit(int *mam_state) {
int zombies = 0;
#if USE_MAL_DEBUG
if(mall->myId == mall->root){ DEBUG_FUNC("Trying to commit", mall->myId, mall->numP); } fflush(stdout);
#endif
// Get times before commiting
if(mall_conf->spawn_method == MALL_SPAWN_BASELINE) {
// This communication is only needed when a root process will become a zombie
malleability_times_broadcast(mall->root_collectives);
}
// Free unneded communicators
if(mall->tmp_comm != MPI_COMM_WORLD && mall->tmp_comm != MPI_COMM_NULL) MPI_Comm_free(&(mall->tmp_comm));
if(*(mall->user_comm) != MPI_COMM_WORLD && *(mall->user_comm) != MPI_COMM_NULL) MPI_Comm_free(mall->user_comm);
// Zombies treatment
if(mall_conf->spawn_method == MALL_SPAWN_MERGE) {
MPI_Allreduce(&mall->zombie, &zombies, 1, MPI_INT, MPI_MAX, mall->comm);
if(zombies) {
zombies_collect_suspended(mall->comm);
}
}
// Zombies KILL
if(mall->zombie) {
#if USE_MAL_DEBUG >= 2
DEBUG_FUNC("Is terminating as zombie", mall->myId, mall->numP); fflush(stdout);
#endif
MAM_Finalize();
MPI_Finalize();
exit(0);
}
// Reset/Free communicators
if(mall_conf->spawn_method == MALL_SPAWN_MERGE) { malleability_comms_update(mall->intercomm); }
if(mall->intercomm != MPI_COMM_NULL && mall->intercomm != MPI_COMM_WORLD) { MPI_Comm_disconnect(&(mall->intercomm)); } //FIXME Error en OpenMPI + Merge
MPI_Comm_rank(mall->comm, &mall->myId);
MPI_Comm_size(mall->comm, &mall->numP);
mall->root = mall_conf->spawn_method == MALL_SPAWN_BASELINE ? mall->root : mall->root_parents;
mall->root_parents = mall->root;
state = MALL_NOT_STARTED;
if(mam_state != NULL) *mam_state = MAM_COMPLETED;
// Set new communicator
if(mall_conf->spawn_method == MALL_SPAWN_BASELINE) { *(mall->user_comm) = MPI_COMM_WORLD; }
else if(mall_conf->spawn_method == MALL_SPAWN_MERGE) { MPI_Comm_dup(mall->comm, mall->user_comm); }
#if USE_MAL_DEBUG
if(mall->myId == mall->root) DEBUG_FUNC("Reconfiguration has been commited", mall->myId, mall->numP); fflush(stdout);
#endif
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->malleability_end = MPI_Wtime();
}
/*
* This function adds data to a data structure based on whether the operation is synchronous or asynchronous,
* and whether the data is replicated or distributed. It takes the following parameters:
* - data: a pointer to the data to be added
* - index: a pointer to a size_t variable where the index of the added data will be stored
* - total_qty: the amount of elements in data
* - type: the MPI datatype of the data
* - is_replicated: a flag indicating whether the data is replicated (MAM_DATA_REPLICATED) or not (MAM_DATA_DISTRIBUTED)
* - is_constant: a flag indicating whether the operation is asynchronous (MAM_DATA_CONSTANT) or synchronous (MAM_DATA_VARIABLE)
* Finally, it updates the index with the index of the last added data if index is not NULL.
*/
void MAM_Data_add(void *data, size_t *index, size_t total_qty, MPI_Datatype type, int is_replicated, int is_constant) {
size_t total_reqs = 0, returned_index;
if(is_constant) { //Async
if(is_replicated) {
total_reqs = 1;
add_data(data, total_qty, type, total_reqs, rep_a_data);
returned_index = rep_a_data->entries-1;
} else {
if(mall_conf->red_method == MALL_RED_BASELINE) {
total_reqs = 1;
} else if(mall_conf->red_method == MALL_RED_POINT || mall_conf->red_method == MALL_RED_RMA_LOCK || mall_conf->red_method == MALL_RED_RMA_LOCKALL) {
total_reqs = mall->numC;
}
add_data(data, total_qty, type, total_reqs, dist_a_data);
returned_index = dist_a_data->entries-1;
}
} else { //Sync
if(is_replicated) {
add_data(data, total_qty, type, total_reqs, rep_s_data);
returned_index = rep_s_data->entries-1;
} else {
add_data(data, total_qty, type, total_reqs, dist_s_data);
returned_index = dist_s_data->entries-1;
}
}
if(index != NULL) *index = returned_index;
}
/*
* This function modifies a data entry to a data structure based on whether the operation is synchronous or asynchronous,
* and whether the data is replicated or distributed. It takes the following parameters:
* - data: a pointer to the data to be added
* - index: a value indicating which entry will be modified
* - total_qty: the amount of elements in data
* - type: the MPI datatype of the data
* - is_replicated: a flag indicating whether the data is replicated (MAM_DATA_REPLICATED) or not (MAM_DATA_DISTRIBUTED)
* - is_constant: a flag indicating whether the operation is asynchronous (MAM_DATA_CONSTANT) or synchronous (MAM_DATA_VARIABLE)
*/
void MAM_Data_modify(void *data, size_t index, size_t total_qty, MPI_Datatype type, int is_replicated, int is_constant) {
size_t total_reqs = 0;
if(is_constant) {
if(is_replicated) {
total_reqs = 1;
modify_data(data, index, total_qty, type, total_reqs, rep_a_data); //FIXME total_reqs==0 ???
} else {
if(mall_conf->red_method == MALL_RED_BASELINE) {
total_reqs = 1;
} else if(mall_conf->red_method == MALL_RED_POINT || mall_conf->red_method == MALL_RED_RMA_LOCK || mall_conf->red_method == MALL_RED_RMA_LOCKALL) {
total_reqs = mall->numC;
}
modify_data(data, index, total_qty, type, total_reqs, dist_a_data);
}
} else {
if(is_replicated) {
modify_data(data, index, total_qty, type, total_reqs, rep_s_data);
} else {
modify_data(data, index, total_qty, type, total_reqs, dist_s_data);
}
}
}
/*
* This functions returns how many data entries are available for one of the specific data structures.
* It takes the following parameters:
* - is_replicated: a flag indicating whether the structure is replicated (MAM_DATA_REPLICATED) or not (MAM_DATA_DISTRIBUTED)
* - is_constant: a flag indicating whether the operation is asynchronous (MAM_DATA_CONSTANT) or synchronous (MAM_DATA_VARIABLE)
* - entries: a pointer where the amount of entries will be stored
*/
void MAM_Data_get_entries(int is_replicated, int is_constant, size_t *entries){
if(is_constant) {
if(is_replicated) {
*entries = rep_a_data->entries;
} else {
*entries = dist_a_data->entries;
}
} else {
if(is_replicated) {
*entries = rep_s_data->entries;
} else {
*entries = dist_s_data->entries;
}
}
}
/*
* This function returns a data entry to a data structure based on whether the operation is synchronous or asynchronous,
* and whether the data is replicated or distributed. It takes the following parameters:
* - index: a value indicating which entry will be modified
* - is_replicated: a flag indicating whether the data is replicated (MAM_DATA_REPLICATED) or not (MAM_DATA_DISTRIBUTED)
* - is_constant: a flag indicating whether the operation is asynchronous (MAM_DATA_CONSTANT) or synchronous (MAM_DATA_VARIABLE)
* - data: a pointer where the data will be stored. The user must free it
* - total_qty: the amount of elements in data for all ranks
* - local_qty: the amount of elements in data for this rank
*/
void MAM_Data_get_pointer(void **data, size_t index, size_t *total_qty, MPI_Datatype *type, int is_replicated, int is_constant) {
malleability_data_t *data_struct;
if(is_constant) {
if(is_replicated) {
data_struct = rep_a_data;
} else {
data_struct = dist_a_data;
}
} else {
if(is_replicated) {
data_struct = rep_s_data;
} else {
data_struct = dist_s_data;
}
}
*data = data_struct->arrays[index];
*total_qty = data_struct->qty[index];
*type = data_struct->types[index];
//get_block_dist(qty, mall->myId, mall->numP, &dist_data); //FIXME Asegurar que numP es correcto
}
/*
* @brief Returns a structure to perform data redistribution during a reconfiguration.
*
* This function is intended to be called when the state of MaM is MALL_USER_PENDING only.
* It is designed to provide the necessary information for the user to perform data redistribution.
*
* Parameters:
* - mam_user_reconf_t *reconf_info: A pointer to a mam_user_reconf_t structure where the function will store the required information for data redistribution.
*
* Return Value:
* - MAM_OK: If the function successfully retrieves the reconfiguration information.
* - MALL_DENIED: If the function is called when the state of the MaM is not MALL_USER_PENDING.
*/
int MAM_Get_Reconf_Info(mam_user_reconf_t *reconf_info) {
if(state != MALL_USER_PENDING) return MALL_DENIED;
*reconf_info = *user_reconf;
return MAM_OK;
}
/*
* @brief Returns the times used for the different steps of last reconfiguration.
*
* This function is intended to be called when a reconfiguration has ended.
* It is designed to provide the necessary information for the user to perform data redistribution.
*
* Parameters:
* - double *sp_time: A pointer where the spawn time will be saved.
* - double *sy_time: A pointer where the sychronous data redistribution time will be saved.
* - double *asy_time: A pointer where the asychronous data redistribution time will be saved.
* - double *mall_time: A pointer where the malleability time will be saved.
*/
void MAM_Retrieve_times(double *sp_time, double *sy_time, double *asy_time, double *mall_time) {
MAM_I_retrieve_times(sp_time, sy_time, asy_time, mall_time);
}
//======================================================||
//================PRIVATE FUNCTIONS=====================||
//================DATA COMMUNICATION====================||
//======================================================||
//======================================================||
/*
* Funcion generalizada para enviar datos desde los hijos.
* La asincronizidad se refiere a si el hilo padre e hijo lo hacen
* de forma bloqueante o no. El padre puede tener varios hilos.
*/
void send_data(int numP_children, malleability_data_t *data_struct, int is_asynchronous) {
size_t i;
void *aux_send, *aux_recv;
if(is_asynchronous) {
for(i=0; i < data_struct->entries; i++) {
aux_send = data_struct->arrays[i];
aux_recv = NULL;
async_communication_start(aux_send, &aux_recv, data_struct->qty[i], data_struct->types[i], mall->numP, numP_children, MALLEABILITY_NOT_CHILDREN,
mall->intercomm, &(data_struct->requests[i]), &(data_struct->request_qty[i]), &(data_struct->windows[i]));
if(aux_recv != NULL) data_struct->arrays[i] = aux_recv;
}
} else {
for(i=0; i < data_struct->entries; i++) {
aux_send = data_struct->arrays[i];
aux_recv = NULL;
sync_communication(aux_send, &aux_recv, data_struct->qty[i], data_struct->types[i], mall->numP, numP_children, MALLEABILITY_NOT_CHILDREN, mall->intercomm);
if(aux_recv != NULL) data_struct->arrays[i] = aux_recv;
}
}
}
/*
* Funcion generalizada para recibir datos desde los hijos.
* La asincronizidad se refiere a si el hilo padre e hijo lo hacen
* de forma bloqueante o no. El padre puede tener varios hilos.
*/
void recv_data(int numP_parents, malleability_data_t *data_struct, int is_asynchronous) {
size_t i;
void *aux, *aux_s = NULL;
if(is_asynchronous) {
for(i=0; i < data_struct->entries; i++) {
aux = data_struct->arrays[i];
async_communication_start(aux_s, &aux, data_struct->qty[i], data_struct->types[i], mall->numP, numP_parents, MALLEABILITY_CHILDREN,
mall->intercomm, &(data_struct->requests[i]), &(data_struct->request_qty[i]), &(data_struct->windows[i]));
data_struct->arrays[i] = aux;
}
} else {
for(i=0; i < data_struct->entries; i++) {
aux = data_struct->arrays[i];
sync_communication(aux_s, &aux, data_struct->qty[i], data_struct->types[i], mall->numP, numP_parents, MALLEABILITY_CHILDREN, mall->intercomm);
data_struct->arrays[i] = aux;
}
}
}
//======================================================||
//================PRIVATE FUNCTIONS=====================||
//====================MAM STAGES========================||
//======================================================||
//======================================================||
//======================================================||
//======================================================||
//======================================================||
//======================================================||
int MAM_St_rms(int *mam_state) {
reset_malleability_times();
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->malleability_start = MPI_Wtime();
*mam_state = MAM_NOT_STARTED;
state = MALL_RMS_COMPLETED;
MAM_Check_configuration();
mall->wait_targets_posted = 0;
//if(CHECK_RMS()) {return MALL_DENIED;}
return 1;
}
int MAM_St_spawn_start() {
mall->num_parents = mall->numP;
state = spawn_step();
//FIXME Esto es necesario pero feo
if(mall_conf->spawn_method == MALL_SPAWN_MERGE && mall->myId >= mall->numC){ mall->zombie = 1; }
else if(mall_conf->spawn_method == MALL_SPAWN_BASELINE){ mall->zombie = 1; }
if (state == MALL_SPAWN_COMPLETED || state == MALL_SPAWN_ADAPT_POSTPONE){
return 1;
}
return 0;
}
int MAM_St_spawn_pending(int wait_completed) {
state = check_spawn_state(&(mall->intercomm), mall->comm, wait_completed);
if (state == MALL_SPAWN_COMPLETED || state == MALL_SPAWN_ADAPTED) {
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start;
return 1;
}
return 0;
}
int MAM_St_red_start() {
if(MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_INTERCOMM, NULL)) {
mall->root_collectives = mall->myId == mall->root ? MPI_ROOT : MPI_PROC_NULL;
} else {
mall->root_collectives = mall->root;
}
state = start_redistribution();
return 1;
}
int MAM_St_red_pending(int *mam_state, int wait_completed) {
if(MAM_Contains_strat(MAM_RED_STRATEGIES, MAM_STRAT_RED_PTHREAD, NULL)) {
state = thread_check(wait_completed);
} else {
state = check_redistribution(wait_completed);
}
if(state != MALL_DIST_PENDING) {
state = MALL_USER_START;
return 1;
}
return 0;
}
int MAM_St_user_start(int *mam_state) {
if(MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_INTERCOMM, NULL)) {
MPI_Intercomm_merge(mall->intercomm, MALLEABILITY_NOT_CHILDREN, &mall->tmp_comm); //El que pone 0 va primero
} else {
MPI_Comm_dup(mall->intercomm, &mall->tmp_comm);
}
MPI_Comm_set_name(mall->tmp_comm, "MAM_USER_TMP");
state = MALL_USER_PENDING;
*mam_state = MAM_USER_PENDING;
return 1;
}
int MAM_St_user_pending(int *mam_state, int wait_completed, void (*user_function)(void *), void *user_args) {
#if USE_MAL_DEBUG
if(mall->myId == mall->root) DEBUG_FUNC("Starting USER redistribution", mall->myId, mall->numP); fflush(stdout);
#endif
if(user_function != NULL) {
MAM_I_create_user_struct(MALLEABILITY_NOT_CHILDREN);
do {
user_function(user_args);
} while(wait_completed && state == MALL_USER_PENDING);
} else {
MAM_Resume_redistribution(mam_state);
}
if(state != MALL_USER_PENDING) {
#if USE_MAL_DEBUG
if(mall->myId == mall->root) DEBUG_FUNC("Ended USER redistribution", mall->myId, mall->numP); fflush(stdout);
#endif
return 1;
}
return 0;
}
int MAM_St_user_completed() {
state = end_redistribution();
return 1;
}
int MAM_St_spawn_adapt_pending(int wait_completed) {
wait_completed = 1;
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_start = MPI_Wtime();
unset_spawn_postpone_flag(state);
state = check_spawn_state(&(mall->intercomm), mall->comm, wait_completed);
/* TODO Comentar problema, basicamente indicar que no es posible de la forma actual
* Ademas es solo para una operación que hemos visto como "extremadamente" rápida
if(!MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_PTHREAD, NULL)) {
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->spawn_start;
return 1;
}
return 0;
*/
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->spawn_start;
return 1;
}
int MAM_St_completed(int *mam_state) {
MAM_Commit(mam_state);
return 0;
}
//======================================================||
//================PRIVATE FUNCTIONS=====================||
//=====================CHILDREN=========================||
//======================================================||
//======================================================||
//======================================================||
//======================================================||
//======================================================||
//======================================================||
/*
* Inicializacion de los datos de los hijos.
* En la misma se reciben datos de los padres: La configuracion
* de la ejecucion a realizar; y los datos a recibir de los padres
* ya sea de forma sincrona, asincrona o ambas.
*/
void Children_init(void (*user_function)(void *), void *user_args) {
size_t i;
#if USE_MAL_DEBUG
DEBUG_FUNC("MaM will now initialize spawned processes", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
#endif
malleability_connect_children(mall->comm, &(mall->intercomm));
if(mall_conf->spawn_method == MALL_SPAWN_MERGE) { // For Merge Method, these processes will be added
MPI_Comm_rank(mall->intercomm, &mall->myId);
MPI_Comm_size(mall->intercomm, &mall->numP);
}
mall->root_collectives = mall->root_parents;
#if USE_MAL_DEBUG
DEBUG_FUNC("Spawned have completed spawn step", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
#endif
comm_data_info(rep_a_data, dist_a_data, MALLEABILITY_CHILDREN);
if(dist_a_data->entries || rep_a_data->entries) { // Recibir datos asincronos
#if USE_MAL_DEBUG >= 2
DEBUG_FUNC("Spawned start asynchronous redistribution", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
#endif
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
if(MAM_Contains_strat(MAM_RED_STRATEGIES, MAM_STRAT_RED_PTHREAD, NULL)) {
recv_data(mall->num_parents, dist_a_data, MALLEABILITY_USE_SYNCHRONOUS);
for(i=0; i<rep_a_data->entries; i++) {
MPI_Bcast(rep_a_data->arrays[i], rep_a_data->qty[i], rep_a_data->types[i], mall->root_collectives, mall->intercomm);
}
} else {
recv_data(mall->num_parents, dist_a_data, MALLEABILITY_USE_ASYNCHRONOUS);
for(i=0; i<rep_a_data->entries; i++) {
MPI_Ibcast(rep_a_data->arrays[i], rep_a_data->qty[i], rep_a_data->types[i], mall->root_collectives, mall->intercomm, &(rep_a_data->requests[i][0]));
}
#if USE_MAL_DEBUG >= 2
DEBUG_FUNC("Spawned started asynchronous redistribution", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
#endif
for(i=0; i<rep_a_data->entries; i++) {
async_communication_wait(rep_a_data->requests[i], rep_a_data->request_qty[i]);
}
for(i=0; i<dist_a_data->entries; i++) {
async_communication_wait(dist_a_data->requests[i], dist_a_data->request_qty[i]);
}
if(MAM_Contains_strat(MAM_RED_STRATEGIES, MAM_STRAT_RED_WAIT_TARGETS, NULL)) {
MPI_Ibarrier(mall->intercomm, &mall->wait_targets);
mall->wait_targets_posted = 1;
MPI_Wait(&mall->wait_targets, MPI_STATUS_IGNORE);
}
#if USE_MAL_DEBUG >= 2
DEBUG_FUNC("Spawned waited for all asynchronous redistributions", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
#endif
for(i=0; i<dist_a_data->entries; i++) {
async_communication_end(dist_a_data->requests[i], dist_a_data->request_qty[i], &(dist_a_data->windows[i]));
}
for(i=0; i<rep_a_data->entries; i++) {
async_communication_end(rep_a_data->requests[i], rep_a_data->request_qty[i], &(rep_a_data->windows[i]));
}
}
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
mall_conf->times->async_end= MPI_Wtime(); // Obtener timestamp de cuando termina comm asincrona
}
#if USE_MAL_DEBUG
DEBUG_FUNC("Spawned have completed asynchronous data redistribution step", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
#endif
if(MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_INTERCOMM, NULL)) {
MPI_Intercomm_merge(mall->intercomm, MALLEABILITY_CHILDREN, &mall->tmp_comm); //El que pone 0 va primero
} else {
MPI_Comm_dup(mall->intercomm, &mall->tmp_comm);
}
MPI_Comm_set_name(mall->tmp_comm, "MAM_USER_TMP");
if(user_function != NULL) {
state = MALL_USER_PENDING;
MAM_I_create_user_struct(MALLEABILITY_CHILDREN);
user_function(user_args);
}
comm_data_info(rep_s_data, dist_s_data, MALLEABILITY_CHILDREN);
if(dist_s_data->entries || rep_s_data->entries) { // Recibir datos sincronos
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
recv_data(mall->num_parents, dist_s_data, MALLEABILITY_USE_SYNCHRONOUS);
for(i=0; i<rep_s_data->entries; i++) {
MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], rep_s_data->types[i], mall->root_collectives, mall->intercomm);
}
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
mall_conf->times->sync_end = MPI_Wtime(); // Obtener timestamp de cuando termina comm sincrona
}
#if USE_MAL_DEBUG
DEBUG_FUNC("Targets have completed synchronous data redistribution step", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
#endif
MAM_Commit(NULL);
#if USE_MAL_DEBUG
DEBUG_FUNC("MaM has been initialized correctly for new ranks", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
#endif
}
//======================================================||
//================PRIVATE FUNCTIONS=====================||
//=====================PARENTS==========================||
//======================================================||
//======================================================||
//======================================================||
//======================================================||
/*
* Se encarga de realizar la creacion de los procesos hijos.
* Si se pide en segundo plano devuelve el estado actual.
*/
int spawn_step(){
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_start = MPI_Wtime();
state = init_spawn(mall->thread_comm, &(mall->intercomm));
if(!MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_PTHREAD, NULL)) {
#if USE_MAL_BARRIERS
MPI_Barrier(mall->comm);
#endif
mall_conf->times->spawn_time = MPI_Wtime() - mall_conf->times->malleability_start;
}
return state;
}
/*
* Comienza la redistribucion de los datos con el nuevo grupo de procesos.
*
* Primero se envia la configuracion a utilizar al nuevo grupo de procesos y a continuacion
* se realiza el envio asincrono y/o sincrono si lo hay.
*
* En caso de que haya comunicacion asincrona, se comienza y se termina la funcion
* indicando que se ha comenzado un envio asincrono.
*
* Si no hay comunicacion asincrono se pasa a realizar la sincrona si la hubiese.
*
* Finalmente se envian datos sobre los resultados a los hijos y se desconectan ambos
* grupos de procesos.
*/
int start_redistribution() {
size_t i;
if(mall->intercomm == MPI_COMM_NULL) {
// Si no tiene comunicador creado, se debe a que se ha pospuesto el Spawn
// y se trata del spawn Merge Shrink
MPI_Comm_dup(mall->comm, &(mall->intercomm));
}
comm_data_info(rep_a_data, dist_a_data, MALLEABILITY_NOT_CHILDREN);
if(dist_a_data->entries || rep_a_data->entries) { // Enviar datos asincronos
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
mall_conf->times->async_start = MPI_Wtime();
if(MAM_Contains_strat(MAM_RED_STRATEGIES, MAM_STRAT_RED_PTHREAD, NULL)) {
return thread_creation();
} else {
send_data(mall->numC, dist_a_data, MALLEABILITY_USE_ASYNCHRONOUS);
for(i=0; i<rep_a_data->entries; i++) {
MPI_Ibcast(rep_a_data->arrays[i], rep_a_data->qty[i], rep_a_data->types[i], mall->root_collectives, mall->intercomm, &(rep_a_data->requests[i][0]));
}
if(mall->zombie && MAM_Contains_strat(MAM_RED_STRATEGIES, MAM_STRAT_RED_WAIT_TARGETS, NULL)) {
MPI_Ibarrier(mall->intercomm, &mall->wait_targets);
mall->wait_targets_posted = 1;
}
return MALL_DIST_PENDING;
}
}
return MALL_USER_START;
}
/*
* Comprueba si la redistribucion asincrona ha terminado.
* Si no ha terminado la funcion termina indicandolo, en caso contrario,
* se continua con la comunicacion sincrona, el envio de resultados y
* se desconectan los grupos de procesos.
*
* Esta funcion permite dos modos de funcionamiento al comprobar si la
* comunicacion asincrona ha terminado.
* Si se utiliza el modo "MAL_USE_NORMAL" o "MAL_USE_POINT", se considera
* terminada cuando los padres terminan de enviar.
* Si se utiliza el modo "MAL_USE_IBARRIER", se considera terminada cuando
* los hijos han terminado de recibir.
* //FIXME Modificar para que se tenga en cuenta rep_a_data
*/
int check_redistribution(int wait_completed) {
int completed, local_completed, all_completed;
size_t i, req_qty;
MPI_Request *req_completed;
MPI_Win window;
local_completed = 1;
#if USE_MAL_DEBUG >= 2
DEBUG_FUNC("Sources are testing for all asynchronous redistributions", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
#endif
if(wait_completed) {
if(MAM_Contains_strat(MAM_RED_STRATEGIES, MAM_STRAT_RED_WAIT_TARGETS, NULL) && !mall->wait_targets_posted) {
MPI_Ibarrier(mall->intercomm, &mall->wait_targets);
mall->wait_targets_posted = 1;
}
for(i=0; i<dist_a_data->entries; i++) {
req_completed = dist_a_data->requests[i];
req_qty = dist_a_data->request_qty[i];
async_communication_wait(req_completed, req_qty);
}
for(i=0; i<rep_a_data->entries; i++) {
req_completed = rep_a_data->requests[i];
req_qty = rep_a_data->request_qty[i];
async_communication_wait(req_completed, req_qty);
}
if(MAM_Contains_strat(MAM_RED_STRATEGIES, MAM_STRAT_RED_WAIT_TARGETS, NULL)) { MPI_Wait(&mall->wait_targets, MPI_STATUS_IGNORE); }
} else {
if(mall->wait_targets_posted) {
MPI_Test(&mall->wait_targets, &local_completed, MPI_STATUS_IGNORE);
} else {
for(i=0; i<dist_a_data->entries; i++) {
req_completed = dist_a_data->requests[i];
req_qty = dist_a_data->request_qty[i];
completed = async_communication_check(MALLEABILITY_NOT_CHILDREN, req_completed, req_qty);
local_completed = local_completed && completed;
}
for(i=0; i<rep_a_data->entries; i++) {
req_completed = rep_a_data->requests[i];
req_qty = rep_a_data->request_qty[i];
completed = async_communication_check(MALLEABILITY_NOT_CHILDREN, req_completed, req_qty);
local_completed = local_completed && completed;
}
if(local_completed && MAM_Contains_strat(MAM_RED_STRATEGIES, MAM_STRAT_RED_WAIT_TARGETS, NULL)) {
MPI_Ibarrier(mall->intercomm, &mall->wait_targets);
mall->wait_targets_posted = 1;
MPI_Test(&mall->wait_targets, &local_completed, MPI_STATUS_IGNORE); //TODO - Figure out if last process takes profit from calling here
}
}
#if USE_MAL_DEBUG >= 2
DEBUG_FUNC("Sources will now check a global decision", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
#endif
MPI_Allreduce(&local_completed, &all_completed, 1, MPI_INT, MPI_MIN, mall->comm);
if(!all_completed) return MALL_DIST_PENDING; // Continue only if asynchronous send has ended
}
#if USE_MAL_DEBUG >= 2
DEBUG_FUNC("Sources sent asynchronous redistributions", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD);
#endif
for(i=0; i<dist_a_data->entries; i++) {
req_completed = dist_a_data->requests[i];
req_qty = dist_a_data->request_qty[i];
window = dist_a_data->windows[i];
async_communication_end(req_completed, req_qty, &window);
}
for(i=0; i<rep_a_data->entries; i++) {
req_completed = rep_a_data->requests[i];
req_qty = rep_a_data->request_qty[i];
window = rep_a_data->windows[i];
async_communication_end(req_completed, req_qty, &window);
}
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
if(mall_conf->spawn_method == MALL_SPAWN_MERGE) mall_conf->times->async_end = MPI_Wtime(); // Merge method only
return MALL_USER_PENDING;
}
/*
* Termina la redistribución de los datos con los hijos, comprobando
* si se han realizado iteraciones con comunicaciones en segundo plano
* y enviando cuantas iteraciones se han realizado a los hijos.
*
* Además se realizan las comunicaciones síncronas se las hay.
* Finalmente termina enviando los datos temporales a los hijos.
*/
int end_redistribution() {
size_t i;
int local_state;
comm_data_info(rep_s_data, dist_s_data, MALLEABILITY_NOT_CHILDREN);
if(dist_s_data->entries || rep_s_data->entries) { // Enviar datos sincronos
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
mall_conf->times->sync_start = MPI_Wtime();
send_data(mall->numC, dist_s_data, MALLEABILITY_USE_SYNCHRONOUS);
for(i=0; i<rep_s_data->entries; i++) {
MPI_Bcast(rep_s_data->arrays[i], rep_s_data->qty[i], rep_s_data->types[i], mall->root_collectives, mall->intercomm);
}
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
if(mall_conf->spawn_method == MALL_SPAWN_MERGE) mall_conf->times->sync_end = MPI_Wtime(); // Merge method only
}
local_state = MALL_DIST_COMPLETED;
if(mall_conf->spawn_method == MALL_SPAWN_MERGE && mall->numP > mall->numC) { // Merge Shrink
local_state = MALL_SPAWN_ADAPT_PENDING;
}
return local_state;
}
// TODO MOVER A OTRO LADO??
//======================================================||
//================PRIVATE FUNCTIONS=====================||
//===============COMM PARENTS THREADS===================||
//======================================================||
//======================================================||
int comm_state; //FIXME Usar un handler
/*
* Crea una hebra para ejecutar una comunicación en segundo plano.
*/
int thread_creation() {
comm_state = MALL_DIST_PENDING;
if(pthread_create(&(mall->async_thread), NULL, thread_async_work, NULL)) {
printf("Error al crear el hilo\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -1;
}
return comm_state;
}
/*
* Comprobación por parte de una hebra maestra que indica
* si una hebra esclava ha terminado su comunicación en segundo plano.
*
* El estado de la comunicación es devuelto al finalizar la función.
*/
int thread_check(int wait_completed) {
int all_completed = 0;
if(wait_completed && comm_state == MALL_DIST_PENDING) {
if(pthread_join(mall->async_thread, NULL)) {
printf("Error al esperar al hilo\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -2;
}
}
// Comprueba que todos los hilos han terminado la distribucion (Mismo valor en commAsync)
MPI_Allreduce(&comm_state, &all_completed, 1, MPI_INT, MPI_MAX, mall->comm);
if(all_completed != MALL_DIST_COMPLETED) return MALL_DIST_PENDING; // Continue only if asynchronous send has ended
if(pthread_join(mall->async_thread, NULL)) {
printf("Error al esperar al hilo\n");
MPI_Abort(MPI_COMM_WORLD, -1);
return -2;
}
#if USE_MAL_BARRIERS
MPI_Barrier(mall->intercomm);
#endif
if(mall_conf->spawn_method == MALL_SPAWN_MERGE) mall_conf->times->async_end = MPI_Wtime(); // Merge method only
return MALL_USER_PENDING;
}
/*
* Función ejecutada por una hebra.
* Ejecuta una comunicación síncrona con los hijos que
* para el usuario se puede considerar como en segundo plano.
*
* Cuando termina la comunicación la hebra maestra puede comprobarlo
* por el valor "commAsync".
*/
void* thread_async_work() {
size_t i;
send_data(mall->numC, dist_a_data, MALLEABILITY_USE_SYNCHRONOUS);
for(i=0; i<rep_a_data->entries; i++) {
MPI_Bcast(rep_a_data->arrays[i], rep_a_data->qty[i], rep_a_data->types[i], mall->root_collectives, mall->intercomm);
}
comm_state = MALL_DIST_COMPLETED;
pthread_exit(NULL);
}
//==============================================================================
/*
* Muestra por pantalla el estado actual de todos los comunicadores
*/
void print_comms_state() {
int tester;
char *test = malloc(MPI_MAX_OBJECT_NAME * sizeof(char));
MPI_Comm_get_name(mall->comm, test, &tester);
printf("P%d Comm=%d Name=%s\n", mall->myId, mall->comm, test);
MPI_Comm_get_name(*(mall->user_comm), test, &tester);
printf("P%d Comm=%d Name=%s\n", mall->myId, *(mall->user_comm), test);
if(mall->intercomm != MPI_COMM_NULL) {
MPI_Comm_get_name(mall->intercomm, test, &tester);
printf("P%d Comm=%d Name=%s\n", mall->myId, mall->intercomm, test);
}
free(test);
}
/*
* Función solo necesaria en Merge
*/
void malleability_comms_update(MPI_Comm comm) {
if(mall->thread_comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->thread_comm));
if(mall->comm != MPI_COMM_WORLD) MPI_Comm_free(&(mall->comm));
MPI_Comm_dup(comm, &(mall->thread_comm));
MPI_Comm_dup(comm, &(mall->comm));
MPI_Comm_set_name(mall->thread_comm, "MAM_THREAD");
MPI_Comm_set_name(mall->comm, "MAM_MAIN");
}
/*
* TODO Por hacer
*/
void MAM_I_create_user_struct(int is_children_group) {
user_reconf->comm = mall->tmp_comm;
if(is_children_group) {
user_reconf->rank_state = MAM_PROC_NEW_RANK;
user_reconf->numS = mall->num_parents;
user_reconf->numT = mall->numP;
} else {
user_reconf->numS = mall->numP;
user_reconf->numT = mall->numC;
if(mall->zombie) user_reconf->rank_state = MAM_PROC_ZOMBIE;
else user_reconf->rank_state = MAM_PROC_CONTINUE;
}
}
#ifndef MALLEABILITY_MANAGER_H
#define MALLEABILITY_MANAGER_H
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <mpi.h>
#include "malleabilityStates.h"
typedef struct {
int numS, numT;
int rank_state;
MPI_Comm comm;
} mam_user_reconf_t;
int MAM_Init(int root, MPI_Comm *comm, char *name_exec, void (*user_function)(void *), void *user_args);
void MAM_Finalize();
int MAM_Checkpoint(int *mam_state, int wait_completed, void (*user_function)(void *), void *user_args);
void MAM_Resume_redistribution(int *mam_state);
int MAM_Get_Reconf_Info(mam_user_reconf_t *reconf_info);
void MAM_Data_add(void *data, size_t *index, size_t total_qty, MPI_Datatype type, int is_replicated, int is_constant);
void MAM_Data_modify(void *data, size_t index, size_t total_qty, MPI_Datatype type, int is_replicated, int is_constant);
void MAM_Data_get_entries(int is_replicated, int is_constant, size_t *entries);
void MAM_Data_get_pointer(void **data, size_t index, size_t *total_qty, MPI_Datatype *type, int is_replicated, int is_constant);
void MAM_Retrieve_times(double *sp_time, double *sy_time, double *asy_time, double *mall_time);
#endif
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sched.h>
#include <mpi.h>
#include "malleabilityRMS.h"
#include "malleabilityDataStructures.h"
#if USE_MAL_SLURM
#include <slurm/slurm.h>
int MAM_I_slurm_getenv_hosts_info();
int MAM_I_slurm_getjob_hosts_info();
#endif
int MAM_I_get_hosts_info();
int GetCPUCount();
void MAM_check_hosts() {
int not_filled = 1;
#if USE_MAL_SLURM
not_filled = MAM_I_slurm_getenv_hosts_info();
if(not_filled) {
if(mall->nodelist != NULL) {
free(mall->nodelist);
mall->nodelist = NULL;
}
not_filled = MAM_I_slurm_getjob_hosts_info();
}
#endif
if(not_filled) {
if(mall->nodelist != NULL) {
free(mall->nodelist);
mall->nodelist = NULL;
}
not_filled = MAM_I_get_hosts_info();
}
if(not_filled) {
if(mall->myId == mall->root) printf("MAM FATAL ERROR: It has not been possible to obtain the nodelist\n");
fflush(stdout);
MPI_Abort(mall->comm, -50);
}
#if USE_MAL_DEBUG >= 2
if(mall->myId == mall->root) {
DEBUG_FUNC("Obtained Nodelist", mall->myId, mall->numP);
printf("NODELIST: %s\nNODE_COUNT: %d NUM_CPUS_PER_NODE: %d\n", mall->nodelist, mall->num_nodes, mall->num_cpus);
fflush(stdout);
}
#endif
}
/*
* TODO
* FIXME Does not consider heterogenous machines for num_cpus
* FIXME Always returns 0... -- Perform error checking?
*/
int MAM_I_get_hosts_info() {
int i, j, name_len, max_name_len, unique_count, *unique_hosts;
char *my_host, *all_hosts, *confirmed_host, *tested_host;
all_hosts = NULL;
my_host = (char *) malloc(MPI_MAX_PROCESSOR_NAME * sizeof(char));
MPI_Get_processor_name(my_host, &name_len);
MPI_Allreduce(&name_len, &max_name_len, 1, MPI_INT, MPI_MAX, mall->comm);
my_host[max_name_len] = '\0';
max_name_len++; // Len does not consider terminating character
if(mall->myId == mall->root) {
all_hosts = (char *) malloc(mall->numP * max_name_len * sizeof(char));
unique_hosts = (int *) malloc(mall->numP * sizeof(int));
unique_hosts[0] = 0; //First host will always be unique
unique_count = 1;
}
MPI_Gather(my_host, max_name_len, MPI_CHAR, all_hosts, max_name_len, MPI_CHAR, mall->root, mall->comm);
if(mall->myId == mall->root) {
for (i = 1; i < mall->numP; i++) {
for (j = 0; j < unique_count; j++) {
tested_host = all_hosts + (i * max_name_len);
confirmed_host = all_hosts + (unique_hosts[j] * max_name_len);
if (strcmp(tested_host, confirmed_host) != 0) {
unique_hosts[unique_count] = i;
unique_count++;
break;
}
}
}
mall->num_nodes = unique_count;
mall->num_cpus = GetCPUCount();
mall->nodelist_len = unique_count*max_name_len;
mall->nodelist = (char *) malloc(mall->nodelist_len * sizeof(char));
strcpy(mall->nodelist, ""); //FIXME Strcat can be very inneficient...
for (i = 0; i < unique_count; i++) {
confirmed_host = all_hosts + (unique_hosts[i] * max_name_len);
strcat(mall->nodelist, confirmed_host);
if (i < unique_count - 1) {
strcat(mall->nodelist, ",");
}
}
free(all_hosts);
free(unique_hosts);
}
free(my_host);
return 0;
}
/*
* @brief Get the total number of CPUs available to the process.
*
* This function uses sched_getaffinity to obtain the CPU affinity of the current process
* and counts the number of CPUs in the affinity set. It adjusts the loop based on the
* maximum number of CPUs allowed on the system.
*
* @return The total number of CPUs available to the process.
*
* Code obtained from: https://stackoverflow.com/questions/4586405/how-to-get-the-number-of-cpus-in-linux-using-c
* The code has been slightly modified.
*/
int GetCPUCount() {
cpu_set_t cs;
CPU_ZERO(&cs);
sched_getaffinity(0, sizeof(cs), &cs);
int count = 0;
int max_cpus = sysconf(_SC_NPROCESSORS_ONLN);
for (int i = 0; i < max_cpus; i++) {
if (CPU_ISSET(i, &cs)) {
count++;
} else {
break;
}
}
return count;
}
#if USE_MAL_SLURM
/*
* TODO
*/
int MAM_I_slurm_getenv_hosts_info() {
char *tmp = NULL, *tmp_copy, *token;
int cpus, count;
//int i, *cpus_counts, *nodes_counts, *aux;
tmp = getenv("SLURM_JOB_NUM_NODES");
if(tmp == NULL) return 1;
mall->num_nodes = atoi(tmp);
tmp = NULL;
tmp = getenv("SLURM_JOB_NODELIST");
if(tmp == NULL) return 1;
mall->nodelist_len = strlen(tmp)+1;
mall->nodelist = (char *) malloc(mall->nodelist_len * sizeof(char));
strcpy(mall->nodelist, tmp);
tmp = NULL;
tmp = getenv("SLURM_JOB_CPUS_PER_NODE");
if(tmp == NULL) return 1;
tmp_copy = (char *) malloc((strlen(tmp)+1) * sizeof(char));
strcpy(tmp_copy, tmp);
token = strtok(tmp_copy, ",");
//TODO When MaM considers heteregenous allocations, these will be needed instead of num_cpus.
//cpus_counts = (int *) malloc(mall->num_nodes * sizeof(int));
//nodes_counts = (int *) malloc(mall->num_nodes * sizeof(int));
//i = 0;
mall->num_cpus = 0;
while (token != NULL) {
count = 1; // The count is not present when is 1 node.
if (sscanf(token, "%d(x%d)", &cpus, &count) >= 1) {
mall->num_cpus = cpus; // num_cpus stores the amount of cores per cpu
//cpus_per_node[i] = cpus;
//nodes_count[i] = count;
//i++;
}
token = strtok(NULL, ",");
}
/*
if(i < mall->num_nodes) {
aux = (int *) realloc(cpus_per_node, i * sizeof(int));
if(cpus_per_node != aux && cpus_per_node != NULL) free(cpus_per_node);
cpus_per_node = aux;
aux = (int *) realloc(nodes_counts, i * sizeof(int));
if(nodes_count != aux && nodes_count != NULL) free(nodes_count);
nodes_count = aux;
}
*/
free(tmp_copy);
return 0;
}
/*
* TODO
* FIXME Does not consider heterogenous machines
*/
int MAM_I_slurm_getjob_hosts_info() {
int jobId, err;
char *tmp = NULL;
job_info_msg_t *j_info;
slurm_job_info_t last_record;
tmp = getenv("SLURM_JOB_ID");
if(tmp == NULL) return 1;
jobId = atoi(tmp);
err = slurm_load_job(&j_info, jobId, 1);
if(err) return err;
last_record = j_info->job_array[j_info->record_count - 1];
mall->num_nodes = last_record.num_nodes;
mall->num_cpus = last_record.num_cpus;
mall->nodelist_len = strlen(last_record.nodes)+1;
mall->nodelist = (char *) malloc(mall->nodelist_len * sizeof(char));
strcpy(mall->nodelist, last_record.nodes);
slurm_free_job_info_msg(j_info);
return 0;
}
#endif
//TODO REFACTOR PARA CUANDO SE COMUNIQUE CON RMS
// Get Slurm job info
//int jobId;
//char *tmp;
//job_info_msg_t *j_info;
//slurm_job_info_t last_record;
//tmp = getenv("SLURM_JOB_ID");
//jobId = atoi(tmp);
//slurm_load_job(&j_info, jobId, 1);
//last_record = j_info->job_array[j_info->record_count - 1];
// Free JOB INFO
//slurm_free_job_info_msg(j_info);
#ifndef MALLEABILITY_RMS_H
#define MALLEABILITY_RMS_H
void MAM_check_hosts();
#endif
#ifndef MALLEABILITY_STATES_H
#define MALLEABILITY_STATES_H
#include <stdio.h>
#include <stdlib.h>
//States
#define MALL_DENIED -1
#define MAM_OK 0
enum mall_inner_states{MALL_UNRESERVED, MALL_NOT_STARTED, MALL_RMS_COMPLETED, MALL_SPAWN_PENDING, MALL_SPAWN_SINGLE_PENDING,
MALL_SPAWN_SINGLE_COMPLETED, MALL_SPAWN_ADAPT_POSTPONE, MALL_SPAWN_COMPLETED, MALL_DIST_PENDING, MALL_DIST_COMPLETED,
MALL_SPAWN_ADAPT_PENDING, MALL_USER_START, MALL_USER_PENDING, MALL_USER_COMPLETED, MALL_SPAWN_ADAPTED, MALL_COMPLETED};
enum mam_states{MAM_UNRESERVED, MAM_NOT_STARTED, MAM_PENDING, MAM_USER_PENDING, MAM_COMPLETED};
enum mam_proc_states{MAM_PROC_CONTINUE, MAM_PROC_NEW_RANK, MAM_PROC_ZOMBIE};
enum mall_spawn_methods{MALL_SPAWN_BASELINE, MALL_SPAWN_MERGE, MAM_METHODS_SPAWN_LEN};
enum mam_spawn_strategies{MAM_STRAT_SPAWN_CLEAR, MAM_STRAT_SPAWN_PTHREAD, MAM_STRAT_SPAWN_SINGLE, MAM_STRAT_SPAWN_INTERCOMM, MAM_STRATS_SPAWN_LEN};
enum mam_phy_dist_methods{MALL_DIST_SPREAD = 1, MALL_DIST_COMPACT, MAM_METHODS_PHYSICAL_DISTRIBUTION_LEN}; //FIXME Cambiar nombres a PHY_DIST?
enum mam_phy_info_methods{MALL_DIST_STRING = 1, MALL_DIST_HOSTFILE}; //FIXME Cambiar nombres a PHY_DIST?
enum mall_redistribution_methods{MALL_RED_BASELINE, MALL_RED_POINT, MALL_RED_RMA_LOCK, MALL_RED_RMA_LOCKALL, MAM_METHODS_RED_LEN};
enum mam_red_strategies{MAM_STRAT_RED_CLEAR, MAM_STRAT_RED_PTHREAD, MAM_STRAT_RED_WAIT_SOURCES, MAM_STRAT_RED_WAIT_TARGETS, MAM_STRATS_RED_LEN};
/* KEYS & VALUES for config*/
enum mam_key_values{MAM_SPAWN_METHOD=0, MAM_SPAWN_STRATEGIES, MAM_PHYSICAL_DISTRIBUTION, MAM_RED_METHOD, MAM_RED_STRATEGIES, MAM_NUM_TARGETS, MAM_KEY_COUNT};
#define MAM_SPAWN_METHOD_ENV "MAM_SPAWN_METHOD"
#define MAM_SPAWN_STRATS_ENV "MAM_SPAWN_STRATS"
#define MAM_PHYSICAL_DISTRIBUTION_METHOD_ENV "MAM_PHYSICAL_DISTRIBUTION_METHOD"
#define MAM_RED_METHOD_ENV "MAM_RED_METHOD"
#define MAM_RED_STRATS_ENV "MAM_RED_STRATS"
#define MAM_NUM_TARGETS_ENV "MAM_NUM_TARGETS"
#define MALLEABILITY_ROOT 0
#define MAL_APP_EXECUTING 0
#define MAL_APP_ENDED 1
#define MAM_CHECK_COMPLETION 0
#define MAM_WAIT_COMPLETION 1
#define MALLEABILITY_CHILDREN 1
#define MALLEABILITY_NOT_CHILDREN 0
#define MAM_DATA_DISTRIBUTED 0
#define MAM_DATA_REPLICATED 1
#define MAM_DATA_VARIABLE 0
#define MAM_DATA_CONSTANT 1
#endif
#include "malleabilityTimes.h"
#include "malleabilityDataStructures.h"
void def_malleability_times(MPI_Datatype *new_type);
void init_malleability_times() {
#if USE_MAL_DEBUG
DEBUG_FUNC("Initializing recording structure", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(mall->comm);
#endif
mall_conf->times = (malleability_times_t *) malloc(sizeof(malleability_times_t));
if(mall_conf->times == NULL) {
perror("Error al crear la estructura de tiempos interna para maleabilidad\n");
MPI_Abort(MPI_COMM_WORLD, -5);
}
reset_malleability_times();
def_malleability_times(&mall_conf->times->times_type);
#if USE_MAL_DEBUG
DEBUG_FUNC("Initialized recording structure", mall->myId, mall->numP); fflush(stdout); MPI_Barrier(mall->comm);
#endif
}
void reset_malleability_times() {
malleability_times_t *times = mall_conf->times;
times->spawn_start = 0; times->sync_start = 0; times->async_start = 0; times->malleability_start = 0;
times->sync_end = 0; times->async_end = 0; times->malleability_end = 0;
times->spawn_time = 0;
}
void free_malleability_times() {
#if USE_MAL_DEBUG
DEBUG_FUNC("Freeing recording structure", mall->myId, mall->numP); fflush(stdout);
#endif
if(mall_conf->times != NULL) {
if(mall_conf->times->times_type != MPI_DATATYPE_NULL) {
MPI_Type_free(&mall_conf->times->times_type);
mall_conf->times->times_type = MPI_DATATYPE_NULL;
}
free(mall_conf->times);
}
#if USE_MAL_DEBUG
DEBUG_FUNC("Freed recording structure", mall->myId, mall->numP); fflush(stdout);
#endif
}
void malleability_times_broadcast(int root) {
MPI_Bcast(mall_conf->times, 1, mall_conf->times->times_type, root, mall->intercomm);
}
void MAM_I_retrieve_times(double *sp_time, double *sy_time, double *asy_time, double *mall_time) {
malleability_times_t *times = mall_conf->times;
*sp_time = times->spawn_time;
*sy_time = times->sync_end - times->sync_start;
*asy_time = times->async_end - times->async_start;
*mall_time = times->malleability_end - times->malleability_start;
}
void def_malleability_times(MPI_Datatype *new_type) {
int i, counts = 4;
int blocklengths[counts];
MPI_Aint displs[counts], dir;
MPI_Datatype types[counts];
blocklengths[0] = blocklengths[1] = blocklengths[2] = blocklengths[3] = 1;
types[0] = types[1] = types[2] = types[3] = MPI_DOUBLE;
// Se pasa el vector a traves de la direccion de "mall_conf"
// Rellenar vector displs
MPI_Get_address(mall_conf->times, &dir);
// Obtener direccion base
MPI_Get_address(&(mall_conf->times->spawn_time), &displs[0]);
MPI_Get_address(&(mall_conf->times->sync_start), &displs[1]);
MPI_Get_address(&(mall_conf->times->async_start), &displs[2]);
MPI_Get_address(&(mall_conf->times->malleability_start), &displs[3]);
for(i=0;i<counts;i++) displs[i] -= dir;
MPI_Type_create_struct(counts, blocklengths, displs, types, new_type);
MPI_Type_commit(new_type);
}
#ifndef MALLEABILITY_TIMES_H
#define MALLEABILITY_TIMES_H
#include <mpi.h>
void init_malleability_times();
void reset_malleability_times();
void free_malleability_times();
void malleability_times_broadcast(int root);
void MAM_I_retrieve_times(double *sp_time, double *sy_time, double *asy_time, double *mall_time);
#endif
#include "malleabilityTypes.h"
#include "malleabilityDataStructures.h"
#include "MAM_Configuration.h"
void init_malleability_data_struct(malleability_data_t *data_struct, size_t size);
void realloc_malleability_data_struct(malleability_data_t *data_struct, size_t qty_to_add);
void def_malleability_entries(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, MPI_Datatype *new_type);
void def_malleability_qty_type(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, MPI_Datatype *new_type);
//======================================================||
//======================================================||
//===================PUBLIC FUNCTIONS===================||
//======================================================||
//======================================================||
/*
* Anyade en la estructura de datos a comunicar con los hijos
* un nuevo set de datos de un total "total_qty" distribuido entre
* todos los padres. La nueva serie "data" solo representa los datos
* que tiene este padre.
*/
void add_data(void *data, size_t total_qty, MPI_Datatype type, size_t request_qty, malleability_data_t *data_struct) {
size_t i;
if(data_struct->entries == 0) {
init_malleability_data_struct(data_struct, MALLEABILITY_INIT_DATA_QTY);
} else if(data_struct->entries == data_struct->max_entries) {
realloc_malleability_data_struct(data_struct, MALLEABILITY_INIT_DATA_QTY);
}
data_struct->qty[data_struct->entries] = total_qty;
data_struct->types[data_struct->entries] = type;
data_struct->arrays[data_struct->entries] = data;
data_struct->request_qty[data_struct->entries] = request_qty;
if(request_qty) {
data_struct->requests[data_struct->entries] = (MPI_Request *) malloc(request_qty * sizeof(MPI_Request));
for(i=0; i < request_qty; i++) {
data_struct->requests[data_struct->entries][i] = MPI_REQUEST_NULL;
}
}
data_struct->entries+=1;
}
/*
* Modifica en la estructura de datos a comunicar con los hijos
* un set de datos de un total "total_qty" distribuido entre
* todos los padres. La nueva serie "data" solo representa los datos
* que tiene este padre.
*/
void modify_data(void *data, size_t index, size_t total_qty, MPI_Datatype type, size_t request_qty, malleability_data_t *data_struct) {
size_t i;
if(data_struct->entries < index) { // Index does not exist
return;
}
if(data_struct->requests[index] != NULL) {
//free(data_struct->requests[index]); TODO Error when trying to free
data_struct->requests[index] = NULL;
}
data_struct->qty[index] = total_qty;
data_struct->types[index] = type;
data_struct->arrays[index] = data;
data_struct->request_qty[index] = request_qty;
if(request_qty) {
data_struct->requests[index] = (MPI_Request *) malloc(request_qty * sizeof(MPI_Request));
for(i=0; i < request_qty; i++) {
data_struct->requests[index][i] = MPI_REQUEST_NULL;
}
}
}
/*
* Comunicar desde los padres a los hijos las estructuras de datos sincronas o asincronas
* No es necesario que las estructuras esten inicializadas para un buen funcionamiento.
*
* En el argumento "root" todos tienen que indicar quien es el proceso raiz de los padres
* unicamente.
*/
void comm_data_info(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, int is_children_group) {
int type_size;
size_t i, j;
MPI_Datatype entries_type, struct_type;
// Mandar primero numero de entradas
def_malleability_entries(data_struct_dist, data_struct_rep, &entries_type);
MPI_Bcast(MPI_BOTTOM, 1, entries_type, mall->root_collectives, mall->intercomm);
if(is_children_group && ( data_struct_rep->entries != 0 || data_struct_dist->entries != 0 )) {
init_malleability_data_struct(data_struct_rep, data_struct_rep->entries);
init_malleability_data_struct(data_struct_dist, data_struct_dist->entries);
}
def_malleability_qty_type(data_struct_dist, data_struct_rep, &struct_type);
MPI_Bcast(MPI_BOTTOM, 1, struct_type, mall->root_collectives, mall->intercomm);
if(is_children_group) {
for(i=0; i < data_struct_rep->entries; i++) {
MPI_Type_size(data_struct_rep->types[i], &type_size);
data_struct_rep->arrays[i] = (void *) malloc(data_struct_rep->qty[i] * type_size);
data_struct_rep->requests[i] = (MPI_Request *) malloc(data_struct_rep->request_qty[i] * sizeof(MPI_Request));
for(j=0; j < data_struct_rep->request_qty[i]; j++) {
data_struct_rep->requests[i][j] = MPI_REQUEST_NULL;
}
}
for(i=0; i < data_struct_dist->entries; i++) {
data_struct_dist->arrays[i] = (void *) NULL; // TODO Se podria inicializar aqui?
data_struct_dist->requests[i] = (MPI_Request *) malloc(data_struct_dist->request_qty[i] * sizeof(MPI_Request));
for(j=0; j < data_struct_dist->request_qty[i]; j++) {
data_struct_dist->requests[i][j] = MPI_REQUEST_NULL;
}
}
}
MPI_Type_free(&entries_type);
MPI_Type_free(&struct_type);
}
//======================================================||
//======================================================||
//=========INIT/REALLOC/FREE RESULTS FUNCTIONS==========||
//======================================================||
//======================================================||
/*
* Inicializa la estructura que describe una serie de datos con las mismas
* caracteristicas de localización y uso. Se inicializa para utilizar hasta
* "size" elementos.
*/
void init_malleability_data_struct(malleability_data_t *data_struct, size_t size) {
size_t i;
data_struct->max_entries = size;
data_struct->qty = (size_t *) malloc(size * sizeof(size_t));
data_struct->types = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype));
data_struct->request_qty = (size_t *) malloc(size * sizeof(size_t));
data_struct->requests = (MPI_Request **) malloc(size * sizeof(MPI_Request *));
data_struct->windows = (MPI_Win *) malloc(size * sizeof(MPI_Win));
data_struct->arrays = (void **) malloc(size * sizeof(void *));
for(i=0; i<size; i++) { //calloc and memset does not ensure a NULL value
data_struct->requests[i] = NULL;
data_struct->arrays[i] = NULL;
}
}
/*
* Realoja la estructura que describe una serie de datos con las mismas
* caracteristicas de localización y uso. Se anyaden "size" entradas nuevas
* a las ya existentes.
*/
void realloc_malleability_data_struct(malleability_data_t *data_struct, size_t qty_to_add) {
size_t i, needed, *qty_aux, *request_qty_aux;
MPI_Datatype *types_aux;
MPI_Win *windows_aux;
MPI_Request **requests_aux;
void **arrays_aux;
needed = data_struct->max_entries + qty_to_add;
qty_aux = (size_t *) realloc(data_struct->qty, needed * sizeof(int));
types_aux = (MPI_Datatype *) realloc(data_struct->types, needed * sizeof(MPI_Datatype));
request_qty_aux = (size_t *) realloc(data_struct->request_qty, needed * sizeof(int));
requests_aux = (MPI_Request **) realloc(data_struct->requests, needed * sizeof(MPI_Request *));
windows_aux = (MPI_Win *) realloc(data_struct->windows, needed * sizeof(MPI_Win));
arrays_aux = (void **) realloc(data_struct->arrays, needed * sizeof(void *));
if(qty_aux == NULL || arrays_aux == NULL || requests_aux == NULL || types_aux == NULL || request_qty_aux == NULL || windows_aux == NULL) {
fprintf(stderr, "Fatal error - No se ha podido realojar la memoria constante de datos a redistribuir/comunicar\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
for(i=data_struct->max_entries; i<needed; i++) { //realloc does not ensure a NULL value
requests_aux[i] = NULL;
arrays_aux[i] = NULL;
}
// Check if old array can be freed
if(data_struct->qty != qty_aux && data_struct->qty != NULL) free(data_struct->qty);
if(data_struct->types != types_aux && data_struct->types != NULL) free(data_struct->types);
if(data_struct->request_qty != request_qty_aux && data_struct->request_qty != NULL) free(data_struct->request_qty);
if(data_struct->requests != requests_aux && data_struct->requests != NULL) free(data_struct->requests);
if(data_struct->windows != windows_aux && data_struct->windows != NULL) free(data_struct->windows);
if(data_struct->arrays != arrays_aux && data_struct->arrays != NULL) free(data_struct->arrays);
data_struct->qty = qty_aux;
data_struct->types = types_aux;
data_struct->request_qty = request_qty_aux;
data_struct->requests = requests_aux;
data_struct->windows = windows_aux;
data_struct->arrays = arrays_aux;
data_struct->max_entries = needed;
}
void free_malleability_data_struct(malleability_data_t *data_struct) {
size_t i, j, max;
max = data_struct->entries;
if(max != 0) {
if(data_struct->qty != NULL) {
free(data_struct->qty);
}
if(data_struct->types != NULL) {
free(data_struct->types);
}
if(data_struct->requests != NULL && data_struct->request_qty != NULL) {
for(i=0; i<max; i++) {
if(data_struct->requests[i] != NULL) {
for(j=0; j<data_struct->request_qty[i]; j++) {
if(data_struct->requests[i][j] != MPI_REQUEST_NULL) {
MPI_Request_free(&(data_struct->requests[i][j]));
data_struct->requests[i][j] = MPI_REQUEST_NULL;
}
}
free(data_struct->requests[i]);
}
}
free(data_struct->request_qty);
free(data_struct->requests);
}
if(data_struct->windows != NULL) {
free(data_struct->windows);
}
if(data_struct->arrays != NULL) {
free(data_struct->arrays);
}
}
}
//======================================================||
//======================================================||
//================MPI DERIVED DATATYPES=================||
//======================================================||
//======================================================||
/*
* Crea un tipo derivado para mandar el numero de entradas
* en dos estructuras de descripcion de datos.
*/
void def_malleability_entries(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, MPI_Datatype *new_type) {
int counts = 2;
int blocklengths[counts];
MPI_Aint displs[counts];
MPI_Datatype types[counts], type_size_t;
MPI_Type_match_size(MPI_TYPECLASS_INTEGER, sizeof(size_t), &type_size_t);
blocklengths[0] = blocklengths[1] = 1;
types[0] = types[1] = type_size_t;
// Obtener direccion base
MPI_Get_address(&(data_struct_rep->entries), &displs[0]);
MPI_Get_address(&(data_struct_dist->entries), &displs[1]);
MPI_Type_create_struct(counts, blocklengths, displs, types, new_type);
MPI_Type_commit(new_type);
}
/*
* Crea un tipo derivado para mandar las cantidades y tipo
* de datos de dos estructuras de descripcion de datos.
* El vector de "requests" no es enviado ya que solo es necesario
* en los padres.
* TODO Refactor?
*/
void def_malleability_qty_type(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, MPI_Datatype *new_type) {
int counts = 6;
int blocklengths[counts];
MPI_Aint displs[counts];
MPI_Datatype types[counts], type_size_t;
MPI_Type_match_size(MPI_TYPECLASS_INTEGER, sizeof(size_t), &type_size_t);
types[0] = types[1] = types[3] = types[4] = type_size_t;
types[2] = types[5] = MPI_INT;
blocklengths[0] = blocklengths[1] = blocklengths[2] = data_struct_rep->entries;
blocklengths[3] = blocklengths[4] = blocklengths[5] = data_struct_dist->entries;
MPI_Get_address((data_struct_rep->qty), &displs[0]);
MPI_Get_address((data_struct_rep->request_qty), &displs[1]);
MPI_Get_address((data_struct_rep->types), &displs[2]); // MPI_Datatype uses typedef int to be declared
MPI_Get_address((data_struct_dist->qty), &displs[3]);
MPI_Get_address((data_struct_dist->request_qty), &displs[4]);
MPI_Get_address((data_struct_dist->types), &displs[5]); // MPI_Datatype uses typedef int to be declared
MPI_Type_create_struct(counts, blocklengths, displs, types, new_type);
MPI_Type_commit(new_type);
}
#ifndef MALLEABILITY_TYPES_H
#define MALLEABILITY_TYPES_H
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <fcntl.h>
#include <sys/stat.h>
#include "malleabilityStates.h"
#define MALLEABILITY_INIT_DATA_QTY 100
typedef struct {
size_t entries; // Indica numero de vectores a comunicar (replicated data)
size_t max_entries;
size_t *qty; // Indica numero de elementos en cada subvector de sync_array
MPI_Datatype *types;
// Vector de vectores de request. En cada elemento superior se indican los requests a comprobar para dar por finalizada
// la comunicacion de ese dato
size_t *request_qty;
MPI_Request **requests;
MPI_Win *windows;
void **arrays; // Cada subvector es una serie de datos a comunicar
} malleability_data_t;
void add_data(void *data, size_t total_qty, MPI_Datatype type, size_t request_qty, malleability_data_t *data_struct);
void modify_data(void *data, size_t index, size_t total_qty, MPI_Datatype type, size_t request_qty, malleability_data_t *data_struct);
void comm_data_info(malleability_data_t *data_struct_rep, malleability_data_t *data_struct_dist, int is_children_group);
void free_malleability_data_struct(malleability_data_t *data_struct);
#endif
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment