Commit 1cd76849 authored by iker_martin's avatar iker_martin
Browse files

Modified code to allow the usage of MaM. IMKL is no longer required for MaM requirements.

parent 9135f28c
This diff is collapsed.
......@@ -29,6 +29,35 @@ void CreateSparseMatrix (ptr_SparseMatrix p_spr, int index, int numR, int numC,
CreateDoubles (&(p_spr->vval), numE+(numR+1)*msr);
}
// This routine creates the first part of a sparseMatrix from the next parameters
// * numR defines the number of rows
// * numC defines the number of columns
// * msr indicates if the MSR is the format used to the sparse matrix
// If msr is actived, numE doesn't include the diagonal elements
// The parameter index indicates if 0-indexing or 1-indexing is used.
void CreateSparseMatrixVptr (ptr_SparseMatrix spr, int numR, int numC,
int msr)
{
spr->dim1 = numR; spr->dim2 = numC;
CreateInts (&(spr->vptr), numR+1);
*(spr->vptr) = ((msr)? (numR+1): 0);
}
// This routine creates the second part of a sparseMatrix from the next parameters
// * numR defines the number of rows
// * numC defines the number of columns
// * numE defines the number of nonzero elements
// * msr indicates if the MSR is the format used to the sparse matrix
// If msr is actived, numE doesn't include the diagonal elements
// The parameter index indicates if 0-indexing or 1-indexing is used.
void CreateSparseMatrixValues (ptr_SparseMatrix spr, int numR, int numC, int numE,
int msr)
{
CreateInts (&(spr->vpos), numE+(numR+1)*msr);
CreateDoubles (&(spr->vval), numE+(numR+1)*msr);
}
// This routine liberates the memory related to matrix spr
void RemoveSparseMatrix (ptr_SparseMatrix spr) {
// First the scalar are initiated
......@@ -37,6 +66,16 @@ void RemoveSparseMatrix (ptr_SparseMatrix spr) {
RemoveInts (&(spr->vptr)); RemoveDoubles (&(spr->vval));
}
// This routine liberates the memory related to matrix spr when
// vptr and vpos have been allocated separetely
void RemoveSparseMatrix2 (ptr_SparseMatrix spr)
{
spr->dim1 = -1; spr->dim2 = -1;
RemoveInts (&(spr->vptr));
RemoveInts (&(spr->vpos));
RemoveDoubles (&(spr->vval));
}
/*********************************************************************************/
// This routine creates de sparse matrix dst from the symmetric matrix spr.
......
......@@ -22,8 +22,14 @@ typedef struct
extern void CreateSparseMatrix (ptr_SparseMatrix p_spr, int index, int numR, int numC, int numE,
int msr);
extern void CreateSparseMatrixVptr (ptr_SparseMatrix spr, int numR, int numC,
int msr);
extern void CreateSparseMatrixValues (ptr_SparseMatrix spr, int numR, int numC, int numE,
int msr);
// This routine liberates the memory related to matrix spr
extern void RemoveSparseMatrix (ptr_SparseMatrix spr);
extern void RemoveSparseMatrix2 (ptr_SparseMatrix spr);
/*********************************************************************************/
......
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include "ToolsMAM.h"
struct Dist_data {
int ini;
int fin;
int tamBl; // Numero de filas
int n;
int myId;
int numP;
int numP_parents;
MPI_Comm comm;
};
//----------------------------------------------------------------------------------------------------
void get_dist(int total_r, int id, int numP, struct Dist_data *dist_data);
void prepare_redist_counts(int *counts, int *displs, int numP_other, int offset, struct Dist_data dist_data, int *vptr);
void prepare_redist_counts_vlen(int *counts, int *displs, int numP_other, int offset, struct Dist_data dist_data);
void set_counts(int id, int numP, struct Dist_data data_dist, int offset, int *sendcounts);
void getIds_intercomm(struct Dist_data dist_data, int numP_other, int *idS);
//----------------------------------------------------------------------------------------------------
void print_counts2(struct Dist_data data_dist, int *xcounts, int *xdispls, int size, int include_zero, const char* name);
void print_global_results(double start_time);
//----------------------------------------------------------------------------------------------------
/*
* Funcion encargada de realizar la redistribucion de datos
* asíncrona del usuario.
*
* Calcula el total de elementos a enviar/recibir por cada proceso
* y tras ello llama a la funcion Ialltoallv dos veces.
*
* Además inicializa la memoria para aquellos procesos que vayan
* a recibir datos.
*/
void targets_distribution(mam_user_reconf_t user_reconf, user_redist_t *user_data) {
int i, n, offset, elems, numP, *vlen, *rank_states;
int *scounts, *rcounts, *sdispls, *rdispls;
size_t total_qty;
void *value = NULL;
struct Dist_data dist_data;
MPI_Datatype type;
int aux_int;
int *recv_vpos = &aux_int;
double aux_double;
double *recv_vval = &aux_double;
MPI_Comm_size(user_reconf.comm, &numP);
scounts = (int *) calloc(numP, sizeof(int));
sdispls = (int *) calloc(numP, sizeof(int));
rcounts = (int *) calloc(numP, sizeof(int));
rdispls = (int *) calloc(numP, sizeof(int));
offset = 0;
rank_states = (int *) malloc(numP * sizeof(int));
MPI_Allgather(&user_reconf.rank_state, 1, MPI_INT, rank_states, 1, MPI_INT, user_reconf.comm);
MAM_Data_get_pointer(&value, 0, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT);
vlen = ((int *)value);
n = (int) total_qty;
if(user_reconf.rank_state != MAM_PROC_ZOMBIE) {
MPI_Comm_rank(user_data->comm, &dist_data.myId);
dist_data.numP = user_reconf.numT;
if(user_reconf.rank_state == MAM_PROC_NEW_RANK) {
user_data->array_vpos = &aux_int;
user_data->array_vval = &aux_double;
for(i=0; i<user_reconf.numS; i++) {
if(rank_states[i] == MAM_PROC_CONTINUE) {
dist_data.myId += user_reconf.numS;
break;
}
}
}
//get_dist(n, dist_data.myId, dist_data.numP, &dist_data);
//CreateSparseMatrixVptr(&user_data->other_subm, dist_data.tamBl, n, 0);
user_data->other_subm.vptr[0] = 0;
for(i=0; i<dist_data.tamBl; i++) {
user_data->other_subm.vptr[i+1] = vlen[i];
}
//TransformLengthtoHeader(user_data->other_subm.vptr, user_data->other_subm.dim1); // The array is converted from vlen to vptr
elems = user_data->other_subm.vptr[dist_data.tamBl];
//CreateSparseMatrixValues(&user_data->other_subm, dist_data.tamBl, n, elems, 0);
recv_vpos = user_data->other_subm.vpos;
recv_vval = user_data->other_subm.vval;
prepare_redist_counts(rcounts, rdispls, user_reconf.numS, offset, dist_data, user_data->other_subm.vptr);
}
if(user_reconf.rank_state != MAM_PROC_NEW_RANK) {
MPI_Comm_rank(user_data->comm, &dist_data.myId);
dist_data.numP = user_reconf.numS;
//get_dist(n, dist_data.myId, dist_data.numP, &dist_data);
offset = (user_reconf.numS + user_reconf.numT) == numP ?
user_reconf.numS : 0;
//prepare_redist_counts(scounts, sdispls, user_reconf.numT, offset, dist_data, user_data->array_vptr);
}
// COMUNICACION DE DATOS //
MPI_Ialltoallv(user_data->array_vpos, scounts, sdispls, MPI_INT, recv_vpos, rcounts, rdispls, MPI_INT, user_reconf.comm, &user_data->reqs[0]);
MPI_Ialltoallv(user_data->array_vval, scounts, sdispls, MPI_DOUBLE, recv_vval, rcounts, rdispls, MPI_DOUBLE, user_reconf.comm, &user_data->reqs[1]);
free(rank_states);
free(scounts); free(sdispls); free(rcounts); free(rdispls);
}
/*
* ========================================================================================
* ========================================================================================
* ================================DISTRIBUTION FUNCTIONS==================================
* ========================================================================================
* ========================================================================================
*/
/*
* Obtiene para el Id que se pasa junto a su
* numero de procesos total, con cuantas filas (tamBl),
* elementos por fila, y total de filas (fin - ini)
* con las que va a trabajar el proceso
*/
void get_dist(int total_r, int id, int numP, struct Dist_data *dist_data) {
int rem;
dist_data->n = total_r;
dist_data->tamBl = total_r / numP;
rem = total_r % numP;
if(id < rem) { // First subgroup
dist_data->ini = id * dist_data->tamBl + id;
dist_data->fin = (id+1) * dist_data->tamBl + (id+1);
} else { // Second subgroup
dist_data->ini = id * dist_data->tamBl + rem;
dist_data->fin = (id+1) * dist_data->tamBl + rem;
}
if(dist_data->fin > total_r) {
dist_data->fin = total_r;
}
if(dist_data->ini > dist_data->fin) {
dist_data->ini = dist_data->fin;
}
dist_data->tamBl = dist_data->fin - dist_data->ini;
}
void prepare_redist_counts(int *counts, int *displs, int numP_other, int offset, struct Dist_data dist_data, int *vptr) {
int idS[2], i, idS_zero;
int last_index, first_index;
getIds_intercomm(dist_data, numP_other, idS);
idS[0] += offset;
idS[1] += offset;
idS_zero = 0;
if(!idS[0]) {
set_counts(0, numP_other, dist_data, offset, counts);
idS_zero = 1;
}
for(i=idS[0] + idS_zero; i<idS[1]; i++) {
set_counts(i, numP_other, dist_data, offset, counts);
displs[i] = displs[i-1] + counts[i-1];
}
if(!idS[0]) {
last_index = counts[0];
first_index = 0;
counts[0] = vptr[last_index] - vptr[first_index];
}
for(i=idS[0] + idS_zero; i<idS[1]; i++) {
last_index = displs[i] + counts[i];
first_index = displs[i];
counts[i] = vptr[last_index] - vptr[first_index];
displs[i] = displs[i-1] + counts[i-1];
}
}
void prepare_redist_counts_vlen(int *counts, int *displs, int numP_other, int offset, struct Dist_data dist_data) {
int idS[2], i, idS_zero;
getIds_intercomm(dist_data, numP_other, idS);
idS[0] += offset;
idS[1] += offset;
idS_zero = 0;
if(!idS[0]) {
set_counts(0, numP_other, dist_data, offset, counts);
idS_zero = 1;
}
for(i=idS[0] + idS_zero; i<idS[1]; i++) {
set_counts(i, numP_other, dist_data, offset, counts);
displs[i] = displs[i-1] + counts[i-1];
}
}
/*
* Obtiene para un Id de proceso, cuantos elementos va
* a enviar/recibir el proceso myId
*/
void set_counts(int id, int numP, struct Dist_data data_dist, int offset, int *sendcounts) {
struct Dist_data other;
int biggest_ini, smallest_end;
get_dist(data_dist.n, id-offset, numP, &other);
// Si el rango de valores no coincide, se pasa al siguiente proceso
if(data_dist.ini >= other.fin || data_dist.fin <= other.ini) {
return;
}
// Obtiene el proceso con mayor ini entre los dos procesos
biggest_ini = (data_dist.ini > other.ini) ? data_dist.ini : other.ini;
// Obtiene el proceso con menor fin entre los dos procesos
smallest_end = (data_dist.fin < other.fin) ? data_dist.fin : other.fin;
sendcounts[id] = smallest_end - biggest_ini; // Numero de elementos a enviar/recibir del proceso Id
}
/*
* Obtiene para un proceso de un grupo a que rango procesos de
* otro grupo tiene que enviar o recibir datos.
*
* Devuelve el primer identificador y el último (Excluido) con el que
* comunicarse.
*/
void getIds_intercomm(struct Dist_data dist_data, int numP_other, int *idS) {
int idI, idE;
int tamOther = dist_data.n / numP_other;
int remOther = dist_data.n % numP_other;
int middle = (tamOther + 1) * remOther;
if(middle > dist_data.ini) { // First subgroup
idI = dist_data.ini / (tamOther + 1);
} else { // Second subgroup
idI = ((dist_data.ini - middle) / tamOther) + remOther;
}
if(middle >= dist_data.fin) { // First subgroup
idE = dist_data.fin / (tamOther + 1);
idE = (dist_data.fin % (tamOther + 1) > 0 && idE+1 <= numP_other) ? idE+1 : idE;
} else { // Second subgroup
idE = ((dist_data.fin - middle) / tamOther) + remOther;
idE = ((dist_data.fin - middle) % tamOther > 0 && idE+1 <= numP_other) ? idE+1 : idE;
}
idS[0] = idI;
idS[1] = idE;
}
void print_counts2(struct Dist_data data_dist, int *xcounts, int *xdispls, int size, int include_zero, const char* name) {
int i;
for(i=0; i < size; i++) {
if(xcounts[i] != 0 || include_zero) {
printf("P%d of %d | %scounts[%d]=%d disp=%d\n", data_dist.myId, data_dist.numP, name, i, xcounts[i], xdispls[i]);
}
}
}
void print_global_results(double start_time) {
double sp_time, sy_time, asy_time, mall_time, global_time;
MAM_Retrieve_times(&sp_time, &sy_time, &asy_time, &mall_time);
global_time = MPI_Wtime() - start_time;
printf("T_spawn: %lf", sp_time);
printf("\nT_SR: %lf", sy_time);
printf("\nT_AR: %lf", asy_time);
printf("\nT_Malleability: %lf", mall_time);
printf("\nT_total: %lf\n", global_time);
}
#ifndef ToolsMAM
#define ToolsMAM 1
#include "SparseProduct.h"
#include <mpi.h>
#include "../malleability/MAM.h"
typedef struct {
int n, initiated;
SparseMatrix other_subm;
int *recv_vlen;
int *array_vptr, *array_vlen, *array_vpos;
double *array_vval;
MPI_Comm comm;
MPI_Request reqs[2];
} user_redist_t;
static const user_redist_t empty_user_data = {
.n = 0,
.initiated = 0,
.recv_vlen = NULL,
.array_vptr = NULL,
.array_vlen = NULL,
.array_vpos = NULL,
.array_vval = NULL,
.comm = MPI_COMM_NULL,
};
extern void targets_distribution(mam_user_reconf_t user_reconf, user_redist_t *user_data);
//extern void targets_distribution_synch(mam_user_reconf_t user_reconf, user_redist_t *user_data);
#endif
......@@ -224,6 +224,21 @@ int DistributeMatrix (SparseMatrix spr, int index, ptr_SparseMatrix sprL, int in
return dim;
}
int ComputeMatrixSizes (int dim, int *vdimL, int *vdspL, MPI_Comm comm) {
int myId, nProcs;
int i, divL, rstL;
// Getiing the parameter of the communicator
MPI_Comm_rank(comm, &myId); MPI_Comm_size(comm, &nProcs);
// Calculating the vectors of sizes (vdimL) and displacements (vdspl)
divL = (dim / nProcs); rstL = (dim % nProcs);
for (i=0; i<nProcs; i++) vdimL[i] = divL + (i < rstL);
vdspL[0] = 0; for (i=0; i<nProcs; i++) vdspL[i+1] = vdspL[i] + vdimL[i];
return dim;
}
/*********************************************************************************/
// vcols is a vector with dimPos elements, including integer values from 0 to dim-1
......
......@@ -104,6 +104,7 @@ extern int ComputeSprMatrixRecvWeights (int prc_src, int sizes, MPI_Comm comm);
extern int DistributeMatrix (SparseMatrix spr, int index, ptr_SparseMatrix sprL, int indexL,
int *vdimL, int *vdspL, int root, MPI_Comm comm);
extern int ComputeMatrixSizes (int dim, int *vdimL, int *vdspL, MPI_Comm comm);
/*********************************************************************************/
......
#include <stdio.h>
#include <stdlib.h>
#include "mymkl.h"
/**********************************************/
void rcopy (int *n, double *x, int *incx, double *y, int *incy) {
int i, dim = *n, ix = *incx, iy = *incy;
double *px = x, *py = y;
for (i=0; i<dim; i++) {
*py = *px; px += ix; py += iy;
}
}
void rscal (int *n, double *alpha, double *x, int *incx) {
int i, dim = *n, ix = *incx;
double *px = x, a = *alpha;
for (i=0; i<dim; i++) {
*px *= a; px += ix;
}
}
void raxpy (int *n, double *alpha, double *x, int *incx, double *y, int *incy) {
int i, dim = *n, ix = *incx, iy = *incy;
double *px = x, *py = y, a = *alpha;
for (i=0; i<dim; i++) {
*py += *px * a; px += ix; py += iy;
}
}
double rdot (int *n, double *x, int *incx, double *y, int *incy) {
int i, dim = *n, ix = *incx, iy = *incy;
double aux = 0.0, *px = x, *py = y;
for (i=0; i<dim; i++) {
aux += *py * *px; px += ix; py += iy;
}
return aux;
}
/**********************************************/
#ifndef mymkl
#define mymkl 1
void rcopy (int *n, double *x, int *incx, double *y, int *incy);
void rscal (int *n, double *alpha, double *x, int *incx);
void raxpy (int *n, double *alpha, double *x, int *incx, double *y, int *incy);
double rdot (int *n, double *x, int *incx, double *y, int *incy);
#endif
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment