Commit 1cd76849 authored by iker_martin

Modified the code to allow the usage of MaM. IMKL is no longer required to meet MaM's requirements: its BLAS calls are now covered by the local mymkl implementation.

parent 9135f28c
...@@ -3,10 +3,12 @@
#include <unistd.h>
#include <string.h>
#include <math.h>
//#include <mkl_blas.h>
#include "mymkl.h"
#include <mpi.h>
#include <hb_io.h>
//#include <vector>
#include <sys/prctl.h>
#include "reloj.h"
#include "ScalarVectors.h"
...@@ -15,6 +17,9 @@
#include "matrix.h"
#include "common.h"
#include "../malleability/MAM.h"
#include "ToolsMAM.h"

// ================================================================================
#define DIRECT_ERROR 1
...@@ -24,48 +29,82 @@
#define COLL_P2P_SPMV 0
#endif
typedef struct {
  double tol, tol0;
  int iter, n;
  double rho;
  double *x, *b;
  double *s, *q, *r, *p, *r0, *y, *p_hat, *q_hat;
  double *aux;
  SparseMatrix matL;
#if PRECOND
  double *diags;
#endif
#if DIRECT_ERROR
  double *res_err, *x_exact;
  double direct_err;
#endif
  double t1;
  int *sizes, *dspls;
  int my_size, my_dspl;
  int *vlen;
  int myId, numP;
  MPI_Comm comm;
} Compute_data;

void originals_set_data(Compute_data *computeData, int num_target);
void targets_update(Compute_data *computeData, user_redist_t *user_data);
void user_func(void *args);
void dump(Compute_data *computeData);

void BiCGStab_init (Compute_data *computeData) {
  int size = computeData->matL.dim2, sizeR = computeData->matL.dim1;
  int IONE = 1;
  double DONE = 1.0, DMONE = -1.0, DZERO = 0.0;
  int n, n_dist, myId, nProcs;
  double t2;
#if PRECOND
  int i;
  int *posd = NULL;
  computeData->diags = NULL;
#endif

  computeData->s = NULL; computeData->q = NULL; computeData->r = NULL; computeData->p = NULL;
  computeData->r0 = NULL; computeData->y = NULL; computeData->p_hat = NULL; computeData->q_hat = NULL;
  computeData->aux = NULL;
  myId = computeData->myId;
  nProcs = computeData->numP;
  n = size; n_dist = sizeR;
  CreateDoubles (&computeData->s, n_dist);
  CreateDoubles (&computeData->q, n_dist);
  CreateDoubles (&computeData->r, n_dist);
  CreateDoubles (&computeData->r0, n_dist);
  CreateDoubles (&computeData->p, n_dist);
  CreateDoubles (&computeData->y, n_dist);
#if DIRECT_ERROR
  // init exact solution
  computeData->res_err = NULL; computeData->x_exact = NULL;
  CreateDoubles (&computeData->x_exact, n_dist);
  CreateDoubles (&computeData->res_err, n_dist);
  InitDoubles (computeData->x_exact, n_dist, DONE, DZERO);
#endif // DIRECT_ERROR

#if PRECOND
  CreateInts (&posd, n_dist);
  CreateDoubles (&computeData->p_hat, n_dist);
  CreateDoubles (&computeData->q_hat, n_dist);
  CreateDoubles (&computeData->diags, n_dist);
  GetDiagonalSparseMatrix2 (computeData->matL, computeData->dspls[myId], computeData->diags, posd);
#pragma omp parallel for
  for (i=0; i<n_dist; i++)
    computeData->diags[i] = DONE / computeData->diags[i];
#endif
  CreateDoubles (&computeData->aux, n);

#ifdef SPMV_OPTIMIZED
  int *permP = NULL, *ipermP = NULL;
...@@ -85,55 +124,74 @@ void BiCGStab (SparseMatrix mat, double *x, double *b, int *sizes, int *dspls, i
  PermuteInts (mat.vpos, ipermP, mat.vptr[mat.dim1]);
#endif

  computeData->iter = 0;
#ifdef SPMV_OPTIMIZED
  joinDistributeVectorSPMV (COLL_P2P_SPMV, MPI_COMM_WORLD, x, vecP, vdimP, vdspP,
                            vdimR, vdspR, vectDatatypeP, vectDatatypeR);
  InitDoubles (s, sizeR, DZERO, DZERO);
  ProdSparseMatrixVectorByRows (mat, 0, vecP, s);                                          // s = A * x
#else
  MPI_Allgatherv (computeData->x, sizeR, MPI_DOUBLE, computeData->aux, computeData->sizes, computeData->dspls, MPI_DOUBLE, computeData->comm);
  InitDoubles (computeData->s, sizeR, DZERO, DZERO);
  ProdSparseMatrixVectorByRows (computeData->matL, 0, computeData->aux, computeData->s);   // s = A * x
#endif

  rcopy (&n_dist, computeData->b, &IONE, computeData->r, &IONE);                           // r = b
  raxpy (&n_dist, &DMONE, computeData->s, &IONE, computeData->r, &IONE);                   // r -= s
  rcopy (&n_dist, computeData->r, &IONE, computeData->p, &IONE);                           // p = r
  rcopy (&n_dist, computeData->r, &IONE, computeData->r0, &IONE);                          // r0 = r
  // compute tolerance and <r0,r0>
  computeData->rho = rdot (&n_dist, computeData->r, &IONE, computeData->r, &IONE);
  MPI_Allreduce (MPI_IN_PLACE, &computeData->rho, 1, MPI_DOUBLE, MPI_SUM, computeData->comm);
  computeData->tol0 = sqrt (computeData->rho);
  computeData->tol = computeData->tol0;

#if DIRECT_ERROR
  // compute direct error
  rcopy (&n_dist, computeData->x_exact, &IONE, computeData->res_err, &IONE);               // res_err = x_exact
  raxpy (&n_dist, &DMONE, computeData->x, &IONE, computeData->res_err, &IONE);             // res_err -= x

  // compute inf norm
  computeData->direct_err = norm_inf(n_dist, computeData->res_err);
  MPI_Allreduce(MPI_IN_PLACE, &computeData->direct_err, 1, MPI_DOUBLE, MPI_MAX, computeData->comm);

  // // compute euclidean norm
  // direct_err = rdot (&n_dist, res_err, &IONE, res_err, &IONE);                          // direct_err = res_err' * res_err
  // MPI_Allreduce(MPI_IN_PLACE, &direct_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  // direct_err = sqrt(direct_err);
#endif // DIRECT_ERROR

#if PRECOND
  RemoveInts (&posd);
#endif
  MPI_Barrier(computeData->comm);
  if (myId == 0)
    reloj (&computeData->t1, &t2);
}

void BiCGStab_compute (Compute_data *computeData, user_redist_t *user_data) {
  int size = computeData->matL.dim2, sizeR = computeData->matL.dim1;
  int IONE = 1;
  double DONE = 1.0, DMONE = -1.0, DZERO = 0.0;
  int n, n_dist;
  int maxiter, myId, reconfigure, rec_iter, state;
  double beta, alpha, umbral, omega, tmp;
  double t3, t4;
  double reduce[2];

  n = size; n_dist = sizeR; maxiter = 16 * size; rec_iter = maxiter / 2; umbral = 1.0e-8;
  myId = computeData->myId;
  state = -1;
  reconfigure = 0; rec_iter = -100;

  while ((computeData->iter < maxiter) && (computeData->tol > umbral)) {
#if PRECOND
    VvecDoubles (DONE, computeData->diags, computeData->p, DZERO, computeData->p_hat, n_dist);   // p_hat = D^-1 * p
#else
    computeData->p_hat = computeData->p;
#endif
#ifdef SPMV_OPTIMIZED
    joinDistributeVectorSPMV (COLL_P2P_SPMV, MPI_COMM_WORLD, p_hat, vecP, vdimP,
...@@ -141,31 +199,31 @@ void BiCGStab (SparseMatrix mat, double *x, double *b, int *sizes, int *dspls, i
    InitDoubles (s, sizeR, DZERO, DZERO);
    ProdSparseMatrixVectorByRows (mat, 0, vecP, s);                                        // s = A * p
#else
    MPI_Allgatherv (computeData->p_hat, sizeR, MPI_DOUBLE, computeData->aux, computeData->sizes, computeData->dspls, MPI_DOUBLE, computeData->comm);
    InitDoubles (computeData->s, sizeR, DZERO, DZERO);
    ProdSparseMatrixVectorByRows (computeData->matL, 0, computeData->aux, computeData->s); // s = A * p
#endif

    if (myId == 0)
#if DIRECT_ERROR
      printf ("%d \t %g \t %g \t %g \n", computeData->iter, computeData->tol, umbral, computeData->direct_err);
#else
      printf ("%d \t %g \n", computeData->iter, computeData->tol);
#endif // DIRECT_ERROR

    alpha = rdot (&n_dist, computeData->r0, &IONE, computeData->s, &IONE);
    MPI_Allreduce (MPI_IN_PLACE, &alpha, 1, MPI_DOUBLE, MPI_SUM, computeData->comm);
    alpha = computeData->rho / alpha;

    rcopy (&n_dist, computeData->r, &IONE, computeData->q, &IONE);                         // q = r
    tmp = -alpha;
    raxpy (&n_dist, &tmp, computeData->s, &IONE, computeData->q, &IONE);                   // q = r - alpha * s;

    // second spmv
#if PRECOND
    VvecDoubles (DONE, computeData->diags, computeData->q, DZERO, computeData->q_hat, n_dist);   // q_hat = D^-1 * q
#else
    computeData->q_hat = computeData->q;
#endif
#ifdef SPMV_OPTIMIZED
    joinDistributeVectorSPMV (COLL_P2P_SPMV, MPI_COMM_WORLD, q_hat, vecP, vdimP,
...@@ -173,66 +231,81 @@ void BiCGStab (SparseMatrix mat, double *x, double *b, int *sizes, int *dspls, i
    InitDoubles (y, sizeR, DZERO, DZERO);
    ProdSparseMatrixVectorByRows (mat, 0, vecP, y);                                        // y = A * q
#else
    MPI_Allgatherv (computeData->q_hat, sizeR, MPI_DOUBLE, computeData->aux, computeData->sizes, computeData->dspls, MPI_DOUBLE, computeData->comm);
    InitDoubles (computeData->y, sizeR, DZERO, DZERO);
    ProdSparseMatrixVectorByRows (computeData->matL, 0, computeData->aux, computeData->y); // y = A * q
#endif

    // omega = <q, y> / <y, y>
    reduce[0] = rdot (&n_dist, computeData->q, &IONE, computeData->y, &IONE);
    reduce[1] = rdot (&n_dist, computeData->y, &IONE, computeData->y, &IONE);
    MPI_Allreduce (MPI_IN_PLACE, reduce, 2, MPI_DOUBLE, MPI_SUM, computeData->comm);
    omega = reduce[0] / reduce[1];

    // x+1 = x + alpha * p + omega * q
    raxpy (&n_dist, &alpha, computeData->p_hat, &IONE, computeData->x, &IONE);
    raxpy (&n_dist, &omega, computeData->q_hat, &IONE, computeData->x, &IONE);

    // r+1 = q - omega * y
    rcopy (&n_dist, computeData->q, &IONE, computeData->r, &IONE);                         // r = q
    tmp = -omega;
    raxpy (&n_dist, &tmp, computeData->y, &IONE, computeData->r, &IONE);                   // r = q - omega * y;

    // rho = <r0, r+1> and tolerance
    reduce[0] = rdot (&n_dist, computeData->r0, &IONE, computeData->r, &IONE);
    reduce[1] = rdot (&n_dist, computeData->r, &IONE, computeData->r, &IONE);
    MPI_Allreduce (MPI_IN_PLACE, reduce, 2, MPI_DOUBLE, MPI_SUM, computeData->comm);
    tmp = reduce[0];
    computeData->tol = sqrt (reduce[1]) / computeData->tol0;

    // beta = (alpha / omega) * <r0, r+1> / <r0, r>
    beta = (alpha / omega) * (tmp / computeData->rho);
    computeData->rho = tmp;

    // p+1 = r+1 + beta * (p - omega * s)
    tmp = -omega;
    raxpy (&n_dist, &tmp, computeData->s, &IONE, computeData->p, &IONE);                   // p -= omega * s
    rscal (&n_dist, &beta, computeData->p, &IONE);                                         // p = beta * p
    raxpy (&n_dist, &DONE, computeData->r, &IONE, computeData->p, &IONE);                  // p += r

#if DIRECT_ERROR
    // compute direct error
    rcopy (&n_dist, computeData->x_exact, &IONE, computeData->res_err, &IONE);             // res_err = x_exact
    raxpy (&n_dist, &DMONE, computeData->x, &IONE, computeData->res_err, &IONE);           // res_err -= x

    // compute inf norm
    computeData->direct_err = norm_inf(n_dist, computeData->res_err);
    MPI_Allreduce(MPI_IN_PLACE, &computeData->direct_err, 1, MPI_DOUBLE, MPI_MAX, computeData->comm);

    // // compute euclidean norm
    // direct_err = rdot (&n_dist, res_err, &IONE, res_err, &IONE);
    // MPI_Allreduce(MPI_IN_PLACE, &direct_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    // direct_err = sqrt(direct_err);
#endif // DIRECT_ERROR

    computeData->iter++;
    if (computeData->iter == rec_iter) { reconfigure = 1; }
    if (reconfigure) {
      MAM_Checkpoint(&state, MAM_CHECK_COMPLETION, user_func, (void *) user_data);
      if(state == MAM_COMPLETED) {
        reconfigure = 0;
        //free_computeData(computeData);
        targets_update(computeData, user_data);
      }
    }
  }

  MPI_Barrier(computeData->comm);
  if (myId == 0)
    reloj (&t3, &t4);

  if(state == MAM_PENDING) {
    MAM_Checkpoint(&state, MAM_WAIT_COMPLETION, user_func, (void *) user_data);
    //free_computeData(computeData);
    //targets_update(dist_data, computeData, user_data);
  }

#ifdef SPMV_OPTIMIZED
  // Code required after the loop
  PermuteInts (mat.vpos, permP, mat.vptr[mat.dim1]);
...@@ -246,18 +319,31 @@ void BiCGStab (SparseMatrix mat, double *x, double *b, int *sizes, int *dspls, i
  if (myId == 0) {
    printf ("Size: %d \n", n);
    printf ("Iter: %d \n", computeData->iter);
    printf ("Tol: %g \n", computeData->tol);
    printf ("Time_loop: %20.10e\n", (t3-computeData->t1));
    printf ("Time_iter: %20.10e\n", (t3-computeData->t1)/computeData->iter);
  }
}

void BiCGStab_free (Compute_data *computeData) {
  RemoveDoubles (&computeData->aux); RemoveDoubles (&computeData->s);
  RemoveDoubles (&computeData->q); RemoveDoubles (&computeData->r);
  RemoveDoubles (&computeData->p); RemoveDoubles (&computeData->r0); RemoveDoubles (&computeData->y);
#if PRECOND
  RemoveDoubles (&computeData->diags);
  RemoveDoubles(&computeData->p_hat); RemoveDoubles (&computeData->q_hat);
#endif
  RemoveDoubles (&computeData->x);
  RemoveDoubles (&computeData->b);
  RemoveInts (&computeData->sizes); RemoveInts (&computeData->dspls);
  RemoveSparseMatrix (&computeData->matL);
}

void originals_free() {
}

/*********************************************************************************/
...@@ -268,128 +354,165 @@ int main (int argc, char **argv) {
  int index = 0, indexL = 0;
  SparseMatrix mat = {0, 0, NULL, NULL, NULL}, sym = {0, 0, NULL, NULL, NULL};
  int root = 0, myId, nProcs, isTarget, numTarget, req;
  int dimL, dspL, *vdimL = NULL, *vdspL = NULL;
  SparseMatrix matL = {0, 0, NULL, NULL, NULL};
  double *sol1L = NULL, *sol2L = NULL;
  double beta;
  int IONE = 1;
  double DMONE = -1.0;
  int mat_from_file, nodes, size_param, stencil_points;
  Compute_data computeData;
  user_redist_t user_data;

  /***************************************/
  MPI_Init_thread (&argc, &argv, MPI_THREAD_MULTIPLE, &req);

  // Definition of the variables nProcs and myId
  MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myId);
  root = nProcs-1;
  root = 0;
  computeData.myId = myId;
  computeData.numP = nProcs;
  computeData.comm = MPI_COMM_WORLD;
  user_data = empty_user_data;
  user_data.comm = computeData.comm;

  prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
  isTarget = MAM_Init(root, &computeData.comm, argv[0], user_func, (void *) &user_data);

  if(isTarget) {
    targets_update(&computeData, &user_data);
  } else {
    /***************************************/
    if (argc == 4) {
      mat_from_file = atoi(argv[2]);
    } else {
      mat_from_file = atoi(argv[2]);
      nodes = atoi(argv[3]);
      size_param = atoi(argv[4]);
      stencil_points = atoi(argv[5]);
    }
    /***************************************/
    printf ("A\n");
    CreateInts (&vdimL, nProcs); CreateInts (&vdspL, nProcs);
    if(mat_from_file) {
      if (myId == root) {
        // Creating the matrix
        ReadMatrixHB (argv[1], &sym);
        TransposeSparseMatrices (sym, 0, &mat, 0);
        dim = mat.dim1;
      }
      numTarget = atoi(argv[3]);

      // Distributing the matrix
      dim = DistributeMatrix (mat, index, &matL, indexL, vdimL, vdspL, root, MPI_COMM_WORLD);
      dimL = vdimL[myId]; dspL = vdspL[myId];
      if (myId == root) {
        RemoveSparseMatrix (&mat);
        RemoveSparseMatrix (&sym);
      }
      printf ("B\n");
    }
    else {
      dim = size_param * size_param * size_param;
      int divL, rstL, i;
      divL = (dim / nProcs); rstL = (dim % nProcs);
      for (i=0; i<nProcs; i++) vdimL[i] = divL + (i < rstL);
      vdspL[0] = 0; for (i=1; i<nProcs; i++) vdspL[i] = vdspL[i-1] + vdimL[i-1];
      dimL = vdimL[myId]; dspL = vdspL[myId];
      int band_width = size_param * (size_param + 1) + 1;
      band_width = 100 * nodes;
      long nnz_here = ((long) (stencil_points + 2 * band_width)) * dimL;
      printf ("dimL: %d, nodes: %d, size_param: %d, band_width: %d, stencil_points: %d, nnz_here: %ld\n",
              dimL, nodes, size_param, band_width, stencil_points, nnz_here);
      allocate_matrix(dimL, dim, nnz_here, &matL);
      generate_Poisson3D_filled(&matL, size_param, stencil_points, band_width, dspL, dimL, dim);

      // To generate ill-conditioned matrices
      // double factor = 1.0e6;
      // ScaleFirstRowCol(matL, dspL, dimL, myId, root, factor);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    // Creating the vectors
    CreateDoubles (&sol1, dim);
    // CreateDoubles (&sol2, dim);
    CreateDoubles (&sol1L, dimL);
    CreateDoubles (&sol2L, dimL);

    // InitDoubles (sol2, dim, 0.0, 0.0);
    InitDoubles (sol1L, dimL, 0.0, 0.0);
    InitDoubles (sol2L, dimL, 0.0, 0.0);

    /***************************************/
    printf ("C\n");
    beta = 1.0 / sqrt(dim);
    if(mat_from_file) {
      // compute b = A * x_c, x_c = 1/sqrt(nbrows)
      InitDoubles (sol1, dim, 1.0, 0.0);
      ProdSparseMatrixVectorByRows (matL, 0, sol1, sol1L);        // s = A * x
      rscal (&dimL, &beta, sol1L, &IONE);                         // s = beta * s
    } else {
      InitDoubles (sol1, dim, 0.0, 0.0);

      int k=0;
      int *vptrM = matL.vptr;
      for (int i=0; i < matL.dim1; i++) {
        for(int j=vptrM[i]; j<vptrM[i+1]; j++) {
          sol1L[k] += matL.vval[j];
        }
      }
    }
    printf ("D\n");
    // MPI_Scatterv (sol2, vdimL, vdspL, MPI_DOUBLE, sol2L, dimL, MPI_DOUBLE, root, MPI_COMM_WORLD); //FIXME It does not seem to do anything
    printf ("E\n");

    computeData.sizes = vdimL;
    computeData.my_size = dimL;
    computeData.dspls = vdspL;
    computeData.my_dspl = dspL;
    computeData.b = sol1L;
    computeData.x = sol2L;
    computeData.matL = matL;
    computeData.n = computeData.matL.dim2;
    RemoveDoubles (&sol1);

    BiCGStab_init (&computeData);
    originals_set_data(&computeData, numTarget);
    dump(&computeData);
  }

  BiCGStab_compute (&computeData, &user_data);
  printf ("F\n");

  // Error computation ||b-Ax||
  // if(mat_from_file) {
    dim = matL.dim2;
    CreateDoubles (&sol2, dim);
    InitDoubles (sol2, dim, 0.0, 0.0);
    MPI_Allgatherv (computeData.x, computeData.my_size, MPI_DOUBLE, sol2, computeData.sizes, computeData.dspls, MPI_DOUBLE, computeData.comm);
    InitDoubles (computeData.x, computeData.my_size, 0, 0);
    ProdSparseMatrixVectorByRows (computeData.matL, 0, sol2, computeData.x);
    raxpy (&dimL, &DMONE, computeData.x, &IONE, computeData.b, &IONE);
    beta = rdot (&computeData.my_size, computeData.b, &IONE, computeData.b, &IONE);
    MPI_Allreduce (MPI_IN_PLACE, &beta, 1, MPI_DOUBLE, MPI_SUM, computeData.comm);
  // } else {
  //   // case with x_exact = {1.0}
  //   for (int i=0; i<dimL; i++)
  //     sol2L[i] -= 1.0;
  //   beta = rdot (&dimL, sol2L, &IONE, sol2L, &IONE);
  // }
  beta = sqrt(beta);
...@@ -398,18 +521,197 @@ int main (int argc, char **argv) {
  /***************************************/
  // Freeing memory
  BiCGStab_free (&computeData);
  RemoveDoubles (&sol2);
  MAM_Finalize ();
  MPI_Finalize ();

  return 0;
}
/* MAM New functions */
/*
 * Function to declare the data to be communicated by MAM
 */
void originals_set_data(Compute_data *computeData, int num_target) {
TransformHeadertoLength (computeData->matL.vptr, computeData->n);
CreateInts (&computeData->vlen, computeData->n);
CopyInts (computeData->matL.vptr, computeData->vlen, computeData->n);
TransformLengthtoHeader (computeData->matL.vptr, computeData->n);
MAM_Set_target_number(num_target);
MAM_Data_add(&(computeData->n), NULL, 1, MPI_INT, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT);
MAM_Data_add(&(computeData->tol0), NULL, 1, MPI_DOUBLE, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT);
MAM_Data_add(&(computeData->t1), NULL, 1, MPI_DOUBLE, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT);
MAM_Data_add(&(computeData->iter), NULL, 1, MPI_INT, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE);
MAM_Data_add(&(computeData->tol), NULL, 1, MPI_DOUBLE, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE);
MAM_Data_add(&(computeData->rho), NULL, 1, MPI_DOUBLE, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE);
#if DIRECT_ERROR
MAM_Data_add(&(computeData->direct_err), NULL, 1, MPI_DOUBLE, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE);
#endif
MAM_Data_add(computeData->vlen, NULL, computeData->n, MPI_INT, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT); //TODO Compute vlen
MAM_Data_add(computeData->r0, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT);
MAM_Data_add(computeData->b, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT);
#if PRECOND
MAM_Data_add(computeData->diags, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT);
#endif
#if DIRECT_ERROR
MAM_Data_add(computeData->x_exact, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT);
#endif
MAM_Data_add(computeData->p, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE);
MAM_Data_add(computeData->r, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE);
MAM_Data_add(computeData->x, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE);
}
void targets_update(Compute_data *computeData, user_redist_t *user_data) {
size_t entry, total_qty;
void *value = NULL;
MPI_Datatype type;
MPI_Comm_size(computeData->comm, &computeData->numP);
MPI_Comm_rank(computeData->comm, &computeData->myId);
entry = 0;
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT);
computeData->n = *((int *)value);
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT);
computeData->tol0 = *((double *)value);
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT);
computeData->t1 = *((double *)value);
entry = 0;
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE);
computeData->iter = *((int *)value);
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE);
computeData->tol = *((double *)value);
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE);
computeData->rho = *((double *)value);
#if DIRECT_ERROR
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE);
computeData->direct_err = *((double *)value);
#endif
entry = 0;
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT);
computeData->vlen = ((int *)value);
//computeData->vlen = user_data->recv_vlen;
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT);
computeData->r0 = ((double *)value);
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT);
computeData->b = ((double *)value);
#if PRECOND
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT);
computeData->diags = ((double *)value);
#endif
#if DIRECT_ERROR
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT);
computeData->x_exact = ((double *)value);
#endif
entry = 0;
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE);
computeData->p = ((double *)value);
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE);
computeData->r = ((double *)value);
MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE);
computeData->x = ((double *)value);
int n_dist = computeData->matL.dim1;
int n = computeData->n;
CreateInts (&computeData->sizes, computeData->numP);
CreateInts (&computeData->dspls, computeData->numP);
CreateDoubles (&computeData->s, n_dist);
CreateDoubles (&computeData->q, n_dist);
CreateDoubles (&computeData->y, n_dist);
CreateDoubles (&computeData->aux, n);
#if PRECOND
CreateDoubles (&computeData->p_hat, n_dist);
CreateDoubles (&computeData->q_hat, n_dist);
#endif
#if DIRECT_ERROR
CreateDoubles (&computeData->res_err, n_dist);
#endif
ComputeMatrixSizes (n, computeData->sizes, computeData->dspls, computeData->comm);
computeData->my_size = computeData->sizes[computeData->myId];
computeData->my_dspl = computeData->dspls[computeData->myId];
computeData->matL = user_data->other_subm;
*user_data = empty_user_data;
user_data->array_vptr = computeData->matL.vptr;
user_data->array_vlen = computeData->vlen;
user_data->array_vpos = computeData->matL.vpos;
user_data->array_vval = computeData->matL.vval;
user_data->comm = computeData->comm;
}
void user_func(void *args) {
int local_flag, flag = 0;
mam_user_reconf_t user_reconf;
MAM_Get_Reconf_Info(&user_reconf);
user_redist_t *user_data = (user_redist_t *) args;
if(!user_data->initiated) {
//targets_distribution_synch(user_reconf, user_data);
//flag = 1;
targets_distribution(user_reconf, user_data);
user_data->initiated = 1;
if(user_reconf.rank_state == MAM_PROC_NEW_RANK) {
MPI_Waitall(2, user_data->reqs, MPI_STATUSES_IGNORE);
flag = 1;
}
} else {
MPI_Testall(2, user_data->reqs, &local_flag, MPI_STATUSES_IGNORE);
MPI_Allreduce(&local_flag, &flag, 1, MPI_INT, MPI_MIN, user_data->comm);
}
if(flag) MAM_Resume_redistribution(NULL);
}
void dump(Compute_data *computeData) {
int i;
if(computeData->myId == 0) printf("TamBL=");
fflush(stdout); MPI_Barrier(computeData->comm);
for(i=0; i<computeData->numP; i++) {
if(computeData->myId == i) {
printf("%d, ", computeData->my_size);
}
fflush(stdout);
sleep(1);
MPI_Barrier(computeData->comm);
}
if(computeData->myId == 0) printf("\n");
fflush(stdout); MPI_Barrier(computeData->comm);
if(computeData->myId == 0) printf("Vlen=");
fflush(stdout); MPI_Barrier(computeData->comm);
for(i=0; i<computeData->numP; i++) {
if(computeData->myId == i) {
for(int j=0; j<computeData->my_size; j++) {
printf("%d, ", computeData->vlen[j]);
}
}
fflush(stdout);
sleep(1);
MPI_Barrier(computeData->comm);
}
if(computeData->myId == 0) printf("\n");
fflush(stdout); MPI_Barrier(computeData->comm);
}
...@@ -29,6 +29,35 @@ void CreateSparseMatrix (ptr_SparseMatrix p_spr, int index, int numR, int numC,
CreateDoubles (&(p_spr->vval), numE+(numR+1)*msr);
}
// This routine creates the first part (vptr) of a sparse matrix from the following parameters
// * numR defines the number of rows
// * numC defines the number of columns
// * msr indicates if MSR is the format used for the sparse matrix
void CreateSparseMatrixVptr (ptr_SparseMatrix spr, int numR, int numC,
int msr)
{
spr->dim1 = numR; spr->dim2 = numC;
CreateInts (&(spr->vptr), numR+1);
*(spr->vptr) = ((msr)? (numR+1): 0);
}
// This routine creates the second part (vpos, vval) of a sparse matrix from the following parameters
// * numR defines the number of rows
// * numC defines the number of columns
// * numE defines the number of nonzero elements
// * msr indicates if MSR is the format used for the sparse matrix
//   If msr is active, numE doesn't include the diagonal elements
void CreateSparseMatrixValues (ptr_SparseMatrix spr, int numR, int numC, int numE,
int msr)
{
CreateInts (&(spr->vpos), numE+(numR+1)*msr);
CreateDoubles (&(spr->vval), numE+(numR+1)*msr);
}
// This routine liberates the memory related to matrix spr
void RemoveSparseMatrix (ptr_SparseMatrix spr) {
// First the scalars are reset
...@@ -37,6 +66,16 @@ void RemoveSparseMatrix (ptr_SparseMatrix spr) {
RemoveInts (&(spr->vptr)); RemoveDoubles (&(spr->vval));
}
// This routine liberates the memory related to matrix spr when
// vptr and vpos have been allocated separately
void RemoveSparseMatrix2 (ptr_SparseMatrix spr)
{
spr->dim1 = -1; spr->dim2 = -1;
RemoveInts (&(spr->vptr));
RemoveInts (&(spr->vpos));
RemoveDoubles (&(spr->vval));
}
/*********************************************************************************/

// This routine creates the sparse matrix dst from the symmetric matrix spr.
...
...@@ -22,8 +22,14 @@ typedef struct
extern void CreateSparseMatrix (ptr_SparseMatrix p_spr, int index, int numR, int numC, int numE,
                                int msr);
extern void CreateSparseMatrixVptr (ptr_SparseMatrix spr, int numR, int numC,
                                int msr);
extern void CreateSparseMatrixValues (ptr_SparseMatrix spr, int numR, int numC, int numE,
                                int msr);

// This routine liberates the memory related to matrix spr
extern void RemoveSparseMatrix (ptr_SparseMatrix spr);
extern void RemoveSparseMatrix2 (ptr_SparseMatrix spr);

/*********************************************************************************/
...
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include "ToolsMAM.h"
struct Dist_data {
int ini;
int fin;
int tamBl; // Number of rows
int n;
int myId;
int numP;
int numP_parents;
MPI_Comm comm;
};
//----------------------------------------------------------------------------------------------------
void get_dist(int total_r, int id, int numP, struct Dist_data *dist_data);
void prepare_redist_counts(int *counts, int *displs, int numP_other, int offset, struct Dist_data dist_data, int *vptr);
void prepare_redist_counts_vlen(int *counts, int *displs, int numP_other, int offset, struct Dist_data dist_data);
void set_counts(int id, int numP, struct Dist_data data_dist, int offset, int *sendcounts);
void getIds_intercomm(struct Dist_data dist_data, int numP_other, int *idS);
//----------------------------------------------------------------------------------------------------
void print_counts2(struct Dist_data data_dist, int *xcounts, int *xdispls, int size, int include_zero, const char* name);
void print_global_results(double start_time);
//----------------------------------------------------------------------------------------------------
/*
 * Function in charge of performing the user's asynchronous
 * data redistribution.
 *
 * It computes the total number of elements to send/receive for
 * each process and then calls the Ialltoallv function twice.
 *
 * It also initialises the memory of those processes that are
 * going to receive data.
 */
void targets_distribution(mam_user_reconf_t user_reconf, user_redist_t *user_data) {
int i, n, offset, elems, numP, *vlen, *rank_states;
int *scounts, *rcounts, *sdispls, *rdispls;
size_t total_qty;
void *value = NULL;
struct Dist_data dist_data;
MPI_Datatype type;
int aux_int;
int *recv_vpos = &aux_int;
double aux_double;
double *recv_vval = &aux_double;
MPI_Comm_size(user_reconf.comm, &numP);
scounts = (int *) calloc(numP, sizeof(int));
sdispls = (int *) calloc(numP, sizeof(int));
rcounts = (int *) calloc(numP, sizeof(int));
rdispls = (int *) calloc(numP, sizeof(int));
offset = 0;
rank_states = (int *) malloc(numP * sizeof(int));
MPI_Allgather(&user_reconf.rank_state, 1, MPI_INT, rank_states, 1, MPI_INT, user_reconf.comm);
MAM_Data_get_pointer(&value, 0, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT);
vlen = ((int *)value);
n = (int) total_qty;
if(user_reconf.rank_state != MAM_PROC_ZOMBIE) {
MPI_Comm_rank(user_data->comm, &dist_data.myId);
dist_data.numP = user_reconf.numT;
if(user_reconf.rank_state == MAM_PROC_NEW_RANK) {
user_data->array_vpos = &aux_int;
user_data->array_vval = &aux_double;
for(i=0; i<user_reconf.numS; i++) {
if(rank_states[i] == MAM_PROC_CONTINUE) {
dist_data.myId += user_reconf.numS;
break;
}
}
}
//get_dist(n, dist_data.myId, dist_data.numP, &dist_data);
//CreateSparseMatrixVptr(&user_data->other_subm, dist_data.tamBl, n, 0);
user_data->other_subm.vptr[0] = 0;
for(i=0; i<dist_data.tamBl; i++) {
user_data->other_subm.vptr[i+1] = vlen[i];
}
//TransformLengthtoHeader(user_data->other_subm.vptr, user_data->other_subm.dim1); // The array is converted from vlen to vptr
elems = user_data->other_subm.vptr[dist_data.tamBl];
//CreateSparseMatrixValues(&user_data->other_subm, dist_data.tamBl, n, elems, 0);
recv_vpos = user_data->other_subm.vpos;
recv_vval = user_data->other_subm.vval;
prepare_redist_counts(rcounts, rdispls, user_reconf.numS, offset, dist_data, user_data->other_subm.vptr);
}
if(user_reconf.rank_state != MAM_PROC_NEW_RANK) {
MPI_Comm_rank(user_data->comm, &dist_data.myId);
dist_data.numP = user_reconf.numS;
//get_dist(n, dist_data.myId, dist_data.numP, &dist_data);
offset = (user_reconf.numS + user_reconf.numT) == numP ?
user_reconf.numS : 0;
//prepare_redist_counts(scounts, sdispls, user_reconf.numT, offset, dist_data, user_data->array_vptr);
}
// DATA COMMUNICATION //
MPI_Ialltoallv(user_data->array_vpos, scounts, sdispls, MPI_INT, recv_vpos, rcounts, rdispls, MPI_INT, user_reconf.comm, &user_data->reqs[0]);
MPI_Ialltoallv(user_data->array_vval, scounts, sdispls, MPI_DOUBLE, recv_vval, rcounts, rdispls, MPI_DOUBLE, user_reconf.comm, &user_data->reqs[1]);
free(rank_states);
free(scounts); free(sdispls); free(rcounts); free(rdispls);
}
/*
* ========================================================================================
* ========================================================================================
* ================================DISTRIBUTION FUNCTIONS==================================
* ========================================================================================
* ========================================================================================
*/
/*
 * For the given process Id and the total number of processes,
 * obtains how many rows the process will work with (tamBl) and
 * the range of rows it owns (fin - ini).
 */
void get_dist(int total_r, int id, int numP, struct Dist_data *dist_data) {
int rem;
dist_data->n = total_r;
dist_data->tamBl = total_r / numP;
rem = total_r % numP;
if(id < rem) { // First subgroup
dist_data->ini = id * dist_data->tamBl + id;
dist_data->fin = (id+1) * dist_data->tamBl + (id+1);
} else { // Second subgroup
dist_data->ini = id * dist_data->tamBl + rem;
dist_data->fin = (id+1) * dist_data->tamBl + rem;
}
if(dist_data->fin > total_r) {
dist_data->fin = total_r;
}
if(dist_data->ini > dist_data->fin) {
dist_data->ini = dist_data->fin;
}
dist_data->tamBl = dist_data->fin - dist_data->ini;
}
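A small worked example of this block split (hypothetical values; it assumes the snippet lives in ToolsMAM.c, where struct Dist_data and get_dist are visible):

/* Hypothetical check of get_dist: total_r = 10 rows over numP = 3 processes.
   The remainder (10 % 3 = 1) is absorbed by the first process. */
void example_get_dist(void) {
  struct Dist_data d;
  get_dist(10, 0, 3, &d);   /* d.ini = 0, d.fin = 4,  d.tamBl = 4 */
  get_dist(10, 1, 3, &d);   /* d.ini = 4, d.fin = 7,  d.tamBl = 3 */
  get_dist(10, 2, 3, &d);   /* d.ini = 7, d.fin = 10, d.tamBl = 3 */
}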
void prepare_redist_counts(int *counts, int *displs, int numP_other, int offset, struct Dist_data dist_data, int *vptr) {
int idS[2], i, idS_zero;
int last_index, first_index;
getIds_intercomm(dist_data, numP_other, idS);
idS[0] += offset;
idS[1] += offset;
idS_zero = 0;
if(!idS[0]) {
set_counts(0, numP_other, dist_data, offset, counts);
idS_zero = 1;
}
for(i=idS[0] + idS_zero; i<idS[1]; i++) {
set_counts(i, numP_other, dist_data, offset, counts);
displs[i] = displs[i-1] + counts[i-1];
}
if(!idS[0]) {
last_index = counts[0];
first_index = 0;
counts[0] = vptr[last_index] - vptr[first_index];
}
for(i=idS[0] + idS_zero; i<idS[1]; i++) {
last_index = displs[i] + counts[i];
first_index = displs[i];
counts[i] = vptr[last_index] - vptr[first_index];
displs[i] = displs[i-1] + counts[i-1];
}
}
void prepare_redist_counts_vlen(int *counts, int *displs, int numP_other, int offset, struct Dist_data dist_data) {
int idS[2], i, idS_zero;
getIds_intercomm(dist_data, numP_other, idS);
idS[0] += offset;
idS[1] += offset;
idS_zero = 0;
if(!idS[0]) {
set_counts(0, numP_other, dist_data, offset, counts);
idS_zero = 1;
}
for(i=idS[0] + idS_zero; i<idS[1]; i++) {
set_counts(i, numP_other, dist_data, offset, counts);
displs[i] = displs[i-1] + counts[i-1];
}
}
/*
 * For a given process Id, obtains how many elements process
 * myId has to send to / receive from it.
 */
void set_counts(int id, int numP, struct Dist_data data_dist, int offset, int *sendcounts) {
struct Dist_data other;
int biggest_ini, smallest_end;
get_dist(data_dist.n, id-offset, numP, &other);
// If the row ranges do not overlap, skip to the next process
if(data_dist.ini >= other.fin || data_dist.fin <= other.ini) {
return;
}
// Take the largest ini of the two processes
biggest_ini = (data_dist.ini > other.ini) ? data_dist.ini : other.ini;
// Take the smallest fin of the two processes
smallest_end = (data_dist.fin < other.fin) ? data_dist.fin : other.fin;
sendcounts[id] = smallest_end - biggest_ini; // Number of elements to send to / receive from process Id
}
/*
 * For a process of one group, obtains the range of ranks of the
 * other group it has to send data to or receive data from.
 *
 * Returns the first identifier and the last one (excluded) to
 * communicate with.
 */
void getIds_intercomm(struct Dist_data dist_data, int numP_other, int *idS) {
int idI, idE;
int tamOther = dist_data.n / numP_other;
int remOther = dist_data.n % numP_other;
int middle = (tamOther + 1) * remOther;
if(middle > dist_data.ini) { // First subgroup
idI = dist_data.ini / (tamOther + 1);
} else { // Second subgroup
idI = ((dist_data.ini - middle) / tamOther) + remOther;
}
if(middle >= dist_data.fin) { // First subgroup
idE = dist_data.fin / (tamOther + 1);
idE = (dist_data.fin % (tamOther + 1) > 0 && idE+1 <= numP_other) ? idE+1 : idE;
} else { // Second subgroup
idE = ((dist_data.fin - middle) / tamOther) + remOther;
idE = ((dist_data.fin - middle) % tamOther > 0 && idE+1 <= numP_other) ? idE+1 : idE;
}
idS[0] = idI;
idS[1] = idE;
}
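A matching hypothetical check of the half-open rank range returned here (again assumed to sit in ToolsMAM.c next to the routine; the numbers are only illustrative):

/* Hypothetical check of getIds_intercomm: n = 10 rows, and the other group has
   numP_other = 3 processes owning rows [0,4), [4,7) and [7,10). */
void example_getIds_intercomm(void) {
  struct Dist_data d;
  int idS[2];
  d.n = 10; d.ini = 3; d.fin = 7;   /* this process owns rows 3..6 */
  getIds_intercomm(d, 3, idS);      /* idS = {0, 2}: communicate with ranks 0 and 1 */
}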
void print_counts2(struct Dist_data data_dist, int *xcounts, int *xdispls, int size, int include_zero, const char* name) {
int i;
for(i=0; i < size; i++) {
if(xcounts[i] != 0 || include_zero) {
printf("P%d of %d | %scounts[%d]=%d disp=%d\n", data_dist.myId, data_dist.numP, name, i, xcounts[i], xdispls[i]);
}
}
}
void print_global_results(double start_time) {
double sp_time, sy_time, asy_time, mall_time, global_time;
MAM_Retrieve_times(&sp_time, &sy_time, &asy_time, &mall_time);
global_time = MPI_Wtime() - start_time;
printf("T_spawn: %lf", sp_time);
printf("\nT_SR: %lf", sy_time);
printf("\nT_AR: %lf", asy_time);
printf("\nT_Malleability: %lf", mall_time);
printf("\nT_total: %lf\n", global_time);
}
#ifndef ToolsMAM
#define ToolsMAM 1
#include "SparseProduct.h"
#include <mpi.h>
#include "../malleability/MAM.h"
typedef struct {
int n, initiated;
SparseMatrix other_subm;
int *recv_vlen;
int *array_vptr, *array_vlen, *array_vpos;
double *array_vval;
MPI_Comm comm;
MPI_Request reqs[2];
} user_redist_t;
static const user_redist_t empty_user_data = {
.n = 0,
.initiated = 0,
.recv_vlen = NULL,
.array_vptr = NULL,
.array_vlen = NULL,
.array_vpos = NULL,
.array_vval = NULL,
.comm = MPI_COMM_NULL,
};
extern void targets_distribution(mam_user_reconf_t user_reconf, user_redist_t *user_data);
//extern void targets_distribution_synch(mam_user_reconf_t user_reconf, user_redist_t *user_data);
#endif
...@@ -224,6 +224,21 @@ int DistributeMatrix (SparseMatrix spr, int index, ptr_SparseMatrix sprL, int in
return dim;
}
int ComputeMatrixSizes (int dim, int *vdimL, int *vdspL, MPI_Comm comm) {
int myId, nProcs;
int i, divL, rstL;
// Getting the parameters of the communicator
MPI_Comm_rank(comm, &myId); MPI_Comm_size(comm, &nProcs);
// Calculating the vectors of sizes (vdimL) and displacements (vdspL)
divL = (dim / nProcs); rstL = (dim % nProcs);
for (i=0; i<nProcs; i++) vdimL[i] = divL + (i < rstL);
vdspL[0] = 0; for (i=1; i<nProcs; i++) vdspL[i] = vdspL[i-1] + vdimL[i-1];
return dim;
}
/*********************************************************************************/

// vcols is a vector with dimPos elements, including integer values from 0 to dim-1
...
...@@ -104,6 +104,7 @@ extern int ComputeSprMatrixRecvWeights (int prc_src, int sizes, MPI_Comm comm);
extern int DistributeMatrix (SparseMatrix spr, int index, ptr_SparseMatrix sprL, int indexL,
                             int *vdimL, int *vdspL, int root, MPI_Comm comm);
extern int ComputeMatrixSizes (int dim, int *vdimL, int *vdspL, MPI_Comm comm);
/*********************************************************************************/
...
#include <stdio.h>
#include <stdlib.h>
#include "mymkl.h"
/**********************************************/
void rcopy (int *n, double *x, int *incx, double *y, int *incy) {
int i, dim = *n, ix = *incx, iy = *incy;
double *px = x, *py = y;
for (i=0; i<dim; i++) {
*py = *px; px += ix; py += iy;
}
}
void rscal (int *n, double *alpha, double *x, int *incx) {
int i, dim = *n, ix = *incx;
double *px = x, a = *alpha;
for (i=0; i<dim; i++) {
*px *= a; px += ix;
}
}
void raxpy (int *n, double *alpha, double *x, int *incx, double *y, int *incy) {
int i, dim = *n, ix = *incx, iy = *incy;
double *px = x, *py = y, a = *alpha;
for (i=0; i<dim; i++) {
*py += *px * a; px += ix; py += iy;
}
}
double rdot (int *n, double *x, int *incx, double *y, int *incy) {
int i, dim = *n, ix = *incx, iy = *incy;
double aux = 0.0, *px = x, *py = y;
for (i=0; i<dim; i++) {
aux += *py * *px; px += ix; py += iy;
}
return aux;
}
/**********************************************/
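A minimal standalone check of these BLAS-like replacements (a sketch; it assumes mymkl.c is compiled and linked with it). The pointer-argument interface deliberately mirrors the Fortran-style dcopy/dscal/daxpy/ddot calls it stands in for:

#include <stdio.h>
#include "mymkl.h"

int main (void) {
  int n = 3, IONE = 1;
  double alpha = 2.0;
  double x[3] = {1.0, 2.0, 3.0};
  double y[3] = {1.0, 1.0, 1.0};
  raxpy (&n, &alpha, x, &IONE, y, &IONE);                 // y = 2*x + y = {3, 5, 7}
  printf ("dot = %g\n", rdot (&n, x, &IONE, y, &IONE));   // 1*3 + 2*5 + 3*7 = 34
  return 0;
}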
#ifndef mymkl
#define mymkl 1
void rcopy (int *n, double *x, int *incx, double *y, int *incy);
void rscal (int *n, double *alpha, double *x, int *incx);
void raxpy (int *n, double *alpha, double *x, int *incx, double *y, int *incy);
double rdot (int *n, double *x, int *incx, double *y, int *incy);
#endif