Commit 9135f28c authored by iker_martin's avatar iker_martin

First commit. Added initial code for BiCGStab.

parent fbf4338a
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <math.h>
#include <mkl_blas.h>
#include <mpi.h>
#include <hb_io.h>
#include <vector>
#include "reloj.h"
#include "ScalarVectors.h"
#include "SparseProduct.h"
#include "ToolsMPI.h"
#include "matrix.h"
#include "common.h"
// ================================================================================
#define DIRECT_ERROR 1
#define PRECOND 1
// #define SPMV_OPTIMIZED 1
#ifdef SPMV_OPTIMIZED
#define COLL_P2P_SPMV 0
#endif
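/*
 BiCGStab solves A*x = b, applying a Jacobi (diagonal) preconditioner when PRECOND is set.
 The loop below implements the classical recurrences, with s = A*p_hat and y = A*q_hat:
   alpha = rho / <r0, s>            q  = r - alpha * s
   omega = <q, y> / <y, y>          x += alpha * p_hat + omega * q_hat
   r = q - omega * y                beta = (alpha / omega) * (<r0, r> / rho)
   p = r + beta * (p - omega * s)
 The iteration stops when ||r|| / ||r0|| <= umbral or after maxiter iterations.
 Each dot product is completed with an MPI_Allreduce, since the vectors are distributed
 by blocks of rows (sizes and dspls describe the partition).
*/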
void BiCGStab (SparseMatrix mat, double *x, double *b, int *sizes, int *dspls, int myId) {
int size = mat.dim2, sizeR = mat.dim1;
int IONE = 1;
double DONE = 1.0, DMONE = -1.0, DZERO = 0.0;
int n, n_dist, iter, maxiter, nProcs;
double beta, tol, tol0, alpha, umbral, rho, omega, tmp;
double *s = NULL, *q = NULL, *r = NULL, *p = NULL, *r0 = NULL, *y = NULL, *p_hat = NULL, *q_hat = NULL;
double *aux = NULL;
double t1, t2, t3, t4;
double reduce[2];
#if PRECOND
int i, *posd = NULL;
double *diags = NULL;
#endif
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
n = size; n_dist = sizeR; maxiter = 16 * size; umbral = 1.0e-8;
CreateDoubles (&s, n_dist);
CreateDoubles (&q, n_dist);
CreateDoubles (&r, n_dist);
CreateDoubles (&r0, n_dist);
CreateDoubles (&p, n_dist);
CreateDoubles (&y, n_dist);
#if DIRECT_ERROR
// init exact solution
double *res_err = NULL, *x_exact = NULL;
CreateDoubles (&x_exact, n_dist);
CreateDoubles (&res_err, n_dist);
InitDoubles (x_exact, n_dist, DONE, DZERO);
#endif // DIRECT_ERROR
#if PRECOND
CreateInts (&posd, n_dist);
CreateDoubles (&p_hat, n_dist);
CreateDoubles (&q_hat, n_dist);
CreateDoubles (&diags, n_dist);
GetDiagonalSparseMatrix2 (mat, dspls[myId], diags, posd);
#pragma omp parallel for
for (i=0; i<n_dist; i++)
diags[i] = DONE / diags[i];
#endif
CreateDoubles (&aux, n);
#ifdef SPMV_OPTIMIZED
int *permP = NULL, *ipermP = NULL;
int *vdspP = NULL, *vdimP = NULL, *vdspR = NULL, *vdimR = NULL;
double *vecP = NULL;
MPI_Datatype *vectDatatypeP = NULL, *vectDatatypeR = NULL;
CreateInts (&ipermP, size);
CreateInts (&vdimP, nProcs); CreateInts (&vdspP, nProcs + 1);
CreateInts (&vdimR, nProcs); CreateInts (&vdspR, nProcs + 1);
vectDatatypeP = (MPI_Datatype *) malloc (nProcs * sizeof (MPI_Datatype));
vectDatatypeR = (MPI_Datatype *) malloc (nProcs * sizeof (MPI_Datatype));
createAlltoallwStruct (COLL_P2P_SPMV, MPI_COMM_WORLD, mat, sizes, dspls, vdimP,
vdspP, &aux, &permP, ipermP, vdimR, vdspR, vectDatatypeP, vectDatatypeR);
// Code required before the loop
PermuteInts (mat.vpos, ipermP, mat.vptr[mat.dim1]);
#endif
iter = 0;
#ifdef SPMV_OPTIMIZED
joinDistributeVectorSPMV (COLL_P2P_SPMV, MPI_COMM_WORLD, x, vecP, vdimP, vdspP,
vdimR, vdspR, vectDatatypeP, vectDatatypeR);
InitDoubles (s, sizeR, DZERO, DZERO);
ProdSparseMatrixVectorByRows (mat, 0, vecP, s); // s = A * x
#else
MPI_Allgatherv (x, sizeR, MPI_DOUBLE, aux, sizes, dspls, MPI_DOUBLE, MPI_COMM_WORLD);
InitDoubles (s, sizeR, DZERO, DZERO);
ProdSparseMatrixVectorByRows (mat, 0, aux, s); // s = A * x
#endif
dcopy (&n_dist, b, &IONE, r, &IONE); // r = b
daxpy (&n_dist, &DMONE, s, &IONE, r, &IONE); // r -= s
dcopy (&n_dist, r, &IONE, p, &IONE); // p = r
dcopy (&n_dist, r, &IONE, r0, &IONE); // r0 = r
// compute tolerance and <r0,r0>
rho = ddot (&n_dist, r, &IONE, r, &IONE);
MPI_Allreduce (MPI_IN_PLACE, &rho, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
tol0 = sqrt (rho);
tol = tol0;
#if DIRECT_ERROR
// compute direct error
double direct_err;
dcopy (&n_dist, x_exact, &IONE, res_err, &IONE); // res_err = x_exact
daxpy (&n_dist, &DMONE, x, &IONE, res_err, &IONE); // res_err -= x
// compute inf norm
direct_err = norm_inf(n_dist, res_err);
MPI_Allreduce(MPI_IN_PLACE, &direct_err, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
// // compute euclidean norm
// direct_err = ddot (&n_dist, res_err, &IONE, res_err, &IONE); // direct_err = res_err' * res_err
// MPI_Allreduce(MPI_IN_PLACE, &direct_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
// direct_err = sqrt(direct_err);
#endif // DIRECT_ERROR
MPI_Barrier(MPI_COMM_WORLD);
if (myId == 0)
reloj (&t1, &t2);
while ((iter < maxiter) && (tol > umbral)) {
#if PRECOND
VvecDoubles (DONE, diags, p, DZERO, p_hat, n_dist); // p_hat = D^-1 * p
#else
p_hat = p;
#endif
#ifdef SPMV_OPTIMIZED
joinDistributeVectorSPMV (COLL_P2P_SPMV, MPI_COMM_WORLD, p_hat, vecP, vdimP,
vdspP, vdimR, vdspR, vectDatatypeP, vectDatatypeR);
InitDoubles (s, sizeR, DZERO, DZERO);
ProdSparseMatrixVectorByRows (mat, 0, vecP, s); // s = A * p
#else
MPI_Allgatherv (p_hat, sizeR, MPI_DOUBLE, aux, sizes, dspls, MPI_DOUBLE, MPI_COMM_WORLD);
InitDoubles (s, sizeR, DZERO, DZERO);
ProdSparseMatrixVectorByRows (mat, 0, aux, s); // s = A * p
#endif
if (myId == 0)
#if DIRECT_ERROR
printf ("%d \t %g \t %g \t %g \n", iter, tol, umbral, direct_err);
#else
printf ("%d \t %g \n", iter, tol);
#endif // DIRECT_ERROR
alpha = ddot (&n_dist, r0, &IONE, s, &IONE);
MPI_Allreduce (MPI_IN_PLACE, &alpha, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
alpha = rho / alpha;
dcopy (&n_dist, r, &IONE, q, &IONE); // q = r
tmp = -alpha;
daxpy (&n_dist, &tmp, s, &IONE, q, &IONE); // q = r - alpha * s;
// second spmv
#if PRECOND
VvecDoubles (DONE, diags, q, DZERO, q_hat, n_dist); // q_hat = D^-1 * q
#else
q_hat = q;
#endif
#ifdef SPMV_OPTIMIZED
joinDistributeVectorSPMV (COLL_P2P_SPMV, MPI_COMM_WORLD, q_hat, vecP, vdimP,
vdspP, vdimR, vdspR, vectDatatypeP, vectDatatypeR);
InitDoubles (y, sizeR, DZERO, DZERO);
ProdSparseMatrixVectorByRows (mat, 0, vecP, y); // y = A * q
#else
MPI_Allgatherv (q_hat, sizeR, MPI_DOUBLE, aux, sizes, dspls, MPI_DOUBLE, MPI_COMM_WORLD);
InitDoubles (y, sizeR, DZERO, DZERO);
ProdSparseMatrixVectorByRows (mat, 0, aux, y); // y = A * q
#endif
// omega = <q, y> / <y, y>
reduce[0] = ddot (&n_dist, q, &IONE, y, &IONE);
reduce[1] = ddot (&n_dist, y, &IONE, y, &IONE);
MPI_Allreduce (MPI_IN_PLACE, reduce, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
omega = reduce[0] / reduce[1];
// x+1 = x + alpha * p_hat + omega * q_hat
daxpy (&n_dist, &alpha, p_hat, &IONE, x, &IONE);
daxpy (&n_dist, &omega, q_hat, &IONE, x, &IONE);
// r+1 = q - omega * y
dcopy (&n_dist, q, &IONE, r, &IONE); // r = q
tmp = -omega;
daxpy (&n_dist, &tmp, y, &IONE, r, &IONE); // r = q - omega * y;
// rho = <r0, r+1> and tolerance
reduce[0] = ddot (&n_dist, r0, &IONE, r, &IONE);
reduce[1] = ddot (&n_dist, r, &IONE, r, &IONE);
MPI_Allreduce (MPI_IN_PLACE, reduce, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
tmp = reduce[0];
tol = sqrt (reduce[1]) / tol0;
// beta = (alpha / omega) * <r0, r+1> / <r0, r>
beta = (alpha / omega) * (tmp / rho);
rho = tmp;
// p+1 = r+1 + beta * (p - omega * s)
tmp = -omega;
daxpy (&n_dist, &tmp, s, &IONE, p, &IONE); // p -= omega * s
dscal (&n_dist, &beta, p, &IONE); // p = beta * p
daxpy (&n_dist, &DONE, r, &IONE, p, &IONE); // p += r
#if DIRECT_ERROR
// compute direct error
dcopy (&n_dist, x_exact, &IONE, res_err, &IONE); // res_err = x_exact
daxpy (&n_dist, &DMONE, x, &IONE, res_err, &IONE); // res_err -= x
// compute inf norm
direct_err = norm_inf(n_dist, res_err);
MPI_Allreduce(MPI_IN_PLACE, &direct_err, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
// // compute euclidean norm
// direct_err = ddot (&n_dist, res_err, &IONE, res_err, &IONE);
// MPI_Allreduce(MPI_IN_PLACE, &direct_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
// direct_err = sqrt(direct_err);
#endif // DIRECT_ERROR
iter++;
}
MPI_Barrier(MPI_COMM_WORLD);
if (myId == 0)
reloj (&t3, &t4);
#ifdef SPMV_OPTIMIZED
// Code required after the loop
PermuteInts (mat.vpos, permP, mat.vptr[mat.dim1]);
// Freeing memory for Permutation
free (vectDatatypeR); vectDatatypeR = NULL; free (vectDatatypeP); vectDatatypeP = NULL;
RemoveDoubles (&vecP); RemoveInts (&permP);
RemoveInts (&vdspR); RemoveInts (&vdimR); RemoveInts (&vdspP); RemoveInts (&vdimP);
RemoveInts (&ipermP);
#endif
if (myId == 0) {
printf ("Size: %d \n", n);
printf ("Iter: %d \n", iter);
printf ("Tol: %g \n", tol);
printf ("Time_loop: %20.10e\n", (t3-t1));
printf ("Time_iter: %20.10e\n", (t3-t1)/iter);
}
RemoveDoubles (&aux); RemoveDoubles (&s); RemoveDoubles (&q);
RemoveDoubles (&r); RemoveDoubles (&p); RemoveDoubles (&r0); RemoveDoubles (&y);
#if DIRECT_ERROR
RemoveDoubles (&x_exact); RemoveDoubles (&res_err);
#endif
#if PRECOND
RemoveDoubles (&diags); RemoveInts (&posd);
RemoveDoubles (&p_hat); RemoveDoubles (&q_hat);
#endif
}
/*********************************************************************************/
int main (int argc, char **argv) {
int dim;
double *sol1 = NULL, *sol2 = NULL;
int index = 0, indexL = 0;
SparseMatrix mat = {0, 0, NULL, NULL, NULL}, sym = {0, 0, NULL, NULL, NULL};
int root = 0, myId, nProcs;
int dimL, dspL, *vdimL = NULL, *vdspL = NULL;
SparseMatrix matL = {0, 0, NULL, NULL, NULL};
double *sol1L = NULL, *sol2L = NULL;
int mat_from_file, nodes, size_param, stencil_points;
mat_from_file = atoi(argv[2]);
if (argc >= 6) {
// Parameters of the generated Poisson matrix, only read when they are provided
nodes = atoi(argv[3]);
size_param = atoi(argv[4]);
stencil_points = atoi(argv[5]);
}
/***************************************/
MPI_Init (&argc, &argv);
// Definition of the variables nProcs and myId
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
MPI_Comm_rank(MPI_COMM_WORLD, &myId);
// root = nProcs-1;
root = 0;
/***************************************/
printf ("A\n");
CreateInts (&vdimL, nProcs); CreateInts (&vdspL, nProcs+1); // vdspL also holds the final header entry filled by DistributeMatrix
if(mat_from_file) {
if (myId == root) {
// Creating the matrix
ReadMatrixHB (argv[1], &sym);
TransposeSparseMatrices (sym, 0, &mat, 0);
dim = mat.dim1;
}
// Distributing the matrix
dim = DistributeMatrix (mat, index, &matL, indexL, vdimL, vdspL, root, MPI_COMM_WORLD);
dimL = vdimL[myId]; dspL = vdspL[myId];
printf ("B\n");
}
else {
dim = size_param * size_param * size_param;
int divL, rstL, i;
divL = (dim / nProcs); rstL = (dim % nProcs);
for (i=0; i<nProcs; i++) vdimL[i] = divL + (i < rstL);
vdspL[0] = 0; for (i=1; i<nProcs; i++) vdspL[i] = vdspL[i-1] + vdimL[i-1];
dimL = vdimL[myId]; dspL = vdspL[myId];
int band_width = size_param * (size_param + 1) + 1;
band_width = 100 * nodes;
long nnz_here = ((long) (stencil_points + 2 * band_width)) * dimL;
printf ("dimL: %d, nodes: %d, size_param: %d, band_width: %d, stencil_points: %d, nnz_here: %ld\n",
dimL, nodes, size_param, band_width, stencil_points, nnz_here);
allocate_matrix(dimL, dim, nnz_here, &matL);
generate_Poisson3D_filled(&matL, size_param, stencil_points, band_width, dspL, dimL, dim);
// To generate ill-conditioned matrices
// double factor = 1.0e6;
// ScaleFirstRowCol(matL, dspL, dimL, myId, root, factor);
}
MPI_Barrier(MPI_COMM_WORLD);
// Creating the vectors
CreateDoubles (&sol1, dim);
CreateDoubles (&sol2, dim);
CreateDoubles (&sol1L, dimL);
CreateDoubles (&sol2L, dimL);
InitDoubles (sol2, dim, 0.0, 0.0);
InitDoubles (sol1L, dimL, 0.0, 0.0);
InitDoubles (sol2L, dimL, 0.0, 0.0);
/***************************************/
printf ("C\n");
int IONE = 1;
double beta = 1.0 / sqrt(dim);
if(mat_from_file) {
// compute b = A * x_c, x_c = 1/sqrt(nbrows)
InitDoubles (sol1, dim, 1.0, 0.0);
ProdSparseMatrixVectorByRows (matL, 0, sol1, sol1L); // s = A * x
dscal (&dimL, &beta, sol1L, &IONE); // s = beta * s
} else {
InitDoubles (sol1, dim, 0.0, 0.0);
int k=0;
int *vptrM = matL.vptr;
// b = A * ones: accumulate the sum of each local row in sol1L
for (int i=0; i < matL.dim1; i++) {
for(int j=vptrM[i]; j<vptrM[i+1]; j++) {
sol1L[k] += matL.vval[j];
}
k++;
}
}
printf ("D\n");
MPI_Scatterv (sol2, vdimL, vdspL, MPI_DOUBLE, sol2L, dimL, MPI_DOUBLE, root, MPI_COMM_WORLD);
printf ("E\n");
BiCGStab (matL, sol2L, sol1L, vdimL, vdspL, myId);
printf ("F\n");
// Error computation ||b-Ax||
// if(mat_from_file) {
MPI_Allgatherv (sol2L, dimL, MPI_DOUBLE, sol2, vdimL, vdspL, MPI_DOUBLE, MPI_COMM_WORLD);
InitDoubles (sol2L, dimL, 0, 0);
ProdSparseMatrixVectorByRows (matL, 0, sol2, sol2L);
double DMONE = -1.0;
daxpy (&dimL, &DMONE, sol2L, &IONE, sol1L, &IONE);
beta = ddot (&dimL, sol1L, &IONE, sol1L, &IONE);
MPI_Allreduce (MPI_IN_PLACE, &beta, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
// } else {
// // case with x_exact = {1.0}
// for (int i=0; i<dimL; i++)
// sol2L[i] -= 1.0;
// beta = ddot (&dimL, sol2L, &IONE, sol2L, &IONE);
// }
beta = sqrt(beta);
if (myId == 0)
printf ("Error: %20.10e\n", beta);
/***************************************/
// Freeing memory
RemoveDoubles (&sol1);
RemoveDoubles (&sol2);
RemoveDoubles (&sol1L);
RemoveDoubles (&sol2L);
RemoveInts (&vdspL); RemoveInts (&vdimL);
if (myId == root) {
RemoveSparseMatrix (&mat);
RemoveSparseMatrix (&sym);
}
MPI_Finalize ();
return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <ScalarVectors.h>
/*********************************************************************************/
void CreateInts (int **vint, int dim) {
if ((*vint = (int *) malloc (sizeof(int)*dim)) == NULL)
{ printf ("Memory Error (CreateInts(%d))\n", dim); exit (1); }
}
void RemoveInts (int **vint) {
if (*vint != NULL) { free (*vint); *vint = NULL; }
}
void InitInts (int *vint, int dim, int frst, int incr) {
int i, *p1 = vint, num = frst;
for (i=0; i<dim; i++)
{ *(p1++) = num; num += incr; }
}
void CopyInts (int *src, int *dst, int dim) {
memmove (dst, src, sizeof(int) * dim);
}
void CopyShiftInts (int *src, int *dst, int dim, int shft) {
int i, *p1 = src, *p2 = dst;
if (shft == 0)
CopyInts (src, dst, dim);
else
for (i=0; i<dim; i++)
*(p2++) = *(p1++) + shft;
}
void TransformLengthtoHeader (int *vint, int dim) {
int i, *pi = vint;
for (i=0; i<dim; i++) { *(pi+1) += *pi; pi++; }
}
void TransformHeadertoLength (int *vint, int dim) {
int i, *pi = vint+dim;
for (i=dim; i>0; i--) { *(pi) -= *(pi-1); pi--; }
}
void ComputeHeaderfromLength (int *len, int *head, int dim) {
int i, *pi1 = len, *pi2 = head;
for (i=0; i<dim; i++) { *(pi2+1) = (*pi2) +(*(pi1++)); pi2++; }
}
void ComputeLengthfromHeader (int *head, int *len, int dim) {
int i, *pi1 = head, *pi2 = len;
for (i=0; i<dim; i++) { *(pi2++) = (*(pi1+1)) -(*pi1); pi1++; }
}
int AddInts (int *vint, int dim) {
int i, *pi = vint, aux = 0;
for (i=0; i<dim; i++) {
aux += *pi; pi++;
}
return aux;
}
// The permutation defined by perm is applied on vec, whose size is dim.
void PermuteInts (int *vec, int *perm, int dim) {
int i, *pi = vec;
for (i=0; i<dim; i++) { *pi = perm[*pi]; pi++; }
}
// Compute the inverse of perm, and store it in iperm, whose size is dim.
void ComputeInvPermutation (int *perm, int *iperm, int dim) {
int i, *pi1 = perm;
for (i=0; i<dim; i++) { iperm[*(pi1++)] = i; }
}
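// Example: with perm = {2, 0, 1}, ComputeInvPermutation yields iperm = {1, 2, 0}, and
// PermuteInts applied with perm to vec = {0, 2, 2, 1} relabels it to {2, 1, 1, 0}.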
// Scale by scal the elements of vint, whose size is dim.
void ScaleInts (int *vint, int scal, int dim) {
int i;
int *pi = vint;
for (i=0; i<dim; i++)
*(pi++) *= scal;
}
/*********************************************************************************/
void CreateDoubles (double **vdbl, int dim) {
if ((*vdbl = (double *) malloc (sizeof(double)*dim)) == NULL)
{ printf ("Memory Error (CreateDoubles(%d))\n", dim); exit (1); }
}
void RemoveDoubles (double **vdbl) {
if (*vdbl != NULL) { free (*vdbl); *vdbl = NULL; }
}
void InitDoubles (double *vdbl, int dim, double frst, double incr) {
int i;
double *pd = vdbl, num = frst;
for (i=0; i<dim; i++)
{ *(pd++) = num; num += incr; }
}
void InitRandDoubles (double *vdbl, int dim, double frst, double last) {
int i;
double *pd = vdbl, size = last - frst;
for (i=0; i<dim; i++)
{ *(pd++) = frst + (size * (rand() / (RAND_MAX + 1.0))); }
}
void CopyDoubles (double *src, double *dst, int dim) {
memmove (dst, src, sizeof(double) * dim);
}
void ScaleDoubles (double *vdbl, double scal, int dim) {
int i;
double *pd = vdbl;
for (i=0; i<dim; i++)
*(pd++) *= scal;
}
double DotDoubles (double *vdbl1, double *vdbl2, int dim) {
int i;
double *pd1 = vdbl1, *pd2 = vdbl2, res = 0.0;
for (i=0; i<dim; i++)
res += (*(pd2++)) * (*(pd1++));
return res;
}
void VvecDoubles (double alfa, double *src1, double *src2, double beta, double *dst, int dim) {
int i;
for (i = 0; i < dim; i++) {
//dst[i] = (beta * dst[i]) + (alfa * src1[i] * src2[i]);
double tmp = alfa * src1[i] * src2[i];
dst[i] = fma(beta, dst[i], tmp);
}
}
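// Note: BiCGStab applies the Jacobi preconditioner with this routine, e.g.
// VvecDoubles (1.0, diags, p, 0.0, p_hat, n) computes p_hat = D^-1 * p elementwise.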
/*********************************************************************************/
/*********************************************************************************/
extern void CreateInts (int **vint, int dim);
extern void RemoveInts (int **vint);
extern void InitInts (int *vint, int dim, int frst, int incr);
extern void CopyInts (int *src, int *dst, int dim);
extern void CopyShiftInts (int *src, int *dst, int dim, int shft);
extern void TransformLengthtoHeader (int *vint, int dim);
extern void TransformHeadertoLength (int *vint, int dim);
extern void ComputeHeaderfromLength (int *len, int *head, int dim);
extern void ComputeLengthfromHeader (int *head, int *len, int dim);
extern int AddInts (int *vint, int dim);
// The permutation defined by perm is applied on vec, whose size is dim.
extern void PermuteInts (int *vec, int *perm, int dim);
// Compute the inverse of perm, and store it in iperm, whose size is dim.
extern void ComputeInvPermutation (int *perm, int *iperm, int dim);
// Scale by scal the elements of vint, whose size is dim.
extern void ScaleInts (int *vint, int scal, int dim);
/*********************************************************************************/
extern void CreateDoubles (double **vdbl, int dim);
extern void RemoveDoubles (double **vdbl);
extern void InitDoubles (double *vdbl, int dim, double frst, double incr);
extern void InitRandDoubles (double *vdbl, int dim, double frst, double last);
extern void CopyDoubles (double *src, double *dst, int dim);
extern void ScaleDoubles (double *vdbl, double scal, int dim);
extern double DotDoubles (double *vdbl1, double *vdbl2, int dim);
extern void VvecDoubles (double alfa, double *src1, double *src2, double beta, double *dst, int dim);
/*********************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/// #include "InputOutput.h"
#include "ScalarVectors.h"
#include "hb_io.h"
#include "SparseProduct.h"
/*********************************************************************************/
// This routine creates a sparseMatrix from the following parameters
// * numR defines the number of rows
// * numC defines the number of columns
// * numE defines the number of nonzero elements
// * msr indicates whether the MSR format is used for the sparse matrix
// If msr is active, numE doesn't include the diagonal elements
// The parameter index indicates if 0-indexing or 1-indexing is used.
void CreateSparseMatrix (ptr_SparseMatrix p_spr, int index, int numR, int numC, int numE, int msr) {
// printf (" index = %d , numR = %d , numC = %d , numE = %d\n", index, numR, numC, numE);
// The scalar components of the structure are initiated
p_spr->dim1 = numR; p_spr->dim2 = numC;
// Only one malloc is made for the vectors of indices
CreateInts (&(p_spr->vptr), numE+numR+1);
// The first component of the vectors depends on the used format
*(p_spr->vptr) = ((msr)? (numR+1): 0) + index;
p_spr->vpos = p_spr->vptr + ((msr)? 0: (numR+1));
// The number of nonzero elements depends on the format used
CreateDoubles (&(p_spr->vval), numE+(numR+1)*msr);
}
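/*
 Illustrative sketch (CSR, 0-indexing): building the 3x3 matrix [4 0 1; 0 3 0; 2 0 5]
   SparseMatrix spr;
   CreateSparseMatrix (&spr, 0, 3, 3, 5, 0);      // a single malloc holds vptr and vpos
   int    vptr[] = {0, 2, 3, 5};                  // row headers
   int    vpos[] = {0, 2, 1, 0, 2};               // column indices
   double vval[] = {4.0, 1.0, 3.0, 2.0, 5.0};     // nonzero values
   CopyInts (vptr, spr.vptr, 4); CopyInts (vpos, spr.vpos, 5); CopyDoubles (vval, spr.vval, 5);
*/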
// This routine frees the memory related to matrix spr
void RemoveSparseMatrix (ptr_SparseMatrix spr) {
// First the scalar components are reset
spr->dim1 = -1; spr->dim2 = -1;
// The vectors are freed (vpos shares the allocation made for vptr, so only vptr and vval are freed)
RemoveInts (&(spr->vptr)); RemoveDoubles (&(spr->vval));
}
/*********************************************************************************/
// This routine creates the sparse matrix dst from the symmetric matrix src.
// The parameters indexS and indexD indicate, respectively, if 0-indexing or 1-indexing is used
// to store the sparse matrices.
void DesymmetrizeSparseMatrices (SparseMatrix src, int indexS, ptr_SparseMatrix dst, int indexD) {
int n = src.dim1, nnz = 0;
int *sizes = NULL;
int *pp1 = NULL, *pp2 = NULL, *pp3 = NULL, *pp4 = NULL, *pp5 = NULL;
int i, j, dim, indexDS = indexD - indexS;
double *pd3 = NULL, *pd4 = NULL;
// The vector sizes is created and initiated
CreateInts (&sizes, n); InitInts (sizes, n, 0, 0);
// This loop counts the number of elements in each row
pp1 = src.vptr; pp3 = src.vpos + *pp1 - indexS;
pp2 = pp1 + 1 ; pp4 = sizes - indexS;
for (i=indexS; i<(n+indexS); i++) {
// The size of the corresponding row is accumulated
dim = (*pp2 - *pp1); pp4[i] += dim;
// Now each component of the row is analyzed
for (j=0; j<dim; j++) {
// Each nondiagonal element defines another element in the graph
if (*pp3 != i) pp4[*pp3]++;
pp3++;
}
pp1 = pp2++;
}
// Compute the number of nonzeros of the new sparse matrix
nnz = AddInts (sizes, n);
// Create the new sparse matrix
CreateSparseMatrix (dst, indexD, n, n, nnz, 0);
// Fill the vector of pointers
CopyInts (sizes, (dst->vptr) + 1, n);
dst->vptr[0] = indexD; TransformLengthtoHeader (dst->vptr, n);
// The vector sizes is initiated with the beginning of each row
CopyInts (dst->vptr, sizes, n);
// This loop fills the contents of vector vpos
pp1 = src.vptr; pp3 = src.vpos + *pp1 - indexS;
pp2 = pp1 + 1 ; pp4 = dst->vpos - indexD; pp5 = sizes - indexS;
pd3 = src.vval + *pp1 - indexS; pd4 = dst->vval - indexD;
for (i=indexS; i<(n+indexS); i++) {
dim = (*pp2 - *pp1);
for (j=0; j<dim; j++) {
// The elements in the i-th row
pp4[pp5[i] ] = *pp3+indexDS;
pd4[pp5[i]++] = *pd3;
if (*pp3 != i) {
// Each nondiagonal element defines another element in the graph
pp4[pp5[*pp3] ] = i+indexDS;
pd4[pp5[*pp3]++] = *pd3;
}
pp3++; pd3++;
}
pp1 = pp2++;
}
// The memory related to the vector sizes is liberated
RemoveInts (&sizes);
}
/*********************************************************************************/
// This routine creates the sparse matrix dst as the transpose of the matrix src.
// The parameters indexS and indexD indicate, respectively, if 0-indexing or 1-indexing is used
// to store the sparse matrices.
void TransposeSparseMatrices (SparseMatrix src, int indexS, ptr_SparseMatrix dst, int indexD) {
int n = src.dim1, nnz = 0;
int *sizes = NULL;
int *pp1 = NULL, *pp2 = NULL, *pp3 = NULL, *pp4 = NULL, *pp5 = NULL;
int i, j, dim, indexDS = indexD - indexS;
double *pd3 = NULL, *pd4 = NULL;
// The vector sizes is created and initiated
CreateInts (&sizes, n); InitInts (sizes, n, 0, 0);
// This loop counts the number of elements in each row
pp1 = src.vptr; pp3 = src.vpos + *pp1 - indexS;
pp2 = pp1 + 1 ; pp4 = sizes - indexS;
for (i=indexS; i<(n+indexS); i++) {
// The size of the corresponding row is accumulated
dim = (*pp2 - *pp1);
// Now each component of the row is analyzed
for (j=0; j<dim; j++) {
pp4[*pp3]++;
pp3++;
}
pp1 = pp2++;
}
// Compute the number of nonzeros of the new sparse matrix
nnz = AddInts (sizes, n);
// Create the new sparse matrix
CreateSparseMatrix (dst, indexD, n, n, nnz, 0);
// Fill the vector of pointers
CopyInts (sizes, (dst->vptr) + 1, n);
dst->vptr[0] = indexD; TransformLengthtoHeader (dst->vptr, n);
// The vector sizes is initiated with the beginning of each row
CopyInts (dst->vptr, sizes, n);
// This loop fills the contents of vector vpos
pp1 = src.vptr; pp3 = src.vpos + *pp1 - indexS;
pp2 = pp1 + 1 ; pp4 = dst->vpos - indexD; pp5 = sizes - indexS;
pd3 = src.vval + *pp1 - indexS; pd4 = dst->vval - indexD;
for (i=indexS; i<(n+indexS); i++) {
dim = (*pp2 - *pp1);
for (j=0; j<dim; j++) {
// The elements in the i-th column
pp4[pp5[*pp3] ] = i+indexDS;
pd4[pp5[*pp3]++] = *pd3;
pp3++; pd3++;
}
pp1 = pp2++;
}
// The memory related to the vector sizes is liberated
RemoveInts (&sizes);
}
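/*
 Note: ReadMatrixHB keeps the Harwell-Boeing data column-wise (colptr/rowind), so main calls
 TransposeSparseMatrices (sym, 0, &mat, 0) to obtain the row-wise (CSR) matrix.
 Worked example: src with vptr = {0, 2, 3}, vpos = {0, 1, 1}, vval = {1, 2, 3} (the matrix
 [1 2; 0 3]) is transposed into dst with vptr = {0, 1, 3}, vpos = {0, 0, 1}, vval = {1, 2, 3}.
*/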
/*********************************************************************************/
int ReadMatrixHB (char *filename, ptr_SparseMatrix p_spr) {
int *colptr = NULL;
double *exact = NULL;
double *guess = NULL;
int indcrd;
char *indfmt = NULL;
FILE *input;
char *key = NULL;
char *mxtype = NULL;
int ncol;
int neltvl;
int nnzero;
int nrhs;
int nrhsix;
int nrow;
int ptrcrd;
char *ptrfmt = NULL;
int rhscrd;
char *rhsfmt = NULL;
int *rhsind = NULL;
int *rhsptr = NULL;
char *rhstyp = NULL;
double *rhsval = NULL;
double *rhsvec = NULL;
int *rowind = NULL;
char *title = NULL;
int totcrd;
int valcrd;
char *valfmt = NULL;
double *values = NULL;
printf ("\nTEST09\n");
printf (" HB_FILE_READ reads all the data in an HB file.\n");
printf (" HB_FILE_MODULE is the module that stores the data.\n");
input = fopen (filename, "r");
if ( !input ) {
printf ("\n TEST09 - Warning!\n Error opening the file %s .\n", filename);
return -1;
}
hb_file_read ( input, &title, &key, &totcrd, &ptrcrd, &indcrd,
&valcrd, &rhscrd, &mxtype, &nrow, &ncol, &nnzero, &neltvl,
&ptrfmt, &indfmt, &valfmt, &rhsfmt, &rhstyp, &nrhs, &nrhsix,
&colptr, &rowind, &values, &rhsval, &rhsptr, &rhsind, &rhsvec,
&guess, &exact );
fclose (input);
// Conversion Fortran to C
CopyShiftInts (colptr, colptr, nrow+1, -1);
CopyShiftInts (rowind, rowind, nnzero, -1);
// Data assignment
p_spr->dim1 = nrow ; p_spr->dim2 = ncol ;
p_spr->vptr = colptr; p_spr->vpos = rowind; p_spr->vval = values;
// Memory liberation
free (exact ); free (guess ); free (indfmt);
free (key ); free (mxtype); free (ptrfmt);
free (rhsfmt); free (rhsind); free (rhsptr);
free (rhstyp); free (rhsval); free (rhsvec);
free (title ); free (valfmt);
return 0;
}
/*********************************************************************************/
// This routine computes the product { res += spr * vec }.
// The parameter index indicates if 0-indexing or 1-indexing is used,
void ProdSparseMatrixVector2 (SparseMatrix spr, int index, double *vec, double *res) {
int i, j;
int *pp1 = spr.vptr, *pp2 = pp1+1, *pi1 = spr.vpos + *pp1 - index;
double aux, *pvec = vec - index, *pd2 = res;
double *pd1 = spr.vval + *pp1 - index;
// If the MSR format is used, first the diagonal has to be processed
if (spr.vptr == spr.vpos)
VvecDoubles (1.0, spr.vval, vec, 1.0, res, spr.dim1);
for (i=0; i<spr.dim1; i++) {
// The dot product between the row i and the vector vec is computed
aux = 0.0;
for (j=*pp1; j<*pp2; j++)
aux += *(pd1++) * pvec[*(pi1++)];
// for (j=spr.vptr[i]; j<spr.vptr[i+1]; j++)
// aux += spr.vval[j] * pvec[spr.vpos[j]];
// Accumulate the obtained value on the result
*(pd2++) += aux; pp1 = pp2++;
}
}
// This routine computes the product { res += spr * vec }.
// The parameter index indicates if 0-indexing or 1-indexing is used,
void ProdSparseMatrixVectorByRows (SparseMatrix spr, int index, double *vec, double *res) {
int i, j, dim = spr.dim1;
int *pp1 = spr.vptr, *pi1 = spr.vpos + *pp1 - index;
double aux, *pvec = vec + *pp1 - index;
double *pd1 = spr.vval + *pp1 - index;
// Process all the rows of the matrix
for (i=0; i<dim; i++) {
// The dot product between the row i and the vector vec is computed
aux = 0.0;
for (j=pp1[i]; j<pp1[i+1]; j++)
aux = fma(pd1[j], pvec[pi1[j]], aux);
// Accumulate the obtained value on the result
res[i] += aux;
}
}
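// Worked example (0-indexing): with vptr = {0, 2, 3, 5}, vpos = {0, 2, 1, 0, 2},
// vval = {4, 1, 3, 2, 5} and vec = {1, 1, 1}, this routine accumulates
// res[0] += 5, res[1] += 3, res[2] += 7, that is, res += spr * vec.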
// This routine computes the product { res += spr * vec }.
// The parameter index indicates if 0-indexing or 1-indexing is used,
void ProdSparseMatrixVectorByRows_OMP (SparseMatrix spr, int index, double *vec, double *res) {
int i, j, dim = spr.dim1;
int *pp1 = spr.vptr, *pi1 = spr.vpos + *pp1 - index;
double aux, *pvec = vec + *pp1 - index;
double *pd1 = spr.vval + *pp1 - index;
// Process all the rows of the matrix
#pragma omp parallel for private(j, aux)
for (i=0; i<dim; i++) {
// The dot product between the row i and the vector vec is computed
aux = 0.0;
for (j=pp1[i]; j<pp1[i+1]; j++)
aux += pd1[j] * pvec[pi1[j]];
// Accumulate the obtained value on the result
res[i] += aux;
}
}
/*void ProdSparseMatrixVectorByRows_OMPTasks (SparseMatrix spr, int index, double *vec, double *res, int bm) {
int i, dim = spr.dim1;
// Process all the rows of the matrix
//#pragma omp taskloop grainsize(bm)
for ( i=0; i<dim; i+=bm) {
int cs = dim - i;
int c = cs < bm ? cs : bm;
// for (i=0; i<dim; i++) {
#pragma omp task depend(inout:res[i:i+c-1]) //shared(c)
{
// printf("Task SPMV ---- i: %d, c: %d \n", i, c);
int *pp1 = spr.vptr, *pi1 = spr.vpos + *pp1 - index;
double aux, *pvec = vec + *pp1 - index;
double *pd1 = spr.vval + *pp1 - index;
// The dot product between the row i and the vector vec is computed
aux = 0.0;
for(int idx=i; idx < i+c; idx++){
// printf("Task SPMV ---- idx: %d\n", idx);
for (int j=pp1[idx]; j<pp1[idx+1]; j++)
aux += pd1[j] * pvec[pi1[j]];
// Accumulate the obtained value on the result
res[idx] += aux;
}
}
}
}
*/
void ProdSparseMatrixVectorByRows_OMPTasks (SparseMatrix spr, int index, double *vec, double *res, int bm) {
int i, j, idx, dim = spr.dim1;
int *pp1 = spr.vptr, *pi1 = spr.vpos + *pp1 - index;
double aux, *pvec = vec + *pp1 - index;
double *pd1 = spr.vval + *pp1 - index;
// Process all the rows of the matrix
#pragma omp taskloop grainsize(bm)
for ( i=0; i<dim; i++ ) {
// The dot product between the row i and the vector vec is computed
aux = 0.0;
for (j=pp1[i]; j<pp1[i+1]; j++)
aux += pd1[j] * pvec[pi1[j]];
// Accumulate the obtained value on the result
res[i] += aux;
}
}
/*********************************************************************************/
// This routine computes the product { res += spr * vec }.
// The parameter index indicates if 0-indexing or 1-indexing is used,
void ProdSparseMatrixVectorByCols (SparseMatrix spr, int index, double *vec, double *res) {
int i, j, dim = spr.dim1;
int *pp1 = spr.vptr, *pi1 = spr.vpos + *pp1 - index;
double aux, *pres = res + *pp1 - index;
double *pd1 = spr.vval + *pp1 - index;
// Process all the columns of the matrix
for (i=0; i<dim; i++) {
// The result is scaled by the column i and the scalar vec[i]
aux = vec[i];
for (j=pp1[i]; j<pp1[i+1]; j++)
pres[pi1[j]] += pd1[j] * aux;
}
}
// This routine computes the product { res += spr * vec }.
// The parameter index indicates if 0-indexing or 1-indexing is used,
void ProdSparseMatrixVectorByCols_OMP (SparseMatrix spr, int index, double *vec, double *res) {
int i, j, dim = spr.dim1;
int *pp1 = spr.vptr, *pi1 = spr.vpos + *pp1 - index;
double aux, *pres = res + *pp1 - index;
double *pd1 = spr.vval + *pp1 - index;
// Process all the columns of the matrix
#pragma omp parallel for private(j, aux)
for (i=0; i<dim; i++) {
// The result is scaled by the column i and the scalar vec[i]
for (j=pp1[i]; j<pp1[i+1]; j++) {
aux = vec[i] * pd1[j];
#pragma omp atomic
pres[pi1[j]] += aux;
}
}
}
/*********************************************************************************/
void GetDiagonalSparseMatrix2 (SparseMatrix spr, int shft, double *diag, int *posd) {
int i, j, dim = (spr.dim1 < spr.dim2) ? spr.dim1 : spr.dim2;
int *pp1 = NULL, *pp2 = NULL, *pi1 = NULL, *pi2 = posd;
double *pd1 = NULL, *pd2 = diag;
if (spr.vptr == spr.vpos)
CopyDoubles (spr.vval, diag, spr.dim1);
else {
pp1 = spr.vptr; pp2 = pp1+1; j = (*pp2-*pp1);
pi1 = spr.vpos+*pp1; pd1 = spr.vval+*pp1;
for (i=0; i<dim; i++) {
while ((j > 0) && (*pi1 < (i+shft))) {
pi1++; pd1++; j--;
}
*(pd2++) = ((j > 0) && (*pi1 == (i+shft))) ? *pd1: 0.0;
//*(pi2++) = ((j > 0) && (*pi1 == (i+shft))) ? *pp2-j: -1;
pi1 += j; pd1 += j; pp1 = (pp2++); j = (*pp2-*pp1);
}
}
}
/*********************************************************************************/
#ifndef SparseProductTip
#define SparseProductTip 1
typedef struct
{
int dim1, dim2;
int *vptr;
int *vpos;
double *vval;
} SparseMatrix, *ptr_SparseMatrix;
/*********************************************************************************/
// This routine creates a sparseMatrix from the following parameters
// * numR defines the number of rows
// * numC defines the number of columns
// * numE defines the number of nonzero elements
// * msr indicates whether the MSR format is used for the sparse matrix
// If msr is active, numE doesn't include the diagonal elements
// The parameter index indicates if 0-indexing or 1-indexing is used.
extern void CreateSparseMatrix (ptr_SparseMatrix p_spr, int index, int numR, int numC, int numE,
int msr);
// This routine liberates the memory related to matrix spr
extern void RemoveSparseMatrix (ptr_SparseMatrix spr);
/*********************************************************************************/
// This routine creates the sparse matrix dst from the symmetric matrix src.
// The parameters indexS and indexD indicate, respectively, if 0-indexing or 1-indexing is used
// to store the sparse matrices.
extern void DesymmetrizeSparseMatrices (SparseMatrix src, int indexS, ptr_SparseMatrix dst,
int indexD);
/*********************************************************************************/
// This routine creates the sparse matrix dst as the transpose of the matrix src.
// The parameters indexS and indexD indicate, respectively, if 0-indexing or 1-indexing is used
// to store the sparse matrices.
void TransposeSparseMatrices (SparseMatrix src, int indexS, ptr_SparseMatrix dst, int indexD);
/*********************************************************************************/
extern int ReadMatrixHB (char *filename, ptr_SparseMatrix p_spr);
/*********************************************************************************/
// This routine computes the product { res += spr * vec }.
// The parameter index indicates if 0-indexing or 1-indexing is used,
extern void ProdSparseMatrixVector2 (SparseMatrix spr, int index, double *vec, double *res);
// This routine computes the product { res += spr * vec }.
// The parameter index indicates if 0-indexing or 1-indexing is used,
extern void ProdSparseMatrixVectorByRows (SparseMatrix spr, int index, double *vec, double *res);
// This routine computes the product { res += spr * vec }.
// The parameter index indicates if 0-indexing or 1-indexing is used,
extern void ProdSparseMatrixVectorByRows_OMP (SparseMatrix spr, int index, double *vec, double *res);
/*********************************************************************************/
// This routine computes the product { res += spr * vec }.
// The parameter index indicates if 0-indexing or 1-indexing is used,
extern void ProdSparseMatrixVectorByCols (SparseMatrix spr, int index, double *vec, double *res);
// This routine computes the product { res += spr * vec }.
// The parameter index indicates if 0-indexing or 1-indexing is used,
extern void ProdSparseMatrixVectorByCols_OMP (SparseMatrix spr, int index, double *vec, double *res);
/*********************************************************************************/
extern void GetDiagonalSparseMatrix2 (SparseMatrix spr, int shft, double *diag, int *posd);
/*********************************************************************************/
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include <ScalarVectors.h>
#include "ToolsMPI.h"
// #define PRINT_SEND_RESOTRNF_VECTORS 1
/*********************************************************************************/
void Synchronization (MPI_Comm Synch_Comm, char *message) {
int my_id, i ;
MPI_Comm_rank(Synch_Comm, &my_id);
MPI_Barrier (Synch_Comm);
printf ("(%d) %s\n", my_id, message);
if (my_id == 0) printf ("Waiting ... \n");
if (my_id == 0) scanf ("%d", &i);
if (my_id == 0) printf (" ... done\n");
MPI_Barrier (Synch_Comm);
}
/*********************************************************************************/
// Return true if the corresponding asynchronous communication,
// defined by data, has been finalized
int TestSimple (void *data) {
int flag = 0;
ptr_SimpleNode smpnode = (ptr_SimpleNode) data;
// Verify if the communication has finalized
MPI_Test (&(smpnode->req), &flag, &(smpnode->sta));
if (flag) {
// Remove the data included in the simple node
MPI_Wait (&(smpnode->req), &(smpnode->sta));
free (smpnode);
}
// Returns the result
return flag;
}
// Return true if the corresponding asynchronous communication,
// defined by data, has been finalized
int TestPacket (void *data) {
int flag = 0;
ptr_PacketNode pcknode = (ptr_PacketNode) data;
// Verify if the communication has finalized
MPI_Test (&(pcknode->req), &flag, &(pcknode->sta));
if (flag) {
// Remove the data included in the pack
MPI_Wait (&(pcknode->req), &(pcknode->sta));
MPI_Type_free (&(pcknode->pack));
free (pcknode);
}
// Returns the result
return flag;
}
// Detect lost messages whose destination is a process
// of the communicator Err_Comm
void DetectErrorsMPI (MPI_Comm Err_Comm) {
int my_id, flag= 0;
MPI_Status st;
// Definition of the variable my_id
MPI_Comm_rank(Err_Comm, &my_id);
// Test if some message exists
MPI_Iprobe (MPI_ANY_SOURCE, MPI_ANY_TAG, Err_Comm, &flag, &st);
if (flag) {
printf ("%d --> (%d,%d)\n", my_id, st.MPI_SOURCE, st.MPI_TAG);
}
}
/*********************************************************************************/
// Prepare the structures required to send/receive a SparseMatrix structure
// * spr refers to the SparseMatrix from where the data is obtained
// * size is the number of rows to be sent
// * weight is the number of nonzeros to be sent
// * pcknode, where the resulting packet is stored
void MakeSprMatrixPacket (SparseMatrix spr, int size, int weight, ptr_PacketNode pcknode) {
int k;
int *lblq = pcknode->lblq;
MPI_Aint *dspl = pcknode->dspl;
MPI_Datatype *type = pcknode->type;
// Definition of reference pointer
pcknode->ptr = (unsigned char *) spr.vptr;
// Definition of the required vectors to create the packet
type[0] = MPI_INT ; lblq[0] = size+1; dspl[0] = (MPI_Aint) spr.vptr;
type[1] = MPI_INT ; lblq[1] = weight; dspl[1] = (MPI_Aint) spr.vpos;
type[2] = MPI_DOUBLE; lblq[2] = weight; dspl[2] = (MPI_Aint) spr.vval;
type[3] = MPI_UB ; lblq[3] = 1 ; dspl[3] = (MPI_Aint) (spr.vptr+size+1);
for (k=3; k>=0; k--) dspl[k] -= dspl[0];
// Creation of the packet
MPI_Type_create_struct (4, lblq, dspl, type, &(pcknode->pack));
MPI_Type_commit(&(pcknode->pack));
}
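/*
 Note: MPI_UB is deprecated and was removed in MPI-3.0. Since these packets are always sent
 with count 1, a sketch of an equivalent construction (assuming the same three data blocks
 defined above) would be:
   MPI_Datatype tmp;
   MPI_Type_create_struct (3, lblq, dspl, type, &tmp);
   MPI_Type_create_resized (tmp, 0, (MPI_Aint) ((char *) (spr.vptr+size+1) - (char *) spr.vptr),
                            &(pcknode->pack));
   MPI_Type_commit (&(pcknode->pack)); MPI_Type_free (&tmp);
*/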
void MakeSprMatrixSendPacket (SparseMatrix spr, int *vlen, int dimL, int dspL,
ptr_PacketNode pcknode) {
int k, weight, dspZ;
int *lblq = pcknode->lblq;
MPI_Aint *dspl = pcknode->dspl;
MPI_Datatype *type = pcknode->type;
// printf ("dimL = %d , dspL = %d\n", dimL, dspL);
// PrintInts (vlen, spr.dim1);
// PrintInts (spr.vptr, spr.dim1+1);
// Definition of reference pointer
pcknode->ptr = (unsigned char *) (vlen+dspL);
// Definition of the required vectors to create the packet
dspZ = spr.vptr[dspL]; weight = spr.vptr[dspL+dimL] - dspZ;
// printf ("dspZ = %d , weight = %d\n", dspZ, weight);
type[0] = MPI_INT ; lblq[0] = dimL ; dspl[0] = (MPI_Aint) (vlen+dspL );
type[1] = MPI_INT ; lblq[1] = weight; dspl[1] = (MPI_Aint) (spr.vpos+dspZ );
type[2] = MPI_DOUBLE; lblq[2] = weight; dspl[2] = (MPI_Aint) (spr.vval+dspZ );
type[3] = MPI_UB ; lblq[3] = 1 ; dspl[3] = (MPI_Aint) (vlen+dimL+dspL);
for (k=3; k>=0; k--) dspl[k] -= dspl[0];
// Creation of the packet
MPI_Type_create_struct (4, lblq, dspl, type, &(pcknode->pack));
MPI_Type_commit(&(pcknode->pack));
}
void MakeSprMatrixRecvPacket (SparseMatrix sprL, int nnzL, ptr_PacketNode pcknode) {
int k, dimL = sprL.dim1;
int *lblq = pcknode->lblq;
MPI_Aint *dspl = pcknode->dspl;
MPI_Datatype *type = pcknode->type;
// printf ("nnzL = %d\n", nnzL);
// Definition of reference pointer
pcknode->ptr = (unsigned char *) (sprL.vptr+1);
// Definition of the required vectors to create the packet
type[0] = MPI_INT ; lblq[0] = dimL; dspl[0] = (MPI_Aint) (sprL.vptr+1);
type[1] = MPI_INT ; lblq[1] = nnzL; dspl[1] = (MPI_Aint) sprL.vpos;
type[2] = MPI_DOUBLE; lblq[2] = nnzL; dspl[2] = (MPI_Aint) sprL.vval;
type[3] = MPI_UB ; lblq[3] = 1 ; dspl[3] = (MPI_Aint) (sprL.vptr+1+dimL);
for (k=3; k>=0; k--) dspl[k] -= dspl[0];
// Creation of the packet
MPI_Type_create_struct (4, lblq, dspl, type, &(pcknode->pack));
MPI_Type_commit(&(pcknode->pack));
}
// Compute the number of nonzero elements of a PermSprMatrixRecvPacket packet
// * prc_src is the process from which the message is sent
// * dimL is the number of rows to be received
// * comm is the communicator in which the message is sent
int ComputeSprMatrixRecvWeights (int prc_src, int dimL, MPI_Comm comm) {
int tam, tam_int, tam_double, tam_ub;
MPI_Status sta;
// Definition of sizes
MPI_Type_size(MPI_INT, &tam_int);
MPI_Type_size(MPI_DOUBLE, &tam_double);
MPI_Type_size(MPI_UB, &tam_ub);
MPI_Probe (prc_src, Tag_Send_Packet_Matrix_To_Leaf, comm, &sta);
MPI_Get_count (&sta, MPI_BYTE, &tam);
// Return the number of nonzeros included in a packet
return (tam - (dimL*tam_int + tam_ub)) / (tam_int + tam_double);
}
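// Example: with dimL = 100 rows, 4-byte ints and 8-byte doubles (tam_ub = 0), a probed
// message of tam = 6400 bytes yields (6400 - 400) / 12 = 500 nonzeros.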
int DistributeMatrix (SparseMatrix spr, int index, ptr_SparseMatrix sprL, int indexL,
int *vdimL, int *vdspL, int root, MPI_Comm comm) {
int myId, nProcs;
int i, dim = spr.dim1, divL, rstL, dimL, dspL, nnzL;
ptr_PacketNode pcknode;
// Getting the parameters of the communicator
MPI_Comm_rank(comm, &myId); MPI_Comm_size(comm, &nProcs);
// Broadcasting the matrix dimension
MPI_Bcast (&dim, 1, MPI_INT, root, comm);
// Calculating the vectors of sizes (vdimL) and displacements (vdspl)
divL = (dim / nProcs); rstL = (dim % nProcs);
for (i=0; i<nProcs; i++) vdimL[i] = divL + (i < rstL);
vdspL[0] = 0; for (i=0; i<nProcs; i++) vdspL[i+1] = vdspL[i] + vdimL[i];
dimL = vdimL[myId]; dspL = vdspL[myId];
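// Example of the block partition: dim = 10 and nProcs = 3 give vdimL = {4, 3, 3} and
// vdspL = {0, 4, 7, 10}, so process 1 owns the rows [4, 7).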
// Distribution of the matrix, by blocks
if (root == myId) {
int *vlen = NULL;
CreateInts (&vlen, dim); ComputeLengthfromHeader (spr.vptr, vlen, dim);
for (i=0; i<nProcs; i++) {
if (i != myId) {
// Creating the message for each destination
pcknode = (ptr_PacketNode) malloc (sizeof(PacketNode));
MakeSprMatrixSendPacket (spr, vlen, vdimL[i], vdspL[i], pcknode);
MPI_Send (pcknode->ptr, 1, pcknode->pack, i, Tag_Send_Packet_Matrix_To_Leaf, comm);
MPI_Type_free (&(pcknode->pack));
free (pcknode);
}
}
nnzL = spr.vptr[dspL+dimL] - spr.vptr[dspL];
CreateSparseMatrix (sprL, indexL, dimL, dim, nnzL, 0);
CopyInts (vlen+dspL, sprL->vptr+1, dimL);
CopyInts (spr.vpos+spr.vptr[dspL], sprL->vpos, nnzL);
CopyDoubles (spr.vval+spr.vptr[dspL], sprL->vval, nnzL);
RemoveInts (&vlen);
} else {
MPI_Status sta;
// Compute the number of nonzeroes and creating the local matrix
nnzL = ComputeSprMatrixRecvWeights (root, dimL, comm);
CreateSparseMatrix (sprL, indexL, dimL, dim, nnzL, 0);
// Receiving the data on the local matrix
pcknode = (ptr_PacketNode) malloc (sizeof(PacketNode));
MakeSprMatrixRecvPacket (*sprL, nnzL, pcknode);
MPI_Recv (pcknode->ptr, 1, pcknode->pack, root, Tag_Send_Packet_Matrix_To_Leaf,
comm, &sta);
MPI_Type_free (&(pcknode->pack));
free (pcknode);
}
*(sprL->vptr) = indexL; TransformLengthtoHeader (sprL->vptr, dimL);
return dim;
}
/*********************************************************************************/
// vcols is a vector with dimPos elements, including integer values from 0 to dim-1
// The routine creates a bitmap determining which column indices exist in vcols.
// The bitmap is stored in colsJoin, whose size is colsJoin_dim
void joinColumns (int dim, int *vcols, int dimPos, unsigned char **colsJoin,
int *colsJoin_dim) {
int i, div, rem;
int vec_dim = (dim + (sizeof(unsigned char) * 8) - 1) / (sizeof(unsigned char) * 8);
unsigned char *vec = (unsigned char *) malloc(sizeof(unsigned char) * vec_dim);
for (i=0; i<vec_dim; i++) {
vec[i] = 0x0;
}
for (i=0; i<dimPos; i++) {
div = vcols[i] / (sizeof(unsigned char) * 8);
rem = vcols[i] % (sizeof(unsigned char) * 8);
vec[div] |= (1 << rem);
}
*colsJoin = vec;
*colsJoin_dim = vec_dim;
}
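// Example: dim = 20 and vcols = {3, 17, 3, 9} produce colsJoin_dim = 3 and the bitmap
// colsJoin = {0x08, 0x02, 0x02} (bits 3, 9 and 17 set).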
// From colsJoin, this routine fills the vector perm with the column indices set in the bitmap
// Knowing the partition defined by nProcs and vdspL, this routine extends this
// partition to perm through the vdimP and vdspP vectors
int createPerm (unsigned char *colsJoin, int colsJoin_dim, int *perm, int dim,
int *vdspL, int *vdimP, int *vdspP, int nProcs) {
int i, j, prc = 1, k = 0, col = 0;
vdspP[0] = 0;
for (i=0; i<colsJoin_dim; i++) {
if (colsJoin[i] != 0x0) {
unsigned char car = 0x1;
for (j=0; j<8*sizeof(unsigned char); j++) {
if (col == vdspL[prc]) {
vdimP[prc-1] = k - vdspP[prc-1];
vdspP[prc] = k;
prc++;
}
if (colsJoin[i] & car) {
perm[k] = col;
k++;
}
car <<= 1;
col++;
}
} else {
col += 8*sizeof(unsigned char);
while ((prc <= nProcs) && (col >= vdspL[prc])) {
vdimP[prc-1] = k - vdspP[prc-1];
vdspP[prc] = k;
prc++;
}
}
}
while ((prc <= nProcs) && (col >= vdspL[prc])) {
vdimP[prc-1] = k - vdspP[prc-1];
vdspP[prc] = k;
prc++;
}
return k;
}
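// Example: with dim = 8, nProcs = 2, vdspL = {0, 4, 8} and columns {1, 2, 5, 7} set in the
// bitmap, createPerm returns 4 with perm = {1, 2, 5, 7}, vdimP = {2, 2}, vdspP = {0, 2, 4}.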
// Creation of the MPI_Datatype vectors used to reduce the communication volume
// vectDatatypeP includes MPI_DOUBLE for all processes.
// vectDatatypeR includes the permutation required for each process.
void createVectMPITypes (int myId, int nProcs, int *vdimL, int *vdimP,
int *permR, int *vdimR, int *vdspR,
MPI_Datatype *vectDatatypeP, MPI_Datatype *vectDatatypeR) {
int i;
for (i=0; i<nProcs; i++) {
vectDatatypeP[i] = MPI_DOUBLE;
if (i == myId) {
MPI_Type_contiguous(vdimR[i], MPI_DOUBLE, vectDatatypeR+i);
} else {
MPI_Type_create_indexed_block(vdimR[i], 1, permR+vdspR[i], MPI_DOUBLE,
vectDatatypeR+i);
}
MPI_Type_commit(vectDatatypeR+i);
}
}
// Creation of all structures used to reduce the communication volume
// coll_p2p adapts the contents of the structure for collective or p2p operations
// vecP is created to store the elements required to complete the product in the process
// permP is created and contains the permutation to be applied to vcols.
void createAlltoallwStruct (int coll_p2p, MPI_Comm comm, SparseMatrix matL,
int *vdimL, int *vdspL, int *vdimP, int *vdspP,
double **vecP, int **permP, int *ipermP,
int *vdimR, int *vdspR,
MPI_Datatype *vectDatatypeP, MPI_Datatype *vectDatatypeR) {
// Definition of the variables nProcs and myId
int myId, nProcs;
MPI_Comm_size(comm, &nProcs); MPI_Comm_rank(comm, &myId);
// Creation of column bitmap related to myId.
int colsJoin_dim = 0, dim = matL.dim2;
unsigned char *colsJoin = NULL;
joinColumns (dim, matL.vpos, matL.vptr[matL.dim1], &colsJoin, &colsJoin_dim);
// Creation of permutations, getting information from the column bitmap
int permP_dim = 0;
permP_dim = createPerm (colsJoin, colsJoin_dim, ipermP, dim, vdspL,
vdimP, vdspP, nProcs);
free (colsJoin); colsJoin = NULL;
CreateDoubles (vecP, permP_dim);
CreateInts (permP, permP_dim); CopyInts (ipermP, *permP, permP_dim);
InitInts (ipermP, dim, -1, 0);
ComputeInvPermutation (*permP, ipermP, permP_dim);
// Definition of sizes of the sending pattern from myId
MPI_Alltoall (vdimP, 1, MPI_INT, vdimR, 1, MPI_INT, comm);
vdspR[0] = 0; ComputeHeaderfromLength (vdimR, vdspR, nProcs);
// Creation of the sending pattern from myId
int *permR = NULL, permR_dim = 0;
permR_dim = vdspR[nProcs];
CreateInts (&permR, permR_dim);
MPI_Alltoallv (*permP, vdimP, vdspP, MPI_INT,
permR , vdimR, vdspR, MPI_INT, comm);
CopyShiftInts (permR, permR, permR_dim, -vdspL[myId]);
// Definition of the MPI_Datatype vectors required for communication
createVectMPITypes (myId, nProcs, vdimL, vdimP, permR, vdimR, vdspR,
vectDatatypeP, vectDatatypeR);
// Computation of the percentage of communication that is actually performed
int saving = vdspR[nProcs] - vdimL[myId];
MPI_Allreduce (MPI_IN_PLACE, &saving, 1, MPI_INT, MPI_SUM, comm);
if (myId == 0) {
printf ("%d nnzs of %d = %f %% \n", saving, dim * (nProcs - 1),
100.0 * saving / (dim * (nProcs - 1)));
}
// Adaptation of sizes to complete the communication
InitInts (vdimR, nProcs, 1, 0);
InitInts (vdspR, nProcs+1, 0, 0);
// This step is only required for MPI_Alltoallw
if (coll_p2p) {
ScaleInts (vdspP, sizeof(double), nProcs+1);
}
// Freeing temporary structures
RemoveInts (&permR);
}
// Communications to complete an MPI_Alltoallv while reducing the communication volume
// All elements required to compute the SpMV are gathered into vecP from vecL
// coll_p2p marks if collective or p2p operations are used
void joinDistributeVectorSPMV (int coll_p2p, MPI_Comm comm, double *vecL,
double *vecP, int *vdimP, int *vdspP, int *vdimR,
int *vdspR, MPI_Datatype *vectDatatypeP,
MPI_Datatype *vectDatatypeR) {
if (coll_p2p) {
// Communication using a collective operation
MPI_Alltoallw (vecL, vdimR, vdspR, vectDatatypeR,
vecP, vdimP, vdspP, vectDatatypeP, comm);
} else {
// Definition of the variables nProcs, myId and other variables
int i, k = 0, myId, nProcs;
MPI_Comm_size (comm, &nProcs); MPI_Comm_rank(comm, &myId);
// Definition of the vectors for implementing non-blocking communications
MPI_Status vectSta[2*nProcs-2];
MPI_Request vectReq[2*nProcs-2];
// Non-blocking send communications
for (i=0; i<nProcs; i++) {
if (i != myId) {
MPI_Isend (vecL+vdspR[i], vdimR[i], vectDatatypeR[i], i, Tag_NonBlocking_SpMV,
comm, vectReq+k);
k++;
}
}
// Non-blocking receive communications
for (i=0; i<nProcs; i++) {
if (i != myId) {
MPI_Irecv (vecP+vdspP[i], vdimP[i], vectDatatypeP[i], i, Tag_NonBlocking_SpMV,
comm, vectReq+k);
k++;
}
}
// Local copy
memcpy (vecP+vdspP[myId], vecL, vdimP[myId] * sizeof(double));
// Waiting until all communications are complete
MPI_Waitall (2*nProcs-2, vectReq, vectSta);
}
}
/*********************************************************************************/
#ifndef ToolsMPI
#define ToolsMPI 1
// #include <SparseMatricesNew.h>
#include <SparseProduct.h>
/*********************************************************************************/
#define Tag_Demand_Matrix_From_Root 1001
#define Tag_Send_Task_To_Leaf 1002
#define Tag_Receive_Dims_Factor_From_Leaf 1003
#define Tag_End_Distribution_To_Leaf 1004
#define Tag_Send_Dims_Matrix_To_Leaf 1006
#define Tag_Send_Data_Matrix_To_Leaf 1007
#define Tag_Demand_Vector_From_Root 1011
#define Tag_Send_Dims_Vector_To_Father 1015
#define Tag_Send_Data_Vector_To_Father 1016
#define Tag_Send_Task_To_Root 1021
#define Tag_Send_Solution_To_Root 1022
#define Tag_Send_Dims_Vector_To_Children 1025
#define Tag_Send_Data_Vector_To_Children 1026
#define Tag_End_Resolution_To_Leaf 1031
#define Tag_Send_Vector_Up_1 1041
#define Tag_Send_Vector_Up_2 1042
#define Tag_Send_Packet_Matrix_To_Leaf 210
#define Tag_Receive_Data_Factor_From_Leaf 220
#define Tag_Send_Vector_To_Leaf 230
// #define Tag_FactorVector 240
#define Tag_NonBlocking_SpMV 1051
/*********************************************************************************/
// typedef struct SimpleNode {
typedef struct {
MPI_Status sta;
MPI_Request req;
} SimpleNode, *ptr_SimpleNode;
// Return true if the corresponding asynchronous communication,
// defined by data, has been finalized
extern int TestSimple (void *data);
/*********************************************************************************/
// #define MaxPacketSize 10000
#define MaxPacketSize 5000
// typedef struct PacketNode {
typedef struct {
unsigned char *ptr;
int lblq[2*MaxPacketSize+3], vlen[MaxPacketSize];
MPI_Aint dspl[2*MaxPacketSize+3];
MPI_Datatype type[2*MaxPacketSize+3];
MPI_Datatype pack;
MPI_Status sta;
MPI_Request req;
} PacketNode, *ptr_PacketNode;
/*********************************************************************************/
extern void Synchronization (MPI_Comm Synch_Comm, char *message);
/*********************************************************************************/
// Return true if the corresponding asynchronous communication,
// defined by data, has been finalized
extern int TestSimple (void *data);
// Return true if the corresponding asynchronous communication,
// defined by data, has been finalized
extern int TestPacket (void *data);
// Detect lost messages whose destination is a process
// of the communicator Err_Comm
extern void DetectErrorsMPI (MPI_Comm Err_Comm);
/*********************************************************************************/
// Prepare the structures required to send/receive a SparseMatrix structure
// * spr refers to the SparseMatrix from where the data is obtained
// * size is the number of rows to be sent
// * weight is the number of nonzeros to be sent
// * pcknode, where the resulting packet is stored
extern void MakeSprMatrixPacket (SparseMatrix spr, int size, int weight, ptr_PacketNode pcknode);
extern void MakeSprMatrixSendPacket (SparseMatrix spr, int *len, int dimL, int dspL,
ptr_PacketNode pcknode);
extern void MakeSprMatrixRecvPacket (SparseMatrix sprL, int nnzL, ptr_PacketNode pcknode);
// Compute the number of nonzero elements of a PermSprMatrixRecvPacket packet
// * prc_src is the process from which the message is sent
// * sizes is the number of rows to be received
// * comm is the communicator in which the message is sent
extern int ComputeSprMatrixRecvWeights (int prc_src, int sizes, MPI_Comm comm);
extern int DistributeMatrix (SparseMatrix spr, int index, ptr_SparseMatrix sprL, int indexL,
int *vdimL, int *vdspL, int root, MPI_Comm comm);
/*********************************************************************************/
// vcols is a vector with dimPos elements, including integer values from 0 to dim-1
// The routine creates a bitmap determining which column indices exist in vcols.
// The bitmap is stored in colsJoin, whose size is colsJoin_dim
extern void joinColumns (int dim, int *vcols, int dimPos, unsigned char **colsJoin,
int *colsJoin_dim);
// From colsJoin, this routine fills the vector perm with the column indices set in the bitmap
// Knowing the partition defined by nProcs and vdspL, this routine extends this
// partition to perm through the vdimP and vdspP vectors
extern int createPerm (unsigned char *colsJoin, int colsJoin_dim, int *perm, int dim,
int *vdspL, int *vdimP, int *vdspP, int nProcs);
// Creation of the MPI_Datatype vectors used to reduce the communication volume
// vectDatatypeP includes MPI_DOUBLE for all processes.
// vectDatatypeR includes the permutation required for each process.
extern void createVectMPITypes (int myId, int nProcs, int *vdimL, int *vdimP,
int *permR, int *vdimR, int *vdspR,
MPI_Datatype *vectDatatypeP, MPI_Datatype *vectDatatypeR);
// Creation of all structures used to reduce the communication volume
// coll_p2p adapts the contents of the structure for collective or p2p operations
// vecP is created to store the elements required to complete the product in the process
// permP is created and contains the permutation to be applied to vcols.
extern void createAlltoallwStruct (int coll_p2p, MPI_Comm comm, SparseMatrix matL,
int *vdimL, int *vdspL, int *vdimP, int *vdspP,
double **vecP, int **permP, int *ipermP,
int *vdimR, int *vdspR,
MPI_Datatype *vectDatatypeP, MPI_Datatype *vectDatatypeR);
// Communications to complete an MPI_Alltoallv while reducing the communication volume
// All elements required to compute the SpMV are gathered into vecP from vecL
// coll_p2p marks if collective or p2p operations are used
extern void joinDistributeVectorSPMV (int coll_p2p, MPI_Comm comm, double *vecL,
double *vecP, int *vdimP, int *vdspP, int *vdimR,
int *vdspR, MPI_Datatype *vectDatatypeP,
MPI_Datatype *vectDatatypeR);
/*********************************************************************************/
#endif
#ifndef COMMON_H
#define COMMON_H
#include <math.h>
// static, so the header can be included from several translation units without duplicate symbols
static double norm_inf(int n_dist, double *res_err) {
double nrm = 0.0;
for (int i = 0; i < n_dist; i++) {
double tmp = fabs(res_err[i]);
if (nrm < tmp)
nrm = tmp;
}
return nrm;
}
#endif // COMMON_H
int ch_eqi ( char ch1, char ch2 );
int ch_is_digit ( char c );
int ch_is_format_code ( char c );
int ch_to_digit ( char ch );
void hb_exact_read ( FILE *input, int nrow, int nrhs, int rhscrd,
char *rhsfmt, char *rhstyp, double exact[] );
void hb_exact_write ( FILE *output, int nrow, int nrhs, int rhscrd,
char *rhsfmt, char *rhstyp, double exact[] );
void hb_file_read ( FILE *input, char **title, char **key, int *totcrd,
int *ptrcrd, int *indcrd, int *valcrd, int *rhscrd, char **mxtype, int *nrow,
int *ncol, int *nnzero, int *neltvl, char **ptrfmt, char **indfmt, char **valfmt,
char **rhsfmt, char **rhstyp, int *nrhs, int *nrhsix, int **colptr,
int **rowind, double **values, double **rhsval, int **rhsptr, int **rhsind,
double **rhsvec, double **guess, double **exact );
void hb_file_write ( FILE *output, char *title, char *key, int totcrd,
int ptrcrd, int indcrd, int valcrd, int rhscrd, char *mxtype, int nrow,
int ncol, int nnzero, int neltvl, char *ptrfmt, char *indfmt, char *valfmt,
char *rhsfmt, char *rhstyp, int nrhs, int nrhsix, int colptr[],
int rowind[], double values[], double rhsval[], int rhsptr[], int rhsind[],
double rhsvec[], double guess[], double exact[] );
void hb_guess_read ( FILE *input, int nrow, int nrhs, int rhscrd,
char *rhsfmt, char *rhstyp, double guess[] );
void hb_guess_write ( FILE *output, int nrow, int nrhs, int rhscrd,
char *rhsfmt, char *rhstyp, double guess[] );
void hb_header_print ( char *title, char *key, int totcrd, int ptrcrd,
int indcrd, int valcrd, int rhscrd, char *mxtype, int nrow, int ncol,
int nnzero, int neltvl, char *ptrfmt, char *indfmt, char *valfmt,
char *rhsfmt, char *rhstyp, int nrhs, int nrhsix );
void hb_header_read ( FILE *input, char **title, char **key, int *totcrd,
int *ptrcrd, int *indcrd, int *valcrd, int *rhscrd, char **mxtype, int *nrow,
int *ncol, int *nnzero, int *neltvl, char **ptrfmt, char **indfmt, char **valfmt,
char **rhsfmt, char **rhstyp, int *nrhs, int *nrhsix );
void hb_header_write ( FILE *output, char *title, char *key, int totcrd,
int ptrcrd, int indcrd, int valcrd, int rhscrd, char *mxtype, int nrow,
int ncol, int nnzero, int neltvl, char *ptrfmt, char *indfmt, char *valfmt,
char *rhsfmt, char *rhstyp, int nrhs, int nrhsix );
double *hb_matvec_a_mem ( int nrow, int ncol, int nnzero, int nrhs,
int colptr[], int rowind[], double values[], double exact[] );
void hb_rhs_read ( FILE *input, int nrow, int nnzero, int nrhs, int nrhsix,
int rhscrd, char *ptrfmt, char *indfmt, char *rhsfmt, char *mxtype,
char *rhstyp, double rhsval[], int rhsind[], int rhsptr[], double rhsvec[] );
void hb_rhs_write ( FILE *output, int nrow, int nnzero, int nrhs, int nrhsix,
int rhscrd, char *ptrfmt, char *indfmt, char *rhsfmt, char *mxtype,
char *rhstyp, double rhsval[], int rhsind[], int rhsptr[], double rhsvec[] );
void hb_structure_print ( int ncol, char *mxtype, int nnzero, int neltvl,
int colptr[], int rowind[] );
void hb_structure_read ( FILE *input, int ncol, char *mxtype, int nnzero,
int neltvl, int ptrcrd, char *ptrfmt, int indcrd, char *indfmt,
int colptr[], int rowind[] );
void hb_structure_write ( FILE *output, int ncol, char *mxtype,
int nnzero, int neltvl, char *ptrfmt, char *indfmt, int colptr[],
int rowind[] );
int *hb_ua_colind ( int ncol, int colptr[], int nnzero );
void hb_values_print ( int ncol, int colptr[], char *mxtype, int nnzero,
int neltvl, double values[] );
void hb_values_read ( FILE *input, int valcrd, char *mxtype, int nnzero,
int neltvl, char *valfmt, double values[] );
void hb_values_write ( FILE *output, int valcrd, char *mxtype,
int nnzero, int neltvl, char *valfmt, double values[] );
double *hb_vecmat_a_mem ( int nrow, int ncol, int nnzero, int nrhs,
int colptr[], int rowind[], double values[], double exact[] );
int i4_max ( int i1, int i2 );
int i4_min ( int i1, int i2 );
void i4vec_print ( int n, int a[], char *title );
void i4vec_print_part ( int n, int a[], int max_print, char *title );
void r8mat_print ( int m, int n, double a[], char *title );
void r8mat_print_some ( int m, int n, double a[], int ilo, int jlo, int ihi,
int jhi, char *title );
void r8vec_print ( int n, double a[], char *title );
void r8vec_print_part ( int n, double a[], int max_print, char *title );
int s_len_trim ( char *s );
char *s_substring ( char *s, int a, int b );
void s_to_format ( char *s, int *r, char *code, int *w, int *m );
void s_trim ( char *s );
void timestamp ( );
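/* Hedged usage sketch (added for illustration): read a Harwell-Boeing file with
   hb_file_read, using exactly the signature declared above, and report the matrix
   size. The file name is supplied by the caller and freeing the arrays allocated
   by the reader is omitted for brevity. */
#include <stdio.h>
#include <hb_io.h>

int read_hb_matrix (const char *filename)
{
    FILE *input = fopen (filename, "r");
    if (input == NULL) {
        fprintf (stderr, "cannot open %s\n", filename);
        return -1;
    }
    char *title = NULL, *key = NULL, *mxtype = NULL;
    char *ptrfmt = NULL, *indfmt = NULL, *valfmt = NULL, *rhsfmt = NULL, *rhstyp = NULL;
    int totcrd, ptrcrd, indcrd, valcrd, rhscrd;
    int nrow, ncol, nnzero, neltvl, nrhs, nrhsix;
    int *colptr = NULL, *rowind = NULL, *rhsptr = NULL, *rhsind = NULL;
    double *values = NULL, *rhsval = NULL, *rhsvec = NULL, *guess = NULL, *exact = NULL;

    hb_file_read (input, &title, &key, &totcrd, &ptrcrd, &indcrd, &valcrd, &rhscrd,
                  &mxtype, &nrow, &ncol, &nnzero, &neltvl, &ptrfmt, &indfmt, &valfmt,
                  &rhsfmt, &rhstyp, &nrhs, &nrhsix, &colptr, &rowind, &values,
                  &rhsval, &rhsptr, &rhsind, &rhsvec, &guess, &exact);

    printf ("%s: %d x %d matrix, %d nonzeros, type %s\n", filename, nrow, ncol, nnzero, mxtype);
    fclose (input);
    return 0;
}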
## ============================================================
## INTEL COMPILERS
## ============================================================
#
#CC = mpicc
#CFLAGS = -Wall -g -openmp -I. -I${MKLROOT}/include
#CFLAGS = -Wall -openmp -I. -I${MKLROOT}/include
#CLFLAGS = -Wall -fopenmp -I. -I${MKLROOT}/include
#
#CLINKER = mpicc
#FLINKER = mpif77
#LDFLAGS = -openmp
#LIBLIST = -L. -lhbio -lclock -lsparsenew -lvector -lm -lc
#LIBLIST = -L. -lhbio -lclock -lvector -lm -lc
#LIBLIST = -L. -lhbio -lclock -lm -lc
#LIBLIST = -L. -lsparse -lvector -lclock -lm -lc
#
##LIBMKL = -L$(MKLROOT)/lib/intel64 $(MKL_FMULTIS_INTEL)
#LIBMKL = -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread
# ============================================================
# GNU COMPILERS
# ============================================================
CC = mpicxx
#CFLAGS = -std=c++11 -mavx -fabi-version=0 -Wall -fopenmp -I. -I${MKLROOT}/include -I${HOME}/libs
CFLAGS = -std=c++11 -Wall -fopenmp -I. -I${MKLROOT}/include -I${HOME}/libs
CLFLAGS = -Wall -fopenmp -I. -I${MKLROOT}/include
CLINKER = mpicxx
LDFLAGS = -fopenmp
#LIBLIST = -L. -lhbio -lclock -lsparsenew -lvector -lm -lc
#LIBLIST = -L. -lhbio -lclock -lvector -lm -lc
#LIBLIST = -L. -lhbio -lclock -lm -lc
LIBLIST = -L. -lsparse -lvector -lclock -lm -lc
LIBMKL = -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread
# ============================================================
AR = ar
ARFLAGS = ru
RL = ranlib
# ============================================================
OBJS_CLOCK = reloj.o
OBJS_VECTOR = ScalarVectors.o
OBJS_SPARSE = hb_io.o SparseProduct.o
OBJS = $(OBJS_CLOCK) $(OBJS_VECTOR) $(OBJS_SPARSE)
# ============================================================
default: libclock.a libvector.a libsparse.a BiCGStab
libshared.a : $(OBJS)
$(AR) $(ARFLAGS) $@ $?
$(RL) $(RLFLAGS) $@
libclock.a : $(OBJS_CLOCK)
$(AR) $(ARFLAGS) $@ $?
$(RL) $(RLFLAGS) $@
libvector.a : $(OBJS_VECTOR)
$(AR) $(ARFLAGS) $@ $?
$(RL) $(RLFLAGS) $@
libsparse.a : $(OBJS_SPARSE)
$(AR) $(ARFLAGS) $@ $?
$(RL) $(RLFLAGS) $@
BiCGStab: BiCGStab.o ToolsMPI.o matrix.o
$(CLINKER) $(LDFLAGS) -o BiCGStab BiCGStab.o ToolsMPI.o matrix.o $(LIBMKL) $(LIBLIST)
# ============================================================
.c.o:
echo compiling
$(CC) $(CFLAGS) -c $*.c
clean:
rm -f *.o *.a BiCGStab
# ============================================================
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
//#include "global.h"
//#include "debug.h"
#include "matrix.h"
//#include "cg_aux_conLabel.h"
// finite-difference discretization of a 3D Poisson equation with a 7-, 19- or 27-point stencil
void generate_Poisson3D(ptr_SparseMatrix A, const int p, const int stencil_points, int dspL, int dimL, int dim)
{
int p2 = p * p, i, j=0; //, pos=0;
int pos = 0;
int *vptr = A->vptr;
const int *stenc_c;
const double *stenc_v;
const int stenc_c7[] = { -p2, -p, -1, 0, 1, p, p2};
const double stenc_v7[] = { -1.0, -1.0, -1.0, 6.0, -1.0, -1.0, -1.0};
const double r = 1.0;
const int stenc_c19[] =
{
-p2-p, -p2-1, -p2+0, -p2+1, -p2+p,
-p-1, -p, -p+1, -1, 0, 1, p-1, p, p+1,
p2-p, p2-1, p2+0, p2+1, p2+p
};
const double stenc_v19[] =
{
-(1+r), -(1+r), -(8*r-4), -(1+r), -(1+r),
-2, -(6-2*r), -2, -(6-2*r), -(-32-16*r), -(6-2*r), -2, -(6-2*r), -2,
-(1+r), -(1+r), -(8*r-4), -(1+r), -(1+r)
};
const int stenc_c27[] =
{
-p2-p-1, -p2-p, -p2-p+1, -p2-1, -p2+0, -p2+1, -p2+p-1, -p2+p, -p2+p+1,
-p-1, -p, -p+1, -1, 0, 1, p-1, p, p+1,
p2-p-1, p2-p, p2-p+1, p2-1, p2+0, p2+1, p2+p-1, p2+p, p2+p+1
};
const double stenc_v27[] =
{
-(2+r), -(8-10*r), -(2+r), -(8-10*r), -(100*r-40), -(8-10*r), -(2+r), -(8-10*r), -(2+r),
-(20-2*r), -(80-20*r), -(20-2*r), -(80-20*r), -(-400-200*r), -(80-20*r), -(20-2*r), -(80-20*r), -(20-2*r),
-(2+r), -(8-10*r), -(2+r), -(8-10*r), -(100*r-40), -(8-10*r), -(2+r), -(8-10*r), -(2+r)
};
if( stencil_points == 7 )
{
stenc_c = stenc_c7;
stenc_v = stenc_v7;
}
else if( stencil_points == 19 )
{
stenc_c = stenc_c19;
stenc_v = stenc_v19;
}
else if( stencil_points == 27 )
{
stenc_c = stenc_c27;
stenc_v = stenc_v27;
}
else
// this should be impossible, but silences compiler warnings
return;
// to compute the nnz, note that a stencil point at distance |d| from the diagonal
// is excluded from the matrix on exactly d rows; every other stencil point appears on every row
// only the local part of the matrix is generated here
//for(j=start_row; j<end_row; j++)
printf("Generate matrix ---- dim: %d, dimL: %d, dspL: %d\n", dim, dimL, dspL);
for(j=0; j<dimL; j++) //M
{
vptr[j] = pos;
// printf("A->vptr[%d]: %d \n", j, A->vptr[j]);
for(i=0; i<stencil_points; i++){
int val = j + dspL + stenc_c[i];
//long val = j + dspL + stenc_c[i];
// printf("j: %d dspL: %d, stenc_c[%d]: %d, dim: %d \n", j, dspL, i, stenc_c[i], dim);
if( val >= 0 && val < dim )
{
A->vpos[pos] = val;
A->vval[pos] = stenc_v[i];
// printf("j: %d, stenc_c[%d]: %d, A->vpos[%d]: %d, A->vval[%d]: %f\n", j, i, stenc_c[i], pos, A->vpos[pos], pos, A->vval[pos]);
pos++;
}
}
}
// point to just beyond last element
//MA->vptr[j] = pos;
vptr[j] = pos;
//A->elemc = pos;
// A->vptr[j-start_row] = pos;
}
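/* Hedged sketch (added for illustration) of the nnz count described in the comment
   inside generate_Poisson3D: over the whole dim x dim matrix, a stencil offset at
   distance |d| from the diagonal is dropped on exactly d rows, so the global number
   of nonzeros is stencil_points*dim minus the sum of the absolute offsets. For the
   local block, stencil_points * dimL is a safe upper bound to pass to allocate_matrix. */
#include <stdlib.h>

int poisson3D_global_nnz (int dim, int stencil_points, const int *stenc_c)
{
    int nnz = stencil_points * dim;
    for (int i = 0; i < stencil_points; i++)
        nnz -= abs (stenc_c[i]);   /* an offset at distance |d| misses d rows */
    return nnz;
}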
// finite-difference discretization of a 3D Poisson equation with a 7-, 19- or 27-point stencil
void generate_Poisson3D_filled(ptr_SparseMatrix A, const int p, const int stencil_points, int band_width, int dspL, int dimL, int dim)
{
int p2 = p * p, i, j=0; //, pos=0;
int pos = 0;
int *vptr = A->vptr;
const double value = 0.1;
const int *stenc_c;
const double *stenc_v;
double eps = 0.0001;
const int stenc_c7[] = { -p2, -p, -1, 0, 1, p, p2};
const double stenc_v7[] = { -1.0, -1.0, -1.0, 6.0, -(1.0-eps), -(1.0-eps), -(1.0-eps)};
const double r = 1.0;
const int stenc_c19[] =
{
-p2-p, -p2-1, -p2+0, -p2+1, -p2+p,
-p-1, -p, -p+1, -1, 0, 1, p-1, p, p+1,
p2-p, p2-1, p2+0, p2+1, p2+p
};
const double stenc_v19[] =
{
-(1+r), -(1+r), -(8*r-4), -(1+r), -(1+r),
-2, -(6-2*r), -2, -(6-2*r), -(-32-16*r), -(6-2*r), -2, -(6-2*r), -2,
-(1+r), -(1+r), -(8*r-4), -(1+r), -(1+r)
};
const int stenc_c27[] =
{
-p2-p-1, -p2-p, -p2-p+1, -p2-1, -p2+0, -p2+1, -p2+p-1, -p2+p, -p2+p+1,
-p-1, -p, -p+1, -1, 0, 1, p-1, p, p+1,
p2-p-1, p2-p, p2-p+1, p2-1, p2+0, p2+1, p2+p-1, p2+p, p2+p+1
};
const double stenc_v27[] =
{
-(2+r), -(8-10*r), -(2+r), -(8-10*r), -(100*r-40), -(8-10*r+eps), -(2+r-eps), -(8-10*r+eps), -(2+r-eps),
-(20-2*r), -(80-20*r), -(20-2*r), -(80-20*r), -(-400-200*r), -(80-20*r+eps), -(20-2*r+eps), -(80-20*r+eps), -(20-2*r+eps),
-(2+r), -(8-10*r), -(2+r), -(8-10*r), -(100*r-40-eps), -(8-10*r+eps), -(2+r-eps), -(8-10*r+eps), -(2+r-eps)
};
if( stencil_points == 7 )
{
stenc_c = stenc_c7;
stenc_v = stenc_v7;
}
else if( stencil_points == 19 )
{
stenc_c = stenc_c19;
stenc_v = stenc_v19;
}
else if( stencil_points == 27 )
{
stenc_c = stenc_c27;
stenc_v = stenc_v27;
}
else
// this should be impossible, but silences compiler warnings
return;
// to compute the nnz, note that a stencil point at distance |d| from the diagonal
// is excluded from the matrix on exactly d rows; every other stencil point appears on every row
// only the local part of the matrix is generated here
//for(j=start_row; j<end_row; j++)
printf("Generate matrix ---- dim: %d, dimL: %d, dspL: %d, band_width: %d \n", dim, dimL, dspL, band_width);
for(j=0; j<dimL; j++) //M
{
int iii;
//long iii;
int jjj = j + dspL;
int prv = 0;
vptr[j] = pos;
//printf("j: %d, dspL: %d, pos: %d, dim: %d, band_width: %d\n", j, dspL, pos, dim, band_width);
//printf(stderr, "A->vptr[%d]: %ld \n", j, vptr[j]);
for(i=0; i<stencil_points; i++){
int val = j + dspL + stenc_c[i];
//long val = j + dspL + stenc_c[i];
//printf("j: %d, i: %d, val: %d, dspL: %d, pos: %d, stenc_c[%d]: %d, dim: %d, band_width: %d\n", j, i, val, dspL, pos, i, stenc_c[i], dim, band_width);
//printf("j: %d, val: %d, dspL: %d, pos: %ld , stenc_c[%d]: %d, dim: %d, band_width: %d \n", j, val, dspL, pos, i, stenc_c[i], dim, band_width);
// if( j + dspL + stenc_c[i] >= 0 && j + dspL + stenc_c[i] < dim )
if( val >= 0 && val < dim )
{
// Check whether val falls inside the band
// if( val >= (j-band_width) && val <= (j+band_width) ) {
if( val >= (jjj-band_width) && val <= (jjj+band_width) ) {
// Add the band elements that precede val
// int kk1 = ((j-band_width) < 0)? 0: (j-band_width);
int kk1 = ((jjj-band_width) < 0)? 0: (jjj-band_width);
if (prv != 0) kk1 = prv;
//printf("Entra en if kk1: %d, val: %d j: %d, band_width: %d, j-band_width: %d, j+band_width: %d \n", kk1, val, j, band_width, j-band_width, j+band_width);
for (iii=kk1; iii<val; iii++) {
A->vpos[pos] = iii;
A->vval[pos] = value;
pos++;
}
prv = val + 1;
// Choose the correct value for val
// if ( val == j ) {
if ( val == jjj ) {
//printf("Diagonal val: %d, pos: %ld \n", val, pos);
A->vpos[pos] = val;
A->vval[pos] = stenc_v[i] + band_width * value;
//A->vval[pos] = stenc_v[i] + (2 * band_width * value) / 20;
pos++;
} else {
//printf("No Diagonal val: %d, j: %ld \n", val, pos);
A->vpos[pos] = val;
A->vval[pos] = stenc_v[i] + value;
pos++;
}
// } else if (val < (j-band_width)) {
} else if (val < (jjj-band_width)) {
// Choose the correct value for val
A->vpos[pos] = val;
A->vval[pos] = stenc_v[i];
pos++;
// prv = val + 1;
} else {
// Add the band elements that precede val
// int kk2 = ((j+band_width) >= dim)? dim-1: (j+band_width);
int kk2 = ((jjj+band_width) >= dim)? dim-1: (jjj+band_width);
// if (prv == 0) prv = j+1;
if (prv == 0) prv = jjj+1;
//printf("No entra en if prv: %d, kk2: %d, val: %d, j: %d, band_width: %d, j-band_width: %d, j+band_width: %d \n", prv, kk2, val, j, band_width, j-band_width, j+band_width);
for (iii=prv; iii<=kk2; iii++) {
A->vpos[pos] = iii;
A->vval[pos] = value;
pos++;
}
prv = kk2 + 1;
// Choose the correct value for val
A->vpos[pos] = val;
A->vval[pos] = stenc_v[i];
pos++;
}
/*
// A->vpos[pos] = j + stenc_c[i] + dspL;
A->vpos[pos] = val;
A->vval[pos] = stenc_v[i];
// printf("j: %d, stenc_c[%d]: %d, A->vpos[%d]: %d, A->vval[%d]: %f\n", j, i, stenc_c[i], pos, A->vpos[pos], pos, A->vval[pos]);
pos++;
*/
}
}
// if (prv <= (j+band_width)) {
if (prv <= (jjj+band_width)) {
// int kk2 = ((j+band_width) >= dim)? dim-1: (j+band_width);
int kk2 = ((jjj+band_width) >= dim)? dim-1: (jjj+band_width);
// if (prv == 0) prv = j+1;
if (prv == 0) prv = jjj+1;
//printf("No entra en if prv: %d, kk2: %d, j: %d, band_width: %d, j-band_width: %d, j+band_width: %d \n", prv, kk2, j, band_width, j-band_width, j+band_width);
for (iii=prv; iii<=kk2; iii++) {
A->vpos[pos] = iii;
A->vval[pos] = value;
pos++;
}
}
}
// point to just beyond last element
//A->vptr[j] = pos;
vptr[j] = pos;
//A->elemc = pos;
// A->vptr[j-start_row] = pos;
printf("FIN Generate matrix ---- dim: %d, dimL: %d, dspL: %d, band_width: %d \n", dim, dimL, dspL, band_width);
}
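/* Hedged note (added for illustration): generate_Poisson3D_filled writes, for every
   local row, the band entries in [row-band_width, row+band_width] plus the stencil
   entries that fall outside the band, so the bound below is always sufficient for
   allocate_matrix; the exact count is smaller because indices outside [0, dim) are
   skipped and in-band stencil entries are not duplicated. */
static inline int poisson3D_filled_nnz_bound (int dimL, int band_width, int stencil_points)
{
    return dimL * (2 * band_width + 1 + stencil_points);
}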
void generate_Poisson3D_perm(ptr_SparseMatrix A, const int p, const int stencil_points, int init, int step, int dimL, int dim)
{
int p2 = p * p, i, j=0; //, pos=0;
int pos = 0;
int *vptr = A->vptr;
const int *stenc_c;
const double *stenc_v;
const int stenc_c7[] = { -p2, -p, -1, 0, 1, p, p2};
//const double stenc_v7[] = { 1.0, 1.0, 1.0,-6.0, 1.0, 1.0, 1.0};
const double stenc_v7[] = { -1.0, -1.0, -1.0, 6.0, -1.0, -1.0, -1.0};
const double r = 1.0;
const int stenc_c19[] =
{
-p2-p, -p2-1, -p2+0, -p2+1, -p2+p,
-p-1, -p, -p+1, -1, 0, 1, p-1, p, p+1,
p2-p, p2-1, p2+0, p2+1, p2+p
};
//const double stenc_v19[] =
// {
// 1+r, 1+r, 8*r-4, 1+r, 1+r,
// 2, 6-2*r, 2, 6-2*r, -32-16*r, 6-2*r, 2, 6-2*r, 2,
// 1+r, 1+r, 8*r-4, 1+r, 1+r
// };
const double stenc_v19[] =
{
-(1+r), -(1+r), -(8*r-4), -(1+r), -(1+r),
-2, -(6-2*r), -2, -(6-2*r), -(-32-16*r), -(6-2*r), -2, -(6-2*r), -2,
-(1+r), -(1+r), -(8*r-4), -(1+r), -(1+r)
};
const int stenc_c27[] =
{
-p2-p-1, -p2-p, -p2-p+1, -p2-1, -p2+0, -p2+1, -p2+p-1, -p2+p, -p2+p+1,
-p-1, -p, -p+1, -1, 0, 1, p-1, p, p+1,
p2-p-1, p2-p, p2-p+1, p2-1, p2+0, p2+1, p2+p-1, p2+p, p2+p+1
};
//const double stenc_v27[] =
// {
// 2+r, 8-10*r, 2+r, 8-10*r, 100*r-40, 8-10*r, 2+r, 8-10*r, 2+r,
// 20-2*r, 80-20*r, 20-2*r, 80-20*r, -400-200*r, 80-20*r, 20-2*r, 80-20*r, 20-2*r,
// 2+r, 8-10*r, 2+r, 8-10*r, 100*r-40, 8-10*r, 2+r, 8-10*r, 2+r
// };
const double stenc_v27[] =
{
-(2+r), -(8-10*r), -(2+r), -(8-10*r), -(100*r-40), -(8-10*r), -(2+r), -(8-10*r), -(2+r),
-(20-2*r), -(80-20*r), -(20-2*r), -(80-20*r), -(-400-200*r), -(80-20*r), -(20-2*r), -(80-20*r), -(20-2*r),
-(2+r), -(8-10*r), -(2+r), -(8-10*r), -(100*r-40), -(8-10*r), -(2+r), -(8-10*r), -(2+r)
};
if( stencil_points == 7 )
{
stenc_c = stenc_c7;
stenc_v = stenc_v7;
}
else if( stencil_points == 19 )
{
stenc_c = stenc_c19;
stenc_v = stenc_v19;
}
else if( stencil_points == 27 )
{
stenc_c = stenc_c27;
stenc_v = stenc_v27;
}
else
// this should be impossible, but silences compiler warnings
return;
// to compute the nnz, note that a stencil point at distance |d| from the diagonal
// is excluded from the matrix on exactly d rows; every other stencil point appears on every row
/*A->nnz = stencil_points * p3 ;
for(i=0; i<stencil_points; i++)
A->nnz -= abs(stenc_c[i]);*/ //M
//A->nnz = A->nnz / nProcs; //M
// only the local part of the matrix is generated here
//for(j=start_row; j<end_row; j++)
int dimB = (step == 1) ? 0: (dim / step);
int resB = (step == 1) ? 0: (dim % step);
int row=init;
// printf ("init = %d , step = %d , dimB = %d , dim = %d\n", init, step, dimB, dim);
for(j=0; j<dimL; j++) //M
// for(j=init; j<dim; j+=step) //M
{
//A->vptr[j-start_row] = pos;
//MA->vptr[j] = pos;
vptr[j] = pos;
// printf("A->vptr[%d]: %d \n", j, A->vptr[j]);
// printf ("(%d) row = %d , pos = %d\n", init, row-1, pos);
for(i=0; i<stencil_points; i++){
int val = row + stenc_c[i];
int val1 = (val / step);
int val2 = (val % step);
// int k = (val2 * dimB + val1) ;
int k = (val2 * dimB + val1 + ((val2 < resB)? val2: resB)) ;
//long k = (val2 * dimB + val1 + ((val2 < resB)? val2: resB)) ;
// printf("j: %d dspL: %d, stenc_c[%d]: %d, dim: %d \n", j, dspL, i, stenc_c[i], dim);
// if( k >= 0 && k < dim )
if( val >= 0 && val < dim )
{
A->vpos[pos] = k;
A->vval[pos] = stenc_v[i];
// printf("j: %d, stenc_c[%d]: %d, A->vpos[%d]: %d, A->vval[%d]: %f\n", j, i, stenc_c[i], pos, A->vpos[pos], pos, A->vval[pos]);
pos++;
}
}
row += step;
}
// point to just beyond last element
//M A->vptr[j] = pos;
vptr[j] = pos;
//A->elemc = pos;
// A->vptr[j-start_row] = pos;
}
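/* Hedged sketch (added for illustration) of the index mapping used in
   generate_Poisson3D_perm: a global row index val that is dealt out cyclically with
   stride step (block q owns rows q, q+step, q+2*step, ...) is renumbered into a block
   layout in which block q starts at q*dimB + min(q, resB). The small dim and step
   below are illustrative values only. */
#include <stdio.h>

int main (void)
{
    int dim = 10, step = 3;
    int dimB = (step == 1) ? 0 : (dim / step);   /* base block size              */
    int resB = (step == 1) ? 0 : (dim % step);   /* the first resB blocks get +1 */
    for (int val = 0; val < dim; val++) {
        int val1 = val / step;                   /* position inside the block    */
        int val2 = val % step;                   /* owning block (residue)       */
        int k = val2 * dimB + val1 + ((val2 < resB) ? val2 : resB);
        printf ("global row %d -> permuted index %d (block %d)\n", val, k, val2);
    }
    return 0;
}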
void allocate_matrix(const int m, const int n, const int nnz, ptr_SparseMatrix A)
{
A->dim1 = m;
A->dim2 = n;
//A->elemc = nnz;
//long *vptr = A->vptr;
//A->vptr = (int*)calloc((n+1), sizeof(int));
A->vptr = (int*)calloc((n+1), sizeof(int));
//vptr = (long*)calloc((n+1), sizeof(long));
A->vpos = (int*)calloc(nnz, sizeof(int));
//A->vpos = (long *)calloc(nnz, sizeof(long));
A->vval = (double*)calloc(nnz, sizeof(double));
/* if( ! A->vval )
{
fprintf(stderr, "Allocating vval failed !\n");
exit(2);
}
if (! A->vpos )
{
fprintf(stderr, "Allocating vpos failed !\n");
exit(2);
}
if (! A->vptr )
{
fprintf(stderr, "Allocating vptr failed !\n");
exit(2);
}*/
if( ! A->vval || ! A->vpos || ! A->vptr )
{
fprintf(stderr, "Allocating sparse matrix of size %d rows and %d non-zeros failed !\n", n, nnz);
exit(2);
}
fprintf(stderr, "Matrix allocated\n");
}
void ScaleFirstRowCol(SparseMatrix A, int despL, int dimL, int myId, int root, double factor){
// To generate ill-conditioned matrices
int i;
if (myId == root) {
for (i=A.vptr[0]; i< A.vptr[1]; i++)
A.vval[i] *= factor;
}
if (despL == 0) {
i = 0;
while((i < dimL) && (A.vpos[A.vptr[i]] == 0)) {
A.vval[A.vptr[i]] *= factor;
i++;
}
}
}
/*
void deallocate_matrix(Matrix *A)
{
free(A->r);
free(A->c);
if( A->v )
free(A->v);
}*/
#ifndef MATRIX_H_INCLUDED
#define MATRIX_H_INCLUDED
#include <stdio.h>
#include <SparseProduct.h>
typedef struct Matrix
{
int n, m, *c, *r;
long nnz;
double *v;
} Matrix;
typedef enum
{
FROM_FILE = 0,
POISSON3D
} matrix_type;
void generate_Poisson3D(ptr_SparseMatrix A, const int p, const int stencil_points, int dspL, int dimL, int dim);
// memory utility functions
void allocate_matrix(const int m, const int n, const int nnz, ptr_SparseMatrix A);
void generate_Poisson3D_filled(ptr_SparseMatrix A, const int p, const int stencil_points, int band_width, int dspL, int dimL, int dim);
void generate_Poisson3D_perm(ptr_SparseMatrix A, const int p, const int stencil_points, int init, int step, int dimL, int dim);
void ScaleFirstRowCol(SparseMatrix A, int despL, int dimL, int myId, int root, double factor);
#endif // MATRIX_H_INCLUDED
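/* Hedged usage sketch (added for illustration) of the routines declared above:
   allocate the local block with a safe nnz upper bound, fill it with the 7-point
   Poisson stencil and, optionally, scale the first row/column to worsen the
   conditioning. dimL, dspL and myId are assumed to describe the caller's row
   distribution; p is the grid size per dimension, so dim = p*p*p; the scaling
   factor is illustrative. ptr_SparseMatrix is assumed to be a pointer to
   SparseMatrix, as its use in matrix.c suggests. */
#include "matrix.h"

void build_local_poisson (ptr_SparseMatrix A, int p, int dimL, int dspL, int myId)
{
    int stencil_points = 7;
    int dim = p * p * p;                         /* global dimension            */
    int nnz_bound = stencil_points * dimL;       /* safe local upper bound      */

    allocate_matrix (dimL, dim, nnz_bound, A);   /* dim1 = rows, dim2 = columns */
    generate_Poisson3D (A, p, stencil_points, dspL, dimL, dim);
    ScaleFirstRowCol (*A, dspL, dimL, myId, 0, 1.0e3);
}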
#include <sys/time.h>
#include <sys/types.h>
#include <sys/times.h>
#include <unistd.h>
static double timetick;
static double tstart = 0.0;
static double ucpustart = 0.0;
static int first = 1;
void reloj (double *elapsed, double *ucpu)
{
struct tms cpu;
struct timeval tp;
// struct timezone tzp;
if(first) {
/* Initialize clock */
timetick = 1.0 / (double)(sysconf(_SC_CLK_TCK));
first = 0;
gettimeofday(&tp, NULL); // gettimeofday(&tp, &tzp);
tstart = (double)tp.tv_sec + (double)tp.tv_usec * 1.0e-6;
/* Initialize CPU time */
times(&cpu);
ucpustart = (double)(cpu.tms_utime + cpu.tms_cutime) * timetick;
/* Return values */
*elapsed = 0.0e0;
*ucpu = 0.0e0;
}
else {
/* Get clock time */
gettimeofday(&tp, NULL); // gettimeofday(&tp, &tzp);
*elapsed = (double)tp.tv_sec + (double)tp.tv_usec * 1.0e-6 - tstart;
/* Get CPU time */
times(&cpu);
*ucpu = (double)(cpu.tms_utime + cpu.tms_cutime) * timetick - ucpustart;
}
return;
}
#include <sys/time.h>
#include <sys/types.h>
#include <sys/times.h>
#include <unistd.h>
extern void reloj (double *elapsed, double *ucpu);
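/* Hedged usage sketch (added for illustration, assuming this header is reloj.h):
   the first call to reloj initializes the clocks and returns zeros; a later call
   returns the elapsed wall-clock time and the user CPU time, in seconds, since
   that first call. The loop is only a placeholder workload. */
#include <stdio.h>
#include "reloj.h"

int main (void)
{
    double elapsed, ucpu;
    reloj (&elapsed, &ucpu);                 /* first call: initialize, returns 0.0 */

    double acc = 0.0;
    for (long i = 0; i < 100000000L; i++)    /* placeholder work to be timed        */
        acc += 1.0e-8;

    reloj (&elapsed, &ucpu);                 /* second call: measured times         */
    printf ("elapsed = %f s, user cpu = %f s (acc = %f)\n", elapsed, ucpu, acc);
    return 0;
}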