// System headers required by the code below (the original <...> names were lost in extraction).
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

#include "reloj.h"
#include "ScalarVectors.h"
#include "SparseProduct.h"
#include "ToolsMPI.h"
#include "matrix.h"
#include "common.h"

// ================================================================================

#define DIRECT_ERROR 1
#define PRECOND 1
// #define SPMV_OPTIMIZED 1
#ifdef SPMV_OPTIMIZED
#define COLL_P2P_SPMV 0
#endif

void BiCGStab (SparseMatrix mat, double *x, double *b, int *sizes, int *dspls, int myId) {
    int size = mat.dim2, sizeR = mat.dim1;
    int IONE = 1;
    double DONE = 1.0, DMONE = -1.0, DZERO = 0.0;
    int n, n_dist, iter, maxiter, nProcs;
    double beta, tol, tol0, alpha, umbral, rho, omega, tmp;
    double *s = NULL, *q = NULL, *r = NULL, *p = NULL, *r0 = NULL, *y = NULL, *p_hat = NULL, *q_hat = NULL;
    double *aux = NULL;
    double t1, t2, t3, t4;
    double reduce[2];
#if PRECOND
    int i, *posd = NULL;
    double *diags = NULL;
#endif

    MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
    n = size; n_dist = sizeR;
    maxiter = 16 * size;
    umbral = 1.0e-8;                                           // convergence threshold
    CreateDoubles (&s, n_dist);
    CreateDoubles (&q, n_dist);
    CreateDoubles (&r, n_dist);
    CreateDoubles (&r0, n_dist);
    CreateDoubles (&p, n_dist);
    CreateDoubles (&y, n_dist);

#if DIRECT_ERROR
    // init exact solution
    double *res_err = NULL, *x_exact = NULL;
    CreateDoubles (&x_exact, n_dist);
    CreateDoubles (&res_err, n_dist);
    InitDoubles (x_exact, n_dist, DONE, DZERO);
#endif // DIRECT_ERROR

#if PRECOND
    CreateInts (&posd, n_dist);
    CreateDoubles (&p_hat, n_dist);
    CreateDoubles (&q_hat, n_dist);
    CreateDoubles (&diags, n_dist);
    GetDiagonalSparseMatrix2 (mat, dspls[myId], diags, posd);
#pragma omp parallel for
    for (i=0; i<n_dist; i++)
        diags[i] = DONE / diags[i];                            // Jacobi: store the inverse diagonal
#endif

    /* NOTE: the start-up code between this point and the first dot product was
     * lost in the extracted text (allocation of the gather buffer, initial
     * residual, and, when SPMV_OPTIMIZED is defined, the setup of vecP, permP,
     * vdimP/vdspP, vdimR/vdspR and the vector MPI datatypes used below).
     * The lines that follow are an assumed reconstruction of the usual
     * BiCGStab start-up, consistent with how the variables are used later. */
    CreateDoubles (&aux, n);
    iter = 0;
    MPI_Allgatherv (x, sizeR, MPI_DOUBLE, aux, sizes, dspls, MPI_DOUBLE, MPI_COMM_WORLD);
    InitDoubles (s, sizeR, DZERO, DZERO);
    ProdSparseMatrixVectorByRows (mat, 0, aux, s);             // s = A * x
    dcopy (&n_dist, b, &IONE, r, &IONE);                       // r = b
    daxpy (&n_dist, &DMONE, s, &IONE, r, &IONE);               // r = b - A * x
    dcopy (&n_dist, r, &IONE, r0, &IONE);                      // r0 = r
    dcopy (&n_dist, r, &IONE, p, &IONE);                       // p = r

    rho = ddot (&n_dist, r, &IONE, r, &IONE);                  // rho = <r, r>
    MPI_Allreduce (MPI_IN_PLACE, &rho, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    tol0 = sqrt (rho);
    tol = tol0;

#if DIRECT_ERROR
    // compute direct error
    double direct_err;
    dcopy (&n_dist, x_exact, &IONE, res_err, &IONE);           // res_err = x_exact
    daxpy (&n_dist, &DMONE, x, &IONE, res_err, &IONE);         // res_err -= x

    // compute inf norm
    direct_err = norm_inf(n_dist, res_err);
    MPI_Allreduce(MPI_IN_PLACE, &direct_err, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);

    // // compute euclidean norm
    // direct_err = ddot (&n_dist, res_err, &IONE, res_err, &IONE); // direct_err = res_err' * res_err
    // MPI_Allreduce(MPI_IN_PLACE, &direct_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    // direct_err = sqrt(direct_err);
#endif // DIRECT_ERROR

    MPI_Barrier(MPI_COMM_WORLD);
    if (myId == 0)
        reloj (&t1, &t2);

    while ((iter < maxiter) && (tol > umbral)) {
#if PRECOND
        VvecDoubles (DONE, diags, p, DZERO, p_hat, n_dist);    // p_hat = D^-1 * p
#else
        p_hat = p;
#endif
        // first SpMV: s = A * p_hat
#ifdef SPMV_OPTIMIZED
        joinDistributeVectorSPMV (COLL_P2P_SPMV, MPI_COMM_WORLD, p_hat, vecP, vdimP, vdspP,
                                  vdimR, vdspR, vectDatatypeP, vectDatatypeR);
        InitDoubles (s, sizeR, DZERO, DZERO);
        ProdSparseMatrixVectorByRows (mat, 0, vecP, s);        // s = A * p
#else
        MPI_Allgatherv (p_hat, sizeR, MPI_DOUBLE, aux, sizes, dspls, MPI_DOUBLE, MPI_COMM_WORLD);
        InitDoubles (s, sizeR, DZERO, DZERO);
        ProdSparseMatrixVectorByRows (mat, 0, aux, s);         // s = A * p
#endif

        if (myId == 0)
#if DIRECT_ERROR
            printf ("%d \t %g \t %g \t %g \n", iter, tol, umbral, direct_err);
#else
            printf ("%d \t %g \n", iter, tol);
#endif // DIRECT_ERROR

        alpha = ddot (&n_dist, r0, &IONE, s, &IONE);           // alpha = <r0, s>
        MPI_Allreduce (MPI_IN_PLACE, &alpha, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        alpha = rho / alpha;                                   // alpha = rho / <r0, A * p_hat>

        dcopy (&n_dist, r, &IONE, q, &IONE);                   // q = r
        tmp = -alpha;
        daxpy (&n_dist, &tmp, s, &IONE, q, &IONE);             // q = r - alpha * s;
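        /*
         * Stabilisation half-step: with the intermediate residual
         * q = r - alpha * A * p_hat already formed, the code below applies the
         * preconditioner to q, computes y = A * q_hat, chooses
         * omega = <q, y> / <y, y> (the value that minimises ||q - omega * y||_2),
         * updates x and r, and rebuilds the search direction
         * p = r + beta * (p - omega * s) with beta = (alpha / omega) * (rho_new / rho_old).
         */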
        // second SpMV: y = A * q_hat
#if PRECOND
        VvecDoubles (DONE, diags, q, DZERO, q_hat, n_dist);    // q_hat = D^-1 * q
#else
        q_hat = q;
#endif
#ifdef SPMV_OPTIMIZED
        joinDistributeVectorSPMV (COLL_P2P_SPMV, MPI_COMM_WORLD, q_hat, vecP, vdimP, vdspP,
                                  vdimR, vdspR, vectDatatypeP, vectDatatypeR);
        InitDoubles (y, sizeR, DZERO, DZERO);
        ProdSparseMatrixVectorByRows (mat, 0, vecP, y);        // y = A * q
#else
        MPI_Allgatherv (q_hat, sizeR, MPI_DOUBLE, aux, sizes, dspls, MPI_DOUBLE, MPI_COMM_WORLD);
        InitDoubles (y, sizeR, DZERO, DZERO);
        ProdSparseMatrixVectorByRows (mat, 0, aux, y);         // y = A * q
#endif

        // omega = <q, y> / <y, y>
        reduce[0] = ddot (&n_dist, q, &IONE, y, &IONE);
        reduce[1] = ddot (&n_dist, y, &IONE, y, &IONE);
        MPI_Allreduce (MPI_IN_PLACE, reduce, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        omega = reduce[0] / reduce[1];

        // x+1 = x + alpha * p_hat + omega * q_hat
        daxpy (&n_dist, &alpha, p_hat, &IONE, x, &IONE);
        daxpy (&n_dist, &omega, q_hat, &IONE, x, &IONE);

        // r+1 = q - omega * y
        dcopy (&n_dist, q, &IONE, r, &IONE);                   // r = q
        tmp = -omega;
        daxpy (&n_dist, &tmp, y, &IONE, r, &IONE);             // r = q - omega * y;

        // rho = <r0, r+1> and tolerance
        reduce[0] = ddot (&n_dist, r0, &IONE, r, &IONE);
        reduce[1] = ddot (&n_dist, r, &IONE, r, &IONE);
        MPI_Allreduce (MPI_IN_PLACE, reduce, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        tmp = reduce[0];
        tol = sqrt (reduce[1]) / tol0;

        // beta = (alpha / omega) * (<r0, r+1> / <r0, r>)
        beta = (alpha / omega) * (tmp / rho);
        rho = tmp;

        // p+1 = r+1 + beta * (p - omega * s)
        tmp = -omega;
        daxpy (&n_dist, &tmp, s, &IONE, p, &IONE);             // p -= omega * s
        dscal (&n_dist, &beta, p, &IONE);                      // p = beta * p
        daxpy (&n_dist, &DONE, r, &IONE, p, &IONE);            // p += r

#if DIRECT_ERROR
        // compute direct error
        dcopy (&n_dist, x_exact, &IONE, res_err, &IONE);       // res_err = x_exact
        daxpy (&n_dist, &DMONE, x, &IONE, res_err, &IONE);     // res_err -= x

        // compute inf norm
        direct_err = norm_inf(n_dist, res_err);
        MPI_Allreduce(MPI_IN_PLACE, &direct_err, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);

        // // compute euclidean norm
        // direct_err = ddot (&n_dist, res_err, &IONE, res_err, &IONE);
        // MPI_Allreduce(MPI_IN_PLACE, &direct_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        // direct_err = sqrt(direct_err);
#endif // DIRECT_ERROR

        iter++;
    }

    MPI_Barrier(MPI_COMM_WORLD);
    if (myId == 0)
        reloj (&t3, &t4);

#ifdef SPMV_OPTIMIZED
    // Code required after the loop
    PermuteInts (mat.vpos, permP, mat.vptr[mat.dim1]);

    // Freeing memory for Permutation
    free (vectDatatypeR); vectDatatypeR = NULL;
    free (vectDatatypeP); vectDatatypeP = NULL;
    RemoveDoubles (&vecP);
    RemoveInts (&permP);
    RemoveInts (&vdspR); RemoveInts (&vdimR);
    RemoveInts (&vdspP); RemoveInts (&vdimP);
    RemoveInts (&ipermP);
#endif

    if (myId == 0) {
        printf ("Size: %d \n", n);
        printf ("Iter: %d \n", iter);
        printf ("Tol: %g \n", tol);
        printf ("Time_loop: %20.10e\n", (t3-t1));
        printf ("Time_iter: %20.10e\n", (t3-t1)/iter);
    }

    RemoveDoubles (&aux); RemoveDoubles (&s); RemoveDoubles (&q); RemoveDoubles (&r);
    RemoveDoubles (&p); RemoveDoubles (&r0); RemoveDoubles (&y);
#if PRECOND
    RemoveDoubles (&diags); RemoveInts (&posd);
    RemoveDoubles (&p_hat); RemoveDoubles (&q_hat);
#endif
}
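/*
 * Communication pattern of the solver above: each BiCGStab iteration issues two
 * MPI_Allgatherv calls (one per sparse matrix-vector product, to assemble the
 * global vector before the local ProdSparseMatrixVectorByRows) and three
 * MPI_Allreduce calls (one for alpha and two fused two-element reductions, for
 * omega and for rho / the residual norm), plus one extra MPI_Allreduce per
 * iteration when DIRECT_ERROR is enabled. The preconditioner is a diagonal
 * (Jacobi) scaling applied with VvecDoubles: p_hat = D^-1 * p, q_hat = D^-1 * q.
 */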
/*********************************************************************************/

int main (int argc, char **argv) {
    int dim;
    double *sol1 = NULL, *sol2 = NULL;
    int index = 0, indexL = 0;
    SparseMatrix mat = {0, 0, NULL, NULL, NULL}, sym = {0, 0, NULL, NULL, NULL};

    int root = 0, myId, nProcs;
    int dimL, dspL, *vdimL = NULL, *vdspL = NULL;
    SparseMatrix matL = {0, 0, NULL, NULL, NULL};
    double *sol1L = NULL, *sol2L = NULL;
    int mat_from_file, nodes, size_param, stencil_points;

    if (argc == 3) {
        mat_from_file = atoi(argv[2]);
    } else {
        mat_from_file = atoi(argv[2]);
        nodes = atoi(argv[3]);
        size_param = atoi(argv[4]);
        stencil_points = atoi(argv[5]);
    }

    /***************************************/

    MPI_Init (&argc, &argv);

    // Definition of the variables nProcs and myId
    MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myId);
    root = nProcs-1;
    root = 0;

    /***************************************/

    printf ("A\n");
    CreateInts (&vdimL, nProcs);
    CreateInts (&vdspL, nProcs);
    if (mat_from_file) {
        if (myId == root) {
            // Creating the matrix
            ReadMatrixHB (argv[1], &sym);
            TransposeSparseMatrices (sym, 0, &mat, 0);
            dim = mat.dim1;
        }

        // Distributing the matrix
        dim = DistributeMatrix (mat, index, &matL, indexL, vdimL, vdspL, root, MPI_COMM_WORLD);
        dimL = vdimL[myId];
        dspL = vdspL[myId];
        printf ("B\n");
    }
    else {
        dim = size_param * size_param * size_param;
        int divL, rstL, i;
        divL = (dim / nProcs);
        rstL = (dim % nProcs);
        // The extracted text is cut off inside this loop; the body below is an
        // assumed reconstruction of the usual block row partition.
        for (i=0; i<nProcs; i++)
            vdimL[i] = divL + (i < rstL);
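        /*
         * Usage inferred from the argument parsing above (the binary name is a
         * placeholder, not taken from the source):
         *   mpirun -np <P> ./bicgstab <matrix.rsa> 1
         *     reads a Harwell-Boeing matrix on the root process and distributes it;
         *   mpirun -np <P> ./bicgstab <matrix> 0 <nodes> <size_param> <stencil_points>
         *     generates a stencil problem of dimension size_param^3, partitioned
         *     by blocks of rows across the processes.
         */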