#include #include #include #include #include //#include #include "mymkl.h" #include #include //#include #include #include "reloj.h" #include "ScalarVectors.h" #include "SparseProduct.h" #include "ToolsMPI.h" #include "matrix.h" #include "common.h" #include "../malleability/MAM.h" #include "ToolsMAM.h" // ================================================================================ #define DIRECT_ERROR 1 #define PRECOND 1 // #define SPMV_OPTIMIZED 1 #ifdef SPMV_OPTIMIZED #define COLL_P2P_SPMV 0 #endif typedef struct { double tol, tol0; int iter, n; double rho; double *x, *b; double *s, *q, *r, *p, *r0, *y, *p_hat, *q_hat; double *aux; SparseMatrix matL; #if PRECOND double *diags; #endif #if DIRECT_ERROR double *res_err, *x_exact; double direct_err; #endif double t1; int *sizes, *dspls; int my_size, my_dspl; int *vlen; int myId, numP, isOriginal; MPI_Comm comm; } Compute_data; void BiCGStab_free (Compute_data *computeData); void originals_set_data(Compute_data *computeData, user_redist_t *user_data, int num_target); void targets_update(Compute_data *computeData, user_redist_t *user_data); void user_func(void *args); void dump(Compute_data *computeData); void BiCGStab_init (Compute_data *computeData) { int size = computeData->matL.dim2, sizeR = computeData->matL.dim1; int IONE = 1; double DONE = 1.0, DMONE = -1.0, DZERO = 0.0; int n, n_dist, myId, nProcs; double t2; #if PRECOND int i; int *posd = NULL; computeData->diags = NULL; #endif computeData->s = NULL; computeData->q = NULL; computeData->r = NULL; computeData->p = NULL; computeData->r0 = NULL; computeData->y = NULL; computeData->p_hat = NULL; computeData->q_hat = NULL; computeData->aux = NULL; myId = computeData->myId; nProcs = computeData->numP; n = size; n_dist = sizeR; CreateDoubles (&computeData->s, n_dist); CreateDoubles (&computeData->q, n_dist); CreateDoubles (&computeData->r, n_dist); CreateDoubles (&computeData->r0, n_dist); CreateDoubles (&computeData->p, n_dist); CreateDoubles (&computeData->y, n_dist); #if DIRECT_ERROR // init exact solution computeData->res_err = NULL; computeData->x_exact = NULL; CreateDoubles (&computeData->x_exact, n_dist); CreateDoubles (&computeData->res_err, n_dist); InitDoubles (computeData->x_exact, n_dist, DONE, DZERO); #endif // DIRECT_ERROR #if PRECOND CreateInts (&posd, n_dist); CreateDoubles (&computeData->p_hat, n_dist); CreateDoubles (&computeData->q_hat, n_dist); CreateDoubles (&computeData->diags, n_dist); GetDiagonalSparseMatrix2 (computeData->matL, computeData->dspls[myId], computeData->diags, posd); #pragma omp parallel for for (i=0; idiags[i] = DONE / computeData->diags[i]; InitDoubles (computeData->p_hat, n_dist, DZERO, DZERO); InitDoubles (computeData->q_hat, n_dist, DZERO, DZERO); #endif CreateDoubles (&computeData->aux, n); #ifdef SPMV_OPTIMIZED int *permP = NULL, *ipermP = NULL; int *vdspP = NULL, *vdimP = NULL, *vdspR = NULL, *vdimR = NULL; double *vecP = NULL; MPI_Datatype *vectDatatypeP = NULL, *vectDatatypeR = NULL; CreateInts (&ipermP, size); CreateInts (&vdimP, nProcs); CreateInts (&vdspP, nProcs + 1); CreateInts (&vdimR, nProcs); CreateInts (&vdspR, nProcs + 1); vectDatatypeP = (MPI_Datatype *) malloc (nProcs * sizeof (MPI_Datatype)); vectDatatypeR = (MPI_Datatype *) malloc (nProcs * sizeof (MPI_Datatype)); createAlltoallwStruct (COLL_P2P_SPMV, MPI_COMM_WORLD, mat, sizes, dspls, vdimP, vdspP, &aux, &permP, ipermP, vdimR, vdspR, vectDatatypeP, vectDatatypeR); // Code required before the loop PermuteInts (mat.vpos, ipermP, mat.vptr[mat.dim1]); #endif computeData->iter = 0; #ifdef SPMV_OPTIMIZED joinDistributeVectorSPMV (COLL_P2P_SPMV, MPI_COMM_WORLD, x, vecP, vdimP, vdspP, vdimR, vdspR, vectDatatypeP, vectDatatypeR); InitDoubles (s, sizeR, DZERO, DZERO); ProdSparseMatrixVectorByRows (mat, 0, vecP, s); // s = A * x #else MPI_Allgatherv (computeData->x, sizeR, MPI_DOUBLE, computeData->aux, computeData->sizes, computeData->dspls, MPI_DOUBLE, computeData->comm); InitDoubles (computeData->s, sizeR, DZERO, DZERO); ProdSparseMatrixVectorByRows (computeData->matL, 0, computeData->aux, computeData->s); // s = A * x #endif rcopy (&n_dist, computeData->b, &IONE, computeData->r, &IONE); // r = b raxpy (&n_dist, &DMONE, computeData->s, &IONE, computeData->r, &IONE); // r -= s rcopy (&n_dist, computeData->r, &IONE, computeData->p, &IONE); // p = r rcopy (&n_dist, computeData->r, &IONE, computeData->r0, &IONE); // r0 = r // compute tolerance and computeData->rho = rdot (&n_dist, computeData->r, &IONE, computeData->r, &IONE); MPI_Allreduce (MPI_IN_PLACE, &computeData->rho, 1, MPI_DOUBLE, MPI_SUM, computeData->comm); computeData->tol0 = sqrt (computeData->rho); computeData->tol = computeData->tol0; #if DIRECT_ERROR // compute direct error rcopy (&n_dist, computeData->x_exact, &IONE, computeData->res_err, &IONE); // res_err = x_exact raxpy (&n_dist, &DMONE, computeData->x, &IONE, computeData->res_err, &IONE); // res_err -= x // compute inf norm computeData->direct_err = norm_inf(n_dist, computeData->res_err); MPI_Allreduce(MPI_IN_PLACE, &computeData->direct_err, 1, MPI_DOUBLE, MPI_MAX, computeData->comm); // // compute euclidean norm // direct_err = rdot (&n_dist, res_err, &IONE, res_err, &IONE); // direct_err = res_err' * res_err // MPI_Allreduce(MPI_IN_PLACE, &direct_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); // direct_err = sqrt(direct_err); #endif // DIRECT_ERROR #if PRECOND RemoveInts (&posd); #endif MPI_Barrier(computeData->comm); if (myId == 0) reloj (&computeData->t1, &t2); } void BiCGStab_compute (Compute_data *computeData, user_redist_t *user_data) { int size = computeData->matL.dim2, sizeR = computeData->matL.dim1; int IONE = 1; double DONE = 1.0, DMONE = -1.0, DZERO = 0.0; int n, n_dist; int maxiter, myId, reconfigure, rec_iter, state; double beta, alpha, umbral, omega, tmp; double t3, t4; double reduce[2]; n = size; n_dist = sizeR; maxiter = 16 * size; rec_iter = maxiter / 2; umbral = 1.0e-8; myId = computeData->myId; state = -1; reconfigure = 0; rec_iter = 1; maxiter=100; while ((computeData->iter < maxiter) && (computeData->tol > umbral)) { #if PRECOND VvecDoubles (DONE, computeData->diags, computeData->p, DZERO, computeData->p_hat, n_dist); // p_hat = D^-1 * p #else computeData->p_hat = computeData->p; #endif #ifdef SPMV_OPTIMIZED joinDistributeVectorSPMV (COLL_P2P_SPMV, MPI_COMM_WORLD, p_hat, vecP, vdimP, vdspP, vdimR, vdspR, vectDatatypeP, vectDatatypeR); InitDoubles (s, sizeR, DZERO, DZERO); ProdSparseMatrixVectorByRows (mat, 0, vecP, s); // s = A * p #else MPI_Allgatherv (computeData->p_hat, sizeR, MPI_DOUBLE, computeData->aux, computeData->sizes, computeData->dspls, MPI_DOUBLE, computeData->comm); InitDoubles (computeData->s, sizeR, DZERO, DZERO); ProdSparseMatrixVectorByRows (computeData->matL, 0, computeData->aux, computeData->s); // s = A * p #endif if (myId == 0) #if DIRECT_ERROR printf ("PD=%d %d \t %g \t %g \t %g \n", computeData->numP, computeData->iter, computeData->tol, umbral, computeData->direct_err); #else printf ("%d \t %g \n", computeData->iter, computeData->tol); #endif // DIRECT_ERROR alpha = rdot (&n_dist, computeData->r0, &IONE, computeData->s, &IONE); MPI_Allreduce (MPI_IN_PLACE, &alpha, 1, MPI_DOUBLE, MPI_SUM, computeData->comm); alpha = computeData->rho / alpha; rcopy (&n_dist, computeData->r, &IONE, computeData->q, &IONE); // q = r tmp = -alpha; raxpy (&n_dist, &tmp, computeData->s, &IONE, computeData->q, &IONE); // q = r - alpha * s; // second spmv #if PRECOND VvecDoubles (DONE, computeData->diags, computeData->q, DZERO, computeData->q_hat, n_dist); // q_hat = D^-1 * q #else computeData->q_hat = computeData->q; #endif #ifdef SPMV_OPTIMIZED joinDistributeVectorSPMV (COLL_P2P_SPMV, MPI_COMM_WORLD, q_hat, vecP, vdimP, vdspP, vdimR, vdspR, vectDatatypeP, vectDatatypeR); InitDoubles (y, sizeR, DZERO, DZERO); ProdSparseMatrixVectorByRows (mat, 0, vecP, y); // y = A * q #else MPI_Allgatherv (computeData->q_hat, sizeR, MPI_DOUBLE, computeData->aux, computeData->sizes, computeData->dspls, MPI_DOUBLE, computeData->comm); InitDoubles (computeData->y, sizeR, DZERO, DZERO); ProdSparseMatrixVectorByRows (computeData->matL, 0, computeData->aux, computeData->y); // y = A * q #endif // omega = / reduce[0] = rdot (&n_dist, computeData->q, &IONE, computeData->y, &IONE); reduce[1] = rdot (&n_dist, computeData->y, &IONE, computeData->y, &IONE); MPI_Allreduce (MPI_IN_PLACE, reduce, 2, MPI_DOUBLE, MPI_SUM, computeData->comm); omega = reduce[0] / reduce[1]; // x+1 = x + alpha * p + omega * q raxpy (&n_dist, &alpha, computeData->p_hat, &IONE, computeData->x, &IONE); raxpy (&n_dist, &omega, computeData->q_hat, &IONE, computeData->x, &IONE); // r+1 = q - omega * y rcopy (&n_dist, computeData->q, &IONE, computeData->r, &IONE); // r = q tmp = -omega; raxpy (&n_dist, &tmp, computeData->y, &IONE, computeData->r, &IONE); // r = q - omega * y; // rho = and tolerance reduce[0] = rdot (&n_dist, computeData->r0, &IONE, computeData->r, &IONE); reduce[1] = rdot (&n_dist, computeData->r, &IONE, computeData->r, &IONE); MPI_Allreduce (MPI_IN_PLACE, reduce, 2, MPI_DOUBLE, MPI_SUM, computeData->comm); tmp = reduce[0]; computeData->tol = sqrt (reduce[1]) / computeData->tol0; // beta = (alpha / omega) * / beta = (alpha / omega) * (tmp / computeData->rho); computeData->rho = tmp; // p+1 = r+1 + beta * (p - omega * s) tmp = -omega; raxpy (&n_dist, &tmp, computeData->s, &IONE, computeData->p, &IONE); // p -= omega * s rscal (&n_dist, &beta, computeData->p, &IONE); // p = beta * p raxpy (&n_dist, &DONE, computeData->r, &IONE, computeData->p, &IONE); // p += r #if DIRECT_ERROR // compute direct error rcopy (&n_dist, computeData->x_exact, &IONE, computeData->res_err, &IONE); // res_err = x_exact raxpy (&n_dist, &DMONE, computeData->x, &IONE, computeData->res_err, &IONE); // res_err -= x // compute inf norm computeData->direct_err = norm_inf(n_dist, computeData->res_err); MPI_Allreduce(MPI_IN_PLACE, &computeData->direct_err, 1, MPI_DOUBLE, MPI_MAX, computeData->comm); // // compute euclidean norm // direct_err = rdot (&n_dist, res_err, &IONE, res_err, &IONE); // MPI_Allreduce(MPI_IN_PLACE, &direct_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); // direct_err = sqrt(direct_err); #endif // DIRECT_ERROR computeData->iter++; if (computeData->iter == rec_iter) { reconfigure = 1;} if (reconfigure) { // dump(computeData); MAM_Checkpoint(&state, MAM_CHECK_COMPLETION, user_func, (void *) user_data); if(state == MAM_COMPLETED) { reconfigure = 0; BiCGStab_free (computeData); targets_update(computeData, user_data); sizeR = computeData->matL.dim1; n_dist = sizeR; // dump(computeData); } } } MPI_Barrier(computeData->comm); if (myId == 0) reloj (&t3, &t4); if(state == MAM_PENDING) { MAM_Checkpoint(&state, MAM_WAIT_COMPLETION, user_func, (void *) user_data); BiCGStab_free (computeData); targets_update(computeData, user_data); } #ifdef SPMV_OPTIMIZED // Code required after the loop PermuteInts (mat.vpos, permP, mat.vptr[mat.dim1]); // Freeing memory for Permutation free (vectDatatypeR); vectDatatypeR = NULL; free (vectDatatypeP); vectDatatypeP = NULL; RemoveDoubles (&vecP); RemoveInts (&permP); RemoveInts (&vdspR); RemoveInts (&vdimR); RemoveInts (&vdspP); RemoveInts (&vdimP); RemoveInts (&ipermP); #endif if (myId == 0) { printf ("Size: %d \n", n); printf ("Iter: %d \n", computeData->iter); printf ("Tol: %g \n", computeData->tol); printf ("Time_loop: %20.10e\n", (t3-computeData->t1)); printf ("Time_iter: %20.10e\n", (t3-computeData->t1)/computeData->iter); } } void BiCGStab_free (Compute_data *computeData) { RemoveDoubles (&computeData->x); RemoveDoubles (&computeData->b); RemoveDoubles (&computeData->aux); RemoveDoubles (&computeData->s); RemoveDoubles (&computeData->q); RemoveDoubles (&computeData->r); RemoveDoubles (&computeData->p); RemoveDoubles (&computeData->r0); RemoveDoubles (&computeData->y); #if PRECOND RemoveDoubles (&computeData->diags); RemoveDoubles(&computeData->p_hat); RemoveDoubles (&computeData->q_hat); #endif #if DIRECT_ERROR RemoveDoubles (&computeData->res_err); RemoveDoubles (&computeData->x_exact); #endif RemoveInts (&computeData->sizes); RemoveInts (&computeData->dspls); RemoveInts (&computeData->vlen); if (computeData->isOriginal) { RemoveSparseMatrix (&computeData->matL); computeData->isOriginal = 0; } else { RemoveSparseMatrix2 (&computeData->matL); } } /*********************************************************************************/ int main (int argc, char **argv) { int dim; double *sol1 = NULL, *sol2 = NULL; int index = 0, indexL = 0; SparseMatrix mat = {0, 0, NULL, NULL, NULL}, sym = {0, 0, NULL, NULL, NULL}; int root = 0, myId, nProcs; int isTarget, numTarget, req, initNumP; int dimL, dspL, *vdimL = NULL, *vdspL = NULL; SparseMatrix matL = {0, 0, NULL, NULL, NULL}; double *sol1L = NULL, *sol2L = NULL; double beta; int IONE = 1; double DMONE = -1.0; int mat_from_file, nodes, size_param, stencil_points; Compute_data computeData; user_redist_t user_data; /***************************************/ MPI_Init_thread (&argc, &argv, MPI_THREAD_MULTIPLE, &req); // Definition of the variables nProcs and myId MPI_Comm_size(MPI_COMM_WORLD, &nProcs); MPI_Comm_rank(MPI_COMM_WORLD, &myId); root = nProcs-1; root = 0; computeData.myId = myId; computeData.numP = nProcs; computeData.comm = MPI_COMM_WORLD; initNumP = computeData.numP; user_data = empty_user_data; user_data.comm = computeData.comm; prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0); isTarget = MAM_Init(root, &computeData.comm, argv[0], user_func, (void *) &user_data); if(isTarget) { targets_update(&computeData, &user_data); computeData.isOriginal = 0; // dump(&computeData); } else { /***************************************/ if (argc == 4) { mat_from_file = atoi(argv[2]); numTarget = atoi(argv[3]); } else { mat_from_file = atoi(argv[2]); nodes = atoi(argv[3]); size_param = atoi(argv[4]); stencil_points = atoi(argv[5]); } /***************************************/ // printf ("A\n"); CreateInts (&vdimL, nProcs); CreateInts (&vdspL, nProcs); if(mat_from_file) { if (myId == root) { // Creating the matrix ReadMatrixHB (argv[1], &sym); TransposeSparseMatrices (sym, 0, &mat, 0); dim = mat.dim1; } // Distributing the matrix dim = DistributeMatrix (mat, index, &matL, indexL, vdimL, vdspL, root, MPI_COMM_WORLD); dimL = vdimL[myId]; dspL = vdspL[myId]; if (myId == root) { RemoveSparseMatrix (&mat); RemoveSparseMatrix (&sym); } // printf ("B\n"); } else { dim = size_param * size_param * size_param; int divL, rstL, i; divL = (dim / nProcs); rstL = (dim % nProcs); for (i=0; i numTarget && computeData.myId == 0) { MPI_Abort(MPI_COMM_WORLD, -100); } if(computeData.comm != MPI_COMM_WORLD && computeData.comm != MPI_COMM_NULL) MPI_Comm_free(&(computeData.comm)); MPI_Finalize (); return 0; } /* MAM New functions */ /* * FunciĆ³n para declarar los datos a comunicar por parte de MAM */ void originals_set_data(Compute_data *computeData, user_redist_t *user_data, int num_target) { TransformHeadertoLength (computeData->matL.vptr, computeData->my_size); CreateInts (&computeData->vlen, computeData->my_size); CopyInts (computeData->matL.vptr+1, computeData->vlen, computeData->my_size); TransformLengthtoHeader (computeData->matL.vptr, computeData->my_size); MAM_Set_target_number(num_target); MAM_Data_add(&(computeData->n), NULL, 1, MPI_INT, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT); MAM_Data_add(&(computeData->tol0), NULL, 1, MPI_DOUBLE, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT); MAM_Data_add(&(computeData->t1), NULL, 1, MPI_DOUBLE, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT); MAM_Data_add(&(computeData->iter), NULL, 1, MPI_INT, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE); MAM_Data_add(&(computeData->tol), NULL, 1, MPI_DOUBLE, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE); MAM_Data_add(&(computeData->rho), NULL, 1, MPI_DOUBLE, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE); #if DIRECT_ERROR MAM_Data_add(&(computeData->direct_err), NULL, 1, MPI_DOUBLE, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE); #endif MAM_Data_add(computeData->vlen, NULL, computeData->n, MPI_INT, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT); MAM_Data_add(computeData->r0, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT); MAM_Data_add(computeData->b, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT); #if PRECOND MAM_Data_add(computeData->diags, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT); #endif #if DIRECT_ERROR MAM_Data_add(computeData->x_exact, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT); #endif MAM_Data_add(computeData->p, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE); MAM_Data_add(computeData->r, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE); MAM_Data_add(computeData->x, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE); #if PRECOND MAM_Data_add(computeData->p_hat, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE); MAM_Data_add(computeData->q_hat, NULL, computeData->n, MPI_DOUBLE, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE); #endif user_data->n = computeData->n; user_data->array_vptr = computeData->matL.vptr; user_data->array_vlen = computeData->vlen; user_data->array_vpos = computeData->matL.vpos; user_data->array_vval = computeData->matL.vval; } void targets_update(Compute_data *computeData, user_redist_t *user_data) { size_t entry, total_qty; void *value = NULL; MPI_Datatype type; MPI_Comm_size(computeData->comm, &computeData->numP); MPI_Comm_rank(computeData->comm, &computeData->myId); entry = 0; MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT); computeData->n = *((int *)value); MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT); computeData->tol0 = *((double *)value); MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_CONSTANT); computeData->t1 = *((double *)value); entry = 0; MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE); computeData->iter = *((int *)value); MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE); computeData->tol = *((double *)value); MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE); computeData->rho = *((double *)value); #if DIRECT_ERROR MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_REPLICATED, MAM_DATA_VARIABLE); computeData->direct_err = *((double *)value); #endif entry = 0; MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT); computeData->vlen = ((int *)value); //computeData->vlen = user_data->recv_vlen; MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT); computeData->r0 = ((double *)value); MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT); computeData->b = ((double *)value); #if PRECOND MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT); computeData->diags = ((double *)value); #endif #if DIRECT_ERROR MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_CONSTANT); computeData->x_exact = ((double *)value); #endif entry = 0; MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE); computeData->p = ((double *)value); MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE); computeData->r = ((double *)value); MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE); computeData->x = ((double *)value); #if PRECOND MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE); computeData->p_hat = ((double *)value); MAM_Data_get_pointer(&value, entry++, &total_qty, &type, MAM_DATA_DISTRIBUTED, MAM_DATA_VARIABLE); computeData->q_hat = ((double *)value); #endif int n = computeData->n; CreateInts (&computeData->sizes, computeData->numP); CreateInts (&computeData->dspls, computeData->numP); ComputeMatrixSizes (n, computeData->sizes, computeData->dspls, computeData->comm); computeData->my_size = computeData->sizes[computeData->myId]; computeData->my_dspl = computeData->dspls[computeData->myId]; computeData->matL = user_data->other_subm; int n_dist = computeData->matL.dim1; CreateDoubles (&computeData->s, n_dist); CreateDoubles (&computeData->q, n_dist); CreateDoubles (&computeData->y, n_dist); CreateDoubles (&computeData->aux, n); #if DIRECT_ERROR CreateDoubles (&computeData->res_err, n_dist); #endif *user_data = empty_user_data; user_data->array_vptr = computeData->matL.vptr; user_data->array_vlen = computeData->vlen; user_data->array_vpos = computeData->matL.vpos; user_data->array_vval = computeData->matL.vval; user_data->comm = computeData->comm; } void user_func(void *args) { int local_flag, flag = 0; mam_user_reconf_t user_reconf; MAM_Get_Reconf_Info(&user_reconf); user_redist_t *user_data = (user_redist_t *) args; if(!user_data->initiated) { //targets_distribution_synch(user_reconf, user_data); //flag = 1; targets_distribution(user_reconf, user_data); user_data->initiated = 1; if(user_reconf.rank_state == MAM_PROC_NEW_RANK) { MPI_Waitall(2, user_data->reqs, MPI_STATUSES_IGNORE); flag = 1; } } else { MPI_Testall(2, user_data->reqs, &local_flag, MPI_STATUSES_IGNORE); MPI_Allreduce(&local_flag, &flag, 1, MPI_INT, MPI_MIN, user_data->comm); } if(flag) MAM_Resume_redistribution(NULL); } void dump(Compute_data *computeData) { int i; if(computeData->myId == 0) printf("TamBL="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { printf("%d, ", computeData->my_size); } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } if(computeData->myId == 0) printf("\nVlen="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmy_size; j++) { printf("%d, ", computeData->vlen[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } if(computeData->myId == 0) printf("\nVptr="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmy_size+1; j++) { printf("%d, ", computeData->matL.vptr[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } if(computeData->myId == 0) printf("\nVpos="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmatL.vptr[computeData->my_size]; j++) { printf("%d, ", computeData->matL.vpos[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } if(computeData->myId == 0) printf("\nVval="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmatL.vptr[computeData->my_size]; j++) { printf("%lf, ", computeData->matL.vval[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } if(computeData->myId == 0) printf("\nX="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmy_size; j++) { printf("%lf, ", computeData->x[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } if(computeData->myId == 0) printf("\nB="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmy_size; j++) { printf("%lf, ", computeData->b[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } if(computeData->myId == 0) printf("\nr="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmy_size; j++) { printf("%lf, ", computeData->r[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } if(computeData->myId == 0) printf("\nr0="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmy_size; j++) { printf("%lf, ", computeData->r0[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } if(computeData->myId == 0) printf("\np="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmy_size; j++) { printf("%lf, ", computeData->p[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } if(computeData->myId == 0) printf("\ndiags="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmy_size; j++) { printf("%lf, ", computeData->diags[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } if(computeData->myId == 0) printf("\np_hat="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmy_size; j++) { printf("%lf, ", computeData->p_hat[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } if(computeData->myId == 0) printf("\nq_hat="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmy_size; j++) { printf("%lf, ", computeData->q_hat[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } /* if(computeData->myId == 0) printf("\ns="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmy_size; j++) { printf("%lf, ", computeData->s[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } if(computeData->myId == 0) printf("\ny="); fflush(stdout); MPI_Barrier(computeData->comm); for(i=0; inumP; i++) { if(computeData->myId == i) { for(int j=0; jmy_size; j++) { printf("%lf, ", computeData->y[j]); } } fflush(stdout); sleep(1); MPI_Barrier(computeData->comm); } */ } /* typedef struct { double tol, tol0; int iter, n; double rho; double *x, *b; double *s, *q, *r, *p, *r0, *y, *p_hat, *q_hat; double *aux; SparseMatrix matL; #if PRECOND double *diags; #endif #if DIRECT_ERROR double *res_err, *x_exact; double direct_err; #endif double t1; int *sizes, *dspls; int my_size, my_dspl; int *vlen; int myId, numP; MPI_Comm comm; } Compute_data; */