#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mymkl.h"
#include "ScalarVectors.h"
#include "SparseMatrices.h"
#include <mpi.h>
#include "../malleability/malleabilityManager.h"

//#define ONLY_SYM 0
#define ROOT 0
//#define DEBUG 0
#define MAX_PROCS_SET 16

typedef struct {
  double umbral, tol;
  int iter, maxiter, n;

  double beta, rho, alpha;
  double *res, *z, *d, *vec;
  SparseMatrix subm;

  double *d_full;

  int *dist_elem, *displs_elem;
  int *dist_rows, *displs_rows;
  int *vlen;
} Compute_data;

struct Dist_data {
  int ini;
  int fin;

  int tamBl; // Number of rows
  int tot_r; // Total number of rows in the matrix

  int myId;
  int numP;
  int numP_parents;

  MPI_Comm comm, comm_children, comm_parents;
  MPI_Datatype scalars, arrays;
};

void dumb(Compute_data *computeData, struct Dist_data *dist_data); //FIXME Delete me
void init_app(Compute_data *computeData, struct Dist_data *dist_data, char* argv[]);
void get_mat_dist(Compute_data *computeData, struct Dist_data dist_data, SparseMatrix mat);
void get_rows_dist(Compute_data *computeData, int numP, int n);
void mat_alloc(Compute_data *computeData, SparseMatrix mat, struct Dist_data dist_data);
void computeSolution(Compute_data computeData, double **subsol, SparseMatrix mat, int myId, double **full_vec);
void pre_compute(Compute_data *computeData, struct Dist_data dist_data, double *subsol, double *full_vec);
int compute(Compute_data *computeData, struct Dist_data *dist_data, int sm);
void free_computeData(Compute_data *computeData, int terminate);

//===================================MALLEABILITY FUNCTIONS====================================================
int n_check = 30;

int dist_old(struct Dist_data *dist_data, Compute_data *computeData, int num_children, int sm, int ss, int rm, int rs, int send_sync);
void dist_new(struct Dist_data *dist_data, Compute_data *computeData);
void update_dist_data(struct Dist_data *dist_data);
void print_global_results();
//----------------------------------------------------------------------------------------------------
void get_dist(int total_r, int id, int numP, struct Dist_data *dist_data);
void set_counts(int id, int numP, struct Dist_data data_dist, int *sendcounts);
void getIds_intercomm(struct Dist_data dist_data, int numP_other, int **idS);
//----------------------------------------------------------------------------------------------------

int main (int argc, char *argv[]) {
  int terminate;
  int req, num_nodes, num_cpus = 20;
  int sm, ss, rm, rs, send_sync;
  char *nodelist = NULL;
  Compute_data computeData;

  computeData.z = NULL; computeData.d_full = NULL; computeData.d = NULL;
  computeData.vec = NULL; computeData.res = NULL;
  computeData.dist_elem = NULL; computeData.displs_elem = NULL;
  computeData.dist_rows = NULL; computeData.displs_rows = NULL;
  computeData.subm.vptr = NULL;
  computeData.vlen = NULL;

  send_sync = 1;
  sm = 1; ss = 1;
  rm = 0; rs = 1;

  int numP, myId, num_children = 0;
  struct Dist_data dist_data;
  if (argc >= 10) {
    num_children = atoi(argv[2]);
    sm = atoi(argv[3]);
    ss = atoi(argv[4]);
    rm = atoi(argv[5]);
    rs = atoi(argv[6]);
    send_sync = atoi(argv[7]);
    nodelist = argv[8];
    num_nodes = atoi(argv[9]);
    num_cpus = num_nodes * num_cpus;
  }

  MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &req);
  MPI_Comm_size(MPI_COMM_WORLD, &numP);
  MPI_Comm_rank(MPI_COMM_WORLD, &myId);
  dist_data.comm = MPI_COMM_WORLD;

  int new_group = init_malleability(myId, numP, ROOT, dist_data.comm, argv[0], nodelist, num_cpus, num_nodes);
  update_dist_data(&dist_data);

  if( !new_group ) { //First set of processes
    init_app(&computeData, &dist_data, argv);
    dist_old(&dist_data, &computeData, num_children, sm, ss, rm, rs, send_sync);

    MPI_Barrier(MPI_COMM_WORLD);
    set_global_time(MPI_Wtime());
  } else { // Spawned set of processes
    dist_new(&dist_data, &computeData);
  }

  //if(computeData.iter==0)
  terminate = compute(&computeData, &dist_data, sm);

  if(terminate) {
    update_dist_data(&dist_data);
    MPI_Barrier(dist_data.comm);
    if(dist_data.myId == ROOT) {
      print_global_results();
      printf ("End(%d) --> (%d,%20.10e)\n", computeData.n, computeData.iter, computeData.tol);
    }
  }

  // End of CG
  free_malleability();
  free_computeData(&computeData, 1);
  if(sm && numP > num_children && dist_data.myId == 0) {
    MPI_Abort(MPI_COMM_WORLD, -100);
  }
  MPI_Finalize();
}
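/*
 * Example invocation (a sketch, not taken from the original sources): argv[1]
 * is the Harwell-Boeing matrix file read in init_app, and argv[2..9] are parsed
 * above as num_children, sm, ss, rm, rs, send_sync, nodelist and num_nodes.
 * The launcher, executable and file names below are placeholders.
 *
 *   mpirun -np 4 ./exec matrix.rb 8 1 1 0 1 1 nodelist.txt 2
 *
 * With fewer than 10 arguments the defaults set above are kept
 * (sm=1, ss=1, rm=0, rs=1, send_sync=1) and no nodelist is provided.
 */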
/*
 * Init application data before
 * starting the iterative computation
 */
void init_app(Compute_data *computeData, struct Dist_data *dist_data, char* argv[]) {
  SparseMatrix mat, sym;
  double *full_vec = NULL;
  double *subsol = NULL;

  if(dist_data->myId == ROOT) {
#ifdef ONLY_SYM
    printf ("Working with symmetric format\n");
    CreateSparseMatrixHB (argv[1], &mat, 1);
#else
    printf ("Working with general format\n");
    CreateSparseMatrixHB (argv[1], &sym, 1);
    DesymmetrizeSparseMatrices (sym, &mat);
    RemoveSparseMatrix (&sym);
#endif
    computeData->n = mat.dim1;
  }

  // Communicate the number of rows to distribute and the number of elements in the matrix
  MPI_Bcast(&computeData->n, 1, MPI_INT, ROOT, MPI_COMM_WORLD);
  // Each process computes its own distribution
  get_dist(computeData->n, dist_data->myId, dist_data->numP, dist_data);

  if(dist_data->myId == ROOT) { // ROOT gets the rows and vpos/vval distribution
    get_mat_dist(computeData, *dist_data, mat);
    TransformHeadertoLength(mat.vptr, computeData->n); // From vptr to vlen
  } else { // Non-ROOT processes get the row distribution
    get_rows_dist(computeData, dist_data->numP, computeData->n);
    CreateInts (&computeData->dist_elem, dist_data->numP*2);
    InitInts (computeData->dist_elem, dist_data->numP * 2, 0.0, 0);
    computeData->displs_elem = computeData->dist_elem + dist_data->numP;
  }
  // Allocate each process' submatrix and get its distribution from ROOT
  mat_alloc(computeData, mat, *dist_data);

  computeSolution(*computeData, &subsol, mat, dist_data->myId, &full_vec);
  pre_compute(computeData, *dist_data, subsol, full_vec);

  //Free initial data
  RemoveDoubles(&subsol);
  RemoveDoubles(&full_vec);
  if(dist_data->myId == ROOT) {
    RemoveSparseMatrix(&mat);
  }
}
/*
 * MPI Dist
 * Broadcast the vptr array and each process gets the data that corresponds to itself.
 *
 * mat.vptr must be in vlen format to work correctly
 */
void get_mat_dist(Compute_data *computeData, struct Dist_data dist_data, SparseMatrix mat) {
  int i, j;
  struct Dist_data dist_data_aux;

#ifdef DEBUG
  if(dist_data.myId == ROOT) printf("Distributing vptr\n");
#endif
  CreateInts (&computeData->dist_rows, dist_data.numP);
  CreateInts (&computeData->displs_rows, dist_data.numP);
  CreateInts (&computeData->dist_elem, dist_data.numP*2);
  computeData->displs_elem = computeData->dist_elem + dist_data.numP;

  InitInts (computeData->dist_rows, dist_data.numP, 0, 0);
  InitInts (computeData->displs_rows, dist_data.numP, 0, 0);
  InitInts (computeData->dist_elem, dist_data.numP*2, 0, 0);

  // Fill dist_rows and dist_elem so each process can make ScatterV or GatherV calls
  for(i=0; i<dist_data.numP; i++) {
    get_dist(computeData->n, i, dist_data.numP, &dist_data_aux);

    computeData->dist_rows[i] = dist_data_aux.tamBl;
    computeData->dist_elem[i] = mat.vptr[dist_data_aux.fin] - mat.vptr[dist_data_aux.ini];

    // Fill displacements
    if(i!=0) {
      computeData->displs_elem[i] = computeData->displs_elem[i-1] + computeData->dist_elem[i-1];
      computeData->displs_rows[i] = computeData->displs_rows[i-1] + computeData->dist_rows[i-1];
    }
  }

#ifdef DEBUG
  printf("Proc %d stores %d rows with %d elements\n", dist_data.myId,
         computeData->dist_rows[dist_data.myId], computeData->dist_elem[dist_data.myId]);
  fflush(stdout);
#endif
}

/*
 * MPI Dist
 * Get the distribution of n rows among a given number of processes
 */
void get_rows_dist(Compute_data *computeData, int numP, int n) {
  int i, j;
  struct Dist_data dist_data;

  CreateInts (&(computeData->dist_rows), numP);
  CreateInts (&(computeData->displs_rows), numP);

  InitInts (computeData->dist_rows, numP, 0, 0);
  InitInts (computeData->displs_rows, numP, 0, 0);

  // Fill dist_rows and displs_rows so each process can make ScatterV or GatherV calls
  for(i=0; i<numP; i++) {
    get_dist(n, i, numP, &dist_data);
    computeData->dist_rows[i] = dist_data.tamBl;

    // Fill displacements
    if(i!=0) {
      computeData->displs_rows[i] = computeData->displs_rows[i-1] + computeData->dist_rows[i-1];
    }
  }
}
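/*
 * Worked example (illustrative values only): for n = 10 rows and numP = 3
 * processes, get_dist assigns the row ranges [0,4), [4,7) and [7,10), so
 * get_rows_dist produces
 *
 *   dist_rows   = { 4, 3, 3 }
 *   displs_rows = { 0, 4, 7 }
 *
 * which are the count/displacement pairs later handed to MPI_Scatterv and
 * MPI_Allgatherv.
 */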
/*
 * Matrix allocation
 *
 * The matrix that each process will use is allocated and
 * its vptr array initialised.
 *
 * MPI Dist
 * Distribute the vpos and vval data among the processes.
 * Both arrays share the same distribution.
 */
void mat_alloc(Compute_data *computeData, SparseMatrix mat, struct Dist_data dist_data) {
  int i;
  int elems; // Number of elements this process has
#ifdef DEBUG
  if(dist_data.myId == ROOT) printf("Distributing vpos and vval\n");
#endif

  // dist_rows[myId] is the number of rows, n the number of columns, and dist_elem[myId] the number of matrix elements this process will hold
  CreateSparseMatrixVptr(&(computeData->subm), dist_data.tamBl, computeData->n, 0);
  computeData->subm.vptr[0] = 0;

  MPI_Scatterv((mat.vptr)+1, computeData->dist_rows, computeData->displs_rows, MPI_INT,
               (computeData->subm.vptr)+1, dist_data.tamBl, MPI_INT, ROOT, MPI_COMM_WORLD);

  CreateInts(&(computeData->vlen), dist_data.tamBl);
  for(i=0; i<dist_data.tamBl; i++) {
    computeData->vlen[i] = computeData->subm.vptr[i+1];
  }
  TransformLengthtoHeader(computeData->subm.vptr, computeData->subm.dim1); // The array is converted from vlen to vptr

  elems = computeData->subm.vptr[dist_data.tamBl];
  CreateSparseMatrixValues(&(computeData->subm), dist_data.tamBl, computeData->n, elems, 0);
  MPI_Scatterv(mat.vpos, computeData->dist_elem, computeData->displs_elem, MPI_INT,
               computeData->subm.vpos, elems, MPI_INT, ROOT, MPI_COMM_WORLD);
  MPI_Scatterv(mat.vval, computeData->dist_elem, computeData->displs_elem, MPI_DOUBLE,
               computeData->subm.vval, elems, MPI_DOUBLE, ROOT, MPI_COMM_WORLD);

  // Free the elem arrays, as they are not going to be used again
  RemoveInts (&computeData->dist_elem);
}

/*
 * Compute solution
 */
void computeSolution(Compute_data computeData, double **subsol, SparseMatrix mat, int myId, double **full_vec) {

  CreateDoubles (subsol, computeData.dist_rows[myId]);
  InitDoubles (*subsol, computeData.dist_rows[myId], 0.0, 0.0);
  CreateDoubles(full_vec, computeData.n);
  InitDoubles (*full_vec, computeData.n, 1.0, 0.0);

  //Compute SOLUTION
#ifdef ONLY_SYM
  ProdSymSparseMatrixVector (computeData.subm, *full_vec, *subsol); // sol += A * x
#else
  ProdSparseMatrixVector (computeData.subm, *full_vec, *subsol);    // sol += A * x
#endif

/*
#ifdef DEBUG
  int aux, i;
  double *solD = NULL, *sol = NULL;
  if(myId == ROOT) {
    printf("Computing solution\n");

    CreateDoubles (&sol, computeData.n);
    CreateDoubles (&solD, computeData.n);
    InitDoubles (sol, computeData.n, 0.0, 0.0);
    InitDoubles (solD, computeData.n, 0.0, 0.0);

    TransformLengthtoHeader(mat.vptr, mat.dim1); // vlen to vptr (At mat_alloc it was needed as vlen)
  }

  MPI_Gatherv(*subsol, computeData.dist_rows[myId], MPI_DOUBLE, sol,
              computeData.dist_rows, computeData.displs_rows, MPI_DOUBLE, ROOT, MPI_COMM_WORLD);

  if(myId == ROOT) {
#ifdef ONLY_SYM
    ProdSymSparseMatrixVector (mat, *full_vec, solD); // solD += A * x
#else
    ProdSparseMatrixVector (mat, *full_vec, solD);    // solD += A * x
#endif // ONLY_SYM
    aux = 1;
    printf("Checking sol array is ok\n");
    for(i=0; i<computeData.n; i++) {
      if(sol[i] != solD[i]) aux = 0;
    }
    if(aux) printf("sol array is correct\n");
  }
#endif // DEBUG
*/
}
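/*
 * Note on the two row-storage forms used above (illustrative values only): the
 * code switches between a CSR-style row pointer (vptr) and per-row lengths
 * (vlen) with TransformHeadertoLength / TransformLengthtoHeader. For a 3-row
 * block with 2, 3 and 1 nonzeros,
 *
 *   vlen = { 2, 3, 1 }        and        vptr = { 0, 2, 5, 6 }
 *
 * The lengths are what get scattered to each process in mat_alloc, and each
 * process rebuilds its local vptr from them.
 */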
/*
 * Initialise the CG vectors and perform the initial residual
 * computation before entering the main compute loop
 */
void pre_compute(Compute_data *computeData, struct Dist_data dist_data, double *subsol, double *full_vec) {
  int IONE = 1;
  double DMONE = -1.0, DZERO = 0.0;

  computeData->res = NULL;
  computeData->z = NULL;
  computeData->d = NULL;
  computeData->umbral = 1.0e-8;

  CreateDoubles(&computeData->res, dist_data.tamBl);
  CreateDoubles(&computeData->z, dist_data.tamBl);
  CreateDoubles(&computeData->d, dist_data.tamBl);
  CreateDoubles (&computeData->vec, dist_data.tamBl);
  CreateDoubles (&computeData->d_full, computeData->n);

  InitDoubles (computeData->vec, dist_data.tamBl, DZERO, DZERO); // x = 0
  InitDoubles (full_vec, computeData->n, DZERO, DZERO);          // full_x = 0

  computeData->iter = 0;

#ifdef ONLY_SYM
  ProdSymSparseMatrixVector (computeData->subm, full_vec, computeData->z); // z += A * full_x
//  mkl_dcsrsymv ("U", &n, mat.vval, mat.vptr, mat.vpos, vec, z);          // z = A * full_x
#else
  ProdSparseMatrixVector (computeData->subm, full_vec, computeData->z);    // z += A * full_x
#endif

  rcopy (&(dist_data.tamBl), subsol, &IONE, computeData->res, &IONE);                 // res = b
  raxpy (&(dist_data.tamBl), &DMONE, computeData->z, &IONE, computeData->res, &IONE); // res -= z
  //rcopy (&(computeData.subm.dim1), computeData.res, &IONE, &(computeData.d+computeData.displs_rows[myId]), &IONE); // d_full = res
  MPI_Allgatherv(computeData->res, dist_data.tamBl, MPI_DOUBLE, computeData->d_full,
                 computeData->dist_rows, computeData->displs_rows, MPI_DOUBLE, MPI_COMM_WORLD);
  rcopy (&(dist_data.tamBl), &(computeData->d_full[dist_data.ini]), &IONE, computeData->d, &IONE); // d = d_full[ini] to d_full[ini+tamBl]

  computeData->beta = rdot (&(dist_data.tamBl), computeData->res, &IONE, computeData->res, &IONE); // beta = res' * res
  MPI_Allreduce(MPI_IN_PLACE, &computeData->beta, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  computeData->tol = sqrt (computeData->beta); // tol = sqrt(beta) = norm (res)
}

/*
 * Main compute loop (conjugate gradient iterations)
 */
int compute(Compute_data *computeData, struct Dist_data *dist_data, int sm) {
  int IZERO = 0, IONE = 1;
  double DONE = 1.0, DMONE = -1.0, DZERO = 0.0;
  int state = MALL_NOT_STARTED;
  int ended_loop = 1;
  int cnt = 0;
  int reconfigure = 0, rec_iter = 500;

  computeData->maxiter = 1000;

  while ((computeData->iter < computeData->maxiter) && (computeData->tol > computeData->umbral)) {

    MPI_Allgatherv(computeData->d, dist_data->tamBl, MPI_DOUBLE, computeData->d_full,
                   computeData->dist_rows, computeData->displs_rows, MPI_DOUBLE, dist_data->comm); // d_full = Gather(d)

    // COMPUTATION
#ifdef ONLY_SYM
    ProdSymSparseMatrixVector (computeData->subm, computeData->d_full, computeData->z); // z += A * d_full
#else
    ProdSparseMatrixVector (computeData->subm, computeData->d_full, computeData->z);    // z += A * d_full
#endif
    computeData->rho = rdot (&(dist_data->tamBl), computeData->d, &IONE, computeData->z, &IONE); // rho = (d * z)
    MPI_Allreduce(MPI_IN_PLACE, &computeData->rho, 1, MPI_DOUBLE, MPI_SUM, dist_data->comm);     // Reduce(rho, SUM)

    computeData->rho = computeData->beta / computeData->rho; // rho = beta / aux
    raxpy (&(dist_data->tamBl), &computeData->rho, computeData->d, &IONE, computeData->vec, &IONE); // x += rho * d

    computeData->rho = -computeData->rho;
    raxpy (&(dist_data->tamBl), &computeData->rho, computeData->z, &IONE, computeData->res, &IONE); // res -= rho * z

    computeData->alpha = computeData->beta; // alpha = beta
    computeData->beta = rdot (&(dist_data->tamBl), computeData->res, &IONE, computeData->res, &IONE); // beta = res' * res
    MPI_Allreduce(MPI_IN_PLACE, &computeData->beta, 1, MPI_DOUBLE, MPI_SUM, dist_data->comm); // Reduce(beta, SUM)

    computeData->alpha = computeData->beta / computeData->alpha; // alpha = beta / alpha
    rscal (&(dist_data->tamBl), &computeData->alpha, computeData->d, &IONE);            // d = alpha * d
    raxpy (&(dist_data->tamBl), &DONE, computeData->res, &IONE, computeData->d, &IONE); // d += res

    //MPI_Allgatherv(computeData->d, dist_data->tamBl, MPI_DOUBLE, computeData->d_full,
    //               computeData->dist_rows, computeData->displs_rows, MPI_DOUBLE, dist_data->comm); // d_full = Gather(d)

    computeData->tol = sqrt (computeData->beta); // tol = sqrt(beta) = norm (res)
    computeData->iter++;
    if (computeData->iter == rec_iter) reconfigure = 1;
    if (reconfigure) {
      state = malleability_checkpoint();
      if ((state == MALL_COMPLETED && sm == 0) || state == MALL_ZOMBIE) {
        ended_loop = 0;
        break;
      } else if(state == MALL_COMPLETED) {
        reconfigure = 0;
        free_computeData(computeData, 0);
        update_dist_data(dist_data);
        dist_new(dist_data, computeData);
      }
    }
  }

#ifdef DEBUG
  if(dist_data->myId == ROOT) printf ("Ended loop\n");
#endif
  return ended_loop;
}
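/*
 * Reconfiguration flow inside compute(), summarised from the checks above:
 * once iter reaches rec_iter, malleability_checkpoint() is polled every
 * iteration. MALL_ZOMBIE (or MALL_COMPLETED when sm == 0) makes the process
 * leave the loop with ended_loop = 0; MALL_COMPLETED with sm != 0 means this
 * process continues in the new group, so the local data is freed, the
 * communicator data refreshed and the redistributed data picked up through
 * dist_new() before iterating again.
 */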
void dumb(Compute_data *computeData, struct Dist_data *dist_data) {
  int i;

  if(dist_data->myId == 0) printf("Vptr=");
  fflush(stdout); MPI_Barrier(dist_data->comm);
  for(i=0; i<dist_data->numP; i++) {
    if(dist_data->myId == i) {
      printf("%d, ", computeData->subm.vptr[dist_data->tamBl]); fflush(stdout);
    }
    MPI_Barrier(dist_data->comm);
  }
  if(dist_data->myId == 0) printf("\n");
  fflush(stdout); MPI_Barrier(dist_data->comm);

  if(dist_data->myId == 0) printf("Tol=");
  fflush(stdout); MPI_Barrier(dist_data->comm);
  for(i=0; i<dist_data->numP; i++) {
    if(dist_data->myId == i) {
      printf("%lf, ", computeData->tol); fflush(stdout);
    }
    MPI_Barrier(dist_data->comm);
  }
  if(dist_data->myId == 0) printf("\n");
  fflush(stdout); MPI_Barrier(dist_data->comm);

  if(dist_data->myId == 0) printf("Z[last]=");
  fflush(stdout); MPI_Barrier(dist_data->comm);
  for(i=0; i<dist_data->numP; i++) {
    if(dist_data->myId == i) {
      printf("%lf, ", computeData->z[dist_data->tamBl-1]); fflush(stdout);
    }
    MPI_Barrier(dist_data->comm);
  }
  if(dist_data->myId == 0) printf("\n");
  fflush(stdout); MPI_Barrier(dist_data->comm);

  if(dist_data->myId == 0) printf("D[last]=");
  fflush(stdout); MPI_Barrier(dist_data->comm);
  for(i=0; i<dist_data->numP; i++) {
    if(dist_data->myId == i) {
      printf("%lf, ", computeData->d[dist_data->tamBl-1]); fflush(stdout);
    }
    MPI_Barrier(dist_data->comm);
  }
  if(dist_data->myId == 0) printf("\n");
  fflush(stdout); MPI_Barrier(dist_data->comm);

  if(dist_data->myId == 0) printf("res[last]=");
  fflush(stdout); MPI_Barrier(dist_data->comm);
  for(i=0; i<dist_data->numP; i++) {
    if(dist_data->myId == i) {
      printf("%lf, ", computeData->res[dist_data->tamBl-1]); fflush(stdout);
    }
    MPI_Barrier(dist_data->comm);
  }
  if(dist_data->myId == 0) printf("\n");
  fflush(stdout); MPI_Barrier(dist_data->comm);

  if(dist_data->myId == 0) printf("Vec[last]=");
  fflush(stdout); MPI_Barrier(dist_data->comm);
  for(i=0; i<dist_data->numP; i++) {
    if(dist_data->myId == i) {
      printf("%lf, ", computeData->vec[dist_data->tamBl-1]); fflush(stdout);
    }
    MPI_Barrier(dist_data->comm);
  }
  if(dist_data->myId == 0) printf("\n");
  fflush(stdout); MPI_Barrier(dist_data->comm);
}

void free_computeData(Compute_data *computeData, int terminate) {
  if(computeData->res != NULL) {
    RemoveDoubles (&computeData->res);
  }
  if(computeData->z != NULL) {
    RemoveDoubles (&computeData->z);
  }
  if(computeData->d != NULL) {
    RemoveDoubles (&computeData->d);
  }
  if(computeData->vec != NULL) {
    RemoveDoubles (&computeData->vec);
  }
  if(computeData->d_full != NULL && terminate) {
    RemoveDoubles (&computeData->d_full);
  }
  if(computeData->subm.vptr != NULL) {
    RemoveSparseMatrix2 (&computeData->subm);
  }
  if(computeData->dist_rows != NULL) {
    RemoveInts (&computeData->dist_rows);
  }
  if(computeData->displs_rows != NULL) {
    RemoveInts (&computeData->displs_rows);
  }
  if(computeData->vlen != NULL) {
    RemoveInts (&computeData->vlen);
  }
}

/*
 * _____________________________________________________________________________________
 * ||                                                                                 ||
 * ||                                                                                 ||
 * ||                            DISTRIBUTION FUNCTIONS                               ||
 * ||                                                                                 ||
 * ||                                                                                 ||
 * \_____________________________________________________________________________________/
 */

/*
 * The following functions are all related to the distribution
 * of the data or the processes.
 */
/*
 * ========================================================================================
 * ========================================================================================
 * ========================PARENTS COMMUNICATION FUNCTIONS=================================
 * ========================================================================================
 * ========================================================================================
 */

/*
 * Called by the original (parent) set of processes: configures the
 * malleability module and registers the data that will be redistributed
 * to the child processes.
 */
int dist_old(struct Dist_data *dist_data, Compute_data *computeData, int num_children, int sm, int ss, int rm, int rs, int send_sync) {
  int phy_dist = 2;

  set_malleability_configuration(sm, ss, phy_dist, rm, rs);
  set_children_number(num_children);

  malleability_add_data(&(computeData->n), 1, MAL_INT, MAL_DATA_ALONE, 1, 1);
  malleability_add_data(&(computeData->iter), 1, MAL_INT, MAL_DATA_ALONE, 1, 1);
  malleability_add_data(&(computeData->tol), 1, MAL_DOUBLE, MAL_DATA_ALONE, 1, 1);
  malleability_add_data(&(computeData->beta), 1, MAL_DOUBLE, MAL_DATA_ALONE, 1, 1);
  malleability_add_data(&(computeData->umbral), 1, MAL_DOUBLE, MAL_DATA_ALONE, 1, 1);

  //malleability_add_data(computeData->d_full, computeData->n, MAL_DOUBLE, MAL_DATA_ALONE, 1, 1);
  malleability_add_data(computeData->d, computeData->n, MAL_DOUBLE, MAL_DATA_ALONE, 0, 1);
  malleability_add_data(computeData->vec, computeData->n, MAL_DOUBLE, MAL_DATA_ALONE, 0, 1);
  malleability_add_data(computeData->res, computeData->n, MAL_DOUBLE, MAL_DATA_ALONE, 0, 1);
  malleability_add_data(computeData->z, computeData->n, MAL_DOUBLE, MAL_DATA_ALONE, 0, 1);

  //FIXME The following values could be sent asynchronously
  malleability_add_data(computeData->vlen, computeData->n, MAL_INT, 1+MAL_DATA_INDEPENDENT, 0, send_sync);
  malleability_add_data(computeData->subm.vpos, computeData->n, MAL_INT, 1+MAL_DATA_DEPENDENT, 0, send_sync);
  malleability_add_data(computeData->subm.vval, computeData->n, MAL_DOUBLE, 1+MAL_DATA_DEPENDENT, 0, send_sync);
}
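/*
 * The registration order above is a contract with dist_new(): the five scalars
 * (n, iter, tol, beta, umbral) are retrieved there by fixed indices 0-4, and
 * the remaining arrays are retrieved in the same relative order they were
 * added here (d, vec, res, z, then vlen, subm.vpos, subm.vval). Reordering
 * these calls without updating the entry counters in dist_new() would hand
 * the children the wrong buffers.
 */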
/*
 * ========================================================================================
 * ========================================================================================
 * ========================CHILDREN COMMUNICATION FUNCTIONS================================
 * ========================================================================================
 * ========================================================================================
 */

/*
 * Called by a spawned (child) set of processes.
 *
 * The children first obtain from the parents the initial information
 * needed to know the size of their vectors and matrix, as well as how
 * much data they will receive from each parent.
 *
 * After that, they prepare to receive the data from the parents.
 */
void dist_new(struct Dist_data *dist_data, Compute_data *computeData) {
  int IONE = 1, i, is_synch;
  size_t entry, entries;
  void *value = NULL;
  is_synch = 1;
  entry = 0;

  malleability_get_data(&value, 0, 1, 1);
  computeData->n = *((int *)value);
  malleability_get_data(&value, 1, 1, 1);
  computeData->iter = *((int *)value);
  malleability_get_data(&value, 2, 1, 1);
  computeData->tol = *((double *)value);
  malleability_get_data(&value, 3, 1, 1);
  computeData->beta = *((double *)value);
  malleability_get_data(&value, 4, 1, 1);
  computeData->umbral = *((double *)value);

  //malleability_get_data(&value, 5, 1, 1);
  //computeData->d_full = ((double *)value);
  malleability_get_data(&value, entry++, 0, 1);
  computeData->d = ((double *)value);
  malleability_get_data(&value, entry++, 0, 1);
  computeData->vec = ((double *)value);
  malleability_get_data(&value, entry++, 0, 1);
  computeData->res = ((double *)value);
  malleability_get_data(&value, entry++, 0, 1);
  computeData->z = ((double *)value);

  get_dist(computeData->n, dist_data->myId, dist_data->numP, dist_data);
  get_rows_dist(computeData, dist_data->numP, computeData->n);
  //CreateDoubles(&computeData->d, dist_data->tamBl);
  //rcopy (&(dist_data->tamBl), &(computeData->d_full[dist_data->ini]), &IONE, computeData->d, &IONE); // d = d_full[ini] to d_full[ini+tamBl]
  CreateDoubles(&computeData->d_full, computeData->n);
  rcopy (&(dist_data->tamBl), computeData->d, &IONE, &(computeData->d_full[dist_data->ini]), &IONE); // d_full[ini] to d_full[ini+tamBl] = d

  malleability_get_entries(&entries, 0, 0); //Get if there is any asynch data to recover
  if(entries) { is_synch=0; entry=0; }

  malleability_get_data(&value, entry++, 0, is_synch);
  computeData->vlen = ((int *)value);

  CreateSparseMatrixVptr(&(computeData->subm), dist_data->tamBl, computeData->n, 0);
  computeData->subm.vptr[0] = 0;
  for(i=0; i<dist_data->tamBl; i++) {
    computeData->subm.vptr[i+1] = computeData->vlen[i];
  }
  TransformLengthtoHeader(computeData->subm.vptr, computeData->subm.dim1); // The array is converted from vlen to vptr

  malleability_get_data(&value, entry++, 0, is_synch);
  computeData->subm.vpos = ((int *)value);
  malleability_get_data(&value, entry++, 0, is_synch);
  computeData->subm.vval = ((double *)value);
}

void update_dist_data(struct Dist_data *dist_data) {
  int myId, numP;
  get_malleability_user_comm(&(dist_data->comm));
  MPI_Comm_size(dist_data->comm, &numP);
  MPI_Comm_rank(dist_data->comm, &myId);
  dist_data->myId = myId;
  dist_data->numP = numP;
}

/*
 * ========================================================================================
 * ========================================================================================
 * ================================DISTRIBUTION FUNCTIONS==================================
 * ========================================================================================
 * ========================================================================================
 */

/*
 * For the given process Id and total number of processes, computes the
 * number of rows (tamBl) and the row range [ini, fin) that the process
 * will work with.
 */
void get_dist(int total_r, int id, int numP, struct Dist_data *dist_data) {
  int rem;

  dist_data->tot_r = total_r;
  dist_data->tamBl = total_r / numP;
  rem = total_r % numP;

  if(id < rem) { // First subgroup
    dist_data->ini = id * dist_data->tamBl + id;
    dist_data->fin = (id+1) * dist_data->tamBl + (id+1);
  } else { // Second subgroup
    dist_data->ini = id * dist_data->tamBl + rem;
    dist_data->fin = (id+1) * dist_data->tamBl + rem;
  }

  if(dist_data->fin > total_r) {
    dist_data->fin = total_r;
  }
  if(dist_data->ini > dist_data->fin) {
    dist_data->ini = dist_data->fin;
  }

  dist_data->tamBl = dist_data->fin - dist_data->ini;
}
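/*
 * Worked example (illustrative values only): total_r = 11 and numP = 4 give
 * tamBl = 2 and rem = 3, so
 *
 *   id 0: ini = 0, fin = 3, tamBl = 3   (first subgroup, one extra row)
 *   id 1: ini = 3, fin = 6, tamBl = 3
 *   id 2: ini = 6, fin = 9, tamBl = 3
 *   id 3: ini = 9, fin = 11, tamBl = 2  (second subgroup)
 *
 * i.e. the first rem processes receive one row more than the rest and the
 * ranges cover [0, total_r) without gaps.
 */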
/*
 * For a given process Id, computes how many elements the calling
 * process (myId) has to send to or receive from that process.
 */
void set_counts(int id, int numP, struct Dist_data data_dist, int *sendcounts) {
  struct Dist_data other;
  int biggest_ini, smallest_end, tot_rows;

  get_dist(data_dist.tot_r, id, numP, &other);

  // If the row ranges do not overlap, move on to the next process
  if(data_dist.ini >= other.fin || data_dist.fin <= other.ini) {
    return;
  }

  // Take the largest starting row of the two processes
  if(data_dist.ini > other.ini) {
    biggest_ini = data_dist.ini;
  } else {
    biggest_ini = other.ini;
  }

  // Take the smallest ending row of the two processes
  if(data_dist.fin < other.fin) {
    smallest_end = data_dist.fin;
  } else {
    smallest_end = other.fin;
  }

  sendcounts[id] = smallest_end - biggest_ini; // Number of elements to send to / receive from process Id
}

/*
 * For a process of one group, determines the range of ranks of the
 * other group it has to send data to or receive data from.
 *
 * Returns the first identifier and the last one (exclusive) to
 * communicate with.
 */
void getIds_intercomm(struct Dist_data dist_data, int numP_other, int **idS) {
  int idI, idE;
  int tamOther = dist_data.tot_r / numP_other;
  int remOther = dist_data.tot_r % numP_other;
  int middle = (tamOther + 1) * remOther;

  if(middle > dist_data.ini) { // First subgroup
    idI = dist_data.ini / (tamOther + 1);
  } else { // Second subgroup
    idI = ((dist_data.ini - middle) / tamOther) + remOther;
  }

  if(middle >= dist_data.fin) { // First subgroup
    idE = dist_data.fin / (tamOther + 1);
    idE = (dist_data.fin % (tamOther + 1) > 0 && idE+1 <= numP_other) ? idE+1 : idE;
  } else { // Second subgroup
    idE = ((dist_data.fin - middle) / tamOther) + remOther;
    idE = ((dist_data.fin - middle) % tamOther > 0 && idE+1 <= numP_other) ? idE+1 : idE;
  }

  //free(*idS);
  CreateInts(idS, 2);
  (*idS)[0] = idI;
  (*idS)[1] = idE;
}

void print_global_results() {
  size_t i;
  double sp_time, sy_time, asy_time, mall_time, global_time;

  retrieve_results(&sp_time, &sy_time, &asy_time, &mall_time, &global_time);
  global_time = MPI_Wtime() - global_time;
  printf("T_spawn: %lf", sp_time);
  printf("\nT_SR: %lf", sy_time);
  printf("\nT_AR: %lf", asy_time);
  printf("\nT_Malleability: %lf", mall_time);
  printf("\nT_total: %lf\n", global_time);
}

/*
  double starttime, endtime, total, res;
  MPI_Barrier(MPI_COMM_WORLD);
  starttime = MPI_Wtime();

  endtime = MPI_Wtime();
  total = endtime - starttime;
  MPI_Reduce(&total, &res, 1, MPI_DOUBLE, MPI_MAX, ROOT, MPI_COMM_WORLD);
  if(dist_data.myId == ROOT) {printf("Tiempo BCAST PADRE %f\n", total); fflush(stdout);}
*/
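/*
 * Worked example for getIds_intercomm (illustrative values only): a process
 * owning rows [4, 7) of tot_r = 10 that must exchange data with a group of
 * numP_other = 2 processes gets tamOther = 5, remOther = 0, middle = 0, so
 * idI = 0 and idE = 2. It therefore communicates with ranks 0 and 1 of the
 * other group, since its rows overlap both of their ranges [0, 5) and [5, 10).
 */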