#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include "results.h"

#define RESULTS_EXTRA_SIZE 100

void def_results_type(results_data *results, int resizes, MPI_Datatype *results_type);

void compute_max(results_data *results, double *computed_array, int myId, int root, MPI_Comm comm);
void compute_mean(results_data *results, double *computed_array, int myId, int numP, int root, MPI_Comm comm);
void compute_median(results_data *results, double *computed_array, size_t *used_ids, int myId, int numP, int root, MPI_Comm comm);
void match_median(results_data *results, double *computed_array, size_t *used_ids, int myId, int numP, int root, MPI_Comm comm);

//======================================================||
//======================================================||
//================MPI RESULTS FUNCTIONS=================||
//======================================================||
//======================================================||

/*
 * Broadcasts a results structure to every process in the communicator
 * through a derived datatype.
 *
 * If called with an intercommunicator, the group of processes that sends
 * the data must pass "MPI_ROOT" as "root" on the root process and
 * "MPI_PROC_NULL" on the rest of that group. The processes of the other
 * group must pass the ID of the root process that used "MPI_ROOT".
 */
void comm_results(results_data *results, int root, size_t resizes, MPI_Comm intercomm) {
  MPI_Datatype results_type;

  // Obtain a derived datatype to send all the
  // result data with a single communication
  def_results_type(results, resizes, &results_type);

  MPI_Bcast(results, 1, results_type, root, intercomm);

  // Free the derived datatype
  MPI_Type_free(&results_type);
}
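
/*
 * Usage sketch (illustrative, not part of the original file): how the two
 * sides of an intercommunicator would call comm_results(). The names
 * "is_parent_root", "parent_root_rank" and "children_comm" are hypothetical.
 *
 *   // Sending group (parents):
 *   int bcast_root = is_parent_root ? MPI_ROOT : MPI_PROC_NULL;
 *   comm_results(&results, bcast_root, n_resizes, children_comm);
 *
 *   // Receiving group (children) passes the rank of the parents' root:
 *   comm_results(&results, parent_root_rank, n_resizes, children_comm);
 */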

/*
 * Defines an MPI derived datatype to send the timing data
 * with a single communication.
 *
 * Specifically, two scalars and five vectors of size "resizes".
 */
void def_results_type(results_data *results, int resizes, MPI_Datatype *results_type) {
  int i, counts = 7;
  int blocklengths[] = {1, 1, 1, 1, 1, 1, 1};
  MPI_Aint displs[counts], dir;
  MPI_Datatype types[counts];

  // Fill in the types vector
  types[0] = types[1] = types[2] = types[3] = types[4] = types[5] = types[6] = MPI_DOUBLE;
  blocklengths[2] = blocklengths[3] = blocklengths[4] = blocklengths[5] = blocklengths[6] = resizes;

  // Fill in the displs vector
  MPI_Get_address(results, &dir);

  MPI_Get_address(&(results->exec_start), &displs[0]);
  MPI_Get_address(&(results->wasted_time), &displs[1]);
  MPI_Get_address(results->sync_time, &displs[2]);
  MPI_Get_address(results->async_time, &displs[3]);
  MPI_Get_address(results->spawn_real_time, &displs[4]);
  MPI_Get_address(results->spawn_time, &displs[5]);
  MPI_Get_address(results->malleability_time, &displs[6]);

  for(i=0; i<counts; i++) displs[i] -= dir;

  MPI_Type_create_struct(counts, blocklengths, displs, types, results_type);
  MPI_Type_commit(results_type);
}
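
/*
 * Note (sketch, assuming an MPI-3 library): the manual subtraction above can
 * also be written with MPI_Aint_diff, which is the portable way to compute
 * address displacements:
 *
 *   for(i=0; i<counts; i++) displs[i] = MPI_Aint_diff(displs[i], dir);
 */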
//======================================================||
//======================================================||
//================SET RESULTS FUNCTIONS=================||
//======================================================||
//======================================================||

/*
 * Stores the results concerning the data redistribution after a
 * reconfiguration. To be called by the children once the redistribution
 * has finished and the configuration has been obtained.
 */
void set_results_post_reconfig(results_data *results, int grp, int sdr, int adr) {
  if(sdr) { // If there is no synchronous data, the time is 0
    results->sync_time[grp-1] = results->sync_end - results->sync_time[grp-1];
  } else {
    results->sync_time[grp-1] = 0;
  }
  if(adr) { // If there is no asynchronous data, the time is 0
    results->async_time[grp-1] = results->async_end - results->async_time[grp-1];
  } else {
    results->async_time[grp-1] = 0;
  }
  results->malleability_time[grp-1] = results->malleability_end - results->malleability_time[grp-1];
}
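
/*
 * Usage sketch (illustrative): the *_time[grp-1] slots are assumed to hold
 * the start timestamps before this call, so that subtracting them from the
 * corresponding *_end values yields elapsed times:
 *
 *   results->sync_time[grp-1] = MPI_Wtime(); // before the redistribution
 *   // ... synchronous redistribution ...
 *   results->sync_end = MPI_Wtime();         // after the redistribution
 *   set_results_post_reconfig(results, grp, sdr, adr);
 */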

/*
 * Resets to 0 the next-element-to-write index of the vectors
 * related to iterations.
 * Consequently, all previous values of those vectors become invalid
 * if accessed from external code.
 *
 * It is only necessary to call this function after an
 * expansion performed with the MERGE method.
 */
void reset_results_index(results_data *results) {
  results->iter_index = 0;
  results->iters_async = 0;
}

//=============================================================== FIXME DELETE?
/*
 * Obtains, for each iteration, the maximum time among all the processes
 * that took part in it.
 *
 * Taking the maximum is required, as it is the one that represents the
 * wall-clock time actually spent.
 */
void compute_results_iter(results_data *results, int myId, int numP, int root, size_t stages, int capture_method, MPI_Comm comm) {
  size_t i, *used_ids;
  switch(capture_method) {
    case RESULTS_MAX:
      compute_max(results, results->iters_time, myId, root, comm);
      for(i=0; i<stages; i++) {
        compute_max(results, results->stage_times[i], myId, root, comm);
      }
      break;
    case RESULTS_MEAN:
      compute_mean(results, results->iters_time, myId, numP, root, comm);
      for(i=0; i<stages; i++) {
        compute_mean(results, results->stage_times[i], myId, numP, root, comm);
      }
      break;
    case RESULTS_MEDIAN:
      used_ids = malloc(results->iter_index * sizeof(size_t));
      compute_median(results, results->iters_time, used_ids, myId, numP, root, comm);
      for(i=0; i<stages; i++) {
        //compute_median(results, results->stage_times[i], myId, numP, root, comm);
        match_median(results, results->stage_times[i], used_ids, myId, numP, root, comm);
      }
      free(used_ids);
      break;
  }
}
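
/*
 * Usage sketch (illustrative): capturing per-iteration maxima on the root
 * after the main loop. "n_stages" is assumed to match the value passed to
 * init_results_data(), and ROOT is a hypothetical constant.
 *
 *   compute_results_iter(&results, myId, numP, ROOT, n_stages, RESULTS_MAX, comm);
 *   if(myId == ROOT) print_iter_results(results);
 */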

struct TimeWithIndex {
  double time;
  size_t index;
};

int compare(const void *a, const void *b) {
  // Compare explicitly: truncating the double difference to an int would
  // treat times whose difference is below 1.0 as equal
  double diff = ((struct TimeWithIndex *)a)->time - ((struct TimeWithIndex *)b)->time;
  return (diff > 0) - (diff < 0);
}

void compute_max(results_data *results, double *computed_array, int myId, int root, MPI_Comm comm) {
  if(myId == root) {
    MPI_Reduce(MPI_IN_PLACE, computed_array, results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
  } else {
    MPI_Reduce(computed_array, NULL, results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
  }
}

void compute_mean(results_data *results, double *computed_array, int myId, int numP, int root, MPI_Comm comm) {
  if(myId == root) {
    MPI_Reduce(MPI_IN_PLACE, computed_array, results->iter_index, MPI_DOUBLE, MPI_SUM, root, comm);
    for(size_t i=0; i<results->iter_index; i++) {
      // Average the reduced values in place, whichever array was reduced
      computed_array[i] = computed_array[i] / numP;
    }
  } else {
    MPI_Reduce(computed_array, NULL, results->iter_index, MPI_DOUBLE, MPI_SUM, root, comm);
  }
}

void compute_median(results_data *results, double *computed_array, size_t *used_ids, int myId, int numP, int root, MPI_Comm comm) {
  double *aux_all_iters = NULL, median;
  struct TimeWithIndex *aux_id_iters = NULL;
  if(myId == root) {
    aux_all_iters = malloc(numP * results->iter_index * sizeof(double));
    aux_id_iters = malloc(numP * sizeof(struct TimeWithIndex));
  }
  MPI_Gather(computed_array, results->iter_index, MPI_DOUBLE, aux_all_iters, results->iter_index, MPI_DOUBLE, root, comm);
  if(myId == root) {
    for(size_t i=0; i<results->iter_index; i++) {
      for(int j=0; j<numP; j++) {
        aux_id_iters[j].time = aux_all_iters[i+(results->iter_index*j)];
        aux_id_iters[j].index = (size_t) j;
      }
      // Get the median
      qsort(aux_id_iters, numP, sizeof(struct TimeWithIndex), &compare);
      median = aux_id_iters[numP/2].time;
      if (numP % 2 == 0) median = (aux_id_iters[numP/2 - 1].time + aux_id_iters[numP/2].time) / 2;
      computed_array[i] = median;
      used_ids[i] = aux_id_iters[numP/2].index; //FIXME What should be the index when numP is even?
    }
    free(aux_all_iters);
    free(aux_id_iters);
  }
}
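
/*
 * Worked example (illustrative): with numP = 4 and gathered times
 * {3.0, 1.0, 4.0, 2.0} for one iteration, qsort orders them as
 * {1.0, 2.0, 3.0, 4.0}, so the reported median is (2.0 + 3.0) / 2 = 2.5
 * and used_ids[i] keeps the rank that produced the upper-middle sample
 * (3.0), which is the open question raised in the FIXME above.
 */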

void match_median(results_data *results, double *computed_array, size_t *used_ids, int myId, int numP, int root, MPI_Comm comm) {
  double *aux_all_iters = NULL;
  size_t matched_id;
  if(myId == root) {
    aux_all_iters = malloc(numP * results->iter_index * sizeof(double));
  }
  MPI_Gather(computed_array, results->iter_index, MPI_DOUBLE, aux_all_iters, results->iter_index, MPI_DOUBLE, root, comm);
  if(myId == root) {
    for(size_t i=0; i<results->iter_index; i++) {
      matched_id = used_ids[i];
      computed_array[i] = aux_all_iters[i+(results->iter_index*matched_id)];
    }
    free(aux_all_iters);
  }
}
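
/*
 * Design note (inferred from the code above): match_median() reuses the
 * ranks selected by compute_median(), so each stage time reported for an
 * iteration comes from the same process whose total iteration time was the
 * median, keeping per-stage and per-iteration results mutually consistent.
 */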

//======================================================||
//======================================================||
//===============PRINT RESULTS FUNCTIONS================||
//======================================================||
//======================================================||

/*
 * Prints the local results to the screen.
 * These are the iteration-related ones: the time per iteration
 * and its type (normal, or during asynchronous communication).
 */
void print_iter_results(results_data results) {
  size_t i;

  printf("Async_Iters: %ld\n", results.iters_async);
  printf("T_iter: ");
  for(i=0; i < results.iter_index; i++) {
    printf("%lf ", results.iters_time[i]);
  }
  printf("\n");
}

/*
 * Prints the local results of a stage to the screen.
 */
void print_stage_results(results_data results, size_t n_stages) {
  size_t i, j;

  for(i=0; i < n_stages; i++) {
    printf("T_stage %zu: ", i);
    for(j=0; j < results.iter_index; j++) {
      printf("%lf ", results.stage_times[i][j]);
    }
    printf("\n");
  }
}

/*
 * Prints the global results to the screen.
 * These are the process spawn times, the synchronous and asynchronous
 * communication times, and the total execution time.
 */
void print_global_results(results_data results, size_t resizes) {
  size_t i;

  printf("T_spawn: ");
  for(i=0; i < resizes; i++) {
    printf("%lf ", results.spawn_time[i]);
  }

  printf("\nT_spawn_real: ");
  for(i=0; i < resizes; i++) {
    printf("%lf ", results.spawn_real_time[i]);
  }

  printf("\nT_SR: ");
  for(i=0; i < resizes; i++) {
    printf("%lf ", results.sync_time[i]);
  }

  printf("\nT_AR: ");
  for(i=0; i < resizes; i++) {
    printf("%lf ", results.async_time[i]);
  }

  printf("\nT_Malleability: ");
  for(i=0; i < resizes; i++) {
    printf("%lf ", results.malleability_time[i]);
  }

  printf("\nT_total: %lf\n", results.exec_time);
}

//======================================================||
//======================================================||
//=============INIT/FREE RESULTS FUNCTIONS==============||
//======================================================||
//======================================================||

/*
 * Initializes the data related to a results structure.
 *
 * The "resizes" and "iters_size" arguments are needed to determine the
 * size of the results vectors.
 */
void init_results_data(results_data *results, size_t resizes, size_t stages, size_t iters_size) {
  size_t i;

  results->spawn_time = calloc(resizes, sizeof(double));
  results->spawn_real_time = calloc(resizes, sizeof(double));
  results->sync_time = calloc(resizes, sizeof(double));
  results->async_time = calloc(resizes, sizeof(double));
  results->malleability_time = calloc(resizes, sizeof(double));
  results->wasted_time = 0;

  results->iters_size = iters_size + RESULTS_EXTRA_SIZE;
  results->iters_time = calloc(results->iters_size, sizeof(double));
  results->stage_times = malloc(stages * sizeof(double*));
  for(i=0; i<stages; i++) {
    results->stage_times[i] = calloc(results->iters_size, sizeof(double));
  }

  results->iters_async = 0;
  results->iter_index = 0;
}
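
/*
 * Usage sketch (illustrative): typical lifecycle of a results_data structure
 * as suggested by this file; the size variables are hypothetical.
 *
 *   results_data results;
 *   init_results_data(&results, n_resizes, n_stages, expected_iters);
 *   // ... run iterations, filling results.iters_time / results.stage_times ...
 *   realloc_results_iters(&results, n_stages, expected_iters * 2);
 *   // ...
 *   free_results_data(&results, n_stages);
 */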

void realloc_results_iters(results_data *results, size_t stages, size_t needed) {
  int error = 0;
  double *time_aux;
  size_t i;

  if(results->iters_size >= needed) return;

  time_aux = (double *) realloc(results->iters_time, needed * sizeof(double));
  if(time_aux == NULL) error = 1;

  for(i=0; i<stages; i++) {
    results->stage_times[i] = (double *) realloc(results->stage_times[i], needed * sizeof(double));
    if(results->stage_times[i] == NULL) error = 1;
  }

  if(error) {
    fprintf(stderr, "Fatal error - could not reallocate the results memory\n");
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  results->iters_time = time_aux;
  results->iters_size = needed;
}

/*
 * Frees all the memory associated with a results structure.
 */
void free_results_data(results_data *results, size_t stages) {
  size_t i;
  if(results != NULL) {
    if(results->spawn_time != NULL) {
      free(results->spawn_time);
      results->spawn_time = NULL;
    }
    if(results->spawn_real_time != NULL) {
      free(results->spawn_real_time);
      results->spawn_real_time = NULL;
    }
    if(results->sync_time != NULL) {
      free(results->sync_time);
      results->sync_time = NULL;
    }
    if(results->async_time != NULL) {
      free(results->async_time);
      results->async_time = NULL;
    }
    if(results->malleability_time != NULL) {
      free(results->malleability_time);
      results->malleability_time = NULL;
    }

    if(results->iters_time != NULL) {
      free(results->iters_time);
      results->iters_time = NULL;
    }
    if(results->stage_times != NULL) { // Guard before indexing stage_times
      for(i=0; i<stages; i++) {
        if(results->stage_times[i] != NULL) {
          free(results->stage_times[i]);
          results->stage_times[i] = NULL;
        }
      }
      free(results->stage_times);
      results->stage_times = NULL;
    }
  }
}