#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include "results.h"

#define RESULTS_EXTRA_SIZE 100

void def_results_type(results_data *results, int resizes, MPI_Datatype *results_type);

void compute_max(results_data *results, double *computed_array, int myId, int root, MPI_Comm comm);
void compute_mean(results_data *results, double *computed_array, int myId, int numP, int root, MPI_Comm comm);
void compute_median(results_data *results, double *computed_array, size_t *used_ids, int myId, int numP, int root, MPI_Comm comm);
void match_median(results_data *results, double *computed_array, size_t *used_ids, int myId, int numP, int root, MPI_Comm comm);

//======================================================||
//======================================================||
//================MPI RESULTS FUNCTIONS=================||
//======================================================||
//======================================================||

/*
 * Communicates a results structure to every process in the communicator
 * through a derived datatype.
 *
 * When called with an intercommunicator, the root process of the group that
 * sends the data must pass "MPI_ROOT" as "root" and the rest of that group
 * must pass "MPI_PROC_NULL". The processes of the other group must pass the
 * Id of the root process that used "MPI_ROOT".
 */
void results_comm(results_data *results, int root, size_t resizes, MPI_Comm intercomm) {
  MPI_Datatype results_type;

  // Obtain a derived datatype to send all the
  // timing data in a single communication
  def_results_type(results, resizes, &results_type);
  MPI_Bcast(results, 1, results_type, root, intercomm);

  //Free the derived datatype
  MPI_Type_free(&results_type);
}
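
/*
 * Illustrative sketch, not called anywhere in this file: it only shows the
 * root-value convention described above when results_comm is used over an
 * intercommunicator. "group_sends", "rootId" and "remote_root" are
 * hypothetical values the caller would already know.
 */
static void example_results_comm_intercomm(results_data *results, size_t resizes,
                                           int group_sends, int myId, int rootId,
                                           int remote_root, MPI_Comm intercomm) {
  int root;
  if(group_sends) {
    // Sending group: only its root passes MPI_ROOT, the others MPI_PROC_NULL.
    root = (myId == rootId) ? MPI_ROOT : MPI_PROC_NULL;
  } else {
    // Receiving group: every process passes the rank of the remote root.
    root = remote_root;
  }
  results_comm(results, root, resizes, intercomm);
}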

/*
 * Defines an MPI derived datatype so the timing data can be sent
 * in a single communication.
 *
 * Concretely it packs two scalars and five vectors of size "resizes".
 */
void def_results_type(results_data *results, int resizes, MPI_Datatype *results_type) {
  int i, counts = 7;
  int blocklengths[] = {1, 1, 1, 1, 1, 1, 1};
  MPI_Aint displs[counts], dir;
  MPI_Datatype types[counts];

  // Fill the types and blocklengths vectors
  types[0] = types[1] = types[2] = types[3] = types[4] = types[5] = types[6] = MPI_DOUBLE;
  blocklengths[2] = blocklengths[3] = blocklengths[4] = blocklengths[5] =  blocklengths[6] = resizes;

  // Fill the displacements vector
  MPI_Get_address(results, &dir);

  MPI_Get_address(&(results->exec_start), &displs[0]);
  MPI_Get_address(&(results->wasted_time), &displs[1]);
  MPI_Get_address(results->sync_time, &displs[2]);
  MPI_Get_address(results->async_time, &displs[3]);
  MPI_Get_address(results->user_time, &displs[4]);
  MPI_Get_address(results->spawn_time, &displs[5]);
  MPI_Get_address(results->malleability_time, &displs[6]);

  for(i=0;i<counts;i++) displs[i] -= dir;

  MPI_Type_create_struct(counts, blocklengths, displs, types, results_type);
  MPI_Type_commit(results_type);
}

//======================================================||
//======================================================||
//================SET RESULTS FUNCTIONS=================||
//======================================================||
//======================================================||

/*
 * Resets to 0 the index of the next element to be written for the vectors
 * related to iterations. Consequently, all values previously stored in those
 * vectors become invalid if external code tries to access them.
 *
 * This function only needs to be called after an expansion performed
 * with the MERGE method.
 */
void reset_results_index(results_data *results) {
  results->iter_index = 0;
  results->iters_async = 0;
}

/*
 * For each iteration, combines the time measured by every participating
 * process into a single value, according to the capture method requested
 * by the caller (maximum, mean or median).
 *
 * The maximum is usually the most representative value, since it reflects
 * the real time the iteration took.
 */
void compute_results_iter(results_data *results, int myId, int numP, int root, size_t stages, int capture_method, MPI_Comm comm) {
  size_t i, *used_ids;
  switch(capture_method) {
    case RESULTS_MAX:
      compute_max(results, results->iters_time, myId, root, comm);
      for(i=0; i<stages; i++) {
        compute_max(results, results->stage_times[i], myId, root, comm);
      }
      break;
    case RESULTS_MEAN:
      compute_mean(results, results->iters_time, myId, numP, root, comm);
      for(i=0; i<stages; i++) {
        compute_mean(results, results->stage_times[i], myId, numP, root, comm);
      }
      break;
    case RESULTS_MEDIAN:
      used_ids = malloc(results->iter_index * sizeof(size_t));
      compute_median(results, results->iters_time, used_ids, myId, numP, root, comm);
      for(i=0; i<stages; i++) {
        //compute_median(results, results->stage_times[i], myId, numP, root, comm);
        match_median(results, results->stage_times[i], used_ids, myId, numP, root, comm);
      }
      free(used_ids);
      break;
  }
}
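
/*
 * Minimal usage sketch, an assumption about how callers drive the function
 * rather than code used in this file: after all iterations have been timed,
 * every process calls compute_results_iter collectively and only "root"
 * keeps the reduced values in results->iters_time and results->stage_times.
 */
static void example_reduce_iter_times(results_data *results, int myId, int numP,
                                      int root, size_t n_stages, MPI_Comm comm) {
  // RESULTS_MAX is the conservative choice: the slowest process bounds the real time.
  compute_results_iter(results, myId, numP, root, n_stages, RESULTS_MAX, comm);
}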

void compute_max(results_data *results, double *computed_array, int myId, int root, MPI_Comm comm) {
  if(myId == root) {
    MPI_Reduce(MPI_IN_PLACE, computed_array, results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
  } else {
    MPI_Reduce(computed_array, NULL, results->iter_index, MPI_DOUBLE, MPI_MAX, root, comm);
  }
}

void compute_mean(results_data *results, double *computed_array, int myId, int numP, int root, MPI_Comm comm) {
  if(myId == root) {
    MPI_Reduce(MPI_IN_PLACE, computed_array, results->iter_index, MPI_DOUBLE, MPI_SUM, root, comm);
    for(size_t i=0; i<results->iter_index; i++) {
      computed_array[i] = computed_array[i] / numP;
    }
  } else {
    MPI_Reduce(computed_array, NULL, results->iter_index, MPI_DOUBLE, MPI_SUM, root, comm);
  }
}



struct TimeWithIndex {
    double time;
    size_t index;
};

int compare(const void *a, const void *b) {
  double diff = ((struct TimeWithIndex *)a)->time - ((struct TimeWithIndex *)b)->time;
  return (diff > 0) - (diff < 0);
}

/*
 * Computes, for each element of a time vector replicated across "numP"
 * processes, the median over all processes and stores it back in the vector.
 * For example, with numP = 4 and values {3, 1, 4, 2} for one element, the
 * sorted values are {1, 2, 3, 4} and the median is (2 + 3) / 2 = 2.5.
 *
 * In addition, the vector "used_ids" returns, for each element, the process
 * from which the median was taken.
 */
void compute_median(results_data *results, double *computed_array, size_t *used_ids, int myId, int numP, int root, MPI_Comm comm) {
  double *aux_all_iters = NULL, median;
  struct TimeWithIndex *aux_id_iters = NULL;

  if(myId == root) {
    aux_all_iters = malloc(numP * results->iter_index * sizeof(double));
    aux_id_iters = malloc(numP * sizeof(struct TimeWithIndex));
  }
  MPI_Gather(computed_array, results->iter_index, MPI_DOUBLE, aux_all_iters, results->iter_index, MPI_DOUBLE, root, comm);
  if(myId == root) {
    for(size_t i=0; i<results->iter_index; i++) {
      for(int j=0; j<numP; j++) {
        aux_id_iters[j].time = aux_all_iters[i+(results->iter_index*j)];
        aux_id_iters[j].index = (size_t) j;
      }
      // Get Median
      qsort(aux_id_iters, numP, sizeof(struct TimeWithIndex), &compare);
      median = aux_id_iters[numP/2].time;
      if (numP % 2 == 0) median = (aux_id_iters[numP/2 - 1].time + aux_id_iters[numP/2].time) / 2;
      computed_array[i] = median;
      used_ids[i] = aux_id_iters[numP/2].index; //FIXME What should be the index when numP is even?
    }
    free(aux_all_iters);
    free(aux_id_iters);
  }
}

/*
 * Retrieves the medians of a time vector replicated across "numP" processes.
 * The median of each element is obtained by consulting the vector "used_ids",
 * which records which process holds the median.
 *
 * The result is returned in place as a vector with the selected values.
 */
void match_median(results_data *results, double *computed_array, size_t *used_ids, int myId, int numP, int root, MPI_Comm comm) {
  double *aux_all_iters = NULL;
  size_t matched_id;

  if(myId == root) {
    aux_all_iters = malloc(numP * results->iter_index * sizeof(double));
  }
  MPI_Gather(computed_array, results->iter_index, MPI_DOUBLE, aux_all_iters, results->iter_index, MPI_DOUBLE, root, comm);
  if(myId == root) {
    for(size_t i=0; i<results->iter_index; i++) {
      matched_id = used_ids[i];
      computed_array[i] = aux_all_iters[i+(results->iter_index*matched_id)];
    }
    free(aux_all_iters);
  }
}

//======================================================||
//======================================================||
//===============PRINT RESULTS FUNCTIONS================||
//======================================================||
//======================================================||

/*
 * Prints the local results to standard output. These are the per-iteration
 * results: the time of each iteration and how many iterations overlapped
 * with asynchronous communication.
 */
void print_iter_results(results_data results) {
  size_t i;

  printf("Async_Iters: %ld\n", results.iters_async);
  printf("T_iter: ");
  for(i=0; i< results.iter_index; i++) {
    printf("%lf ", results.iters_time[i]);
  }
  printf("\n");
}

/*
 * Prints the local results of each stage to standard output.
 */
void print_stage_results(results_data results, size_t n_stages) {
  size_t i, j;

  for(i=0; i < n_stages; i++) {
    printf("T_stage %ld: ", i);
    for(j=0; j < results.iter_index; j++) {
      printf("%lf ", results.stage_times[i][j]);
    }
    printf("\n");
  }
}

/*
 * Prints the global results to standard output: process spawn times,
 * synchronous and asynchronous redistribution times, user and malleability
 * times, and the total execution time.
 */
void print_global_results(results_data results, size_t resizes) {
  size_t i;

  printf("T_spawn: ");
  for(i=0; i < resizes; i++) {
    printf("%lf ", results.spawn_time[i]);
  }

  printf("\nT_SR: ");
  for(i=0; i < resizes; i++) {
    printf("%lf ", results.sync_time[i]);
  }

  printf("\nT_AR: ");
  for(i=0; i < resizes; i++) {
    printf("%lf ", results.async_time[i]);
  }

  printf("\nT_US: ");
  for(i=0; i < resizes; i++) {
    printf("%lf ", results.user_time[i]);
  }

  printf("\nT_Malleability: ");
  for(i=0; i < resizes; i++) {
    printf("%lf ", results.malleability_time[i]);
  }

  printf("\nT_total: %lf\n", results.exec_time);
}

//======================================================||
//======================================================||
//=============INIT/FREE RESULTS FUNCTIONS==============||
//======================================================||
//======================================================||

/*
 * Initialises the data associated with a results structure.
 *
 * The arguments "resizes", "stages" and "iters_size" are needed to determine
 * the sizes of the result vectors.
 */
void init_results_data(results_data *results, size_t resizes, size_t stages, size_t iters_size) {
  size_t i;

  results->spawn_time = calloc(resizes, sizeof(double));
  results->sync_time = calloc(resizes, sizeof(double));
  results->async_time = calloc(resizes, sizeof(double));
  results->user_time = calloc(resizes, sizeof(double));
  results->malleability_time = calloc(resizes, sizeof(double));
  results->wasted_time = 0;

  results->iters_size = iters_size + RESULTS_EXTRA_SIZE;
  results->iters_time = calloc(results->iters_size, sizeof(double));
  results->stage_times = malloc(stages * sizeof(double*));
  for(i=0; i<stages; i++) {
    results->stage_times[i] = calloc(results->iters_size, sizeof(double));
  }

  results->iters_async = 0;
  results->iter_index = 0;
}

/*
 * Grows the iteration-related result vectors so that at least "needed"
 * entries fit. Does nothing if the current capacity already suffices and
 * aborts the execution if any reallocation fails.
 */
void realloc_results_iters(results_data *results, size_t stages, size_t needed) {
  int error = 0;
  double *time_aux;
  size_t i;

  if(results->iters_size >= needed) return;

  time_aux = (double *) realloc(results->iters_time, needed * sizeof(double));
  if(time_aux == NULL) error = 1;

  for(i=0; i<stages; i++) { //TODO Check that these reallocs do not fail
    results->stage_times[i] = (double *) realloc(results->stage_times[i], needed * sizeof(double));
    if(results->stage_times[i] == NULL) error = 1;
  }

  if(error) {
    fprintf(stderr, "Fatal error - Could not reallocate the results memory\n");
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  results->iters_time = time_aux;
  results->iters_size = needed;
}
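
/*
 * Sketch of the growth pattern the function above enables (a hypothetical
 * helper, not part of the existing API): make room for one more entry before
 * writing the next per-iteration time, then advance the write index.
 */
static void example_record_iter_time(results_data *results, size_t stages, double t) {
  // Ensure iters_time and every stage vector can hold one more element.
  realloc_results_iters(results, stages, results->iter_index + 1);
  results->iters_time[results->iter_index] = t;
  results->iter_index++;
}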

/*
 * Frees all the memory associated with a results structure.
 */
void free_results_data(results_data *results, size_t stages) {
  size_t i;
  if(results != NULL) {
    if(results->spawn_time != NULL) {
      free(results->spawn_time);
      results->spawn_time = NULL;
    }
    if(results->sync_time != NULL) {
      free(results->sync_time);
      results->sync_time = NULL;
    }
    if(results->async_time != NULL) {
      free(results->async_time);
      results->async_time = NULL;
    }
    if(results->user_time != NULL) {
      free(results->user_time);
      results->user_time = NULL;
    }
    if(results->malleability_time != NULL) {
      free(results->malleability_time);
      results->malleability_time = NULL;
    }

    if(results->iters_time != NULL) {
      free(results->iters_time);
      results->iters_time = NULL;
    }
    if(results->stage_times != NULL) {
      for(i=0; i<stages; i++) {
        if(results->stage_times[i] != NULL) {
          free(results->stage_times[i]);
          results->stage_times[i] = NULL;
        }
      }
      free(results->stage_times);
      results->stage_times = NULL;
    }
  }
}
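
/*
 * End-to-end sketch of the intended lifecycle of a results structure, drawn
 * from the functions above but not called from this file. "n_resizes",
 * "n_stages" and "total_iters" are hypothetical values the caller would know.
 */
static void example_results_lifecycle(int myId, int numP, int root, size_t n_resizes,
                                      size_t n_stages, size_t total_iters, MPI_Comm comm) {
  results_data results;

  // Allocate every result vector; iteration buffers get RESULTS_EXTRA_SIZE spare slots.
  init_results_data(&results, n_resizes, n_stages, total_iters);

  // ... the application fills results.iters_time and results.stage_times here ...

  // Reduce the per-iteration times onto the root, taking the median across processes.
  compute_results_iter(&results, myId, numP, root, n_stages, RESULTS_MEDIAN, comm);
  if(myId == root) {
    print_iter_results(results);
    print_stage_results(results, n_stages);
  }

  free_results_data(&results, n_stages);
}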