GenericSpawn.c 11.6 KB
Newer Older
1
2
3
4
5
6
7
8
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
#include <mpi.h>
#include <string.h>
#include "../malleabilityStates.h"
9
#include "../malleabilityDataStructures.h"
10
#include "../MAM_Configuration.h"
11
12
13
14
#include "ProcessDist.h"
#include "GenericSpawn.h"
#include "Baseline.h"
#include "Merge.h"
15
#include "Spawn_state.h"
16
17
18
19

// This code is a singleton object -- only one instance can be in use at a given time,
// and multiple calls to perform different resizes cannot be in progress at the same time.

20
// Configuration of the spawn being handled. Allocated by set_spawn_configuration()
// (parents) or malleability_connect_children() (children); deallocate_spawn_data()
// frees it and resets it to NULL.
Spawn_data *spawn_data = NULL;
21
22
23
24
// Handle of the auxiliary thread created and detached by allocate_thread_spawn().
pthread_t spawn_thread;
// Communicator storage allocated by thread_work(); check_generic_state() copies
// its content to the caller once the spawn has finished.
MPI_Comm *returned_comm;

//--------------PRIVATE CONFIGURATION DECLARATIONS---------------//
iker_martin's avatar
iker_martin committed
25
void set_spawn_configuration(MPI_Comm comm);
26
27
28
29
30
void deallocate_spawn_data();

//--------------PRIVATE DECLARATIONS---------------//
void generic_spawn(MPI_Comm *child, int data_stage);

31
32
int check_single_state(MPI_Comm comm, int global_state, int wait_completed);
int check_generic_state(MPI_Comm comm, MPI_Comm *child, int local_state, int wait_completed);
33
34
35

//--------------PRIVATE THREADS DECLARATIONS---------------//
int allocate_thread_spawn();
36
void* thread_work();
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56


//--------------PUBLIC FUNCTIONS---------------//

/*
 * Se solicita la creacion de un nuevo grupo de "numP" procesos con una distribucion
 * fisica "type_dist".
 *
 * Se puede solicitar en primer plano, encargandose por tanto el proceso que llama a esta funcion,
 * o en segundo plano, donde un hilo se encarga de configurar esta creacion.
 *
 * Si se pide en primer plano, al terminarla es posible llamar a "check_spawn_state()" para crear
 * los procesos.
 *
 * Si se pide en segundo plano, llamar a "check_spawn_state()" comprobara si la configuracion para
 * crearlos esta lista, y si es asi, los crea.
 *
 * Devuelve el estado de el procedimiento. Si no devuelve "MALL_SPAWN_COMPLETED", es necesario llamar a
 * "check_spawn_state()".
 */
iker_martin's avatar
iker_martin committed
57
int init_spawn(MPI_Comm comm, MPI_Comm *child) {
58
  int local_state;
iker_martin's avatar
iker_martin committed
59
  set_spawn_configuration(comm);
60
61

  if(!spawn_data->spawn_is_async) {
62
    generic_spawn(child, MALL_NOT_STARTED);
63
64
65
66
67
    local_state = get_spawn_state(spawn_data->spawn_is_async);
    if (local_state == MALL_SPAWN_COMPLETED)
      deallocate_spawn_data();

  } else {
68
69
    local_state = spawn_data->spawn_is_single ? 
	    MALL_SPAWN_SINGLE_PENDING : MALL_SPAWN_PENDING;
70
    local_state = mall_conf->spawn_method == MALL_SPAWN_MERGE && spawn_data->initial_qty > spawn_data->target_qty ?
71
	    MALL_SPAWN_ADAPT_POSTPONE : local_state;
72
    set_spawn_state(local_state, 0);
iker_martin's avatar
iker_martin committed
73
    if((spawn_data->spawn_is_single && mall->myId == mall->root) || !spawn_data->spawn_is_single) {
74
75
76
77
      allocate_thread_spawn();
    }
  }
    
78
  return local_state;
79
80
81
82
83
84
}

/*
 * Comprueba si una configuracion para crear un nuevo grupo de procesos esta lista,
 * y en caso de que lo este, se devuelve el communicador a estos nuevos procesos.
 */
85
int check_spawn_state(MPI_Comm *child, MPI_Comm comm, int wait_completed) { 
86
87
88
  int local_state;
  int global_state=MALL_NOT_STARTED;

89
  if(spawn_data->spawn_is_async) { // Async
90
91
    local_state = get_spawn_state(spawn_data->spawn_is_async);

92
    if(local_state == MALL_SPAWN_SINGLE_PENDING || local_state == MALL_SPAWN_SINGLE_COMPLETED) { // Single
93
      global_state = check_single_state(comm, local_state, wait_completed);
94

95
96
    } else if(local_state == MALL_SPAWN_PENDING || local_state == MALL_SPAWN_COMPLETED || local_state == MALL_SPAWN_ADAPTED) { // Generic
      global_state = check_generic_state(comm, child, local_state, wait_completed);
97

98
99
100
    } else if(local_state == MALL_SPAWN_ADAPT_POSTPONE) {
      global_state = local_state;
      
101
    } else {
102
      printf("Error Check spawn: Configuracion invalida State = %d\n", local_state);
103
104
105
      MPI_Abort(MPI_COMM_WORLD, -1);
      return -10;
    }
106
  } else if(mall_conf->spawn_method == MALL_SPAWN_MERGE){ // Start Merge shrink Sync
107
108
109
    generic_spawn(child, MALL_DIST_COMPLETED);
    global_state = get_spawn_state(spawn_data->spawn_is_async);
  }
110
  if(global_state == MALL_SPAWN_COMPLETED || global_state == MALL_SPAWN_ADAPTED)
111
    deallocate_spawn_data();
112

113
114
115
  return global_state;
}

116
117
118
119
120
121
122
123
124
125
126
127
/*
 * Elimina la bandera bloqueante MALL_SPAWN_ADAPT_POSTPONE para los hilos 
 * auxiliares. Esta bandera los bloquea para que el metodo Merge shrink no 
 * avance hasta que se complete la redistribucion de datos. Por tanto,
 * al modificar la bandera los hilos pueden continuar.
 *
 * Por seguridad se comprueba que no se realice el cambio a la bandera a 
 * no ser que se cumplan las 3 condiciones.
 */
void unset_spawn_postpone_flag(int outside_state) {
  int local_state = get_spawn_state(spawn_data->spawn_is_async);
  if(local_state == MALL_SPAWN_ADAPT_POSTPONE && outside_state == MALL_SPAWN_ADAPT_PENDING && spawn_data->spawn_is_async) { 
128
    set_spawn_state(MALL_SPAWN_PENDING, spawn_data->spawn_is_async);
129
    wakeup_redistribution();
130
131
132
  }
}

133
134
135
136
137
138
139
140
/*
 * Funcion bloqueante de los hijos para asegurar que todas las tareas del paso
 * de creacion de los hijos se terminan correctamente.
 *
 * Ademas los hijos obtienen informacion basica de los padres
 * para el paso de redistribucion de datos (Numeros de procesos y Id del Root).
 *
 */
iker_martin's avatar
iker_martin committed
141
void malleability_connect_children(MPI_Comm comm, MPI_Comm *parents) {
142
  spawn_data = (Spawn_data *) malloc(sizeof(Spawn_data));
iker_martin's avatar
iker_martin committed
143
144
  spawn_data->spawn_qty = mall->numP;
  spawn_data->target_qty = mall->numP;
145
146
  spawn_data->comm = comm;

iker_martin's avatar
iker_martin committed
147
148
149
150
  MAM_Comm_main_structures(MALLEABILITY_ROOT); //FIXME What if root is another id different to 0? Send from spawn to root id?
  MPI_Comm_remote_size(*parents, &spawn_data->initial_qty);
  MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_SINGLE, &(spawn_data->spawn_is_single));
  MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_PTHREAD, &(spawn_data->spawn_is_async));
151

iker_martin's avatar
iker_martin committed
152
  switch(mall_conf->spawn_method) {
153
    case MALL_SPAWN_BASELINE:
154
      baseline(*spawn_data, parents);
155
156
      break;
    case MALL_SPAWN_MERGE:
157
      spawn_data->target_qty += spawn_data->initial_qty;
158
      merge(*spawn_data, parents, MALL_NOT_STARTED);
159
160
161
      break;
  }

iker_martin's avatar
iker_martin committed
162
  mall->num_parents = spawn_data->initial_qty;
163

164
165
166
167
168
169
170
171
  free(spawn_data);
}

//--------------PRIVATE CONFIGURATION FUNCTIONS---------------//
/*
 * Agrupa en una sola estructura todos los datos de configuración necesarios
 * e inicializa las estructuras necesarias.
 */
iker_martin's avatar
iker_martin committed
172
void set_spawn_configuration(MPI_Comm comm) {
173
174
  spawn_data = (Spawn_data *) malloc(sizeof(Spawn_data));

iker_martin's avatar
iker_martin committed
175
176
177
  spawn_data->initial_qty = mall->numP;
  spawn_data->target_qty = mall->numC;
  MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_SINGLE, &(spawn_data->spawn_is_single)); 
178
  MAM_Contains_strat(MAM_SPAWN_STRATEGIES, MAM_STRAT_SPAWN_PTHREAD, &(spawn_data->spawn_is_async));
179
180
181
  spawn_data->comm = comm;


182
  switch(mall_conf->spawn_method) {
183
184
185
    case MALL_SPAWN_BASELINE:
      spawn_data->spawn_qty = spawn_data->target_qty;
      spawn_data->already_created = 0;
186
      break;
187
188
189
    case MALL_SPAWN_MERGE:
      spawn_data->spawn_qty = spawn_data->target_qty - spawn_data->initial_qty;
      spawn_data->already_created = spawn_data->initial_qty;
190
      break;
191
192
193
  }

  if(spawn_data->spawn_is_async) {
194
    init_spawn_state();
195
196
  }

197
  spawn_data->mapping = MPI_INFO_NULL;
iker_martin's avatar
iker_martin committed
198
199
  if(mall->myId == mall->root) {
    physical_struct_create(spawn_data->target_qty, spawn_data->already_created, MALL_DIST_STRING, &(spawn_data->dist));
200
201
202
203
204
205
206
207
  }
}

/*
 * Libera una estructura de datos spawn_data
 * junto a la destrucion de aquellas estructuras que utiliza.
 */
void deallocate_spawn_data() {
208
209
  if(spawn_data == NULL) return;

210
211
212
213
  if(spawn_data->mapping != MPI_INFO_NULL) {
    MPI_Info_free(&(spawn_data->mapping));
  }
  if(spawn_data->spawn_is_async) {
214
    free_spawn_state();
215
216
  }
  free(spawn_data); 
217
  spawn_data = NULL;
218
219
220
221
222
223
224
225
226
227
228
229
}


//--------------PRIVATE SPAWN CREATION FUNCTIONS---------------//

/*
 * Generic function for the creation of processes. It reads the spawn
 * configuration and, according to it, chooses how the processes are created.
 *
 * child:      communicator handed to baseline()/merge() for the new group.
 * data_stage: stage of the data redistribution, forwarded to merge().
 *
 * When it finishes, the shared spawn state is updated with the state returned
 * by the spawn method. The one exception is when that state would move the
 * procedure from MALL_SPAWN_PENDING back to MALL_SPAWN_ADAPT_POSTPONE.
 *
 * An unknown spawn method is reported and aborts the application, instead of
 * publishing an uninitialised state.
 */
void generic_spawn(MPI_Comm *child, int data_stage) {
  int local_state, aux_state;

  // WORK
  if(mall->myId == mall->root && spawn_data->spawn_qty > 0) { //SET MAPPING FOR NEW PROCESSES
    processes_dist(spawn_data->dist, &(spawn_data->mapping));
  }
  switch(mall_conf->spawn_method) {
    case MALL_SPAWN_BASELINE:
      local_state = baseline(*spawn_data, child);
      break;
    case MALL_SPAWN_MERGE:
      local_state = merge(*spawn_data, child, data_stage);
      break;
    default:
      // Without this branch local_state would be read uninitialised below.
      printf("Error Generic spawn: Invalid spawn method = %d\n", (int) mall_conf->spawn_method);
      MPI_Abort(MPI_COMM_WORLD, -1);
      return;
  }
  // END WORK

  aux_state = get_spawn_state(spawn_data->spawn_is_async);
  // Do not go back to the postponed state once it has already been unset.
  if(!(aux_state == MALL_SPAWN_PENDING && local_state == MALL_SPAWN_ADAPT_POSTPONE)) {
    set_spawn_state(local_state, spawn_data->spawn_is_async);
  }
}


//--------------PRIVATE THREAD FUNCTIONS---------------//

/*
 * Creates the auxiliary thread dedicated to the creation of processes.
 * The thread is detached right after being created, so it cannot be joined
 * and its resources are released by the system when it terminates.
 *
 * Returns 0 on success. If the thread cannot be created or detached, the
 * error is printed and the application is aborted (-1 is returned should
 * MPI_Abort come back).
 */
int allocate_thread_spawn() {
  int err;

  err = pthread_create(&spawn_thread, NULL, thread_work, NULL);
  if(err != 0) {
    printf("Error al crear el hilo de SPAWN\n");
    MPI_Abort(MPI_COMM_WORLD, -1);
    return -1;
  }

  err = pthread_detach(spawn_thread);
  if(err != 0) {
    printf("Error when detaching spawning thread\n");
    MPI_Abort(MPI_COMM_WORLD, -1);
    return -1;
  }

  return 0;
}

/*
 * Funcion llamada por un hilo para que este se encarge
 * de configurar la creacion de un nuevo grupo de procesos.
 *
 * Una vez esta lista la configuracion y es posible crear los procesos
 * se avisa al hilo maestro.
 */
280
void* thread_work() {
281
  int local_state;
282
283
  returned_comm = (MPI_Comm *) malloc(sizeof(MPI_Comm));
 
284
285
  generic_spawn(returned_comm, MALL_NOT_STARTED);

286
  local_state = get_spawn_state(spawn_data->spawn_is_async);
287
  if(local_state == MALL_SPAWN_ADAPT_POSTPONE || local_state == MALL_SPAWN_PENDING) {
288
    // El grupo de procesos se terminara de juntar tras la redistribucion de datos
289

290
    local_state = wait_redistribution();
291
    generic_spawn(returned_comm, MALL_DIST_COMPLETED);
292
  }
293
  wakeup_completion();
294
295
296
297
298
299
300
301
302
303
304
305
306
307

  pthread_exit(NULL);
}

/*
 * Comprueba si una creacion de procesos asincrona en el
 * paso "single" ha terminado. 
 * Si no ha terminado se mantiene el estado 
 * "MALL_SPAWN_SINGLE_PENDING".
 *
 * Si ha terminado se crean los hilos auxiliares para 
 * los procesos no root y se devuelve el estado
 * "MALL_SPAWN_PENDING".
 */
308
309
310
311
int check_single_state(MPI_Comm comm, int global_state, int wait_completed) {
  while(wait_completed && mall->myId == mall->root && global_state == MALL_SPAWN_SINGLE_PENDING) {
    global_state = wait_completion();
  }
iker_martin's avatar
iker_martin committed
312
  MPI_Bcast(&global_state, 1, MPI_INT, mall->root, comm);
313
314
315
316
317

  // Non-root processes join root to finalize the spawn
  // They also must join if the application has ended its work
  if(global_state == MALL_SPAWN_SINGLE_COMPLETED) { 
    global_state = MALL_SPAWN_PENDING;
318
    set_spawn_state(global_state, spawn_data->spawn_is_async);
319

iker_martin's avatar
iker_martin committed
320
    if(mall->myId != mall->root) {
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
      allocate_thread_spawn(spawn_data);
    }
  }
  return global_state;
}

/*
 * Comprueba si una creación de procesos asincrona en el
 * paso "generic" ha terminado.
 * Si no ha terminado devuelve el estado 
 * "MALL_SPAWN_PENDING".
 *
 * Si ha terminado libera la memoria asociada a spawn_data
 * y devuelve el estado "MALL_SPAWN_COMPLETED".
 */
336
int check_generic_state(MPI_Comm comm, MPI_Comm *child, int local_state, int wait_completed) {
337
338
  int global_state;

339
340
  while(wait_completed && local_state == MALL_SPAWN_PENDING) local_state = wait_completion();

341
  MPI_Allreduce(&local_state, &global_state, 1, MPI_INT, MPI_MIN, comm);
342
  if(global_state == MALL_SPAWN_COMPLETED || global_state == MALL_SPAWN_ADAPTED) {
343
    set_spawn_state(global_state, spawn_data->spawn_is_async);
344
    *child = *returned_comm;
345
    deallocate_spawn_data();
346
347
348
  }
  return global_state;
}