MallTimes.py 9.94 KB
Newer Older
Iker Martín's avatar
Iker Martín committed
1
2
3
4
import sys
import glob
import numpy as np
import pandas as pd
Iker Martín Álvarez's avatar
Iker Martín Álvarez committed
5
from enum import Enum
Iker Martín's avatar
Iker Martín committed
6

Iker Martín Álvarez's avatar
Iker Martín Álvarez committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
class G_enum(Enum):
    """Positions of the fields stored for one run of the global data table.

    The values 0-26 are used throughout this file as plain list indexes
    (always through ``.value``) into a per-run row and into ``columnsG``,
    so they follow the same order as the names listed in ``columnsG``.

    The names declared after ``T_TOTAL`` reuse values that are already
    taken.  ``Enum`` treats a repeated value as an alias of the first member
    defined with that value (for example ``G_enum.NP is
    G_enum.TOTAL_RESIZES``); they are not additional members.
    """
    TOTAL_RESIZES = 0
    TOTAL_GROUPS = 1
    TOTAL_STAGES = 2
    GRANULARITY = 3
    SDR = 4
    ADR = 5
    DR = 6
    RED_METHOD = 7
    RED_STRATEGY = 8
    SPAWN_METHOD = 9
    SPAWN_STRATEGY = 10
    GROUPS = 11
    FACTOR_S = 12
    DIST = 13
    STAGE_TYPES = 14
    STAGE_TIMES = 15
    STAGE_BYTES = 16
    ITERS = 17
    ASYNCH_ITERS = 18
    T_ITER = 19
    T_STAGES = 20
    T_SPAWN = 21
    T_SPAWN_REAL = 22
    T_SR = 23
    T_AR = 24
    T_MALLEABILITY = 25
    T_TOTAL = 26
    # Malleability specific
    # NOTE(review): these two are aliases of TOTAL_RESIZES / TOTAL_GROUPS and
    # are not referenced in the part of the file reviewed here.
    NP = 0
    NC = 1
    # Iteration specific
    # NOTE(review): aliases of GROUPS / ITERS; also not referenced in the
    # part of the file reviewed here.
    IS_DYNAMIC = 11
    N_PARENTS = 17
Iker Martín's avatar
Iker Martín committed
41

Iker Martín Álvarez's avatar
Iker Martín Álvarez committed
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145

# Column names of the global dataframe. The position of each name matches the
# value of the corresponding G_enum member (27 columns in total).
columnsG = [
    "Total_Resizes",
    "Total_Groups",
    "Total_Stages",
    "Granularity",
    "SDR",
    "ADR",
    "DR",
    "Redistribution_Method",
    "Redistribution_Strategy",
    "Spawn_Method",
    "Spawn_Strategy",
    "Groups",
    "FactorS",
    "Dist",
    "Stage_Types",
    "Stage_Times",
    "Stage_Bytes",
    "Iters",
    "Asynch_Iters",
    "T_iter",
    "T_stages",
    "T_spawn",
    "T_spawn_real",
    "T_SR",
    "T_AR",
    "T_Malleability",
    "T_total",
]

#-----------------------------------------------
# Obtains the value at a given index of a split line
# and returns it as a number (int or float) if possible, as a string otherwise
def get_value(line, index, separator=True):
  """Return the value held by token ``index`` of an already split line.

  Args:
    line: sequence of string tokens.
    index: position of the token to read.
    separator: when True the token is expected to look like ``name=value,``
      and only the text between the first '=' and the following ',' is
      kept; when False the token is used as it is.

  Returns:
    An int when the text is a number with no fractional part, a float when
    it is any other number, and the original string when it cannot be
    parsed as a number.
  """
  token = line[index]
  if separator:
    token = token.split('=')[1].split(',')[0]

  try:
    number = float(token)
  except ValueError:
    # Not numeric: hand the text back untouched
    return token

  return int(number) if number.is_integer() else number

#-----------------------------------------------
# Obtains the general parameters of an execution and
# stores them for creating a global dataframe
def record_config_line(lineS, dataG_it):
  """Fill the run-wide fields of one row from a split configuration line.

  Args:
    lineS: tokens of the configuration line. Tokens 2 to 6 are read as
      ``name=value,`` pairs, in this order: total resizes, total stages,
      granularity, SDR and ADR.
    dataG_it: row of the current run (a list indexed by G_enum values);
      it is modified in place.

  Besides the values read from the line, the row gets the derived fields
  TOTAL_GROUPS and DR, and a pre-sized list in every per-group, per-resize
  and per-stage column.
  """
  first_field = 2  # the two leading tokens of the line are not values
  scalar_fields = (G_enum.TOTAL_RESIZES, G_enum.TOTAL_STAGES,
                   G_enum.GRANULARITY, G_enum.SDR, G_enum.ADR)
  for position, field in enumerate(scalar_fields, start=first_field):
    dataG_it[field.value] = get_value(lineS, position)

  total_resizes = dataG_it[G_enum.TOTAL_RESIZES.value]
  total_groups = total_resizes + 1
  dataG_it[G_enum.TOTAL_GROUPS.value] = total_groups

  # FIXME: change this once ADR is no longer a percentage
  dataG_it[G_enum.DR.value] = dataG_it[G_enum.SDR.value] + dataG_it[G_enum.ADR.value]

  total_stages = dataG_it[G_enum.TOTAL_STAGES.value]

  # Columns holding one entry per group of processes
  per_group = (G_enum.GROUPS, G_enum.FACTOR_S, G_enum.DIST, G_enum.ITERS,
               G_enum.ASYNCH_ITERS, G_enum.T_ITER, G_enum.T_STAGES, G_enum.RED_METHOD,
               G_enum.RED_STRATEGY, G_enum.SPAWN_METHOD, G_enum.SPAWN_STRATEGY)
  for field in per_group:
    dataG_it[field.value] = [None] * total_groups
  # T_iter starts as an independent empty list per group because the
  # iteration times are appended to it later on
  dataG_it[G_enum.T_ITER.value] = [[] for _ in range(total_groups)]

  # Columns holding one entry per resize
  per_resize = (G_enum.T_SPAWN, G_enum.T_SPAWN_REAL, G_enum.T_SR,
                G_enum.T_AR, G_enum.T_MALLEABILITY)
  for field in per_resize:
    dataG_it[field.value] = [None] * total_resizes

  # Columns holding one entry per stage
  per_stage = (G_enum.STAGE_TYPES, G_enum.STAGE_TIMES, G_enum.STAGE_BYTES)
  for field in per_stage:
    dataG_it[field.value] = [None] * total_stages

#-----------------------------------------------
# Obtains the parameters of a stage line
# and stores them in the row of the current run.
# The caller has to indicate which stage
# the line refers to
def record_stage_line(lineS, dataG_it, stage):
  """Store the values of one split stage line in the row of the current run.

  Args:
    lineS: tokens of the stage line. Tokens 2, 3 and 4 are read as
      ``name=value,`` pairs holding the stage type, time and bytes.
    dataG_it: row of the current run; its per-stage lists are updated.
    stage: position of this stage inside the per-stage lists.
  """
  first_field = 2
  stage_fields = (G_enum.STAGE_TYPES, G_enum.STAGE_TIMES, G_enum.STAGE_BYTES)
  for position, field in enumerate(stage_fields, start=first_field):
    dataG_it[field.value][stage] = get_value(lineS, position)

#-----------------------------------------------
# Obtains the parameters of a group line
# and stores them in the row of the current run.
# The caller has to indicate which group
# the line refers to. Example of a group line:
# Group 0: Iters=3, Procs=80, Factors=0.037500, Dist=2, RM=0, SM=0, RS=0, SS=0
def record_group_line(lineS, dataG_it, group):
  """Store the values of one split group line in the row of the current run.

  Args:
    lineS: tokens of the group line. Tokens 2 to 9 are read as
      ``name=value,`` pairs.
    dataG_it: row of the current run; its per-group lists are updated.
    group: position of this group inside the per-group lists.
  """
  first_field = 2
  # Same order as the fields of the line: iterations, processes, factor,
  # distribution, then redistribution/spawn method followed by
  # redistribution/spawn strategy
  group_fields = (G_enum.ITERS, G_enum.GROUPS, G_enum.FACTOR_S, G_enum.DIST,
                  G_enum.RED_METHOD, G_enum.SPAWN_METHOD,
                  G_enum.RED_STRATEGY, G_enum.SPAWN_STRATEGY)
  for position, field in enumerate(group_fields, start=first_field):
    dataG_it[field.value][group] = get_value(lineS, position)

#-----------------------------------------------
def record_time_line(lineS, dataG_it):
  """Store the values of a global timing line in the row of the current run.

  Lines whose first token is not one of the known timing labels are ignored.

  Args:
    lineS: tokens of the line; the first one is the label and the
      following ones are the plain values (no ``name=`` prefix).
    dataG_it: row of the current run, modified in place.

  When the column already holds a list, every position of that list is
  overwritten with the value found at the same position of the line. When
  the column still holds None, the first value of the line is stored on its
  own.
  """
  columns_by_label = {
    "T_spawn:": G_enum.T_SPAWN.value,
    "T_spawn_real:": G_enum.T_SPAWN_REAL.value,
    "T_SR:": G_enum.T_SR.value,
    "T_AR:": G_enum.T_AR.value,
    "T_Malleability:": G_enum.T_MALLEABILITY.value,
    "T_total:": G_enum.T_TOTAL.value,
  }
  column = columns_by_label.get(lineS[0])
  if column is None:  # The line does not represent a time
    return

  first_value = 1  # Token 0 is the label
  slot = dataG_it[column]
  if slot is not None:
    for i in range(len(slot)):
      slot[i] = get_value(lineS, i + first_value, False)
  else:
    dataG_it[column] = get_value(lineS, first_value, False)
Iker Martín's avatar
Iker Martín committed
146
147

#-----------------------------------------------
Iker Martín Álvarez's avatar
Iker Martín Álvarez committed
148
149
150
151
152
def record_multiple_times_line(lineS, dataG_it, group):
  """Store the per-iteration times of a local file line for one group.

  Two kinds of lines are handled; any other line is ignored:

  * ``T_iter:`` followed by values: every value is appended to the T_iter
    list of ``group``.
  * ``T_stage`` followed by ``<stage>:`` and values: value ``i`` is stored
    in ``T_stages[group][i][stage]``. The line of stage 0 (re)creates the
    table of the group, with one row per value of the line and one column
    per stage, so it has to come before the lines of the other stages.

  Args:
    lineS: tokens of the line.
    dataG_it: row of the current run, modified in place.
    group: group the line belongs to.
  """
  label = lineS[0]

  if label == "T_iter:":
    iter_column = G_enum.T_ITER.value
    for position in range(1, len(lineS)):
      dataG_it[iter_column][group].append(get_value(lineS, position, False))
    return

  if label != "T_stage":
    return

  first_value = 2  # Token 0 is the label and token 1 the stage number
  total_iters = len(lineS) - first_value
  stage = int(lineS[1].split(":")[0])
  stages_column = G_enum.T_STAGES.value

  if stage == 0:
    dataG_it[stages_column][group] = [
      [None] * dataG_it[G_enum.TOTAL_STAGES.value] for _ in range(total_iters)
    ]

  for i in range(total_iters):
    dataG_it[stages_column][group][i][stage] = get_value(lineS, i + first_value, False)
  
Iker Martín's avatar
Iker Martín committed
173
#-----------------------------------------------
Iker Martín Álvarez's avatar
Iker Martín Álvarez committed
174
175
176
177
def read_local_file(f, dataG, it, runs_in_file):
  """Read a local (per group) output file and add its times to ``dataG``.

  Args:
    f: iterable of text lines, typically an open file.
    dataG: list with one row per run; the rows are modified in place.
    it: index in ``dataG`` of the last run of the matching global file.
    runs_in_file: number of runs recorded in that global file.

  Every ``Group`` line moves on to the next run of the global file: the
  first one selects row ``it - runs_in_file + 1``, the second one the
  following row, and so on. The lines after it are stored in that row for
  the group number given in the ``Group`` line.
  """
  groups_seen = 0
  run_index = 0
  group = 0

  for raw_line in f:
    tokens = raw_line.split()
    if not tokens:
      continue

    head = tokens[0]
    if head == "Group":  # GROUP number
      groups_seen += 1
      run_index = it - (runs_in_file - groups_seen)
      group = int(tokens[1].split(":")[0])
    elif head == "Async_Iters:":
      dataG[run_index][G_enum.ASYNCH_ITERS.value][group] = get_value(tokens, 1, False)
    else:
      record_multiple_times_line(tokens, dataG[run_index], group)

#-----------------------------------------------
def read_global_file(f, dataG, it):
  """Read a global output file and append one row to ``dataG`` per run.

  Args:
    f: iterable of text lines, typically an open file.
    dataG: list of rows; a new row is appended for every ``Config`` line.
    it: index of the last row already stored in ``dataG`` (-1 if none).

  Returns:
    A tuple ``(it, runs_in_file)`` with the index of the last row written
    and the number of ``Config`` lines found in this file.
  """
  runs_in_file = 0

  for raw_line in f:
    tokens = raw_line.split()
    if not tokens:
      continue

    head = tokens[0]
    if head == "Config":  # CONFIG LINE: a new run starts here
      it += 1
      runs_in_file += 1
      # NOTE(review): group and stage only exist once a Config line has been
      # read; a Stage or Group line placed before the first Config line of
      # the file raises UnboundLocalError.
      group = 0
      stage = 0

      dataG.append([None] * len(columnsG))
      record_config_line(tokens, dataG[it])
    elif head == "Stage":
      record_stage_line(tokens, dataG[it], stage)
      stage += 1
    elif head == "Group":
      record_group_line(tokens, dataG[it], group)
      group += 1
    else:
      record_time_line(tokens, dataG[it])

  return it, runs_in_file

#-----------------------------------------------


#-----------------------------------------------
def convert_to_tuples(dfG):
  """Replace the list cells of the global dataframe with tuples, in place.

  Args:
    dfG: dataframe whose columns are named after ``columnsG``.

  The columns holding one flat list per row are converted with ``tuple``.
  T_iter and T_stages hold nested lists (one list per group, and for
  T_stages one more list per iteration); every list level is converted.
  Entries that are not lists are kept as they are, so a group without
  recorded times (an empty T_iter list or a T_stages entry that is still
  None) no longer makes the conversion fail.
  """
  def _nested_tuple(item):
    # Turns a list, and any list nested inside it, into tuples
    if isinstance(item, list):
      return tuple(_nested_tuple(child) for child in item)
    return item

  flat_fields = (G_enum.GROUPS, G_enum.FACTOR_S, G_enum.DIST, G_enum.ITERS,
                 G_enum.ASYNCH_ITERS, G_enum.RED_METHOD, G_enum.RED_STRATEGY, G_enum.SPAWN_METHOD,
                 G_enum.SPAWN_STRATEGY, G_enum.T_SPAWN, G_enum.T_SPAWN_REAL, G_enum.T_SR,
                 G_enum.T_AR, G_enum.STAGE_TYPES, G_enum.STAGE_TIMES, G_enum.STAGE_BYTES)
  # TODO: T_Malleability is not converted here; check whether it is missing
  nested_fields = (G_enum.T_ITER, G_enum.T_STAGES)

  # apply() works cell by cell, so it does not depend on the row labels
  for field in flat_fields:
    name = columnsG[field.value]
    dfG[name] = dfG[name].apply(tuple)

  for field in nested_fields:
    name = columnsG[field.value]
    dfG[name] = dfG[name].apply(_nested_tuple)

Iker Martín's avatar
Iker Martín committed
250
251
252
#-----------------------------------------------

# Command line: python3 MallTimes.py commonName [directory] [OutName]
if len(sys.argv) < 2:
  print("The files name is missing\nUsage: python3 MallTimes.py commonName directory OutName")
  # sys.exit instead of exit: the latter is only installed by the site module
  sys.exit(1)

common_name = sys.argv[1]
if len(sys.argv) >= 3:
  BaseDir = sys.argv[2]
  print("Searching in directory: "+ BaseDir)
else:
  BaseDir = "./"

if len(sys.argv) >= 4:
  name = sys.argv[3]
else:
  name = "data"
print("File name will be: " + name + "G.pkl")

insideDir = "Run"
lista = glob.glob(BaseDir + insideDir + "*/" + common_name + "*_Global.out")
lista += glob.glob(BaseDir + common_name + "*_Global.out")  # Used when there is only one level of directories
print("Number of files found: "+ str(len(lista)))

it = -1
dataG = []

for elem in lista:
  # Text found between the common name and "_Global.out" identifies the run
  id_run = elem.split("_Global.out")[0].split(common_name)[-1]
  # NOTE(review): local files are searched directly under BaseDir, also for
  # global files found inside a Run*/ directory - confirm this is intended
  lista_local = glob.glob(BaseDir + common_name + id_run + "_G*NP*.out")

  # Context managers close the files even if a line cannot be parsed
  with open(elem, "r") as f:
    it, runs_in_file = read_global_file(f, dataG, it)

  for elem_local in lista_local:
    with open(elem_local, "r") as f_local:
      read_local_file(f_local, dataG, it, runs_in_file)

dfG = pd.DataFrame(dataG, columns=columnsG)
convert_to_tuples(dfG)
print(dfG)
dfG.to_pickle(name + 'G.pkl')
Iker Martín's avatar
Iker Martín committed
294

Iker Martín Álvarez's avatar
Iker Martín Álvarez committed
295
#dfM = pd.DataFrame(dataM, columns=columnsM)
Iker Martín's avatar
Iker Martín committed
296
297

# Put the real value in TC and the one needed by the app in TH
Iker Martín Álvarez's avatar
Iker Martín Álvarez committed
298
299
300
#cond = dfM.TH != 0
#dfM.loc[cond, ['TC', 'TH']] = dfM.loc[cond, ['TH', 'TC']].values
#dfM.to_csv(name + 'M.csv')