# MallTimes.py
1
2
3
4
import sys
import glob
import numpy as np
import pandas as pd
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from enum import Enum

class G_enum(Enum):
    """Positional indexes of the columns stored in one global data row.

    Each member's value is used as a list index into the per-execution row
    (see the ``dataG_it[G_enum.X.value]`` accesses in the record_* functions),
    and the members are declared in the same order as the names in
    ``columnsG``. Changing a value or the order therefore changes which
    column a parsed value lands in.
    """
    # Scalar columns (one value per execution)
    TOTAL_RESIZES = 0
    TOTAL_GROUPS = 1
    TOTAL_STAGES = 2
    GRANULARITY = 3
    SDR = 4
    ADR = 5
    DR = 6
    # List columns (initialised as lists of None by record_config_line)
    ASYNCH_REDISTRIBUTION_TYPE = 7
    SPAWN_METHOD = 8
    SPAWN_STRATEGY = 9
    GROUPS = 10
    FACTOR_S = 11
    DIST = 12
    STAGE_TYPES = 13
    STAGE_TIMES = 14
    STAGE_BYTES = 15
    ITERS = 16
    ASYNCH_ITERS = 17
    T_ITER = 18
    T_STAGES = 19
    T_SPAWN = 20
    T_SPAWN_REAL = 21
    T_SR = 22
    T_AR = 23
    # Scalar column written from the "T_total:" line
    T_TOTAL = 24

# Column names of the global dataframe. The name at position i labels the
# value stored at index i of a global data row (25 names in total).
columnsG = [
    "Total_Resizes",
    "Total_Groups",
    "Total_Stages",
    "Granularity",
    "SDR",
    "ADR",
    "DR",
    "Asynch_Redistribution_Type",
    "Spawn_Method",
    "Spawn_Strategy",
    "Groups",
    "Factor_S",
    "Dist",
    "Stage_Types",
    "Stage_Times",
    "Stage_Bytes",
    "Iters",
    "Asynch_Iters",
    "T_iter",
    "T_stages",
    "T_spawn",
    "T_spawn_real",
    "T_SR",
    "T_AR",
    "T_total",
]
37

38
39
40
41
42
43
def get_value(line, index):
  """Return the number held by one token of an already split line.

  The token at ``line[index]`` is expected to look like ``name=value`` with
  an optional trailing comma: the text between the first '=' and the next
  ',' is converted to a float.
  """
  token = line[index]
  after_equals = token.split('=')[1]
  number_text = after_equals.split(',')[0]
  return float(number_text)

def record_config_line(lineS, dataG_it):
  """Store the general parameters of one execution in its global data row.

  lineS    -- the "Config" line, already split on whitespace. The five
              scalar parameters are read from tokens 2..6.
  dataG_it -- the row (list indexed by G_enum values) to fill in place.

  Besides the scalars read from the line, this derives TOTAL_GROUPS and DR
  and allocates a list of None for every list-valued column, so the other
  record_* functions can assign by position.
  """
  # Scalar columns, in the order in which their values appear in the line.
  ordered_indexes = [G_enum.TOTAL_RESIZES.value, G_enum.TOTAL_STAGES.value,
                     G_enum.GRANULARITY.value, G_enum.SDR.value, G_enum.ADR.value]
  offset_line = 2  # the first two tokens of the line are not values
  for position, index in enumerate(ordered_indexes, start=offset_line):
    value = get_value(lineS, position)
    if value.is_integer():
      value = int(value)  # keep whole numbers as ints (they are used as sizes below)
    dataG_it[index] = value

  # The value read into TOTAL_RESIZES is used as the number of groups;
  # the number of resizes is one less.
  dataG_it[G_enum.TOTAL_GROUPS.value] = dataG_it[G_enum.TOTAL_RESIZES.value]
  dataG_it[G_enum.TOTAL_RESIZES.value] -= 1  # FIXME: change this in the synthetic application

  # FIXME: revisit once ADR is no longer a percentage
  dataG_it[G_enum.DR.value] = dataG_it[G_enum.SDR.value] + dataG_it[G_enum.ADR.value]

  # Allocate the list-valued columns: one slot per group, resize or stage.
  array_groups = [G_enum.GROUPS.value, G_enum.FACTOR_S.value, G_enum.DIST.value, G_enum.ITERS.value,
                  G_enum.ASYNCH_ITERS.value, G_enum.T_ITER.value, G_enum.T_STAGES.value]
  array_resizes = [G_enum.ASYNCH_REDISTRIBUTION_TYPE.value, G_enum.SPAWN_METHOD.value,
                   G_enum.SPAWN_STRATEGY.value, G_enum.T_SPAWN.value, G_enum.T_SPAWN_REAL.value,
                   G_enum.T_SR.value, G_enum.T_AR.value]
  array_stages = [G_enum.STAGE_TYPES.value,
                  G_enum.STAGE_TIMES.value, G_enum.STAGE_BYTES.value]

  sized_columns = [
      (array_groups, dataG_it[G_enum.TOTAL_GROUPS.value]),
      (array_resizes, dataG_it[G_enum.TOTAL_RESIZES.value]),
      (array_stages, dataG_it[G_enum.TOTAL_STAGES.value]),
  ]
  for columns, size in sized_columns:
    for index in columns:
      dataG_it[index] = [None] * size

def record_stage_line(lineS, dataG_it, stage):
  """Store the parameters of one "Stage" line in the global data row.

  lineS    -- the stage line, already split on whitespace. The three stage
              values are read from tokens 2..4.
  dataG_it -- the row (list indexed by G_enum values) to fill in place.
  stage    -- position of this stage inside the per-stage lists.
  """
  array_stages = [G_enum.STAGE_TYPES.value,
                  G_enum.STAGE_TIMES.value, G_enum.STAGE_BYTES.value]
  offset_lines = 2  # the first two tokens of the line are not values
  for position, index in enumerate(array_stages, start=offset_lines):
    value = get_value(lineS, position)
    if value.is_integer():
      value = int(value)
    # The original looked the column up through the undefined name
    # `array_stage`, which raised NameError on the first stage line.
    dataG_it[index][stage] = value

def record_resize_line(lineS, dataG_it, group):
  """Store the parameters of one "Resize" line in the global data row.

  lineS    -- the resize line, already split on whitespace. The seven
              values are read from tokens 2..8.
  dataG_it -- the row (list indexed by G_enum values) to fill in place.
  group    -- position of the group this line refers to inside the lists.
  """
  # Columns in the order in which their values appear in the line.
  resize_columns = [G_enum.ITERS.value, G_enum.GROUPS.value, G_enum.FACTOR_S.value, G_enum.DIST.value,
                    G_enum.ASYNCH_REDISTRIBUTION_TYPE.value, G_enum.SPAWN_METHOD.value,
                    G_enum.SPAWN_STRATEGY.value]
  offset_lines = 2  # the first two tokens of the line are not values
  # NOTE(review): record_config_line gives the last three columns one slot
  # fewer than the first four (TOTAL_RESIZES vs TOTAL_GROUPS), so a line for
  # the last group would raise IndexError on them -- confirm against the
  # input format.
  for position, index in enumerate(resize_columns, start=offset_lines):
    value = get_value(lineS, position)
    if value.is_integer():
      value = int(value)
    # The original looked the column up through the undefined name
    # `array_stage`, which raised NameError on the first resize line.
    dataG_it[index][group] = value

def record_time_line(lineS, dataG_it):
  """Store the values of a time line ("T_spawn:", "T_SR:", ...) in the row.

  lineS    -- the line, already split on whitespace; its first token
              selects the column and the values start at token 1.
  dataG_it -- the row (list indexed by G_enum values) to fill in place.

  Lines whose first token is not a known time label are ignored.
  """
  # Label found at the start of the line -> column of the row it fills.
  time_columns = {
      "T_spawn:": G_enum.T_SPAWN.value,
      "T_spawn_real:": G_enum.T_SPAWN_REAL.value,
      "T_SR:": G_enum.T_SR.value,
      "T_AR:": G_enum.T_AR.value,
      "T_total:": G_enum.T_TOTAL.value,
  }
  if not lineS or lineS[0] not in time_columns:  # Execute only if line represents a Time
    return

  # The original used the position of the label in the name list as the row
  # index (and misspelled `lineS`), so it never reached the time columns.
  index = time_columns[lineS[0]]
  offset_lines = 1

  if index == G_enum.T_TOTAL.value:
    # T_total holds a single number, not a list.
    dataG_it[index] = get_value(lineS, offset_lines)
    return

  for i in range(len(dataG_it[index])):
    dataG_it[index][i] = get_value(lineS, i + offset_lines)
120
121

#-----------------------------------------------
122
def read_global_file(f, dataG, it):
  """Parse one global results file and append its executions to dataG.

  f     -- iterable of text lines (an open file).
  dataG -- list of rows; a new row is appended for every "Config" line.
  it    -- index of the last row already stored in dataG (-1 when empty).

  Returns the index of the last row after reading the file.
  """
  # Positions of the next Resize/Stage line inside the current execution.
  # Initialised here so a Stage/Resize line that comes before any Config
  # line does not hit an unbound local.
  resize = 0
  stage = 0

  for line in f:
    lineS = line.split()
    if not lineS:
      continue

    keyword = lineS[0]
    if keyword == "Config": # CONFIG LINE
      it += 1
      # One slot per G_enum column plus a trailing flag slot that indicates
      # whether local data has been recorded (1) or not (None).
      dataG.append([None] * (len(G_enum) + 1))
      record_config_line(lineS, dataG[it])
      resize = 0
      stage = 0
    elif keyword == "Stage":
      record_stage_line(lineS, dataG[it], stage)
      stage += 1
    elif keyword == "Resize":
      record_resize_line(lineS, dataG[it], resize)
      resize += 1
    elif keyword == "T_total:":
      dataG[it][G_enum.T_TOTAL.value] = get_value(lineS, 1)
    else:
      record_time_line(lineS, dataG[it])

  return it
152

153
#-----------------------------------------------
154
155
156
157
158
159
160
def read_local_file(f, dataG, it):
  """Parse one local results file into the rows already present in dataG.

  f     -- iterable of text lines (an open file).
  dataG -- list of rows; unlike read_global_file no row is appended, so
           dataG[it] must already exist for every "Config" line read.
  it    -- index of the row preceding the first execution of this file.

  Returns the index of the last row touched.

  NOTE(review): the original called an undefined `record_config` and passed
  `dataM[it]`, which is not a parameter of this function. It now calls
  record_config_line on the global row only; confirm this is the intended
  behaviour for local files, since it re-initialises the list columns.
  """
  # Positions of the next Resize/Stage line inside the current execution.
  resize = 0
  stage = 0

  for line in f:
    lineS = line.split()
    if not lineS:
      continue

    keyword = lineS[0]
    if keyword == "Config": # CONFIG LINE
      it += 1
      record_config_line(lineS, dataG[it])
      resize = 0
      stage = 0
    elif keyword == "Stage":
      record_stage_line(lineS, dataG[it], stage)
      stage += 1
    elif keyword == "Resize":
      record_resize_line(lineS, dataG[it], resize)
      resize += 1
    elif keyword == "T_total:":
      dataG[it][G_enum.T_TOTAL.value] = get_value(lineS, 1)
    else:
      record_time_line(lineS, dataG[it])

  return it

#columnsG = ["Total_Resizes", "Total_Groups", "Total_Stages", "Granularity", "SDR", "ADR", "DR", "Asynch_Redistribution_Type", \\
#            "Spawn_Method", "Spawn_Strategy", "Groups", "Dist",   "Stage_Types", "Stage_Times", "Stage_Bytes", \\
#            "Iters", "Asynch_Iters", "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_total"] #24

#-----------------------------------------------
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# Command line: resultsName [directory [csvOutName]]
if len(sys.argv) < 2:
  print("The files name is missing\nUsage: python3 MallTimes.py resultsName directory csvOutName")
  sys.exit(1)

if len(sys.argv) >= 3:
  BaseDir = sys.argv[2]
  print("Searching in directory: "+ BaseDir)
else:
  # No directory given: search from the current one. The original read
  # sys.argv[2] here as well, which raised IndexError.
  BaseDir = "./"

if len(sys.argv) >= 4:
  print("Csv name will be: " + sys.argv[3] + "G.csv & " + sys.argv[3] + "M.csv")
  name = sys.argv[3]
else:
  name = "data"

insideDir = "Run"
lista = glob.glob("./" + BaseDir + insideDir + "*/" + sys.argv[1]+ "*Global.o*")
# Used when there is only a single directory level
lista += glob.glob("./" + BaseDir + sys.argv[1]+ "*Global.o*")
print("Number of files found: "+ str(len(lista)))

it = -1
dataG = []
dataM = []
# The global column names come from the module-level `columnsG`; the stale
# 13-name list that used to overwrite it here did not match the rows.
columnsM = ["N", "%Async", "NP", "NS", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TC", "TH", "TS", "TA"] #15

for elem in lista:
  # `with` closes the file even if parsing raises.
  with open(elem, "r") as f:
    it = read_global_file(f, dataG, it)

# Each row has one trailing bookkeeping slot with no column name; drop it so
# the row width matches columnsG.
dfG = pd.DataFrame([row[:len(columnsG)] for row in dataG], columns=columnsG)
dfG.to_csv(name + 'G.csv')

# NOTE(review): nothing visible in this file fills dataM, so the M csv only
# contains the header -- confirm whether it is still needed.
dfM = pd.DataFrame(dataM, columns=columnsM)

# Put the real value in TC and the one the app needs in TH
if not dfM.empty:
  cond = dfM.TH != 0
  # The original read from the undefined name `dfB`.
  dfM.loc[cond, ['TC', 'TH']] = dfM.loc[cond, ['TH', 'TC']].values
dfM.to_csv(name + 'M.csv')