MallTimes.py 6.89 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import sys
import glob
import numpy as np
import pandas as pd

def getData(lineS, outData, tp, hasIter = False):
  """Copy the "time" (and optionally "iters") value of a split line into outData.

  lineS   -- iterable of "key=value" tokens.
  outData -- indexable container, updated in place.
  tp      -- index that receives the time, stored as a float.
  hasIter -- when true, the "iters" value is stored as an int at tp+1.

  If a key appears more than once, the last occurrence is the one stored.
  """
  for token in lineS:
    key, *rest = token.split('=')
    if key == "time":
      elapsed = float(rest[0])
    elif hasIter and key == "iters":
      n_iters = int(rest[0])

  outData[tp] = elapsed
  if hasIter:
    outData[tp + 1] = n_iters

#-----------------------------------------------
def record(f, observation, line):
  """Fill the slots of observation from one execution record.

  line is the already-read first line of the record; the six lines that
  follow it are pulled from the iterator f. observation is updated in
  place: slots 1-6, then 7 onwards (one per token), 9-12, 13 and 14.
  """
  # General info: tokens 1..6 of the first line are "key=int" pairs
  tokens = line.split()
  for pos in range(1, 7):
    observation[pos] = int(tokens[pos].split('=')[1])

  # Process numbers: each token of the next line is stored from slot 7 onwards
  for slot, pair in enumerate(next(f).split(), start=7):
    observation[slot] = int(pair.split('=')[1])

  # Four timing lines fill slots 9..12
  for slot in range(9, 13):
    getData(next(f).split(), observation, slot)

  # The last line provides a time (slot 13) and an iteration count (slot 14).
  # A former check on observation[0] == "A" is disabled, so the iteration
  # count is always read.
  getData(next(f).split(), observation, 13, True)

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Returns, as a float, the value part of the "key=value" token found at
# position `index` of an already split line. Anything from the first
# comma of the value onwards is discarded.
def get_value(line, index):
  token = line[index]
  value = token.split('=')[1]
  return float(value.split(',')[0])

# Obtains the general parameters of an execution from a split line and
# stores them in a freshly appended row of each table (13 slots for dataA,
# 15 slots for dataB) for creating a dataframe.
# NOTE(review): this function looks unfinished -- `it`, `comm_tam`, `cst`,
# `css` and `time` are neither parameters nor assigned here, so they are
# looked up as globals when the function runs; confirm where they are
# supposed to come from.
def record_config_line(lineS, dataA, dataB):
  dataA.append([None]*13)
  dataB.append([None]*15)
  # Tokens 2..11 of the line are "key=value" pairs parsed with get_value.
  # NOTE(review): resizes, stages, at, sm, ss, latency and bw are parsed
  # but not used further in this function.
  resizes = int(get_value(lineS, 2))
  stages = int(get_value(lineS, 3))
  compute_tam = int(get_value(lineS, 4))
  sdr = int(get_value(lineS, 5))
  adr = int(get_value(lineS, 6)) # TODO: make it be taken as a percentage
  at  = int(get_value(lineS, 7))
  sm = int(get_value(lineS, 8))
  ss = int(get_value(lineS, 9))
  latency = get_value(lineS, 10)
  bw = get_value(lineS, 11)

  # Slots 4 and 10 start as empty strings (slots 2 and 3 stay None)
  dataB[it][0] = sdr
  dataB[it][1] = adr 
  dataB[it][4] = "" 
  dataB[it][5] = compute_tam
  dataB[it][6] = comm_tam
  dataB[it][7] = cst
  dataB[it][8] = css
  dataB[it][9] = time
  dataB[it][10] = "" 

  # Same values for dataA, shifted by one slot from index 5 onwards
  dataA[it][0] = sdr
  dataA[it][1] = adr 
  dataA[it][5] = ""
  dataA[it][6] = compute_tam
  dataA[it][7] = comm_tam
  dataA[it][8] = cst
  dataA[it][9] = css
  dataA[it][10] = time
  dataA[it][11] = ""

def record_stage_line(lineS, dataG_it, dataM_it):
  # Parses tokens 2, 3 and 4 of a split line (int, float, int) and appends
  # the three values to both tables.
  pt = int(get_value(lineS, 2))
  t_stage = get_value(lineS, 3)
  u_bytes = int(get_value(lineS, 4))

  # NOTE(review): the empty subscripts `[]` below are a syntax error; the
  # index of the sub-list that should receive the values is still missing.
  dataG_it[].append(pt)
  dataG_it[].append(t_stage)
  dataG_it[].append(u_bytes)

  dataM_it[].append(pt)
  dataM_it[].append(t_stage)
  dataM_it[].append(u_bytes)
  
def record_resize_line(lineS, dataG_it, dataM_it):
        # Parses the iteration count (token 2), the process count (token 3)
        # and the distribution (token 5) of a split line and stores them.
        # NOTE(review): this function looks unfinished -- the parameters
        # dataG_it / dataM_it are never used, dataA / dataB / it are looked
        # up as globals, and resizes / previousNP are assigned below, which
        # makes them locals that are read before being assigned.
        iters = int(lineS[2].split('=')[1].split(',')[0])
        npr = int(lineS[3].split('=')[1].split(',')[0])
        dist = lineS[5].split('=')[1]

        # As written this raises UnboundLocalError: resizes has no value yet
        resizes = resizes - 1
        if resizes == 0:
          # Last pending resize: close the comma-separated lists
          dataB[it][3] = npr
          dataB[it][4] += dist
          dataB[it][10] += str(iters)

          dataA[it][4] = npr # FIXME: will not be correct if there is more than one reconfiguration
          dataA[it][2] = str(previousNP) + "," + str(npr)
          dataA[it][5] += dist
          dataA[it][11] += str(iters)
          # Local assignment only; it is not visible to the caller
          timer = 4
        else:
          # More resizes pending: append the values followed by a comma
          dataB[it][2] = npr
          dataB[it][4] += dist + ","
          dataB[it][10] += str(iters) + ","

          dataA[it][3] = npr
          dataA[it][5] += dist + ","
          dataA[it][11] += str(iters) + ","
          previousNP = npr
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

#-----------------------------------------------
def read_file(f, dataA, dataB, it):
  # Walks the lines of f, dispatching on the first token of every non-empty
  # line, and returns the updated row index `it` (incremented once per
  # "Config" line).
  # NOTE(review): the bare integer lines inside this function look like
  # line-number residue from a web view of the file -- TODO remove them.
  recording = False
  resizes = 0
  timer = 0
  previousNP = 0

  for line in f: 
    lineS = line.split()

    if len(lineS) > 0:
      if lineS[0] == "Config": # CONFIG LINE
        recording = True
        it += 1
142
        # NOTE(review): no record_config is defined in the visible file
        # (record_config_line is); the call also passes the globals
        # dataG / dataM instead of the parameters dataA / dataB.
        record_config(lineS, dataG, dataM)
143

144
145
146
      elif lineS[0] == "Stage":
          record_stage_line(lineS, dataG, dataM)
      # NOTE(review): the "Resize" branch below has no body, which is a
      # syntax error.
      elif lineS[0] == "Resize":
147
148
149
150
151
152
153
154
155
      # NOTE(review): resizes is initialised to 0 and only changed inside
      # this branch, so in the visible code the condition is never true.
      elif recording and resizes != 0: # RESIZE LINE
        iters = int(lineS[2].split('=')[1].split(',')[0])
        npr = int(lineS[3].split('=')[1].split(',')[0])
        dist = lineS[5].split('=')[1]

        resizes = resizes - 1
        if resizes == 0:
          # Last pending resize: close the comma-separated lists
          dataB[it][3] = npr
          dataB[it][4] += dist
156
          dataB[it][10] += str(iters)
157

158
          dataA[it][4] = npr # FIXME: will not be correct if there is more than one reconfiguration
159
          dataA[it][2] = str(previousNP) + "," + str(npr)
160
161
          dataA[it][5] += dist
          dataA[it][11] += str(iters)
162
          # The next four lines handled by the final else go to dataB[it][11..14]
          timer = 4
163
164
165
        else:
          dataB[it][2] = npr
          dataB[it][4] += dist + ","
166
          dataB[it][10] += str(iters) + ","
167

168
169
170
          dataA[it][3] = npr
          dataA[it][5] += dist + ","
          dataA[it][11] += str(iters) + ","
171
172
173
          previousNP = npr

      else: # SAVE TIMES
        # timer counts down 4..1 to pick the dataB slot; once it is 0 or
        # below, the value goes to dataA[it][12]
174
        if timer == 4:
175
          dataB[it][11] = float(lineS[1])
176
        elif timer == 3:
177
          dataB[it][12] = float(lineS[1])
178
        elif timer == 2:
179
          dataB[it][13] = float(lineS[1])
180
        elif timer == 1:
181
          dataB[it][14] = float(lineS[1])
182
        else:
183
          dataA[it][12] = float(lineS[1])
184
185
186
        timer = timer - 1
          
  return it
187
188
#columnsA1 = ["N", "%Async", "Groups", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #8
#columnsB1 = ["N", "%Async", "NP", "NS", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TC", "TS", "TA"] #12
189
190
191
192
#Config loaded: resizes=2, matrix=1000, sdr=1000000000, adr=0, aib=0, time=2.000000 || grp=1
#Resize 0: Iters=100, Procs=2, Factors=1.000000, Phy=2
#Resize 1: Iters=100, Procs=4, Factors=0.500000, Phy=2
#Tspawn: 0.249393 
193
#Tthread: 0 
194
195
196
#Tsync: 0.330391 
#Tasync: 0
#Tex: 301.428615
197
198

#Config loaded: resizes=1, matrix=0, comm_tam=0, sdr=0, adr=0, aib=0, cst=3, css=1, time=1 || grp=1
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#-----------------------------------------------

# Command-line handling: argv[1] is the prefix of the result files
# (mandatory), argv[2] an optional base directory and argv[3] an optional
# prefix for the generated CSV files.
if len(sys.argv) < 2:
  print("The files name is missing\nUsage: python3 iterTimes.py resultsName directory csvOutName")
  # sys.exit instead of the site-provided exit(), which is not always available
  sys.exit(1)

if len(sys.argv) >= 3:
  BaseDir = sys.argv[2]
  print("Searching in directory: "+ BaseDir)
else:
  # No directory was given: reading sys.argv[2] here would raise
  # IndexError, so search relative to the current directory instead
  BaseDir = ""

if len(sys.argv) >= 4:
  print("Csv name will be: " + sys.argv[3] + "G.csv & " + sys.argv[3] + "M.csv")
  name = sys.argv[3]
else:
  name = "data"

# Locate the result files: first inside the "Run*" sub-directories of the
# base directory, then directly in the base directory.
insideDir = "Run"
lista = glob.glob("./" + BaseDir + insideDir + "*/" + sys.argv[1]+ "*Global.o*")
# Used when there is only one level of directories
lista += glob.glob("./" + BaseDir + sys.argv[1]+ "*Global.o*")

print("Number of files found: "+ str(len(lista)))

# Row index handed to read_file and updated with its return value
it = -1
dataG = []
dataM = []
columnsG = ["N", "%Async", "Groups", "NP", "NS", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #13
columnsM = ["N", "%Async", "NP", "NS", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TC", "TH", "TS", "TA"] #15

for elem in lista:
  # The context manager closes the file even when parsing raises
  with open(elem, "r") as f:
    it = read_file(f, dataG, dataM, it)

#print(data)
234
235
# Write both tables as CSV files named after the chosen prefix
dfG = pd.DataFrame(dataG, columns=columnsG)
dfG.to_csv(name + 'G.csv')

dfM = pd.DataFrame(dataM, columns=columnsM)

# Put the real value in TC and the one needed by the app in TH: swap both
# columns on the rows where TH is not zero. `.values` drops the column
# labels so the assignment is positional and really swaps the data.
cond = dfM.TH != 0
dfM.loc[cond, ['TC', 'TH']] = dfM.loc[cond, ['TH', 'TC']].values
dfM.to_csv(name + 'M.csv')