Refactor de recogida de datos. WIP

8ca50713 · iker_martin · 04e2f90a · 8ca50713
Commit 8ca50713 authored Nov 04, 2022 by iker_martin
--- a/Analysis/MallTimes.py
+++ b/Analysis/MallTimes.py
@@ -2,48 +2,38 @@ import sys
 import glob
 import numpy as np
 import pandas as pd
+from enum import Enum
-def getData(lineS, outData, tp, hasIter = False):
-  for data in lineS:
+class G_enum(Enum):
-    k_v = data.split('=')
+    TOTAL_RESIZES = 0
-    if k_v[0] == "time":
+    TOTAL_GROUPS = 1
-      time = float(k_v[1])
+    TOTAL_STAGES = 2
-    elif k_v[0] == "iters" and hasIter:
+    GRANULARITY = 3
-      iters = int(k_v[1])
+    SDR = 4
+    ADR = 5
-  outData[tp] = time
+    DR = 6
-  if hasIter:
+    ASYNCH_REDISTRIBUTION_TYPE = 7
-    outData[tp+1] = iters
+    SPAWN_METHOD = 8
+    SPAWN_STRATEGY = 9
-#-----------------------------------------------
+    GROUPS = 10
-def record(f, observation, line):
+    FACTOR_S = 11
-  # Record first line - General info
+    DIST = 12
-  lineS = line.split()
+    STAGE_TYPES = 13
-  for j in range(1,7):
+    STAGE_TIMES = 14
-    observation[j] = int(lineS[j].split('=')[1])
+    STAGE_BYTES = 15
+    ITERS = 16
-  # Record procces number
+    ASYNCH_ITERS = 17
-  line = next(f)
+    T_ITER = 18
-  lineS = line.split()
+    T_STAGES = 19
-  j = 7
+    T_SPAWN = 20
-  for key_values in lineS:
+    T_SPAWN_REAL = 21
-    k_v = key_values.split('=')
+    T_SR = 22
-    observation[j] = int(k_v[1])
+    T_AR = 23
-    j+=1
+    T_TOTAL = 24
-  # Record data
+columnsG = ["Total_Resizes", "Total_Groups", "Total_Stages", "Granularity", "SDR", "ADR", "DR", "Asynch_Redistribution_Type", \
-  j = 9
+            "Spawn_Method", "Spawn_Strategy", "Groups", "Factor_S", "Dist", "Stage_Types", "Stage_Times", "Stage_Bytes", \
-  for j in range(9, 13):
+            "Iters", "Asynch_Iters", "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_total"] #25
-    line = next(f)
-    lineS = line.split()  
-    getData(lineS, observation, j)
-  line = next(f)
-  lineS = line.split()  
-  #if observation[0] == "A":
-  getData(lineS, observation, 13, True)
-  #else:
-   # getData(lineS, observation, 13)
 # Obtains the value at a given index of a split line
 # and returns it as a float value
@@ -51,83 +41,94 @@ def get_value(line, index):
  return float(line[index].split('=')[1].split(',')[0])
 # Obtains the general parameters of an execution and
-# stores them for creating a dataframe
+# stores them for creating a global dataframe
-def record_config_line(lineS, dataA, dataB):
+def record_config_line(lineS, dataG_it):
-  dataA.append([None]*13)
+  ordered_indexes = [G_enum.TOTAL_RESIZES.value, G_enum.TOTAL_STAGES.value, G_enum.GRANULARITY.value, G_enum.SDR.value, \
-  dataB.append([None]*15)
+          G_enum.ADR.value, G_enum.ASYNCH_REDISTRIBUTION_TYPE.value, G_enum.SPAWN_METHOD.value, G_emun.SPAWN_STRATEGY.value]
-  resizes = int(get_value(lineS, 2))
+  offset_line = 2
-  stages = int(get_value(lineS, 3))
+  for i in range(len(ordered_indexes)):
-  compute_tam = int(get_value(lineS, 4))
+    value = get_value(lineS, i+offset_line)
-  sdr = int(get_value(lineS, 5))
+    if value.is_integer():
-  adr = int(get_value(lineS, 6)) #TODO Que lo tome como porcentaje
+      value = int(value)
-  at  = int(get_value(lineS, 7))
+    index = ordered_indexes[i]
-  sm = int(get_value(lineS, 8))
+    dataG_it[index] = value
-  ss = int(get_value(lineS, 9))
-  latency = get_value(lineS, 10)
+  dataG_it[G_enum.TOTAL_GROUPS.value] = dataG_it[G_enum.TOTAL_RESIZES.value]
-  bw = get_value(lineS, 11)
+  dataG_it[G_enum.TOTAL_RESIZES.value] -=1 #FIXME Modificar en App sintetica
-  dataB[it][0] = sdr
+  #FIXME Modificar cuando ADR ya no sea un porcentaje
-  dataB[it][1] = adr 
+  dataG_it[G_enum.DR.value] = dataG_it[G_enum.SDR.value] + dataG_it[G_enum.ADR.value]
-  dataB[it][4] = "" 
-  dataB[it][5] = compute_tam
+  # Init lists for each column
-  dataB[it][6] = comm_tam
+  array_groups = [G_enum.GROUPS.value, G_enum.FACTOR_S.value, G_enum.DIST.value, G_enum.ITERS.value, \
-  dataB[it][7] = cst
+          G_enum.ASYNCH_ITERS.value, G_enum.T_ITER.value, G_enum.T_STAGES.value]
-  dataB[it][8] = css
+  array_resizes = [G_enum.ASYNCH_REDISTRIBUTION_TYPE.value, G_enum.SPAWN_METHOD.value, \
-  dataB[it][9] = time
+          G_enum.SPAWN_STRATEGY.value, G_enum.T_SPAWN.value, G_enum.T_SPAWN_REAL.value, \
-  dataB[it][10] = "" 
+          G_enum.T_SR.value, G_enum.T_AR.value]
+  array_stages = [G_enum.STAGE_TYPES.value, \
-  dataA[it][0] = sdr
+          G_enum.STAGE_TIMES.value, G_enum.STAGE_BYTES.value]
-  dataA[it][1] = adr 
+  for index in array_groups:
-  dataA[it][5] = ""
+    dataG_it[index] = [None]*dataG_it[G_enum.TOTAL_GROUPS.value]
-  dataA[it][6] = compute_tam
-  dataA[it][7] = comm_tam
+  for index in array_resizes:
-  dataA[it][8] = cst
+    dataG_it[index] = [None]*dataG_it[G_enum.TOTAL_RESIZES.value]
-  dataA[it][9] = css
-  dataA[it][10] = time
+  for index in array_stages:
-  dataA[it][11] = ""
+    dataG_it[index] = [None]*dataG_it[G_enum.TOTAL_STAGES.value]
-def record_stage_line(lineS, dataG_it, dataM_it):
-  pt = int(get_value(lineS, 2))
-  t_stage = get_value(lineS, 3)
-  u_bytes = int(get_value(lineS, 4))
+#columnsG = ["Total_Resizes", "Total_Groups", "Total_Stages", "Granularity", "SDR", "ADR", "DR", "Asynch_Redistribution_Type", \\
-  dataG_it[].append(pt)
+#            "Spawn_Method", "Spawn_Strategy", "Groups", "Dist",   "Stage_Types", "Stage_Times", "Stage_Bytes", \\
-  dataG_it[].append(t_stage)
+#            "Iters", "Asynch_Iters", "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_total"] #24
-  dataG_it[].append(u_bytes)
+#columnsG = ["N", "%Async", "Groups", "NP", "NS", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #13
-  dataM_it[].append(pt)
+# Obtains the parameters of a stage line 
-  dataM_it[].append(t_stage)
+# and stores them in the dataframe
-  dataM_it[].append(u_bytes)
+# The caller must indicate which stage the line
+# refers to
-def record_resize_line(lineS, dataG_it, dataM_it):
+def record_stage_line(lineS, dataG_it, stage):
-        iters = int(lineS[2].split('=')[1].split(',')[0])
+  array_stages = [G_enum.STAGE_TYPES.value, \
-        npr = int(lineS[3].split('=')[1].split(',')[0])
+          G_enum.STAGE_TIMES.value, G_enum.STAGE_BYTES.value]
-        dist = lineS[5].split('=')[1]
+  offset_lines = 2
+  for i in range(len(array_stages)):
-        resizes = resizes - 1
+    value = get_value(lineS, i+offset_lines)
-        if resizes == 0:
+    if value.is_integer():
-          dataB[it][3] = npr
+        value = int(value)
-          dataB[it][4] += dist
+    index = array_stage[i]
-          dataB[it][10] += str(iters)
+    dataG_it[index][stage] = value
-          dataA[it][4] = npr #FIXME No sera correcta si hay mas de una reconfig
+# Obtains the parameters of a resize line
-          dataA[it][2] = str(previousNP) + "," + str(npr)
+# and stores them in the dataframe
-          dataA[it][5] += dist
+# It is necessary to indicate which group is referred to by
-          dataA[it][11] += str(iters)
+# the resize line
-          timer = 4
+def record_resize_line(lineS, dataG_it, group):
-        else:
+  array_stages = [G_enum.ITERS.value, G_enum.GROUPS.value\
-          dataB[it][2] = npr
+          G_enum.FACTOR_S.value, G_enum.DIST.value]
-          dataB[it][4] += dist + ","
+  offset_lines = 2
-          dataB[it][10] += str(iters) + ","
+  for i in range(len(array_stages)):
+    value = get_value(lineS, i+offset_lines)
-          dataA[it][3] = npr
+    if value.is_integer():
-          dataA[it][5] += dist + ","
+        value = int(value)
-          dataA[it][11] += str(iters) + ","
+    index = array_stage[i]
-          previousNP = npr
+    dataG_it[index][group] = value
+def record_time_line(lineS, dataG_it):
+  T_names = ["T_spawn:", "T_spawn_real:", "T_SR:", "T_AR:", "T_total:"]
+  T_values = [G_enum.T_SPAWN.value, G_enum.T_SPAWN_REAL.value, G_enum.T_SR.value, G_enum.T_AR.value, G_enum.T_TOTAL.value]
+  if not (lineS[0] in T_names): # Execute only if line represents a Time
+      return
+  index = T_names.index(linesS[0])
+  offset_lines = 1
+  for i in range(len(dataG_it[index])):
+    value = get_value(lineS, i+offset_lines)
+    dataG_it[index][i] = value
 #-----------------------------------------------
-def read_file(f, dataA, dataB, it):
+def read_global_file(f, dataA, dataB, it):
-  recording = False
  resizes = 0
  timer = 0
  previousNP = 0
@@ -137,51 +138,16 @@ def read_file(f, dataA, dataB, it):
    if len(lineS) > 0:
      if lineS[0] == "Config": # CONFIG LINE
-        recording = True
        it += 1
-        record_config(lineS, dataG, dataM)
+        dataA.append([None]*25)
+        record_config(lineS, dataG[it], dataM[it])
      elif lineS[0] == "Stage":
-          record_stage_line(lineS, dataG, dataM)
+        record_stage_line(lineS, dataG[it], ??)
      elif lineS[0] == "Resize":
-      elif recording and resizes != 0: # RESIZE LINE
+        record_resize_line(lineS, dataG[it], ??)
-        iters = int(lineS[2].split('=')[1].split(',')[0])
+      elif lineS[0] in T_names:
-        npr = int(lineS[3].split('=')[1].split(',')[0])
+        dataG[it][]
-        dist = lineS[5].split('=')[1]
-        resizes = resizes - 1
-        if resizes == 0:
-          dataB[it][3] = npr
-          dataB[it][4] += dist
-          dataB[it][10] += str(iters)
-          dataA[it][4] = npr #FIXME No sera correcta si hay mas de una reconfig
-          dataA[it][2] = str(previousNP) + "," + str(npr)
-          dataA[it][5] += dist
-          dataA[it][11] += str(iters)
-          timer = 4
-        else:
-          dataB[it][2] = npr
-          dataB[it][4] += dist + ","
-          dataB[it][10] += str(iters) + ","
-          dataA[it][3] = npr
-          dataA[it][5] += dist + ","
-          dataA[it][11] += str(iters) + ","
-          previousNP = npr
-      else: # SAVE TIMES
-        if timer == 4:
-          dataB[it][11] = float(lineS[1])
-        elif timer == 3:
-          dataB[it][12] = float(lineS[1])
-        elif timer == 2:
-          dataB[it][13] = float(lineS[1])
-        elif timer == 1:
-          dataB[it][14] = float(lineS[1])
-        else:
-          dataA[it][12] = float(lineS[1])
-        timer = timer - 1
  return it
 #columnsA1 = ["N", "%Async", "Groups", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #8