Refactor de recogida de datos. WIP

8ca50713 · iker_martin · 04e2f90a · 8ca50713
Commit 8ca50713 authored Nov 04, 2022 by iker_martin
--- a/Analysis/MallTimes.py
+++ b/Analysis/MallTimes.py
@@ -2,48 +2,38 @@ import sys
 import glob
 import numpy as np
 import pandas as pd
-
-def getData(lineS, outData, tp, hasIter = False):
-  for data in lineS:
-    k_v = data.split('=')
-    if k_v[0] == "time":
-      time = float(k_v[1])
-    elif k_v[0] == "iters" and hasIter:
-      iters = int(k_v[1])
-
-  outData[tp] = time
-  if hasIter:
-    outData[tp+1] = iters
-
-#-----------------------------------------------
-def record(f, observation, line):
-  # Record first line - General info
-  lineS = line.split()
-  for j in range(1,7):
-    observation[j] = int(lineS[j].split('=')[1])
-
-  # Record procces number
-  line = next(f)
-  lineS = line.split()
-  j = 7
-  for key_values in lineS:
-    k_v = key_values.split('=')
-    observation[j] = int(k_v[1])
-    j+=1
-
-  # Record data
-  j = 9
-  for j in range(9, 13):
-    line = next(f)
-    lineS = line.split()  
-    getData(lineS, observation, j)
-
-  line = next(f)
-  lineS = line.split()  
-  #if observation[0] == "A":
-  getData(lineS, observation, 13, True)
-  #else:
-   # getData(lineS, observation, 13)
+from enum import Enum
+
+class G_enum(Enum):  # Column indexes of one global-dataframe row; same order as the names in columnsG
+    TOTAL_RESIZES = 0
+    TOTAL_GROUPS = 1  # record_config_line copies the parsed TOTAL_RESIZES value here
+    TOTAL_STAGES = 2
+    GRANULARITY = 3
+    SDR = 4
+    ADR = 5
+    DR = 6  # record_config_line stores SDR + ADR here
+    ASYNCH_REDISTRIBUTION_TYPE = 7
+    SPAWN_METHOD = 8
+    SPAWN_STRATEGY = 9
+    GROUPS = 10
+    FACTOR_S = 11
+    DIST = 12
+    STAGE_TYPES = 13
+    STAGE_TIMES = 14
+    STAGE_BYTES = 15
+    ITERS = 16
+    ASYNCH_ITERS = 17
+    T_ITER = 18
+    T_STAGES = 19
+    T_SPAWN = 20
+    T_SPAWN_REAL = 21
+    T_SR = 22
+    T_AR = 23
+    T_TOTAL = 24  # last of the 25 columns
+
+columnsG = ["Total_Resizes", "Total_Groups", "Total_Stages", "Granularity", "SDR", "ADR", "DR", "Asynch_Redistribution_Type", \
+            "Spawn_Method", "Spawn_Strategy", "Groups", "Factor_S", "Dist", "Stage_Types", "Stage_Times", "Stage_Bytes", \
+            "Iters", "Asynch_Iters", "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_total"] # 25 names, one per G_enum member and in the same order

 # Obtains the value at a given index of a split line
 # and returns it as a float value
@@ -51,83 +41,94 @@ def get_value(line, index):
  return float(line[index].split('=')[1].split(',')[0])

 # Obtains the general parameters of an execution and
-# stores them for creating a dataframe
-def record_config_line(lineS, dataA, dataB):
-  dataA.append([None]*13)
-  dataB.append([None]*15)
-  resizes = int(get_value(lineS, 2))
-  stages = int(get_value(lineS, 3))
-  compute_tam = int(get_value(lineS, 4))
-  sdr = int(get_value(lineS, 5))
-  adr = int(get_value(lineS, 6)) #TODO Que lo tome como porcentaje
-  at  = int(get_value(lineS, 7))
-  sm = int(get_value(lineS, 8))
-  ss = int(get_value(lineS, 9))
-  latency = get_value(lineS, 10)
-  bw = get_value(lineS, 11)
-
-  dataB[it][0] = sdr
-  dataB[it][1] = adr 
-  dataB[it][4] = "" 
-  dataB[it][5] = compute_tam
-  dataB[it][6] = comm_tam
-  dataB[it][7] = cst
-  dataB[it][8] = css
-  dataB[it][9] = time
-  dataB[it][10] = "" 
-
-  dataA[it][0] = sdr
-  dataA[it][1] = adr 
-  dataA[it][5] = ""
-  dataA[it][6] = compute_tam
-  dataA[it][7] = comm_tam
-  dataA[it][8] = cst
-  dataA[it][9] = css
-  dataA[it][10] = time
-  dataA[it][11] = ""
-
-def record_stage_line(lineS, dataG_it, dataM_it):
-  pt = int(get_value(lineS, 2))
-  t_stage = get_value(lineS, 3)
-  u_bytes = int(get_value(lineS, 4))
-
-  dataG_it[].append(pt)
-  dataG_it[].append(t_stage)
-  dataG_it[].append(u_bytes)
-
-  dataM_it[].append(pt)
-  dataM_it[].append(t_stage)
-  dataM_it[].append(u_bytes)
-  
-def record_resize_line(lineS, dataG_it, dataM_it):
-        iters = int(lineS[2].split('=')[1].split(',')[0])
-        npr = int(lineS[3].split('=')[1].split(',')[0])
-        dist = lineS[5].split('=')[1]
-
-        resizes = resizes - 1
-        if resizes == 0:
-          dataB[it][3] = npr
-          dataB[it][4] += dist
-          dataB[it][10] += str(iters)
-
-          dataA[it][4] = npr #FIXME No sera correcta si hay mas de una reconfig
-          dataA[it][2] = str(previousNP) + "," + str(npr)
-          dataA[it][5] += dist
-          dataA[it][11] += str(iters)
-          timer = 4
-        else:
-          dataB[it][2] = npr
-          dataB[it][4] += dist + ","
-          dataB[it][10] += str(iters) + ","
-
-          dataA[it][3] = npr
-          dataA[it][5] += dist + ","
-          dataA[it][11] += str(iters) + ","
-          previousNP = npr
+# stores them for creating a global dataframe
+def record_config_line(lineS, dataG_it):
+  """Store the general parameters of a "Config" line in one global-dataframe row.
+
+  lineS    -- the configuration line already split into tokens; the values
+              are read with get_value() from consecutive tokens starting
+              at index 2.
+  dataG_it -- row to fill, indexed by the G_enum values.
+
+  Returns nothing; dataG_it is modified in place.
+  """
+  # Columns listed in the same order as their values appear in the line
+  ordered_indexes = [G_enum.TOTAL_RESIZES.value, G_enum.TOTAL_STAGES.value, G_enum.GRANULARITY.value, G_enum.SDR.value, \
+          G_enum.ADR.value, G_enum.ASYNCH_REDISTRIBUTION_TYPE.value, G_enum.SPAWN_METHOD.value, G_enum.SPAWN_STRATEGY.value]
+  offset_line = 2
+  for i, index in enumerate(ordered_indexes):
+    value = get_value(lineS, i+offset_line)
+    if value.is_integer(): # get_value always returns a float; keep whole numbers as int
+      value = int(value)
+    dataG_it[index] = value
+
+  # The value parsed as TOTAL_RESIZES is stored as the number of groups,
+  # and the number of resizes is one less than that
+  dataG_it[G_enum.TOTAL_GROUPS.value] = dataG_it[G_enum.TOTAL_RESIZES.value]
+  dataG_it[G_enum.TOTAL_RESIZES.value] -=1 #FIXME Change this in the synthetic application
+
+  #FIXME Change this when ADR is no longer a percentage
+  dataG_it[G_enum.DR.value] = dataG_it[G_enum.SDR.value] + dataG_it[G_enum.ADR.value]
+
+  # Init lists for each column
+  array_groups = [G_enum.GROUPS.value, G_enum.FACTOR_S.value, G_enum.DIST.value, G_enum.ITERS.value, \
+          G_enum.ASYNCH_ITERS.value, G_enum.T_ITER.value, G_enum.T_STAGES.value]
+  # NOTE(review): the first three columns below were just filled from the
+  # config line and are overwritten here with lists of None -- confirm
+  # that discarding the parsed values is intended
+  array_resizes = [G_enum.ASYNCH_REDISTRIBUTION_TYPE.value, G_enum.SPAWN_METHOD.value, \
+          G_enum.SPAWN_STRATEGY.value, G_enum.T_SPAWN.value, G_enum.T_SPAWN_REAL.value, \
+          G_enum.T_SR.value, G_enum.T_AR.value]
+  array_stages = [G_enum.STAGE_TYPES.value, \
+          G_enum.STAGE_TIMES.value, G_enum.STAGE_BYTES.value]
+  for index in array_groups:
+    dataG_it[index] = [None]*dataG_it[G_enum.TOTAL_GROUPS.value]
+
+  for index in array_resizes:
+    dataG_it[index] = [None]*dataG_it[G_enum.TOTAL_RESIZES.value]
+
+  for index in array_stages:
+    dataG_it[index] = [None]*dataG_it[G_enum.TOTAL_STAGES.value]
+
+
+
+
+
+#columnsG = ["Total_Resizes", "Total_Groups", "Total_Stages", "Granularity", "SDR", "ADR", "DR", "Asynch_Redistribution_Type", \\
+#            "Spawn_Method", "Spawn_Strategy", "Groups", "Dist",   "Stage_Types", "Stage_Times", "Stage_Bytes", \\
+#            "Iters", "Asynch_Iters", "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_total"] #24
+#columnsG = ["N", "%Async", "Groups", "NP", "NS", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #13
+
+# Obtains the parameters of a stage line
+# and stores them in the dataframe.
+# The caller must indicate which stage
+# the line refers to
+def record_stage_line(lineS, dataG_it, stage):
+  """Store the parameters of a "Stage" line in one global-dataframe row.
+
+  lineS    -- the stage line already split into tokens; the values are
+              read with get_value() from consecutive tokens starting at
+              index 2.
+  dataG_it -- row to fill, indexed by the G_enum values; the stage columns
+              must already hold lists with a slot for every stage.
+  stage    -- position of this stage inside those lists.
+
+  Returns nothing; dataG_it is modified in place.
+  """
+  # Columns listed in the same order as their values appear in the line
+  array_stages = [G_enum.STAGE_TYPES.value, \
+          G_enum.STAGE_TIMES.value, G_enum.STAGE_BYTES.value]
+  offset_lines = 2
+  for i, index in enumerate(array_stages):
+    value = get_value(lineS, i+offset_lines)
+    if value.is_integer(): # get_value always returns a float; keep whole numbers as int
+      value = int(value)
+    dataG_it[index][stage] = value
+
+# Obtains the parameters of a resize line
+# and stores them in the dataframe.
+# The caller must indicate which group
+# the resize line refers to
+def record_resize_line(lineS, dataG_it, group):
+  """Store the parameters of a "Resize" line in one global-dataframe row.
+
+  lineS    -- the resize line already split into tokens; the values are
+              read with get_value() from consecutive tokens starting at
+              index 2.
+  dataG_it -- row to fill, indexed by the G_enum values; the group columns
+              must already hold lists with a slot for every group.
+  group    -- position of this group inside those lists.
+
+  Returns nothing; dataG_it is modified in place.
+  """
+  # Columns listed in the same order as their values appear in the line
+  array_groups = [G_enum.ITERS.value, G_enum.GROUPS.value, \
+          G_enum.FACTOR_S.value, G_enum.DIST.value]
+  offset_lines = 2
+  for i, index in enumerate(array_groups):
+    # NOTE(review): get_value converts every token to float -- confirm the
+    # Dist field of a resize line is always numeric
+    value = get_value(lineS, i+offset_lines)
+    if value.is_integer(): # Keep whole numbers as int
+      value = int(value)
+    dataG_it[index][group] = value
+
+def record_time_line(lineS, dataG_it):
+  """Store the values of a time line in one global-dataframe row.
+
+  lineS    -- the line already split into tokens; lineS[0] selects the
+              column and the values are read with get_value() from the
+              tokens starting at index 1.
+  dataG_it -- row to fill, indexed by the G_enum values.
+
+  Lines whose first token is not a known time name are ignored.
+  Returns nothing; dataG_it is modified in place.
+  """
+  T_names = ["T_spawn:", "T_spawn_real:", "T_SR:", "T_AR:", "T_total:"]
+  T_values = [G_enum.T_SPAWN.value, G_enum.T_SPAWN_REAL.value, G_enum.T_SR.value, G_enum.T_AR.value, G_enum.T_TOTAL.value]
+  if lineS[0] not in T_names: # Execute only if line represents a Time
+    return
+
+  # Translate the name found in the line into its column of the row
+  index = T_values[T_names.index(lineS[0])]
+  offset_lines = 1
+  if not isinstance(dataG_it[index], list):
+    # Column not initialised as a list: store a single value
+    # NOTE(review): assumes such a line carries exactly one value -- confirm
+    dataG_it[index] = get_value(lineS, offset_lines)
+    return
+
+  for i in range(len(dataG_it[index])):
+    dataG_it[index][i] = get_value(lineS, i+offset_lines)

 #-----------------------------------------------
-def read_file(f, dataA, dataB, it):
-  recording = False
+def read_global_file(f, dataA, dataB, it):
  resizes = 0
  timer = 0
  previousNP = 0
@@ -137,51 +138,16 @@ def read_file(f, dataA, dataB, it):

    if len(lineS) > 0:
      if lineS[0] == "Config": # CONFIG LINE
-        recording = True
        it += 1
-        record_config(lineS, dataG, dataM)
+        dataA.append([None]*25)
+        record_config(lineS, dataG[it], dataM[it])

      elif lineS[0] == "Stage":
-          record_stage_line(lineS, dataG, dataM)
+        record_stage_line(lineS, dataG[it], ??)
      elif lineS[0] == "Resize":
-      elif recording and resizes != 0: # RESIZE LINE
-        iters = int(lineS[2].split('=')[1].split(',')[0])
-        npr = int(lineS[3].split('=')[1].split(',')[0])
-        dist = lineS[5].split('=')[1]
-
-        resizes = resizes - 1
-        if resizes == 0:
-          dataB[it][3] = npr
-          dataB[it][4] += dist
-          dataB[it][10] += str(iters)
-
-          dataA[it][4] = npr #FIXME No sera correcta si hay mas de una reconfig
-          dataA[it][2] = str(previousNP) + "," + str(npr)
-          dataA[it][5] += dist
-          dataA[it][11] += str(iters)
-          timer = 4
-        else:
-          dataB[it][2] = npr
-          dataB[it][4] += dist + ","
-          dataB[it][10] += str(iters) + ","
-
-          dataA[it][3] = npr
-          dataA[it][5] += dist + ","
-          dataA[it][11] += str(iters) + ","
-          previousNP = npr
-
-      else: # SAVE TIMES
-        if timer == 4:
-          dataB[it][11] = float(lineS[1])
-        elif timer == 3:
-          dataB[it][12] = float(lineS[1])
-        elif timer == 2:
-          dataB[it][13] = float(lineS[1])
-        elif timer == 1:
-          dataB[it][14] = float(lineS[1])
-        else:
-          dataA[it][12] = float(lineS[1])
-        timer = timer - 1
+        record_resize_line(lineS, dataG[it], ??)
+      elif lineS[0] in T_names:
+        dataG[it][]
          
  return it
 #columnsA1 = ["N", "%Async", "Groups", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #8