Commit 8ca50713 authored by iker_martin's avatar iker_martin
Browse files

Refactor de recogida de datos. WIP

parent 04e2f90a
...@@ -2,48 +2,38 @@ import sys ...@@ -2,48 +2,38 @@ import sys
import glob import glob
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from enum import Enum
def getData(lineS, outData, tp, hasIter = False):
for data in lineS: class G_enum(Enum):
k_v = data.split('=') TOTAL_RESIZES = 0
if k_v[0] == "time": TOTAL_GROUPS = 1
time = float(k_v[1]) TOTAL_STAGES = 2
elif k_v[0] == "iters" and hasIter: GRANULARITY = 3
iters = int(k_v[1]) SDR = 4
ADR = 5
outData[tp] = time DR = 6
if hasIter: ASYNCH_REDISTRIBUTION_TYPE = 7
outData[tp+1] = iters SPAWN_METHOD = 8
SPAWN_STRATEGY = 9
#----------------------------------------------- GROUPS = 10
def record(f, observation, line): FACTOR_S = 11
# Record first line - General info DIST = 12
lineS = line.split() STAGE_TYPES = 13
for j in range(1,7): STAGE_TIMES = 14
observation[j] = int(lineS[j].split('=')[1]) STAGE_BYTES = 15
ITERS = 16
# Record procces number ASYNCH_ITERS = 17
line = next(f) T_ITER = 18
lineS = line.split() T_STAGES = 19
j = 7 T_SPAWN = 20
for key_values in lineS: T_SPAWN_REAL = 21
k_v = key_values.split('=') T_SR = 22
observation[j] = int(k_v[1]) T_AR = 23
j+=1 T_TOTAL = 24
# Record data columnsG = ["Total_Resizes", "Total_Groups", "Total_Stages", "Granularity", "SDR", "ADR", "DR", "Asynch_Redistribution_Type", \
j = 9 "Spawn_Method", "Spawn_Strategy", "Groups", "Factor_S", "Dist", "Stage_Types", "Stage_Times", "Stage_Bytes", \
for j in range(9, 13): "Iters", "Asynch_Iters", "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_total"] #25
line = next(f)
lineS = line.split()
getData(lineS, observation, j)
line = next(f)
lineS = line.split()
#if observation[0] == "A":
getData(lineS, observation, 13, True)
#else:
# getData(lineS, observation, 13)
# Obtains the value of a given index in a splited line # Obtains the value of a given index in a splited line
# and returns it as a float values # and returns it as a float values
...@@ -51,83 +41,94 @@ def get_value(line, index): ...@@ -51,83 +41,94 @@ def get_value(line, index):
return float(line[index].split('=')[1].split(',')[0]) return float(line[index].split('=')[1].split(',')[0])
# Obtains the general parameters of an execution and # Obtains the general parameters of an execution and
# stores them for creating a dataframe # stores them for creating a global dataframe
def record_config_line(lineS, dataA, dataB): def record_config_line(lineS, dataG_it):
dataA.append([None]*13) ordered_indexes = [G_enum.TOTAL_RESIZES.value, G_enum.TOTAL_STAGES.value, G_enum.GRANULARITY.value, G_enum.SDR.value, \
dataB.append([None]*15) G_enum.ADR.value, G_enum.ASYNCH_REDISTRIBUTION_TYPE.value, G_enum.SPAWN_METHOD.value, G_emun.SPAWN_STRATEGY.value]
resizes = int(get_value(lineS, 2)) offset_line = 2
stages = int(get_value(lineS, 3)) for i in range(len(ordered_indexes)):
compute_tam = int(get_value(lineS, 4)) value = get_value(lineS, i+offset_line)
sdr = int(get_value(lineS, 5)) if value.is_integer():
adr = int(get_value(lineS, 6)) #TODO Que lo tome como porcentaje value = int(value)
at = int(get_value(lineS, 7)) index = ordered_indexes[i]
sm = int(get_value(lineS, 8)) dataG_it[index] = value
ss = int(get_value(lineS, 9))
latency = get_value(lineS, 10) dataG_it[G_enum.TOTAL_GROUPS.value] = dataG_it[G_enum.TOTAL_RESIZES.value]
bw = get_value(lineS, 11) dataG_it[G_enum.TOTAL_RESIZES.value] -=1 #FIXME Modificar en App sintetica
dataB[it][0] = sdr #FIXME Modificar cuando ADR ya no sea un porcentaje
dataB[it][1] = adr dataG_it[G_enum.DR.value] = dataG_it[G_enum.SDR.value] + dataG_it[G_enum.ADR.value]
dataB[it][4] = ""
dataB[it][5] = compute_tam # Init lists for each column
dataB[it][6] = comm_tam array_groups = [G_enum.GROUPS.value, G_enum.FACTOR_S.value, G_enum.DIST.value, G_enum.ITERS.value, \
dataB[it][7] = cst G_enum.ASYNCH_ITERS.value, G_enum.T_ITER.value, G_enum.T_STAGES.value]
dataB[it][8] = css array_resizes = [G_enum.ASYNCH_REDISTRIBUTION_TYPE.value, G_enum.SPAWN_METHOD.value, \
dataB[it][9] = time G_enum.SPAWN_STRATEGY.value, G_enum.T_SPAWN.value, G_enum.T_SPAWN_REAL.value, \
dataB[it][10] = "" G_enum.T_SR.value, G_enum.T_AR.value]
array_stages = [G_enum.STAGE_TYPES.value, \
dataA[it][0] = sdr G_enum.STAGE_TIMES.value, G_enum.STAGE_BYTES.value]
dataA[it][1] = adr for index in array_groups:
dataA[it][5] = "" dataG_it[index] = [None]*dataG_it[G_enum.TOTAL_GROUPS.value]
dataA[it][6] = compute_tam
dataA[it][7] = comm_tam for index in array_resizes:
dataA[it][8] = cst dataG_it[index] = [None]*dataG_it[G_enum.TOTAL_RESIZES.value]
dataA[it][9] = css
dataA[it][10] = time for index in array_stages:
dataA[it][11] = "" dataG_it[index] = [None]*dataG_it[G_enum.TOTAL_STAGES.value]
def record_stage_line(lineS, dataG_it, dataM_it):
pt = int(get_value(lineS, 2))
t_stage = get_value(lineS, 3)
u_bytes = int(get_value(lineS, 4))
#columnsG = ["Total_Resizes", "Total_Groups", "Total_Stages", "Granularity", "SDR", "ADR", "DR", "Asynch_Redistribution_Type", \\
dataG_it[].append(pt) # "Spawn_Method", "Spawn_Strategy", "Groups", "Dist", "Stage_Types", "Stage_Times", "Stage_Bytes", \\
dataG_it[].append(t_stage) # "Iters", "Asynch_Iters", "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_total"] #24
dataG_it[].append(u_bytes) #columnsG = ["N", "%Async", "Groups", "NP", "NS", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #13
dataM_it[].append(pt) # Obtains the parameters of a stage line
dataM_it[].append(t_stage) # and stores it in the dataframe
dataM_it[].append(u_bytes) # Is needed to indicate in which stage is
# being performed
def record_resize_line(lineS, dataG_it, dataM_it): def record_stage_line(lineS, dataG_it, stage):
iters = int(lineS[2].split('=')[1].split(',')[0]) array_stages = [G_enum.STAGE_TYPES.value, \
npr = int(lineS[3].split('=')[1].split(',')[0]) G_enum.STAGE_TIMES.value, G_enum.STAGE_BYTES.value]
dist = lineS[5].split('=')[1] offset_lines = 2
for i in range(len(array_stages)):
resizes = resizes - 1 value = get_value(lineS, i+offset_lines)
if resizes == 0: if value.is_integer():
dataB[it][3] = npr value = int(value)
dataB[it][4] += dist index = array_stage[i]
dataB[it][10] += str(iters) dataG_it[index][stage] = value
dataA[it][4] = npr #FIXME No sera correcta si hay mas de una reconfig # Obtains the parameters of a resize line
dataA[it][2] = str(previousNP) + "," + str(npr) # and stores them in the dataframe
dataA[it][5] += dist # Is needed to indicate to which group refers
dataA[it][11] += str(iters) # the resize line
timer = 4 def record_resize_line(lineS, dataG_it, group):
else: array_stages = [G_enum.ITERS.value, G_enum.GROUPS.value\
dataB[it][2] = npr G_enum.FACTOR_S.value, G_enum.DIST.value]
dataB[it][4] += dist + "," offset_lines = 2
dataB[it][10] += str(iters) + "," for i in range(len(array_stages)):
value = get_value(lineS, i+offset_lines)
dataA[it][3] = npr if value.is_integer():
dataA[it][5] += dist + "," value = int(value)
dataA[it][11] += str(iters) + "," index = array_stage[i]
previousNP = npr dataG_it[index][group] = value
def record_time_line(lineS, dataG_it):
    """Store the values of a time line in the matching column of a row.

    The first token of ``lineS`` selects the column: "T_spawn:",
    "T_spawn_real:", "T_SR:", "T_AR:" or "T_total:". Lines whose first
    token is none of these are ignored.

    Args:
        lineS: A line already split into tokens.
        dataG_it: The row (list indexed by ``G_enum`` values) being filled
            for the current execution; it is modified in place.
    """
    T_names = ["T_spawn:", "T_spawn_real:", "T_SR:", "T_AR:", "T_total:"]
    T_values = [G_enum.T_SPAWN.value, G_enum.T_SPAWN_REAL.value,
                G_enum.T_SR.value, G_enum.T_AR.value, G_enum.T_TOTAL.value]
    # Execute only if the line represents a time.
    if not lineS or lineS[0] not in T_names:
        return

    # Translate the position of the label into the real column of the row;
    # the position inside T_names (0..4) is not a valid column by itself.
    index = T_values[T_names.index(lineS[0])]
    offset_lines = 1  # Token 0 is the label, values start right after it.

    # NOTE(review): get_value (defined elsewhere in this file) expects
    # "key=value" tokens -- confirm that time lines are written that way.
    column = dataG_it[index]
    if column is None:
        # Columns that were not initialised as lists (T_total is not part
        # of the per-resize columns) hold a single scalar value.
        dataG_it[index] = get_value(lineS, offset_lines)
        return

    for i in range(len(column)):
        column[i] = get_value(lineS, i + offset_lines)
#----------------------------------------------- #-----------------------------------------------
def read_file(f, dataA, dataB, it): def read_global_file(f, dataA, dataB, it):
recording = False
resizes = 0 resizes = 0
timer = 0 timer = 0
previousNP = 0 previousNP = 0
...@@ -137,51 +138,16 @@ def read_file(f, dataA, dataB, it): ...@@ -137,51 +138,16 @@ def read_file(f, dataA, dataB, it):
if len(lineS) > 0: if len(lineS) > 0:
if lineS[0] == "Config": # CONFIG LINE if lineS[0] == "Config": # CONFIG LINE
recording = True
it += 1 it += 1
record_config(lineS, dataG, dataM) dataA.append([None]*25)
record_config(lineS, dataG[it], dataM[it])
elif lineS[0] == "Stage": elif lineS[0] == "Stage":
record_stage_line(lineS, dataG, dataM) record_stage_line(lineS, dataG[it], ??)
elif lineS[0] == "Resize": elif lineS[0] == "Resize":
elif recording and resizes != 0: # RESIZE LINE record_resize_line(lineS, dataG[it], ??)
iters = int(lineS[2].split('=')[1].split(',')[0]) elif lineS[0] in T_names:
npr = int(lineS[3].split('=')[1].split(',')[0]) dataG[it][]
dist = lineS[5].split('=')[1]
resizes = resizes - 1
if resizes == 0:
dataB[it][3] = npr
dataB[it][4] += dist
dataB[it][10] += str(iters)
dataA[it][4] = npr #FIXME No sera correcta si hay mas de una reconfig
dataA[it][2] = str(previousNP) + "," + str(npr)
dataA[it][5] += dist
dataA[it][11] += str(iters)
timer = 4
else:
dataB[it][2] = npr
dataB[it][4] += dist + ","
dataB[it][10] += str(iters) + ","
dataA[it][3] = npr
dataA[it][5] += dist + ","
dataA[it][11] += str(iters) + ","
previousNP = npr
else: # SAVE TIMES
if timer == 4:
dataB[it][11] = float(lineS[1])
elif timer == 3:
dataB[it][12] = float(lineS[1])
elif timer == 2:
dataB[it][13] = float(lineS[1])
elif timer == 1:
dataB[it][14] = float(lineS[1])
else:
dataA[it][12] = float(lineS[1])
timer = timer - 1
return it return it
#columnsA1 = ["N", "%Async", "Groups", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #8 #columnsA1 = ["N", "%Async", "Groups", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #8
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment