Commit 8ca50713 authored by iker_martin's avatar iker_martin
Browse files

Refactor de recogida de datos. WIP

parent 04e2f90a
......@@ -2,48 +2,38 @@ import sys
import glob
import numpy as np
import pandas as pd
def getData(lineS, outData, tp, hasIter=False):
    """Store the ``time`` (and optionally ``iters``) field of a split line.

    Args:
        lineS: Iterable of ``key=value`` tokens.
        outData: Mutable sequence that receives the parsed values.
        tp: Index of ``outData`` where the time is written; when
            ``hasIter`` is true the iteration count goes to ``tp + 1``.
        hasIter: Whether the ``iters`` token must be parsed as well.

    Raises:
        ValueError: If a required key is absent from ``lineS`` or its
            value cannot be converted to a number.
    """
    time = None
    iters = None
    for data in lineS:
        k_v = data.split('=')
        if k_v[0] == "time":
            time = float(k_v[1])
        elif k_v[0] == "iters" and hasIter:
            iters = int(k_v[1])

    # Validate before writing so a malformed line never leaves outData
    # half-updated (and never ends in an opaque UnboundLocalError).
    if time is None:
        raise ValueError("no 'time=' field found in line: %r" % (lineS,))
    if hasIter and iters is None:
        raise ValueError("no 'iters=' field found in line: %r" % (lineS,))

    outData[tp] = time
    if hasIter:
        outData[tp + 1] = iters
#-----------------------------------------------
def record(f, observation, line):
    """Fill ``observation`` from ``line`` and the next six lines of ``f``.

    Args:
        f: Iterator over the remaining lines; advanced with ``next``.
        observation: Mutable sequence updated in place.
        line: The already-read first line of the record.
    """
    # First line: tokens 1..6 are ``key=value`` integers stored in slots 1..6.
    header = line.split()
    for pos in range(1, 7):
        observation[pos] = int(header[pos].split('=')[1])

    # Second line (process numbers): one integer per token, from slot 7 on.
    for slot, token in enumerate(next(f).split(), start=7):
        observation[slot] = int(token.split('=')[1])

    # Next four lines: one time each, stored in slots 9..12.
    for slot in range(9, 13):
        getData(next(f).split(), observation, slot)

    # Last line: its iteration count is always requested as well; the
    # per-record-type switch that used to sit here is disabled.
    getData(next(f).split(), observation, 13, True)
from enum import Enum, unique
@unique
class G_enum(Enum):
    """Positions of the fields inside one row of the global results table.

    Each member's ``.value`` is used as a list index into a row, and the
    members follow the same order as the labels in ``columnsG``.
    ``@unique`` makes an accidentally repeated index fail at import time
    instead of silently aliasing two columns.
    """

    # Scalars filled from the configuration line
    TOTAL_RESIZES = 0
    TOTAL_GROUPS = 1
    TOTAL_STAGES = 2
    GRANULARITY = 3
    SDR = 4
    ADR = 5
    DR = 6
    ASYNCH_REDISTRIBUTION_TYPE = 7
    SPAWN_METHOD = 8
    SPAWN_STRATEGY = 9
    GROUPS = 10
    FACTOR_S = 11
    DIST = 12
    # Per-stage columns
    STAGE_TYPES = 13
    STAGE_TIMES = 14
    STAGE_BYTES = 15
    ITERS = 16
    ASYNCH_ITERS = 17
    # Timing columns
    T_ITER = 18
    T_STAGES = 19
    T_SPAWN = 20
    T_SPAWN_REAL = 21
    T_SR = 22
    T_AR = 23
    T_TOTAL = 24
# Column labels of the global dataframe, listed in G_enum index order.
columnsG = [
    "Total_Resizes", "Total_Groups", "Total_Stages", "Granularity",
    "SDR", "ADR", "DR",
    "Asynch_Redistribution_Type", "Spawn_Method", "Spawn_Strategy",
    "Groups", "Factor_S", "Dist",
    "Stage_Types", "Stage_Times", "Stage_Bytes",
    "Iters", "Asynch_Iters",
    "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_total",
]  # 25
# Obtains the value at a given index of a split line
# and returns it as a float
......@@ -51,83 +41,94 @@ def get_value(line, index):
return float(line[index].split('=')[1].split(',')[0])
# Obtains the general parameters of an execution and
# stores them for creating a dataframe
def record_config_line(lineS, dataA, dataB):
# NOTE(review): a two-argument record_config_line(lineS, dataG_it) is defined
# further down in this file; if both definitions really live in the same
# module, the later one replaces this one.
# Append one empty row to each table (13 slots in dataA, 15 in dataB).
dataA.append([None]*13)
dataB.append([None]*15)
# Configuration fields are read positionally from tokens 2..11 of the line.
# NOTE(review): resizes, stages, at, sm, ss, latency and bw are parsed but
# never used below.
resizes = int(get_value(lineS, 2))
stages = int(get_value(lineS, 3))
compute_tam = int(get_value(lineS, 4))
sdr = int(get_value(lineS, 5))
adr = int(get_value(lineS, 6)) #TODO Make it be read as a percentage
at = int(get_value(lineS, 7))
sm = int(get_value(lineS, 8))
ss = int(get_value(lineS, 9))
latency = get_value(lineS, 10)
bw = get_value(lineS, 11)
# NOTE(review): `it`, `comm_tam`, `cst`, `css` and `time` are neither
# parameters nor assigned in this function -- presumably left over from the
# caller's scope; confirm before relying on this code.
dataB[it][0] = sdr
dataB[it][1] = adr
# Slots 4 and 10 start as empty strings.
dataB[it][4] = ""
dataB[it][5] = compute_tam
dataB[it][6] = comm_tam
dataB[it][7] = cst
dataB[it][8] = css
dataB[it][9] = time
dataB[it][10] = ""
# Same fields for dataA; from slot 5 on they sit one position further right.
dataA[it][0] = sdr
dataA[it][1] = adr
dataA[it][5] = ""
dataA[it][6] = compute_tam
dataA[it][7] = comm_tam
dataA[it][8] = cst
dataA[it][9] = css
dataA[it][10] = time
dataA[it][11] = ""
def record_stage_line(lineS, dataG_it, dataM_it):
# NOTE(review): work in progress -- the empty subscripts `[]` below are not
# valid Python; the target column of each append is still undecided.
# A record_stage_line(lineS, dataG_it, stage) is defined further down in
# this file.
# Stage fields are read from tokens 2..4 of the split line.
pt = int(get_value(lineS, 2))
t_stage = get_value(lineS, 3)
u_bytes = int(get_value(lineS, 4))
dataG_it[].append(pt)
dataG_it[].append(t_stage)
dataG_it[].append(u_bytes)
dataM_it[].append(pt)
dataM_it[].append(t_stage)
dataM_it[].append(u_bytes)
def record_resize_line(lineS, dataG_it, dataM_it):
# NOTE(review): `resizes`, `it`, `dataA`, `dataB` and `previousNP` are not
# parameters of this function and neither parameter dataG_it/dataM_it is
# used -- presumably this body was lifted from the file-reading loop;
# confirm. A record_resize_line(lineS, dataG_it, group) is defined further
# down in this file.
# Tokens 2 and 3 are parsed as integers; token 5 is kept as a raw string.
iters = int(lineS[2].split('=')[1].split(',')[0])
npr = int(lineS[3].split('=')[1].split(',')[0])
dist = lineS[5].split('=')[1]
resizes = resizes - 1
# When the counter reaches zero the comma-separated strings are closed
# without a trailing comma and `timer` is set to 4.
if resizes == 0:
dataB[it][3] = npr
dataB[it][4] += dist
dataB[it][10] += str(iters)
dataA[it][4] = npr #FIXME Will not be correct if there is more than one reconfiguration
dataA[it][2] = str(previousNP) + "," + str(npr)
dataA[it][5] += dist
dataA[it][11] += str(iters)
timer = 4
else:
dataB[it][2] = npr
dataB[it][4] += dist + ","
dataB[it][10] += str(iters) + ","
dataA[it][3] = npr
dataA[it][5] += dist + ","
dataA[it][11] += str(iters) + ","
previousNP = npr
# stores them for creating a global dataframe
def record_config_line(lineS, dataG_it):
    """Fill one row of the global table from a split configuration line.

    Args:
        lineS: Tokens of the configuration line. Eight values are read
            with ``get_value`` from consecutive tokens starting at index 2.
        dataG_it: Row, indexable by ``G_enum`` values, updated in place.
    """
    # Destination column of each consecutive token of the line.
    ordered_indexes = [
        G_enum.TOTAL_RESIZES.value, G_enum.TOTAL_STAGES.value,
        G_enum.GRANULARITY.value, G_enum.SDR.value,
        G_enum.ADR.value, G_enum.ASYNCH_REDISTRIBUTION_TYPE.value,
        G_enum.SPAWN_METHOD.value, G_enum.SPAWN_STRATEGY.value,
    ]
    offset_line = 2
    for i, index in enumerate(ordered_indexes):
        value = get_value(lineS, i + offset_line)
        # get_value yields floats; whole numbers are stored as ints so they
        # can be used as list lengths below.
        if value.is_integer():
            value = int(value)
        dataG_it[index] = value

    # The value read as "resizes" is stored as the number of groups, and
    # the number of resizes is one less.
    dataG_it[G_enum.TOTAL_GROUPS.value] = dataG_it[G_enum.TOTAL_RESIZES.value]
    dataG_it[G_enum.TOTAL_RESIZES.value] -= 1  # FIXME Change this in the synthetic application
    # FIXME Change once ADR is no longer a percentage
    dataG_it[G_enum.DR.value] = dataG_it[G_enum.SDR.value] + dataG_it[G_enum.ADR.value]

    # Init lists for each column
    array_groups = [
        G_enum.GROUPS.value, G_enum.FACTOR_S.value, G_enum.DIST.value,
        G_enum.ITERS.value, G_enum.ASYNCH_ITERS.value,
        G_enum.T_ITER.value, G_enum.T_STAGES.value,
    ]
    # NOTE(review): the first three columns here were just filled from the
    # configuration line and are overwritten by the empty lists below --
    # confirm that this is intended.
    array_resizes = [
        G_enum.ASYNCH_REDISTRIBUTION_TYPE.value, G_enum.SPAWN_METHOD.value,
        G_enum.SPAWN_STRATEGY.value, G_enum.T_SPAWN.value,
        G_enum.T_SPAWN_REAL.value, G_enum.T_SR.value, G_enum.T_AR.value,
    ]
    array_stages = [
        G_enum.STAGE_TYPES.value, G_enum.STAGE_TIMES.value,
        G_enum.STAGE_BYTES.value,
    ]

    for index in array_groups:
        dataG_it[index] = [None] * dataG_it[G_enum.TOTAL_GROUPS.value]
    for index in array_resizes:
        dataG_it[index] = [None] * dataG_it[G_enum.TOTAL_RESIZES.value]
    for index in array_stages:
        dataG_it[index] = [None] * dataG_it[G_enum.TOTAL_STAGES.value]
#columnsG = ["Total_Resizes", "Total_Groups", "Total_Stages", "Granularity", "SDR", "ADR", "DR", "Asynch_Redistribution_Type", \\
# "Spawn_Method", "Spawn_Strategy", "Groups", "Dist", "Stage_Types", "Stage_Times", "Stage_Bytes", \\
# "Iters", "Asynch_Iters", "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_total"] #24
#columnsG = ["N", "%Async", "Groups", "NP", "NS", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #13
# Obtains the parameters of a stage line
# and stores them in the dataframe.
# The caller must indicate which stage
# the line refers to
def record_stage_line(lineS, dataG_it, stage):
    """Store the three values of a split stage line at position ``stage``.

    Args:
        lineS: Tokens of the stage line; values are read with
            ``get_value`` from tokens 2, 3 and 4.
        dataG_it: Row whose STAGE_TYPES, STAGE_TIMES and STAGE_BYTES
            entries are indexable containers; updated in place.
        stage: Position inside those containers to write to.
    """
    array_stages = [
        G_enum.STAGE_TYPES.value,
        G_enum.STAGE_TIMES.value,
        G_enum.STAGE_BYTES.value,
    ]
    offset_lines = 2
    for i, index in enumerate(array_stages):
        value = get_value(lineS, i + offset_lines)
        # Whole-number floats are stored as ints.
        if value.is_integer():
            value = int(value)
        dataG_it[index][stage] = value
# Obtains the parameters of a resize line
# and stores them in the dataframe.
# The caller must indicate which group
# the resize line refers to
def record_resize_line(lineS, dataG_it, group):
    """Store the four values of a split resize line at position ``group``.

    Args:
        lineS: Tokens of the resize line; values are read with
            ``get_value`` from tokens 2 to 5.
        dataG_it: Row whose ITERS, GROUPS, FACTOR_S and DIST entries are
            indexable containers; updated in place.
        group: Position inside those containers to write to.
    """
    array_groups = [
        G_enum.ITERS.value,
        G_enum.GROUPS.value,
        G_enum.FACTOR_S.value,
        G_enum.DIST.value,
    ]
    offset_lines = 2
    # NOTE(review): every token, including the one stored under DIST, goes
    # through get_value and is therefore parsed as a number -- confirm that
    # the distribution field is numeric in the input files.
    for i, index in enumerate(array_groups):
        value = get_value(lineS, i + offset_lines)
        # Whole-number floats are stored as ints.
        if value.is_integer():
            value = int(value)
        dataG_it[index][group] = value
def record_time_line(lineS, dataG_it):
    """Store the values of a split time line in the matching row column.

    Lines whose first token is not one of the known time labels are
    ignored.

    Args:
        lineS: Tokens of the line; the first one is the label and the
            values are read with ``get_value`` from token 1 onwards.
        dataG_it: Row, indexable by ``G_enum`` values, updated in place.
    """
    # Label found at the start of the line -> column of the row.
    T_columns = {
        "T_spawn:": G_enum.T_SPAWN.value,
        "T_spawn_real:": G_enum.T_SPAWN_REAL.value,
        "T_SR:": G_enum.T_SR.value,
        "T_AR:": G_enum.T_AR.value,
        "T_total:": G_enum.T_TOTAL.value,
    }
    if not lineS or lineS[0] not in T_columns:  # Only time lines are recorded
        return

    # Use the G_enum column, not the position of the label in the table
    # (positions 0..4 are the configuration columns).
    index = T_columns[lineS[0]]
    offset_lines = 1
    column = dataG_it[index]
    if isinstance(column, list):
        # One value per pre-sized slot of the column.
        for i in range(len(column)):
            column[i] = get_value(lineS, i + offset_lines)
    else:
        # Columns that were never pre-sized as a list (record_config_line
        # does not initialise T_TOTAL) hold a single value.
        dataG_it[index] = get_value(lineS, offset_lines)
#-----------------------------------------------
def read_file(f, dataA, dataB, it):
recording = False
def read_global_file(f, dataA, dataB, it):
resizes = 0
timer = 0
previousNP = 0
......@@ -137,51 +138,16 @@ def read_file(f, dataA, dataB, it):
if len(lineS) > 0:
if lineS[0] == "Config": # CONFIG LINE
recording = True
it += 1
record_config(lineS, dataG, dataM)
dataA.append([None]*25)
record_config(lineS, dataG[it], dataM[it])
elif lineS[0] == "Stage":
record_stage_line(lineS, dataG, dataM)
record_stage_line(lineS, dataG[it], ??)
elif lineS[0] == "Resize":
elif recording and resizes != 0: # RESIZE LINE
iters = int(lineS[2].split('=')[1].split(',')[0])
npr = int(lineS[3].split('=')[1].split(',')[0])
dist = lineS[5].split('=')[1]
resizes = resizes - 1
if resizes == 0:
dataB[it][3] = npr
dataB[it][4] += dist
dataB[it][10] += str(iters)
dataA[it][4] = npr #FIXME No sera correcta si hay mas de una reconfig
dataA[it][2] = str(previousNP) + "," + str(npr)
dataA[it][5] += dist
dataA[it][11] += str(iters)
timer = 4
else:
dataB[it][2] = npr
dataB[it][4] += dist + ","
dataB[it][10] += str(iters) + ","
dataA[it][3] = npr
dataA[it][5] += dist + ","
dataA[it][11] += str(iters) + ","
previousNP = npr
else: # SAVE TIMES
if timer == 4:
dataB[it][11] = float(lineS[1])
elif timer == 3:
dataB[it][12] = float(lineS[1])
elif timer == 2:
dataB[it][13] = float(lineS[1])
elif timer == 1:
dataB[it][14] = float(lineS[1])
else:
dataA[it][12] = float(lineS[1])
timer = timer - 1
record_resize_line(lineS, dataG[it], ??)
elif lineS[0] in T_names:
dataG[it][]
return it
#columnsA1 = ["N", "%Async", "Groups", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #8
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment