Commit ec2bcc2a authored by Iker Martín Álvarez

Refactored the collectors into a single one that captures everything in a single DF. iterTimes.py has not been removed yet.
parent fd7ac130
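The gist of the refactor: every parsed execution now contributes one row to a single list (dataG), which is turned into one DataFrame at the end. A rough sketch of that pattern, not the script itself (the column names are a subset of the real columnsG list and the row values are invented):

import pandas as pd

columnsG = ["Total_Resizes", "Total_Groups", "T_total"]  # subset of the real column list
dataG = []
dataG.append([2, 3, 12.5])                               # one row per execution, aligned with columnsG
dfG = pd.DataFrame(dataG, columns=columnsG)
dfG.to_csv("resultsG.csv")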
@@ -41,19 +41,24 @@ columnsG = ["Total_Resizes", "Total_Groups", "Total_Stages", "Granularity", "SDR
     "Redistribution_Strategy", "Spawn_Method", "Spawn_Strategy", "Groups", "FactorS", "Dist", "Stage_Types", "Stage_Times", \
     "Stage_Bytes", "Iters", "Asynch_Iters", "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_total"] #26
-columnsM = ["NP", "NC", "Total_Stages", "Granularity", "SDR", "ADR", "DR", "Redistribution_Method", \
-    "Redistribution_Strategy", "Spawn_Method", "Spawn_Strategy", "FactorS", "Dist", "Stage_Types", "Stage_Times", \
-    "Stage_Bytes", "Iters", "Asynch_Iters", "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR"] #24
+#-----------------------------------------------
 # Obtains the value of a given index in a split line
 # and returns it as a float value if possible, string otherwise
-def get_value(line, index):
-    value = line[index].split('=')[1].split(',')[0]
+def get_value(line, index, separator=True):
+    if separator:
+        value = line[index].split('=')[1].split(',')[0]
+    else:
+        value = line[index]
     try:
-        return float(value)
+        value = float(value)
+        if value.is_integer():
+            value = int(value)
     except ValueError:
         return value
+    return value
+#-----------------------------------------------
 # Obtains the general parameters of an execution and
 # stores them for creating a global dataframe
 def record_config_line(lineS, dataG_it):
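get_value now handles the int/float coercion itself and can skip the key=value parsing via separator=False. A quick illustration using the function defined above on invented tokens (the real log prefix may differ; only the key=value, shape is taken from the parser):

lineS = ["Config", "run:", "Total_Resizes=2,", "Granularity=100.5,", "Dist=cpu,"]   # invented sample tokens
get_value(lineS, 2)                          # -> 2      (int after '=' and ',' stripping)
get_value(lineS, 3)                          # -> 100.5  (float)
get_value(lineS, 4)                          # -> 'cpu'  (string fallback)
get_value(["T_total:", "12.75"], 1, False)   # -> 12.75  (raw token, no '=' split)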
@@ -62,8 +67,6 @@ def record_config_line(lineS, dataG_it):
     offset_line = 2
     for i in range(len(ordered_indexes)):
         value = get_value(lineS, i+offset_line)
-        if value.is_integer():
-            value = int(value)
         index = ordered_indexes[i]
         dataG_it[index] = value
@@ -74,14 +77,15 @@ def record_config_line(lineS, dataG_it):
     # Init lists for each column
     array_groups = [G_enum.GROUPS.value, G_enum.FACTOR_S.value, G_enum.DIST.value, G_enum.ITERS.value, \
-            G_enum.ASYNCH_ITERS.value, G_enum.T_ITER.value, G_enum.T_STAGES.value]
-    array_resizes = [G_enum.REDISTRIBUTION_METHOD.value, G_enum.REDISTRIBUTION_METHOD.value,
-            G_enum.SPAWN_METHOD.value, G_enum.SPAWN_STRATEGY.value, G_enum.T_SPAWN.value, \
-            G_enum.T_SPAWN_REAL.value, G_enum.T_SR.value, G_enum.T_AR.value]
+            G_enum.ASYNCH_ITERS.value, G_enum.T_ITER.value, G_enum.T_STAGES.value, G_enum.RED_METHOD.value, \
+            G_enum.RED_STRATEGY.value, G_enum.SPAWN_METHOD.value, G_enum.SPAWN_STRATEGY.value,]
+    array_resizes = [ G_enum.T_SPAWN.value, G_enum.T_SPAWN_REAL.value, G_enum.T_SR.value, G_enum.T_AR.value]
     array_stages = [G_enum.STAGE_TYPES.value, \
             G_enum.STAGE_TIMES.value, G_enum.STAGE_BYTES.value]
     for index in array_groups:
         dataG_it[index] = [None]*dataG_it[G_enum.TOTAL_GROUPS.value]
+    for group in range(dataG_it[G_enum.TOTAL_GROUPS.value]): #FIXME Change order: Async_Iters before T_iter, so the size can be known when creating it
+        dataG_it[G_enum.T_ITER.value][group] = []
     for index in array_resizes:
         dataG_it[index] = [None]*dataG_it[G_enum.TOTAL_RESIZES.value]
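For orientation, with invented sizes Total_Groups = 2, Total_Resizes = 1 and Total_Stages = 3, the initialisation above leaves the row looking like this:

# dataG_it[G_enum.T_ITER.value]      == [[], []]            # one empty list per group
# dataG_it[G_enum.T_SPAWN.value]     == [None]              # one slot per resize
# dataG_it[G_enum.STAGE_TYPES.value] == [None, None, None]  # one slot per stage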
@@ -89,6 +93,7 @@ def record_config_line(lineS, dataG_it):
     for index in array_stages:
         dataG_it[index] = [None]*dataG_it[G_enum.TOTAL_STAGES.value]
+#-----------------------------------------------
 # Obtains the parameters of a stage line
 # and stores it in the dataframe
 # Is needed to indicate in which stage is
@@ -99,116 +104,121 @@ def record_stage_line(lineS, dataG_it, stage):
     offset_lines = 2
     for i in range(len(array_stages)):
         value = get_value(lineS, i+offset_lines)
-        if value.is_integer():
-            value = int(value)
         index = array_stages[i]
         dataG_it[index][stage] = value
+#-----------------------------------------------
 # Obtains the parameters of a resize line
 # and stores them in the dataframe
 # It is needed to indicate which group
 # the resize line refers to
-def record_resize_line(lineS, dataG_it, group):
+def record_group_line(lineS, dataG_it, group):
     array_groups = [G_enum.ITERS.value, G_enum.GROUPS.value, G_enum.FACTOR_S.value, G_enum.DIST.value, \
-            G_enum.REDISTRIBUTION_METHOD.value, G_enum.REDISTRIBUTION_STRATEGY.value, G_enum.SPAWN_METHOD.value, G_enum.SPAWN_STRATEGY.value]
+            G_enum.RED_METHOD.value, G_enum.RED_STRATEGY.value, G_enum.SPAWN_METHOD.value, G_enum.SPAWN_STRATEGY.value]
     offset_lines = 2
     for i in range(len(array_groups)):
         value = get_value(lineS, i+offset_lines)
-        if value.is_integer():
-            value = int(value)
-        index = array_stage[i]
+        index = array_groups[i]
         dataG_it[index][group] = value
+#-----------------------------------------------
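record_group_line reads key=value tokens from offset 2 onward, in the order given by array_groups. A hedged illustration; the key names and values below are invented, and the per-group lists are assumed to have been created by record_config_line first:

lineS = "Group 0: Iters=100, Procs=4, FactorS=1, Dist=compact, RM=0, RS=1, SM=0, SS=1,".split()   # invented line
record_group_line(lineS, dataG_it, 0)
# dataG_it[G_enum.ITERS.value][0]  -> 100
# dataG_it[G_enum.GROUPS.value][0] -> 4
# dataG_it[G_enum.DIST.value][0]   -> 'compact'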
 def record_time_line(lineS, dataG_it):
     T_names = ["T_spawn:", "T_spawn_real:", "T_SR:", "T_AR:", "T_total:"]
     T_values = [G_enum.T_SPAWN.value, G_enum.T_SPAWN_REAL.value, G_enum.T_SR.value, G_enum.T_AR.value, G_enum.T_TOTAL.value]
     if not (lineS[0] in T_names): # Execute only if line represents a Time
         return
-    index = T_names.index(linesS[0])
+    index = T_names.index(lineS[0])
     index = T_values[index]
     offset_lines = 1
-    for i in range(len(dataG_it[index])):
-        dataG_it[index][i] = get_value(lineS, i+offset_lines)
-def record_multiple_times_line(lineS, dataG_it, ):
-    T_values = [G_enum.T_SPAWN.value, G_enum.T_SPAWN_REAL.value, G_enum.T_SR.value, G_enum.T_AR.value, G_enum.T_TOTAL.value]
+    len_index = 1
+    if dataG_it[index] != None:
+        len_index = len(dataG_it[index])
+        for i in range(len_index):
+            dataG_it[index][i] = get_value(lineS, i+offset_lines, False)
+    else:
+        dataG_it[index] = get_value(lineS, offset_lines, False)
+#-----------------------------------------------
+def record_multiple_times_line(lineS, dataG_it, group):
+    T_names = ["T_iter:", "T_stage"]
+    T_values = [G_enum.T_ITER.value, G_enum.T_STAGES.value]
     if not (lineS[0] in T_names): # Execute only if line represents a Time
         return
-    groups = dataG_it[G_enum.TOTAL_GROUPS.value]
-    index = T_names.index(linesS[0])
+    index = T_names.index(lineS[0])
     index = T_values[index]
     offset_lines = 1
-    for i in range(len(dataG_it[index])):
+    if index == G_enum.T_STAGES.value:
+        offset_lines += 1
+        total_iters = len(lineS)-offset_lines
+        stage = int(lineS[1].split(":")[0])
+        if stage == 0:
+            dataG_it[index][group] = [None] * total_iters
+            for i in range(total_iters):
+                dataG_it[index][group][i] = [None] * dataG_it[G_enum.TOTAL_STAGES.value]
+        for i in range(total_iters):
+            dataG_it[index][group][i][stage] = get_value(lineS, i+offset_lines, False)
+    else:
+        total_iters = len(lineS)-offset_lines
+        for i in range(total_iters): #FIXME Change order of T_iter and Async_iters; create the list of total_iters here
+            dataG_it[index][group].append(get_value(lineS, i+offset_lines, False))
 #-----------------------------------------------
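The two time formats are handled differently: "T_iter:" lines append one value per iteration to the group's list, while "T_stage" lines carry one value per iteration for a single stage and fill the nested [iteration][stage] structure. An invented example for group 0, assuming the stage-0 line was read first so the per-iteration lists already exist:

lineS = "T_stage 1: 0.2 0.3".split()          # stage 1, two iterations (invented values)
record_multiple_times_line(lineS, dataG_it, 0)
# dataG_it[G_enum.T_STAGES.value][0][0][1] -> 0.2
# dataG_it[G_enum.T_STAGES.value][0][1][1] -> 0.3
lineS = "T_iter: 0.5 0.6".split()             # per-iteration totals (invented values)
record_multiple_times_line(lineS, dataG_it, 0)
# dataG_it[G_enum.T_ITER.value][0]         -> [0.5, 0.6]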
-def read_local_file(f, dataG, it):
-    resizes = 0
-    timer = 0
-    previousNP = 0
+def read_local_file(f, dataG, it, runs_in_file):
+    offset = 0
+    real_it = 0
+    group = 0
     for line in f:
         lineS = line.split()
         if len(lineS) > 0:
-            if lineS[0] == "Config": # CONFIG LINE
-                it += 1
-                record_config(lineS, dataG[it], dataM[it])
-                resize = 0
-                stage = 0
-            elif lineS[0] == "Stage":
-                record_stage_line(lineS, dataG[it], stage)
-                stage+=1
-            elif lineS[0] == "Resize":
-                record_resize_line(lineS, dataG[it], resize)
-                resize+=1
-            elif lineS[0] == "T_total:":
-                value = get_value(lineS, 1)
-                dataG[it][G_enum.T_TOTAL.value] = value
+            if lineS[0] == "Group": # GROUP number
+                offset += 1
+                real_it = it - (runs_in_file-offset)
+                group = int(lineS[1].split(":")[0])
+            if lineS[0] == "Async_Iters:":
+                offset_lines = 1
+                dataG[real_it][G_enum.ASYNCH_ITERS.value][group] = get_value(lineS, offset_lines, False)
             else:
-                record_time_line(lineS, dataG[it])
-    return it
+                record_multiple_times_line(lineS, dataG[real_it], group)
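The real_it arithmetic maps each Group block in a local file back to its run's row in dataG, assuming each local file repeats its group's output once per run, in the same order as the global file. With invented numbers it = 5 and runs_in_file = 3, the blocks map to real_it = 5 - (3 - 1) = 3, then 4, then 5 as offset goes 1, 2, 3.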
 #-----------------------------------------------
 def read_global_file(f, dataG, it):
-    run = -1
+    runs_in_file=0
     for line in f:
         lineS = line.split()
         if len(lineS) > 0:
             if lineS[0] == "Config": # CONFIG LINE
                 it += 1
-                nonlocal columnsG
-                dataG.append([None]*len(columnsG))
-                record_config(lineS, dataG[it])
-                resize = 0
+                runs_in_file += 1
+                group = 0
                 stage = 0
-                run += 1
+                dataG.append([None]*len(columnsG))
+                record_config_line(lineS, dataG[it])
             elif lineS[0] == "Stage":
                 record_stage_line(lineS, dataG[it], stage)
                 stage+=1
-            elif lineS[0] == "Resize":
-                record_resize_line(lineS, dataG[it], resize)
-                resize+=1
+            elif lineS[0] == "Group":
+                record_group_line(lineS, dataG[it], group)
+                group+=1
             else:
                 record_time_line(lineS, dataG[it])
-    read_local_file(dataG[it])
-    return it
+    return it,runs_in_file
 #-----------------------------------------------
 if len(sys.argv) < 2:
     print("The files name is missing\nUsage: python3 MallTimes.py resultsName directory csvOutName")
     exit(1)
+common_name = sys.argv[1]
 if len(sys.argv) >= 3:
     BaseDir = sys.argv[2]
     print("Searching in directory: "+ BaseDir)
@@ -222,28 +232,34 @@ else:
     print("Csv name will be: " + name + "G.csv & " + name + "M.csv")
 insideDir = "Run"
-lista = glob.glob("./" + BaseDir + insideDir + "*/" + sys.argv[1]+ "*Global.o*")
-lista += (glob.glob("./" + BaseDir + sys.argv[1]+ "*Global.o*")) # Se utiliza cuando solo hay un nivel de directorios
+lista = glob.glob(BaseDir + insideDir + "*/" + common_name + "*_Global.out")
+lista += (glob.glob(BaseDir + common_name + "*_Global.out")) # Used when there is only one directory level
 print("Number of files found: "+ str(len(lista)));
 it = -1
 dataG = []
-dataM = []
-columnsG = ["N", "%Async", "Groups", "NP", "NS", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #13
-columnsM = ["N", "%Async", "NP", "NS", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TC", "TH", "TS", "TA"] #15
 for elem in lista:
     f = open(elem, "r")
-    it = read_file(f, dataG, dataM, it)
+    id_run = elem.split("_Global.out")[0].split(common_name)[1]
+    path_to_run = elem.split(common_name)[0]
+    lista_local = glob.glob(path_to_run + common_name + id_run + "_G*NP*.out")
+    it,runs_in_file = read_global_file(f, dataG, it)
     f.close()
+    for elem_local in lista_local:
+        f_local = open(elem_local, "r")
+        read_local_file(f_local, dataG, it, runs_in_file)
+        f_local.close()
 #print(data)
 dfG = pd.DataFrame(dataG, columns=columnsG)
 dfG.to_csv(name + 'G.csv')
+dfG.to_excel(name + 'G.xlsx')
-dfM = pd.DataFrame(dataM, columns=columnsM)
+#dfM = pd.DataFrame(dataM, columns=columnsM)
 #Put the real value in TC and the value the app needs in TH
-cond = dfM.TH != 0
-dfM.loc[cond, ['TC', 'TH']] = dfM.loc[cond, ['TH', 'TC']].values
-dfM.to_csv(name + 'M.csv')
+#cond = dfM.TH != 0
+#dfM.loc[cond, ['TC', 'TH']] = dfM.loc[cond, ['TH', 'TC']].values
+#dfM.to_csv(name + 'M.csv')
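The pairing between a global file and its per-process local files is done purely through file names. A sketch with an invented path (the local naming beyond the _G*NP*.out glob pattern is an assumption):

elem = "./dataRuns/Run3/results07_Global.out"                   # invented path
common_name = "results"
id_run = elem.split("_Global.out")[0].split(common_name)[1]     # "07"
path_to_run = elem.split(common_name)[0]                        # "./dataRuns/Run3/"
# lista_local then matches "./dataRuns/Run3/results07_G*NP*.out", e.g. results07_G0NP4.out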
@@ -3,18 +3,16 @@ import glob
 import numpy as numpy
 import pandas as pd
 if len(sys.argv) < 3:
     print("The files name is missing\nUsage: python3 joinDf.py resultsName1.csv resultsName2.csv csvOutName")
     exit(1)
 if len(sys.argv) >= 4:
-    print("Csv name will be: " + sys.argv[3] + ".csv")
     name = sys.argv[3]
 else:
     name = "dataJOINED"
+print("Csv name will be: " + name + ".csv")
 df1 = pd.read_csv( sys.argv[1] )
 df2 = pd.read_csv( sys.argv[2] )
 frames = [df1, df2]
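The rest of joinDf.py sits outside the visible hunk; given frames = [df1, df2], the join is presumably a row-wise concatenation, roughly (an assumption, not shown in the diff):

dfJoined = pd.concat(frames)          # assumed; the actual call is not in the visible lines
dfJoined.to_csv(name + ".csv")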