Commit fe94510f authored by Iker Martín Álvarez's avatar Iker Martín Álvarez
Browse files

Refactor the recollector to use tuples instead of lists. Other minor changes to the data gatherer code.

parent c3f4d98c
......@@ -87,7 +87,7 @@ def create_resize_dataframe(dfG, dataM):
#-----------------------------------------------
# Abort with a usage message when no input file name was passed on the command line.
if len(sys.argv) < 2:
# NOTE(review): the two print calls below look like the pre- and post-commit
# versions of the same usage line shown together by the diff view — confirm.
print("The files name is missing\nUsage: python3 CreateResizeDataframe.py input_file.csv output_name.csv")
print("The files name is missing\nUsage: python3 CreateResizeDataframe.py input_file.pkl output_name")
exit(1)
# First command-line argument: name of the input file.
input_name = sys.argv[1]
......@@ -95,7 +95,7 @@ if len(sys.argv) > 2:
# Output name taken from the second command-line argument (the guarding `if`
# is on the preceding hunk-header line).
name = sys.argv[2]
else:
# Default output name when no second argument is given.
name = "dataM"
# NOTE(review): the two print calls below look like the pre- and post-commit
# versions of the same message shown together by the diff view — confirm.
print("Csv name will be: " + name + ".pkl")
print("File name will be: " + name + ".pkl")
# Load the input dataframe from the pickle file named by input_name.
dfG = pd.read_pickle(input_name)
......
......@@ -213,8 +213,38 @@ def read_global_file(f, dataG, it):
return it,runs_in_file
#-----------------------------------------------
#-----------------------------------------------
def _to_nested_tuple(value):
    """Return ``value`` with every list/tuple level rebuilt as a tuple.

    Anything that is not a list or tuple is returned unchanged, so empty
    containers and scalar leaves are handled without indexing into them.
    """
    if isinstance(value, (list, tuple)):
        return tuple(_to_nested_tuple(element) for element in value)
    return value


def convert_to_tuples(dfG):
    """Replace the list-valued cells of ``dfG`` with tuples, in place.

    The columns are looked up through the module-level ``columnsG`` table
    using ``G_enum`` members as indices.

    * Columns listed in ``flat_items`` have each cell passed through
      ``tuple()`` (one level).
    * Columns listed in ``nested_items`` hold lists of lists (optionally one
      level deeper); every list level is converted to a tuple.

    Parameters:
        dfG: DataFrame containing all the columns referenced below.

    Returns:
        None. ``dfG`` is modified by reassigning the affected columns.

    The conversion builds new tuples instead of writing into the original
    nested lists, so the data used to build ``dfG`` is left untouched, and it
    does not rely on ``dfG`` having a default integer index.
    """
    flat_items = [
        G_enum.GROUPS.value, G_enum.FACTOR_S.value, G_enum.DIST.value,
        G_enum.ITERS.value, G_enum.ASYNCH_ITERS.value,
        G_enum.RED_METHOD.value, G_enum.RED_STRATEGY.value,
        G_enum.SPAWN_METHOD.value, G_enum.SPAWN_STRATEGY.value,
        G_enum.T_SPAWN.value, G_enum.T_SPAWN_REAL.value, G_enum.T_SR.value,
        G_enum.T_AR.value, G_enum.STAGE_TYPES.value,
        G_enum.STAGE_TIMES.value, G_enum.STAGE_BYTES.value,
    ]
    nested_items = [G_enum.T_ITER.value, G_enum.T_STAGES.value]

    for item in flat_items:
        name = columnsG[item]
        # Element-wise conversion; avoids label-based ``values[index]``
        # access, which only works with a default RangeIndex.
        dfG[name] = dfG[name].map(tuple)

    for item in nested_items:
        name = columnsG[item]
        dfG[name] = dfG[name].map(_to_nested_tuple)
#-----------------------------------------------
# Abort with a usage message when no argument was passed on the command line.
if len(sys.argv) < 2:
# NOTE(review): the two print calls below look like the pre- and post-commit
# versions of the same usage line shown together by the diff view — confirm.
print("The files name is missing\nUsage: python3 MallTimes.py resultsName directory csvOutName")
print("The files name is missing\nUsage: python3 MallTimes.py commonName directory OutName")
exit(1)
# First command-line argument: common name used later to match result files.
common_name = sys.argv[1]
......@@ -228,7 +258,7 @@ if len(sys.argv) >= 4:
# Output name taken from the third command-line argument (the guarding `if`
# is on the preceding hunk-header line).
name = sys.argv[3]
else:
# Default output name when no third argument is given.
name = "data"
# NOTE(review): the two print calls below look like the pre- and post-commit
# versions of the same message shown together by the diff view — confirm.
print("Csv name will be: " + name + "G.csv & " + name + "M.csv")
print("File name will be: " + name + "G.pkl")
# Collect every file matching <BaseDir>Run*/<common_name>*_Global.out.
insideDir = "Run"
lista = glob.glob(BaseDir + insideDir + "*/" + common_name + "*_Global.out")
......@@ -253,6 +283,8 @@ for elem in lista:
# Build the global dataframe from the collected rows, using columnsG as header.
dfG = pd.DataFrame(dataG, columns=columnsG)
# Turn the list-valued cells into tuples (modifies dfG in place).
convert_to_tuples(dfG)
print(dfG)
# Persist the dataframe as a pickle file named <name>G.pkl.
dfG.to_pickle(name + 'G.pkl')
#dfM = pd.DataFrame(dataM, columns=columnsM)
......
......@@ -4,7 +4,7 @@ import numpy as numpy
import pandas as pd
# Abort with a usage message unless at least two input files were passed.
if len(sys.argv) < 3:
# NOTE(review): the two print calls below look like the pre- and post-commit
# versions of the same usage line shown together by the diff view — confirm.
print("The files name is missing\nUsage: python3 joinDf.py resultsName1.csv resultsName2.csv csvOutName")
print("The files name is missing\nUsage: python3 joinDf.py resultsName1.pkl resultsName2.pkl OutName")
exit(1)
if len(sys.argv) >= 4:
......@@ -12,11 +12,11 @@ if len(sys.argv) >= 4:
else:
# Default output name when no third argument is given.
name = "dataJOINED"
# NOTE(review): this hunk appears to interleave pre-commit (CSV) and
# post-commit (pickle) versions of the same statements — confirm which
# lines belong to the current file.
print("Csv name will be: " + name + ".csv")
df1 = pd.read_csv( sys.argv[1] )
df2 = pd.read_csv( sys.argv[2] )
print("File name will be: " + name + ".pkl")
df1 = pd.read_pickle( sys.argv[1] )
df2 = pd.read_pickle( sys.argv[2] )
# Stack the rows of both dataframes into a single one.
frames = [df1, df2]
df3 = pd.concat(frames)
# NOTE(review): dropping the first column was presumably meant to discard the
# index column that a CSV round-trip adds; with pickle input it may remove a
# real data column — verify.
df3 = df3.drop(columns=df3.columns[0])
df3.to_csv(name + '.csv')
df3.to_pickle(name + '.pkl')
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment