# CreateResizeDataframe.py
import sys
import glob
import numpy as np
import pandas as pd
from enum import Enum

class G_enum(Enum):
    """Integer column positions used to index dataframe rows via ``.value``.

    The first section (values 0-26) lays out one row of the global dataframe.
    The trailing sections reuse values that are already taken, so ``enum``
    registers those names as aliases of the earlier members (for example
    ``G_enum.NP is G_enum.TOTAL_RESIZES``); their ``.value`` is still the
    number written here, which is the only thing the visible code relies on.
    """
    TOTAL_RESIZES = 0
    TOTAL_GROUPS = 1
    TOTAL_STAGES = 2
    GRANULARITY = 3
    SDR = 4
    ADR = 5
    DR = 6
    RED_METHOD = 7
    RED_STRATEGY = 8
    SPAWN_METHOD = 9
    SPAWN_STRATEGY = 10
    GROUPS = 11
    FACTOR_S = 12
    DIST = 13
    STAGE_TYPES = 14
    STAGE_TIMES = 15
    STAGE_BYTES = 16
    ITERS = 17
    ASYNCH_ITERS = 18
    T_ITER = 19
    T_STAGES = 20
    T_SPAWN = 21
    T_SPAWN_REAL = 22
    T_SR = 23
    T_AR = 24
    T_MALLEABILITY = 25
    T_TOTAL = 26

    # Malleability specific: positions of the first two columns of the
    # per-resize row (aliases of TOTAL_RESIZES / TOTAL_GROUPS).
    NP = 0
    NC = 1

    # Iteration specific (aliases of GROUPS / ITERS); not referenced by the
    # code visible in this file.
    IS_DYNAMIC = 11
    N_PARENTS = 17

# Reference only (kept commented out): column names of the global dataframe,
# in the same order as the values 0-26 of G_enum.
#columnsG = ["Total_Resizes", "Total_Groups", "Total_Stages", "Granularity", "SDR", "ADR", "DR", "Redistribution_Method",
#            "Redistribution_Strategy", "Spawn_Method", "Spawn_Strategy", "Groups", "FactorS", "Dist", "Stage_Types", "Stage_Times",
#            "Stage_Bytes", "Iters", "Asynch_Iters", "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_Malleability", "T_total"] #27

# Column names of the per-resize dataframe, in the positional order in which
# copy_resize() fills each output row (25 columns).
columnsM = [
    "NP", "NC", "Total_Stages", "Granularity", "SDR", "ADR", "DR",
    "Redistribution_Method", "Redistribution_Strategy",
    "Spawn_Method", "Spawn_Strategy", "FactorS", "Dist",
    "Stage_Type", "Stage_Time", "Stage_Bytes",
    "Iters", "Asynch_Iters", "T_iter", "T_stages",
    "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_Malleability",
]

def copy_resize(row, dataM_it, resize):
  """Fill one per-resize output row from one row of the global dataframe.

  The output layout (columnsM) has no "Groups" column, so every source
  column located after G_enum.GROUPS is written one position earlier
  (``index - 1``); columns before it keep their position.

  Args:
    row: Integer-indexable record laid out as G_enum. The per-group columns
      must themselves be indexable by group number.
    dataM_it: Mutable sequence already sized for the output layout; it is
      written in place.
    resize: Zero-based resize number. The resize goes from group ``resize``
      (source) to group ``resize + 1`` (target).

  Returns:
    None. The result is stored in ``dataM_it``.
  """
  source = resize
  target = resize + 1

  # Copied as they are, same position in both layouts.
  same_position = [G_enum.TOTAL_STAGES.value, G_enum.GRANULARITY.value,
                   G_enum.SDR.value, G_enum.ADR.value, G_enum.DR.value]
  # Copied as they are (not indexed by group), shifted one position left.
  whole_shifted = [G_enum.STAGE_TYPES.value, G_enum.STAGE_TIMES.value,
                   G_enum.STAGE_BYTES.value]
  # Per-group columns read at the source group, shifted one position left.
  source_shifted = [G_enum.FACTOR_S.value, G_enum.ITERS.value,
                    G_enum.ASYNCH_ITERS.value, G_enum.T_SPAWN.value,
                    G_enum.T_SPAWN_REAL.value, G_enum.T_SR.value,
                    G_enum.T_AR.value, G_enum.T_MALLEABILITY.value,
                    G_enum.T_ITER.value, G_enum.T_STAGES.value]
  # Per-group columns read at the target group, same position.
  target_same = [G_enum.RED_METHOD.value, G_enum.RED_STRATEGY.value,
                 G_enum.SPAWN_METHOD.value, G_enum.SPAWN_STRATEGY.value]

  # NP / NC: entries of the Groups column for the source and target groups.
  groups = row[G_enum.GROUPS.value]
  dataM_it[G_enum.NP.value] = groups[source]
  dataM_it[G_enum.NC.value] = groups[target]

  # Dist is stored as a [source, target] pair.
  dist = row[G_enum.DIST.value]
  dataM_it[G_enum.DIST.value - 1] = [dist[source], dist[target]]

  for index in same_position:
    dataM_it[index] = row[index]

  for index in whole_shifted:
    dataM_it[index - 1] = row[index]

  for index in source_shifted:
    dataM_it[index - 1] = row[index][source]

  for index in target_same:
    dataM_it[index] = row[index][target]


#-----------------------------------------------

def create_resize_dataframe(dfG, dataM):
  """Append to ``dataM`` one output row per resize recorded in ``dfG``.

  Args:
    dfG: pandas DataFrame whose columns are laid out positionally as G_enum;
      column G_enum.TOTAL_RESIZES gives the number of resizes of each row.
    dataM: List extended in place. Every appended row is a list with
      ``len(columnsM)`` entries filled by copy_resize().

  Returns:
    None. Rows already present in ``dataM`` are left untouched.
  """
  # Plain tuples give purely positional access. Indexing a Series taken with
  # .iloc by integer position is deprecated in recent pandas when the columns
  # are not integer-labelled.
  for row in dfG.itertuples(index=False, name=None):
    resizes = row[G_enum.TOTAL_RESIZES.value]

    for resize in range(resizes):
      # Fill the new row directly instead of tracking its index in dataM, so
      # a non-empty dataM on entry cannot make us overwrite existing rows.
      new_row = [None] * len(columnsM)
      copy_resize(row, new_row, resize)
      dataM.append(new_row)

#-----------------------------------------------
def main():
  """Command-line entry point.

  Usage: python3 CreateResizeDataframe.py input_file.pkl [output_name]

  Reads the global dataframe from ``input_file.pkl``, builds the per-resize
  dataframe, writes it to ``<output_name>.pkl`` (``dataM.pkl`` when no output
  name is given) and prints both dataframes. Exits with status 1 when the
  input file argument is missing.
  """
  if len(sys.argv) < 2:
    print("The files name is missing\nUsage: python3 CreateResizeDataframe.py input_file.pkl output_name")
    # sys.exit is always available; the bare exit() helper is injected by the
    # site module and is meant for interactive use.
    sys.exit(1)

  input_name = sys.argv[1]
  name = sys.argv[2] if len(sys.argv) > 2 else "dataM"
  print("File name will be: " + name + ".pkl")

  # NOTE(security): unpickling can execute arbitrary code; only load pickle
  # files that come from a trusted source.
  dfG = pd.read_pickle(input_name)

  dataM = []
  create_resize_dataframe(dfG, dataM)

  dfM = pd.DataFrame(dataM, columns=columnsM)
  dfM.to_pickle(name + '.pkl')

  print(dfG)
  print(dfM)


if __name__ == "__main__":
  main()