Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Iker Martín Álvarez
Proteo
Commits
8ca50713
Commit
8ca50713
authored
Nov 04, 2022
by
iker_martin
Browse files
Refactor de recogida de datos. WIP
parent
04e2f90a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Analysis/MallTimes.py
View file @
8ca50713
...
@@ -2,48 +2,38 @@ import sys
...
@@ -2,48 +2,38 @@ import sys
import
glob
import
glob
import
numpy
as
np
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
from
enum
import
Enum
def
getData
(
lineS
,
outData
,
tp
,
hasIter
=
False
):
for
data
in
lineS
:
class
G_enum
(
Enum
):
k_v
=
data
.
split
(
'='
)
TOTAL_RESIZES
=
0
if
k_v
[
0
]
==
"time"
:
TOTAL_GROUPS
=
1
time
=
float
(
k_v
[
1
])
TOTAL_STAGES
=
2
elif
k_v
[
0
]
==
"iters"
and
hasIter
:
GRANULARITY
=
3
iters
=
int
(
k_v
[
1
])
SDR
=
4
ADR
=
5
outData
[
tp
]
=
time
DR
=
6
if
hasIter
:
ASYNCH_REDISTRIBUTION_TYPE
=
7
outData
[
tp
+
1
]
=
iters
SPAWN_METHOD
=
8
SPAWN_STRATEGY
=
9
#-----------------------------------------------
GROUPS
=
10
def
record
(
f
,
observation
,
line
):
FACTOR_S
=
11
# Record first line - General info
DIST
=
12
lineS
=
line
.
split
()
STAGE_TYPES
=
13
for
j
in
range
(
1
,
7
):
STAGE_TIMES
=
14
observation
[
j
]
=
int
(
lineS
[
j
].
split
(
'='
)[
1
])
STAGE_BYTES
=
15
ITERS
=
16
# Record procces number
ASYNCH_ITERS
=
17
line
=
next
(
f
)
T_ITER
=
18
lineS
=
line
.
split
()
T_STAGES
=
19
j
=
7
T_SPAWN
=
20
for
key_values
in
lineS
:
T_SPAWN_REAL
=
21
k_v
=
key_values
.
split
(
'='
)
T_SR
=
22
observation
[
j
]
=
int
(
k_v
[
1
])
T_AR
=
23
j
+=
1
T_TOTAL
=
24
# Record data
columnsG
=
[
"Total_Resizes"
,
"Total_Groups"
,
"Total_Stages"
,
"Granularity"
,
"SDR"
,
"ADR"
,
"DR"
,
"Asynch_Redistribution_Type"
,
\
j
=
9
"Spawn_Method"
,
"Spawn_Strategy"
,
"Groups"
,
"Factor_S"
,
"Dist"
,
"Stage_Types"
,
"Stage_Times"
,
"Stage_Bytes"
,
\
for
j
in
range
(
9
,
13
):
"Iters"
,
"Asynch_Iters"
,
"T_iter"
,
"T_stages"
,
"T_spawn"
,
"T_spawn_real"
,
"T_SR"
,
"T_AR"
,
"T_total"
]
#25
line
=
next
(
f
)
lineS
=
line
.
split
()
getData
(
lineS
,
observation
,
j
)
line
=
next
(
f
)
lineS
=
line
.
split
()
#if observation[0] == "A":
getData
(
lineS
,
observation
,
13
,
True
)
#else:
# getData(lineS, observation, 13)
# Obtains the value of a given index in a splited line
# Obtains the value of a given index in a splited line
# and returns it as a float values
# and returns it as a float values
...
@@ -51,83 +41,94 @@ def get_value(line, index):
...
@@ -51,83 +41,94 @@ def get_value(line, index):
return
float
(
line
[
index
].
split
(
'='
)[
1
].
split
(
','
)[
0
])
return
float
(
line
[
index
].
split
(
'='
)[
1
].
split
(
','
)[
0
])
# Obtains the general parameters of an execution and
# Obtains the general parameters of an execution and
# stores them for creating a dataframe
# stores them for creating a global dataframe
def
record_config_line
(
lineS
,
dataA
,
dataB
):
def
record_config_line
(
lineS
,
dataG_it
):
dataA
.
append
([
None
]
*
13
)
ordered_indexes
=
[
G_enum
.
TOTAL_RESIZES
.
value
,
G_enum
.
TOTAL_STAGES
.
value
,
G_enum
.
GRANULARITY
.
value
,
G_enum
.
SDR
.
value
,
\
dataB
.
append
([
None
]
*
15
)
G_enum
.
ADR
.
value
,
G_enum
.
ASYNCH_REDISTRIBUTION_TYPE
.
value
,
G_enum
.
SPAWN_METHOD
.
value
,
G_emun
.
SPAWN_STRATEGY
.
value
]
resizes
=
int
(
get_value
(
lineS
,
2
))
offset_line
=
2
stages
=
int
(
get_value
(
lineS
,
3
))
for
i
in
range
(
len
(
ordered_indexes
)):
compute_tam
=
int
(
get_value
(
lineS
,
4
))
value
=
get_value
(
lineS
,
i
+
offset_line
)
sdr
=
int
(
get_value
(
lineS
,
5
))
if
value
.
is_integer
():
adr
=
int
(
get_value
(
lineS
,
6
))
#TODO Que lo tome como porcentaje
value
=
int
(
value
)
at
=
int
(
get_value
(
lineS
,
7
))
index
=
ordered_indexes
[
i
]
sm
=
int
(
get_value
(
lineS
,
8
))
dataG_it
[
index
]
=
value
ss
=
int
(
get_value
(
lineS
,
9
))
latency
=
get_value
(
lineS
,
10
)
dataG_it
[
G_enum
.
TOTAL_GROUPS
.
value
]
=
dataG_it
[
G_enum
.
TOTAL_RESIZES
.
value
]
bw
=
get_value
(
lineS
,
11
)
dataG_it
[
G_enum
.
TOTAL_RESIZES
.
value
]
-=
1
#FIXME Modificar en App sintetica
dataB
[
it
][
0
]
=
sdr
#FIXME Modificar cuando ADR ya no sea un porcentaje
dataB
[
it
][
1
]
=
adr
dataG_it
[
G_enum
.
DR
.
value
]
=
dataG_it
[
G_enum
.
SDR
.
value
]
+
dataG_it
[
G_enum
.
ADR
.
value
]
dataB
[
it
][
4
]
=
""
dataB
[
it
][
5
]
=
compute_tam
# Init lists for each column
dataB
[
it
][
6
]
=
comm_tam
array_groups
=
[
G_enum
.
GROUPS
.
value
,
G_enum
.
FACTOR_S
.
value
,
G_enum
.
DIST
.
value
,
G_enum
.
ITERS
.
value
,
\
dataB
[
it
][
7
]
=
cst
G_enum
.
ASYNCH_ITERS
.
value
,
G_enum
.
T_ITER
.
value
,
G_enum
.
T_STAGES
.
value
]
dataB
[
it
][
8
]
=
css
array_resizes
=
[
G_enum
.
ASYNCH_REDISTRIBUTION_TYPE
.
value
,
G_enum
.
SPAWN_METHOD
.
value
,
\
dataB
[
it
][
9
]
=
time
G_enum
.
SPAWN_STRATEGY
.
value
,
G_enum
.
T_SPAWN
.
value
,
G_enum
.
T_SPAWN_REAL
.
value
,
\
dataB
[
it
][
10
]
=
""
G_enum
.
T_SR
.
value
,
G_enum
.
T_AR
.
value
]
array_stages
=
[
G_enum
.
STAGE_TYPES
.
value
,
\
dataA
[
it
][
0
]
=
sdr
G_enum
.
STAGE_TIMES
.
value
,
G_enum
.
STAGE_BYTES
.
value
]
dataA
[
it
][
1
]
=
adr
for
index
in
array_groups
:
dataA
[
it
][
5
]
=
""
dataG_it
[
index
]
=
[
None
]
*
dataG_it
[
G_enum
.
TOTAL_GROUPS
.
value
]
dataA
[
it
][
6
]
=
compute_tam
dataA
[
it
][
7
]
=
comm_tam
for
index
in
array_resizes
:
dataA
[
it
][
8
]
=
cst
dataG_it
[
index
]
=
[
None
]
*
dataG_it
[
G_enum
.
TOTAL_RESIZES
.
value
]
dataA
[
it
][
9
]
=
css
dataA
[
it
][
10
]
=
time
for
index
in
array_stages
:
dataA
[
it
][
11
]
=
""
dataG_it
[
index
]
=
[
None
]
*
dataG_it
[
G_enum
.
TOTAL_STAGES
.
value
]
def
record_stage_line
(
lineS
,
dataG_it
,
dataM_it
):
pt
=
int
(
get_value
(
lineS
,
2
))
t_stage
=
get_value
(
lineS
,
3
)
u_bytes
=
int
(
get_value
(
lineS
,
4
))
#columnsG = ["Total_Resizes", "Total_Groups", "Total_Stages", "Granularity", "SDR", "ADR", "DR", "Asynch_Redistribution_Type", \\
dataG_it
[].
append
(
pt
)
# "Spawn_Method", "Spawn_Strategy", "Groups", "Dist", "Stage_Types", "Stage_Times", "Stage_Bytes", \\
dataG_it
[].
append
(
t_stage
)
# "Iters", "Asynch_Iters", "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_total"] #24
dataG_it
[].
append
(
u_bytes
)
#columnsG = ["N", "%Async", "Groups", "NP", "NS", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #13
dataM_it
[].
append
(
pt
)
# Obtains the parameters of a stage line
dataM_it
[].
append
(
t_stage
)
# and stores it in the dataframe
dataM_it
[].
append
(
u_bytes
)
# Is needed to indicate in which stage is
# being performed
def
record_resize_line
(
lineS
,
dataG_it
,
dataM_it
):
def
record_stage_line
(
lineS
,
dataG_it
,
stage
):
iters
=
int
(
lineS
[
2
].
split
(
'='
)[
1
].
split
(
','
)[
0
])
array_stages
=
[
G_enum
.
STAGE_TYPES
.
value
,
\
npr
=
int
(
lineS
[
3
].
split
(
'='
)[
1
].
split
(
','
)[
0
])
G_enum
.
STAGE_TIMES
.
value
,
G_enum
.
STAGE_BYTES
.
value
]
dist
=
lineS
[
5
].
split
(
'='
)[
1
]
offset_lines
=
2
for
i
in
range
(
len
(
array_stages
)):
resizes
=
resizes
-
1
value
=
get_value
(
lineS
,
i
+
offset_lines
)
if
resizes
==
0
:
if
value
.
is_integer
():
dataB
[
it
][
3
]
=
npr
value
=
int
(
value
)
dataB
[
it
][
4
]
+=
dist
index
=
array_stage
[
i
]
dataB
[
it
][
10
]
+=
str
(
iters
)
dataG_it
[
index
][
stage
]
=
value
dataA
[
it
][
4
]
=
npr
#FIXME No sera correcta si hay mas de una reconfig
# Obtains the parameters of a resize line
dataA
[
it
][
2
]
=
str
(
previousNP
)
+
","
+
str
(
npr
)
# and stores them in the dataframe
dataA
[
it
][
5
]
+=
dist
# Is needed to indicate to which group refers
dataA
[
it
][
11
]
+=
str
(
iters
)
# the resize line
timer
=
4
def
record_resize_line
(
lineS
,
dataG_it
,
group
):
else
:
array_stages
=
[
G_enum
.
ITERS
.
value
,
G_enum
.
GROUPS
.
value
\
dataB
[
it
][
2
]
=
npr
G_enum
.
FACTOR_S
.
value
,
G_enum
.
DIST
.
value
]
dataB
[
it
][
4
]
+=
dist
+
","
offset_lines
=
2
dataB
[
it
][
10
]
+=
str
(
iters
)
+
","
for
i
in
range
(
len
(
array_stages
)):
value
=
get_value
(
lineS
,
i
+
offset_lines
)
dataA
[
it
][
3
]
=
npr
if
value
.
is_integer
():
dataA
[
it
][
5
]
+=
dist
+
","
value
=
int
(
value
)
dataA
[
it
][
11
]
+=
str
(
iters
)
+
","
index
=
array_stage
[
i
]
previousNP
=
npr
dataG_it
[
index
][
group
]
=
value
def
record_time_line
(
lineS
,
dataG_it
):
T_names
=
[
"T_spawn:"
,
"T_spawn_real:"
,
"T_SR:"
,
"T_AR:"
,
"T_total:"
]
T_values
=
[
G_enum
.
T_SPAWN
.
value
,
G_enum
.
T_SPAWN_REAL
.
value
,
G_enum
.
T_SR
.
value
,
G_enum
.
T_AR
.
value
,
G_enum
.
T_TOTAL
.
value
]
if
not
(
lineS
[
0
]
in
T_names
):
# Execute only if line represents a Time
return
index
=
T_names
.
index
(
linesS
[
0
])
offset_lines
=
1
for
i
in
range
(
len
(
dataG_it
[
index
])):
value
=
get_value
(
lineS
,
i
+
offset_lines
)
dataG_it
[
index
][
i
]
=
value
#-----------------------------------------------
#-----------------------------------------------
def
read_file
(
f
,
dataA
,
dataB
,
it
):
def
read_global_file
(
f
,
dataA
,
dataB
,
it
):
recording
=
False
resizes
=
0
resizes
=
0
timer
=
0
timer
=
0
previousNP
=
0
previousNP
=
0
...
@@ -137,51 +138,16 @@ def read_file(f, dataA, dataB, it):
...
@@ -137,51 +138,16 @@ def read_file(f, dataA, dataB, it):
if
len
(
lineS
)
>
0
:
if
len
(
lineS
)
>
0
:
if
lineS
[
0
]
==
"Config"
:
# CONFIG LINE
if
lineS
[
0
]
==
"Config"
:
# CONFIG LINE
recording
=
True
it
+=
1
it
+=
1
record_config
(
lineS
,
dataG
,
dataM
)
dataA
.
append
([
None
]
*
25
)
record_config
(
lineS
,
dataG
[
it
],
dataM
[
it
])
elif
lineS
[
0
]
==
"Stage"
:
elif
lineS
[
0
]
==
"Stage"
:
record_stage_line
(
lineS
,
dataG
,
dataM
)
record_stage_line
(
lineS
,
dataG
[
it
],
??
)
elif
lineS
[
0
]
==
"Resize"
:
elif
lineS
[
0
]
==
"Resize"
:
elif
recording
and
resizes
!=
0
:
# RESIZE LINE
record_resize_line
(
lineS
,
dataG
[
it
],
??
)
iters
=
int
(
lineS
[
2
].
split
(
'='
)[
1
].
split
(
','
)[
0
])
elif
lineS
[
0
]
in
T_names
:
npr
=
int
(
lineS
[
3
].
split
(
'='
)[
1
].
split
(
','
)[
0
])
dataG
[
it
][]
dist
=
lineS
[
5
].
split
(
'='
)[
1
]
resizes
=
resizes
-
1
if
resizes
==
0
:
dataB
[
it
][
3
]
=
npr
dataB
[
it
][
4
]
+=
dist
dataB
[
it
][
10
]
+=
str
(
iters
)
dataA
[
it
][
4
]
=
npr
#FIXME No sera correcta si hay mas de una reconfig
dataA
[
it
][
2
]
=
str
(
previousNP
)
+
","
+
str
(
npr
)
dataA
[
it
][
5
]
+=
dist
dataA
[
it
][
11
]
+=
str
(
iters
)
timer
=
4
else
:
dataB
[
it
][
2
]
=
npr
dataB
[
it
][
4
]
+=
dist
+
","
dataB
[
it
][
10
]
+=
str
(
iters
)
+
","
dataA
[
it
][
3
]
=
npr
dataA
[
it
][
5
]
+=
dist
+
","
dataA
[
it
][
11
]
+=
str
(
iters
)
+
","
previousNP
=
npr
else
:
# SAVE TIMES
if
timer
==
4
:
dataB
[
it
][
11
]
=
float
(
lineS
[
1
])
elif
timer
==
3
:
dataB
[
it
][
12
]
=
float
(
lineS
[
1
])
elif
timer
==
2
:
dataB
[
it
][
13
]
=
float
(
lineS
[
1
])
elif
timer
==
1
:
dataB
[
it
][
14
]
=
float
(
lineS
[
1
])
else
:
dataA
[
it
][
12
]
=
float
(
lineS
[
1
])
timer
=
timer
-
1
return
it
return
it
#columnsA1 = ["N", "%Async", "Groups", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #8
#columnsA1 = ["N", "%Async", "Groups", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #8
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment