Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Iker Martín Álvarez
Proteo
Commits
8ca50713
Commit
8ca50713
authored
Nov 04, 2022
by
iker_martin
Browse files
Refactor de recogida de datos. WIP
parent
04e2f90a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Analysis/MallTimes.py
View file @
8ca50713
...
...
@@ -2,48 +2,38 @@ import sys
import
glob
import
numpy
as
np
import
pandas
as
pd
def getData(lineS, outData, tp, hasIter=False):
    """Copy the ``time`` (and optionally ``iters``) field of a line into outData.

    lineS is an iterable of ``key=value`` tokens. The value of the last
    ``time`` token is stored as a float in ``outData[tp]``. When hasIter is
    true, the value of the last ``iters`` token is stored as an int in
    ``outData[tp + 1]``.
    """
    for token in lineS:
        fields = token.split('=')
        key = fields[0]
        if key == "time":
            time = float(fields[1])
        elif hasIter and key == "iters":
            iters = int(fields[1])

    # A key that never appeared leaves its local unbound, so the
    # corresponding store below raises.
    outData[tp] = time
    if hasIter:
        outData[tp + 1] = iters
#-----------------------------------------------
def record(f, observation, line):
    """Fill observation[1:15] from ``line`` and the next six lines of ``f``.

    f           -- iterator over the remaining lines of the file
    observation -- indexable row that receives the parsed values
    line        -- first line of the record (already read by the caller)
    """
    # First line - general info: tokens 1..6 are ``key=value`` integers.
    header = line.split()
    for slot in range(1, 7):
        observation[slot] = int(header[slot].split('=')[1])

    # Second line - process numbers, stored from slot 7 onwards.
    for slot, token in enumerate(next(f).split(), start=7):
        observation[slot] = int(token.split('=')[1])

    # Next four lines - one time value each, slots 9..12.
    for slot in range(9, 13):
        getData(next(f).split(), observation, slot)

    # Last line - time plus iteration count (slots 13 and 14). The iteration
    # count is always read; a check on observation[0] was sketched here but
    # left disabled.
    getData(next(f).split(), observation, 13, True)
from
enum
import
Enum
class G_enum(Enum):
    """Positions of the fields inside one row of the global data.

    The members follow the same order as the names listed in ``columnsG``.
    """

    # Values set when a row is configured
    TOTAL_RESIZES = 0
    TOTAL_GROUPS = 1
    TOTAL_STAGES = 2
    GRANULARITY = 3
    SDR = 4
    ADR = 5
    DR = 6
    ASYNCH_REDISTRIBUTION_TYPE = 7
    SPAWN_METHOD = 8
    SPAWN_STRATEGY = 9

    # Group fields
    GROUPS = 10
    FACTOR_S = 11
    DIST = 12

    # Stage fields
    STAGE_TYPES = 13
    STAGE_TIMES = 14
    STAGE_BYTES = 15

    # Iteration counts
    ITERS = 16
    ASYNCH_ITERS = 17

    # Times
    T_ITER = 18
    T_STAGES = 19
    T_SPAWN = 20
    T_SPAWN_REAL = 21
    T_SR = 22
    T_AR = 23
    T_TOTAL = 24
# Column names of the global data, in the same order as the G_enum members.
columnsG = [
    "Total_Resizes",
    "Total_Groups",
    "Total_Stages",
    "Granularity",
    "SDR",
    "ADR",
    "DR",
    "Asynch_Redistribution_Type",
    "Spawn_Method",
    "Spawn_Strategy",
    "Groups",
    "Factor_S",
    "Dist",
    "Stage_Types",
    "Stage_Times",
    "Stage_Bytes",
    "Iters",
    "Asynch_Iters",
    "T_iter",
    "T_stages",
    "T_spawn",
    "T_spawn_real",
    "T_SR",
    "T_AR",
    "T_total",
]  # 25 columns
# Obtains the value at a given index of a split line
# and returns it as a float value
...
...
@@ -51,83 +41,94 @@ def get_value(line, index):
return
float
(
line
[
index
].
split
(
'='
)[
1
].
split
(
','
)[
0
])
# Obtains the general parameters of an execution and
# stores them for creating a dataframe
def record_config_line(lineS, dataA, dataB):
    # Older variant of the config-line parser: appends one empty row to each
    # of dataA (13 slots) and dataB (15 slots), then copies values read from
    # the split config line into the rows at position `it`.
    # NOTE(review): `it`, `comm_tam`, `cst`, `css` and `time` are not defined
    # in this function; presumably they come from an enclosing scope that is
    # not visible here - confirm before reusing this code.
    dataA.append([None] * 13)
    dataB.append([None] * 15)

    # Fields are read by position from the split line (see get_value).
    # NOTE(review): resizes, stages, at, sm, ss, latency and bw are parsed
    # but never used below.
    resizes = int(get_value(lineS, 2))
    stages = int(get_value(lineS, 3))
    compute_tam = int(get_value(lineS, 4))
    sdr = int(get_value(lineS, 5))
    adr = int(get_value(lineS, 6))
    # TODO: take it as a percentage
    at = int(get_value(lineS, 7))
    sm = int(get_value(lineS, 8))
    ss = int(get_value(lineS, 9))
    latency = get_value(lineS, 10)
    bw = get_value(lineS, 11)

    # Row of dataB; slots 4 and 10 start as empty strings.
    dataB[it][0] = sdr
    dataB[it][1] = adr
    dataB[it][4] = ""
    dataB[it][5] = compute_tam
    dataB[it][6] = comm_tam
    dataB[it][7] = cst
    dataB[it][8] = css
    dataB[it][9] = time
    dataB[it][10] = ""

    # Row of dataA; same values shifted by one slot from index 5 onwards.
    dataA[it][0] = sdr
    dataA[it][1] = adr
    dataA[it][5] = ""
    dataA[it][6] = compute_tam
    dataA[it][7] = comm_tam
    dataA[it][8] = cst
    dataA[it][9] = css
    dataA[it][10] = time
    dataA[it][11] = ""
def record_stage_line(lineS, dataG_it, dataM_it):
    # Older variant of the stage-line parser: reads three values from the
    # split line and appends them to both dataG_it and dataM_it.
    # NOTE(review): every `[]` subscript below is empty, which is not valid
    # Python; the target index was never filled in - work in progress.
    pt = int(get_value(lineS, 2))
    t_stage = get_value(lineS, 3)
    u_bytes = int(get_value(lineS, 4))

    dataG_it[].append(pt)
    dataG_it[].append(t_stage)
    dataG_it[].append(u_bytes)

    dataM_it[].append(pt)
    dataM_it[].append(t_stage)
    dataM_it[].append(u_bytes)
def record_resize_line(lineS, dataG_it, dataM_it):
    # Older variant of the resize-line parser. It reads the iteration count,
    # the number of processes and the distribution from the split line and
    # stores them in the rows dataA[it] / dataB[it].
    # NOTE(review): `resizes`, `it`, `dataA`, `dataB` and `previousNP` are not
    # parameters of this function; `resizes` is read before being assigned
    # locally. Presumably this body was moved out of a larger function
    # without adapting it - confirm. The parameters dataG_it and dataM_it
    # are never used.
    iters = int(lineS[2].split('=')[1].split(',')[0])
    npr = int(lineS[3].split('=')[1].split(',')[0])
    dist = lineS[5].split('=')[1]

    resizes = resizes - 1
    if resizes == 0:
        # No resizes pending: close the comma-separated strings.
        dataB[it][3] = npr
        dataB[it][4] += dist
        dataB[it][10] += str(iters)
        dataA[it][4] = npr
        # FIXME: will not be correct if there is more than one reconfiguration
        dataA[it][2] = str(previousNP) + "," + str(npr)
        dataA[it][5] += dist
        dataA[it][11] += str(iters)
        timer = 4
    else:
        # More resize lines expected: append the value followed by a comma.
        dataB[it][2] = npr
        dataB[it][4] += dist + ","
        dataB[it][10] += str(iters) + ","
        dataA[it][3] = npr
        dataA[it][5] += dist + ","
        dataA[it][11] += str(iters) + ","
        previousNP = npr
# stores them for creating a global dataframe
def record_config_line(lineS, dataG_it):
    """Fill one row of the global data from a split "Config" line.

    lineS    -- the config line already split into tokens; the values are
                read with get_value starting at token 2
    dataG_it -- the row to fill, indexed by the G_enum values

    The scalar parameters are stored first; afterwards the per-group,
    per-resize and per-stage columns are initialised as lists of None with
    the proper length.
    """
    # Columns in the order their values appear in the config line.
    ordered_indexes = [G_enum.TOTAL_RESIZES.value, G_enum.TOTAL_STAGES.value,
                       G_enum.GRANULARITY.value, G_enum.SDR.value,
                       G_enum.ADR.value,
                       G_enum.ASYNCH_REDISTRIBUTION_TYPE.value,
                       G_enum.SPAWN_METHOD.value,
                       G_enum.SPAWN_STRATEGY.value]
    offset_line = 2  # first token of the line that holds a value

    for i, index in enumerate(ordered_indexes):
        value = get_value(lineS, i + offset_line)
        # get_value returns a float; keep whole numbers as int
        if value.is_integer():
            value = int(value)
        dataG_it[index] = value

    # The value read as "resizes" is stored as the number of groups, and the
    # number of resizes is one less.
    dataG_it[G_enum.TOTAL_GROUPS.value] = dataG_it[G_enum.TOTAL_RESIZES.value]
    dataG_it[G_enum.TOTAL_RESIZES.value] -= 1
    # FIXME: change in the synthetic app
    # FIXME: change once ADR is no longer a percentage
    dataG_it[G_enum.DR.value] = (dataG_it[G_enum.SDR.value]
                                 + dataG_it[G_enum.ADR.value])

    # Init lists for each column
    array_groups = [G_enum.GROUPS.value, G_enum.FACTOR_S.value,
                    G_enum.DIST.value, G_enum.ITERS.value,
                    G_enum.ASYNCH_ITERS.value, G_enum.T_ITER.value,
                    G_enum.T_STAGES.value]
    # NOTE(review): the first three columns below were just filled with the
    # scalars parsed from the config line and are overwritten here with
    # lists of None - confirm this is intended.
    array_resizes = [G_enum.ASYNCH_REDISTRIBUTION_TYPE.value,
                     G_enum.SPAWN_METHOD.value, G_enum.SPAWN_STRATEGY.value,
                     G_enum.T_SPAWN.value, G_enum.T_SPAWN_REAL.value,
                     G_enum.T_SR.value, G_enum.T_AR.value]
    array_stages = [G_enum.STAGE_TYPES.value, G_enum.STAGE_TIMES.value,
                    G_enum.STAGE_BYTES.value]

    for index in array_groups:
        dataG_it[index] = [None] * dataG_it[G_enum.TOTAL_GROUPS.value]
    for index in array_resizes:
        dataG_it[index] = [None] * dataG_it[G_enum.TOTAL_RESIZES.value]
    for index in array_stages:
        dataG_it[index] = [None] * dataG_it[G_enum.TOTAL_STAGES.value]
#columnsG = ["Total_Resizes", "Total_Groups", "Total_Stages", "Granularity", "SDR", "ADR", "DR", "Asynch_Redistribution_Type", \\
# "Spawn_Method", "Spawn_Strategy", "Groups", "Dist", "Stage_Types", "Stage_Times", "Stage_Bytes", \\
# "Iters", "Asynch_Iters", "T_iter", "T_stages", "T_spawn", "T_spawn_real", "T_SR", "T_AR", "T_total"] #24
#columnsG = ["N", "%Async", "Groups", "NP", "NS", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #13
# Obtains the parameters of a stage line
# and stores them in the dataframe.
# The caller must indicate which stage
# the line refers to
def record_stage_line(lineS, dataG_it, stage):
    """Store the values of a split "Stage" line in the row dataG_it.

    lineS    -- the stage line already split into tokens; the values are
                read with get_value starting at token 2
    dataG_it -- the row to fill; its stage columns must already be lists
    stage    -- position inside those lists that this line refers to
    """
    # Columns in the order their values appear in the line.
    array_stages = [G_enum.STAGE_TYPES.value, G_enum.STAGE_TIMES.value,
                    G_enum.STAGE_BYTES.value]
    offset_lines = 2  # first token of the line that holds a value

    for i, index in enumerate(array_stages):
        value = get_value(lineS, i + offset_lines)
        # get_value returns a float; keep whole numbers as int
        if value.is_integer():
            value = int(value)
        dataG_it[index][stage] = value
# Obtains the parameters of a resize line
# and stores them in the dataframe.
# The caller must indicate which group
# the resize line refers to
def record_resize_line(lineS, dataG_it, group):
    """Store the values of a split "Resize" line in the row dataG_it.

    lineS    -- the resize line already split into tokens; the values are
                read with get_value starting at token 2
    dataG_it -- the row to fill; its group columns must already be lists
    group    -- position inside those lists that this line refers to
    """
    # Columns in the order their values appear in the line.
    array_groups = [G_enum.ITERS.value, G_enum.GROUPS.value,
                    G_enum.FACTOR_S.value, G_enum.DIST.value]
    offset_lines = 2  # first token of the line that holds a value

    for i, index in enumerate(array_groups):
        # NOTE(review): get_value converts to float, which raises ValueError
        # on a non-numeric token - confirm the DIST field is numeric.
        value = get_value(lineS, i + offset_lines)
        # get_value returns a float; keep whole numbers as int
        if value.is_integer():
            value = int(value)
        dataG_it[index][group] = value
def record_time_line(lineS, dataG_it):
    """Store the values of a split time line in the row dataG_it.

    lineS    -- the line already split into tokens; token 0 is the label
                and the values are read with get_value from token 1 onwards
    dataG_it -- the row to fill, indexed by the G_enum values

    Lines whose first token is not one of the known time labels are ignored.
    """
    T_names = ["T_spawn:", "T_spawn_real:", "T_SR:", "T_AR:", "T_total:"]
    T_values = [G_enum.T_SPAWN.value, G_enum.T_SPAWN_REAL.value,
                G_enum.T_SR.value, G_enum.T_AR.value, G_enum.T_TOTAL.value]

    # Execute only if line represents a Time
    if not lineS or lineS[0] not in T_names:
        return

    # Translate the label into the column of the row it belongs to.
    index = T_values[T_names.index(lineS[0])]
    offset_lines = 1  # first token of the line that holds a value

    column = dataG_it[index]
    if isinstance(column, list):
        # One value per already-allocated position of the column.
        for i in range(len(column)):
            column[i] = get_value(lineS, i + offset_lines)
    else:
        # NOTE(review): the T_total column is not initialised as a list by
        # record_config_line, so it is treated as a single value - confirm.
        dataG_it[index] = get_value(lineS, offset_lines)
#-----------------------------------------------
def
read_file
(
f
,
dataA
,
dataB
,
it
):
recording
=
False
def
read_global_file
(
f
,
dataA
,
dataB
,
it
):
resizes
=
0
timer
=
0
previousNP
=
0
...
...
@@ -137,51 +138,16 @@ def read_file(f, dataA, dataB, it):
if
len
(
lineS
)
>
0
:
if
lineS
[
0
]
==
"Config"
:
# CONFIG LINE
recording
=
True
it
+=
1
record_config
(
lineS
,
dataG
,
dataM
)
dataA
.
append
([
None
]
*
25
)
record_config
(
lineS
,
dataG
[
it
],
dataM
[
it
])
elif
lineS
[
0
]
==
"Stage"
:
record_stage_line
(
lineS
,
dataG
,
dataM
)
record_stage_line
(
lineS
,
dataG
[
it
],
??
)
elif
lineS
[
0
]
==
"Resize"
:
elif
recording
and
resizes
!=
0
:
# RESIZE LINE
iters
=
int
(
lineS
[
2
].
split
(
'='
)[
1
].
split
(
','
)[
0
])
npr
=
int
(
lineS
[
3
].
split
(
'='
)[
1
].
split
(
','
)[
0
])
dist
=
lineS
[
5
].
split
(
'='
)[
1
]
resizes
=
resizes
-
1
if
resizes
==
0
:
dataB
[
it
][
3
]
=
npr
dataB
[
it
][
4
]
+=
dist
dataB
[
it
][
10
]
+=
str
(
iters
)
dataA
[
it
][
4
]
=
npr
#FIXME No sera correcta si hay mas de una reconfig
dataA
[
it
][
2
]
=
str
(
previousNP
)
+
","
+
str
(
npr
)
dataA
[
it
][
5
]
+=
dist
dataA
[
it
][
11
]
+=
str
(
iters
)
timer
=
4
else
:
dataB
[
it
][
2
]
=
npr
dataB
[
it
][
4
]
+=
dist
+
","
dataB
[
it
][
10
]
+=
str
(
iters
)
+
","
dataA
[
it
][
3
]
=
npr
dataA
[
it
][
5
]
+=
dist
+
","
dataA
[
it
][
11
]
+=
str
(
iters
)
+
","
previousNP
=
npr
else
:
# SAVE TIMES
if
timer
==
4
:
dataB
[
it
][
11
]
=
float
(
lineS
[
1
])
elif
timer
==
3
:
dataB
[
it
][
12
]
=
float
(
lineS
[
1
])
elif
timer
==
2
:
dataB
[
it
][
13
]
=
float
(
lineS
[
1
])
elif
timer
==
1
:
dataB
[
it
][
14
]
=
float
(
lineS
[
1
])
else
:
dataA
[
it
][
12
]
=
float
(
lineS
[
1
])
timer
=
timer
-
1
record_resize_line
(
lineS
,
dataG
[
it
],
??
)
elif
lineS
[
0
]
in
T_names
:
dataG
[
it
][]
return
it
#columnsA1 = ["N", "%Async", "Groups", "Dist", "Matrix", "CommTam", "Cst", "Css", "Time", "Iters", "TE"] #8
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment