Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
German Leon
Carol-fi
Commits
663c6b9c
Commit
663c6b9c
authored
Jan 10, 2021
by
German Leon
Browse files
Version IO y RF
parent
a04c78af
Changes
19
Hide whitespace changes
Inline
Side-by-side
app_profiler.py
View file @
663c6b9c
...
...
@@ -54,6 +54,7 @@ def profiler_caller(gdb_exec, kernels, benchmark_binary, benchmark_args,device,s
init_string
=
'"{};{};{};file {}; set args {}; set cuda break_on_launch application"'
.
format
(
False
,
True
,
kernels
,
benchmark_binary
,
benchmark_args_striped
)
profiler_cmd
=
script
.
format
(
device
,
gdb_exec
,
init_string
,
cp
.
PROFILER_SCRIPT
)
max_registers
=
os
.
system
(
profiler_cmd
)
>>
8
print
(
"Maximo numero de registros ###################################+++"
)
print
(
max_registers
,
max_registers
>>
8
)
if
bool
(
section
):
init_string
=
'"{};{};{};file {}; set args {}; break {}; break {}"'
.
format
(
bool
(
section
),
False
,
kernels
,
benchmark_binary
,
benchmark_args_striped
,
section
[
'begin'
],
section
[
'end'
])
...
...
classes/BitFlip.py
View file @
663c6b9c
...
...
@@ -47,8 +47,12 @@ class BitFlip:
#print (linea)#+"-"+str(len(linea))+"-"+linea[0])
#print ("============")
lista
=
re
.
findall
(
r
"R(\d+)"
,
linea
[
0
])
#Ahora son todos los registros.
#lista=range(0,self.__maxregs)
setlista
=
set
()
setlista
.
update
(
lista
)
#self.__logging.info(str(len(setlista)))
return
setlista
def
regmod
(
self
):
...
...
classes/SignalApp.py
View file @
663c6b9c
...
...
@@ -60,14 +60,14 @@ class SignalApp(Thread):
os
.
system
(
"{} > /dev/null 2>/dev/null"
.
format
(
self
.
__signal_cmd
))
self
.
__log
.
info
(
"sending signal {}"
.
format
(
signals
))
#try:
#
(self._syncro).wait()
#
(self._syncro).wait()
#except threading.BrokenBarrierError:
#except:
#
(self._syncro).abort()
#
print("Breakpoint fuera de tiempo")
#
(self._syncro).abort()
#
print("Breakpoint fuera de tiempo")
#(self._syncro).reset()
time
.
sleep
(
self
.
__time_to_sleep
)
(
self
.
_syncro
).
reset
()
#
(self._syncro).reset()
try
:
(
self
.
_waitfinish
).
wait
()
#except threading.BrokenBarrierError:
...
...
classes/SummaryFile.py
View file @
663c6b9c
...
...
@@ -26,7 +26,8 @@ class SummaryFile:
self
.
__fieldnames
=
kwargs
.
get
(
"fieldnames"
)
# Open and start csv file
self
.
__open_csv
(
mode
=
'w'
)
#self.__open_csv(mode='w')
self
.
__open_csv
(
mode
=
kwargs
.
get
(
"mode"
))
self
.
__dict_buff
.
writeheader
()
self
.
__close_csv
()
...
...
classes/__pycache__/SignalApp.cpython-37.pyc
View file @
663c6b9c
No preview for this file type
classes/__pycache__/SummaryFile.cpython-37.pyc
View file @
663c6b9c
No preview for this file type
codes/mmElem/matrixmul.conf
View file @
663c6b9c
...
...
@@ -17,9 +17,9 @@ faultModel = 0
# RF -> Register File
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Address (NOT IMPLEMENTED YET)
#
injectionSite = RF
injectionSite
=
RF
injectionSite
=
INST_OUT
#
injectionSite = INST_OUT
# Max time factor to finish the app; it is multiplied by the application's running time.
# For example, if your app takes 2s and maxWaitTimes is 5, the maximum running time before
# the run is considered a crash is 10s.
...
...
@@ -33,7 +33,7 @@ benchmarkBinary = /home/badia/mycarol-fi/codes/mmElem/matrixMul
benchmarkArgs
= -
wA
=
8192
-
hA
=
8192
-
hB
=
8192
-
wB
=
8192
# CSV output file. It will be overwritten at each injection
csvFile
=
codes
/
mmElem
/
fi_matrix_mul_single_bit
.
csv
csvFile
=
results
/
mElem_R_2_gpu1
.
csv
# You should create a script on the benchmark source folder to verify GOLD_OUTPUT x INJ_OUTPUT
goldenCheckScript
=
codes
/
mmElem
/
sdc_check
.
sh
...
...
fault_injector.py
View file @
663c6b9c
...
...
@@ -29,12 +29,19 @@ if the string is always empty the column will be empty, otherwise it
will contain the returned values for each injection
"""
def
receiveSignal
(
signalNumber
,
frame
):
syncro
.
wait
()
try
:
syncro
.
wait
()
except
:
syncro
.
abort
()
print
(
"Breakpoint inicial fuera de tiempo"
)
print
(
"Alcanzado el breakpoint, y recibida la señal"
,
signalNumber
);
def
receiveEnd
(
signalNumber
,
frame
):
print
(
"Esperando sincronismo del final"
);
wait_finish
.
wait
()
try
:
wait_finish
.
wait
()
except
:
wait_finish
.
abort
()
print
(
"Hang timeout execution"
)
print
(
"Recibida la señal de final del programa"
,
signalNumber
);
def
user_defined_function
(
injection_output_path
):
...
...
@@ -64,7 +71,7 @@ def signal_handler(sig, frame):
os
.
system
(
"rm -f {}/bin/*"
.
format
(
current_path
))
for
th
in
gpus_threads
:
th
.
join
()
sys
.
exit
(
0
)
#
sys.exit(0)
"""
...
...
@@ -233,7 +240,7 @@ def check_sdcs_and_app_crash(logging, sdc_check_script, inj_output_path, inj_err
else
:
is_app_crash
[
1
]
=
is_app_crash
[
1
]
+
" "
+
carol_fi_signal
break
if
(
len
(
is_app_crash
)
>
0
):
if
(
not
is_app_crash
[
0
]
):
# Check if NVIDIA signals on output
is_masked
=
True
for
line
in
out_lines
:
...
...
@@ -241,7 +248,10 @@ def check_sdcs_and_app_crash(logging, sdc_check_script, inj_output_path, inj_err
#print("FAIL=="+line+"====")
is_sdc
=
True
is_masked
=
False
break
break
else
:
is_sdc
=
False
is_masked
=
False
# if is_app_crash[0]:
...
...
@@ -533,7 +543,7 @@ by sending a SIGINT signal to the application
def
fault_injection_by_signal
(
**
kwargs
):
# Global rows list
global
lock
,
exit_injector
global
lock
,
exit_injector
,
num_rounds
,
kill_strings
benchmark_binary
=
kwargs
.
get
(
'benchmark_binary'
)
kwargs
[
'signal_cmd'
]
=
"killall -2 {}"
.
format
(
os
.
path
.
basename
(
benchmark_binary
))
fault_models
=
kwargs
.
get
(
'fault_models'
)
...
...
@@ -548,7 +558,13 @@ def fault_injection_by_signal(**kwargs):
# Execute the fault injector for each one of the sections(apps) of the configuration file
for
fault_model
in
fault_models
:
# Execute iterations number of fault injection for a specific app
num_rounds
=
1
try
:
ret_profiler
=
cf
.
load_config_file
(
"tmpxxx_num_rounds.conf"
)
num_rounds
=
int
(
ret_profiler
.
get
(
'DEFAULT'
,
'Ocurrencias'
))
os
.
system
(
"rm tmpxxx_num_rounds.conf"
)
except
:
num_rounds
=
1
while
num_rounds
<=
iterations
:
if
exit_injector
:
return
...
...
@@ -595,12 +611,29 @@ def fault_injection_by_signal(**kwargs):
acc_fault_injeted
=
0
else
:
acc_fault_injected
+=
1
if
(
acc_fault_injected
==
(
max_fallos
/
2
)):
time
.
sleep
(
240
)
if
(
acc_fault_injected
==
(
max_fallos
/
2
)):
for
cmd
in
kill_strings
.
split
(
";"
):
os
.
system
(
cmd
+
" > /dev/null 2>&1"
)
for
th
in
gpus_threads
:
try
:
th
.
join
()
except
:
nulo
=
1
time
.
sleep
(
240
)
if
(
acc_fault_injected
==
max_fallos
):
break
exit_injector
=
True
for
cmd
in
kill_strings
.
split
(
";"
):
os
.
system
(
cmd
+
" > /dev/null 2>&1"
)
for
th
in
gpus_threads
:
try
:
th
.
join
()
except
:
nulo
=
1
f
=
open
(
"tmpxxx_num_rounds.conf"
,
"w"
)
f
.
write
(
"[DEFAULT]
\n
Ocurrencias = "
+
str
(
num_rounds
)
+
"
\n
"
)
f
.
close
()
pretty_print
(
header
=
header
,
row
=
row
)
...
...
@@ -657,7 +690,12 @@ def main():
fieldnames
=
[
'unique_id'
,
'register'
,
'iteration'
,
'fault_model'
,
'thread'
,
'block'
,
'old_value'
,
'new_value'
,
'inj_site'
,
'fault_successful'
,
'hang'
,
'crash'
,
'masked'
,
'sdc'
,
'Exception'
,
'time'
,
'inj_time_location'
,
'bits_flipped'
,
'instruction'
,
'user_defined'
]
summary_file
=
SummaryFile
(
filename
=
csv_file
,
fieldnames
=
fieldnames
,
mode
=
'w'
)
if
os
.
path
.
exists
(
"tmpxxx_num_rounds.conf"
):
mode
=
'a'
else
:
mode
=
'w'
summary_file
=
SummaryFile
(
filename
=
csv_file
,
fieldnames
=
fieldnames
,
mode
=
mode
)
#'w'
# Lock for summary file parallel
lock
=
Lock
()
...
...
@@ -718,13 +756,13 @@ def main():
fi_master_thread
=
Thread
(
target
=
fault_injection_by_signal
,
kwargs
=
kwargs
)
gpus_threads
.
append
(
fi_master_thread
)
ret
=
0
#
ret=0
for
thread
in
gpus_threads
:
thread
.
start
()
for
thread
in
gpus_threads
:
thread
.
join
()
#ret +=
thread.exit_code
#ret +=
acc_fault_injected
os
.
system
(
"rm -f {}/bin/*"
.
format
(
current_path
))
if
exit_injector
:
...
...
@@ -732,8 +770,11 @@ def main():
else
:
cf
.
printf
(
"Fault injection finished, results can be found in {}"
.
format
(
csv_file
))
########################################################################
return
ret
#if (iterations==num_rounds):
# sys.exit(0)
#else:
# sys.exit(1)
return
(
iterations
==
num_rounds
)
########################################################################
# Main #
########################################################################
...
...
flip_value.py
View file @
663c6b9c
...
...
@@ -40,10 +40,13 @@ def set_event(event):
# #os.system ("killall -2 python3")
elif
(
isinstance
(
event
,
gdb
.
SignalEvent
)):
try
:
# Just checking if it was hit
if
bit_flip
.
fault_injected
is
False
:
bit_flip
.
single_event
()
global_logging
.
info
(
"BIT FLIP SET ON SIGNAL {}"
.
format
(
event
.
stop_signal
))
#global_logging.info ("Enviado senal a "+ str(pid))
#os.system ("kill -s USR1 " + str(pid))
except
Exception
as
err
:
global_logging
.
exception
(
"EVENT DIFFERENT FROM STOP SIGNAL: {}"
.
format
(
str
(
err
)))
...
...
profiler_new.py
View file @
663c6b9c
...
...
@@ -101,11 +101,13 @@ def main():
#print (cadena[2].split(";"))
for
init_str
in
cadena
[
3
].
split
(
";"
):
gdb
.
execute
(
init_str
)
max
=
0.
max
i
=
0.
gdb
.
execute
(
"r"
)
if
(
onlycount
):
ks
=
cadena
[
2
].
split
(
","
,
1
)
max
=
numreg
(
ks
[
0
])
ks
=
cadena
[
2
].
split
(
","
,
1
)
print
(
ks
)
maxi
=
max
([
numreg
(
x
)
for
x
in
ks
])
print
(
"Maximo.."
+
str
(
maxi
))
else
:
if
(
section
):
gdb
.
execute
(
"c"
)
...
...
@@ -115,6 +117,6 @@ def main():
f
=
open
(
"tmpxxx_return_profiler.conf"
,
"w"
)
f
.
write
(
"[DEFAULT]
\n
Ocurrencias = "
+
str
(
ocurrencias
)
+
"
\n
Tiempo = "
+
str
(
trun
)
+
"
\n
"
)
f
.
close
()
sys
.
exit
(
max
)
sys
.
exit
(
max
i
)
main
()
res-new-ind-simple.sh
View file @
663c6b9c
#cut -d, -f 3,4,9,10,11,12,13,14,15 fi_lavaMD_single_bit_RF.csv
fi_field
=
10
model
=
$[$fi_field
- 1]
hang
=
$[$fi_field
+ 1]
crash
=
$[$hang
+ 1]
masked
=
$[$crash
+1
]
...
...
@@ -10,8 +11,19 @@ do
cab
=
$(
cut
-d
,
-f
$i
$1
|head
-1
)
trues
=
$(
cut
-d
,
-f
$i
$1
|grep
"True"
|wc
-l
)
echo
$cab
"="
$trues
campos[
$i
]=
$trues
done
campos[
$masked
]=
$[
campos[
$masked
]
-campos
[
$hang
]]
for
i
in
$hang
$crash
$masked
$sdc
do
tantoporcien
=
$(
printf
%.3f
"
$((
campos[
$i
]
*
10
**
5
/
campos[
$fi_field
]
))
e-3"
)
cab
=
$(
cut
-d
,
-f
$i
$1
|head
-1
)
echo
$cab
"(%)="
$tantoporcien
done
interrupciones
=
$(
grep
unique
$1
|
wc
-l
)
interrupciones
=
$[
$interrupciones
-1
]
echo
"Hang-restart-fi="
$interrupciones
echo
"Model="
$(
cut
-d
,
-f
$model
$1
|head
-2
|tail
-1
)
excep
=
$[$sdc
+1]
for
i
in
$(
seq
1 1 15
)
...
...
res-new-ind.sh
View file @
663c6b9c
#cut -d, -f 3,4,9,10,11,12,13,14,15 fi_lavaMD_single_bit_RF.csv
./res-new-ind-simple.sh
$1
reg
=
2
num_reg
=
0
;
for
i
in
$(
seq
0 1 255
)
do
ib
=
$(
echo
"R"
$i
)
n
=
$(
cut
-f
$reg
-d
,
$1
|grep
$ib
|wc
-l
)
sib
=
$(
echo
"R"
$i
","
)
n
=
$(
cut
-f
$reg
-d
,
$1
|grep
-w
$ib
|wc
-l
)
if
[
"
$n
"
-gt
0
]
;
then
echo
$ib
"="
$n
num_reg
=
$[$num_reg
+ 1]
fi
done
echo
"Registros usados="
$num_reg
for
i
in
$(
seq
0 1 255
)
do
ib
=
$(
echo
"R"
$i
)
n
=
$(
cut
-f
$reg
-d
,
$1
|grep
$ib
|wc
-l
)
sib
=
$(
echo
"R"
$i
","
)
n
=
$(
cut
-f
$reg
-d
,
$1
|grep
-w
$ib
|wc
-l
)
if
[
"
$n
"
-gt
0
]
;
then
echo
"============"
$ib
"========="
head
-1
$1
>
filetempreg.csv
grep
$ib
$1
>>
filetempreg.csv
grep
$
s
ib
$1
>>
filetempreg.csv
./res-new-ind-simple.sh filetempreg.csv
fi
done
res-new.sh
View file @
663c6b9c
cab
=
"Experimento"
for
j
in
"fault_successful"
"Model"
"hang(%)"
"crash(%)"
"masked(%)"
"sdc(%)"
"Hang-restart-fi"
"Registros"
do
cab
=
$(
echo
$cab
";"
$j
)
done
echo
$cab
>
$1
/resumen_total.csv
for
i
in
$(
ls
$1
|
grep
-v
-E
resumen_
*
)
do
echo
"res-new-ind.sh"
$1
/
$i
">"
$1
"/resumen_"
$i
./res-new-ind.sh
$1
/
$i
>
$1
/resumen_
$i
row
=
$(
echo
$i
)
for
j
in
"fault_successful"
"Model"
"hang(%)"
"crash(%)"
"masked(%)"
"sdc(%)"
"Hang-restart-fi"
"Registros"
do
campo
=
$(
grep
$j
$1
/resumen_
$i
|cut
-d
=
-f2
|head
-1
)
row
=
$(
echo
$row
";"
$campo
)
done
echo
$row
>>
$1
/resumen_total.csv
done
test.sh
View file @
663c6b9c
...
...
@@ -16,6 +16,12 @@ echo "Step 1 - Profiling the application for fault injection"
echo
"Step 2 - Running
${
FAULTS
}
on
${
CONFFILE
}
"
./fault_injector.py
-i
${
FAULTS
}
-c
${
CONFFILE
}
-n
1
$*
while
test
-f
"tmpxxx_num_rounds.conf"
do
cat
tmpxxx_num_rounds.conf
>>
tandas
./fault_injector.py
-i
${
FAULTS
}
-c
${
CONFFILE
}
-n
1
$*
echo
"==============================="
done
echo
"Fault injection finished"
...
...
test_hotspot.sh
View file @
663c6b9c
...
...
@@ -13,11 +13,12 @@ FAULTS=1000
CONFFILE
=
$DIR_RODINIA
/hotspot/hotspot.conf
echo
"Step 1 - Profiling the application for fault injection"
./app_profiler.py
-c
${
CONFFILE
}
$*
date
>>
comienzo
echo
"Step 2 - Running
${
FAULTS
}
on
${
CONFFILE
}
"
./fault_injector.py
-i
${
FAULTS
}
-c
${
CONFFILE
}
-n
1
$*
date
>>
final
echo
"Fault injection finished"
exit
0
test_mult.sh
View file @
663c6b9c
#!/bin/bash
# On SIGINT (Ctrl-C) print a message instead of letting the signal terminate
# this shell. The handler has to be passed to `trap` as ONE quoted word and the
# signal is spelled SIGINT; written as `trap echo "No paro" SIGINIT`, bash takes
# `echo` as the handler and rejects "No paro" and SIGINIT as invalid signal
# names, so no handler is installed at all.
trap 'echo "No paro"' SIGINT
# Stop the script as soon as any command exits with a non-zero status.
set -e
...
...
@@ -18,6 +19,13 @@ echo "Step 2 - Running ${FAULTS} on ${CONFFILE}"
./fault_injector.py
-i
${
FAULTS
}
-c
${
CONFFILE
}
-n
1
$*
echo
$?
while
test
-f
"tmpxxx_num_rounds.conf"
do
cat
tmpxxx_num_rounds.conf
>>
tandas
./fault_injector.py
-i
${
FAULTS
}
-c
${
CONFFILE
}
-n
1
$*
echo
"==============================="
done
echo
"Fault injection finished"
exit
0
test_mult_mBlock.sh
View file @
663c6b9c
...
...
@@ -16,7 +16,14 @@ echo "Step 1 - Profiling the application for fault injection"
echo
"Step 2 - Running
${
FAULTS
}
on
${
CONFFILE
}
"
./fault_injector.py
-i
${
FAULTS
}
-c
${
CONFFILE
}
-n
1
$*
echo
$?
while
test
-f
"tmpxxx_num_rounds.conf"
do
cat
tmpxxx_num_rounds.conf
>>
tandas
./fault_injector.py
-i
${
FAULTS
}
-c
${
CONFFILE
}
-n
1
$*
echo
"==============================="
done
echo
"Fault injection finished"
exit
0
test_nw.sh
View file @
663c6b9c
#!/bin/bash
# On SIGINT (Ctrl-C) print a message instead of letting the signal terminate
# this shell. The handler has to be passed to `trap` as ONE quoted word and the
# signal is spelled SIGINT; written as `trap echo "No paro" SIGINIT`, bash takes
# `echo` as the handler and rejects "No paro" and SIGINIT as invalid signal
# names, so no handler is installed at all.
trap 'echo "No paro"' SIGINT
# Stop the script as soon as any command exits with a non-zero status.
set -e
#uncomment to a more verbose script
...
...
@@ -18,6 +18,14 @@ echo "Step 1 - Profiling the application for fault injection"
echo
"Step 2 - Running
${
FAULTS
}
on
${
CONFFILE
}
"
./fault_injector.py
-i
${
FAULTS
}
-c
${
CONFFILE
}
-n
1
$*
while
test
-f
"tmpxxx_num_rounds.conf"
do
cat
tmpxxx_num_rounds.conf
>>
tandas
./fault_injector.py
-i
${
FAULTS
}
-c
${
CONFFILE
}
-n
1
$*
echo
"==============================="
done
echo
"Fault injection finished"
exit
0
test_rodinia.sh
View file @
663c6b9c
...
...
@@ -17,7 +17,12 @@ echo "Step 1 - Profiling the application for fault injection"
echo
"Step 2 - Running
${
FAULTS
}
on
${
CONFFILE
}
"
./fault_injector.py
-i
${
FAULTS
}
-c
${
CONFFILE
}
-n
1
$*
while
test
-f
"tmpxxx_num_rounds.conf"
do
cat
tmpxxx_num_rounds.conf
>>
tandas
./fault_injector.py
-i
${
FAULTS
}
-c
${
CONFFILE
}
-n
1
$*
echo
"==============================="
done
echo
"Fault injection finished"
exit
0
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment