Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
German Leon
Carol-fi
Commits
8621c3da
Commit
8621c3da
authored
Feb 11, 2021
by
German Leon
Browse files
Version inestable
parent
68e252bb
Changes
19
Show whitespace changes
Inline
Side-by-side
app_profiler.py
View file @
8621c3da
...
@@ -114,8 +114,8 @@ def main():
...
@@ -114,8 +114,8 @@ def main():
benchmark_binary
=
conf
.
get
(
'DEFAULT'
,
'benchmarkBinary'
)
benchmark_binary
=
conf
.
get
(
'DEFAULT'
,
'benchmarkBinary'
)
if
(
args
.
device
>
0
):
cp
.
LOGS_PATH
=
"{}
-{}-
{}"
.
format
(
cp
.
LOGS_PATH
,
args
.
device
,
benchmark_binary
.
split
(
'/'
)[
-
1
])
cp
.
LOGS_PATH
=
"{}
l_{}_d
{}"
.
format
(
cp
.
LOGS_PATH
,
benchmark_binary
.
split
(
'/'
)[
-
1
]
,
args
.
device
)
cp
.
rewrite_path
()
cp
.
rewrite_path
()
os
.
system
(
"rm -f {}"
.
format
(
cp
.
KERNEL_INFO_DIR
))
os
.
system
(
"rm -f {}"
.
format
(
cp
.
KERNEL_INFO_DIR
))
...
...
app_profiler_old.py
deleted
100755 → 0
View file @
68e252bb
#!/usr/bin/env python3
import
argparse
import
os
import
re
import
time
import
common_functions
as
cf
import
common_parameters
as
cp
def generate_dict(sm_version, input_file_name):
    """Map each kernel compiled for ``sm_version`` to its register count.

    The input file is expected to hold the compiler's stderr captured while
    building the application with ``-Xptxas -v``.  A "Compiling entry
    function '<kernel>' for '<sm_version>'" line selects the current kernel;
    the next "Used <N> registers" line supplies its register count.

    :param sm_version: architecture name to look for (for example ``sm_70``);
        it is matched literally.
    :param input_file_name: path of the captured compiler output.
    :return: dict mapping kernel name to its register count as an ``int``.
    """
    # Both patterns are loop-invariant, so build them once instead of once
    # per input line.  The architecture name is escaped so that any regex
    # metacharacter in it is matched literally rather than interpreted.
    entry_pattern = re.compile(
        r".*Compiling entry function.*'(\S+)'.*for.*'{}'.*".format(re.escape(sm_version))
    )
    registers_pattern = re.compile(r".*Used[ ]+(\d+)[ ]+registers.*")

    # Number of allocated registers per static kernel
    kernel_reg = {}
    # Name of the kernel whose register line we are currently waiting for
    kernel_name = ""
    check_for_register_count = False

    with open(input_file_name, "r") as f:
        for line in f:
            entry_match = entry_pattern.match(line)
            if entry_match:
                kernel_name = entry_match.group(1)
                check_for_register_count = True

            registers_match = registers_pattern.match(line)
            if check_for_register_count and registers_match:
                if kernel_name not in kernel_reg:
                    # Associate the extracted register number with the kernel name
                    kernel_reg[kernel_name] = int(registers_match.group(1))
                else:
                    print("Warning: {} exists in the kernel_reg dictionary. "
                          "Skipping this register count.".format(kernel_name))
                # Only the first register line after an entry line counts
                check_for_register_count = False

    return kernel_reg
def profiler_caller(gdb_exec, kernel, benchmark_binary, benchmark_args):
    """Run the profiler script under GDB and return the mean run time.

    The command is executed ``cp.MAX_TIMES_TO_PROFILE`` times; after each run
    any leftover GDB / benchmark processes are killed through ``cf.kill_all``.

    :param gdb_exec: GDB executable used to launch the profiler script.
    :param kernel: kernel name; only printed here.
    :param benchmark_binary: path of the benchmark binary loaded by GDB.
    :param benchmark_args: benchmark command-line arguments; literal ``\\n``
        sequences and backslashes are removed before use.
    :return: accumulated elapsed seconds divided by
        ``cp.MAX_TIMES_TO_PROFILE``.
    """
    acc_time = 0
    script = '{} -ex \'py arg0 = {}\' -n -batch -x {}'
    # Drop line-continuation residue (a backslash followed by "n", then any
    # remaining backslash) so the arguments fit on a single GDB command line.
    benchmark_args_striped = benchmark_args.replace('\\n', '').replace('\\', '')
    print("KERNEL" + kernel)
    init_string = '"file {}; set args {}"'.format(benchmark_binary, benchmark_args_striped)
    profiler_cmd = script.format(gdb_exec, init_string, cp.PROFILER_SCRIPT)
    print("Profiler caller")
    if cp.DEBUG:
        print("PROFILER CMD: {}".format(profiler_cmd))

    kill_cmd = "killall -9 {}; killall -9 {}".format(
        os.path.basename(gdb_exec), os.path.basename(benchmark_binary))

    for _ in range(0, cp.MAX_TIMES_TO_PROFILE):
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by wall-clock adjustments during the run.
        start = time.perf_counter()
        os.system(profiler_cmd)
        end = time.perf_counter()
        acc_time += end - start
        cf.kill_all(kill_cmd)

    return acc_time / cp.MAX_TIMES_TO_PROFILE
def generate_gold(gdb_exec, benchmark_binary, benchmark_args):
    """Run the benchmark once under GDB to produce the gold output files.

    Standard output is redirected to ``cp.GOLD_OUTPUT_PATH`` and standard
    error to ``cp.GOLD_ERR_PATH``.  Before running, the ``tmp`` directory
    under ``cp.LOGS_PATH`` (relative to this file) is created if needed and
    emptied.

    :param gdb_exec: GDB executable used to launch the profiler script.
    :param benchmark_binary: path of the benchmark binary loaded by GDB.
    :param benchmark_args: benchmark command-line arguments.
    :return: the value returned by ``os.system`` for the profiler command.
    """
    # Create tmp path and clean it if it exists
    tmp_path = os.path.dirname(os.path.realpath(__file__)) + "/" + cp.LOGS_PATH + "/tmp"
    # makedirs(exist_ok=True) also creates a missing logs directory and does
    # not fail if the path appears between a check and the creation.
    os.makedirs(tmp_path, exist_ok=True)
    os.system("rm -rf " + tmp_path + "/*")

    script = '{} -ex \'py arg0 = {}\' -n -batch -x {} > {} 2> {}'
    init_string = '"file {}; set args {}"'.format(benchmark_binary, benchmark_args)
    profiler_cmd = script.format(gdb_exec, init_string, cp.PROFILER_SCRIPT,
                                 cp.GOLD_OUTPUT_PATH, cp.GOLD_ERR_PATH)
    if cp.DEBUG:
        print("PROFILER CMD: {}".format(profiler_cmd))

    # Execute and save gold file
    return os.system(profiler_cmd)
def main():
    """Profile the configured benchmark and store its gold run.

    Steps, in order: remove the old kernel-info file, parse ``-c/--conf``,
    load the configuration, set the Python environment, time the benchmark
    with ``profiler_caller``, produce the gold output with ``generate_gold``,
    clean both gold files and save the measured time under ``max_time``.

    For the binary and its arguments, the ``*_noverificar`` key in the
    ``DEFAULT`` section is used when present, otherwise the plain key.

    :raises EnvironmentError: when the gold run returns a non-zero status.
    """
    # Start from a clean state: drop the kernel-info file of a previous run
    os.system("rm -f {}".format(cp.KERNEL_INFO_DIR))

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-c', '--conf', dest="config_file",
                            help='Configuration file', required=True)
    cli_args = arg_parser.parse_args()

    # Configuration with the data of every app that will be executed
    conf = cf.load_config_file(cli_args.config_file)

    # Environment variables must be set before anything is launched
    cf.set_python_env()

    ########################################################################
    # Profiler step
    # The maximum time is obtained by running the application; the same
    # step also captures the application output used as the golden copy.
    print("###################################################\n1 - Profiling application")

    binary_key = ('benchmarkBinary_noverificar'
                  if 'benchmarkBinary_noverificar' in conf['DEFAULT']
                  else 'benchmarkBinary')
    benchmark_binary = conf.get('DEFAULT', binary_key)

    args_key = ('benchmarkArgs_noverificar'
                if 'benchmarkArgs_noverificar' in conf['DEFAULT']
                else 'benchmarkArgs')
    benchmark_args = conf.get('DEFAULT', args_key)

    gdb_exec = conf.get("DEFAULT", "gdbExecName")
    kernel = conf.get('DEFAULT', 'kernel')

    max_time_app = profiler_caller(
        gdb_exec=gdb_exec,
        kernel=kernel,
        benchmark_binary=benchmark_binary,
        benchmark_args=benchmark_args,
    )

    # Saving gold
    print("Saving gold")
    gold_status = generate_gold(
        gdb_exec=gdb_exec,
        benchmark_binary=benchmark_binary,
        benchmark_args=benchmark_args,
    )
    if gold_status != 0:
        raise EnvironmentError("Gold generation did not finish well, the fault injection will not work")

    # Strip GDB noise from the captured stdout and stderr
    for gold_file in (cp.GOLD_OUTPUT_PATH, cp.GOLD_ERR_PATH):
        cf.remove_useless_information_from_output(gold_file)

    # Save the kernel configuration txt file
    cf.save_file(file_path=cp.KERNEL_INFO_DIR, data={'max_time': max_time_app})

    print("1 - Profile finished\n###################################################")
if
__name__
==
'__main__'
:
main
()
classes/RunGDB.py
View file @
8621c3da
...
@@ -37,11 +37,14 @@ class RunGDB(Thread):
...
@@ -37,11 +37,14 @@ class RunGDB(Thread):
start_cmd
=
"{}/{}"
.
format
(
self
.
__base_path
,
self
.
__flip_script
)
start_cmd
=
"{}/{}"
.
format
(
self
.
__base_path
,
self
.
__flip_script
)
script
=
'env CUDA_VISIBLE_DEVICES={} {} -ex
\'
py arg0 = "{}"
\'
-n -batch -x {} > {} 2>{} &'
script
=
'env CUDA_VISIBLE_DEVICES={} {} -ex
\'
py arg0 = "{}"
\'
-n -batch -x {} > {} 2>{} &'
#script = 'env CUDA_VISIBLE_DEVICES=1 {} -ex \'py arg0 = "{}"\' -n -batch -x {} > {} 2>{} &'
#printf(script.format(self.__gpu_to_execute, self.__gdb_exe_name, self.__gdb_env_string,start_cmd, self.__inj_output_path,self.__inj_err_path))
#script = 'env CUDA_VISIBLE_DEVICES={} {} -ex \'py arg0 = "{}"\' -n -batch -x {} &'
os
.
system
(
script
.
format
(
self
.
__gpu_to_execute
,
self
.
__gdb_exe_name
,
self
.
__gdb_env_string
,
os
.
system
(
script
.
format
(
self
.
__gpu_to_execute
,
self
.
__gdb_exe_name
,
self
.
__gdb_env_string
,
start_cmd
,
self
.
__inj_output_path
,
start_cmd
,
self
.
__inj_output_path
,
self
.
__inj_err_path
))
self
.
__inj_err_path
))
#os.system(script.format(self.__gpu_to_execute, self.__gdb_exe_name, self.__gdb_env_string,start_cmd))
print
(
script
.
format
(
self
.
__gpu_to_execute
,
self
.
__gdb_exe_name
,
self
.
__gdb_env_string
,
print
(
script
.
format
(
self
.
__gpu_to_execute
,
self
.
__gdb_exe_name
,
self
.
__gdb_env_string
,
start_cmd
,
self
.
__inj_output_path
,
start_cmd
,
self
.
__inj_output_path
,
self
.
__inj_err_path
))
self
.
__inj_err_path
))
...
...
classes/SignalApp.py
View file @
8621c3da
...
@@ -13,11 +13,11 @@ Signal the app to stop so GDB can execute the script to flip a value
...
@@ -13,11 +13,11 @@ Signal the app to stop so GDB can execute the script to flip a value
class
SignalApp
(
Thread
):
class
SignalApp
(
Thread
):
def
__init__
(
self
,
signal_cmd
,
max_wait_time
,
log_path
,
unique_id
,
signals_to_send
,
init_sleep
,
syncro
,
waitfinish
):
def
__init__
(
self
,
file_connection
,
max_wait_time
,
log_path
,
unique_id
,
signals_to_send
,
init_sleep
,
syncro
,
waitfinish
):
global
hang
global
crashsystem
,
hang
hang
=
False
hang
=
False
super
(
SignalApp
,
self
).
__init__
()
super
(
SignalApp
,
self
).
__init__
()
self
.
__
signal_cmd
=
signal_cmd
self
.
__
file_connection
=
file_connection
os
.
system
(
"rm -f {}"
.
format
(
log_path
))
os
.
system
(
"rm -f {}"
.
format
(
log_path
))
self
.
__log
=
Logging
(
log_file
=
log_path
,
unique_id
=
unique_id
)
self
.
__log
=
Logging
(
log_file
=
log_path
,
unique_id
=
unique_id
)
...
@@ -32,23 +32,11 @@ class SignalApp(Thread):
...
@@ -32,23 +32,11 @@ class SignalApp(Thread):
self
.
_waitfinish
=
waitfinish
self
.
_waitfinish
=
waitfinish
def
run
(
self
):
def
run
(
self
):
# Send a series of signal to make sure gdb will flip a value in one of the interrupt signals
# Send a series of signal to make sure gdb will flip a value in one of the interrupt signals
log_string
=
"Sending a signal using command: {} after {}s and each {}s."
.
format
(
self
.
__signal_cmd
,
self
.
__init_wait_time
,
self
.
__time_to_sleep
)
#
log_string = "Sending a signal using command: {} after {}s and each {}s.".format(self.__signal_cmd, self.__init_wait_time,self.__time_to_sleep)
log_string
=
"Sending a signal each {}s of {} times."
.
format
(
self
.
__time_to_sleep
,
self
.
__signals_to_send
)
if
cp
.
DEBUG
:
if
cp
.
DEBUG
:
self
.
__log
.
info
(
log_string
)
self
.
__log
.
info
(
log_string
)
crashsystem
=
False
# Sleep for a random time
# time.sleep(self.__init_wait_time)
#a=[ uniform.randint(1,1000) for _ in range(self.__signals_to_send))]
#a.sort()
#os.system(self.__signal_cmd)
#for line in os.popen(self.__signal_cmd):
# pid=int(line)
#os.kill(int(self.__signal_cmd),signal.SIGINT)
self
.
__log
.
info
(
log_string
)
try
:
try
:
(
self
.
_syncro
).
wait
()
(
self
.
_syncro
).
wait
()
#except threading.BrokenBarrierError:
#except threading.BrokenBarrierError:
...
@@ -57,31 +45,51 @@ class SignalApp(Thread):
...
@@ -57,31 +45,51 @@ class SignalApp(Thread):
self
.
__log
.
info
(
"Breakpoint inicial fuera de tiempo"
)
self
.
__log
.
info
(
"Breakpoint inicial fuera de tiempo"
)
#(self._waitfinish).wait()
#(self._waitfinish).wait()
(
self
.
_syncro
).
reset
()
(
self
.
_syncro
).
reset
()
hang
=
True
crashsystem
=
True
self
.
__log
.
info
(
"Timeout syncron of breakpoint
\n
"
)
return
pid
=
(
os
.
popen
(
self
.
__signal_cmd
)).
read
()
#self.__log.info("Timeout syncron of breakpoint\n")
pid
=
int
(
pid
.
split
(
'
\n
'
)[
0
])
pidf
=
open
(
self
.
__file_connection
,
"r"
)
pid
=
int
(
pidf
.
read
())
pidf
.
close
()
#os.remove(self.__file_connection)
#os.system(self.__signal_cmd)
#pidf = (os.popen(self.__signal_cmd))
#print("Comando {} y resultado {} de tipo {}".format(self.__signal_cmd,pid, type (pid)) )
#print("Comando {} y resultado {} de tipo {}".format(self.__signal_cmd,pid, type (pid)) )
#pid=int(pid.split('\n')[0])
#pidf.close()
print
(
" resultado pid {} de tipo {}"
.
format
(
pid
,
type
(
pid
))
)
# Time random
# Time random
#print ("INIT:"+str(self.__init_wait_time)+"sleep"+str())
#print ("INIT:"+str(self.__init_wait_time)+"sleep"+str())
time
.
sleep
(
self
.
__init_wait_time
)
time
.
sleep
(
self
.
__init_wait_time
)
crash
=
False
for
signals
in
range
(
0
,
self
.
__signals_to_send
):
for
signals
in
range
(
0
,
self
.
__signals_to_send
):
#os.system("{} > /dev/null 2>/dev/null".format(self.__signal_cmd))
#os.system("{} > /dev/null 2>/dev/null".format(self.__signal_cmd))
try
:
try
:
os
.
kill
(
pid
,
signal
.
SIGINT
)
os
.
kill
(
pid
,
signal
.
SIGINT
)
self
.
__log
.
info
(
"sending signal {}"
.
format
(
signals
))
except
:
except
:
self
.
__log
.
info
(
"Process is dead"
)
self
.
__log
.
info
(
"Process is dead. Crash?"
)
os
.
kill
(
pid
,
signal
.
SIGKILL
)
(
self
.
_waitfinish
).
abort
()
(
self
.
_waitfinish
).
reset
()
(
self
.
_syncro
).
abort
()
(
self
.
_syncro
).
reset
()
crash
=
True
break
break
self
.
__log
.
info
(
"sending signal {}"
.
format
(
signals
))
try
:
try
:
(
self
.
_syncro
).
wait
()
(
self
.
_syncro
).
wait
()
except
:
except
:
(
self
.
_syncro
).
abort
()
(
self
.
_syncro
).
abort
()
break
#
break
#print("Breakpoint fuera de tiempo")
#print("Breakpoint fuera de tiempo")
(
self
.
_syncro
).
reset
()
(
self
.
_syncro
).
reset
()
time
.
sleep
(
self
.
__time_to_sleep
)
time
.
sleep
(
self
.
__time_to_sleep
)
#(self._syncro).reset()
#(self._syncro).reset()
if
not
crash
:
try
:
try
:
(
self
.
_waitfinish
).
wait
()
(
self
.
_waitfinish
).
wait
()
#except threading.BrokenBarrierError:
#except threading.BrokenBarrierError:
...
...
codes/hotspot/hotspot.conf
View file @
8621c3da
...
@@ -17,8 +17,8 @@ faultModel = 0
...
@@ -17,8 +17,8 @@ faultModel = 0
# RF -> Register File
# RF -> Register File
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Address (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Address (NOT IMPLEMENTED YET)
injectionSite
=
RF
injectionSite
=
RF
# Max time factor to finish the app, this will be multiplied by the application running time
# Max time factor to finish the app, this will be multiplied by the application running time
# For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is
# For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is
# Considered as a crash is 10s
# Considered as a crash is 10s
...
...
codes/lavaMD/lavaMD.conf
View file @
8621c3da
...
@@ -30,7 +30,7 @@ maxWaitTimes = 5
...
@@ -30,7 +30,7 @@ maxWaitTimes = 5
benchmarkBinary
= /
home
/
badia
/
mycarol
-
fi
/
codes
/
lavaMD
/
lavaMD
benchmarkBinary
= /
home
/
badia
/
mycarol
-
fi
/
codes
/
lavaMD
/
lavaMD
# Commands to set the session inside GDB environment
# Commands to set the session inside GDB environment
benchmarkArgs
= -
boxes1d
4
0
benchmarkArgs
= -
boxes1d
6
0
# CSV output file. It will be overwritten at each injection
# CSV output file. It will be overwritten at each injection
csvFile
=
results
/
lavaMD_IO
.
csv
csvFile
=
results
/
lavaMD_IO
.
csv
...
...
codes/lavaMD/main.c
View file @
8621c3da
...
@@ -313,8 +313,10 @@ int correct=true;
...
@@ -313,8 +313,10 @@ int correct=true;
//printf ("Z %f golden %f dif %f Error %d\n",fv_cpu[i].z,golden[i][3],fabs(fv_cpu[i].z-golden[i][3]) ,semicorrect );
//printf ("Z %f golden %f dif %f Error %d\n",fv_cpu[i].z,golden[i][3],fabs(fv_cpu[i].z-golden[i][3]) ,semicorrect );
if
(
!
semicorrect
)
{
if
(
!
semicorrect
)
{
printf
(
"Error! En la componete %05d
\n
"
,
i
//printf("Error! En la componete %05d \n", i
);
//);
//
int
basura
=
0
;
#pragma omp critical
#pragma omp critical
{
{
correct
=
false
;
correct
=
false
;
...
...
codes/lavaMD/main_h.c
View file @
8621c3da
...
@@ -313,8 +313,8 @@ int correct=true;
...
@@ -313,8 +313,8 @@ int correct=true;
//printf ("Z %f golden %f dif %f Error %d\n",fv_cpu[i].z,golden[i][3],fabs(fv_cpu[i].z-golden[i][3]) ,semicorrect );
//printf ("Z %f golden %f dif %f Error %d\n",fv_cpu[i].z,golden[i][3],fabs(fv_cpu[i].z-golden[i][3]) ,semicorrect );
if
(
!
semicorrect
)
{
if
(
!
semicorrect
)
{
printf
(
"Error! En la componete %05d
\n
"
,
i
//
printf("Error! En la componete %05d \n", i
);
//
);
#pragma omp critical
#pragma omp critical
{
{
correct
=
false
;
correct
=
false
;
...
...
codes/lavaMD/main_intento.c
View file @
8621c3da
...
@@ -319,7 +319,7 @@ int correct=true;
...
@@ -319,7 +319,7 @@ int correct=true;
//printf ("Z %f golden %f dif %f Error %d\n",fv_cpu[i].z,golden[3],fabs(fv_cpu[i].z-golden[3]) ,semicorrect );
//printf ("Z %f golden %f dif %f Error %d\n",fv_cpu[i].z,golden[3],fabs(fv_cpu[i].z-golden[3]) ,semicorrect );
if
(
!
semicorrect
)
{
if
(
!
semicorrect
)
{
printf
(
"Error! En la componete %05d
\n
"
,
i
//
printf("Error! En la componete %05d \n", i
);
);
#pragma omp critical
#pragma omp critical
{
{
...
...
codes/lud/lud.conf
View file @
8621c3da
...
@@ -17,7 +17,7 @@ faultModel = 0
...
@@ -17,7 +17,7 @@ faultModel = 0
# RF -> Register File
# RF -> Register File
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Address (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Address (NOT IMPLEMENTED YET)
injectionSite
=
RF
injectionSite
=
INST_OUT
# Max time factor to finish the app, this will be multiplied by the application running time
# Max time factor to finish the app, this will be multiplied by the application running time
# For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is
# For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is
...
@@ -26,7 +26,7 @@ maxWaitTimes = 5
...
@@ -26,7 +26,7 @@ maxWaitTimes = 5
# binary file of the application
# binary file of the application
# Must be full path
# Must be full path
benchmarkBinary
= /
home
/
badia
/
rodinia_3
.
1
/
cuda
/
lud
/
cuda
/
lud_cuda
benchmarkBinary
= /
home
/
badia
/
mycarol
-
fi
/
codes
/
lud
/
cuda
/
lud_cuda
#benchmarkBinary_noverificar = /home/badia/rodinia_3.1/cuda/lavaMD/lavaMD
#benchmarkBinary_noverificar = /home/badia/rodinia_3.1/cuda/lavaMD/lavaMD
...
@@ -36,10 +36,10 @@ benchmarkArgs = -s 10000 -v
...
@@ -36,10 +36,10 @@ benchmarkArgs = -s 10000 -v
benchmarkArgs_noverificar
= -
s
10000
benchmarkArgs_noverificar
= -
s
10000
# CSV output file. It will be overwritten at each injection
# CSV output file. It will be overwritten at each injection
csvFile
=
results
/
lud_
RF
.
csv
csvFile
=
results
/
lud_
IO
.
csv
# You should create a script on the benchmark source folder to verify GOLD_OUTPUT x INJ_OUTPUT
# You should create a script on the benchmark source folder to verify GOLD_OUTPUT x INJ_OUTPUT
goldenCheckScript
=
../
rodinia_3
.
1
/
cuda
/
lud
/
sdc_check
.
sh
goldenCheckScript
=
codes
/
lud
/
sdc_check
.
sh
# Number of signals that will be sent to the application
# Number of signals that will be sent to the application
seqSignals
=
20
seqSignals
=
20
...
...
codes/mmElem/matrixmul.conf
View file @
8621c3da
...
@@ -17,9 +17,9 @@ faultModel = 0
...
@@ -17,9 +17,9 @@ faultModel = 0
# RF -> Register File
# RF -> Register File
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Address (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Address (NOT IMPLEMENTED YET)
injectionSite
=
RF
#
injectionSite = RF
#
injectionSite = INST_OUT
injectionSite
=
INST_OUT
# Max time factor to finish the app, this will be multiplied by the application running time
# Max time factor to finish the app, this will be multiplied by the application running time
# For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is
# For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is
# Considered as a crash is 10s
# Considered as a crash is 10s
...
...
codes/nw/nw.conf
View file @
8621c3da
...
@@ -17,8 +17,9 @@ faultModel = 0
...
@@ -17,8 +17,9 @@ faultModel = 0
# RF -> Register File
# RF -> Register File
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Address (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Address (NOT IMPLEMENTED YET)
injectionSite
=
RF
#
injectionSite = RF
injectionSite
=
INST_OUT
# Max time factor to finish the app, this will be multiplied by the application running time
# Max time factor to finish the app, this will be multiplied by the application running time
# For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is
# For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is
# Considered as a crash is 10s
# Considered as a crash is 10s
...
@@ -37,7 +38,7 @@ benchmarkArgs = 32000 1
...
@@ -37,7 +38,7 @@ benchmarkArgs = 32000 1
#benchmarkArgs_noverificar = -s 2048
#benchmarkArgs_noverificar = -s 2048
# CSV output file. It will be overwritten at each injection
# CSV output file. It will be overwritten at each injection
csvFile
=
results
/
nw_
RF
.
csv
csvFile
=
results
/
nw_
IO
.
csv
# You should create a script on the benchmark source folder to verify GOLD_OUTPUT x INJ_OUTPUT
# You should create a script on the benchmark source folder to verify GOLD_OUTPUT x INJ_OUTPUT
goldenCheckScript
=
codes
/
nw
/
sdc_check
.
sh
goldenCheckScript
=
codes
/
nw
/
sdc_check
.
sh
...
...
common_parameters.py
View file @
8621c3da
import
signal
import
signal
import
os
# Max size of register
# Max size of register
SINGLE_MAX_SIZE_REGISTER
=
32
SINGLE_MAX_SIZE_REGISTER
=
32
...
@@ -8,7 +9,9 @@ SINGLE_MAX_SIZE_REGISTER = 32
...
@@ -8,7 +9,9 @@ SINGLE_MAX_SIZE_REGISTER = 32
MAX_TIMES_TO_PROFILE
=
2
MAX_TIMES_TO_PROFILE
=
2
# Log path to store all injections info
# Log path to store all injections info
LOGS_PATH
=
'logs'
LOGS_PATH
=
'logs/'
FILE_PID_PIF
=
"/pid_program_injected"
# Internal python scripts
# Internal python scripts
...
@@ -102,7 +105,7 @@ FOCUS_ERROR_STRING = "Focus not set on any active CUDA kernel."
...
@@ -102,7 +105,7 @@ FOCUS_ERROR_STRING = "Focus not set on any active CUDA kernel."
def
rewrite_path
():
def
rewrite_path
():
# Temporary file to store kernel information
# Temporary file to store kernel information
global
KERNEL_INFO_DIR
,
GOLD_ERR_PATH
,
GOLD_OUTPUT_PATH
,
INJ_OUTPUT_PATH
,
INJ_ERR_PATH
,
DIFF_LOG
,
DIFF_ERR_LOG
,
SIGNAL_APP_LOG
,
LOG_DEFAULT_NAME
global
KERNEL_INFO_DIR
,
GOLD_ERR_PATH
,
GOLD_OUTPUT_PATH
,
INJ_OUTPUT_PATH
,
INJ_ERR_PATH
,
DIFF_LOG
,
DIFF_ERR_LOG
,
SIGNAL_APP_LOG
,
LOG_DEFAULT_NAME
,
FILE_PID_PIF
KERNEL_INFO_DIR
=
LOGS_PATH
+
'/tmp/carol-fi-kernel-info.txt'
KERNEL_INFO_DIR
=
LOGS_PATH
+
'/tmp/carol-fi-kernel-info.txt'
# For golden generation
# For golden generation
GOLD_ERR_PATH
=
LOGS_PATH
+
'/tmp/carol_fi_golden_bench_err.txt'
GOLD_ERR_PATH
=
LOGS_PATH
+
'/tmp/carol_fi_golden_bench_err.txt'
...
@@ -113,13 +116,14 @@ def rewrite_path():
...
@@ -113,13 +116,14 @@ def rewrite_path():
INJ_ERR_PATH
=
LOGS_PATH
+
'/tmp/carol_fi_inj_bench_err_{}.txt'
INJ_ERR_PATH
=
LOGS_PATH
+
'/tmp/carol_fi_inj_bench_err_{}.txt'
# Temporary difference logs
# Temporary difference logs
DIFF_LOG
=
LOGS_PATH
+
'/tmp/diff_{}.log'
DIFF_LOG
=
LOGS_PATH
+
'/tmp/diff_{}.log'
DIFF_ERR_LOG
=
LOGS_PATH
+
'/tmp/diff_err_{}.log'
DIFF_ERR_LOG
=
LOGS_PATH
+
'/tmp/diff_err_{}.log'
SIGNAL_APP_LOG
=
LOGS_PATH
+
'/tmp/signal_app_thread_{}.txt'
SIGNAL_APP_LOG
=
LOGS_PATH
+
'/tmp/signal_app_thread_{}.txt'
PATH_CURRENT
=
os
.
path
.
dirname
(
os
.
path
.
realpath
(
__file__
))
PATH_TMP
=
PATH_CURRENT
+
"/"
+
LOGS_PATH
+
"/tmp"
FILE_PID_PIF
=
PATH_TMP
+
"/pid_program_injected"
# Common body of log filename
# Common body of log filename
...
...
fault_injector.py
View file @
8621c3da
...
@@ -75,7 +75,7 @@ def signal_handler(sig, frame):
...
@@ -75,7 +75,7 @@ def signal_handler(sig, frame):
print
(
"Current_path "
+
current_path
)
print
(
"Current_path "
+
current_path
)
for
th
in
gpus_threads
:
for
th
in
gpus_threads
:
th
.
join
()
th
.
join
()
#
sys.exit(0)
sys
.
exit
(
0
)
"""
"""
...
@@ -384,15 +384,16 @@ def gdb_inject_fault(**kwargs):
...
@@ -384,15 +384,16 @@ def gdb_inject_fault(**kwargs):
#init_wait_time = uniform(0, end_time * cp.MAX_SIGNAL_BEFORE_ENDING)
#init_wait_time = uniform(0, end_time * cp.MAX_SIGNAL_BEFORE_ENDING)
#time_to_sleep = (max_wait_time - self.__init_wait_time) / seq_signals
#time_to_sleep = (max_wait_time - self.__init_wait_time) / seq_signals
# Generate configuration file for specific test
# Generate configuration file for specific test
gdb_env_string
=
"{}|{}|{}|{}|{}|{}|file {}; set args {}|{}"
.
format
(
gdb_kernel
,
os
.
getpid
(),
maxregs
,
","
.
join
(
str
(
i
)
for
i
in
bits_to_flip
),
fault_model
,
gdb_env_string
=
" {}|{}|{}|{}|{}|{}|{}|file {}; set args {}|{}"
.
format
(
gdb_kernel
,
os
.
getpid
(),
maxregs
,
cp
.
FILE_PID_PIF
,
","
.
join
(
str
(
i
)
for
i
in
bits_to_flip
),
fault_model
,
flip_log_file
,
benchmark_binary
,
benchmark_args
,
injection_site
)
flip_log_file
,
benchmark_binary
,
benchmark_args
,
injection_site
)
if
cp
.
DEBUG
:
if
cp
.
DEBUG
:
cf
.
printf
(
"THREAD {} ENV GENERATE FINISHED"
.
format
(
host_thread
))
cf
.
printf
(
"THREAD {} ENV GENERATE FINISHED"
.
format
(
host_thread
))
# First we have to start the SignalApp thread
# First we have to start the SignalApp thread
signal_app_thread
=
SignalApp
(
max_wait_time
=
end_time
,
signal_cmd
=
signal_cmd
,
signal_app_thread
=
SignalApp
(
max_wait_time
=
end_time
,
file_connection
=
cp
.
FILE_PID_PIF
,
log_path
=
signal_app_log
,
unique_id
=
unique_id
,
log_path
=
signal_app_log
,
unique_id
=
unique_id
,
signals_to_send
=
seq_signals
,
signals_to_send
=
seq_signals
,
init_sleep
=
init_sleep
,
syncro
=
syncro
,
waitfinish
=
wait_finish
)
init_sleep
=
init_sleep
,
syncro
=
syncro
,
waitfinish
=
wait_finish
)
...
@@ -556,7 +557,7 @@ by sending a SIGINT signal to the application
...
@@ -556,7 +557,7 @@ by sending a SIGINT signal to the application
def
fault_injection_by_signal
(
**
kwargs
):
def
fault_injection_by_signal
(
**
kwargs
):
# Global rows list
# Global rows list
global
lock
,
exit_injector
,
num_rounds
,
kill_strings
global
lock
,
exit_injector
,
num_rounds
,
kill_strings
,
crashsystem
benchmark_binary
=
kwargs
.
get
(
'benchmark_binary'
)
benchmark_binary
=
kwargs
.
get
(
'benchmark_binary'
)
#kwargs['signal_cmd'] = "killall -2 {}".format(os.path.basename(benchmark_binary))
#kwargs['signal_cmd'] = "killall -2 {}".format(os.path.basename(benchmark_binary))
kwargs
[
'signal_cmd'
]
=
"pgrep {}"
.
format
(
os
.
path
.
basename
(
benchmark_binary
))
kwargs
[
'signal_cmd'
]
=
"pgrep {}"
.
format
(
os
.
path
.
basename
(
benchmark_binary
))
...
@@ -566,19 +567,23 @@ def fault_injection_by_signal(**kwargs):
...
@@ -566,19 +567,23 @@ def fault_injection_by_signal(**kwargs):
injection_site
=
kwargs
.
get
(
'injection_site'
)
injection_site
=
kwargs
.
get
(
'injection_site'
)
summary_file
=
kwargs
.
get
(
'summary_file'
)
summary_file
=
kwargs
.
get
(
'summary_file'
)
header
=
kwargs
.
get
(
'header'
)
header
=
kwargs
.
get
(
'header'
)
max_fallos
=
10
max_fallos
=
5
acc_fault_injected
=
0
acc_fault_injected
=
0
cf
.
printf
(
"-----------------------------------------------------------------------------------------------"
)
cf
.
printf
(
"-----------------------------------------------------------------------------------------------"
)
# Execute the fault injector for each one of the sections(apps) of the configuration file
# Execute the fault injector for each one of the sections(apps) of the configuration file
for
fault_model
in
fault_models
:
for
fault_model
in
fault_models
:
# Execute iterations number of fault injection for a specific app
# Execute iterations number of fault injection for a specific app
print
(
"================"
)
try
:
try
:
print
(
num_rounds
)
ret_profiler
=
cf
.
load_config_file
(
"tmpxxx_num_rounds.conf"
)
ret_profiler
=
cf
.
load_config_file
(
"tmpxxx_num_rounds.conf"
)
num_rounds
=
int
(
ret_profiler
.
get
(
'DEFAULT'
,
'Ocurrencias'
))
num_rounds
=
int
(
ret_profiler
.
get
(
'DEFAULT'
,
'Ocurrencias'
))
print
(
num_rounds
)
os
.
system
(
"rm tmpxxx_num_rounds.conf"
)
os
.
system
(
"rm tmpxxx_num_rounds.conf"
)
except
:
except
:
num_rounds
=
1
num_rounds
=
1
print
(
num_rounds
)
print
(
"================"
)
while
num_rounds
<=
iterations
:
while
num_rounds
<=
iterations
:
if
exit_injector
:
if
exit_injector
:
return
return
...
@@ -625,16 +630,6 @@ def fault_injection_by_signal(**kwargs):
...
@@ -625,16 +630,6 @@ def fault_injection_by_signal(**kwargs):
acc_fault_injeted
=
0
acc_fault_injeted
=
0
else
:
else
:
acc_fault_injected
+=
1
acc_fault_injected
+=
1
if
(
acc_fault_injected
==
(
max_fallos
/
2
)):
for
cmd
in
kill_strings
.
split
(
";"
):
os
.
system
(
cmd
+
" > /dev/null 2>&1"
)
for
th
in
gpus_threads
:
try
:
th
.
join
()
except
:
nulo
=
1
time
.
sleep
(
240
)
if
(
acc_fault_injected
==
max_fallos
):
if
(
acc_fault_injected
==
max_fallos
):
exit_injector
=
True
exit_injector
=
True
for
cmd
in
kill_strings
.
split
(
";"
):
for
cmd
in
kill_strings
.
split
(
";"
):
...
@@ -685,9 +680,12 @@ def main():
...
@@ -685,9 +680,12 @@ def main():
# Read the configuration file with data for all the apps that will be executed
# Read the configuration file with data for all the apps that will be executed
conf
=
cf
.
load_config_file
(
args
.
config_file
)
conf
=
cf
.
load_config_file
(
args
.
config_file
)
benchmark_binary_default
=
conf
.
get
(
'DEFAULT'
,
'benchmarkBinary'
)
benchmark_binary_default
=
conf
.
get
(
'DEFAULT'
,
'benchmarkBinary'
)
if
(
args
.
device
>
0
):
cp
.
LOGS_PATH
=
"{}
-{}-
{}"
.
format
(
cp
.
LOGS_PATH
,
args
.
device
,
benchmark_binary_default
.
split
(
'/'
)[
-
1
])
cp
.
LOGS_PATH
=
"{}
l_{}_d
{}"
.
format
(
cp
.
LOGS_PATH
,
benchmark_binary_default
.
split
(
'/'
)[
-
1
]
,
args
.
device
)
cp
.
rewrite_path
()
cp
.
rewrite_path
()
cf
.
printf
(
cp
.
FILE_PID_PIF
)
# Connect signal SIGINT to stop the fault injector
# Connect signal SIGINT to stop the fault injector
kill_strings
=
""
kill_strings
=
""
signal
.
signal
(
signal
.
SIGINT
,
signal_handler
)
signal
.
signal
(
signal
.
SIGINT
,
signal_handler
)
...
@@ -768,7 +766,8 @@ def main():
...
@@ -768,7 +766,8 @@ def main():
'summary_file'
:
summary_file
,
'summary_file'
:
summary_file
,
'header'
:
fieldnames
'header'
:
fieldnames
}
}
syncro
=
threading
.
Barrier
(
2
,
timeout
=
kwargs
.
get
(
'max_time'
)
)
#syncro = threading.Barrier(2, timeout=5*kwargs.get('max_time') )
syncro
=
threading
.
Barrier
(
2
,
timeout
=
80
)
wait_finish
=
threading
.
Barrier
(
2
,
timeout
=
kwargs
.
get
(
'max_time'
))
wait_finish
=
threading
.
Barrier
(
2
,
timeout
=
kwargs
.
get
(
'max_time'
))
kill_strings
+=
"killall -9 {};killall -9 {};"
.
format
(
os
.
path
.
basename
(
benchmark_binary
),
os
.
path
.
basename
(
gdb
))
kill_strings
+=
"killall -9 {};killall -9 {};"
.
format
(
os
.
path
.
basename
(
benchmark_binary
),
os
.
path
.
basename
(
gdb
))
...
...
flip_value.py
View file @
8621c3da
...
@@ -4,7 +4,7 @@ import time
...
@@ -4,7 +4,7 @@ import time
from
classes.BitFlip
import
BitFlip
from
classes.BitFlip
import
BitFlip
from
classes.Logging
import
Logging
from
classes.Logging
import
Logging
import
common_parameters
as
cp
import
common_parameters
as
cp
import
common_functions
as
cf
# All common functions will be at common_functions module
"""
"""
Handler attached to exit event
Handler attached to exit event
"""
"""
...
@@ -30,7 +30,7 @@ signal
...
@@ -30,7 +30,7 @@ signal
def
set_event
(
event
):
def
set_event
(
event
):
# Accessing global vars
# Accessing global vars
global
global_logging
,
was_hit
,
bit_flip
,
bp
,
t
global
global_logging
,
was_hit
,
bit_flip
,
bp
,
t
,
primero
if
(
isinstance
(
event
,
gdb
.
BreakpointEvent
)):
if
(
isinstance
(
event
,
gdb
.
BreakpointEvent
)):
global_logging
.
info
(
"Before breakpoint"
+
str
(
time
.
clock
()
-
t
))
global_logging
.
info
(
"Before breakpoint"
+
str
(
time
.
clock
()
-
t
))
global_logging
.
info
(
"Enviado senal a "
+
str
(
pid
))
global_logging
.
info
(
"Enviado senal a "
+
str
(
pid
))
...
@@ -71,12 +71,11 @@ def main():
...
@@ -71,12 +71,11 @@ def main():
# Connecting to a exit handler event
# Connecting to a exit handler event
gdb
.
events
.
exited
.
connect
(
exit_handler
)
gdb
.
events
.
exited
.
connect
(
exit_handler
)
# Connecting to a stop signal event
gdb
.
events
.
stop
.
connect
(
set_event
)
# Get variables values from environment
# Get variables values from environment
# First parse the line
# First parse the line
[
kernel
,
pid
,
max_regs
,
bits_to_flip
,
fault_model
,
flip_log_file
,
[
kernel
,
pid
,
max_regs
,
file_connect
,
bits_to_flip
,
fault_model
,
flip_log_file
,
gdb_init_strings
,
injection_site
]
=
arg0
.
split
(
'|'
)
gdb_init_strings
,
injection_site
]
=
arg0
.
split
(
'|'
)
...
@@ -100,13 +99,26 @@ def main():
...
@@ -100,13 +99,26 @@ def main():
# Start app execution
# Start app execution
t
=
time
.
clock
();
t
=
time
.
clock
();
gdb
.
Breakpoint
(
'main'
)
#gdb.execute("break "+kernel)
#gdb.execute("break "+kernel)
bp
=
gdb
.
Breakpoint
(
kernel
)
bp
=
gdb
.
Breakpoint
(
kernel
)
global_logging
.
info
(
"Put Break "
+
str
(
time
.
clock
()
-
t
))
global_logging
.
info
(
"Put Break "
+
str
(
time
.
clock
()
-
t
))
gdb
.
execute
(
"r"
)
gdb
.
execute
(
"r"
)
try
:
pid_bench
=
gdb
.
execute
(
"info proc"
,
to_string
=
True
).
splitlines
()[
0
].
split
(
' '
)[
1
]
except
:
global_logging
.
info
(
"problema solictando info proc"
)
global_logging
.
info
(
"PID: {}"
.
format
(
pid_bench
))
fp
=
open
(
file_connect
,
"w"
)
fp
.
write
(
pid_bench
)
fp
.
close
()
# Connecting to a stop signal event
gdb
.
events
.
stop
.
connect
(
set_event
)
print
(
"4"
)
i
=
0
i
=
0
try
:
try
:
while
'The program'
not
in
gdb
.
execute
(
'c'
,
to_string
=
True
):
while
'The program'
not
in
gdb
.
execute
(
'c'
,
to_string
=
True
):
...
@@ -120,7 +132,7 @@ def main():
...
@@ -120,7 +132,7 @@ def main():
if
'Failed'
in
err_str
:
if
'Failed'
in
err_str
:
gdb
.
execute
(
'quit'
)
gdb
.
execute
(
'quit'
)
global_logging
.
exception
(
"QUIT REQUIRED"
)
global_logging
.
exception
(
"QUIT REQUIRED"
)
print
(
"6"
)
# Call main execution
# Call main execution
global_logging
=
None
global_logging
=
None
...
...
prueba.py
View file @
8621c3da
...
@@ -130,13 +130,16 @@ def main():
...
@@ -130,13 +130,16 @@ def main():
gdb
.
execute
(
"file codes/mmElem/matrixMul"
)
gdb
.
execute
(
"file codes/mmElem/matrixMul"
)
gdb
.
execute
(
"set arg -wA=16384 -hA=16384 -hB=16384 -wB=16384"
)
gdb
.
execute
(
"set arg -wA=16384 -hA=16384 -hB=16384 -wB=16384"
)
#gdb.execute("set cuda break_on_launch application")
#gdb.execute("set cuda break_on_launch application")
# bp=gdb.Breakpoint('matrixMulCUDA')
bm
=
gdb
.
Breakpoint
(
'main'
)
bp
=
gdb
.
Breakpoint
(
'matrixMulCUDA'
)
#gdb.execute('handle SIGALRM stop')
#gdb.execute('handle SIGALRM stop')
#gdb.execute('handle SIGALRM ignore')
#gdb.execute('handle SIGALRM ignore')
#gdb.execute('handle SIGALRM')
#gdb.execute('handle SIGALRM')
gdb
.
execute
(
'catch signal SIGALRM'
)
#
gdb.execute('catch signal SIGALRM')
gdb
.
execute
(
'r'
)
gdb
.
execute
(
'r'
)
#selectrd()
#selectrd()
a
=
gdb
.
execute
(
"info proc"
,
to_string
=
True
).
splitlines
()[
0
].
split
(
' '
)[
1
]
print
(
"Process:{}"
.
format
(
a
))
print
(
"1"
)
print
(
"1"
)
gdb
.
execute
(
"c"
)
gdb
.
execute
(
"c"
)
print
(
"1"
)
print
(
"1"
)
...
...
test.sh
View file @
8621c3da
...
@@ -5,7 +5,7 @@ set -e
...
@@ -5,7 +5,7 @@ set -e
#uncomment to a more verbose script
#uncomment to a more verbose script
#set -x
#set -x
FAULTS
=
10
FAULTS
=
10
00
#CONFFILE=codes/matrixMul/matrixmul_16K.conf
#CONFFILE=codes/matrixMul/matrixmul_16K.conf
#CONFFILE=codes/mmElem/matrixmul_16K.conf
#CONFFILE=codes/mmElem/matrixmul_16K.conf
...
...
test_hotspot.sh
View file @
8621c3da
...
@@ -5,7 +5,7 @@ set -e
...
@@ -5,7 +5,7 @@ set -e
#uncomment to a more verbose script
#uncomment to a more verbose script
#set -x
#set -x
DIR_RODINIA
=
codes
DIR_RODINIA
=
codes
FAULTS
=
100
FAULTS
=
100
0
#CONFFILE=codes/matrixMul/matrixmul_16K.conf
#CONFFILE=codes/matrixMul/matrixmul_16K.conf
#CONFFILE=codes/mmElem/matrixmul_16K.conf
#CONFFILE=codes/mmElem/matrixmul_16K.conf
...
...
test_rodinia.sh
View file @
8621c3da
...
@@ -4,7 +4,7 @@ set -e
...
@@ -4,7 +4,7 @@ set -e
#uncomment to a more verbose script
#uncomment to a more verbose script
#set -x
#set -x
DIR_RODINIA
=
$HOME
/rodinia_3.1/cuda
DIR_RODINIA
=
codes
FAULTS
=
1000
FAULTS
=
1000
#CONFFILE=codes/matrixMul/matrixmul_16K.conf
#CONFFILE=codes/matrixMul/matrixmul_16K.conf
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment