Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
German Leon
Carol-fi
Commits
68e252bb
Commit
68e252bb
authored
Jan 20, 2021
by
German Leon
Browse files
Version refinada
parent
707781af
Changes
10
Hide whitespace changes
Inline
Side-by-side
app_profiler.py
View file @
68e252bb
...
...
@@ -107,13 +107,18 @@ def main():
parser
.
add_argument
(
'-d'
,
'--device'
,
dest
=
"device"
,
help
=
"The GPU to perform FI."
" Default is 0."
,
required
=
False
,
default
=
0
,
type
=
int
)
args
=
parser
.
parse_args
()
if
(
args
.
device
>
0
):
cp
.
LOGS_PATH
=
cp
.
LOGS_PATH
+
"-"
+
str
(
args
.
device
)
cp
.
rewrite_path
()
os
.
system
(
"rm -f {}"
.
format
(
cp
.
KERNEL_INFO_DIR
))
# Read the configuration file with data for all the apps that will be executed
conf
=
cf
.
load_config_file
(
args
.
config_file
)
benchmark_binary
=
conf
.
get
(
'DEFAULT'
,
'benchmarkBinary'
)
if
(
args
.
device
>
0
):
cp
.
LOGS_PATH
=
"{}-{}-{}"
.
format
(
cp
.
LOGS_PATH
,
args
.
device
,
benchmark_binary
.
split
(
'/'
)[
-
1
])
cp
.
rewrite_path
()
os
.
system
(
"rm -f {}"
.
format
(
cp
.
KERNEL_INFO_DIR
))
# First set env vars
cf
.
set_python_env
()
...
...
classes/BitFlip.py
View file @
68e252bb
...
...
@@ -108,7 +108,6 @@ class BitFlip:
return
valores
def
nextinstr
(
self
):
#Obtengo el valor de los registro referenciados por la primera instruccipn
#self.__logging.info("===============================")
gdb
.
execute
(
"nexti"
)
linea
=
self
.
asmline
()
...
...
@@ -121,7 +120,6 @@ class BitFlip:
#Almaceno en un dictionario los valores de los registros
regs
=
{}
for
x
in
valores
:
#print(x)
m
=
re
.
match
(
r
".*R(\d+).*0x([0-9a-fA-F]+).*"
,
x
)
if
m
:
regs
[
m
.
group
(
1
)]
=
m
.
group
(
2
)
...
...
classes/RunGDB.py
View file @
68e252bb
...
...
@@ -28,6 +28,8 @@ class RunGDB(Thread):
self
.
__inj_err_path
=
inj_err_path
self
.
__gpu_to_execute
=
gpu_to_execute
os
.
environ
[
'OMP_NUM_THREADS'
]
=
'1'
def
run
(
self
):
if
cp
.
DEBUG
:
...
...
classes/SignalApp.py
View file @
68e252bb
...
...
@@ -4,7 +4,7 @@ from threading import Thread
from
random
import
uniform
import
common_parameters
as
cp
# All common parameters will bet at common_parameters module
import
os
import
os
,
signal
import
sys
"""
...
...
@@ -35,37 +35,51 @@ class SignalApp(Thread):
log_string
=
"Sending a signal using command: {} after {}s and each {}s."
.
format
(
self
.
__signal_cmd
,
self
.
__init_wait_time
,
self
.
__time_to_sleep
)
if
cp
.
DEBUG
:
print
(
log_string
)
self
.
__log
.
info
(
log_string
)
# Sleep for a random time
# time.sleep(self.__init_wait_time)
#a=[ uniform.randint(1,1000) for _ in range(self.__signals_to_send))]
#a.sort()
# os.system("{} > /dev/null 2>/dev/null".format(self.__signal_cmd))
#os.system(self.__signal_cmd)
#for line in os.popen(self.__signal_cmd):
# pid=int(line)
#os.kill(int(self.__signal_cmd),signal.SIGINT)
self
.
__log
.
info
(
log_string
)
try
:
(
self
.
_syncro
).
wait
()
#except threading.BrokenBarrierError:
except
:
(
self
.
_syncro
).
abort
()
print
(
"Breakpoint inicial fuera de tiempo"
)
(
self
.
_syncro
).
reset
()
hang
=
True
self
.
__log
.
info
(
"Timeout syncron of breakpoint
\n
"
)
self
.
__log
.
info
(
"Breakpoint inicial fuera de tiempo"
)
#(self._waitfinish).wait()
(
self
.
_syncro
).
reset
()
hang
=
True
self
.
__log
.
info
(
"Timeout syncron of breakpoint
\n
"
)
pid
=
(
os
.
popen
(
self
.
__signal_cmd
)).
read
()
pid
=
int
(
pid
.
split
(
'
\n
'
)[
0
])
#print("Comando {} y resultado {} de tipo {}".format(self.__signal_cmd,pid, type (pid)) )
# Time random
#print ("INIT:"+str(self.__init_wait_time)+"sleep"+str())
time
.
sleep
(
self
.
__init_wait_time
)
for
signals
in
range
(
0
,
self
.
__signals_to_send
):
os
.
system
(
"{} > /dev/null 2>/dev/null"
.
format
(
self
.
__signal_cmd
))
#os.system("{} > /dev/null 2>/dev/null".format(self.__signal_cmd))
try
:
os
.
kill
(
pid
,
signal
.
SIGINT
)
except
:
self
.
__log
.
info
(
"Process is dead"
)
break
self
.
__log
.
info
(
"sending signal {}"
.
format
(
signals
))
#
try:
#
(self._syncro).wait()
#
except
threading.BrokenBarrierError
:
#except:
#(self._syncro).abort()
try
:
(
self
.
_syncro
).
wait
()
except
:
(
self
.
_syncro
).
abort
()
break
#print("Breakpoint fuera de tiempo")
#
(self._syncro).reset()
(
self
.
_syncro
).
reset
()
time
.
sleep
(
self
.
__time_to_sleep
)
#(self._syncro).reset()
try
:
...
...
@@ -73,7 +87,7 @@ class SignalApp(Thread):
#except threading.BrokenBarrierError:
except
:
(
self
.
_waitfinish
).
abort
()
print
(
"Hang timeout execution"
)
self
.
__log
.
info
(
"Hang timeout execution"
)
hang
=
True
self
.
__log
.
info
(
"Timeout execution programa"
)
(
self
.
_waitfinish
).
reset
()
...
...
codes/hotspot/hotspot.conf
View file @
68e252bb
...
...
@@ -28,13 +28,13 @@ maxWaitTimes = 5
# Must be full path
benchmarkBinary
= /
home
/
badia
/
mycarol
-
fi
/
codes
/
hotspot
/
hotspot
#benchmarkBinary_noverificar = /home/badia/rodinia_3.1/cuda/lavaMD/lavaMD
# Commands to set the session inside GDB environment
benchmarkArgs
=
2
10000
/
home
/
badia
/
mycarol
-
fi
/
data
/
hotspot
/
temp_1024
/
home
/
badia
/
mycarol
-
fi
/
data
/
hotspot
/
power_1024
output
.
out
#benchmarkArgs = 16384 2 2 /home/badia/rodinia_3.1/data/hotspot/temp_16384 /home/badia/rodinia_3.1/data/hotspot/power_16384 output.out
benchmarkArgs
=
1024
2
10000
/
home
/
badia
/
mycarol
-
fi
/
data
/
hotspot
/
temp_1024
/
home
/
badia
/
mycarol
-
fi
/
data
/
hotspot
/
power_1024
output
.
out
benchmarkArgs_noverificar
=
1024
2
10000
/
home
/
badia
/
mycarol
-
fi
/
data
/
hotspot
/
temp_1024
/
home
/
badia
/
mycarol
-
fi
/
data
/
hotspot
/
power_1024
output
.
out
#benchmarkArgs_noverificar = 16384 2 2 /home/badia/rodinia_3.1/data/hotspot/temp_16384 /home/badia/rodinia_3.1/data/hotspot/power_16384 .
# CSV output file. It will be overwrite at each injection
csvFile
=
results
/
hotspot_RF
.
csv
...
...
common_parameters.py
View file @
68e252bb
import
signal
# Max size of register
SINGLE_MAX_SIZE_REGISTER
=
32
...
...
@@ -26,8 +27,11 @@ DEBUG_PROFILER = True
NUM_DIVISION_TIMES
=
100.0
# Common body of log filename
#SIGNALSTOP= "USR1"
SIGNAL_STOP
=
signal
.
SIGRTMIN
#SIGNALEXIT= "USR2"
SIGNAL_EXIT
=
signal
.
SIGRTMIN
+
1
# MAX INT 32 bits
MAX_INT_32
=
4294967295
...
...
fault_injector.py
View file @
68e252bb
...
...
@@ -29,20 +29,23 @@ if the string is always empty the column will be empty, otherwise it
will contain the returned values for each injection
"""
def
receiveSignal
(
signalNumber
,
frame
):
global
logging
logging
.
info
(
"Esperando sincronismo del final"
)
try
:
syncro
.
wait
()
except
:
syncro
.
abort
()
print
(
"Breakpoint inicial fuera de tiempo"
)
print
(
"Alcanzado el breakpoint, y recibida la señal
"
,
signalNumber
);
logging
.
info
(
"Breakpoint inicial fuera de tiempo"
)
logging
.
info
(
"Alcanzado el breakpoint, y recibida la señal
{}"
.
format
(
signalNumber
)
)
;
def
receiveEnd
(
signalNumber
,
frame
):
print
(
"Esperando sincronismo del final"
);
global
logging
logging
.
info
(
"Esperando sincronismo del final"
)
try
:
wait_finish
.
wait
()
except
:
wait_finish
.
abort
()
print
(
"Hang timeout execution"
)
print
(
"Recibida la señal de final del programa
"
,
signalNumber
);
logging
.
info
(
"Hang timeout execution"
)
logging
.
info
(
"Recibida la señal de final del programa
{}"
.
format
(
signalNumber
)
)
;
def
user_defined_function
(
injection_output_path
):
# This is a temporary example for carol-fi-codes suite
...
...
@@ -315,7 +318,7 @@ def check_injection_outcome(host_thread, logging, injection_site):
dpc
=
{}
dpc
[
'absoluto'
]
=
"0x"
+
re
.
match
(
r
".*0x([0-9a-fA-F]+) <.*"
,
assm_line
).
group
(
1
)
dpc
[
'relativo'
]
=
assm_line
.
split
(
'<'
)[
1
].
split
(
'>'
)[
0
]
print
(
"---PC: "
+
dpc
[
'absoluto'
]
+
"PC rel"
+
dpc
[
'relativo'
])
#
print("---PC: "+dpc['absoluto']+ "PC rel"+dpc['relativo'])
pc
=
dpc
[
'absoluto'
]
+
"<"
+
dpc
[
'relativo'
]
+
">"
if
cp
.
INJECTION_SITES
[
injection_site
]
in
[
cp
.
INST_OUT
,
cp
.
INST_ADD
]:
# if fault was injected ASSM_LINE MUST be in the logfile
...
...
@@ -337,7 +340,7 @@ return old register value, new register value
def
gdb_inject_fault
(
**
kwargs
):
global
kill_strings
global
kill_strings
,
logging
# These are the mandatory parameters
bits_to_flip
=
kwargs
.
get
(
'bits_to_flip'
)
fault_model
=
kwargs
.
get
(
'fault_model'
)
...
...
@@ -377,6 +380,9 @@ def gdb_inject_fault(**kwargs):
logging
=
Logging
(
log_file
=
flip_log_file
,
unique_id
=
unique_id
)
logging
.
info
(
"Starting GDB script"
)
#init_wait_time = uniform(0, end_time * cp.MAX_SIGNAL_BEFORE_ENDING)
#time_to_sleep = (max_wait_time - self.__init_wait_time) / seq_signals
# Generate configuration file for specific test
gdb_env_string
=
"{}|{}|{}|{}|{}|{}|file {}; set args {}|{}"
.
format
(
gdb_kernel
,
os
.
getpid
(),
maxregs
,
","
.
join
(
str
(
i
)
for
i
in
bits_to_flip
),
fault_model
,
flip_log_file
,
benchmark_binary
,
benchmark_args
,
...
...
@@ -404,10 +410,9 @@ def gdb_inject_fault(**kwargs):
# Starting both threads
fi_process
.
start
()
print
(
"Esperando breakpoint....."
)
#syncro.wait()
#syncro.reset()
"syncro = threading.Barrier(2, timeout=5) "
#if cp.DEBUG:
# cf.printf( "Waiting breakpoint.....")
signal_app_thread
.
start
()
...
...
@@ -553,7 +558,8 @@ def fault_injection_by_signal(**kwargs):
# Global rows list
global
lock
,
exit_injector
,
num_rounds
,
kill_strings
benchmark_binary
=
kwargs
.
get
(
'benchmark_binary'
)
kwargs
[
'signal_cmd'
]
=
"killall -2 {}"
.
format
(
os
.
path
.
basename
(
benchmark_binary
))
#kwargs['signal_cmd'] = "killall -2 {}".format(os.path.basename(benchmark_binary))
kwargs
[
'signal_cmd'
]
=
"pgrep {}"
.
format
(
os
.
path
.
basename
(
benchmark_binary
))
fault_models
=
kwargs
.
get
(
'fault_models'
)
iterations
=
kwargs
.
get
(
'iterations'
)
host_thread
=
kwargs
.
get
(
'host_thread'
)
...
...
@@ -654,8 +660,8 @@ Main function
def
main
():
global
kill_strings
,
current_path
,
gpus_threads
,
lock
,
syncro
,
wait_finish
signal
.
signal
(
signal
.
SIGUSR1
,
receiveSignal
);
signal
.
signal
(
signal
.
SIGUSR2
,
receiveEnd
);
signal
.
signal
(
cp
.
SIGNAL_STOP
,
receiveSignal
);
signal
.
signal
(
cp
.
SIGNAL_EXIT
,
receiveEnd
);
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'-c'
,
'--conf'
,
dest
=
"config_file"
,
help
=
'Configuration file'
,
required
=
True
)
parser
.
add_argument
(
'-i'
,
'--iter'
,
dest
=
"iterations"
,
...
...
@@ -670,9 +676,7 @@ def main():
args
=
parser
.
parse_args
()
if
args
.
iterations
<
1
:
parser
.
error
(
'Iterations must be greater than zero'
)
if
(
args
.
device
>
0
):
cp
.
LOGS_PATH
=
cp
.
LOGS_PATH
+
"-"
+
str
(
args
.
device
)
cp
.
rewrite_path
()
# Start with a different seed every time to vary the random numbers generated
# the seed will be the current number of second since 01/01/70
...
...
@@ -680,7 +684,10 @@ def main():
# Read the configuration file with data for all the apps that will be executed
conf
=
cf
.
load_config_file
(
args
.
config_file
)
benchmark_binary_default
=
conf
.
get
(
'DEFAULT'
,
'benchmarkBinary'
)
if
(
args
.
device
>
0
):
cp
.
LOGS_PATH
=
"{}-{}-{}"
.
format
(
cp
.
LOGS_PATH
,
args
.
device
,
benchmark_binary_default
.
split
(
'/'
)[
-
1
])
cp
.
rewrite_path
()
# Connect signal SIGINT to stop the fault injector
kill_strings
=
""
signal
.
signal
(
signal
.
SIGINT
,
signal_handler
)
...
...
@@ -727,7 +734,7 @@ def main():
raise
FileNotFoundError
(
tmp_path
+
" path does not exists, run app_profile.py to create it"
)
# Set binaries for the injection
benchmark_binary_default
=
conf
.
get
(
'DEFAULT'
,
'benchmarkBinary'
)
gdb_path_default
=
conf
.
get
(
'DEFAULT'
,
'gdbExecName'
)
each_thread_iterations
=
iterations
/
num_gpus
...
...
flip_value.py
View file @
68e252bb
...
...
@@ -8,13 +8,12 @@ import common_parameters as cp
"""
Handler attached to exit event
"""
def
sendsignal
(
signal
):
os
.
kill
(
int
(
pid
),
signal
)
def
exit_handler
(
event
):
global
global_logging
os
.
system
(
"kill -s USR2 "
+
str
(
pid
)
)
sendsignal
(
cp
.
SIGNAL_EXIT
)
global_logging
.
info
(
str
(
"event type: exit"
))
print
(
"llego el final"
)
try
:
global_logging
.
info
(
"exit code: {}"
.
format
(
str
(
event
.
exit_code
)))
...
...
@@ -35,7 +34,7 @@ def set_event(event):
if
(
isinstance
(
event
,
gdb
.
BreakpointEvent
)):
global_logging
.
info
(
"Before breakpoint"
+
str
(
time
.
clock
()
-
t
))
global_logging
.
info
(
"Enviado senal a "
+
str
(
pid
))
os
.
system
(
"kill -s USR1 "
+
str
(
pid
)
)
sendsignal
(
cp
.
SIGNAL_STOP
)
bp
.
enabled
=
False
gdb
.
execute
(
'c'
)
# #os.system ("killall -2 python3")
...
...
@@ -47,7 +46,7 @@ def set_event(event):
bit_flip
.
single_event
()
global_logging
.
info
(
"BIT FLIP SET ON SIGNAL {}"
.
format
(
event
.
stop_signal
))
#global_logging.info ("Enviado senal a "+ str(pid))
#os.system ("kill -s USR1 " + str(pid)
)
sendsignal
(
cp
.
SIGNAL_STOP
)
except
Exception
as
err
:
global_logging
.
exception
(
"EVENT DIFFERENT FROM STOP SIGNAL: {}"
.
format
(
str
(
err
)))
...
...
@@ -74,12 +73,14 @@ def main():
# Connecting to a stop signal event
gdb
.
events
.
stop
.
connect
(
set_event
)
# Get variables values from environment
# Firsn parse line
[
kernel
,
pid
,
max_regs
,
bits_to_flip
,
fault_model
,
flip_log_file
,
gdb_init_strings
,
injection_site
]
=
arg0
.
split
(
'|'
)
# Logging
global_logging
=
Logging
(
log_file
=
flip_log_file
)
global_logging
.
info
(
"Starting flip_value script "
+
" called by "
+
str
(
pid
)
+
" for stop kernel "
+
str
(
kernel
));
...
...
@@ -131,3 +132,9 @@ injection_site = None
bit_flip
=
None
main
()
prueba.py
View file @
68e252bb
...
...
@@ -2,6 +2,7 @@ import os
import
re
import
gdb
import
time
import
signal
import
common_functions
as
cf
def
exit_handler
(
event
):
global
nosalir
...
...
@@ -19,6 +20,26 @@ def exit_handler(event):
Handler that will put a breakpoint on the kernel after
signal
"""
def
handler
(
signum
,
frame
):
global
pid
,
pillo
,
t0
print
(
"handler:+++++++++++========================++++++++++++"
)
#print ("+++++++++++========================++++++++++++")
#gdb.execute("nexti")
settimeslice
()
pillo
=
time
.
time
()
-
t0
os
.
kill
(
os
.
getpid
(),
signal
.
SIGINT
)
#print ("+++++++++++========================++++++++++++")
def
setalarm
():
signal
.
signal
(
signal
.
SIGALRM
,
handler
)
#print ("No disponible")
def
settimeslice
():
#signal.alarm(1)
signal
.
setitimer
(
signal
.
ITIMER_REAL
,
1.0
,
1.0
)
#print ("No disponible")
def
pausealarm
():
signal
.
setitimer
(
signal
.
ITIMER_REAL
,
0
)
#print ("No disponible")
def
selectrd
():
linea
=
cf
.
execute_command
(
gdb
=
gdb
,
to_execute
=
"x/1i $pc"
)
...
...
@@ -49,7 +70,7 @@ def selectrd():
print
(
x
)
m
=
re
.
match
(
r
".*R(\d+).*0x([0-9a-fA-F]+).*"
,
x
)
if
m
:
if
(
regs
[
m
.
group
(
1
)]
!=
m
.
group
(
2
))
if
(
regs
[
m
.
group
(
1
)]
!=
m
.
group
(
2
))
:
regdst
.
add
(
m
.
group
(
1
))
...
...
@@ -57,46 +78,78 @@ def selectrd():
print
(
type
(
lista
))
print
(
lista
)
def
set_event
(
event
):
global
trun
,
ocurrencias
,
t
global
trun
,
ocurrencias
,
t
,
pillo
,
t0
,
bp
#pausealarm()
if
(
isinstance
(
event
,
gdb
.
BreakpointEvent
)):
t
=
time
.
clock
()
print
(
"Bp"
)
t0
=
time
.
clock
()
bp
.
enabled
=
False
ocurrencias
=
ocurrencias
+
1
#signal.setitimer(signal.ITIMER_REAL,3)
else
:
trun
=
(
time
.
clock
()
-
t
)
print
(
"set_event"
)
#signal.setitimer(signal.ITIMER_REAL,0)
trun
=
(
time
.
clock
()
-
t0
)
#signal.setitimer(signal.ITIMER_REAL,2)
#signal.setitimer(signal.ITIMER_REAL,0.1)
t
=
time
.
time
()
-
t0
print
(
"
\n
Pongo: "
+
str
(
t
)
+
" Pillo;"
+
str
(
pillo
)
+
"
\n
"
)
#settimeslice()
#gdb.execute("c")
def
main
():
global
ocurrencias
,
t
,
nosalir
,
trun
global
ocurrencias
,
t
,
nosalir
,
trun
,
pid
,
t0
,
pillo
,
bp
was_hit
=
False
#pid=os.getpid()
ocurrencias
=
0
# Initialize GDB to run the appset pagination off
gdb
.
execute
(
'catch signal SIGALRM'
)
gdb
.
execute
(
"set confirm off"
)
gdb
.
execute
(
"set pagination off"
)
gdb
.
execute
(
"set target-async off"
)
gdb
.
execute
(
"set non-stop off"
)
setalarm
()
settimeslice
()
# Connecting to a exit handler event
gdb
.
events
.
exited
.
connect
(
exit_handler
)
# Connecting to a stop signal event
gdb
.
events
.
stop
.
connect
(
set_event
)
#gdb.events.cont.connect(set_event_cont)
#gdb.execute("file ~/rodinia_3.1/cuda/lud/cuda/lud_cuda")
#gdb.execute("set arg -s 10000")
#gdb.execute("break lud_cuda")
t0
=
time
.
time
()
print
(
t0
)
pillo
=
t0
gdb
.
execute
(
"file codes/mmElem/matrixMul"
)
gdb
.
execute
(
"set arg -wA=16384 -hA=16384 -hB=16384 -wB=16384"
)
gdb
.
execute
(
"set cuda break_on_launch application"
)
#gdb.execute("set cuda break_on_launch application")
# bp=gdb.Breakpoint('matrixMulCUDA')
#gdb.execute('handle SIGALRM stop')
#gdb.execute('handle SIGALRM ignore')
#gdb.execute('handle SIGALRM')
gdb
.
execute
(
'catch signal SIGALRM'
)
gdb
.
execute
(
'r'
)
selectrd
()
nosalir
=
True
while
nosalir
:
gdb
.
execute
(
"finish"
)
gdb
.
execute
(
"c"
)
#selectrd()
print
(
"1"
)
gdb
.
execute
(
"c"
)
print
(
"1"
)
gdb
.
execute
(
"c"
)
gdb
.
execute
(
"c"
)
gdb
.
execute
(
"c"
)
pausealarm
()
gdb
.
execute
(
"c"
)
gdb
.
execute
(
"c"
)
#nosalir=True
# while nosalir:
# gdb.execute("finish")
# gdb.execute("c")"
print
(
" Ocurrencias "
+
str
(
ocurrencias
)
+
" Tiempo acumulado de ejecucciones "
+
str
(
trun
))
f
=
open
(
"tmpxxx_return_profiler.conf"
,
"w"
)
f
.
write
(
"Ocurrencias ="
+
str
(
ocurrencias
)
+
"
\n
Tiempo "
+
str
(
trun
)
+
"
\n
"
)
...
...
test_nw.sh
View file @
68e252bb
...
...
@@ -6,7 +6,7 @@ set -e
#set -x
#DIR_RODINIA=$HOME/rodinia_3.1/cuda
DIR_RODINIA
=
codes
FAULTS
=
100
FAULTS
=
100
0
#CONFFILE=codes/matrixMul/matrixmul_16K.conf
#CONFFILE=codes/mmElem/matrixmul_16K.conf
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment