Commit 663c6b9c authored by German Leon's avatar German Leon
Browse files

Version IO y RF

parent a04c78af
......@@ -54,6 +54,7 @@ def profiler_caller(gdb_exec, kernels, benchmark_binary, benchmark_args,device,s
init_string = '"{};{};{};file {}; set args {}; set cuda break_on_launch application"'.format(False,True,kernels,benchmark_binary, benchmark_args_striped)
profiler_cmd = script.format(device, gdb_exec, init_string, cp.PROFILER_SCRIPT)
max_registers=os.system(profiler_cmd) >>8
print ("Maximo numero de registros ###################################+++")
print(max_registers,max_registers>>8)
if bool(section):
init_string = '"{};{};{};file {}; set args {}; break {}; break {}"'.format( bool(section),False,kernels,benchmark_binary,benchmark_args_striped,section['begin'],section['end'])
......
......@@ -47,8 +47,12 @@ class BitFlip:
#print (linea)#+"-"+str(len(linea))+"-"+linea[0])
#print ("============")
lista=re.findall(r"R(\d+)", linea[0])
#Ahora son todos los registros.
#lista=range(0,self.__maxregs)
setlista=set()
setlista.update(lista)
#self.__logging.info(str(len(setlista)))
return setlista
def regmod (self):
......
......@@ -60,14 +60,14 @@ class SignalApp(Thread):
os.system("{} > /dev/null 2>/dev/null".format(self.__signal_cmd))
self.__log.info("sending signal {}".format(signals))
#try:
# (self._syncro).wait()
#(self._syncro).wait()
#except threading.BrokenBarrierError:
#except:
# (self._syncro).abort()
# print("Breakpoint fuera de tiempo")
#(self._syncro).abort()
#print("Breakpoint fuera de tiempo")
#(self._syncro).reset()
time.sleep(self.__time_to_sleep)
(self._syncro).reset()
#(self._syncro).reset()
try:
(self._waitfinish).wait()
#except threading.BrokenBarrierError:
......
......@@ -26,7 +26,8 @@ class SummaryFile:
self.__fieldnames = kwargs.get("fieldnames")
# Open and start csv file
self.__open_csv(mode='w')
#self.__open_csv(mode='w')
self.__open_csv(mode=kwargs.get("mode"))
self.__dict_buff.writeheader()
self.__close_csv()
......
......@@ -17,9 +17,9 @@ faultModel = 0
# RF -> Register File
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Address (NOT IMPLEMENTED YET)
#injectionSite = RF
injectionSite = RF
injectionSite = INST_OUT
#injectionSite = INST_OUT
# Max time factor to finish the app; it is multiplied by the application's running time.
# For example, if your app takes 2s and maxWaitTimes is 5, the maximum running time before
# the run is considered a crash is 10s
......@@ -33,7 +33,7 @@ benchmarkBinary = /home/badia/mycarol-fi/codes/mmElem/matrixMul
benchmarkArgs = -wA=8192 -hA=8192 -hB=8192 -wB=8192
# CSV output file. It will be overwritten at each injection
csvFile = codes/mmElem/fi_matrix_mul_single_bit.csv
csvFile = results/mElem_R_2_gpu1.csv
# You should create a script in the benchmark source folder to check GOLD_OUTPUT against INJ_OUTPUT
goldenCheckScript = codes/mmElem/sdc_check.sh
......
......@@ -29,12 +29,19 @@ if the string is always empty the column will be empty, otherwise it
will contain the returned values for each injection
"""
def receiveSignal(signalNumber, frame):
syncro.wait()
try:
syncro.wait()
except:
syncro.abort()
print("Breakpoint inicial fuera de tiempo")
print ("Alcanzado el breakpoint, y recibida la señal",signalNumber);
def receiveEnd(signalNumber, frame):
print ("Esperando sincronismo del final");
wait_finish.wait()
try:
wait_finish.wait()
except:
wait_finish.abort()
print("Hang timeout execution")
print ("Recibida la señal de final del programa",signalNumber);
def user_defined_function(injection_output_path):
......@@ -64,7 +71,7 @@ def signal_handler(sig, frame):
os.system("rm -f {}/bin/*".format(current_path))
for th in gpus_threads:
th.join()
sys.exit(0)
#sys.exit(0)
"""
......@@ -233,7 +240,7 @@ def check_sdcs_and_app_crash(logging, sdc_check_script, inj_output_path, inj_err
else:
is_app_crash[1]=is_app_crash[1]+" "+carol_fi_signal
break
if (len(is_app_crash) >0):
if (not is_app_crash[0]):
# Check if NVIDIA signals on output
is_masked= True
for line in out_lines:
......@@ -241,7 +248,10 @@ def check_sdcs_and_app_crash(logging, sdc_check_script, inj_output_path, inj_err
#print("FAIL=="+line+"====")
is_sdc= True
is_masked=False
break
break
else:
is_sdc= False
is_masked=False
# if is_app_crash[0]:
......@@ -533,7 +543,7 @@ by sending a SIGINT signal to the application
def fault_injection_by_signal(**kwargs):
# Global rows list
global lock, exit_injector
global lock, exit_injector,num_rounds,kill_strings
benchmark_binary = kwargs.get('benchmark_binary')
kwargs['signal_cmd'] = "killall -2 {}".format(os.path.basename(benchmark_binary))
fault_models = kwargs.get('fault_models')
......@@ -548,7 +558,13 @@ def fault_injection_by_signal(**kwargs):
# Execute the fault injector for each one of the sections(apps) of the configuration file
for fault_model in fault_models:
# Execute iterations number of fault injection for a specific app
num_rounds = 1
try:
ret_profiler = cf.load_config_file("tmpxxx_num_rounds.conf")
num_rounds=int(ret_profiler.get('DEFAULT', 'Ocurrencias'))
os.system ("rm tmpxxx_num_rounds.conf")
except:
num_rounds = 1
while num_rounds <= iterations:
if exit_injector:
return
......@@ -595,12 +611,29 @@ def fault_injection_by_signal(**kwargs):
acc_fault_injeted=0
else:
acc_fault_injected+=1
if (acc_fault_injected == (max_fallos/2)):
time.sleep(240)
if (acc_fault_injected == (max_fallos/2)):
for cmd in kill_strings.split(";"):
os.system(cmd + " > /dev/null 2>&1")
for th in gpus_threads:
try:
th.join()
except:
nulo=1
time.sleep(240)
if (acc_fault_injected == max_fallos):
break
exit_injector=True
for cmd in kill_strings.split(";"):
os.system(cmd + " > /dev/null 2>&1")
for th in gpus_threads:
try:
th.join()
except:
nulo=1
f=open("tmpxxx_num_rounds.conf","w")
f.write("[DEFAULT] \nOcurrencias = "+str(num_rounds)+"\n")
f.close()
pretty_print(header=header, row=row)
......@@ -657,7 +690,12 @@ def main():
fieldnames = ['unique_id', 'register', 'iteration', 'fault_model', 'thread', 'block', 'old_value',
'new_value', 'inj_site', 'fault_successful', 'hang', 'crash', 'masked', 'sdc', 'Exception','time',
'inj_time_location', 'bits_flipped', 'instruction', 'user_defined']
summary_file = SummaryFile(filename=csv_file, fieldnames=fieldnames, mode='w')
if os.path.exists("tmpxxx_num_rounds.conf"):
mode='a'
else:
mode='w'
summary_file = SummaryFile(filename=csv_file, fieldnames=fieldnames, mode=mode) #'w'
# Lock for summary file parallel
lock = Lock()
......@@ -718,13 +756,13 @@ def main():
fi_master_thread = Thread(target=fault_injection_by_signal, kwargs=kwargs)
gpus_threads.append(fi_master_thread)
ret=0
#ret=0
for thread in gpus_threads:
thread.start()
for thread in gpus_threads:
thread.join()
#ret += thread.exit_code
#ret += acc_fault_injected
os.system("rm -f {}/bin/*".format(current_path))
if exit_injector:
......@@ -732,8 +770,11 @@ def main():
else:
cf.printf("Fault injection finished, results can be found in {}".format(csv_file))
########################################################################
return ret
#if (iterations==num_rounds):
# sys.exit(0)
#else:
# sys.exit(1)
return (iterations==num_rounds)
########################################################################
# Main #
########################################################################
......
......@@ -40,10 +40,13 @@ def set_event(event):
# #os.system ("killall -2 python3")
elif (isinstance(event, gdb.SignalEvent)):
try:
# Just checking if it was hit
if bit_flip.fault_injected is False:
bit_flip.single_event()
global_logging.info("BIT FLIP SET ON SIGNAL {}".format(event.stop_signal))
#global_logging.info ("Enviado senal a "+ str(pid))
#os.system ("kill -s USR1 " + str(pid))
except Exception as err:
global_logging.exception("EVENT DIFFERENT FROM STOP SIGNAL: {}".format(str(err)))
......
......@@ -101,11 +101,13 @@ def main():
#print (cadena[2].split(";"))
for init_str in cadena[3].split(";"):
gdb.execute(init_str)
max=0.
maxi=0.
gdb.execute("r")
if (onlycount):
ks=cadena[2].split(",",1)
max=numreg(ks[0])
ks=cadena[2].split(",",1)
print (ks)
maxi=max([ numreg(x) for x in ks])
print("Maximo.."+str (maxi))
else:
if (section):
gdb.execute("c")
......@@ -115,6 +117,6 @@ def main():
f=open("tmpxxx_return_profiler.conf","w")
f.write("[DEFAULT] \nOcurrencias = "+str(ocurrencias)+"\nTiempo = "+str(trun)+"\n")
f.close()
sys.exit(max)
sys.exit(maxi)
main()
#cut -d, -f 3,4,9,10,11,12,13,14,15 fi_lavaMD_single_bit_RF.csv
fi_field=10
model=$[$fi_field - 1]
hang=$[$fi_field + 1]
crash=$[$hang + 1]
masked=$[$crash +1 ]
......@@ -10,8 +11,19 @@ do
cab=$(cut -d, -f$i $1|head -1)
trues=$(cut -d, -f$i $1|grep "True"|wc -l)
echo $cab "=" $trues
campos[$i]=$trues
done
campos[$masked]=$[campos[$masked]-campos[$hang]]
for i in $hang $crash $masked $sdc
do
tantoporcien=$(printf %.3f "$(( campos[$i] * 10**5/campos[$fi_field] ))e-3")
cab=$(cut -d, -f$i $1|head -1)
echo $cab"(%)="$tantoporcien
done
interrupciones=$(grep unique $1| wc -l)
interrupciones=$[ $interrupciones -1]
echo "Hang-restart-fi="$interrupciones
echo "Model="$(cut -d, -f$model $1|head -2|tail -1)
excep=$[$sdc +1]
for i in $(seq 1 1 15)
......
#cut -d, -f 3,4,9,10,11,12,13,14,15 fi_lavaMD_single_bit_RF.csv
./res-new-ind-simple.sh $1
reg=2
num_reg=0;
for i in $(seq 0 1 255)
do
ib=$(echo "R"$i)
n=$(cut -f $reg -d, $1|grep $ib|wc -l)
sib=$(echo "R"$i",")
n=$(cut -f $reg -d, $1|grep -w $ib|wc -l)
if [ "$n" -gt 0 ];
then
echo $ib"="$n
num_reg=$[$num_reg + 1]
fi
done
echo "Registros usados="$num_reg
for i in $(seq 0 1 255)
do
ib=$(echo "R"$i)
n=$(cut -f $reg -d, $1|grep $ib|wc -l)
sib=$(echo "R"$i",")
n=$(cut -f $reg -d, $1|grep -w $ib|wc -l)
if [ "$n" -gt 0 ];
then
echo "============"$ib"========="
head -1 $1 >filetempreg.csv
grep $ib $1 >> filetempreg.csv
grep $sib $1 >> filetempreg.csv
./res-new-ind-simple.sh filetempreg.csv
fi
done
# Aggregate per-experiment summaries into one semicolon-separated table.
# $1 is used as a directory: every entry listed in it (except names filtered
# out as "resumen_*") is passed to ./res-new-ind.sh, and selected fields of
# each resulting summary are collected into $1/resumen_total.csv.

# Build the header row: "Experimento" followed by one column per field label.
cab="Experimento"
for j in "fault_successful" "Model" "hang(%)" "crash(%)" "masked(%)" "sdc(%)" "Hang-restart-fi" "Registros"
do
cab=$(echo $cab";"$j)
done
# Start (truncate) the aggregate file with the header row.
echo $cab > $1/resumen_total.csv
# Iterate over the entries of $1, skipping previously generated summaries.
# NOTE(review): the pattern resumen_* is unquoted, so the shell may glob-expand
# it against the current directory before grep sees it -- confirm this is intended.
for i in $(ls $1 | grep -v -E resumen_* )
do
# Trace the command about to be run, then write its output to $1/resumen_<entry>.
echo "res-new-ind.sh" $1/$i ">" $1"/resumen_"$i
./res-new-ind.sh $1/$i > $1/resumen_$i
# The row starts with the entry name, then one value per field label.
row=$(echo $i)
for j in "fault_successful" "Model" "hang(%)" "crash(%)" "masked(%)" "sdc(%)" "Hang-restart-fi" "Registros"
do
# Take the first summary line matching the label and keep the second
# '='-delimited field (the text after the first '=').
campo=$(grep $j $1/resumen_$i|cut -d= -f2|head -1)
row=$(echo $row";"$campo)
done
# Append this experiment's row to the aggregate file.
echo $row >> $1/resumen_total.csv
done
......@@ -16,6 +16,12 @@ echo "Step 1 - Profiling the application for fault injection"
echo "Step 2 - Running ${FAULTS} on ${CONFFILE}"
./fault_injector.py -i ${FAULTS} -c ${CONFFILE} -n 1 $*
while test -f "tmpxxx_num_rounds.conf"
do
cat tmpxxx_num_rounds.conf >> tandas
./fault_injector.py -i ${FAULTS} -c ${CONFFILE} -n 1 $*
echo "==============================="
done
echo "Fault injection finished"
......
......@@ -13,11 +13,12 @@ FAULTS=1000
CONFFILE=$DIR_RODINIA/hotspot/hotspot.conf
echo "Step 1 - Profiling the application for fault injection"
./app_profiler.py -c ${CONFFILE} $*
date >> comienzo
echo "Step 2 - Running ${FAULTS} on ${CONFFILE}"
./fault_injector.py -i ${FAULTS} -c ${CONFFILE} -n 1 $*
date >> final
echo "Fault injection finished"
exit 0
#!/bin/bash
trap echo "No paro" SIGINIT
set -e
......@@ -18,6 +19,13 @@ echo "Step 2 - Running ${FAULTS} on ${CONFFILE}"
./fault_injector.py -i ${FAULTS} -c ${CONFFILE} -n 1 $*
echo $?
while test -f "tmpxxx_num_rounds.conf"
do
cat tmpxxx_num_rounds.conf >> tandas
./fault_injector.py -i ${FAULTS} -c ${CONFFILE} -n 1 $*
echo "==============================="
done
echo "Fault injection finished"
exit 0
......@@ -16,7 +16,14 @@ echo "Step 1 - Profiling the application for fault injection"
echo "Step 2 - Running ${FAULTS} on ${CONFFILE}"
./fault_injector.py -i ${FAULTS} -c ${CONFFILE} -n 1 $*
echo $?
while test -f "tmpxxx_num_rounds.conf"
do
cat tmpxxx_num_rounds.conf >> tandas
./fault_injector.py -i ${FAULTS} -c ${CONFFILE} -n 1 $*
echo "==============================="
done
echo "Fault injection finished"
exit 0
#!/bin/bash
trap echo "No paro" SIGINIT
set -e
#uncomment for a more verbose script
......@@ -18,6 +18,14 @@ echo "Step 1 - Profiling the application for fault injection"
echo "Step 2 - Running ${FAULTS} on ${CONFFILE}"
./fault_injector.py -i ${FAULTS} -c ${CONFFILE} -n 1 $*
while test -f "tmpxxx_num_rounds.conf"
do
cat tmpxxx_num_rounds.conf >> tandas
./fault_injector.py -i ${FAULTS} -c ${CONFFILE} -n 1 $*
echo "==============================="
done
echo "Fault injection finished"
exit 0
......@@ -17,7 +17,12 @@ echo "Step 1 - Profiling the application for fault injection"
echo "Step 2 - Running ${FAULTS} on ${CONFFILE}"
./fault_injector.py -i ${FAULTS} -c ${CONFFILE} -n 1 $*
while test -f "tmpxxx_num_rounds.conf"
do
cat tmpxxx_num_rounds.conf >> tandas
./fault_injector.py -i ${FAULTS} -c ${CONFFILE} -n 1 $*
echo "==============================="
done
echo "Fault injection finished"
exit 0
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment