Commit 68e252bb authored by German Leon
Browse files

Version refinada

parent 707781af
......@@ -107,13 +107,18 @@ def main():
parser.add_argument('-d', '--device', dest="device", help="The GPU to perform FI."
" Default is 0.", required=False, default=0, type=int)
args = parser.parse_args()
if (args.device>0):
cp.LOGS_PATH=cp.LOGS_PATH+"-"+str(args.device)
cp.rewrite_path()
os.system("rm -f {}".format(cp.KERNEL_INFO_DIR))
# Read the configuration file with data for all the apps that will be executed
conf = cf.load_config_file(args.config_file)
benchmark_binary = conf.get('DEFAULT', 'benchmarkBinary')
if (args.device>0):
cp.LOGS_PATH="{}-{}-{}".format(cp.LOGS_PATH,args.device,benchmark_binary.split('/')[-1])
cp.rewrite_path()
os.system("rm -f {}".format(cp.KERNEL_INFO_DIR))
# First set env vars
cf.set_python_env()
......
......@@ -108,7 +108,6 @@ class BitFlip:
return valores
def nextinstr(self):
# Get the values of the registers referenced by the first instruction
#self.__logging.info("===============================")
gdb.execute("nexti")
linea= self.asmline()
......@@ -121,7 +120,6 @@ class BitFlip:
# Store the register values in a dictionary
regs={}
for x in valores:
#print(x)
m = re.match(r".*R(\d+).*0x([0-9a-fA-F]+).*", x)
if m:
regs[m.group(1)]=m.group(2)
......
......@@ -29,6 +29,8 @@ class RunGDB(Thread):
self.__gpu_to_execute = gpu_to_execute
os.environ['OMP_NUM_THREADS'] = '1'
def run(self):
if cp.DEBUG:
printf("GDB Thread run, id: {}".format(self.__unique_id))
......
......@@ -4,7 +4,7 @@ from threading import Thread
from random import uniform
import common_parameters as cp # All common parameters will bet at common_parameters module
import os
import os,signal
import sys
"""
......@@ -35,37 +35,51 @@ class SignalApp(Thread):
log_string = "Sending a signal using command: {} after {}s and each {}s.".format(self.__signal_cmd, self.__init_wait_time,self.__time_to_sleep)
if cp.DEBUG:
print(log_string)
self.__log.info(log_string)
# Sleep for a random time
# time.sleep(self.__init_wait_time)
#a=[ uniform.randint(1,1000) for _ in range(self.__signals_to_send))]
#a.sort()
# os.system("{} > /dev/null 2>/dev/null".format(self.__signal_cmd))
#os.system(self.__signal_cmd)
#for line in os.popen(self.__signal_cmd):
# pid=int(line)
#os.kill(int(self.__signal_cmd),signal.SIGINT)
self.__log.info(log_string)
try:
(self._syncro).wait()
#except threading.BrokenBarrierError:
except:
(self._syncro).abort()
print("Breakpoint inicial fuera de tiempo")
self.__log.info("Breakpoint inicial fuera de tiempo")
#(self._waitfinish).wait()
(self._syncro).reset()
hang=True
self.__log.info("Timeout syncron of breakpoint\n")
pid = (os.popen(self.__signal_cmd)).read()
pid=int(pid.split('\n')[0])
#print("Comando {} y resultado {} de tipo {}".format(self.__signal_cmd,pid, type (pid)) )
# Time random
#print ("INIT:"+str(self.__init_wait_time)+"sleep"+str())
time.sleep(self.__init_wait_time)
for signals in range(0, self.__signals_to_send):
os.system("{} > /dev/null 2>/dev/null".format(self.__signal_cmd))
#os.system("{} > /dev/null 2>/dev/null".format(self.__signal_cmd))
try:
os.kill(pid,signal.SIGINT)
except:
self.__log.info("Process is dead")
break
self.__log.info("sending signal {}".format(signals))
#try:
#(self._syncro).wait()
#except threading.BrokenBarrierError:
#except:
#(self._syncro).abort()
try:
(self._syncro).wait()
except:
(self._syncro).abort()
break
#print("Breakpoint fuera de tiempo")
#(self._syncro).reset()
(self._syncro).reset()
time.sleep(self.__time_to_sleep)
#(self._syncro).reset()
try:
......@@ -73,7 +87,7 @@ class SignalApp(Thread):
#except threading.BrokenBarrierError:
except:
(self._waitfinish).abort()
print("Hang timeout execution")
self.__log.info("Hang timeout execution")
hang=True
self.__log.info("Timeout execution programa")
(self._waitfinish).reset()
......
......@@ -28,13 +28,13 @@ maxWaitTimes = 5
# Must be full path
benchmarkBinary = /home/badia/mycarol-fi/codes/hotspot/hotspot
#benchmarkBinary_noverificar = /home/badia/rodinia_3.1/cuda/lavaMD/lavaMD
# Commands to set the session inside GDB environment
benchmarkArgs = 2 10000 /home/badia/mycarol-fi/data/hotspot/temp_1024 /home/badia/mycarol-fi/data/hotspot/power_1024 output.out
#benchmarkArgs = 16384 2 2 /home/badia/rodinia_3.1/data/hotspot/temp_16384 /home/badia/rodinia_3.1/data/hotspot/power_16384 output.out
benchmarkArgs = 1024 2 10000 /home/badia/mycarol-fi/data/hotspot/temp_1024 /home/badia/mycarol-fi/data/hotspot/power_1024 output.out
benchmarkArgs_noverificar = 1024 2 10000 /home/badia/mycarol-fi/data/hotspot/temp_1024 /home/badia/mycarol-fi/data/hotspot/power_1024 output.out
#benchmarkArgs_noverificar = 16384 2 2 /home/badia/rodinia_3.1/data/hotspot/temp_16384 /home/badia/rodinia_3.1/data/hotspot/power_16384 .
# CSV output file. It will be overwritten at each injection
csvFile = results/hotspot_RF.csv
......
import signal
# Max size of register
SINGLE_MAX_SIZE_REGISTER = 32
......@@ -26,8 +27,11 @@ DEBUG_PROFILER = True
NUM_DIVISION_TIMES = 100.0
# Common body of log filename
#SIGNALSTOP= "USR1"
SIGNAL_STOP=signal.SIGRTMIN
#SIGNALEXIT= "USR2"
SIGNAL_EXIT=signal.SIGRTMIN+1
# MAX INT 32 bits
MAX_INT_32 = 4294967295
......
......@@ -29,20 +29,23 @@ if the string is always empty the column will be empty, otherwise it
will contain the returned values for each injection
"""
# Signal handler that main() registers for cp.SIGNAL_STOP (SIGUSR1 before
# this change). It waits on the module-level `syncro` object, calls
# syncro.abort() if that wait raises, and reports progress through the
# module-level `logging` object.
# NOTE(review): this span is taken from a rendered diff, so the print(...)
# lines and the logging.info(...) lines after them appear to be the old and
# new versions of the same statements - confirm against the real file.
# NOTE(review): the first message reads "del final" although this is the
# breakpoint handler; it looks copied from receiveEnd - confirm the wording.
def receiveSignal(signalNumber, frame):
global logging
logging.info("Esperando sincronismo del final")
try:
syncro.wait()
# Bare except: any error raised by the wait (presumably a barrier timeout)
# lands here.
except:
syncro.abort()
print("Breakpoint inicial fuera de tiempo")
print ("Alcanzado el breakpoint, y recibida la señal",signalNumber);
logging.info("Breakpoint inicial fuera de tiempo")
logging.info("Alcanzado el breakpoint, y recibida la señal {}".format(signalNumber));
# Signal handler that main() registers for cp.SIGNAL_EXIT (SIGUSR2 before
# this change). It waits on the module-level `wait_finish` object, calls
# wait_finish.abort() if that wait raises, and reports progress through the
# module-level `logging` object.
# NOTE(review): this span is taken from a rendered diff, so the print(...)
# lines and the logging.info(...) lines after them appear to be the old and
# new versions of the same statements - confirm against the real file.
def receiveEnd(signalNumber, frame):
print ("Esperando sincronismo del final");
global logging
logging.info("Esperando sincronismo del final")
try:
wait_finish.wait()
# Bare except: any error raised by the wait (presumably a barrier timeout)
# lands here.
except:
wait_finish.abort()
print("Hang timeout execution")
print ("Recibida la señal de final del programa",signalNumber);
logging.info("Hang timeout execution")
logging.info("Recibida la señal de final del programa {}".format(signalNumber));
def user_defined_function(injection_output_path):
# This is a temporary example for carol-fi-codes suite
......@@ -315,7 +318,7 @@ def check_injection_outcome(host_thread, logging, injection_site):
dpc={}
dpc['absoluto']="0x"+re.match(r".*0x([0-9a-fA-F]+) <.*",assm_line).group(1)
dpc['relativo']=assm_line.split('<')[1].split('>')[0]
print("---PC: "+dpc['absoluto']+ "PC rel"+dpc['relativo'])
#print("---PC: "+dpc['absoluto']+ "PC rel"+dpc['relativo'])
pc=dpc['absoluto']+"<"+dpc['relativo']+">"
if cp.INJECTION_SITES[injection_site] in [cp.INST_OUT, cp.INST_ADD]:
# if fault was injected ASSM_LINE MUST be in the logfile
......@@ -337,7 +340,7 @@ return old register value, new register value
def gdb_inject_fault(**kwargs):
global kill_strings
global kill_strings, logging
# These are the mandatory parameters
bits_to_flip = kwargs.get('bits_to_flip')
fault_model = kwargs.get('fault_model')
......@@ -377,6 +380,9 @@ def gdb_inject_fault(**kwargs):
logging = Logging(log_file=flip_log_file, unique_id=unique_id)
logging.info("Starting GDB script")
#init_wait_time = uniform(0, end_time * cp.MAX_SIGNAL_BEFORE_ENDING)
#time_to_sleep = (max_wait_time - self.__init_wait_time) / seq_signals
# Generate configuration file for specific test
gdb_env_string = "{}|{}|{}|{}|{}|{}|file {}; set args {}|{}".format(gdb_kernel,os.getpid(),maxregs,",".join(str(i) for i in bits_to_flip), fault_model,
flip_log_file, benchmark_binary, benchmark_args,
......@@ -404,10 +410,9 @@ def gdb_inject_fault(**kwargs):
# Starting both threads
fi_process.start()
print ("Esperando breakpoint.....")
#syncro.wait()
#syncro.reset()
"syncro = threading.Barrier(2, timeout=5) "
#if cp.DEBUG:
# cf.printf( "Waiting breakpoint.....")
signal_app_thread.start()
......@@ -553,7 +558,8 @@ def fault_injection_by_signal(**kwargs):
# Global rows list
global lock, exit_injector,num_rounds,kill_strings
benchmark_binary = kwargs.get('benchmark_binary')
kwargs['signal_cmd'] = "killall -2 {}".format(os.path.basename(benchmark_binary))
#kwargs['signal_cmd'] = "killall -2 {}".format(os.path.basename(benchmark_binary))
kwargs['signal_cmd'] = "pgrep {}".format(os.path.basename(benchmark_binary))
fault_models = kwargs.get('fault_models')
iterations = kwargs.get('iterations')
host_thread = kwargs.get('host_thread')
......@@ -654,8 +660,8 @@ Main function
def main():
global kill_strings, current_path, gpus_threads, lock, syncro, wait_finish
signal.signal(signal.SIGUSR1,receiveSignal);
signal.signal(signal.SIGUSR2,receiveEnd);
signal.signal(cp.SIGNAL_STOP,receiveSignal);
signal.signal(cp.SIGNAL_EXIT,receiveEnd);
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--conf', dest="config_file", help='Configuration file', required=True)
parser.add_argument('-i', '--iter', dest="iterations",
......@@ -670,9 +676,7 @@ def main():
args = parser.parse_args()
if args.iterations < 1:
parser.error('Iterations must be greater than zero')
if (args.device>0):
cp.LOGS_PATH=cp.LOGS_PATH+"-"+str(args.device)
cp.rewrite_path()
# Start with a different seed every time to vary the random numbers generated
# the seed will be the current number of seconds since 01/01/70
......@@ -680,7 +684,10 @@ def main():
# Read the configuration file with data for all the apps that will be executed
conf = cf.load_config_file(args.config_file)
benchmark_binary_default = conf.get('DEFAULT', 'benchmarkBinary')
if (args.device>0):
cp.LOGS_PATH="{}-{}-{}".format(cp.LOGS_PATH,args.device,benchmark_binary_default.split('/')[-1])
cp.rewrite_path()
# Connect signal SIGINT to stop the fault injector
kill_strings = ""
signal.signal(signal.SIGINT, signal_handler)
......@@ -727,7 +734,7 @@ def main():
raise FileNotFoundError(tmp_path + " path does not exists, run app_profile.py to create it")
# Set binaries for the injection
benchmark_binary_default = conf.get('DEFAULT', 'benchmarkBinary')
gdb_path_default = conf.get('DEFAULT', 'gdbExecName')
each_thread_iterations = iterations / num_gpus
......
......@@ -8,13 +8,12 @@ import common_parameters as cp
"""
Handler attached to exit event
"""
def sendsignal(signal):
    """Deliver ``signal`` to the process whose id is held in the global ``pid``.

    Args:
        signal: Signal number handed straight to os.kill. Inside this
            function the name shadows any ``signal`` module import.
    """
    # `pid` may be a string, so it is converted before the call.
    # NOTE(review): presumably this is the injector's own process id,
    # passed in through the GDB environment string - confirm.
    target_pid = int(pid)
    os.kill(target_pid, signal)
def exit_handler(event):
global global_logging
os.system ("kill -s USR2 " + str(pid))
sendsignal(cp.SIGNAL_EXIT)
global_logging.info(str("event type: exit"))
print ("llego el final")
try:
global_logging.info("exit code: {}".format(str(event.exit_code)))
......@@ -35,7 +34,7 @@ def set_event(event):
if (isinstance(event, gdb.BreakpointEvent)):
global_logging.info("Before breakpoint"+ str(time.clock()-t))
global_logging.info ("Enviado senal a "+ str(pid))
os.system ("kill -s USR1 " + str(pid))
sendsignal(cp.SIGNAL_STOP)
bp.enabled=False
gdb.execute('c')
# #os.system ("killall -2 python3")
......@@ -47,7 +46,7 @@ def set_event(event):
bit_flip.single_event()
global_logging.info("BIT FLIP SET ON SIGNAL {}".format(event.stop_signal))
#global_logging.info ("Enviado senal a "+ str(pid))
#os.system ("kill -s USR1 " + str(pid))
sendsignal(cp.SIGNAL_STOP)
except Exception as err:
global_logging.exception("EVENT DIFFERENT FROM STOP SIGNAL: {}".format(str(err)))
......@@ -80,6 +79,8 @@ def main():
[kernel,pid,max_regs,bits_to_flip, fault_model, flip_log_file,
gdb_init_strings, injection_site] = arg0.split('|')
# Logging
global_logging = Logging(log_file=flip_log_file)
global_logging.info("Starting flip_value script "+" called by " + str(pid) + " for stop kernel " + str(kernel));
......@@ -131,3 +132,9 @@ injection_site = None
bit_flip = None
main()
......@@ -2,6 +2,7 @@ import os
import re
import gdb
import time
import signal
import common_functions as cf
def exit_handler(event):
global nosalir
......@@ -19,6 +20,26 @@ def exit_handler(event):
Handler that will put a breakpoint on the kernel after
signal
"""
def handler(signum, frame):
    """Handle the timer signal installed by setalarm().

    Each call re-arms the interval timer through settimeslice(), stores in
    the global ``pillo`` the number of seconds elapsed since the global
    ``t0``, and then sends SIGINT to the current process.

    Args:
        signum: Number of the delivered signal (not used).
        frame: Interrupted stack frame (not used).
    """
    global pid, pillo, t0
    print ("handler:+++++++++++========================++++++++++++")
    settimeslice()
    elapsed = time.time() - t0
    pillo = elapsed
    # NOTE(review): presumably the self-sent SIGINT stops the program GDB
    # is running so that the stop event fires - confirm.
    own_pid = os.getpid()
    os.kill(own_pid, signal.SIGINT)
def setalarm():
    """Register handler() as the SIGALRM handler of this process."""
    alarm_signal = signal.SIGALRM
    signal.signal(alarm_signal, handler)
def settimeslice():
    """Arm the real-time interval timer: first expiry after 1 s, then every 1 s."""
    first_expiry = 1.0
    period = 1.0
    signal.setitimer(signal.ITIMER_REAL, first_expiry, period)
def pausealarm():
    """Disarm the real-time interval timer by setting its delay to zero."""
    disarmed = 0
    signal.setitimer(signal.ITIMER_REAL, disarmed)
def selectrd():
linea= cf.execute_command(gdb=gdb, to_execute="x/1i $pc")
......@@ -49,7 +70,7 @@ def selectrd():
print(x)
m = re.match(r".*R(\d+).*0x([0-9a-fA-F]+).*", x)
if m:
if (regs[m.group(1)]!=m.group(2))
if (regs[m.group(1)]!=m.group(2)):
regdst.add(m.group(1))
......@@ -57,46 +78,78 @@ def selectrd():
print (type(lista))
print(lista)
def set_event(event):
global trun,ocurrencias,t
global trun,ocurrencias,t,pillo,t0,bp
#pausealarm()
if (isinstance(event, gdb.BreakpointEvent)):
t=time.clock()
print ("Bp")
t0=time.clock()
bp.enabled=False
ocurrencias=ocurrencias+1
#signal.setitimer(signal.ITIMER_REAL,3)
else:
trun=(time.clock()-t)
print ("set_event")
#signal.setitimer(signal.ITIMER_REAL,0)
trun=(time.clock()-t0)
#signal.setitimer(signal.ITIMER_REAL,2)
#signal.setitimer(signal.ITIMER_REAL,0.1)
t=time.time()-t0
print ("\n Pongo: "+str(t)+" Pillo;"+str(pillo)+"\n")
#settimeslice()
#gdb.execute("c")
def main():
global ocurrencias,t,nosalir,trun
was_hit = False
global ocurrencias,t,nosalir,trun,pid,t0,pillo,bp
was_hit = False
#pid=os.getpid()
ocurrencias=0
# Initialize GDB to run the app (confirmation and pagination disabled)
gdb.execute('catch signal SIGALRM')
gdb.execute("set confirm off")
gdb.execute("set pagination off")
gdb.execute("set target-async off")
gdb.execute("set non-stop off")
setalarm()
settimeslice()
# Connecting to a exit handler event
gdb.events.exited.connect(exit_handler)
# Connecting to a stop signal event
gdb.events.stop.connect(set_event)
#gdb.events.cont.connect(set_event_cont)
#gdb.execute("file ~/rodinia_3.1/cuda/lud/cuda/lud_cuda")
#gdb.execute("set arg -s 10000")
#gdb.execute("break lud_cuda")
t0=time.time()
print (t0)
pillo=t0
gdb.execute("file codes/mmElem/matrixMul")
gdb.execute("set arg -wA=16384 -hA=16384 -hB=16384 -wB=16384")
gdb.execute("set cuda break_on_launch application")
#gdb.execute("set cuda break_on_launch application")
# bp=gdb.Breakpoint('matrixMulCUDA')
#gdb.execute('handle SIGALRM stop')
#gdb.execute('handle SIGALRM ignore')
#gdb.execute('handle SIGALRM')
gdb.execute('catch signal SIGALRM')
gdb.execute('r')
selectrd()
nosalir=True
while nosalir:
gdb.execute("finish")
#selectrd()
print ("1")
gdb.execute("c")
print ("1")
gdb.execute("c")
gdb.execute("c")
gdb.execute("c")
pausealarm()
gdb.execute("c")
gdb.execute("c")
#nosalir=True
# while nosalir:
# gdb.execute("finish")
# gdb.execute("c")"
print (" Ocurrencias "+str(ocurrencias)+" Tiempo acumulado de ejecucciones "+ str(trun))
f=open("tmpxxx_return_profiler.conf","w")
f.write("Ocurrencias ="+str(ocurrencias)+"\n Tiempo "+str(trun)+"\n")
......
......@@ -6,7 +6,7 @@ set -e
#set -x
#DIR_RODINIA=$HOME/rodinia_3.1/cuda
DIR_RODINIA=codes
FAULTS=100
FAULTS=1000
#CONFFILE=codes/matrixMul/matrixmul_16K.conf
#CONFFILE=codes/mmElem/matrixmul_16K.conf
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment