Commit 8621c3da authored by German Leon's avatar German Leon
Browse files

Version inestable

parent 68e252bb
...@@ -114,8 +114,8 @@ def main(): ...@@ -114,8 +114,8 @@ def main():
benchmark_binary = conf.get('DEFAULT', 'benchmarkBinary') benchmark_binary = conf.get('DEFAULT', 'benchmarkBinary')
if (args.device>0):
cp.LOGS_PATH="{}-{}-{}".format(cp.LOGS_PATH,args.device,benchmark_binary.split('/')[-1]) cp.LOGS_PATH="{}l_{}_d{}".format(cp.LOGS_PATH,benchmark_binary.split('/')[-1],args.device)
cp.rewrite_path() cp.rewrite_path()
os.system("rm -f {}".format(cp.KERNEL_INFO_DIR)) os.system("rm -f {}".format(cp.KERNEL_INFO_DIR))
......
#!/usr/bin/env python3
import argparse
import os
import re
import time
import common_functions as cf
import common_parameters as cp
def generate_dict(sm_version, input_file_name):
    """Map each kernel entry function to its allocated register count.

    The input file is expected to hold the stderr captured while compiling
    the application with ``-Xptxas -v``.  A "Compiling entry function" line
    for the requested ``sm_version`` arms the parser; the next
    "Used N registers" line supplies the count for that kernel.

    Args:
        sm_version: Architecture string to select (matched literally inside
            single quotes on the "Compiling entry function" line).
        input_file_name: Path of the captured compiler output.

    Returns:
        dict mapping kernel name (str) to register count (int).  If a kernel
        shows up more than once, the first count is kept and a warning is
        printed.
    """
    # Build both patterns once instead of on every line.  The architecture
    # string is escaped so characters such as '.' are matched literally and
    # cannot act as regex metacharacters.
    entry_pattern = re.compile(
        r".*Compiling entry function.*'(\S+)'.*for.*'{}'.*".format(re.escape(str(sm_version))))
    register_pattern = re.compile(r".*Used[ ]+(\d+)[ ]+registers.*")

    # dictionary to store the number of allocated registers per static kernel
    kernel_reg = {}
    kernel_name = ""  # kernel named by the most recent matching entry line
    check_for_register_count = False

    with open(input_file_name, "r") as f:
        for line in f:
            m = entry_pattern.match(line)
            if m:
                kernel_name = m.group(1)
                check_for_register_count = True

            m = register_pattern.match(line)
            if check_for_register_count and m:
                if kernel_name not in kernel_reg:
                    # associate the extracted register number with the kernel name
                    kernel_reg[kernel_name] = int(m.group(1))
                else:
                    print("Warning: {} exists in the kernel_reg dictionary. "
                          "Skipping this register count.".format(kernel_name))
                # Disarm until the next matching entry line so register
                # counts of other architectures are not picked up.
                check_for_register_count = False

    return kernel_reg
def profiler_caller(gdb_exec, kernel, benchmark_binary, benchmark_args):
    """Run the benchmark under the GDB profiler script and time it.

    The command is executed ``cp.MAX_TIMES_TO_PROFILE`` times through the
    shell and the mean wall-clock duration is returned.

    Args:
        gdb_exec: GDB executable used to launch the profiler script.
        kernel: Kernel name; in this function it is only echoed to stdout.
        benchmark_binary: Path of the application binary handed to GDB.
        benchmark_args: Argument string for the application.

    Returns:
        Average time in seconds of one profiler run.
    """
    script = '{} -ex \'py arg0 = {}\' -n -batch -x {}'
    # Drop literal backslash-n sequences and any remaining backslashes from
    # the argument string before embedding it in the GDB init string.
    benchmark_args_striped = benchmark_args.replace('\\n', '').replace('\\', '')
    print("KERNEL" + kernel)
    init_string = '"file {}; set args {}"'.format(benchmark_binary, benchmark_args_striped)
    #init_string = '"file {}; set args {}; break {}"'.format(benchmark_binary, benchmark_args_striped,kernel)
    profiler_cmd = script.format(gdb_exec, init_string, cp.PROFILER_SCRIPT)
    print("Profiler caller")
    if cp.DEBUG:
        print("PROFILER CMD: {}".format(profiler_cmd))

    # The kill command does not change between runs, so build it once.
    kill_cmd = "killall -9 {}; killall -9 {}".format(
        os.path.basename(gdb_exec), os.path.basename(benchmark_binary))

    acc_time = 0
    for _ in range(cp.MAX_TIMES_TO_PROFILE):
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments during the run.
        start = time.perf_counter()
        os.system(profiler_cmd)
        acc_time += time.perf_counter() - start
        # NOTE(review): assumed to run after every profiling pass so no
        # leftover gdb/benchmark process affects the next one - confirm
        # against the original indentation.
        cf.kill_all(kill_cmd)

    return acc_time / cp.MAX_TIMES_TO_PROFILE
def generate_gold(gdb_exec, benchmark_binary, benchmark_args):
    """Run the benchmark once under the profiler script to record the gold output.

    Standard output is redirected to ``cp.GOLD_OUTPUT_PATH`` and standard
    error to ``cp.GOLD_ERR_PATH``.

    Args:
        gdb_exec: GDB executable used to launch the profiler script.
        benchmark_binary: Path of the application binary handed to GDB.
        benchmark_args: Argument string for the application.

    Returns:
        The status value returned by ``os.system`` for the profiler command.
    """
    # Create tmp path and clean it if it exists.  makedirs also creates a
    # missing parent log directory and does not fail if the directory
    # already exists.
    tmp_path = os.path.dirname(os.path.realpath(__file__)) + "/" + cp.LOGS_PATH + "/tmp"
    os.makedirs(tmp_path, exist_ok=True)
    os.system("rm -rf " + tmp_path + "/*")

    script = '{} -ex \'py arg0 = {}\' -n -batch -x {} > {} 2> {}'
    # NOTE(review): unlike profiler_caller, benchmark_args is used here
    # without stripping backslashes - confirm this difference is intended.
    init_string = '"file {}; set args {}"'.format(benchmark_binary, benchmark_args)
    profiler_cmd = script.format(gdb_exec, init_string, cp.PROFILER_SCRIPT,
                                 cp.GOLD_OUTPUT_PATH, cp.GOLD_ERR_PATH)
    if cp.DEBUG:
        print("PROFILER CMD: {}".format(profiler_cmd))

    # Execute and save gold file
    return os.system(profiler_cmd)
def _default_with_override(conf, option):
    """Return a DEFAULT-section option, preferring its '_noverificar' variant."""
    override = option + '_noverificar'
    if override in conf['DEFAULT']:
        return conf.get('DEFAULT', override)
    return conf.get('DEFAULT', option)


def main():
    """Profile the configured application and store its gold output.

    Reads the configuration file given with ``-c/--conf``, measures the mean
    run time of the application under the profiler, generates the gold
    stdout/stderr files, and saves the measured time to
    ``cp.KERNEL_INFO_DIR``.

    Raises:
        EnvironmentError: if the gold generation command returns non-zero.
    """
    # Remove any kernel-info file left from a previous run.
    os.system("rm -f {}".format(cp.KERNEL_INFO_DIR))

    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--conf', dest="config_file", help='Configuration file', required=True)
    args = parser.parse_args()

    # Read the configuration file with data for all the apps that will be executed
    conf = cf.load_config_file(args.config_file)

    # First set env vars
    cf.set_python_env()

    ########################################################################
    # Profiler step
    # The max time is obtained by running the application under the
    # profiler; the application output is also captured as the golden copy.
    print("###################################################\n1 - Profiling application")

    # The '_noverificar' variants of binary and arguments take precedence
    # over the regular options when they are present in the DEFAULT section.
    benchmark_binary = _default_with_override(conf, 'benchmarkBinary')
    benchmark_args = _default_with_override(conf, 'benchmarkArgs')

    gdb_exec = conf.get("DEFAULT", "gdbExecName")
    kernel = conf.get('DEFAULT', 'kernel')

    max_time_app = profiler_caller(gdb_exec=gdb_exec, kernel=kernel,
                                   benchmark_binary=benchmark_binary, benchmark_args=benchmark_args)

    # saving gold
    print("Saving gold")
    generate_gold_result = generate_gold(gdb_exec=gdb_exec,
                                         benchmark_binary=benchmark_binary, benchmark_args=benchmark_args)
    if generate_gold_result != 0:
        raise EnvironmentError("Gold generation did not finish well, the fault injection will not work")

    # Remove trash GDB info from the std output and the err output
    cf.remove_useless_information_from_output(cp.GOLD_OUTPUT_PATH)
    cf.remove_useless_information_from_output(cp.GOLD_ERR_PATH)

    # Save the kernel configuration txt file
    cf.save_file(file_path=cp.KERNEL_INFO_DIR, data={'max_time': max_time_app})

    print("1 - Profile finished\n###################################################")


if __name__ == '__main__':
    main()
...@@ -37,11 +37,14 @@ class RunGDB(Thread): ...@@ -37,11 +37,14 @@ class RunGDB(Thread):
start_cmd = "{}/{}".format(self.__base_path, self.__flip_script) start_cmd = "{}/{}".format(self.__base_path, self.__flip_script)
script = 'env CUDA_VISIBLE_DEVICES={} {} -ex \'py arg0 = "{}"\' -n -batch -x {} > {} 2>{} &' script = 'env CUDA_VISIBLE_DEVICES={} {} -ex \'py arg0 = "{}"\' -n -batch -x {} > {} 2>{} &'
#script = 'env CUDA_VISIBLE_DEVICES=1 {} -ex \'py arg0 = "{}"\' -n -batch -x {} > {} 2>{} &'
#printf(script.format(self.__gpu_to_execute, self.__gdb_exe_name, self.__gdb_env_string,start_cmd, self.__inj_output_path,self.__inj_err_path)) #script = 'env CUDA_VISIBLE_DEVICES={} {} -ex \'py arg0 = "{}"\' -n -batch -x {} &'
os.system(script.format(self.__gpu_to_execute, self.__gdb_exe_name, self.__gdb_env_string, os.system(script.format(self.__gpu_to_execute, self.__gdb_exe_name, self.__gdb_env_string,
start_cmd, self.__inj_output_path, start_cmd,self.__inj_output_path, self.__inj_err_path))
self.__inj_err_path))
#os.system(script.format(self.__gpu_to_execute, self.__gdb_exe_name, self.__gdb_env_string,start_cmd))
print(script.format(self.__gpu_to_execute, self.__gdb_exe_name, self.__gdb_env_string, print(script.format(self.__gpu_to_execute, self.__gdb_exe_name, self.__gdb_env_string,
start_cmd, self.__inj_output_path, start_cmd, self.__inj_output_path,
self.__inj_err_path)) self.__inj_err_path))
......
...@@ -13,11 +13,11 @@ Signal the app to stop so GDB can execute the script to flip a value ...@@ -13,11 +13,11 @@ Signal the app to stop so GDB can execute the script to flip a value
class SignalApp(Thread): class SignalApp(Thread):
def __init__(self, signal_cmd, max_wait_time, log_path, unique_id, signals_to_send, init_sleep, syncro,waitfinish): def __init__(self,file_connection, max_wait_time, log_path, unique_id, signals_to_send, init_sleep, syncro,waitfinish):
global hang global crashsystem,hang
hang=False hang=False
super(SignalApp, self).__init__() super(SignalApp, self).__init__()
self.__signal_cmd = signal_cmd self.__file_connection = file_connection
os.system("rm -f {}".format(log_path)) os.system("rm -f {}".format(log_path))
self.__log = Logging(log_file=log_path, unique_id=unique_id) self.__log = Logging(log_file=log_path, unique_id=unique_id)
...@@ -32,23 +32,11 @@ class SignalApp(Thread): ...@@ -32,23 +32,11 @@ class SignalApp(Thread):
self._waitfinish=waitfinish self._waitfinish=waitfinish
def run(self): def run(self):
# Send a series of signal to make sure gdb will flip a value in one of the interrupt signals # Send a series of signal to make sure gdb will flip a value in one of the interrupt signals
log_string = "Sending a signal using command: {} after {}s and each {}s.".format(self.__signal_cmd, self.__init_wait_time,self.__time_to_sleep) #log_string = "Sending a signal using command: {} after {}s and each {}s.".format(self.__signal_cmd, self.__init_wait_time,self.__time_to_sleep)
log_string = "Sending a signal each {}s of {} times.".format(self.__time_to_sleep,self.__signals_to_send )
if cp.DEBUG: if cp.DEBUG:
self.__log.info(log_string) self.__log.info(log_string)
crashsystem=False
# Sleep for a random time
# time.sleep(self.__init_wait_time)
#a=[ uniform.randint(1,1000) for _ in range(self.__signals_to_send))]
#a.sort()
#os.system(self.__signal_cmd)
#for line in os.popen(self.__signal_cmd):
# pid=int(line)
#os.kill(int(self.__signal_cmd),signal.SIGINT)
self.__log.info(log_string)
try: try:
(self._syncro).wait() (self._syncro).wait()
#except threading.BrokenBarrierError: #except threading.BrokenBarrierError:
...@@ -57,39 +45,59 @@ class SignalApp(Thread): ...@@ -57,39 +45,59 @@ class SignalApp(Thread):
self.__log.info("Breakpoint inicial fuera de tiempo") self.__log.info("Breakpoint inicial fuera de tiempo")
#(self._waitfinish).wait() #(self._waitfinish).wait()
(self._syncro).reset() (self._syncro).reset()
hang=True crashsystem=True
self.__log.info("Timeout syncron of breakpoint\n") return
pid = (os.popen(self.__signal_cmd)).read() #self.__log.info("Timeout syncron of breakpoint\n")
pid=int(pid.split('\n')[0])
#print("Comando {} y resultado {} de tipo {}".format(self.__signal_cmd,pid, type (pid)) ) pidf=open(self.__file_connection,"r")
pid=int(pidf.read())
pidf.close()
#os.remove(self.__file_connection)
#os.system(self.__signal_cmd)
#pidf = (os.popen(self.__signal_cmd))
#print("Comando {} y resultado {} de tipo {}".format(self.__signal_cmd,pid, type (pid)) )
#pid=int(pid.split('\n')[0])
#pidf.close()
print(" resultado pid {} de tipo {}".format(pid, type (pid)) )
# Time random # Time random
#print ("INIT:"+str(self.__init_wait_time)+"sleep"+str()) #print ("INIT:"+str(self.__init_wait_time)+"sleep"+str())
time.sleep(self.__init_wait_time) time.sleep(self.__init_wait_time)
crash=False
for signals in range(0, self.__signals_to_send): for signals in range(0, self.__signals_to_send):
#os.system("{} > /dev/null 2>/dev/null".format(self.__signal_cmd)) #os.system("{} > /dev/null 2>/dev/null".format(self.__signal_cmd))
try: try:
os.kill(pid,signal.SIGINT) os.kill(pid,signal.SIGINT)
self.__log.info("sending signal {}".format(signals))
except: except:
self.__log.info("Process is dead") self.__log.info("Process is dead. Crash?")
os.kill(pid,signal.SIGKILL)
(self._waitfinish).abort()
(self._waitfinish).reset()
(self._syncro).abort()
(self._syncro).reset()
crash=True
break break
self.__log.info("sending signal {}".format(signals))
try: try:
(self._syncro).wait() (self._syncro).wait()
except: except:
(self._syncro).abort() (self._syncro).abort()
break #break
#print("Breakpoint fuera de tiempo") #print("Breakpoint fuera de tiempo")
(self._syncro).reset() (self._syncro).reset()
time.sleep(self.__time_to_sleep) time.sleep(self.__time_to_sleep)
#(self._syncro).reset() #(self._syncro).reset()
try: if not crash:
(self._waitfinish).wait() try:
#except threading.BrokenBarrierError: (self._waitfinish).wait()
except: #except threading.BrokenBarrierError:
(self._waitfinish).abort() except:
self.__log.info("Hang timeout execution") (self._waitfinish).abort()
hang=True self.__log.info("Hang timeout execution")
self.__log.info("Timeout execution programa") hang=True
self.__log.info("Timeout execution programa")
(self._waitfinish).reset() (self._waitfinish).reset()
def ishang (self): def ishang (self):
......
...@@ -17,8 +17,8 @@ faultModel = 0 ...@@ -17,8 +17,8 @@ faultModel = 0
# RF -> Register File # RF -> Register File
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET) # INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Adress (NOT IMPLEMENTED YET) # INST_composed -> Instruction Adress (NOT IMPLEMENTED YET)
injectionSite = RF
injectionSite = RF
# Max time factor to finish the app, this will be multiplied by the application running time # Max time factor to finish the app, this will be multiplied by the application running time
# For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is # For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is
# Considered as a crash is 10s # Considered as a crash is 10s
......
...@@ -30,7 +30,7 @@ maxWaitTimes = 5 ...@@ -30,7 +30,7 @@ maxWaitTimes = 5
benchmarkBinary = /home/badia/mycarol-fi/codes/lavaMD/lavaMD benchmarkBinary = /home/badia/mycarol-fi/codes/lavaMD/lavaMD
# Commands to set the session inside GDB environment # Commands to set the session inside GDB environment
benchmarkArgs = -boxes1d 40 benchmarkArgs = -boxes1d 60
# CSV output file. It will be overwrite at each injection # CSV output file. It will be overwrite at each injection
csvFile = results/lavaMD_IO.csv csvFile = results/lavaMD_IO.csv
......
...@@ -313,8 +313,10 @@ int correct=true; ...@@ -313,8 +313,10 @@ int correct=true;
//printf ("Z %f golden %f dif %f Error %d\n",fv_cpu[i].z,golden[i][3],fabs(fv_cpu[i].z-golden[i][3]) ,semicorrect ); //printf ("Z %f golden %f dif %f Error %d\n",fv_cpu[i].z,golden[i][3],fabs(fv_cpu[i].z-golden[i][3]) ,semicorrect );
if (!semicorrect) { if (!semicorrect) {
printf("Error! En la componete %05d \n", i //printf("Error! En la componete %05d \n", i
); //);
//
int basura=0;
#pragma omp critical #pragma omp critical
{ {
correct = false; correct = false;
......
...@@ -313,8 +313,8 @@ int correct=true; ...@@ -313,8 +313,8 @@ int correct=true;
//printf ("Z %f golden %f dif %f Error %d\n",fv_cpu[i].z,golden[i][3],fabs(fv_cpu[i].z-golden[i][3]) ,semicorrect ); //printf ("Z %f golden %f dif %f Error %d\n",fv_cpu[i].z,golden[i][3],fabs(fv_cpu[i].z-golden[i][3]) ,semicorrect );
if (!semicorrect) { if (!semicorrect) {
printf("Error! En la componete %05d \n", i //printf("Error! En la componete %05d \n", i
); // );
#pragma omp critical #pragma omp critical
{ {
correct = false; correct = false;
......
...@@ -319,7 +319,7 @@ int correct=true; ...@@ -319,7 +319,7 @@ int correct=true;
//printf ("Z %f golden %f dif %f Error %d\n",fv_cpu[i].z,golden[3],fabs(fv_cpu[i].z-golden[3]) ,semicorrect ); //printf ("Z %f golden %f dif %f Error %d\n",fv_cpu[i].z,golden[3],fabs(fv_cpu[i].z-golden[3]) ,semicorrect );
if (!semicorrect) { if (!semicorrect) {
printf("Error! En la componete %05d \n", i // printf("Error! En la componete %05d \n", i
); );
#pragma omp critical #pragma omp critical
{ {
......
...@@ -17,7 +17,7 @@ faultModel = 0 ...@@ -17,7 +17,7 @@ faultModel = 0
# RF -> Register File # RF -> Register File
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET) # INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Adress (NOT IMPLEMENTED YET) # INST_composed -> Instruction Adress (NOT IMPLEMENTED YET)
injectionSite = RF injectionSite = INST_OUT
# Max time factor to finish the app, this will be multiplied by the application running time # Max time factor to finish the app, this will be multiplied by the application running time
# For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is # For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is
...@@ -26,7 +26,7 @@ maxWaitTimes = 5 ...@@ -26,7 +26,7 @@ maxWaitTimes = 5
# binary file of the application # binary file of the application
# Must be full path # Must be full path
benchmarkBinary = /home/badia/rodinia_3.1/cuda/lud/cuda/lud_cuda benchmarkBinary = /home/badia/mycarol-fi/codes/lud/cuda/lud_cuda
#benchmarkBinary_noverificar = /home/badia/rodinia_3.1/cuda/lavaMD/lavaMD #benchmarkBinary_noverificar = /home/badia/rodinia_3.1/cuda/lavaMD/lavaMD
...@@ -36,10 +36,10 @@ benchmarkArgs = -s 10000 -v ...@@ -36,10 +36,10 @@ benchmarkArgs = -s 10000 -v
benchmarkArgs_noverificar = -s 10000 benchmarkArgs_noverificar = -s 10000
# CSV output file. It will be overwrite at each injection # CSV output file. It will be overwrite at each injection
csvFile = results/lud_RF.csv csvFile = results/lud_IO.csv
# You should create a script on the benchmark source folder to verify GOLD_OUTPUT x INJ_OUTPUT # You should create a script on the benchmark source folder to verify GOLD_OUTPUT x INJ_OUTPUT
goldenCheckScript = ../rodinia_3.1/cuda/lud/sdc_check.sh goldenCheckScript = codes/lud/sdc_check.sh
# Number of signals that will be sent to the application # Number of signals that will be sent to the application
seqSignals = 20 seqSignals = 20
......
...@@ -17,9 +17,9 @@ faultModel = 0 ...@@ -17,9 +17,9 @@ faultModel = 0
# RF -> Register File # RF -> Register File
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET) # INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Adress (NOT IMPLEMENTED YET) # INST_composed -> Instruction Adress (NOT IMPLEMENTED YET)
injectionSite = RF #injectionSite = RF
#injectionSite = INST_OUT injectionSite = INST_OUT
# Max time factor to finish the app, this will be multiplied by the application running time # Max time factor to finish the app, this will be multiplied by the application running time
# For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is # For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is
# Considered as a crash is 10s # Considered as a crash is 10s
......
...@@ -17,8 +17,9 @@ faultModel = 0 ...@@ -17,8 +17,9 @@ faultModel = 0
# RF -> Register File # RF -> Register File
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET) # INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Adress (NOT IMPLEMENTED YET) # INST_composed -> Instruction Adress (NOT IMPLEMENTED YET)
injectionSite = RF #injectionSite = RF
injectionSite = INST_OUT
# Max time factor to finish the app, this will be multiplied by the application running time # Max time factor to finish the app, this will be multiplied by the application running time
# For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is # For example if your app spend 2s, and the maxWaitTimes is 5, the max running time before it is
# Considered as a crash is 10s # Considered as a crash is 10s
...@@ -37,7 +38,7 @@ benchmarkArgs = 32000 1 ...@@ -37,7 +38,7 @@ benchmarkArgs = 32000 1
#benchmarkArgs_noverificar = -s 2048 #benchmarkArgs_noverificar = -s 2048
# CSV output file. It will be overwrite at each injection # CSV output file. It will be overwrite at each injection
csvFile = results/nw_RF.csv csvFile = results/nw_IO.csv
# You should create a script on the benchmark source folder to verify GOLD_OUTPUT x INJ_OUTPUT # You should create a script on the benchmark source folder to verify GOLD_OUTPUT x INJ_OUTPUT
goldenCheckScript = codes/nw/sdc_check.sh goldenCheckScript = codes/nw/sdc_check.sh
......
import signal import signal
import os
# Max size of register # Max size of register
SINGLE_MAX_SIZE_REGISTER = 32 SINGLE_MAX_SIZE_REGISTER = 32
...@@ -8,7 +9,9 @@ SINGLE_MAX_SIZE_REGISTER = 32 ...@@ -8,7 +9,9 @@ SINGLE_MAX_SIZE_REGISTER = 32
MAX_TIMES_TO_PROFILE = 2 MAX_TIMES_TO_PROFILE = 2
# Log path to store all injections info # Log path to store all injections info
LOGS_PATH = 'logs' LOGS_PATH = 'logs/'
FILE_PID_PIF= "/pid_program_injected"
# Internal python scripts # Internal python scripts
...@@ -102,7 +105,7 @@ FOCUS_ERROR_STRING = "Focus not set on any active CUDA kernel." ...@@ -102,7 +105,7 @@ FOCUS_ERROR_STRING = "Focus not set on any active CUDA kernel."
def rewrite_path(): def rewrite_path():
# Temporary file to store kernel information # Temporary file to store kernel information
global KERNEL_INFO_DIR,GOLD_ERR_PATH,GOLD_OUTPUT_PATH,INJ_OUTPUT_PATH,INJ_ERR_PATH,DIFF_LOG,DIFF_ERR_LOG, SIGNAL_APP_LOG, LOG_DEFAULT_NAME global KERNEL_INFO_DIR,GOLD_ERR_PATH,GOLD_OUTPUT_PATH,INJ_OUTPUT_PATH,INJ_ERR_PATH,DIFF_LOG,DIFF_ERR_LOG, SIGNAL_APP_LOG, LOG_DEFAULT_NAME, FILE_PID_PIF
KERNEL_INFO_DIR = LOGS_PATH + '/tmp/carol-fi-kernel-info.txt' KERNEL_INFO_DIR = LOGS_PATH + '/tmp/carol-fi-kernel-info.txt'
# For golden generation # For golden generation
GOLD_ERR_PATH = LOGS_PATH + '/tmp/carol_fi_golden_bench_err.txt' GOLD_ERR_PATH = LOGS_PATH + '/tmp/carol_fi_golden_bench_err.txt'
...@@ -113,13 +116,14 @@ def rewrite_path(): ...@@ -113,13 +116,14 @@ def rewrite_path():
INJ_ERR_PATH = LOGS_PATH + '/tmp/carol_fi_inj_bench_err_{}.txt' INJ_ERR_PATH = LOGS_PATH + '/tmp/carol_fi_inj_bench_err_{}.txt'
# Temporary difference logs # Temporary difference logs
DIFF_LOG = LOGS_PATH + '/tmp/diff_{}.log' DIFF_LOG = LOGS_PATH + '/tmp/diff_{}.log'
DIFF_ERR_LOG = LOGS_PATH + '/tmp/diff_err_{}.log' DIFF_ERR_LOG = LOGS_PATH + '/tmp/diff_err_{}.log'
SIGNAL_APP_LOG = LOGS_PATH + '/tmp/signal_app_thread_{}.txt' SIGNAL_APP_LOG = LOGS_PATH + '/tmp/signal_app_thread_{}.txt'
PATH_CURRENT=os.path.dirname(os.path.realpath(__file__))
PATH_TMP= PATH_CURRENT + "/" + LOGS_PATH + "/tmp"
FILE_PID_PIF= PATH_TMP+"/pid_program_injected"
# Common body of log filename # Common body of log filename
......
...@@ -75,7 +75,7 @@ def signal_handler(sig, frame): ...@@ -75,7 +75,7 @@ def signal_handler(sig, frame):
print("Current_path "+current_path) print("Current_path "+current_path)
for th in gpus_threads: for th in gpus_threads:
th.join() th.join()
#sys.exit(0) sys.exit(0)
""" """
...@@ -384,15 +384,16 @@ def gdb_inject_fault(**kwargs): ...@@ -384,15 +384,16 @@ def gdb_inject_fault(**kwargs):
#init_wait_time = uniform(0, end_time * cp.MAX_SIGNAL_BEFORE_ENDING) #init_wait_time = uniform(0, end_time * cp.MAX_SIGNAL_BEFORE_ENDING)
#time_to_sleep = (max_wait_time - self.__init_wait_time) / seq_signals #time_to_sleep = (max_wait_time - self.__init_wait_time) / seq_signals
# Generate configuration file for specific test # Generate configuration file for specific test
gdb_env_string = "{}|{}|{}|{}|{}|{}|file {}; set args {}|{}".format(gdb_kernel,os.getpid(),maxregs,",".join(str(i) for i in bits_to_flip), fault_model, gdb_env_string = " {}|{}|{}|{}|{}|{}|{}|file {}; set args {}|{}".format(gdb_kernel,os.getpid(),maxregs,cp.FILE_PID_PIF,",".join(str(i) for i in bits_to_flip), fault_model, flip_log_file, benchmark_binary, benchmark_args, injection_site)
flip_log_file, benchmark_binary, benchmark_args,
injection_site)
if cp.DEBUG: if cp.DEBUG:
cf.printf("THREAD {} ENV GENERATE FINISHED".format(host_thread)) cf.printf("THREAD {} ENV GENERATE FINISHED".format(host_thread))
# First we have to start the SignalApp thread # First we have to start the SignalApp thread
signal_app_thread = SignalApp(max_wait_time=end_time, signal_cmd=signal_cmd, signal_app_thread = SignalApp(max_wait_time=end_time, file_connection=cp.FILE_PID_PIF,
log_path=signal_app_log, unique_id=unique_id, log_path=signal_app_log, unique_id=unique_id,
signals_to_send=seq_signals, signals_to_send=seq_signals,
init_sleep=init_sleep,syncro=syncro,waitfinish=wait_finish) init_sleep=init_sleep,syncro=syncro,waitfinish=wait_finish)
...@@ -556,7 +557,7 @@ by sending a SIGINT signal to the application ...@@ -556,7 +557,7 @@ by sending a SIGINT signal to the application
def fault_injection_by_signal(**kwargs): def fault_injection_by_signal(**kwargs):
# Global rows list # Global rows list
global lock, exit_injector,num_rounds,kill_strings global lock, exit_injector,num_rounds,kill_strings,crashsystem
benchmark_binary = kwargs.get('benchmark_binary') benchmark_binary = kwargs.get('benchmark_binary')
#kwargs['signal_cmd'] = "killall -2 {}".format(os.path.basename(benchmark_binary)) #kwargs['signal_cmd'] = "killall -2 {}".format(os.path.basename(benchmark_binary))
kwargs['signal_cmd'] = "pgrep {}".format(os.path.basename(benchmark_binary)) kwargs['signal_cmd'] = "pgrep {}".format(os.path.basename(benchmark_binary))
...@@ -566,19 +567,23 @@ def fault_injection_by_signal(**kwargs): ...@@ -566,19 +567,23 @@ def fault_injection_by_signal(**kwargs):
injection_site = kwargs.get('injection_site') injection_site = kwargs.get('injection_site')
summary_file = kwargs.get('summary_file') summary_file = kwargs.get('summary_file')
header = kwargs.get('header') header = kwargs.get('header')
max_fallos=10 max_fallos=5
acc_fault_injected=0 acc_fault_injected=0
cf.printf("-----------------------------------------------------------------------------------------------") cf.printf("-----------------------------------------------------------------------------------------------")
# Execute the fault injector for each one of the sections(apps) of the configuration file # Execute the fault injector for each one of the sections(apps) of the configuration file
for fault_model in fault_models: for fault_model in fault_models:
# Execute iterations number of fault injection for a specific app # Execute iterations number of fault injection for a specific app
print("================")
try: try:
print(num_rounds)
ret_profiler = cf.load_config_file("tmpxxx_num_rounds.conf") ret_profiler = cf.load_config_file("tmpxxx_num_rounds.conf")
num_rounds=int(ret_profiler.get('DEFAULT', 'Ocurrencias')) num_rounds=int(ret_profiler.get('DEFAULT', 'Ocurrencias'))
print(num_rounds)
os.system ("rm tmpxxx_num_rounds.conf") os.system ("rm tmpxxx_num_rounds.conf")
except: except:
num_rounds = 1 num_rounds = 1
print(num_rounds)
print("================")
while num_rounds <= iterations: while num_rounds <= iterations:
if exit_injector: if exit_injector:
return return
...@@ -624,17 +629,7 @@ def fault_injection_by_signal(**kwargs): ...@@ -624,17 +629,7 @@ def fault_injection_by_signal(**kwargs):
num_rounds += 1 num_rounds += 1
acc_fault_injeted=0 acc_fault_injeted=0
else: else:
acc_fault_injected+=1 acc_fault_injected+=1
if (acc_fault_injected == (max_fallos/2)):
for cmd in kill_strings.split(";"):
os.system(cmd + " > /dev/null 2>&1")
for th in gpus_threads:
try:
th.join()
except:
nulo=1
time.sleep(240)
if (acc_fault_injected == max_fallos): if (acc_fault_injected == max_fallos):
exit_injector=True exit_injector=True
for cmd in kill_strings.split(";"): for cmd in kill_strings.split(";"):
...@@ -685,9 +680,12 @@ def main(): ...@@ -685,9 +680,12 @@ def main():
# Read the configuration file with data for all the apps that will be executed # Read the configuration file with data for all the apps that will be executed
conf = cf.load_config_file(args.config_file) conf = cf.load_config_file(args.config_file)
benchmark_binary_default = conf.get('DEFAULT', 'benchmarkBinary') benchmark_binary_default = conf.get('DEFAULT', 'benchmarkBinary')
if (args.device>0):
cp.LOGS_PATH="{}-{}-{}".format(cp.LOGS_PATH,args.device,benchmark_binary_default.split('/')[-1]) cp.LOGS_PATH="{}l_{}_d{}".format(cp.LOGS_PATH,benchmark_binary_default.split('/')[-1],args.device)
cp.rewrite_path() cp.rewrite_path()
cf.printf(cp.FILE_PID_PIF)
# Connect signal SIGINT to stop the fault injector # Connect signal SIGINT to stop the fault injector
kill_strings = "" kill_strings = ""
signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGINT, signal_handler)
...@@ -727,7 +725,7 @@ def main(): ...@@ -727,7 +725,7 @@ def main():
bin_path = current_path + '/bin' bin_path = current_path + '/bin'
if not os.path.exists(bin_path): if not os.path.exists(bin_path):
os.mkdir(bin_path) os.mkdir(bin_path)
# Create tmp path and clean it if it exists # Create tmp path and clean it if it exists
tmp_path = current_path + "/" + cp.LOGS_PATH + "/tmp" tmp_path = current_path + "/" + cp.LOGS_PATH + "/tmp"
if not os.path.exists(tmp_path): if not os.path.exists(tmp_path):
...@@ -768,7 +766,8 @@ def main(): ...@@ -768,7 +766,8 @@ def main():
'summary_file': summary_file, 'summary_file': summary_file,
'header': fieldnames 'header': fieldnames
} }
syncro = threading.Barrier(2, timeout=kwargs.get('max_time') ) #syncro = threading.Barrier(2, timeout=5*kwargs.get('max_time') )
syncro = threading.Barrier(2, timeout=80 )
wait_finish = threading.Barrier(2, timeout=kwargs.get('max_time')) wait_finish = threading.Barrier(2, timeout=kwargs.get('max_time'))
kill_strings += "killall -9 {};killall -9 {};".format(os.path.basename(benchmark_binary), os.path.basename(gdb)) kill_strings += "killall -9 {};killall -9 {};".format(os.path.basename(benchmark_binary), os.path.basename(gdb))
......
...@@ -4,7 +4,7 @@ import time ...@@ -4,7 +4,7 @@ import time
from classes.BitFlip import BitFlip from classes.BitFlip import BitFlip
from classes.Logging import Logging from classes.Logging import Logging
import common_parameters as cp import common_parameters as cp
import common_functions as cf # All common functions will be at common_functions module
""" """
Handler attached to exit event Handler attached to exit event
""" """
...@@ -30,7 +30,7 @@ signal ...@@ -30,7 +30,7 @@ signal
def set_event(event): def set_event(event):
# Accessing global vars # Accessing global vars
global global_logging, was_hit, bit_flip,bp,t global global_logging, was_hit, bit_flip,bp,t,primero
if (isinstance(event, gdb.BreakpointEvent)): if (isinstance(event, gdb.BreakpointEvent)):
global_logging.info("Before breakpoint"+ str(time.clock()-t)) global_logging.info("Before breakpoint"+ str(time.clock()-t))
global_logging.info ("Enviado senal a "+ str(pid)) global_logging.info ("Enviado senal a "+ str(pid))
...@@ -71,12 +71,11 @@ def main(): ...@@ -71,12 +71,11 @@ def main():
# Connecting to a exit handler event # Connecting to a exit handler event
gdb.events.exited.connect(exit_handler) gdb.events.exited.connect(exit_handler)
# Connecting to a stop signal event
gdb.events.stop.connect(set_event)
# Get variables values from environment # Get variables values from environment
# Firsn parse line # Firsn parse line
[kernel,pid,max_regs,bits_to_flip, fault_model, flip_log_file, [kernel,pid,max_regs,file_connect,bits_to_flip, fault_model, flip_log_file,
gdb_init_strings, injection_site] = arg0.split('|') gdb_init_strings, injection_site] = arg0.split('|')
...@@ -100,13 +99,26 @@ def main(): ...@@ -100,13 +99,26 @@ def main():
# Start app execution # Start app execution
t=time.clock(); t=time.clock();
gdb.Breakpoint('main')
#gdb.execute("break "+kernel) #gdb.execute("break "+kernel)
bp=gdb.Breakpoint(kernel) bp=gdb.Breakpoint(kernel)
global_logging.info("Put Break "+ str(time.clock()-t)) global_logging.info("Put Break "+ str(time.clock()-t))
gdb.execute("r") gdb.execute("r")
try:
pid_bench=gdb.execute ("info proc", to_string=True).splitlines()[0].split(' ')[1]
except:
global_logging.info("problema solictando info proc")
global_logging.info("PID: {}".format(pid_bench))
fp= open(file_connect,"w")
fp.write(pid_bench)
fp.close()
# Connecting to a stop signal event
gdb.events.stop.connect(set_event)
print("4")
i = 0 i = 0
try: try:
while 'The program' not in gdb.execute('c', to_string=True): while 'The program' not in gdb.execute('c', to_string=True):
...@@ -120,7 +132,7 @@ def main(): ...@@ -120,7 +132,7 @@ def main():
if 'Failed' in err_str: if 'Failed' in err_str:
gdb.execute('quit') gdb.execute('quit')
global_logging.exception("QUIT REQUIRED") global_logging.exception("QUIT REQUIRED")
print("6")
# Call main execution # Call main execution
global_logging = None global_logging = None
......
...@@ -130,13 +130,16 @@ def main(): ...@@ -130,13 +130,16 @@ def main():
gdb.execute("file codes/mmElem/matrixMul") gdb.execute("file codes/mmElem/matrixMul")
gdb.execute("set arg -wA=16384 -hA=16384 -hB=16384 -wB=16384") gdb.execute("set arg -wA=16384 -hA=16384 -hB=16384 -wB=16384")
#gdb.execute("set cuda break_on_launch application") #gdb.execute("set cuda break_on_launch application")
# bp=gdb.Breakpoint('matrixMulCUDA') bm=gdb.Breakpoint('main')
bp=gdb.Breakpoint('matrixMulCUDA')
#gdb.execute('handle SIGALRM stop') #gdb.execute('handle SIGALRM stop')
#gdb.execute('handle SIGALRM ignore') #gdb.execute('handle SIGALRM ignore')
#gdb.execute('handle SIGALRM') #gdb.execute('handle SIGALRM')
gdb.execute('catch signal SIGALRM') #gdb.execute('catch signal SIGALRM')
gdb.execute('r') gdb.execute('r')
#selectrd() #selectrd()
a=gdb.execute ("info proc", to_string=True).splitlines()[0].split(' ')[1]
print("Process:{}".format(a))
print ("1") print ("1")
gdb.execute("c") gdb.execute("c")
print ("1") print ("1")
......
...@@ -5,7 +5,7 @@ set -e ...@@ -5,7 +5,7 @@ set -e
#uncomment to a more verbose script #uncomment to a more verbose script
#set -x #set -x
FAULTS=10 FAULTS=1000
#CONFFILE=codes/matrixMul/matrixmul_16K.conf #CONFFILE=codes/matrixMul/matrixmul_16K.conf
#CONFFILE=codes/mmElem/matrixmul_16K.conf #CONFFILE=codes/mmElem/matrixmul_16K.conf
......
...@@ -5,7 +5,7 @@ set -e ...@@ -5,7 +5,7 @@ set -e
#uncomment to a more verbose script #uncomment to a more verbose script
#set -x #set -x
DIR_RODINIA=codes DIR_RODINIA=codes
FAULTS=100 FAULTS=1000
#CONFFILE=codes/matrixMul/matrixmul_16K.conf #CONFFILE=codes/matrixMul/matrixmul_16K.conf
#CONFFILE=codes/mmElem/matrixmul_16K.conf #CONFFILE=codes/mmElem/matrixmul_16K.conf
......
...@@ -4,7 +4,7 @@ set -e ...@@ -4,7 +4,7 @@ set -e
#uncomment to a more verbose script #uncomment to a more verbose script
#set -x #set -x
DIR_RODINIA=$HOME/rodinia_3.1/cuda DIR_RODINIA=codes
FAULTS=1000 FAULTS=1000
#CONFFILE=codes/matrixMul/matrixmul_16K.conf #CONFFILE=codes/matrixMul/matrixmul_16K.conf
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment