Commit e4ae2422 authored by German Leon's avatar German Leon
Browse files

Initial commit

parents
#!/usr/bin/env python3
import argparse
import os
import re
import time
import common_functions as cf
import common_parameters as cp
def generate_dict(sm_version, input_file_name):
    """Collect the register count of every kernel compiled for ``sm_version``.

    The input is the text captured from stderr while compiling the
    application with ``-Xptxas -v``.  A "Compiling entry function" line
    for the requested architecture arms the parser; the next
    "Used N registers" line supplies the count for that kernel.

    :param sm_version: architecture string as it appears between quotes in
        the ptxas output.  It is interpolated into the regular expression
        verbatim (not escaped).
    :param input_file_name: path of the captured compiler output.
    :return: dict mapping kernel name -> register count (int).  When a
        kernel shows up more than once the first count is kept and a
        warning is printed.
    """
    # Both patterns are loop-invariant: build them once instead of
    # re-formatting the pattern string for every line of the log.
    entry_pattern = re.compile(
        r".*Compiling entry function.*'(\S+)'.*for.*'{}'.*".format(sm_version))
    usage_pattern = re.compile(r".*Used[ ]+(\d+)[ ]+registers.*")

    # dictionary to store the number of allocated registers per static kernel
    kernel_reg = {}
    kernel_name = ""  # name of the kernel whose register line is pending
    check_for_register_count = False

    with open(input_file_name, "r") as f:
        for line in f:
            entry = entry_pattern.match(line)
            if entry:
                kernel_name = entry.group(1)
                check_for_register_count = True

            usage = usage_pattern.match(line)
            if check_for_register_count and usage:
                if kernel_name not in kernel_reg:
                    # associate the extracted register number with the kernel name
                    kernel_reg[kernel_name] = int(usage.group(1))
                else:
                    print("Warning: {} exists in the kernel_reg dictionary. "
                          "Skipping this register count.".format(kernel_name))
                # Disarm until the next matching entry-function line.
                check_for_register_count = False
    return kernel_reg
def profiler_caller(gdb_exec, kernel, benchmark_binary, benchmark_args,
                    device, section, kernel_end):
    """Run the benchmark under the gdb profiler script and average its timings.

    The gdb command is executed ``cp.MAX_TIMES_TO_PROFILE`` times.  After
    every run the value stored under ``DEFAULT/Tiempo`` in
    ``tmpxxx_return_profiler.conf`` is read back and accumulated.

    :param gdb_exec: gdb executable to launch.
    :param kernel: breakpoint location appended as ``break <kernel>``.
    :param benchmark_binary: program loaded with ``file``.
    :param benchmark_args: argument string; literal ``\\n`` sequences and
        backslashes are removed before use.
    :param device: value exported as ``CUDA_VISIBLE_DEVICES``.
    :param section: first field of the string handed to the profiler script.
    :param kernel_end: second field of that string.
    :return: ``(mean profiler-reported time, mean wall-clock time)``.
    """
    cleaned_args = benchmark_args.replace('\\n', '').replace('\\', '')
    print("KERNEL" + kernel)

    gdb_setup = '"{};{};file {}; set args {}; break {}"'.format(
        section, kernel_end, benchmark_binary, cleaned_args, kernel)
    profiler_cmd = 'env CUDA_VISIBLE_DEVICES={} {} -ex \'py arg0 = {}\' -n -batch -x {}'.format(
        device, gdb_exec, gdb_setup, cp.PROFILER_SCRIPT)

    print("Profiler caller")
    if cp.DEBUG:
        print("PROFILER CMD: {}".format(profiler_cmd))

    kill_cmd = "killall -9 {}; killall -9 {}".format(
        os.path.basename(gdb_exec), os.path.basename(benchmark_binary))

    wall_total = 0
    reported_total = 0
    for _ in range(0, cp.MAX_TIMES_TO_PROFILE):
        started = time.time()
        os.system(profiler_cmd)
        finished = time.time()

        run_report = cf.load_config_file("tmpxxx_return_profiler.conf")
        reported_total += float(run_report.get('DEFAULT', 'Tiempo'))
        wall_total += finished - started

        # NOTE(review): indentation was lost in the source; the cleanup is
        # assumed to run after every profiling pass -- confirm.
        cf.kill_all(kill_cmd)

    runs = cp.MAX_TIMES_TO_PROFILE
    return reported_total / runs, wall_total / runs
def generate_gold(gdb_exec, benchmark_binary, benchmark_args,device):
    """Run the benchmark once under gdb and store its output as the gold copy.

    stdout is redirected to ``cp.GOLD_OUTPUT_PATH`` and stderr to
    ``cp.GOLD_ERR_PATH``.  Before the run, ``<script dir>/<cp.LOGS_PATH>/tmp``
    is created if needed and emptied.

    :param gdb_exec: gdb executable to launch.
    :param benchmark_binary: program loaded with ``file``.
    :param benchmark_args: argument string passed to ``set args``.
    :param device: value exported as ``CUDA_VISIBLE_DEVICES``.
    :return: the status returned by ``os.system`` for the gdb command.
    """
    import shutil  # only needed here; keeps the module import block untouched

    # Create tmp path and clean it if it exists.  Done in-process rather
    # than with `mkdir -p` / `rm -rf` through the shell, so an install path
    # containing spaces or shell metacharacters is handled correctly.
    tmp_path = os.path.dirname(os.path.realpath(__file__)) + "/" + cp.LOGS_PATH + "/tmp"
    os.makedirs(tmp_path, exist_ok=True)
    for entry in os.listdir(tmp_path):
        if entry.startswith("."):
            continue  # the old `rm -rf tmp/*` glob never matched hidden entries
        target = os.path.join(tmp_path, entry)
        if os.path.isdir(target) and not os.path.islink(target):
            shutil.rmtree(target, ignore_errors=True)
        else:
            try:
                os.remove(target)
            except OSError:
                pass  # best effort, like `rm -f`

    script = 'env CUDA_VISIBLE_DEVICES={} {} -ex \'py arg0 = {}\' -n -batch -x {} > {} 2> {}'
    init_string = '"file {}; set args {}"'.format(benchmark_binary, benchmark_args)
    profiler_cmd = script.format(device, gdb_exec, init_string, cp.PROFILER_SCRIPT,
                                 cp.GOLD_OUTPUT_PATH, cp.GOLD_ERR_PATH)
    if cp.DEBUG:
        print("PROFILER CMD: {}".format(profiler_cmd))

    # Execute and save gold file
    return os.system(profiler_cmd)
def main():
    """Profile the configured application and store its gold output.

    Steps: delete the previous kernel-info file, parse ``-c/--conf`` and
    ``-d/--device``, time the application through ``profiler_caller``,
    produce the gold stdout/stderr through ``generate_gold``, filter gdb
    noise out of both files and pickle the measured times to
    ``cp.KERNEL_INFO_DIR``.

    :raises EnvironmentError: when the gold run returns a non-zero status.
    """
    os.system("rm -f {}".format(cp.KERNEL_INFO_DIR))

    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--conf', dest="config_file", help='Configuration file', required=True)
    parser.add_argument('-d', '--device', dest="device", help="The GPU to perform FI."
                                                              " Default is 0.", required=False, default=0, type=int)
    args = parser.parse_args()

    # Read the configuration file with data for all the apps that will be executed
    conf = cf.load_config_file(args.config_file)

    # First set env vars
    cf.set_python_env()

    ########################################################################
    # Profiler step: measures the running time and captures the app output
    # that becomes the golden copy.
    print("###################################################\n1 - Profiling application")
    defaults = conf['DEFAULT']

    # The "_noverificar" variants take precedence when they are present.
    binary_key = 'benchmarkBinary'
    if 'benchmarkBinary_noverificar' in defaults:
        binary_key = 'benchmarkBinary_noverificar'
    benchmark_binary = conf.get('DEFAULT', binary_key)

    args_key = 'benchmarkArgs'
    if 'benchmarkArgs_noverificar' in defaults:
        args_key = 'benchmarkArgs_noverificar'
    benchmark_args = conf.get('DEFAULT', args_key)

    section = 'kernel_end' in defaults
    kernel_end = conf.get('DEFAULT', 'kernel_end') if section else ''

    gdb_exec = conf.get("DEFAULT", "gdbExecName")
    kernel = conf.get('DEFAULT', 'kernel')

    max_time_kernel, max_time_app = profiler_caller(
        gdb_exec=gdb_exec, kernel=kernel, benchmark_binary=benchmark_binary,
        benchmark_args=benchmark_args, device=args.device, section=section,
        kernel_end=kernel_end)
    print("Time kernel= " + str(max_time_kernel) + "Time app " + str(max_time_app))

    # saving gold
    print("Saving gold")
    gold_status = generate_gold(gdb_exec=gdb_exec, benchmark_binary=benchmark_binary,
                                benchmark_args=benchmark_args, device=args.device)
    if gold_status != 0:
        raise EnvironmentError("Gold generation did not finish well, the fault injection will not work")

    # Remove trash GDB info from the std output and the err output
    for gold_file in (cp.GOLD_OUTPUT_PATH, cp.GOLD_ERR_PATH):
        cf.remove_useless_information_from_output(gold_file)

    # Save the kernel configuration txt file
    timings = {'max_time': max_time_app, 'max_time_kernel': max_time_kernel}
    cf.save_file(file_path=cp.KERNEL_INFO_DIR, data=timings)
    print("1 - Profile finished\n###################################################")


if __name__ == '__main__':
    main()
#!/usr/bin/env python3
import argparse
import os
import re
import time
import common_functions as cf
import common_parameters as cp
def generate_dict(sm_version, input_file_name):
    """Collect the register count of every kernel compiled for ``sm_version``.

    The input is the text captured from stderr while compiling the
    application with ``-Xptxas -v``.  A "Compiling entry function" line
    for the requested architecture arms the parser; the next
    "Used N registers" line supplies the count for that kernel.

    :param sm_version: architecture string as it appears between quotes in
        the ptxas output.  It is interpolated into the regular expression
        verbatim (not escaped).
    :param input_file_name: path of the captured compiler output.
    :return: dict mapping kernel name -> register count (int).  When a
        kernel shows up more than once the first count is kept and a
        warning is printed.
    """
    # Both patterns are loop-invariant: build them once instead of
    # re-formatting the pattern string for every line of the log.
    entry_pattern = re.compile(
        r".*Compiling entry function.*'(\S+)'.*for.*'{}'.*".format(sm_version))
    usage_pattern = re.compile(r".*Used[ ]+(\d+)[ ]+registers.*")

    # dictionary to store the number of allocated registers per static kernel
    kernel_reg = {}
    kernel_name = ""  # name of the kernel whose register line is pending
    check_for_register_count = False

    with open(input_file_name, "r") as f:
        for line in f:
            entry = entry_pattern.match(line)
            if entry:
                kernel_name = entry.group(1)
                check_for_register_count = True

            usage = usage_pattern.match(line)
            if check_for_register_count and usage:
                if kernel_name not in kernel_reg:
                    # associate the extracted register number with the kernel name
                    kernel_reg[kernel_name] = int(usage.group(1))
                else:
                    print("Warning: {} exists in the kernel_reg dictionary. "
                          "Skipping this register count.".format(kernel_name))
                # Disarm until the next matching entry-function line.
                check_for_register_count = False
    return kernel_reg
def profiler_caller(gdb_exec, kernel, benchmark_binary, benchmark_args):
    """Run the benchmark under the gdb profiler script and average the wall time.

    The gdb command is executed ``cp.MAX_TIMES_TO_PROFILE`` times and the
    mean elapsed time is returned.

    :param gdb_exec: gdb executable to launch.
    :param kernel: kernel identifier; only printed by this version.
    :param benchmark_binary: program loaded with ``file``.
    :param benchmark_args: argument string; literal ``\\n`` sequences and
        backslashes are removed before use.
    :return: mean wall-clock duration of one profiling run, in seconds.
    """
    cleaned_args = benchmark_args.replace('\\n', '').replace('\\', '')
    print("KERNEL" + kernel)

    gdb_setup = '"file {}; set args {}"'.format(benchmark_binary, cleaned_args)
    profiler_cmd = '{} -ex \'py arg0 = {}\' -n -batch -x {}'.format(
        gdb_exec, gdb_setup, cp.PROFILER_SCRIPT)

    print("Profiler caller")
    if cp.DEBUG:
        print("PROFILER CMD: {}".format(profiler_cmd))

    kill_cmd = "killall -9 {}; killall -9 {}".format(
        os.path.basename(gdb_exec), os.path.basename(benchmark_binary))

    wall_total = 0
    for _ in range(0, cp.MAX_TIMES_TO_PROFILE):
        started = time.time()
        os.system(profiler_cmd)
        finished = time.time()
        wall_total += finished - started

        # NOTE(review): indentation was lost in the source; the cleanup is
        # assumed to run after every profiling pass -- confirm.
        cf.kill_all(kill_cmd)

    return wall_total / cp.MAX_TIMES_TO_PROFILE
def generate_gold(gdb_exec, benchmark_binary, benchmark_args):
    """Run the benchmark once under gdb and store its output as the gold copy.

    stdout is redirected to ``cp.GOLD_OUTPUT_PATH`` and stderr to
    ``cp.GOLD_ERR_PATH``.  Before the run, ``<script dir>/<cp.LOGS_PATH>/tmp``
    is created if needed and emptied.

    :param gdb_exec: gdb executable to launch.
    :param benchmark_binary: program loaded with ``file``.
    :param benchmark_args: argument string passed to ``set args``.
    :return: the status returned by ``os.system`` for the gdb command.
    """
    import shutil  # only needed here; keeps the module import block untouched

    # Create tmp path and clean it if it exists.  os.makedirs also creates
    # the missing parent log directory (os.mkdir raised in that case), and
    # the in-process cleanup does not depend on the path being shell-safe.
    tmp_path = os.path.dirname(os.path.realpath(__file__)) + "/" + cp.LOGS_PATH + "/tmp"
    os.makedirs(tmp_path, exist_ok=True)
    for entry in os.listdir(tmp_path):
        if entry.startswith("."):
            continue  # the old `rm -rf tmp/*` glob never matched hidden entries
        target = os.path.join(tmp_path, entry)
        if os.path.isdir(target) and not os.path.islink(target):
            shutil.rmtree(target, ignore_errors=True)
        else:
            try:
                os.remove(target)
            except OSError:
                pass  # best effort, like `rm -f`

    script = '{} -ex \'py arg0 = {}\' -n -batch -x {} > {} 2> {}'
    init_string = '"file {}; set args {}"'.format(benchmark_binary, benchmark_args)
    profiler_cmd = script.format(gdb_exec, init_string, cp.PROFILER_SCRIPT,
                                 cp.GOLD_OUTPUT_PATH, cp.GOLD_ERR_PATH)
    if cp.DEBUG:
        print("PROFILER CMD: {}".format(profiler_cmd))

    # Execute and save gold file
    return os.system(profiler_cmd)
def main():
    """Profile the configured application and store its gold output.

    Steps: delete the previous kernel-info file, parse ``-c/--conf``, time
    the application through ``profiler_caller``, produce the gold
    stdout/stderr through ``generate_gold``, filter gdb noise out of both
    files and pickle the measured time to ``cp.KERNEL_INFO_DIR``.

    :raises EnvironmentError: when the gold run returns a non-zero status.
    """
    os.system("rm -f {}".format(cp.KERNEL_INFO_DIR))

    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--conf', dest="config_file", help='Configuration file', required=True)
    args = parser.parse_args()

    # Read the configuration file with data for all the apps that will be executed
    conf = cf.load_config_file(args.config_file)

    # First set env vars
    cf.set_python_env()

    ########################################################################
    # Profiler step: measures the running time and captures the app output
    # that becomes the golden copy.
    print("###################################################\n1 - Profiling application")
    defaults = conf['DEFAULT']

    # The "_noverificar" variants take precedence when they are present.
    binary_key = 'benchmarkBinary'
    if 'benchmarkBinary_noverificar' in defaults:
        binary_key = 'benchmarkBinary_noverificar'
    benchmark_binary = conf.get('DEFAULT', binary_key)

    args_key = 'benchmarkArgs'
    if 'benchmarkArgs_noverificar' in defaults:
        args_key = 'benchmarkArgs_noverificar'
    benchmark_args = conf.get('DEFAULT', args_key)

    gdb_exec = conf.get("DEFAULT", "gdbExecName")
    kernel = conf.get('DEFAULT', 'kernel')

    max_time_app = profiler_caller(gdb_exec=gdb_exec, kernel=kernel,
                                   benchmark_binary=benchmark_binary,
                                   benchmark_args=benchmark_args)

    # saving gold
    print("Saving gold")
    gold_status = generate_gold(gdb_exec=gdb_exec, benchmark_binary=benchmark_binary,
                                benchmark_args=benchmark_args)
    if gold_status != 0:
        raise EnvironmentError("Gold generation did not finish well, the fault injection will not work")

    # Remove trash GDB info from the std output and the err output
    for gold_file in (cp.GOLD_OUTPUT_PATH, cp.GOLD_ERR_PATH):
        cf.remove_useless_information_from_output(gold_file)

    # Save the kernel configuration txt file
    cf.save_file(file_path=cp.KERNEL_INFO_DIR, data={'max_time': max_time_app})
    print("1 - Profile finished\n###################################################")


if __name__ == '__main__':
    main()
/**
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/**
* Matrix multiplication: C = A * B.
* Host code.
*
* This sample implements matrix multiplication as described in Chapter 3
* of the programming guide.
* It has been written for clarity of exposition to illustrate various CUDA
* programming principles, not with the goal of providing the most
* performant generic kernel for matrix multiplication.
*
* See also:
* V. Volkov and J. Demmel, "Benchmarking GPUs to tune dense linear algebra,"
* in Proc. 2008 ACM/IEEE Conf. on Supercomputing (SC '08),
* Piscataway, NJ: IEEE Press, 2008, pp. Art. 31:1-11.
*/
// System includes
#include <stdio.h>
#include <assert.h>
// CUDA runtime
#include <cuda_runtime.h>
// Helper functions and utilities to work with CUDA
#include <helper_functions.h>
#include <helper_cuda.h>
#include <omp.h>
#if BUILD_TIMER == 1
static double timer;
#endif
/**
 * Matrix multiplication (CUDA Kernel) on the device: C = A * B
 * ldA, ldB and ldC are the row strides (in elements) used to index A, B
 * and C; ldA is also the length of the dot product.
 * Every thread computes one element of C:
 * C[row][col] = sum over k of A[row][k] * B[k][col]
 * There is no bounds check, so the launch grid must not extend past C.
 */
__global__ void matrixMulCUDA(float *C, float *A, float *B, int ldA, int ldB, int ldC) {
    // Global coordinates of the element of C owned by this thread
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = blockIdx.x * blockDim.x + threadIdx.x;

    // First element of row `row` of A
    const float *rowA = A + row * ldA;

    float acc = 0.0f;
    for (int k = 0; k < ldA; ++k) {
        acc += rowA[k] * B[k * ldB + col];
    }

    C[row * ldC + col] = acc;
}
/**
 * Store `val` into the first `size` elements of `data`.
 * Nothing is written when `size` is zero or negative.
 */
void constantInit(float *data, int size, float val) {
    float *cursor = data;
    for (int remaining = size; remaining > 0; --remaining) {
        *cursor++ = val;
    }
}
/**
 * Return the current wall-clock time in seconds (microsecond resolution)
 * as reported by gettimeofday().
 */
double mysecond() {
    struct timeval tp = { 0, 0 };
    // The timezone argument is obsolete; POSIX leaves the behaviour
    // unspecified when it is not a null pointer, so pass NULL.
    gettimeofday(&tp, NULL);
    return ((double) tp.tv_sec + (double) tp.tv_usec * 1.e-6);
}
/**
 * Run a simple test of matrix multiplication using CUDA.
 *
 * A is filled with 1.0 and B with 0.01 on the host, both are copied to the
 * device, matrixMulCUDA is launched once on a (dimsB.x / block_size) x
 * (dimsA.y / block_size) grid of block_size x block_size threads, and the
 * result is copied back and compared with dimsA.x * 0.01.
 *
 * argc / argv are accepted for interface compatibility and are not used.
 * Any failing allocation or CUDA call terminates the process.
 *
 * Returns EXIT_SUCCESS when every element of C passes the check,
 * EXIT_FAILURE otherwise.
 */
int matrixMultiply(int argc, char **argv, int block_size, dim3 &dimsA,
        dim3 &dimsB) {
    (void) argc;
    (void) argv;

    // Allocate host memory for matrices A and B.
    // Byte counts are kept in size_t so they are not truncated to 32 bits.
    unsigned int size_A = dimsA.x * dimsA.y;
    size_t mem_size_A = sizeof(float) * size_A;
    float *h_A = (float *) malloc(mem_size_A);
    unsigned int size_B = dimsB.x * dimsB.y;
    size_t mem_size_B = sizeof(float) * size_B;
    float *h_B = (float *) malloc(mem_size_B);

    // The inputs were previously used without checking the allocation.
    if (h_A == NULL || h_B == NULL) {
        fprintf(stderr, "Failed to allocate host matrices A and B!\n");
        exit (EXIT_FAILURE);
    }

    // Initialize host memory
    const float valB = 0.01f;
    constantInit(h_A, size_A, 1.0f);
    constantInit(h_B, size_B, valB);

    // Allocate device memory
    float *d_A, *d_B, *d_C;

    // Allocate host matrix C
    dim3 dimsC(dimsB.x, dimsA.y, 1);
    size_t mem_size_C = (size_t) dimsC.x * dimsC.y * sizeof(float);
    float *h_C = (float *) malloc(mem_size_C);

    if (h_C == NULL) {
        fprintf(stderr, "Failed to allocate host matrix C!\n");
        exit (EXIT_FAILURE);
    }

    cudaError_t error;

    error = cudaMalloc((void **) &d_A, mem_size_A);

    if (error != cudaSuccess) {
        printf("cudaMalloc d_A returned error %s (code %d), line(%d)\n",
                cudaGetErrorString(error), error, __LINE__);
        exit (EXIT_FAILURE);
    }

    error = cudaMalloc((void **) &d_B, mem_size_B);

    if (error != cudaSuccess) {
        printf("cudaMalloc d_B returned error %s (code %d), line(%d)\n",
                cudaGetErrorString(error), error, __LINE__);
        exit (EXIT_FAILURE);
    }

    error = cudaMalloc((void **) &d_C, mem_size_C);

    if (error != cudaSuccess) {
        printf("cudaMalloc d_C returned error %s (code %d), line(%d)\n",
                cudaGetErrorString(error), error, __LINE__);
        exit (EXIT_FAILURE);
    }

    // copy host memory to device
    error = cudaMemcpy(d_A, h_A, mem_size_A, cudaMemcpyHostToDevice);

    if (error != cudaSuccess) {
        printf("cudaMemcpy (d_A,h_A) returned error %s (code %d), line(%d)\n",
                cudaGetErrorString(error), error, __LINE__);
        exit (EXIT_FAILURE);
    }

    error = cudaMemcpy(d_B, h_B, mem_size_B, cudaMemcpyHostToDevice);

    if (error != cudaSuccess) {
        printf("cudaMemcpy (d_B,h_B) returned error %s (code %d), line(%d)\n",
                cudaGetErrorString(error), error, __LINE__);
        exit (EXIT_FAILURE);
    }

    // Setup execution parameters
    dim3 threads(block_size, block_size);
    dim3 grid(dimsB.x / threads.x, dimsA.y / threads.y);

    // Create and start timer
    printf("Computing result using CUDA Kernel...\n");

    // Allocate CUDA events that we'll use for timing
    cudaEvent_t start;
    error = cudaEventCreate(&start);

    if (error != cudaSuccess) {
        fprintf(stderr, "Failed to create start event (error code %s)!\n",
                cudaGetErrorString(error));
        exit (EXIT_FAILURE);
    }

    cudaEvent_t stop;
    error = cudaEventCreate(&stop);

    if (error != cudaSuccess) {
        fprintf(stderr, "Failed to create stop event (error code %s)!\n",
                cudaGetErrorString(error));
        exit (EXIT_FAILURE);
    }

    // Record the start event
    error = cudaEventRecord(start, NULL);

    if (error != cudaSuccess) {
        fprintf(stderr, "Failed to record start event (error code %s)!\n",
                cudaGetErrorString(error));
        exit (EXIT_FAILURE);
    }

    // Execute the kernel
    int nIter = 1;
#if BUILD_TIMER == 1
    printf("BEFORE START KERNEL %lf\n", mysecond() - timer);
    double t1 = mysecond();
#endif
    for (int j = 0; j < nIter; j++) {
        matrixMulCUDA <<<grid, threads>>>(d_C, d_A, d_B, dimsA.x, dimsB.x, dimsC.x);

        // A rejected launch configuration is only reported through
        // cudaGetLastError(); without this check the run would continue
        // and verify an uninitialised result buffer.
        error = cudaGetLastError();
        if (error != cudaSuccess) {
            fprintf(stderr, "Failed to launch matrixMulCUDA kernel (error code %s)!\n",
                    cudaGetErrorString(error));
            exit (EXIT_FAILURE);
        }

        cudaDeviceSynchronize();
    }
#if BUILD_TIMER == 1
    double exec_time = mysecond() - t1;
    printf("KERNEL EXECUTION TIME %lf\n", exec_time);
#endif

    // Record the stop event
    error = cudaEventRecord(stop, NULL);

    if (error != cudaSuccess) {
        fprintf(stderr, "Failed to record stop event (error code %s)!\n",
                cudaGetErrorString(error));
        exit (EXIT_FAILURE);
    }

    // Wait for the stop event to complete
    error = cudaEventSynchronize(stop);

    if (error != cudaSuccess) {
        fprintf(stderr,
                "Failed to synchronize on the stop event (error code %s)!\n",
                cudaGetErrorString(error));
        exit (EXIT_FAILURE);
    }

    float msecTotal = 0.0f;
    error = cudaEventElapsedTime(&msecTotal, start, stop);

    if (error != cudaSuccess) {
        fprintf(stderr,
                "Failed to get time elapsed between events (error code %s)!\n",
                cudaGetErrorString(error));
        exit (EXIT_FAILURE);
    }

#if BUILD_TIMER == 1
    // Compute and print the performance
    float msecPerMatrixMul = msecTotal / nIter;
    double flopsPerMatrixMul = 2.0 * (double) dimsA.x * (double) dimsA.y
            * (double) dimsB.x;
    double gigaFlops = (flopsPerMatrixMul * 1.0e-9f)
            / (msecPerMatrixMul / 1000.0f);
    printf(
            "Performance= %.2f GFlop/s, Time= %.3f msec, Size= %.0f Ops, WorkgroupSize= %u threads/block\n",
            gigaFlops, msecPerMatrixMul, flopsPerMatrixMul,
            threads.x * threads.y);
#endif

    // Copy result from device to host
    error = cudaMemcpy(h_C, d_C, mem_size_C, cudaMemcpyDeviceToHost);

    if (error != cudaSuccess) {
        printf("cudaMemcpy (h_C,d_C) returned error %s (code %d), line(%d)\n",
                cudaGetErrorString(error), error, __LINE__);
        exit (EXIT_FAILURE);
    }

    printf("Checking computed result for correctness: ");
    bool correct = true;

    // test relative error by the formula
    //     |<x, y>_cpu - <x,y>_gpu|/<|x|, |y|>  < eps
    double eps = 1.e-6; // machine zero
#if BUILD_TIMER == 1
    t1 = mysecond();
#endif
#pragma omp parallel for shared(h_C, correct)
    for (int i = 0; i < (int) (dimsC.x * dimsC.y); i++) {
        float abs_err = fabs(h_C[i] - float(dimsA.x * valB));
        float dot_length = dimsA.x;
        float abs_val = fabs(h_C[i]);
        float rel_err = abs_err / abs_val / dot_length;

        // Written as !(rel_err <= eps) so that a NaN result counts as an
        // error; `rel_err > eps` is false for NaN and let it pass.
        if (!(rel_err <= eps)) {
            printf("Error! Matrix[%05d]=%.8f, ref=%.8f error term is > %E\n", i,
                    h_C[i], dimsA.x * valB, eps);
#pragma omp critical
            {
                correct = false;
            }
        }
    }
#if BUILD_TIMER == 1
    exec_time = mysecond() - t1;
    printf("CMP TIME %lf\n", exec_time);
#endif
    printf("%s\n", correct ? "Result = PASS" : "Result = FAIL");

    // Clean up memory and the timing events
    free(h_A);
    free(h_B);
    free(h_C);
    cudaFree(d_A);
    cudaFree(d_B);
    cudaFree(d_C);
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    printf(
            "\nNOTE: The CUDA Samples are not meant for performance measurements. "
                    "Results may vary when GPU Boost is enabled.\n");

    if (correct) {
        return EXIT_SUCCESS;
    } else {
        return EXIT_FAILURE;
    }
}
/**
 * Program main.
 *
 * Recognised flags: -help / -?, -device=n, -wA, -hA, -wB, -hB.
 * Without size flags A is (10 * block_size) x (10 * block_size) and B is
 * (20 * block_size) wide by (10 * block_size) high, where block_size is 16
 * for devices with compute capability major < 2 and 32 otherwise.
 * The process exit status is the value returned by matrixMultiply().
 */
int main(int argc, char **argv) {
#if BUILD_TIMER == 1
    timer = mysecond();
#endif
    printf("[Matrix Multiply Using CUDA] - Starting...\n");

    if (checkCmdLineFlag(argc, (const char **) argv, "help")
            || checkCmdLineFlag(argc, (const char **) argv, "?")) {
        printf("Usage -device=n (n >= 0 for deviceID)\n");
        printf("      -wA=WidthA -hA=HeightA (Width x Height of Matrix A)\n");
        printf("      -wB=WidthB -hB=HeightB (Width x Height of Matrix B)\n");
        printf(
                "  Note: Outer matrix dimensions of A & B matrices must be equal.\n");

        exit (EXIT_SUCCESS);
    }

    cudaError_t error;

    // By default, we use device 0, otherwise we override the device ID based on what is provided at the command line
    int devID = 0;

    if (checkCmdLineFlag(argc, (const char **) argv, "device")) {
        devID = getCmdLineArgumentInt(argc, (const char **) argv, "device");
        error = cudaSetDevice(devID);

        // The result used to be ignored, silently falling back to whatever
        // device was current.
        if (error != cudaSuccess) {
            printf("cudaSetDevice returned error %s (code %d), line(%d)\n",
                    cudaGetErrorString(error), error, __LINE__);
            exit (EXIT_FAILURE);
        }
    }

    cudaDeviceProp deviceProp;
    error = cudaGetDevice(&devID);

    if (error != cudaSuccess) {
        printf("cudaGetDevice returned error %s (code %d), line(%d)\n",
                cudaGetErrorString(error), error, __LINE__);
        exit (EXIT_FAILURE);
    }

    error = cudaGetDeviceProperties(&deviceProp, devID);

    // Check the status before touching deviceProp: on failure the struct
    // is not filled in and every field read below would be garbage.
    if (error != cudaSuccess) {
        printf(
                "cudaGetDeviceProperties returned error %s (code %d), line(%d)\n",
                cudaGetErrorString(error), error, __LINE__);
        exit (EXIT_FAILURE);
    }

    if (deviceProp.computeMode == cudaComputeModeProhibited) {
        fprintf(stderr,
                "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
        exit (EXIT_SUCCESS);
    }

    printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID,
            deviceProp.name, deviceProp.major, deviceProp.minor);

    // Use a larger block size for Fermi and above
    int block_size = (deviceProp.major < 2) ? 16 : 32;

    dim3 dimsA(5 * 2 * block_size, 5 * 2 * block_size, 1);
    dim3 dimsB(5 * 4 * block_size, 5 * 2 * block_size, 1);

    // width of Matrix A
    if (checkCmdLineFlag(argc, (const char **) argv, "wA")) {
        dimsA.x = getCmdLineArgumentInt(argc, (const char **) argv, "wA");
    }

    // height of Matrix A
    if (checkCmdLineFlag(argc, (const char **) argv, "hA")) {
        dimsA.y = getCmdLineArgumentInt(argc, (const char **) argv, "hA");
    }

    // width of Matrix B
    if (checkCmdLineFlag(argc, (const char **) argv, "wB")) {
        dimsB.x = getCmdLineArgumentInt(argc, (const char **) argv, "wB");
    }

    // height of Matrix B
    if (checkCmdLineFlag(argc, (const char **) argv, "hB")) {
        dimsB.y = getCmdLineArgumentInt(argc, (const char **) argv, "hB");
    }

    if (dimsA.x != dimsB.y) {
        printf("Error: outer matrix dimensions must be equal. (%d != %d)\n",
                dimsA.x, dimsB.y);
        exit (EXIT_FAILURE);
    }

    printf("MatrixA(%d,%d), MatrixB(%d,%d)\n", dimsA.x, dimsA.y, dimsB.x,
            dimsB.y);

    int matrix_result = matrixMultiply(argc, argv, block_size, dimsA, dimsB);

    exit(matrix_result);
}
# 1 "matrixMul.cu"
# 139 "/usr/include/stdio.h" 3
extern FILE *stderr;
# 74 "/usr/include/c++/9/iostream" 3
static struct _ZNSt8ios_base4InitE _ZN39_INTERNAL_17_matrixMul_cpp1_ii_9deaad98St8__ioinitE __attribute__((visibility("default"))) = {};
extern void *__dso_handle __attribute__((visibility("hidden")));
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wcast-qual"
#define __NV_CUBIN_HANDLE_STORAGE__ static
#if !defined(__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__)
#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
#endif
#include "crt/host_runtime.h"
#include "matrixMul.fatbin.c"
extern void __device_stub__Z13matrixMulCUDAPfS_S_iii(float *, float *, float *, int, int, int);
static void __nv_cudaEntityRegisterCallback(void **);
static void __sti____cudaRegisterAll(void) __attribute__((__constructor__));
/* Host-side launch stub for the matrixMulCUDA kernel: opens a launch prologue for 6 arguments, registers each parameter at offsets 0, 8, 16, 24, 28 and 32, then calls __cudaLaunch with the address of the host function matrixMulCUDA. NOTE(review): this looks like nvcc-generated output (see the "matrixMul.cudafe1.stub.c" line marker below) -- regenerate rather than edit by hand. */
void __device_stub__Z13matrixMulCUDAPfS_S_iii(float *__par0, float *__par1, float *__par2, int __par3, int __par4, int __par5){__cudaLaunchPrologue(6);__cudaSetupArgSimple(__par0, 0UL);__cudaSetupArgSimple(__par1, 8UL);__cudaSetupArgSimple(__par2, 16UL);__cudaSetupArgSimple(__par3, 24UL);__cudaSetupArgSimple(__par4, 28UL);__cudaSetupArgSimple(__par5, 32UL);__cudaLaunch(((char *)((void ( *)(float *, float *, float *, int, int, int))matrixMulCUDA)));}
/* Host-callable definition of matrixMulCUDA: forwards its six arguments unchanged to __device_stub__Z13matrixMulCUDAPfS_S_iii. This comment sits before the line marker so the marker still applies to the definition that follows it. */
# 50 "matrixMul.cu"
void matrixMulCUDA( float *__cuda_0,float *__cuda_1,float *__cuda_2,int __cuda_3,int __cuda_4,int __cuda_5)
# 50 "matrixMul.cu"
{__device_stub__Z13matrixMulCUDAPfS_S_iii( __cuda_0,__cuda_1,__cuda_2,__cuda_3,__cuda_4,__cuda_5);
# 66 "matrixMul.cu"
}
/* Registration glue. __nv_cudaEntityRegisterCallback saves the fatbin handle it receives and registers the matrixMulCUDA host function under the symbol _Z13matrixMulCUDAPfS_S_iii; __sti____cudaRegisterAll (declared with __attribute__((__constructor__)) earlier in this file) passes that callback to __cudaRegisterBinary. */
# 1 "matrixMul.cudafe1.stub.c"
static void __nv_cudaEntityRegisterCallback( void **__T3) { __nv_dummy_param_ref(__T3); __nv_save_fatbinhandle_for_managed_rt(__T3); __cudaRegisterEntry(__T3, ((void ( *)(float *, float *, float *, int, int, int))matrixMulCUDA), _Z13matrixMulCUDAPfS_S_iii, (-1)); }
static void __sti____cudaRegisterAll(void) { __cudaRegisterBinary(__nv_cudaEntityRegisterCallback); }
#pragma GCC diagnostic pop
This diff is collapsed.
#ifndef __SKIP_INTERNAL_FATBINARY_HEADERS
#include "fatbinary_section.h"
#endif
#define __CUDAFATBINSECTION ".nvFatBinSegment"
#define __CUDAFATBINDATASECTION ".nv_fatbin"
asm(
".section .nv_fatbin, \"a\"\n"
".align 8\n"
"fatbinData:\n"
".quad 0x00100001ba55ed50,0x0000000000000110,0x0000005001010002,0x00000000000000c0\n"
".quad 0x00000000000000be,0x0000004600010007,0x0000000c00000040,0x0000000000002013\n"
".quad 0x0000000000000000,0x0000000000000268,0x754d78697274616d,0x00000000206f2e6c\n"
".quad 0x010102464c457fa2,0x0002660001000733,0xc0230001006e00be,0xf500010012000801\n"
".quad 0x380040004605460d,0x0100040040000300,0x72747368732e0000,0x2700082e00626174\n"
".quad 0x735f00ff00086d79,0x766e2e0078646e68,0x2100326f666e692e,0x2e00df004800010f\n"
".quad 0x0100402200010003,0x0108003000322e00,0x722f0400400b1f00,0x0174131113004000\n"
".quad 0x000100a82200010e,0x2a00240600061811,0x0000065700180008,0x0500480f01a80500\n"
".quad 0x003801130040a81b,0x2f0038081500010f,0x0008801700010006,0x0000000000000000\n"
".text\n");
#ifdef __cplusplus
extern "C" {
#endif
extern const unsigned long long fatbinData[36];
#ifdef __cplusplus
}
#endif
#ifdef __cplusplus
extern "C" {
#endif
static const __fatBinC_Wrapper_t __fatDeviceText __attribute__ ((aligned (8))) __attribute__ ((section (__CUDAFATBINSECTION)))=
{ 0x466243b1, 2, fatbinData, (void**)__cudaPrelinkedFatbins };
#ifdef __cplusplus
}
#endif
#define NUM_PRELINKED_OBJECTS 0
[DEFAULT]
debug =True
# Name of the gdb executable
gdbExecName = /usr/local/cuda-10.1.243/bin/cuda-gdb
# Which fault model to use, 0 -> single; 1 -> double;
# 2 -> random; 3 -> zeros; 4 -> least 16 significant bits (LSB);
# 5 -> least 8 significant bits (LSB)
# If you want multiple fault models, place them separated by ','
# faultModel = 0,2,3
faultModel = 0
# Injection site
# Can be:
# RF -> Register File
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Address (NOT IMPLEMENTED YET)
injectionSite = RF
# Max time factor to finish the app; it is multiplied by the application's running time.
# For example, if your app takes 2s and maxWaitTimes is 5, the maximum running time
# before the run is considered a crash is 10s.
maxWaitTimes = 5
# binary file of the application
# Must be full path
benchmarkBinary = /home/badia/carol-fi-carol-fi_cuda-parallel/codes/mmElem/matrixMul
# Commands to set the session inside GDB environment
benchmarkArgs = -device=1 -wA=16384 -hA=16384 -hB=16384 -wB=16384
# CSV output file. It will be overwritten at each injection
csvFile = codes/mmElem/fi_matrix_mul_single_bit.csv
# You should create a script on the benchmark source folder to verify GOLD_OUTPUT x INJ_OUTPUT
goldenCheckScript = codes/mmElem/sdc_check.sh
# Number of signals that will be sent to the application
seqSignals = 20
# Initial sleep time in seconds before start sending signals
# Generally the memory setup time
initSleep = 2.3
[DEFAULT]
debug =True
# Name of the gdb executable
gdbExecName = /usr/local/cuda-10.1.243/bin/cuda-gdb
# Which fault model to use, 0 -> single; 1 -> double;
# 2 -> random; 3 -> zeros; 4 -> least 16 significant bits (LSB);
# 5 -> least 8 significant bits (LSB)
# If you want multiple fault models, place them separated by ','
# faultModel = 0,2,3
faultModel = 0
# Injection site
# Can be:
# RF -> Register File
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Address (NOT IMPLEMENTED YET)
injectionSite = RF, INST_OUT
# Max time factor to finish the app; it is multiplied by the application's running time.
# For example, if your app takes 2s and maxWaitTimes is 5, the maximum running time
# before the run is considered a crash is 10s.
maxWaitTimes = 5
# binary file of the application
# Must be full path
benchmarkBinary = /home/badia/mycarol-fi/codes/mmElem/matrixMul
# Commands to set the session inside GDB environment
benchmarkArgs = -wA=16384 -hA=16384 -hB=16384 -wB=16384
# CSV output file. It will be overwritten at each injection
csvFile = codes/matrixMul/fi_matrix_mul_single_bit.csv
# You should create a script on the benchmark source folder to verify GOLD_OUTPUT x INJ_OUTPUT
goldenCheckScript = codes/matrixMul/sdc_check.sh
# Number of signals that will be sent to the application
seqSignals = 20
# Initial sleep time in seconds before start sending signals
# Generally the memory setup time
initSleep = 2.1
#kernel = matrixMulCUDA
kernel = matrixMul.cu:208
kernel_end = matrixMul.cu:216
[DEFAULT]
debug =True
# Name of the gdb executable
gdbExecName = /usr/local/cuda-10.1.243/bin/cuda-gdb
# Which fault model to use, 0 -> single; 1 -> double;
# 2 -> random; 3 -> zeros; 4 -> least 16 significant bits (LSB);
# 5 -> least 8 significant bits (LSB)
# If you want multiple fault models, place them separated by ','
# faultModel = 0,2,3
faultModel = 0
# Injection site
# Can be:
# RF -> Register File
# INST_OUT -> Instruction Output (NOT IMPLEMENTED YET)
# INST_composed -> Instruction Address (NOT IMPLEMENTED YET)
injectionSite = RF
# Max time factor to finish the app; it is multiplied by the application's running time.
# For example, if your app takes 2s and maxWaitTimes is 5, the maximum running time
# before the run is considered a crash is 10s.
maxWaitTimes = 5
# binary file of the application
# Must be full path
benchmarkBinary = /home/badia/carol-fi-carol-fi_cuda-parallel/codes/matrixMul/matrixMul
# Commands to set the session inside GDB environment
#benchmarkArgs = -device=1 -wA=16384 -hA=16384 -hB=16384 -wB=16384
benchmarkArgs = -device=1 -wA=24576 -hA=24576 -hB=24576 -wB=24576
# CSV output file. It will be overwritten at each injection
csvFile = codes/matrixMul/fi_matrix_mul_single_bit.csv
# You should create a script on the benchmark source folder to verify GOLD_OUTPUT x INJ_OUTPUT
goldenCheckScript = codes/matrixMul/sdc_check.sh
# Number of signals that will be sent to the application
seqSignals = 20
# Initial sleep time in seconds before start sending signals
# Generally the memory setup time
initSleep = 4.9
#!/usr/bin/sh
# SDC checking diff
# Every output comparison for this benchmark must be done here.
# For a run to be considered an SDC or CRASH, the
# DIFF_LOG and DIFF_ERR_LOG files must not be empty.
# INJ_OUTPUT_PATH, INJ_ERR_PATH, GOLD_OUTPUT_PATH, GOLD_ERR_PATH
# are environment variables defined by the fault_injector.py
# (paths are quoted so that directories containing spaces still work)

# diff stdout
diff -B "${INJ_OUTPUT_PATH}" "${GOLD_OUTPUT_PATH}" > "${DIFF_LOG}"

# Special comparisons like the following one can be done in this script.
# grep must run WITHOUT -q here: with -q it prints nothing, so the append
# below never added anything to DIFF_LOG.
grep "Result = FAIL" "${INJ_OUTPUT_PATH}" >> "${DIFF_LOG}"

# diff stderr
diff -B "${INJ_ERR_PATH}" "${GOLD_ERR_PATH}" > "${DIFF_ERR_LOG}"

# Must exit 0
exit 0
\ No newline at end of file
import os
import pickle
import re
import sys
import common_parameters as cp
if sys.version_info >= (3, 0):
import configparser # python 3
else:
import ConfigParser # python 2
def execute_command(gdb, to_execute):
    """Run ``to_execute`` through ``gdb.execute`` and return its output lines.

    The command output is captured as a string (``to_string=True``) and
    split on line boundaries, so the result is always a list.
    """
    captured = gdb.execute(to_execute, to_string=True)
    output_lines = captured.splitlines()
    return output_lines
def save_file(file_path, data):
    """Serialize ``data`` into ``file_path`` using pickle.

    The file is opened in binary write mode, so any previous content is
    replaced.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(file_path, "wb") as f_out:
        pickle.dump(data, f_out)
def append_file(file_path, data):
    """Append a pickled copy of ``data`` to the end of ``file_path``.

    The file is opened in binary append mode, so earlier records are kept
    and each call adds one more pickle record after them.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(file_path, "ab") as f_out:
        pickle.dump(data, f_out)
def load_file(file_path):
    """Return the first object pickled in ``file_path``.

    Only one ``pickle.load`` is performed, so records appended after the
    first one are not read.
    NOTE(review): pickle must only be used on files this tool wrote itself.
    """
    with open(file_path, "rb") as pickled:
        return pickle.load(pickled)
def load_config_file(flip_config_file):
    """Read ``flip_config_file`` and return the resulting ConfigParser.

    The parser class comes from ``configparser`` on Python 3 and from
    ``ConfigParser`` on Python 2.  A path that cannot be read produces an
    empty parser because the result of ``read`` is not checked.
    """
    if sys.version_info >= (3, 0):
        parser_cls = configparser.ConfigParser
    else:
        parser_cls = ConfigParser.ConfigParser

    conf = parser_cls()
    conf.read(flip_config_file)
    return conf
def kill_all(kill_string, logging=None):
    """Run every ``;``-separated command of ``kill_string`` through the shell.

    stdout and stderr of each command are discarded.  When ``logging`` is
    given, each command is also reported through ``logging.debug``.
    """
    for single_cmd in kill_string.split(";"):
        os.system("{} > /dev/null 2>&1".format(single_cmd))
        if not logging:
            continue
        logging.debug("kill cmd: {}".format(single_cmd))
def set_python_env():
    """Expose this directory and its ``classes`` folder through PYTHONPATH.

    GDB's embedded Python cannot find common_functions.py on its own, so
    the directory containing this file and ``<dir>/classes`` are added to
    ``PYTHONPATH`` for child processes.  ``OMP_NUM_THREADS`` is set to 1.

    :return: the directory containing this file.
    """
    current_path = os.path.dirname(os.path.realpath(__file__))

    # os.environ values are never shell-expanded, so the former literal
    # "$PYTHONPATH" prefix threw the inherited value away and added a bogus
    # entry instead.  Read the real value and prepend it when present.
    search_path = [current_path, current_path + "/classes"]
    inherited = os.environ.get('PYTHONPATH')
    if inherited:
        search_path.insert(0, inherited)

    os.environ['PYTHONPATH'] = os.pathsep.join(search_path)
    os.environ['OMP_NUM_THREADS'] = '1'
    return current_path
"""
Remove all useless information produced by CUDA-GDB on the output files
before they got to the SDC check script
"""
def remove_useless_information_from_output(output_file_path):
    """Strip GDB noise from ``output_file_path`` in place.

    Every line matching at least one regular expression in
    ``cp.POSSIBLE_USELESS_GDB_OUTPUT_PATTERNS`` is dropped; the remaining
    lines are written back to the same file in their original order.
    """
    with open(output_file_path, 'r') as ifp:
        raw_lines = ifp.readlines()

    kept_lines = [
        text for text in raw_lines
        if not any(re.search(pattern=noise, string=text)
                   for noise in cp.POSSIBLE_USELESS_GDB_OUTPUT_PATTERNS)
    ]

    # Overwrite the output file with the surviving lines
    with open(output_file_path, 'w') as ofp:
        ofp.writelines(kept_lines)
"""
Show output function
to allow pretty printing
"""
def printf(*args):
    """Print all arguments on one line, each followed by a single space."""
    pieces = ["{0} ".format(item) for item in args]
    print("".join(pieces))
# Maximum size of a single register (presumably in bits -- TODO confirm)
SINGLE_MAX_SIZE_REGISTER = 32
# Times to profile:
# the maximum number of executions of the
# application under the profiler
MAX_TIMES_TO_PROFILE = 2
# Log path to store all injection info
LOGS_PATH = 'logs'
# Temporary file to store kernel information
# (a file path, despite the _DIR suffix)
KERNEL_INFO_DIR = LOGS_PATH + '/tmp/carol-fi-kernel-info.txt'
# For golden generation
GOLD_ERR_PATH = LOGS_PATH + '/tmp/carol_fi_golden_bench_err.txt'
GOLD_OUTPUT_PATH = LOGS_PATH + '/tmp/carol_fi_golden_bench_output.txt'
# Files that will be compared to the golden ones
# ('{}' is a str.format placeholder, presumably a per-run identifier -- confirm at call sites)
INJ_OUTPUT_PATH = LOGS_PATH + '/tmp/carol_fi_inj_bench_output_{}.txt'
INJ_ERR_PATH = LOGS_PATH + '/tmp/carol_fi_inj_bench_err_{}.txt'
# Internal python scripts
FLIP_SCRIPT = 'flip_value.py'
PROFILER_SCRIPT = 'profiler_new.py'
# Temporary difference logs ('{}' is a str.format placeholder)
DIFF_LOG = LOGS_PATH + '/tmp/diff_{}.log'
DIFF_ERR_LOG = LOGS_PATH + '/tmp/diff_err_{}.log'
# Debug flags
# Debug the fault-injection process
DEBUG = True
# Debug the profiler process
DEBUG_PROFILER = True
# Log file for the SignalApp thread ('{}' is a str.format placeholder)
SIGNAL_APP_LOG = LOGS_PATH + '/tmp/signal_app_thread_{}.txt'
# Divisor applied to the sleep time
# NOTE(review): confirm exact usage in the signal thread
NUM_DIVISION_TIMES = 100.0
# Common body of the flip-value log filename ('{}' is a str.format placeholder)
LOG_DEFAULT_NAME = LOGS_PATH + '/tmp/carolfi-flipvalue-{}.log'
# Largest unsigned 32-bit integer (2**32 - 1)
MAX_INT_32 = 4294967295
# For most benchmarks we cannot wait until the end of the processing,
# so only up to 90% of the execution time is considered
MAX_SIGNAL_BEFORE_ENDING = 0.9
# Names searched for to recognise signals and CUDA exceptions, grouped as:
# termination, program, alarm, asynchronous, job control, operation error,
# miscellaneous, signal messages and CUDA exceptions.
# 'SIGINT' must not be here, since it is used to send an interruption to the app
SIGNALS = (
    ['SIGKILL', 'SIGTERM', 'SIGQUIT', 'SIGHUP']  # termination codes
    + ['SIGFPE', 'SIGILL', 'SIGSEGV', 'SIGBUS', 'SIGABRT',
       'SIGIOT', 'SIGTRAP', 'SIGEMT', 'SIGSYS']  # program codes
    + ['SIGALRM', 'SIGVTALRM', 'SIGPROF']  # alarm codes
    + ['SIGIO', 'SIGURG', 'SIGPOLL']  # asynchronous codes
    + ['SIGCHLD', 'SIGCLD', 'SIGCONT', 'SIGSTOP',
       'SIGTSTP', 'SIGTTIN', 'SIGTTOU']  # job control
    + ['SIGPIPE', 'SIGLOST', 'SIGXCPU', 'SIGXFSZ']  # operation codes
    + ['SIGUSR1', 'SIGUSR2', 'SIGWINCH', 'SIGINFO']  # miscellaneous codes
    + ['strsignal', 'psignal']  # signal messages
    # cuda signals: CUDA_EXCEPTION_0 .. CUDA_EXCEPTION_15
    + list(map('CUDA_EXCEPTION_{}'.format, range(16)))
)
# All noise produced by GDB must be added to this list,
# using the regular expression format of the python ``re`` module.
# A line matching any of these patterns is treated as GDB output, not
# application output.
POSSIBLE_USELESS_GDB_OUTPUT_PATTERNS = [
    r'.*Thread.*received signal SIGINT, Interrupt.*',  # Thread SIGINT message
    r'.*New Thread.*',  # New GDB Thread creation
    r'.*Thread debugging using.*enabled.*',  # Lib thread enabled
    r'.*Using host.*library.*',  # Using host library
    r'.*Switching focus to CUDA kernel.*',  # Switching focus to CUDA kernel message
    r'.*0x.*in.*<<<.*>>>.*',  # Kernel interruption message
    r'.*Inferior.*\(process.*\) exited normally.*',  # GDB exited normally message
    r'.*Thread 0x.*exited.*',  # Thread exited
    # Cuda lib calls. The parentheses are escaped so they match the literal
    # "()" printed by GDB; an unescaped "()" is an empty group and never
    # matches the real line.
    r'.*0x.* in cu.* \(\) from /usr/lib/.*libcuda.*',
    r'.*0x.*in.*\[clone.*\].*\(\).*',  # OMP calls
    r'.*0x.*in.*',  # General API call
    r'.*Inferior.*\(process.*\).*',  # General inferior process
]
# Injection sites: numeric identifiers of the places where a fault can be
# injected (RF presumably stands for register file -- TODO confirm)
RF = 0
INST_OUT = 1
INST_ADD = 2
# Lookup from the injection-site name (string) to its numeric identifier
INJECTION_SITES = {
    'RF': RF,
    'INST_OUT': INST_OUT,
    'INST_ADD': INST_ADD
}
# Which fault model to use: 0 -> single bit; 1 -> two bits;
# 2 -> random value; 3 -> zero value; 4 -> 16 least significant bits (LSB);
# 5 -> 8 least significant bits (LSB)
FLIP_SINGLE_BIT = 0
FLIP_TWO_BITS = 1
RANDOM_VALUE = 2
ZERO_VALUE = 3
LEAST_16_BITS = 4
LEAST_8_BITS = 5
# Error string reported when no CUDA kernel has the focus
FOCUS_ERROR_STRING = "Focus not set on any active CUDA kernel."
This diff is collapsed.
import os
import gdb
import time
from classes.BitFlip import BitFlip
from classes.Logging import Logging
import common_parameters as cp
"""
Handler attached to exit event
"""
def exit_handler(event):
    """Handle the GDB ``exited`` event.

    Logs the event, sends SIGUSR2 (``kill -s USR2``) to the process whose id
    is held in the module-level name ``pid``, and then logs the exit code of
    the inferior. Failing to read ``event.exit_code`` is logged, not raised.
    """
    global global_logging
    global_logging.info("event type: exit")
    print("llego el final")
    notify_cmd = "kill -s USR2 " + str(pid)
    os.system(notify_cmd)
    try:
        exit_report = "exit code: {}".format(str(event.exit_code))
        global_logging.info(exit_report)
    except Exception as failure:
        global_logging.exception("ERROR: {}".format(str(failure)))
"""
Handler for stop events: on a breakpoint hit it signals the calling
process and resumes; on any other stop it performs the bit flip
"""
def set_event(event):
    """Handle a GDB ``stop`` event.

    * Breakpoint hit: log the elapsed time since ``t``, send SIGUSR1
      (``kill -s USR1``) to the process id held in ``pid``, disable the
      breakpoint ``bp`` and resume the inferior.
    * Any other stop: if no fault has been injected yet, perform the bit
      flip and log the stop signal.

    ``pid``, ``bp`` and ``t`` are read as module-level names.
    """
    # Accessing global vars
    global global_logging, was_hit, bit_flip, bp, t
    if isinstance(event, gdb.BreakpointEvent):
        # time.clock() was removed in Python 3.8; time.process_time() is the
        # CPU-time replacement. time.clock() is still preferred when present
        # so the value stays comparable with an older timestamp in ``t``.
        clock = getattr(time, "clock", None) or time.process_time
        global_logging.info("Before breakpoint" + str(clock() - t))
        global_logging.info("Enviado senal a " + str(pid))
        os.system("kill -s USR1 " + str(pid))
        # Only the first kernel hit is of interest; do not stop here again
        bp.enabled = False
        gdb.execute('c')
    else:
        try:
            # Just checking if it was hit
            if bit_flip.fault_injected is False:
                bit_flip.single_event()
                global_logging.info("BIT FLIP SET ON SIGNAL {}".format(event.stop_signal))
        except Exception as err:
            global_logging.exception("EVENT DIFFERENT FROM STOP SIGNAL: {}".format(str(err)))
"""
Main function
"""
def main():
    """Entry point of the fault-injection GDB script.

    Configures GDB, registers the exit/stop handlers, parses the
    '|'-separated ``arg0`` string (kernel, pid, bits to flip, fault model,
    log file, GDB init commands, injection site), sets a breakpoint on the
    kernel, runs the application and keeps continuing it until GDB reports
    that the program is gone or an error is raised.

    ``arg0`` must already exist as a module-level name when this runs.
    """
    global global_logging, register, injection_site, bits_to_flip, fault_model, was_hit, bit_flip, arg0
    # exit_handler and set_event read these as module-level names, so they
    # must be published as globals instead of staying local to main().
    global kernel, pid, bp, t
    was_hit = False

    # time.clock() was removed in Python 3.8; time.process_time() is the
    # CPU-time replacement. time.clock() is still preferred when it exists
    # so readings stay comparable with code that still calls it.
    clock = getattr(time, "clock", None) or time.process_time

    # Initialize GDB to run the app
    gdb.execute("set confirm off")
    gdb.execute("set pagination off")
    gdb.execute("set target-async off")
    gdb.execute("set non-stop off")

    # Connecting to a exit handler event
    gdb.events.exited.connect(exit_handler)

    # Connecting to a stop signal event
    gdb.events.stop.connect(set_event)

    # First, parse the argument line handed over by the caller
    [kernel, pid, bits_to_flip, fault_model, flip_log_file,
     gdb_init_strings, injection_site] = arg0.split('|')

    # Logging
    global_logging = Logging(log_file=flip_log_file)
    global_logging.info("Starting flip_value script " + " called by " + str(pid)
                        + " for stop kernel " + str(kernel))
    try:
        for init_str in gdb_init_strings.split(";"):
            gdb.execute(init_str)
            global_logging.info("initializing setup: " + str(init_str))
    except gdb.error as err:
        global_logging.exception("ERROR on initializing setup: {}".format(str(err)))

    # Set Breakpoint attributes to be use
    bits_to_flip = bits_to_flip.split(",")
    fault_model = int(fault_model)
    bit_flip = BitFlip(bits_to_flip=bits_to_flip, fault_model=fault_model,
                       logging=global_logging, injection_site=cp.INJECTION_SITES[injection_site])

    # Start app execution
    t = clock()
    bp = gdb.Breakpoint(kernel)
    global_logging.info("Put Break " + str(clock() - t))
    gdb.execute("r")

    i = 0
    try:
        while 'The program' not in gdb.execute('c', to_string=True):
            i += 1
    except Exception as err:
        global_logging.info("CONTINUED {} times".format(i))
        err_str = str(err).rstrip()
        global_logging.exception("IGNORED CONTINUE ERROR: {}".format(err_str))

        # Make sure that it is going to finish
        if 'Failed' in err_str:
            # Log before quitting: nothing after 'quit' is expected to run
            global_logging.exception("QUIT REQUIRED")
            gdb.execute('quit')


# Module-level state shared between main() and the event handlers
global_logging = None
register = None
bits_to_flip = None
fault_model = None
was_hit = False
injection_site = None
bit_flip = None
kernel = None
pid = None
bp = None
t = None

# Call main execution
main()
from subprocess import Popen
from sys import argv
# Launch the command given as second argument and store the PID of the new
# process, as text, in the file named by the first argument.
with open(argv[1], "w") as pid_file:
    child = Popen(argv[2])
    pid_file.write(str(child.pid))
import gdb
"""
Main function
"""
# Put GDB into a non-interactive, synchronous configuration before running
for gdb_setting in ("set confirm off", "set pagination off",
                    "set target-async off", "set non-stop off"):
    gdb.execute(gdb_setting)

# ``arg0`` carries the ';'-separated GDB setup commands supplied by the caller
gdb_init_strings = arg0
for init_str in gdb_init_strings.split(";"):
    gdb.execute(init_str)

# Run the application
gdb.execute("r")
import gdb
import time
def exit_handler(event):
    """Handle the GDB ``exited`` event.

    Clears the module-level ``nosalir`` flag and prints the exit code of the
    inferior; if the code cannot be read, the error text is printed instead.
    """
    global nosalir
    nosalir = False
    print("event type: exit")
    try:
        print("exit code: {}".format(str(event.exit_code)))
    except Exception as failure:
        print("ERROR: {}".format(str(failure)))
"""
Handler for stop events: counts kernel breakpoint hits and
records the time elapsed between the start and end stops
"""
def set_event(event):
    """Handle a GDB ``stop`` event and update the profiling globals.

    Breakpoint stops alternate between two roles, tracked by ``primera``:
    the first one stores a start timestamp in ``t`` and increments
    ``ocurrencias``; the next one stores the elapsed time in ``trun``.
    Any non-breakpoint stop also stores the elapsed time since ``t``.
    """
    global trun, ocurrencias, t, primera
    # time.clock() was removed in Python 3.8; time.process_time() is the
    # CPU-time replacement. time.clock() is still preferred when present so
    # readings stay comparable with an older timestamp in ``t``.
    # NOTE(review): this measures CPU time of the GDB process -- confirm
    # that wall-clock time is not what the caller expects.
    clock = getattr(time, "clock", None) or time.process_time
    print("Es mi primera vez" + str(primera) + " " + str(ocurrencias))
    if isinstance(event, gdb.BreakpointEvent):
        if primera:
            t = clock()
            ocurrencias = ocurrencias + 1
        else:
            trun = clock() - t
        # Next breakpoint stop plays the opposite role
        primera = not primera
    else:
        trun = clock() - t
"""
Main function
"""
def main():
    """Entry point of the profiler GDB script.

    ``arg0`` (a module-level name that must already exist) is a
    ';'-separated string: the first field is ``"True"`` when a kernel-end
    breakpoint must be used, the second is the kernel-end location, and the
    rest are GDB setup commands. After running the application, the number
    of occurrences and the measured time are written to
    ``tmpxxx_return_profiler.conf``.
    """
    global ocurrencias, t, nosalir, trun, primera
    primera = True
    ocurrencias = 0
    # Give the timing globals a value up front so the report below can be
    # written even when no stop event ever updates them.
    # time.clock() was removed in Python 3.8; fall back to process_time().
    clock = getattr(time, "clock", None) or time.process_time
    t = clock()
    trun = 0

    # Initialize GDB to run the app
    gdb.execute("set confirm off")
    gdb.execute("set pagination off")
    gdb.execute("set target-async off")
    gdb.execute("set non-stop off")

    # Connecting to a exit handler event
    gdb.events.exited.connect(exit_handler)

    # Connecting to a stop signal event
    gdb.events.stop.connect(set_event)

    # Split off the two leading control fields; the remainder keeps its ';'
    gdb_init_strings = arg0
    cadena = gdb_init_strings.split(";", 2)
    section = cadena[0] == "True"
    kernel_end = cadena[1]

    for init_str in cadena[2].split(";"):
        gdb.execute(init_str)
    if section:
        gdb.execute("break " + kernel_end)

    gdb.execute("r")
    # Reach the end of the measured region: either the explicit kernel-end
    # breakpoint or the return of the current frame.
    if section:
        gdb.execute("c")
    else:
        gdb.execute("finish")
    gdb.execute("c")

    # Report read back by the caller; the context manager closes the file
    # even if the write fails.
    with open("tmpxxx_return_profiler.conf", "w") as report:
        report.write("[DEFAULT] \nOcurrencias = " + str(ocurrencias)
                     + "\nTiempo = " + str(trun) + "\n")


main()
import os
import gdb
import time
def exit_handler(event):
    """Handle the GDB ``exited`` event.

    Sets the module-level ``nosalir`` flag to False (which ends the loop in
    ``main``) and prints the exit code of the inferior, or the error text
    when the code is not available.
    """
    global nosalir
    nosalir = False
    print("event type: exit")
    try:
        exit_code_text = str(event.exit_code)
        print("exit code: {}".format(exit_code_text))
    except Exception as problem:
        print("ERROR: {}".format(str(problem)))
"""
Handler for stop events: counts kernel breakpoint hits and
records the time elapsed since the last breakpoint hit
"""
def set_event(event):
    """Handle a GDB ``stop`` event and update the profiling globals.

    A breakpoint stop stores a start timestamp in ``t`` and increments
    ``ocurrencias``; any other stop stores the time elapsed since ``t`` in
    ``trun``.
    """
    global trun, ocurrencias, t
    # time.clock() was removed in Python 3.8; time.process_time() is the
    # CPU-time replacement. time.clock() is still preferred when present so
    # readings stay comparable with an older timestamp in ``t``.
    clock = getattr(time, "clock", None) or time.process_time
    if isinstance(event, gdb.BreakpointEvent):
        t = clock()
        ocurrencias = ocurrencias + 1
    else:
        trun = clock() - t
def main():
    """Profile the hard-coded rodinia ``lud_cuda`` benchmark under GDB.

    Breaks on ``lud_cuda``, then repeatedly finishes the current frame and
    continues until the exit handler clears ``nosalir``. The number of
    breakpoint hits and the last measured time are printed and written to
    ``tmpxxx_return_profiler.conf``.
    """
    global ocurrencias, t, nosalir, trun
    ocurrencias = 0
    # Give the timing globals a value up front so the report below can be
    # written even when no stop event ever updates them.
    # time.clock() was removed in Python 3.8; fall back to process_time().
    clock = getattr(time, "clock", None) or time.process_time
    t = clock()
    trun = 0

    # Initialize GDB to run the app
    gdb.execute("set confirm off")
    gdb.execute("set pagination off")
    gdb.execute("set target-async off")
    gdb.execute("set non-stop off")

    # Connecting to a exit handler event
    gdb.events.exited.connect(exit_handler)

    # Connecting to a stop signal event
    gdb.events.stop.connect(set_event)

    # NOTE(review): benchmark binary, arguments and kernel are hard-coded
    gdb.execute("file ~/rodinia_3.1/cuda/lud/cuda/lud_cuda")
    gdb.execute("set arg -s 10000")
    gdb.execute("break lud_cuda")
    gdb.execute('r')

    nosalir = True
    while nosalir:
        gdb.execute("finish")
        # If the inferior already exited during "finish", the exit handler
        # cleared the flag and there is nothing left to continue.
        if not nosalir:
            break
        gdb.execute("c")

    print(" Ocurrencias " + str(ocurrencias) + " Tiempo acumulado de ejecucciones " + str(trun))
    # The context manager closes the report even if the write fails
    with open("tmpxxx_return_profiler.conf", "w") as report:
        report.write("Ocurrencias =" + str(ocurrencias) + "\n Tiempo " + str(trun) + "\n")


main()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment