Commit cec4b3f3 authored by German Leon's avatar German Leon
Browse files

Initial commit

parents
# TK 1sudo cat /sys/kernel/debug/clock/gbus/rate
sudo cat /sys/devices/57000000.gpu/devfreq/57000000.gpu/cur_freq
# Pin the GPU devfreq clock by writing the same value to both limits.
#
# $1 : frequency as a string of decimal digits; "000" is appended before the
#      write, so the value stored is $1 * 1000 (presumably kHz in, Hz out --
#      confirm against available_frequencies).
#
# The argument is validated as digits only: it is interpolated into a command
# line that runs as root, and an empty value would write "000" to both limits.
#
# To inspect the current limits:
#   sudo cat /sys/devices/57000000.gpu/devfreq/57000000.gpu/min_freq
#   sudo cat /sys/devices/57000000.gpu/devfreq/57000000.gpu/max_freq
case "${1}" in
    ''|*[!0-9]*)
        echo "usage: $0 <gpu-frequency> (decimal digits only)" >&2
        ;;
    *)
        sudo bash -c "echo ${1}000 > /sys/devices/57000000.gpu/devfreq/57000000.gpu/min_freq"
        sudo bash -c "echo ${1}000 > /sys/devices/57000000.gpu/devfreq/57000000.gpu/max_freq"
        ;;
esac
#################TK1###################
#echo ${1}000 > /sys/kernel/debug/clock/override.gbus/rate
# sudo echo $1 > /sys/kernel/debug/clock/override.gbus/rate
#/usr/local/cuda/samples/1_Utilities/deviceQuery/deviceQuery
#echo "before:" >> /home/happy/rodinia/lud/rad/results/res.out
#cat /sys/kernel/debug/clock/override.gbus/rate >> /home/happy/rodinia/lud/rad/results/res.out
#sudo bash -c "cat /sys/kernel/debug/clock/gbus/max > /sys/kernel/debug/clock/override.gbus/rate"
#echo "After:" >> /home/happy/rodinia/lud/rad/results/res.out
#cat /sys/kernel/debug/clock/gbus/max >> /home/happy/rodinia/lud/rad/results/res.out
#sudo bash -c "echo 1 > /sys/kernel/debug/clock/override.gbus/state"
#sudo /home/happy/bin/a.out $1
#sudo bash -c "echo $1 > /sys/kernel/debug/clock/override.gbus/rate"
## TK1 echo ${1}000 > /sys/kernel/debug/clock/override.gbus/rate
#echo ${1}000 > /sys/kernel/debug/clock/override.gbus/rate
#echo "after:" >> /home/happy/rodinia/lud/rad/results/res.out
#cat /sys/kernel/debug/clock/override.gbus/rate >> /home/happy/rodinia/lud/rad/results/res.out
#echo "before:" >> /home/happy/rodinia/lud/rad/results/res.out
#cat /sys/kernel/debug/clock/override.gbus/state >> /home/happy/rodinia/lud/rad/results/res.out
#sudo bash -c "echo 1 > /sys/kernel/debug/clock/override.gbus/state"
## TK1 echo 1 > /sys/kernel/debug/clock/override.gbus/state
#echo "after:" >> /home/happy/rodinia/lud/rad/results/res.out
#cat /sys/kernel/debug/clock/override.gbus/state >> /home/happy/rodinia/lud/rad/results/res.out
#TK1 sudo cut -d'(' -f1 /sys/kernel/debug/clock/gbus/possible_rates
sudo cat /sys/devices/57000000.gpu/devfreq/57000000.gpu/available_frequencies
File added
#!/bin/bash
# Profile each micro-benchmark command file with nvprof.
#
# Every file named in the loop is expected to hold one command line whose
# "-"-separated fields 2..5 each look like "<name>=<value>" and carry, in
# order: grid size, block size, operation count and benchmark name (layout
# inferred from the parsing below -- confirm against test.sh).
#
# Outputs, per command file:
#   res_<BENCH>_<grid>_<blk>_<nop/1000>m.csv   nvprof metrics (CSV log file)
#   res_<BENCH>_<grid>_<blk>_<nop/1000>m.log   stdout/stderr of the metrics run
#   datos-<file>-_<BENCH>_<grid>_<blk>_<nop/1000>m.log   stderr of a plain nvprof run
#
# The list of metrics is read from the file "metricas3" in the current directory.

#for i in min.sh #max.sh #medio.sh
for i in test.sh
do
    if [ ! -r "$i" ]; then
        echo "skipping $i: file not readable" >&2
        continue
    fi

    grid=$(cut -d"-" -f2 "$i" | cut -d"=" -f2 | cut -d" " -f1)
    blk=$(cut -d"-" -f3 "$i" | cut -d"=" -f2 | cut -d" " -f1)
    nop=$(cut -d"-" -f4 "$i" | cut -d"=" -f2)
    BENCH=$(cut -d"-" -f5 "$i" | cut -d"=" -f2 | cut -d" " -f1)
    cmd=$(cat "$i")

    # The count field is not trimmed by cut, so drop surrounding whitespace
    # and make sure it is a plain number before doing arithmetic on it.
    nop=${nop//[[:space:]]/}
    if [[ ! $nop =~ ^[0-9]+$ ]]; then
        echo "skipping $i: operation count '$nop' is not a number" >&2
        continue
    fi
    # 10# forces base 10 so a leading zero is not parsed as octal.
    nop=$(( 10#$nop / 1000 ))
    tag="${BENCH}_${grid}_${blk}_${nop}m"

    echo "==== $cmd ========== "

    echo "Metricas"
    # $cmd and $(cat metricas3) are deliberately unquoted: they must be split
    # into separate arguments.
    nvprof --log-file "res_${tag}.csv" --csv -m $(cat metricas3) -f /home/happy/microbench/micro/$cmd > "res_${tag}.log" 2>&1

    # Plain nvprof run; nvprof's report goes to stderr, which is captured.
    nvprof $cmd 2> "datos-${i}-_${tag}.log"

    # Alternative profiling modes, kept for manual use:
    #   all events:       nvprof --events all $cmd 2> $i.events.log
    #   all metrics:      nvprof -m all -f /home/happy/microbench/micro/$cmd > res_${BENCH}_${grid}_${blk}_${nop}m.log 2>&1
    #   ipc only (CSV):   nvprof --csv -m ipc -f /home/happy/microbench/micro/$cmd > res_${BENCH}_${grid}_${blk}_${nop}m.log 2>&1
    #   all metrics, raw: nvprof -m all -f $cmd > ${i}-${BENCH}.metrics.log 2>&1
    #   timeline export:  nvprof --export-profile timeline-$i.nvprof -f $cmd
    #   metrics export:   nvprof --metrics achieved_occupancy,executed_ipc -o metrics-$i.nvprof -f $cmd
    #   kernel analysis:  nvprof --kernels microKernel --analysis-metrics -o analysis-$i.nvprof -f $cmd
    #   plain timed run:  time $cmd >> datos-${i}-${BENCH}.log
done
// iisalive.cpp
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h> // for watchdog timer
#include <unistd.h> // needed only if close() is used to close watchdog timer
#include <sys/ioctl.h> // for watchdog timer
#include <linux/watchdog.h> // for watchdog timer
#include <time.h>
/* Sampling layout: what is polled on every tick. */
enum {
    THERMAL_ZONE_COUNT = 6, /* thermal_zone0 .. thermal_zone5 */
    POWER_RAIL_COUNT = 3,   /* INA3221 channels 0 .. 2 */
    RAIL_NAME_SKIP = 7,     /* leading characters dropped from each rail name */
    RAIL_NAME_MAX = 50      /* rail-name buffer size; keep "%49s" below in sync */
};

/*
 * Read one decimal integer from the file at `path`.
 * Returns 0 and stores the value in *out on success; returns -1 if the file
 * cannot be opened or does not start with an integer. The stream is always
 * closed before returning.
 */
static int read_sysfs_int(const char *path, int *out)
{
    FILE *f = fopen(path, "r");
    int ok;

    if (f == NULL)
        return -1;
    ok = (fscanf(f, "%d", out) == 1);
    fclose(f);
    return ok ? 0 : -1;
}

/*
 * Read one whitespace-delimited word from the file at `path` into `out`,
 * which must hold at least RAIL_NAME_MAX bytes.
 * Returns 0 on success, -1 if the file cannot be opened or is empty.
 */
static int read_sysfs_word(const char *path, char *out)
{
    FILE *f = fopen(path, "r");
    int ok;

    if (f == NULL)
        return -1;
    ok = (fscanf(f, "%49s", out) == 1); /* width bounded by RAIL_NAME_MAX - 1 */
    fclose(f);
    return ok ? 0 : -1;
}

/*
 * Return `name` without its first RAIL_NAME_SKIP characters, or `name`
 * itself when it is shorter than that (so the pointer never moves past the
 * terminator).
 */
static const char *rail_label(const char *name)
{
    int i;

    for (i = 0; i < RAIL_NAME_SKIP; i++) {
        if (name[i] == '\0')
            return name;
    }
    return name + RAIL_NAME_SKIP;
}

/*
 * Liveness monitor: every 10 seconds print a timestamp, the temperature of
 * each thermal zone and the power reported for each INA3221 rail, all on one
 * line of stdout. Runs until killed; command-line arguments are ignored.
 *
 * A sysfs entry that cannot be read is reported as "n/a" instead of
 * aborting the monitor.
 */
int main(int argc, char *argv[])
{
    const int timeslice = 10; /* seconds between samples */
    char path[100];
    char rail[RAIL_NAME_MAX];
    int i;

    (void)argc;
    (void)argv;

    printf("Entrado en iamlive....\r\n");
    for (;;) {
        time_t now;
        struct tm stamp = {0};
        struct tm *local;

        sleep(timeslice);

        now = time(NULL);
        local = localtime(&now);
        if (local != NULL)
            stamp = *local;
        fprintf(stdout, "alive: %d-%02d-%02d %02d:%02d:%02d Temp:",
                stamp.tm_year + 1900, stamp.tm_mon + 1, stamp.tm_mday,
                stamp.tm_hour, stamp.tm_min, stamp.tm_sec);

        for (i = 0; i < THERMAL_ZONE_COUNT; i++) {
            int raw_temp;

            snprintf(path, sizeof path,
                     "/sys/devices/virtual/thermal/thermal_zone%d/temp", i);
            if (read_sysfs_int(path, &raw_temp) == 0)
                fprintf(stdout, "%.1f,", raw_temp / 1000.0);
            else
                fprintf(stdout, "n/a,");
        }

        for (i = 0; i < POWER_RAIL_COUNT; i++) {
            int raw_power;

            snprintf(path, sizeof path,
                     "/sys/bus/i2c/drivers/ina3221x/6-0040/iio:device0/rail_name_%d", i);
            if (read_sysfs_word(path, rail) != 0)
                snprintf(rail, sizeof rail, "rail%d", i);

            snprintf(path, sizeof path,
                     "/sys/bus/i2c/drivers/ina3221x/6-0040/iio:device0/in_power%d_input", i);
            if (read_sysfs_int(path, &raw_power) == 0)
                fprintf(stdout, "%s:%.3fW ", rail_label(rail), raw_power / 1000.0);
            else
                fprintf(stdout, "%s:n/a ", rail_label(rail));
        }

        fprintf(stdout, "\r\n");
        /* Push the line out even when stdout is a pipe or a file. */
        fflush(stdout);
    }
}
// iisalive.cpp
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h> // for watchdog timer
#include <unistd.h> // needed only if close() is used to close watchdog timer
#include <sys/ioctl.h> // for watchdog timer
#include <linux/watchdog.h> // for watchdog timer
#include <time.h>
/* Sampling layout: what is polled on every tick. */
enum {
    THERMAL_ZONE_COUNT = 6, /* thermal_zone0 .. thermal_zone5 */
    POWER_RAIL_COUNT = 3,   /* INA3221 channels 0 .. 2 */
    RAIL_NAME_SKIP = 7,     /* leading characters dropped from each rail name */
    RAIL_NAME_MAX = 50      /* rail-name buffer size; keep "%49s" below in sync */
};

/*
 * Parse `text` as a strictly positive number of seconds that fits in an int.
 * Returns 0 and stores the value in *out on success, -1 otherwise
 * (*out is left untouched on failure).
 */
static int parse_seconds(const char *text, int *out)
{
    const long int_max = (long)(~0u >> 1); /* INT_MAX without <limits.h> */
    char *end = NULL;
    long value = strtol(text, &end, 10);

    if (end == text || *end != '\0' || value <= 0 || value > int_max)
        return -1;
    *out = (int)value;
    return 0;
}

/*
 * Read one decimal integer from the file at `path`.
 * Returns 0 and stores the value in *out on success; returns -1 if the file
 * cannot be opened or does not start with an integer. The stream is always
 * closed before returning.
 */
static int read_sysfs_int(const char *path, int *out)
{
    FILE *f = fopen(path, "r");
    int ok;

    if (f == NULL)
        return -1;
    ok = (fscanf(f, "%d", out) == 1);
    fclose(f);
    return ok ? 0 : -1;
}

/*
 * Read one whitespace-delimited word from the file at `path` into `out`,
 * which must hold at least RAIL_NAME_MAX bytes.
 * Returns 0 on success, -1 if the file cannot be opened or is empty.
 */
static int read_sysfs_word(const char *path, char *out)
{
    FILE *f = fopen(path, "r");
    int ok;

    if (f == NULL)
        return -1;
    ok = (fscanf(f, "%49s", out) == 1); /* width bounded by RAIL_NAME_MAX - 1 */
    fclose(f);
    return ok ? 0 : -1;
}

/*
 * Return `name` without its first RAIL_NAME_SKIP characters, or `name`
 * itself when it is shorter than that (so the pointer never moves past the
 * terminator).
 */
static const char *rail_label(const char *name)
{
    int i;

    for (i = 0; i < RAIL_NAME_SKIP; i++) {
        if (name[i] == '\0')
            return name;
    }
    return name + RAIL_NAME_SKIP;
}

/*
 * Watchdog feeder and board monitor.
 *
 * Usage: <prog> [timeout_seconds [timeslice_seconds]]
 *   timeout_seconds   watchdog timeout to program (default 60)
 *   timeslice_seconds interval between keep-alives and samples (default 10)
 *
 * Opens /dev/watchdog0, programs the timeout, reports the watchdog option
 * flags, then loops forever: sleep, kick the watchdog, append a timestamp
 * to $MYHOME/watchdog.log (or ./watchdog.log when MYHOME is unset) and print
 * the timestamp, thermal-zone temperatures and INA3221 rail power to stdout.
 *
 * Returns -1 on invalid arguments, if the watchdog cannot be opened or
 * configured, or when a keep-alive fails. Arguments and the log path are
 * validated before the device is opened, so a bad command line does not
 * leave the watchdog armed.
 */
int main(int argc, char *argv[])
{
    struct watchdog_info ident = {0};
    char sysfs_path[100];
    char log_path[256];
    char rail[RAIL_NAME_MAX];
    const char *home;
    int timeout = 60;
    int timeslice = 10;
    int fd;
    int written;
    int i;

    if ((argc > 1 && parse_seconds(argv[1], &timeout) != 0) ||
        (argc > 2 && parse_seconds(argv[2], &timeslice) != 0)) {
        fprintf(stderr, "usage: %s [timeout_seconds [timeslice_seconds]]\n", argv[0]);
        return -1;
    }
    if (timeslice >= timeout) {
        fprintf(stderr,
                "warning: timeslice (%d s) is not shorter than the watchdog timeout (%d s)\n",
                timeslice, timeout);
    }

    /* The environment does not change while we run, so resolve the path once. */
    home = getenv("MYHOME");
    written = snprintf(log_path, sizeof log_path, "%s/watchdog.log",
                       home != NULL ? home : ".");
    if (written < 0 || (size_t)written >= sizeof log_path) {
        fprintf(stderr, "MYHOME is too long for the log path buffer\n");
        return -1;
    }

    /* Opening the device starts the watchdog. */
    fd = open("/dev/watchdog0", O_RDWR);
    if (fd < 0) {
        fprintf(stderr, "Open watchdog device failed!\r\n");
        return -1;
    }

    if (ioctl(fd, WDIOC_SETTIMEOUT, &timeout) != 0) {
        fprintf(stderr, "Set watchdog timeout value failed!\r\n");
        return -1;
    }
    /* The driver writes the timeout it actually applied back into `timeout`. */
    fprintf(stdout, "Initial Watchdog timeout value: %d\r\n", timeout);

    /* On failure `ident` stays zeroed, so the switch below reports "Code: 0". */
    if (ioctl(fd, WDIOC_GETSUPPORT, &ident) != 0)
        fprintf(stderr, "Get watchdog support info failed!\r\n");

    /*
     * NOTE(review): `options` from WDIOC_GETSUPPORT describes what the
     * driver supports; the status of the last reboot is what
     * WDIOC_GETBOOTSTATUS returns. The original reporting is kept as is --
     * confirm which one is intended. Only an exact single-flag match prints
     * a description.
     */
    fprintf(stdout, "Last error ?");
    switch (ident.options & 0xff) {
    case WDIOF_OVERHEAT:   fprintf(stdout, " Reset due to CPU overheat \r\n"); break;
    case WDIOF_FANFAULT:   fprintf(stdout, " Fan failed \r\n"); break;
    case WDIOF_EXTERN1:    fprintf(stdout, " External relay 1 \r\n"); break;
    case WDIOF_EXTERN2:    fprintf(stdout, " External relay 2 \r\n"); break;
    case WDIOF_POWERUNDER: fprintf(stdout, " Power bad/power fault \r\n"); break;
    case WDIOF_CARDRESET:  fprintf(stdout, " Card previously reset the CPU \r\n"); break;
    case WDIOF_POWEROVER:  fprintf(stdout, " Power over voltage \r\n"); break;
    case WDIOF_SETTIMEOUT: fprintf(stdout, " Set timeout (in seconds) \r\n"); break;
    default:
        fprintf(stdout, "Code: %d \r\n", (int)ident.options);
        break;
    }

    for (;;) {
        time_t now;
        struct tm stamp = {0};
        struct tm *local;
        FILE *log_file;

        sleep(timeslice);

        /* WDIOC_KEEPALIVE ignores its argument. */
        if (ioctl(fd, WDIOC_KEEPALIVE, NULL) < 0) {
            fprintf(stderr, "Kick watchdog failed!\r\n");
            close(fd);
            return -1;
        }

        now = time(NULL);
        local = localtime(&now);
        if (local != NULL)
            stamp = *local;

        fprintf(stdout, "alive: %d-%02d-%02d %02d:%02d:%02d Temperaturas:",
                stamp.tm_year + 1900, stamp.tm_mon + 1, stamp.tm_mday,
                stamp.tm_hour, stamp.tm_min, stamp.tm_sec);

        /* The log is reopened on every tick so each entry reaches the file
         * even if the board is reset right afterwards; a failed open only
         * skips this entry. */
        log_file = fopen(log_path, "a");
        if (log_file == NULL) {
            printf("Error! opening watchog.log file\n");
        } else {
            fprintf(log_file, "alive: %d-%02d-%02d %02d:%02d:%02d \n",
                    stamp.tm_year + 1900, stamp.tm_mon + 1, stamp.tm_mday,
                    stamp.tm_hour, stamp.tm_min, stamp.tm_sec);
            fclose(log_file);
        }

        for (i = 0; i < THERMAL_ZONE_COUNT; i++) {
            int raw_temp;

            snprintf(sysfs_path, sizeof sysfs_path,
                     "/sys/devices/virtual/thermal/thermal_zone%d/temp", i);
            if (read_sysfs_int(sysfs_path, &raw_temp) == 0)
                fprintf(stdout, "%.1f,", raw_temp / 1000.0);
            else
                fprintf(stdout, "n/a,");
        }

        for (i = 0; i < POWER_RAIL_COUNT; i++) {
            int raw_power;

            snprintf(sysfs_path, sizeof sysfs_path,
                     "/sys/bus/i2c/drivers/ina3221x/6-0040/iio:device0/rail_name_%d", i);
            if (read_sysfs_word(sysfs_path, rail) != 0)
                snprintf(rail, sizeof rail, "rail%d", i);

            snprintf(sysfs_path, sizeof sysfs_path,
                     "/sys/bus/i2c/drivers/ina3221x/6-0040/iio:device0/in_power%d_input", i);
            if (read_sysfs_int(sysfs_path, &raw_power) == 0)
                fprintf(stdout, "%s:%.3fW ", rail_label(rail), raw_power / 1000.0);
            else
                fprintf(stdout, "%s:n/a ", rail_label(rail));
        }

        fprintf(stdout, "\r\n");
        /* Push the line out even when stdout is a pipe or a file. */
        fflush(stdout);
    }
}
################################################################################
#
# Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
#
# NOTICE TO USER:
#
# This source code is subject to NVIDIA ownership rights under U.S. and
# international Copyright laws.
#
# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
# OR PERFORMANCE OF THIS SOURCE CODE.
#
# U.S. Government End Users. This source code is a "commercial item" as
# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
# "commercial computer software" and "commercial computer software
# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
# and is provided to the U.S. Government only as a commercial end item.
# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
# source code with only those rights set forth herein.
#
################################################################################
#
# Makefile project only supported on Mac OS X and Linux Platforms)
#
################################################################################
# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
##############################
# start deprecated interface #
##############################
ifeq ($(x86_64),1)
$(info WARNING - x86_64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
TARGET_ARCH ?= x86_64
endif
ifeq ($(ARMv7),1)
$(info WARNING - ARMv7 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=armv7l instead)
TARGET_ARCH ?= armv7l
endif
ifeq ($(aarch64),1)
$(info WARNING - aarch64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
TARGET_ARCH ?= aarch64
endif
ifeq ($(ppc64le),1)
$(info WARNING - ppc64le variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
TARGET_ARCH ?= ppc64le
endif
ifneq ($(GCC),)
$(info WARNING - GCC variable has been deprecated)
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
HOST_COMPILER ?= $(GCC)
endif
ifneq ($(abi),)
$(error ERROR - abi variable has been removed)
endif
############################
# end deprecated interface #
############################
# architecture
HOST_ARCH := $(shell uname -m)
TARGET_ARCH ?= $(HOST_ARCH)
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
TARGET_SIZE := 64
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
TARGET_SIZE := 32
endif
else
TARGET_SIZE := $(shell getconf LONG_BIT)
endif
else
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
endif
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
ifeq ($(HOST_ARCH),aarch64)
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
HOST_ARCH := sbsa
TARGET_ARCH := sbsa
endif
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
endif
endif
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
TARGET_ARCH = armv7l
endif
# operating system
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
TARGET_OS ?= $(HOST_OS)
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
endif
# host compiler
ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++
endif
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
ifeq ($(TARGET_OS),linux)
HOST_COMPILER ?= arm-linux-gnueabihf-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
else ifeq ($(TARGET_OS),android)
HOST_COMPILER ?= arm-linux-androideabi-g++
endif
else ifeq ($(TARGET_ARCH),aarch64)
ifeq ($(TARGET_OS), linux)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
else ifeq ($(TARGET_OS), android)
HOST_COMPILER ?= aarch64-linux-android-clang++
endif
else ifeq ($(TARGET_ARCH),sbsa)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_ARCH),ppc64le)
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
endif
endif
# Fallback host compiler when none of the branches above selected one.
#HOST_COMPILER ?= g++
HOST_COMPILER ?= g++
# nvcc is invoked without -ccbin (left commented out on the line below), so
# HOST_COMPILER is not forwarded to nvcc here; within the visible part of this
# Makefile it is only used for the GCC version probe in the cross-compile
# branches.
NVCC := $(CUDA_PATH)/bin/nvcc #-ccbin $(HOST_COMPILER)
# internal flags
# -m$(TARGET_SIZE) selects the 32/64-bit target; "-Xptxas -dlcm=ca" forwards
# -dlcm=ca (default load cache modifier) to ptxas. --use_fast_math is
# left disabled.
NVCCFLAGS := -m${TARGET_SIZE} -Xptxas -dlcm=ca #--use_fast_math
# Host compiler and linker flags start empty and are filled in per platform below.
CCFLAGS :=
LDFLAGS :=
# build flags
ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH)
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
CCFLAGS += -mfloat-abi=hard
else ifeq ($(TARGET_OS),android)
LDFLAGS += -pie
CCFLAGS += -fpie -fpic -fexceptions
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
NVCCFLAGS += --qpp-config 5.4.0,gcc_ntoaarch64le
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
LDFLAGS += -lsocket
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
ifdef TARGET_OVERRIDE
LDFLAGS += -lslog2
endif
ifneq ($(TARGET_FS),)
LDFLAGS += -L$(TARGET_FS)/usr/lib
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
endif
endif
endif
ifdef TARGET_OVERRIDE # cuda toolkit targets override
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
endif
# Install directory of different arch
CUDA_INSTALL_TARGET_DIR :=
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
else ifeq ($(TARGET_ARCH),ppc64le)
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
endif
# Debug build flags
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
BUILD_TYPE := debug
else
BUILD_TYPE := release
endif
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
SAMPLE_ENABLED := 1
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
# Common includes and paths for CUDA
INCLUDES := -I$(CUDA_PATH)/samples/common/inc
LIBRARIES :=
################################################################################
# Gencode arguments
# SMS lists the SM architectures to build for; it defaults to 53 only and can
# be overridden from the command line or the environment (e.g. make SMS="53 62").
# (presumably chosen for the board this benchmark runs on -- confirm)
SMS ?= 53
#SMS ?= 35 37 50 52 60 61 70 75 80 86
# An explicitly empty SMS disables the build instead of failing it.
ifeq ($(SMS),)
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# GENCODE_FLAGS is only derived from SMS when the caller did not supply it.
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
# $(sort) orders the entries as strings, so "highest" is the lexically last one.
HIGHEST_SM := $(lastword $(sort $(SMS)))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif
# When the sample is waived, EXEC turns every recipe line that starts with
# $(EXEC) into an echo of the command instead of running it.
ifeq ($(SAMPLE_ENABLED),0)
EXEC ?= @echo "[@]"
endif
################################################################################
# Target rules
# These targets do not produce a file of the same name. Declaring them phony
# keeps them working even if a file called "clean", "run", "build", ... ever
# appears in this directory.
.PHONY: all build check.deps run clean clobber

# Default target: build the micro benchmark.
all: build

build: micro

# Report whether the sample was waived (SAMPLE_ENABLED is cleared when SMS is empty).
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
	@echo "Sample will be waived due to the above missing dependencies"
else
	@echo "Sample is ready - all dependencies have been met"
endif

# Compile the CUDA source to an object file.
micro.o: micro.cu
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Link the benchmark executable. Copying it into the samples bin tree is disabled.
micro: micro.o
	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
#	$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
#	$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)

# Build if needed, then run the benchmark with no arguments.
run: build
	$(EXEC) ./micro

# Remove build products, including any copy left in the samples bin tree.
clean:
	rm -f micro micro.o
	rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/micro

clobber: clean
#ifndef INPUT_DEVICE_H_
#define INPUT_DEVICE_H_
__device__ __constant__ float common_float_input[] = {
2.41402913977, 7.65850839163, 6.57018671847, 9.80414661279,
-4.85665699159, 1.40699530361, -4.00834298395, 2.75964278884,
0.753262341411, 8.98645466109, -3.23323038093, -5.19239307522,
9.90331972115, 0.584347609233, 3.14268840228, -9.97311622408,
-5.96948683544, -0.831376506372, 1.19700873336, -1.96605078477,
-8.23109810903, -1.29516718578, -8.45815965182, -0.670358265412,
-3.24976484313, 6.39819111914, 7.33714421344, -3.52884896322,
3.03332474283, 4.5932481709, 8.91995616697, -2.87694449007,
-7.61484636122, 1.7265883128, 2.95949301825, 2.13195500359,
-4.76115294317, -0.421506880065, 8.98221583254, -1.31022386188,
1.22051829285, -9.81295303328, 7.08069139168, 9.03499725207,
-7.29925520458, 6.58551147809, 3.38853096935, 7.39922361637,
5.61074760553, 0.267656607305, -3.92687259716, 5.94862703359,
-7.68667549747, -0.224313773875, 1.52269709647, -9.29622879623,
2.41229422124, -3.69306674179, -4.29996911635, 4.2727654865,
5.92900308563, 0.021378756807, 8.08974926168, -5.52333043762,
-2.76493214307, -6.85399333285, -0.770411640811, 8.13926861709,
-3.10265510179, -0.25706896316, 0.00375881011646, -8.29025785655,
-0.516734468288, -8.02211349581, -6.30649718795, 8.66788571786,
-2.80592509615, -1.68280768331, -9.62574824486, 4.77507910988,
3.14988134977, 1.42891055327, 9.90292377242, 6.55882596072,
-8.54875363024, 0.622573619001, -1.62174532402, -6.48139765096,
1.97635086388, -7.65175980217, -2.62981402834, 6.24710558629,
-6.81646000167, 4.4693733079, 4.81423936346, 7.64454091337,
-3.88202919923, -2.78887676864, 8.35286114779, 9.26014594175,
1.64884459812, 8.35319565231, 0.69196796832, 7.02982849361,
7.82627409056, -9.82273650902, 5.35033922669, -8.6306197159,
0.95768314402, -0.252636994701, -8.14080566508, -1.3103301735,
-9.16100536427, 3.63403668456, -9.61459829128, 9.16886958736,
0.526091804679, -3.86532358275, 6.4977243401, -7.1990382726,
-6.51104454042, -7.46915749683, -9.34225762815, 7.65960642934,
-3.09998646446, -3.17577604382, -6.82271780545, -4.82743752118,
6.7635731489, -0.443955326818, -0.111571832634, -6.97720830328,
-5.62045714222, 3.77298203954, 0.625549536063, -7.23499605082,
6.61388957256, 9.6974860733, 2.73619629747, 9.41040832699,
7.65526487561, -1.09882039635, -4.20023909993, -0.339128005663,
0.24534027111, 2.02262947722, -5.72531457242, 4.18196481839,
8.50239487897, 6.27066631584, 8.84311590612, -2.33295873586,
-0.179735883731, 3.79696715395, 4.00790142229, -4.80345143147,
1.6746544459, -5.91605344649, 9.51118593738, -3.76969035762,
-8.70331082238, 0.774819393984, -9.58715854744, 7.31082617065,
-0.876864296053, 5.43481310694, 5.39671871111, 5.65472871598,
7.81278538663, -7.86138245382, -9.74657727237, -3.38391367634,
9.97467872344, -2.23551598183, 3.83574337483, -1.64003268571,
-5.78034848526, 9.41997570212, -0.735273499647, -9.03815318412,
0.992658121327, 9.00201765551, 0.178511539815, -2.62269707233,
3.29199691071, -2.93371142962, -1.93929303193, 3.11881646278,
3.04151574022, -5.85558098971, -3.31657171628, 4.37384612808,
-5.67532456086, 0.078185749595, 4.94053401732, 0.225583754937,
6.5894464817, -5.7153163796, -5.49691394243, -7.08593297598,
-4.4060699674, -9.01879325141, 2.0829691237, -1.60517735892,
-4.95843487614, 2.68376321302, 5.73243166438, 5.60318533049,
8.42504918213, 8.04727421105, -0.177456592557, 5.48311346299,
3.30054341334, -5.23846329198, 8.1073799034, -7.54419253278,
6.02443959913, -3.89996016897, -8.92858441661, 1.47579350179,
-7.98416185095, -9.96208329475, 3.84586400437, 2.36617260487,
-9.349548423, -5.30100311328, -3.05918943304, 2.39149394492,
4.85895477755, 5.42920805013, 5.6169918571, 6.75065343372,
1.50402010026, -1.44274481069, 7.02567983202, -4.18236711083,
3.87535836654, -1.8557499295, 1.14942116674, -5.2447272067,
3.65955856372, -4.74072797443, -0.437332073748, -6.93982594683,
2.45286886249, 0.403403784041, 5.65166409406, -2.21839157567,
-7.09492923926, -1.81718828402, -6.36886960406, 0.41141289371,
2.75343885008, -1.34412180815, -0.0411522036819, -7.00685900761,
-9.98789755681, 1.67412693942, 7.19557187614, -4.23876985321,
9.52029688895, -3.40164205102, -4.65019467949, 2.63607190273,
7.56174186237, -9.89265386175, 7.52185232248, 1.66502573817,
-2.41159954174, 6.01879560968, -6.3672435836, 2.38386837255,
4.73425817315, 7.74560412674, 6.99942119683, 6.77984522804,
2.77234371097, 1.74868474562, -1.05707374642, 1.5489820833,
-8.93705730548, -8.02354623722, -8.68941604775, 6.68714661609,
-7.5399178742, 2.80174218618, -6.45692082868, 5.46291434825,
-1.54169329446, -9.83555526797, 0.271318277776, 5.24381294776,
-7.35535452136, -7.78658265782, -6.93081663079, -6.46218779678,
-8.23488185225, -3.36871425382, -7.76030698362, -3.60824815595,
8.97214234524, -4.76425525023, -6.46246778865, -4.32257590096,
-7.45924967072, 7.00539569904, 1.31033189633, -9.58225907937,
6.2967184082, 1.43542500956, 5.35343004217, -6.66991062715,
5.34568941213, 4.78471328423, 6.94259491104, -0.928160258876,
-0.695893355115, 8.82571940821, -8.42940626582, 1.70419913889,
5.44709482658, -4.92351265501, -5.21526814258, 3.60855989673,
-8.5070917783, -6.65578431361, -9.64633924454, 8.97470988661,
8.61554040725, 9.80092283885, 4.59392764545, 0.749894652441,
3.62906244241, 0.0907504271618, 5.70037956104, -5.81830423163,
5.5104060351, -1.50184185442, 1.46494429626, -8.76761097044,
-4.27782468266, 8.01920759861, -7.7729330388, 9.89476808234,
6.46189928185, -6.41794023401, -9.30241500334, 6.69842925194,
3.20382799978, -9.23440355658, 9.92773715177, 4.64986514732,
-1.21742834571, -6.38140718211, 9.99354732255, -6.67231142022,
-5.63947372546, -7.11017974509, 6.73848738141, -7.76177072779,
-2.50546724177, 0.997567079087, -7.90983650399, -3.70352182103,
-5.23185562816, 7.84351146938, -0.416867974071, -7.11650397743,
1.59386501847, 7.90209031207, 0.538550619462, 1.54887151048,
2.406962487, 8.63121497026, 0.703743093905, -3.0413499837,
9.06219251239, -8.69434331621, 9.27054043897, -2.76872748836,
-0.3734803744, 2.78368366546, -6.47261961244, 4.94875797469,
-8.89256811661, 8.43663066181, -9.23829126056, -6.99595178976,
-9.43539144689, 7.67697347605, 5.49121348506, 8.09654269564,
2.05799481312, 3.25888429813, -6.75532913647, 4.41007523002,
6.6077220969, 5.94829700391, 8.09374388574, -1.27536049523,
9.51869970436, -4.85725758076, -9.11765415375, -5.93763506032,
-0.562874589931, 5.8831744437, -7.94080015099, -6.06605894049,
1.92186735664, -1.56532177599, 2.19317064562, 8.23284068058,
5.60229536828, -9.49181680664, -3.49280784295, -8.52965882533,
7.81228003039, -1.95347150954, -2.40999128584, -5.31696662156,
-7.0145082203, -6.99365297548, 7.02471888114, -3.27434564803,
-7.50527841985, 4.35414833728, 9.29639862552, -7.73651890212,
1.89723563704, -7.17424843033, -6.31293744278, 2.25685408441,
-6.81471191738, 2.04355753132, -4.81096710935, -3.59130408261,
5.80683658627, 6.27852313355, 8.90446901499, 2.13445112114,
-9.65974961526, 8.05371494793, 1.32454420502, -0.105521324274,
-9.39742571714, 9.85310810574, 0.83695987353, 0.307000095853,
5.90060231122, 1.08841789808, 8.24735934212, 9.69586146305,
-4.11522956361, 4.0793359355, 0.128425830553, -9.58445755409,
0.013801879459, -8.43436808612, -2.46862001566, 9.34469451096,
-2.39386084215, -1.38528619145, 0.376289678331, 0.89430000248,
0.286515493359, -1.7633691451, -4.91138580881, 3.39431952308,
7.23919447137, 2.58795511109, 4.50504512186, -1.53799634993,
-6.55484514761, -5.96775448191, -0.0295220549242, -8.96354369737,
-2.41274966881, -7.61062751052, -0.652694413125, -4.20615224831,
6.74075283111, -5.91453900787, 6.07589584055, 1.25108939605,
-7.22912102067, 2.40923669612, 0.86607232246, 8.19081556128,
4.34711482234, 7.99258456292, -6.708253622, -6.00638934724,
-4.97174640191, -9.94010416668, 0.725423538586, -1.22523211699,
-9.40713070832, -0.991480096208, 1.30511901587, -6.55183976577,
-2.9960523758, -0.396116016334, 2.5167390572, 0.72540877691,
-6.37334828263, 9.33969396338, -2.14242544215, 5.68674283448,
-7.22988868561, 6.45774938501, -0.316238790059, -8.02625211109,
0.521440651515, -1.51066667304, 8.76071575906, 5.87002690038,
-4.17888632096, 6.46334004192, -3.88806588441, 9.62985903555,
3.32635939532, -3.59420334553, -9.48416753402, 3.2553007964,
8.44906550593, -9.18152591424, 4.12148009864, -0.236938516205,
8.30024332318, 5.816503847, -6.08900933335, -7.94491643089,
-3.72829604913, 4.22444292859, -6.0083461638, -9.50637927189,
1.98597953942, 7.73772819642, 1.63208865192, 7.07330432823,
-5.17979213053, -8.95309612039, 7.84053138485, -6.46645918776,
-6.02122077537, -5.65945625999, 7.33014649929, 6.68702062818,
5.63140464483, 0.94464281305, 3.14706943584, 9.79429643477,
-1.44180146488, 9.42109182998, -7.00319267093, -3.74926465729,
6.63996033209, 1.45735349435, 8.67819226558, -9.45491162651,
2.31258085697, -7.66004383463, 9.99590649971, -9.84962981716,
-8.06458815403, 8.96682281587, 0.371462150198, 4.10050648819,
-2.0764614675, 1.87647536411, -4.88392101613, -4.03050630443,
-3.48791622376, 2.13611811054, 4.95062447284, 9.18825091709,
-0.491501134842, 6.67072807058, 0.757997270726, -3.34355078959,
-8.89876704589, 7.54243818731, -4.20497331698, -3.22177775809,
-8.66834196251, -7.44478397831, -0.800717801286, 3.42727761748,
-5.30854056602, 2.81727427899, -7.66822126976, -4.33335498071,
-6.80137832552, -3.80007356638, -5.06148074268, 6.001173306,
1.25697165356, -4.68420005001, 4.52308401665, -4.87948473658,
-3.92229786749, -9.01806104756, -8.32816260837, -6.25176926251,
-2.65952025231, -3.73575790922, -8.12718598018, 4.67933642013,
3.17224476123, 7.66875583352, 0.411370822658, -3.02607192196,
-4.48579705245, 8.72630149296, 0.412549957243, 3.62325937163,
-8.29384877497, -4.56253699042, 6.12857761594, 6.40984837408,
-6.23008203444, 2.94701524272, 5.84677349913, 1.49909542447,
4.46049791935, 9.90532774185, -1.69230574748, -8.33659247424,
-0.543365826418, 4.62339648063, -3.1338550932, -3.37910660065,
-4.71252836505, -2.27071212153, -7.90195504673, 1.31648655473,
3.16918835349, -2.04215110622, -0.0648034551791, -4.99566702968,
2.75031555829, -2.39791663768, -9.35640107832, -3.332307932,
-1.78199963153, -6.10886202031, 5.00846712473, 5.41679147637,
-6.97532328055, -1.24072440339, 1.83527864207, -0.829314320972,
8.86174021702, 3.79158251065, 6.07129345055, -2.15908054222,
0.0804740197021, 0.707353371699, -7.9675140699, 8.88598174871,
3.73000024216, 1.60817843035, -8.89526955582, 6.33149628893,
-7.35376110437, 7.46389101751, -0.0852495808324, -6.25219520293,
7.58566312321, 5.87522078144, 2.03353545571, 8.88259408951,
5.95267346832, -0.661480826176, 4.60888966328, 0.321686484505,
6.71195674739, -8.54243983136, 5.04371078596, 6.55484233502,
4.031780793, -0.469933802898, 2.76763000228, 0.481231305397,
-7.45746899401, -1.49128610058, 2.23403320744, -8.14820831371,
-2.58811133051, 1.42789262222, -8.70493634055, 9.77075533542,
0.718085848638, 8.7737046278, -2.06650387786, 7.72469222002,
-6.95310186203, 5.42351246683, 5.62210102877, 3.2495258676,
-6.01472578347, -8.10612473651, -3.44414807806, 0.964494874292,
-1.25336548376, -6.10598935257, -2.04361163789, 8.76092494534,
2.49832804975, -1.94125151581, -9.77787370699, -5.50702423646,
-9.42046125882, -0.0979965917911, 6.61614678006, 8.1289856254,
-4.1685584036, -0.798645676538, 7.26199840761, -9.86609999581,
8.00269243244, 9.17050068448, 0.3630543508, 8.83376454362,
-8.5183819034, -2.05292839865, 7.801230762, 7.21289430472,
-7.50817704984, 0.243494983012, -8.17409029533, 0.253268887573,
3.5475333822, -6.21317827706, -4.3935559326, -9.82794991157,
-9.45742499118, -8.09598538714, -0.442381771901, 0.363648567229,
-0.815341730482, 9.75417676854, 7.00055812752, -3.59281930826,
-7.85553427728, 0.421808840145, 2.22316751701, -3.80963946323,
5.03708878144, 8.46273799251, 3.30550655162, -9.43399987891,
9.46429511277, 1.87488337544, -3.79491532835, -0.572087347949,
5.25935878721, 1.03473513205, 0.207264777203, -0.720572180818,
-7.37984253947, 2.53304045288, 7.3614107064, 1.51774133549,
-4.12031966059, -9.52697678539, -7.51331985601, -7.62109798154,
1.35572143135, 7.76536399197, 0.35859267326, -5.80215198763,
-0.440753306325, -6.00879799915, 9.92998280624, -0.537083653568,
0.821940749715, 1.38062748848, -9.44309264851, 2.19828260256,
4.86702597726, -2.87119473551, -9.56573721305, -4.20744572265,
7.87170785555, 0.810471293271, 4.77716148499, 1.13385048068,
5.5601540242, 4.16052609615, -7.46630274436, 4.18256277772,
-2.20246360718, 8.3474602693, 2.57352419181, -9.62109133667,
-2.35213557311, 8.57142300672, -0.15695677026, 0.749450470949,
-0.948546933054, -0.397034456903, 3.00626200909, -1.71241171225,
-6.12482532162, -4.69341597636, -1.97305698435, 6.96744601616,
-2.38784107205, 5.06218302859, -0.601652095466, 5.60226410794,
3.59878861382, 4.41065109351, -2.40236659932, -3.8425032102,
-1.73315045312, 1.41485283038, 8.16437111272, 1.94117825173,
-5.87351304202, -1.13534020743, -2.38480422889, -4.39873178396,
5.03972442868, -4.23347076985, 7.91705146748, 1.88737566316,
4.64124943499, 4.21148917075, -3.96666975788, 0.33927750911,
0.224015491469, 8.23204051119, 7.34640970712, -1.80649478106,
8.94297810786, -5.63796108207, -8.06537250095, 1.77404759541,
3.1314063331, 0.548770287441, -5.3281733294, -3.4591761573,
3.09480378277, -1.87097921949, -3.4240727229, 7.23174887192,
-8.43435326355, 0.904334063737, 8.21376905514, -3.86482463031,
-8.9634921347, -6.19648566334, -5.66952793593, 7.09000363048,
1.91906868921, -8.05838547114, 7.15237846564, -1.56095631321,
-5.04627871921, 6.05055358523, 1.85641256483, 6.57071681923,
8.23494991192, 4.65369326273, 9.67100027793, -9.44414172075,
-4.7739748078, -7.10549096036, -7.78721465747, -1.01894008518,
-4.97302897994, 1.62206366801, -0.162929371964, -5.60980424243,
-0.644036491045, 8.62949347178, -8.69525822184, -2.87584867567,
-8.97558342206, 4.77340455908, 7.11208666512, 7.71482184007,
7.60546222237, 6.5038967742, 6.32305199103, -7.78005768363,
-1.65741479103, -2.81577399366, -1.20977277948, 8.29670270275,
7.6236583057, 2.34876905574, -0.61703899151, -1.18955594108,
-3.91594340333, -3.67564729522, -9.54479485338, -7.20015410968,
-8.95677330403, 7.36349208455, 9.4251783571, 2.70765157054,
0.14018204293, 9.18283991246, 3.33691904061, -9.43399752791,
-1.58920452992, -7.76352206643, 7.98066190076, -7.61349007487,
8.9907265962, -7.86589234325, 3.4686102646, -8.52984883416,
-1.12984170339, 6.26132513608, 3.08522815352, -1.80905407867,
6.35975040397, 5.79813039561, -3.14235093057, 1.64057003362,
-3.13180169404, -3.89769592251, 1.83072339323, 4.5065764253,
-5.18833655036, -0.0493604800881, -1.97002136908, -3.94776106072,
-2.85093289348, 5.23309941412, -9.3539443511, 3.97695789903,
-3.73245159379, 1.95457296396, 6.11879559036, 3.68366114965,
4.37976182014, -8.71931517401, 5.143609877, -5.55504031899,
-9.17103794128, 1.51992606771, 8.41553401895, 9.78335439014,
2.14131517662, 3.8673457671, 5.49076504596, 8.90893718755,
-6.08899618639, -9.9691062848, -2.00949008492, -8.98688998365,
8.89203772943, -4.292474488, 3.57972386747, -3.52563067544,
-4.1621849542, -0.682530547643, 6.86254101175, 2.40273546624,
2.83706318117, -2.75463552454, 5.48364250933, 4.58172165887,
-1.62994922245, -0.334727706489, 4.74395154153, 6.42222434141,
-5.23799453101, -2.42583480314, -3.31438375947, 8.82048289241,
9.05254646477, -1.65176714358, 1.11850209287, -6.11289943338,
4.50073575952, -4.26621069373, -9.01187128873, -1.80474317563,
6.69318465484, -6.34953675301, -4.48063570956, 1.15623846517,
-1.35367234467, -7.90751755027, -0.0675583765527, -9.0984400263,
2.72128270487, 0.383167124181, 6.20911790398, 3.99139250613,
-7.2309035232, -0.494021738584, -5.99020463473, -0.368724826829,
-2.43391780235, 3.98105653844, 2.52821022783, -2.69531887529,
0.348961444698, 4.46291160242, -8.75223672303, -2.52524711504,
-7.08358851225, 7.66012690338, 0.384392659234, -2.76703265896,
-9.39725183764, 8.60458288158, -1.23949887317, -5.59880798874,
0.522626280632, 1.65576240479, -8.69470502817, -4.6146476747,
2.30277697263, -3.07909166553, -0.190296275476, -4.18763357843,
};
/*
 * Table of int32_t values declared with the __device__ __constant__
 * qualifiers. inverse_mul_input (declared further down in this header) is
 * laid out with the same row/column shape.
 * NOTE(review): the values look like unsigned 16-bit magnitudes (< 65536);
 * confirm against whatever generated this table.
 */
__device__ __constant__ __restrict__ int32_t common_int_input[] = {
32628, 4736, 16120, 56777, 27068, 6252, 17497, 40445,
54733, 51333, 60735, 10262, 29695, 38862, 8655, 37346,
1545, 54551, 14229, 42893, 45716, 51210, 21663, 40089,
64550, 48635, 51683, 55465, 8702, 64484, 47140, 8156,
44242, 217, 26709, 29171, 23389, 53184, 46888, 46442,
14834, 475, 38150, 36697, 37760, 48259, 56753, 50801,
35647, 2633, 20961, 23335, 63807, 45699, 59325, 48474,
25351, 23349, 39257, 47743, 47717, 8833, 28573, 48927,
62630, 21482, 14771, 14962, 37351, 40587, 22111, 42213,
18614, 23188, 52947, 44969, 37959, 26174, 785, 52595,
38824, 59018, 29280, 57831, 51122, 24828, 38234, 26524,
63761, 56119, 20191, 59466, 8305, 1307, 3648, 40980,
29483, 31989, 42322, 10639, 3415, 42672, 9424, 62555,
28886, 10734, 13819, 5893, 10937, 22947, 16573, 47334,
37754, 37903, 54629, 46001, 44532, 24950, 60764, 24198,
23162, 855, 31203, 7039, 26158, 16362, 21428, 31326,
28465, 19997, 32710, 17738, 56009, 30858, 9220, 34070,
42236, 10471, 7639, 37128, 63470, 42534, 26716, 41520,
14971, 29379, 13767, 53928, 2511, 43340, 32103, 20274,
3247, 387, 16570, 46560, 23316, 20559, 11729, 17051,
16579, 23184, 25856, 62026, 31278, 59901, 45547, 62346,
598, 36978, 61947, 37387, 38985, 4786, 31954, 60567,
63645, 47160, 15074, 40007, 7896, 2116, 52155, 55877,
45076, 49646, 15041, 39600, 43058, 24811, 17078, 63116,
40002, 8154, 6363, 65390, 56511, 58335, 59064, 49143,
65158, 19706, 20318, 30324, 43904, 49863, 28693, 19787,
52480, 49571, 46894, 30122, 15386, 41342, 47060, 27142,
51254, 39155, 56187, 59085, 32321, 25777, 8867, 28511,
38848, 41425, 51650, 3645, 31887, 48147, 48742, 1370,
33723, 43997, 53855, 54772, 1503, 35127, 8252, 54478,
13877, 46091, 59901, 4878, 64462, 35569, 55919, 64033,
2473, 39328, 52680, 42166, 14424, 3119, 36663, 14384,
52846, 18891, 16928, 41985, 21283, 46209, 52837, 46439,
63946, 18511, 45828, 8080, 22438, 39417, 49270, 37197,
32523, 56317, 64322, 62590, 28180, 54503, 20017, 17675,
65072, 24690, 2009, 64795, 18178, 20414, 40728, 53298,
22069, 24175, 59963, 52561, 24336, 11183, 51520, 51774,
49683, 39172, 21615, 60810, 36325, 29895, 52361, 21131,
45612, 11316, 47349, 20719, 32440, 48130, 20301, 28648,
4067, 7263, 10685, 39913, 9745, 13862, 57049, 9011,
20122, 36202, 58234, 62117, 7436, 64559, 22376, 39017,
55617, 53396, 373, 63769, 17039, 56156, 27010, 12172,
21823, 15451, 33611, 18466, 58353, 20141, 48094, 20728,
51169, 24516, 54961, 22920, 2949, 49182, 55299, 52146,
54449, 13437, 58770, 33683, 30354, 34456, 41392, 24685,
22931, 24718, 7521, 1664, 27484, 65036, 28600, 47719,
965, 56813, 56948, 16748, 2988, 6517, 10370, 42362,
65214, 7176, 21538, 13013, 14546, 10208, 8115, 23610,
18422, 64624, 53258, 57046, 63325, 43460, 47109, 35696,
46640, 37263, 11176, 8022, 29302, 22643, 32762, 16131,
19608, 28327, 57833, 283, 6285, 17143, 6921, 7464,
65472, 23671, 23784, 47543, 17921, 56955, 35370, 31100,
65075, 2907, 15692, 9367, 30426, 52225, 51839, 36577,
11070, 21807, 48652, 37264, 55454, 37745, 917, 28748,
4639, 51622, 62321, 55277, 53659, 59817, 30901, 39641,
36438, 56324, 35498, 7056, 23515, 42581, 17153, 12062,
53692, 20807, 62767, 8768, 45046, 35308, 33296, 23024,
8519, 62252, 64991, 33758, 13572, 20751, 43274, 61656,
45609, 46613, 10771, 15496, 42875, 41230, 10580, 56592,
49483, 63448, 55357, 27309, 27262, 35395, 5883, 39117,
46143, 17383, 28093, 9139, 21750, 12527, 46826, 3477,
7057, 49985, 43523, 13399, 6284, 2731, 46427, 54376,
33691, 53668, 3620, 43180, 48978, 53176, 4673, 46180,
62072, 28515, 7552, 63948, 23360, 42466, 59917, 27527,
32034, 23943, 11236, 55107, 47472, 54190, 54802, 5095,
58131, 1901, 48934, 22233, 12127, 59272, 58154, 40963,
52684, 8397, 55594, 15553, 40039, 11939, 17503, 6689,
13460, 39800, 59464, 22658, 49371, 16354, 58786, 60732,
44503, 6724, 37007, 64531, 13432, 54301, 27030, 46448,
36083, 48331, 6696, 62452, 31374, 35471, 55776, 18257,
339, 7956, 61244, 38933, 2190, 30747, 12361, 14169,
58559, 12376, 18709, 63976, 14853, 46633, 27524, 787,
42505, 30363, 3100, 64700, 24922, 23463, 36644, 12699,
45447, 15276, 64472, 22660, 57232, 20025, 25106, 43736,
42212, 13416, 63398, 2461, 58482, 58616, 48411, 9405,
1774, 18938, 16423, 3710, 13716, 5800, 2292, 48416,
1551, 26227, 40754, 43818, 35750, 40686, 14783, 58235,
45979, 78, 7503, 7546, 23765, 39910, 6341, 57948,
48230, 31801, 40314, 25263, 6662, 62173, 37141, 17224,
52488, 21337, 56529, 33442, 54267, 45723, 47021, 56449,
1324, 30583, 46034, 17544, 6288, 31692, 17757, 22107,
13785, 53263, 60009, 19953, 51632, 43338, 18441, 11872,
53136, 5404, 62944, 11515, 47187, 55498, 21254, 39838,
20995, 55346, 38741, 4604, 48577, 463, 9586, 7963,
31821, 22223, 31750, 46103, 51290, 44501, 26132, 49477,
1113, 39722, 8410, 25587, 41878, 57975, 64102, 24336,
41886, 50479, 18283, 21764, 54393, 24856, 16793, 12462,
32719, 36202, 59456, 63547, 30882, 45744, 27884, 59683,
28232, 25668, 51848, 11456, 58523, 31470, 29228, 14731,
47670, 45680, 46969, 4179, 25813, 29707, 25162, 56204,
2823, 47711, 57923, 22297, 19275, 25390, 41903, 53256,
17149, 22208, 51475, 44150, 2761, 53851, 38915, 16200,
20893, 35112, 32733, 12325, 47676, 4380, 1170, 61632,
24713, 63316, 18528, 53538, 12245, 26464, 36203, 61866,
28097, 64558, 57317, 29777, 19627, 29235, 15672, 30571,
10156, 7510, 37037, 17753, 55721, 28287, 27934, 57353,
34888, 32221, 39285, 43835, 43627, 18451, 57654, 32336,
27317, 47207, 5864, 45226, 48813, 49576, 3394, 41829,
58632, 31500, 56962, 8707, 60616, 8968, 60404, 55182,
62762, 36658, 46239, 61437, 35172, 9356, 47431, 56316,
54265, 18458, 26265, 12367, 65170, 12407, 36130, 62801,
53859, 64871, 1503, 17855, 21133, 9230, 49248, 60307,
55264, 6630, 41284, 30855, 62418, 21204, 41122, 49769,
17772, 30872, 22652, 38279, 34078, 50309, 56811, 46536,
25681, 46704, 5578, 15161, 5744, 27181, 3012, 5118,
26870, 31798, 32869, 25380, 4420, 31807, 9218, 27261,
62236, 33392, 27545, 22857, 27255, 18844, 27846, 57679,
41995, 61222, 57417, 52587, 31518, 40483, 40685, 17232,
49755, 60174, 48235, 6964, 50279, 24623, 12509, 27809,
8974, 40904, 55029, 33668, 57235, 4712, 8135, 3560,
39933, 49083, 38455, 57298, 24562, 8172, 22129, 63036,
9771, 13732, 55766, 48971, 62722, 21051, 63044, 17246,
40657, 15822, 46971, 26538, 20210, 41402, 52616, 18986,
35810, 60650, 36885, 61863, 27406, 28358, 24981, 41398,
55812, 46436, 64950, 31299, 40615, 23751, 29793, 36630,
33555, 35401, 7239, 65075, 28528, 29658, 21797, 13792,
1754, 11831, 10665, 43038, 30127, 30093, 43458, 34426,
57469, 12146, 52524, 49435, 9986, 33361, 39028, 49892,
48274, 36080, 42503, 34975, 2895, 33432, 46169, 35586,
55567, 33142, 39804, 51789, 17865, 23539, 6706, 41967,
12447, 47698, 39408, 50795, 37489, 19443, 31232, 40311,
41517, 24942, 52851, 33501, 40359, 16609, 7901, 8371,
55099, 27931, 11028, 40159, 1543, 33996, 12448, 467,
11922, 12136, 24926, 65439, 17699, 2658, 23564, 37686,
10623, 11899, 45336, 29434, 25488, 61547, 44994, 51584,
50511, 51617, 52786, 32795, 43795, 53292, 2345, 40358,
44226, 8130, 53021, 5904, 23333, 44127, 42982, 10607,
51926, 61446, 7555, 43826, 24755, 4054, 56583, 280,
};
/*
 * Table of int32_t values declared with the __device__ __constant__
 * qualifiers.
 * NOTE(review): spot-checked entries equal ceil(2^32 / common_int_input[i])
 * for the entry at the same position (e.g. 32628 -> 131635, 217 -> 19792477,
 * 78 -> 55063684), i.e. they look like 32-bit fixed-point reciprocals of
 * common_int_input. Presumably every entry follows that rule; confirm against
 * the generator before relying on it.
 */
__device__ __constant__ __restrict__ int32_t inverse_mul_input[] = {
131635, 906877, 266438, 75647, 158674, 686975, 245469, 106193,
78472, 83669, 70717, 418532, 144637, 110519, 496242, 115005,
2779915, 78734, 301847, 100133, 93949, 83870, 198263, 107136,
66538, 88311, 83103, 77436, 493561, 66606, 91111, 526603,
97079, 19792477, 160806, 147235, 183632, 80757, 91601, 92481,
289536, 9042037, 112582, 117039, 113744, 88999, 75679, 84545,
120487, 1631207, 204903, 184057, 67312, 93984, 72398, 88604,
169421, 183947, 109407, 89961, 90010, 486242, 150316, 87784,
68577, 199934, 290771, 287059, 114990, 105822, 194246, 101746,
230739, 185224, 81119, 95510, 113148, 164093, 5471296, 81662,
110627, 72774, 146687, 74268, 84015, 172989, 112334, 161928,
67361, 76534, 212717, 72226, 517155, 3286127, 1177349, 104807,
145677, 134264, 101484, 403701, 1257678, 100651, 455748, 68660,
148687, 400128, 310802, 728826, 392701, 187170, 259155, 90738,
113762, 113315, 78621, 93367, 96447, 172143, 70683, 177493,
185432, 5023354, 137646, 610168, 164194, 262497, 200438, 137106,
150886, 214781, 131305, 242134, 76684, 139185, 465832, 126064,
101690, 410178, 562243, 115681, 67670, 100978, 160764, 103444,
286886, 146192, 311976, 79643, 1710461, 99100, 133788, 211847,
1322750, 11098107, 259202, 92246, 184207, 208910, 366184, 251890,
259061, 185256, 166112, 69245, 137316, 71702, 94298, 68890,
7182220, 116150, 69333, 114879, 110170, 897403, 134411, 70913,
67484, 91073, 284926, 107356, 543943, 2029758, 82351, 76865,
95283, 86512, 285551, 108459, 99749, 173108, 251492, 68049,
107369, 526732, 674991, 65683, 76003, 73626, 72718, 87398,
65917, 217953, 211388, 141636, 97827, 86136, 149687, 217061,
81841, 86643, 91589, 142586, 279148, 103889, 91266, 158241,
83798, 109692, 76441, 72692, 132885, 166621, 484377, 150643,
110559, 103681, 83156, 1178318, 134694, 89206, 88117, 3135013,
127361, 97620, 79751, 78416, 2857597, 122270, 520476, 78839,
309503, 93185, 71702, 880478, 66628, 120751, 76807, 67075,
1736744, 109209, 81530, 101859, 297766, 1377034, 117148, 298594,
81274, 227356, 253720, 102298, 201803, 92947, 81288, 92487,
67166, 232023, 93720, 531556, 191415, 108963, 87173, 115466,
132060, 76265, 66773, 68621, 152412, 78803, 214566, 242997,
66004, 173956, 2137864, 66286, 236273, 210394, 105455, 80585,
194616, 177662, 71627, 81714, 176487, 384063, 83366, 82957,
86448, 109644, 198704, 70630, 118238, 143669, 82027, 203255,
94164, 379549, 90709, 207297, 132398, 89237, 211565, 149923,
1056053, 591349, 401963, 107609, 440736, 309838, 75286, 476637,
213447, 118639, 73754, 69144, 577592, 66528, 191946, 110080,
77225, 80437, 11514658, 67352, 252067, 76483, 159014, 352857,
196810, 277974, 127785, 232588, 73604, 213245, 89304, 207207,
83937, 175191, 78146, 187390, 1456415, 87329, 77669, 82365,
78881, 319638, 73081, 127512, 141496, 124651, 103764, 173991,
187300, 173759, 571064, 2581111, 156272, 66040, 150174, 90006,
4450744, 75599, 75420, 256447, 1437406, 659041, 414173, 101388,
65860, 598519, 199414, 330053, 295268, 420746, 529263, 181914,
233144, 66461, 80645, 75290, 67825, 98826, 91171, 120321,
92088, 115261, 384303, 535399, 146576, 189682, 131097, 266256,
219042, 151621, 74265, 15176563, 683368, 250538, 620571, 575425,
65601, 181445, 180583, 90339, 239662, 75410, 121430, 138102,
66001, 1477457, 273705, 458522, 141162, 82240, 82853, 117423,
387983, 196954, 88280, 115258, 77451, 113790, 4683716, 149401,
925840, 83201, 68917, 77699, 80042, 71802, 138992, 108347,
117871, 76255, 120992, 608698, 182648, 100866, 250392, 356075,
79993, 206420, 68428, 489846, 95347, 121643, 128994, 186544,
504164, 68994, 66086, 127229, 316458, 206977, 99251, 69661,
94170, 92141, 398753, 277167, 100175, 104171, 405952, 75894,
86797, 67693, 77587, 157273, 157545, 121344, 730065, 109798,
93080, 247079, 152884, 469961, 197470, 342857, 91722, 1235251,
608611, 85926, 98683, 320544, 683477, 1572673, 92511, 78987,
127482, 80029, 1186456, 99467, 87692, 80769, 919103, 93005,
69194, 150622, 568720, 67164, 183860, 101139, 71682, 156028,
134076, 179384, 382251, 77939, 90474, 79258, 78373, 842977,
73885, 2259320, 87771, 193180, 354166, 72462, 73856, 104850,
81524, 511489, 77256, 276151, 107270, 359743, 245385, 642095,
319092, 107914, 72229, 189557, 86994, 262625, 73062, 70721,
96510, 638752, 116059, 66557, 319757, 79096, 158897, 92469,
119031, 88866, 641423, 68773, 136896, 121084, 77004, 235251,
12669521, 539841, 70129, 110317, 1961173, 139688, 347462, 303125,
73345, 347041, 229567, 67135, 289165, 92102, 156045, 5457392,
101047, 141454, 1385474, 66383, 172337, 183053, 117208, 338214,
94505, 281158, 66618, 189540, 75045, 214481, 171074, 98203,
101748, 320138, 67747, 1745213, 73441, 73273, 88719, 456669,
2421064, 226791, 261522, 1157674, 313136, 740512, 1873895, 88710,
2769161, 163762, 105388, 98019, 120139, 105564, 290535, 73753,
93412, 55063684, 572434, 569172, 180727, 107617, 677333, 74118,
89052, 135058, 106538, 170011, 644697, 69081, 115640, 249360,
81828, 201292, 75979, 128431, 79146, 93935, 91342, 76086,
3243934, 140437, 93300, 244812, 683042, 135523, 241875, 194281,
311569, 80637, 71573, 215255, 83185, 99104, 232904, 361773,
80830, 794776, 68235, 372989, 91021, 77390, 202079, 107811,
204571, 77603, 110864, 932878, 88416, 9276388, 448046, 539366,
134973, 193267, 135275, 93161, 83739, 96514, 164357, 86808,
3858911, 108126, 510698, 167858, 102560, 74084, 67003, 176487,
102540, 85085, 234916, 197343, 78962, 172794, 255760, 344646,
131269, 118639, 72238, 67588, 139077, 93892, 154030, 71963,
152132, 167328, 82838, 374910, 73390, 136479, 146948, 291560,
90098, 94023, 91443, 1027751, 166388, 144578, 170693, 76418,
1521420, 90021, 74150, 192626, 222826, 169160, 102498, 80648,
250451, 193398, 83438, 97282, 1555584, 79757, 110368, 265122,
205570, 122322, 131213, 348477, 90087, 980587, 3670913, 69688,
173794, 67834, 231810, 80223, 350753, 162295, 118636, 69424,
152863, 66529, 74934, 144238, 218830, 146912, 274054, 140492,
422900, 571900, 115965, 241930, 77080, 151836, 153755, 74887,
123108, 133298, 109329, 97981, 98448, 232777, 74496, 132824,
157227, 90982, 732430, 94967, 87989, 86635, 1265459, 102680,
73253, 136349, 75401, 493278, 70856, 478922, 71105, 77833,
68433, 117164, 92887, 69909, 122114, 459061, 90552, 76266,
79149, 232689, 163525, 347293, 65905, 346173, 118876, 68391,
79745, 66208, 2857597, 240548, 203236, 465327, 87211, 71219,
77718, 647809, 104035, 139199, 68810, 202555, 104445, 86299,
241671, 139122, 189607, 112202, 126034, 85372, 75601, 92294,
167243, 91962, 769984, 283291, 747732, 158014, 1425952, 839189,
159843, 135071, 130670, 169227, 971713, 135033, 465933, 157550,
69011, 128623, 155926, 187906, 157585, 227923, 154241, 74464,
102274, 70154, 74804, 81674, 136271, 106094, 105567, 249244,
86323, 71376, 89043, 616739, 85423, 174430, 343351, 154446,
478602, 105002, 78050, 127569, 75041, 911496, 527962, 1206452,
107555, 87505, 111689, 74959, 174863, 525572, 194088, 68136,
439563, 312771, 77018, 87705, 68477, 204027, 68127, 249042,
105640, 271456, 91439, 161843, 212517, 103739, 81629, 226218,
119938, 70816, 116443, 69428, 156717, 151456, 171930, 103749,
76955, 92493, 66128, 137224, 105749, 180834, 144161, 117253,
127998, 121324, 593310, 66001, 150553, 144817, 197044, 311411,
2448671, 363027, 402717, 99795, 142563, 142724, 98831, 124760,
74736, 353612, 81772, 86882, 430099, 128743, 110049, 86086,
88971, 119041, 101051, 122802, 1483582, 128469, 93028, 120693,
77294, 129593, 107903, 82933, 240413, 182462, 640467, 102342,
345061, 90046, 108988, 84555, 114567, 220901, 137519, 106546,
103451, 172199, 81266, 128205, 106420, 258593, 543598, 513077,
77951, 153771, 389461, 106950, 2783518, 126338, 345033, 9196933,
360256, 353904, 172309, 65634, 242668, 1615865, 182269, 113968,
404309, 360952, 94737, 145919, 168510, 69784, 95457, 83262,
85031, 83209, 81366, 130965, 98070, 80594, 1831543, 106422,
97115, 528287, 81006, 727468, 184073, 97332, 99925, 404919,
82714, 69899, 568494, 98001, 173499, 1059440, 75906, 15339169,
};
#endif /* INPUT_DEVICE_H_ */
# Profile the ./micro benchmark for a fixed list of grid/block configurations.
#
# Usage: <this script> BENCH        (BENCH is one of: shm, glb, reg)
#
# For every configuration a one-line test.sh is (re)written in the benchmark
# directory and /home/happy/bin/profile_micro.sh is invoked.
# NOTE(review): profile_micro.sh is presumably what executes test.sh; it is
# not visible from this script, confirm.

# Abort if the benchmark directory is missing; otherwise test.sh would be
# written into whatever directory the caller happened to be in.
cd /home/happy/microbench/micro || exit 1

# Possible values of $1: shm, glb, reg
bench=$1
if [ -z "$bench" ]; then
    echo "usage: $0 shm|glb|reg" >&2
    exit 1
fi

# Options shared by every run.
# NOTE(review): the micro benchmark's main() only looks up the flags bench,
# grid, blk, time and nit, so -nop is presumably ignored and -time=0.5 selects
# the timed kernels; confirm before changing either value.
comun="-nop=145000 -bench=$bench -time=0.5"

# Write test.sh for grid size $1 and thread-block size $2, make it executable
# and run the profiling wrapper.
profile_config() {
    echo "./micro -grid=$1 -blk=$2 $comun" > test.sh
    chmod +x test.sh
    /home/happy/bin/profile_micro.sh
}

# To skip a configuration, comment out its line. (The previous version used
# ": ' ... '" quoting blocks for this and ended with an unmatched quote, which
# made the shell fail with an unterminated-string error at end of file.)
profile_config 1 1
profile_config 1 32
profile_config 1 64
profile_config 1 128
profile_config 1 1024
profile_config 2 1024
profile_config 32 64
profile_config 32 128
profile_config 64 32
# Run the shm micro benchmark with one block of 1024 threads.
# NOTE(review): the benchmark's main() only looks up the flags bench, grid,
# blk, time and nit; -nop is not among them, so this invocation presumably
# ends up with neither an iteration count nor a time budget. Confirm whether
# -nit=... or -time=... was intended.
./micro -grid=1 -blk=1024 -nop=1000000 -bench=shm
/**
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
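/**
* NOTE(review): the description below appears to be inherited from the NVIDIA
* matrix-multiplication sample. The kernels defined in this file are
* register / shared-memory / global-memory micro-benchmarks, not a matrix
* multiplication.
*
* Matrix multiplication: C = A * B.
* Host code.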
*
* This sample implements matrix multiplication which makes use of shared memory
* to ensure data reuse, the matrix multiplication is done using tiling approach.
* It has been written for clarity of exposition to illustrate various CUDA programming
* principles, not with the goal of providing the most performant generic kernel for matrix multiplication.
* See also:
* V. Volkov and J. Demmel, "Benchmarking GPUs to tune dense linear algebra,"
* in Proc. 2008 ACM/IEEE Conf. on Supercomputing (SC '08),
* Piscataway, NJ: IEEE Press, 2008, pp. Art. 31:1-11.
*/
// System includes
#include <stdio.h>
#include <stdbool.h>
#include <assert.h>
#include "jetson_nano.h"
// CUDA runtime
#include <cuda_runtime.h>
// Helper functions and utilities to work with CUDA
#include <helper_functions.h>
#include <helper_cuda.h>
#include <cuda_fp16.h>
#include "input_device.h"
/* Clock frequency constant used by CYCLES.
 * NOTE(review): presumably the GPU clock in Hz; main() obtains the actual
 * frequency from frec_now() instead of using this value. */
#define FREQ 921600000
/* Multiplier applied to FREQ in CYCLES (presumably a duration in seconds; confirm). */
#define T 1
/* Number of low-order clock bits that myclock() and CYCLES discard
 * ("bits no significativos" = non-significant bits). */
#define BITSNOSIGNIFICATIVOS 16
/* T*FREQ expressed in myclock() units. Only referenced from commented-out
 * lines in the kernels below. */
#define CYCLES (T*(FREQ) >> BITSNOSIGNIFICATIVOS)
/* Number of inner-loop iterations the *_time kernels execute between two
 * consecutive clock reads. */
#define QUATUMINTERACIONES 1000
/* Scale factor applied both to the per-thread index into the global buffer
 * and to that buffer's allocation size. */
#define SIZEROW 1
/* Element type every micro-kernel computes with. */
typedef int btype;
typedef btype *btypePtr;
/* Device-side clock: clock64() shifted right by BITSNOSIGNIFICATIVOS bits and
 * truncated to int. */
#define myclock() (int) (clock64() >> BITSNOSIGNIFICATIVOS)
/**
 * Micro kernel: fixed number of iterations on thread-local scalars only
 * (described by the original author as the "registers" benchmark).
 *
 * Every thread derives a 1-based id from its block and thread index and
 * repeats, nit times:
 *
 *     regout = regin*local + id;
 *     local  = (regout - local)/regin;
 *
 * With regin == local == id on entry the second statement yields
 * (id*id + id - id)/id == id, so local stays equal to id. The final
 * comparison is stored as 0/1 in vadd[id - 1].
 * NOTE(review): id*id + id no longer fits in btype (int) once id exceeds
 * 46340, so the self-check presumably fails for larger launches; confirm.
 *
 * @param nit  number of loop iterations
 * @param vadd output flags, one byte per launched thread
 */
__global__ void microKernel_reg_iter (unsigned int nit, char *vadd) {
    btype regin, regout, local;
    btype id = (blockIdx.x*blockDim.x + threadIdx.x+1);
    regin = id;
    local = id;
    /* Unsigned counter: same type as nit (and as in microKernel_shared_iter),
     * so the comparison involves no signed/unsigned conversion and the
     * counter cannot overflow a signed int when nit > INT_MAX. */
#pragma unroll 2
    for (unsigned int op = 0; op < nit; ++op) {
        regout = regin*local + id;
        local = (regout-local)/regin;
    }
    vadd[(int) id - 1] = (local == id);
}
/**
 * Micro kernel: same thread-local arithmetic as microKernel_reg_iter, but
 * running for a clock budget instead of a fixed iteration count.
 *
 * The clock is sampled with myclock() once per outer iteration; each outer
 * iteration then executes QUATUMINTERACIONES inner iterations, so the kernel
 * may overshoot the budget by up to one such quantum.
 *
 * @param cycles budget in myclock() units (clock64() >> BITSNOSIGNIFICATIVOS)
 * @param vadd   output flags, one byte per launched thread; set to 1 when the
 *               arithmetic round trip reproduced the thread id, else 0
 */
__global__ void microKernel_reg_time (unsigned int cycles, char *vadd) {
    btype regin, regout, local;
    btype id = (blockIdx.x*blockDim.x + threadIdx.x+1);
    const unsigned int start = myclock();
    unsigned int now = start;
    regin = id;
    local = id;
    /* Compare elapsed ticks rather than "now < start + cycles": the unsigned
     * difference stays correct when the 32-bit tick counter (or the sum
     * start + cycles) wraps around, whereas the absolute deadline would end
     * the loop immediately or far too late. */
    while ((now - start) < cycles)
    {
        now = myclock();
#pragma unroll 2
        for (unsigned int op=0; op< QUATUMINTERACIONES;++op){
            regout = regin*local + id;
            local = (regout-local)/regin;
        }
    }
    vadd[(int) id - 1] = (local == id);
}
/**
 * Micro kernel: fixed number of iterations operating on one element of a
 * caller-provided buffer that is accessed through a volatile pointer.
 *
 * Every thread owns element SIZEROW*(id - 1) of `global`, where id is its
 * 1-based linear thread number. It seeds that element with id and repeats,
 * nit times:
 *
 *     regout        = regin*global[i] + id;
 *     global[i]     = (regout - global[i])/regin;
 *
 * which leaves global[i] == id when the arithmetic is performed correctly.
 * The comparison result is stored as 0/1 in vadd[id - 1].
 *
 * @param nit    number of loop iterations
 * @param vadd   output flags, one byte per launched thread
 * @param global work buffer with at least SIZEROW * (number of launched
 *               threads) elements
 */
__global__ void microKernel_global_iter(int nit, char *vadd, volatile btype *global) {
    btype regin, regout;
    btype id = (blockIdx.x*blockDim.x + threadIdx.x+1);
    /* id is 1-based, the buffer is 0-based: indexing with id itself made the
     * last thread touch element SIZEROW*nthreads, one past the end of a
     * buffer that holds SIZEROW*nthreads elements. */
    int idInt = SIZEROW*((int) id - 1);
    regin = id;
    global[idInt] = id;
#pragma unroll 2
    for (int op = 0; op < nit; ++op) {
        regout = regin*global[idInt] + id;
        global[idInt] = (regout-global[idInt])/regin;
    }
    vadd[(int) id - 1] = ( global[idInt] == id );
}
/**
 * Micro kernel: same buffer arithmetic as microKernel_global_iter, but
 * running for a clock budget instead of a fixed iteration count.
 *
 * The clock is sampled with myclock() once per outer iteration; each outer
 * iteration then executes QUATUMINTERACIONES inner iterations.
 *
 * @param cycles budget in myclock() units (clock64() >> BITSNOSIGNIFICATIVOS)
 * @param vadd   output flags, one byte per launched thread
 * @param global work buffer with at least SIZEROW * (number of launched
 *               threads) elements
 */
__global__ void microKernel_global_time(unsigned int cycles, char *vadd, volatile btype *global) {
    btype regin, regout;
    btype id = (blockIdx.x*blockDim.x + threadIdx.x+1);
    /* id is 1-based, the buffer is 0-based: indexing with id itself made the
     * last thread touch one element past the end of the buffer. The index
     * variable stays volatile as in the original kernel. */
    volatile int idInt = SIZEROW*((int) id - 1);
    const unsigned int start = myclock();
    unsigned int now = start;
    regin = id;
    global[idInt] = id;
    /* Elapsed-ticks comparison: remains correct when the 32-bit tick counter
     * (or start + cycles) wraps, unlike an absolute "now < deadline" test. */
    while ((now - start) < cycles)
    {
        now = myclock();
#pragma unroll 2
        for (unsigned int op = 0; op < QUATUMINTERACIONES; ++op) {
            regout = regin*global[idInt] + id;
            global[idInt] = (regout-global[idInt])/regin;
        }
    }
    vadd[(int) id - 1] = ( global[idInt] == id );
}
/**
* Micro kernel: fixed number of iterations operating on one element of the
* block's dynamically sized __shared__ array.
*
* Every thread uses slot threadIdx.x of sh, seeds it with its 1-based linear
* thread number (id) and repeats, nit times:
*
*     regout = regin*sh[t] + id;
*     sh[t]  = (regout - sh[t])/regin;
*
* which leaves sh[t] == id when the arithmetic is performed correctly. The
* comparison result is stored as 0/1 in vadd[id - 1].
*
* nit:  number of loop iterations.
* vadd: output flags, one byte per launched thread.
*
* The launch must provide at least blockDim.x * sizeof(btype) bytes of
* dynamic shared memory, because sh is an unsized extern __shared__ array
* indexed by threadIdx.x.
*/
__global__ void microKernel_shared_iter(unsigned int nit, char *vadd) {
btype regin, regout;
// Both id and sh are volatile-qualified, so every textual use below is an
// actual access the compiler may not cache or eliminate.
volatile btype id = (btype) (blockIdx.x*blockDim.x + threadIdx.x + 1);
volatile extern __shared__ btype sh[];
regin = id;
sh[threadIdx.x] = id;
#pragma unroll 2
for (unsigned int op = 0; op < nit; ++op) {
// Two reads of sh and one read of id feed the multiply/add ...
regout = regin*sh[threadIdx.x] + id;
// ... and the divide writes the slot back; the result equals id again.
sh[threadIdx.x] = (regout-sh[threadIdx.x])/regin;
}
// 1 when the round trip reproduced id, 0 otherwise.
vadd[(int) id - 1 ] = (sh[threadIdx.x] == id);
}
/**
 * Micro kernel: same shared-array arithmetic as microKernel_shared_iter, but
 * running for a clock budget instead of a fixed iteration count.
 *
 * The clock is sampled with myclock() once per outer iteration; each outer
 * iteration then executes QUATUMINTERACIONES inner iterations.
 *
 * The launch must provide at least blockDim.x * sizeof(btype) bytes of
 * dynamic shared memory (sh is indexed by threadIdx.x).
 *
 * @param cycles budget in myclock() units (clock64() >> BITSNOSIGNIFICATIVOS)
 * @param vadd   output flags, one byte per launched thread; set to 1 when the
 *               arithmetic round trip reproduced the thread id, else 0
 */
__global__ void microKernel_shared_time (unsigned int cycles, char *vadd) {
    btype regin, regout;
    /* id and sh stay volatile as in the original kernel so that every use is
     * a real access. */
    volatile btype id = (btype) (blockIdx.x*blockDim.x + threadIdx.x + 1);
    volatile extern __shared__ btype sh[];
    const unsigned int start = myclock();
    unsigned int now = start;
    regin = id;
    sh[threadIdx.x] = id;
    /* Elapsed-ticks comparison: remains correct when the 32-bit tick counter
     * (or start + cycles) wraps, unlike an absolute "now < deadline" test. */
    while ((now - start) < cycles)
    {
        now = myclock();
#pragma unroll 2
        for (int op = 0; op < QUATUMINTERACIONES; ++op) {
            regout = regin*sh[threadIdx.x] + id;
            sh[threadIdx.x] = (regout-sh[threadIdx.x])/regin;
        }
    }
    vadd[(int) id - 1 ] = (sh[threadIdx.x] == id);
}
/**
 * Check the per-thread result flags copied back from the device.
 *
 * Adds up the first vsize bytes of h_vadd and reports whether the total
 * equals vsize, which is the case when every entry is 1.
 *
 * @param h_vadd host array holding at least vsize flag bytes
 * @param vsize  number of entries to add up
 * @return true when the sum of the entries equals vsize
 */
bool check_error(char *h_vadd, int vsize) {
    const char *flag = h_vadd;
    int remaining = vsize;
    int total = 0;

    while (remaining > 0) {
        total += *flag;
        ++flag;
        --remaining;
    }
    return total == vsize;
}
/**
 * Run one micro kernel, time it and validate its per-thread result flags.
 *
 * @param bench      benchmark name: "shm", "glb" or "reg"
 * @param grid       number of thread blocks (must be > 0)
 * @param blk        threads per block (must be > 0)
 * @param nitocycles iteration count when time == 0, otherwise the clock
 *                   budget in myclock() units
 * @param time       non-zero selects the *_time kernels, zero the *_iter ones
 * @return 1 when every launched thread reported a correct result, 0 when at
 *         least one did not or when the arguments were rejected. CUDA API
 *         failures are reported through checkCudaErrors (presumably fatal;
 *         see helper_cuda.h).
 */
int launch_kernel(char *bench, int grid, int blk, unsigned int nitocycles,int time) {
    /* Reject anything we cannot launch before allocating resources. The
     * previous code launched nothing for an unknown name and then validated
     * uninitialised device memory. */
    if (bench == NULL) {
        printf("FAIL: bench\n");
        return 0;
    }
    const bool use_shm = (strcmp(bench, "shm") == 0);
    const bool use_glb = (strcmp(bench, "glb") == 0);
    const bool use_reg = (strcmp(bench, "reg") == 0);
    if (!use_shm && !use_glb && !use_reg) {
        printf("FAIL: unknown bench '%s'\n", bench);
        return 0;
    }
    if (grid <= 0 || blk <= 0) {
        printf("FAIL: grid and blk must be positive\n");
        return 0;
    }

    const int vsize = grid*blk;
    char *h_vadd = (char *) malloc(vsize*sizeof(char));
    if (h_vadd == NULL) {
        printf("FAIL: out of host memory\n");
        return 0;
    }

    char *d_vadd = NULL;
    btypePtr d_global = NULL;

    // Allocate CUDA events that we'll use for timing
    cudaEvent_t start, stop;
    checkCudaErrors(cudaEventCreate(&start));
    checkCudaErrors(cudaEventCreate(&stop));

    checkCudaErrors(cudaMalloc(&d_vadd, vsize*sizeof(char)));
    if (use_glb) {
        checkCudaErrors(cudaMalloc(&d_global, SIZEROW*vsize*sizeof(btype)));
    }

    /* Same console output as before ("shm" prints "time \n" with a trailing
     * blank, the others "time\n"), but emitted before the timed region. */
    printf("%s", bench);
    if (time) {
        printf(use_shm ? "time \n" : "time\n");
    } else {
        printf("iterations\n");
    }

    checkCudaErrors(cudaDeviceSynchronize());

    // Record the start event: only the kernel itself is timed.
    checkCudaErrors(cudaEventRecord(start));

    // Execute the kernel. Every variant is launched with blk*sizeof(btype)
    // bytes of dynamic shared memory, as in the original code.
    if (use_shm) {
        if (time) {
            microKernel_shared_time <<<grid, blk, blk*sizeof(btype)>>>(nitocycles, d_vadd);
        } else {
            microKernel_shared_iter <<<grid, blk, blk*sizeof(btype)>>>(nitocycles, d_vadd);
        }
    } else if (use_glb) {
        if (time) {
            microKernel_global_time <<<grid, blk, blk*sizeof(btype)>>>(nitocycles, d_vadd, d_global);
        } else {
            microKernel_global_iter <<<grid, blk, blk*sizeof(btype)>>>(nitocycles, d_vadd, d_global);
        }
    } else {
        if (time) {
            microKernel_reg_time <<<grid, blk, blk*sizeof(btype)>>>(nitocycles, d_vadd);
        } else {
            microKernel_reg_iter <<<grid, blk, blk*sizeof(btype)>>>(nitocycles, d_vadd);
        }
    }
    // A rejected launch (e.g. too many threads per block) is only visible
    // through the last-error state; without this check the flags buffer
    // would be validated although no kernel ever ran.
    checkCudaErrors(cudaGetLastError());

    // Record the stop event
    checkCudaErrors(cudaDeviceSynchronize());
    checkCudaErrors(cudaEventRecord(stop));
    // Wait for the stop event to complete
    checkCudaErrors(cudaEventSynchronize(stop));

    float msecTotal = 0.0f;
    checkCudaErrors(cudaEventElapsedTime(&msecTotal, start, stop));
    // Compute and print the performance
    printf( "Elapsed time= %.2f\n", msecTotal);

    checkCudaErrors( cudaMemcpy(h_vadd, d_vadd, vsize*sizeof(char), cudaMemcpyDeviceToHost) );
    printf("Checking computed result for correctness:\n ");
    bool correct = check_error(h_vadd, vsize);

    // Clean up memory
    checkCudaErrors(cudaEventDestroy(start));
    checkCudaErrors(cudaEventDestroy(stop));
    checkCudaErrors(cudaFree(d_vadd));
    if (use_glb) {
        checkCudaErrors(cudaFree(d_global));
    }
    free(h_vadd);
    return correct;
}
/**
* Program main
*/
int a;
long int b;
long long int c;
char *buffer,*buffer2;
/**
 * Parse the command line, run the selected micro kernel once and exit with
 * status 0 when the kernel's self-check passed, 1 otherwise.
 *
 * Recognised flags: -bench=shm|glb|reg, -grid=N, -blk=N and either
 * -time=SECONDS or -nit=ITERATIONS (-time wins when both are given).
 * -help / -? print the usage text.
 *
 * A missing or invalid mandatory flag is reported with a "FAIL: ..." line
 * and the program exits with status 1 instead of launching a kernel with
 * uninitialised parameters.
 */
int main(int argc, char **argv) {
    unsigned int grid = 0, blk = 0, nitocycles = 0;
    /* getCmdLineArgumentString() stores a pointer in bench, so no buffer is
     * allocated here (the former malloc(4) was leaked as soon as the flag
     * was parsed and left uninitialised when it was missing). */
    char *bench = NULL;
    bool time = false;
    bool args_ok = true;

    if (checkCmdLineFlag(argc, (const char **)argv, "help") ||
        checkCmdLineFlag(argc, (const char **)argv, "?")) {
        printf("Usage -bench=bench_name ('shm', 'glb', 'reg')\n");
        printf(" -grid=grid_size (Grid size)\n");
        printf(" -blk=block_size (Thread block size)\n");
        printf(" -nit=number_its (number of iterations)\n");
        printf(" -time=time (time to run the microbenchark)\n");
        exit(EXIT_SUCCESS);
    }

    // Current GPU frequency, used to convert the requested time into cycles.
    long int frec = frec_now();
    printf("GPU frequency: %ld \n", frec);

    // Benchmark name
    if (checkCmdLineFlag(argc, (const char **)argv, "bench")) {
        getCmdLineArgumentString(argc, (const char **)argv, "bench", &bench);
    }
    if (bench == NULL) {
        printf ("FAIL: bench\n");
        args_ok = false;
    }

    // Grid size
    int requested = 0;
    if (checkCmdLineFlag(argc, (const char **)argv, "grid")) {
        requested = getCmdLineArgumentInt(argc, (const char **)argv, "grid");
    }
    if (requested > 0) {
        grid = (unsigned int) requested;
    } else {
        printf ("FAIL: grid\n");
        args_ok = false;
    }

    // Thread block size
    requested = 0;
    if (checkCmdLineFlag(argc, (const char **)argv, "blk")) {
        requested = getCmdLineArgumentInt(argc, (const char **)argv, "blk");
    }
    if (requested > 0) {
        blk = (unsigned int) requested;
    } else {
        printf ("FAIL: blk\n");
        args_ok = false;
    }

    if (checkCmdLineFlag(argc, (const char **)argv, "time")) {
        // Kernel time: seconds * frequency, expressed in myclock() units.
        // Double arithmetic and a 64-bit intermediate avoid the precision
        // loss of float and overflow where long is 32 bits wide.
        const double total_cycles =
            (double) frec * (double) getCmdLineArgumentFloat(argc, (const char **)argv, "time");
        if (total_cycles > 0.0) {
            const unsigned long long long_nitocycles = (unsigned long long) total_cycles;
            nitocycles = (unsigned int) (long_nitocycles >> BITSNOSIGNIFICATIVOS);
            time = true;
        } else {
            printf ("FAIL:nit and/or time\n");
            args_ok = false;
        }
    } else if (checkCmdLineFlag(argc, (const char **)argv, "nit")) {
        // Number of iterations
        requested = getCmdLineArgumentInt(argc, (const char **)argv, "nit");
        if (requested >= 0) {
            nitocycles = (unsigned int) requested;
        } else {
            printf ("FAIL:nit and/or time\n");
            args_ok = false;
        }
    } else {
        printf ("FAIL:nit and/or time\n");
        args_ok = false;
    }

    if (!args_ok) {
        exit(EXIT_FAILURE);
    }

    printf("microKernel=%s, grid: %u, blk: %u, nit o cycles: %u\n", bench, grid, blk, nitocycles);
    int kernel_result = launch_kernel(bench, grid, blk, nitocycles,time);
    printf("Launch result: %d\n", kernel_result);
    exit(!kernel_result);
}
# Run the micro benchmark named by $BENCH with 4 blocks of 1024 threads.
# NOTE(review): the benchmark's main() only looks up the flags bench, grid,
# blk, time and nit; -nop and -t are not among them, and -t=5 is given twice.
# Presumably -nit=... and/or -time=... were intended; confirm.
./micro -grid=4 -blk=1024 -nop=100000 -bench=$BENCH -t=5 -t=5
#!/usr/bin/python
"""Merge several profiler CSV reports into one table, ``total.csv``.

Usage: <script> REPORT [REPORT ...]

Each REPORT path is split on '_' and its third and fourth pieces, joined by
'_', become that report's column label (for example ``x_y_32_64_z`` gives
``32_64``).  The label's two halves are written out as the 'grid size' and
'thread block size' rows.  Lines of a report that start with '=' are dropped;
the rest is parsed as CSV with a header row that must contain the columns
'Metric Name' and 'Avg'.

The output has one row per metric listed in the first report and one column
per report, holding that report's 'Avg' value with '.' replaced by ','.
"""
import csv
import sys
import re

# Every non-empty line whose first character is not '='.  The newline is
# excluded from the first character class on purpose: a bare [^=] also matches
# '\n', which let a '=' line slip through when it followed an empty line.
patron = re.compile(r'^[^=\n].*$', re.M)


def column_label(path):
    """Return the column label derived from a report path (pieces 2 and 3)."""
    parts = path.split('_')
    return parts[2] + '_' + parts[3]


def read_rows(path):
    """Return the CSV rows of one report as a list of dicts."""
    with open(path) as report:
        contents = report.read()
    return list(csv.DictReader(patron.findall(contents)))


def main(argv):
    """Build total.csv from the reports named in argv[1:]; return exit code."""
    reports = argv[1:]
    if not reports:
        sys.stderr.write("usage: %s report [report ...]\n" % argv[0])
        return 1

    fields = ['Metric Name'] + [column_label(path) for path in reports]

    # Parse every report exactly once (they used to be re-read and re-parsed
    # for each metric).  For repeated metric names the last row wins, which
    # is what the former row-by-row scan produced as well.
    metricas = None
    averages = []
    for path in reports:
        print(path)
        rows = read_rows(path)
        if metricas is None:
            # The first report defines which metrics appear, and in which order.
            metricas = [row['Metric Name'] for row in rows]
        avg_by_metric = {}
        for row in rows:
            avg_by_metric[row['Metric Name']] = row['Avg']
        averages.append(avg_by_metric)

    with open('total.csv', 'w') as csvdstfile:
        writer = csv.DictWriter(csvdstfile, fieldnames=fields)
        writer.writeheader()

        # Two leading rows: first and second half of every column label.
        for ix, account in enumerate(['grid size', 'thread block size']):
            dw = {'Metric Name': account}
            for f in fields[1:]:
                dw[f] = f.split('_')[ix]
            writer.writerow(dw)

        for m in metricas:
            dw = {'Metric Name': m}
            for label, avg_by_metric in zip(fields[1:], averages):
                if m in avg_by_metric:
                    # Decimal point -> decimal comma.
                    dw[label] = avg_by_metric[m].replace(".", ",")
            print(dw)
            writer.writerow(dw)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
#! /usr/bin/python
import sys, os, subprocess, random, time, datetime
import pexpect, re
import os.path
from os import path
import fnmatch
import serial
import config
from config import *
def configurate_serie():
    """Open serial port 0 at 115200 baud, set its timeout to 1 and return it.

    If the port cannot be opened, the error is reported on stderr and the
    serial.SerialException is re-raised.  Previously the exception was
    swallowed and the function then failed on the unbound ``serie`` variable,
    hiding the real cause.
    """
    #os.system("sudo stop ttyS0")
    Puerto = 0
    try:
        serie = serial.Serial(Puerto, 115200)
        serie.timeout = 1
    except serial.SerialException:
        # Error while opening the serial port.
        sys.stderr.write("Error al abrir puerto (%s)\n" % str(Puerto))
        raise
    return serie
# Keep the software and hardware watchdogs alive.
def KeepWatchdog():
    """Run the watchdog refresh helper, then touch ``watchsoft.test``.

    The helper's output is redirected into watchsoft.test by the shell; the
    following touch updates that file's timestamp.
    """
    ordenes = (
        "/home/ubuntu/bin/refresh_watchdog > watchsoft.test",
        "touch watchsoft.test",
    )
    for orden in ordenes:
        os.system(orden)
def writeOutput(serie, output, elapsedTime, outputFileName, fileName):
    """Report a run's output on the serial port and save it to a file.

    serie          -- object with a write() method (the serial port)
    output         -- text produced by the benchmark run
    elapsedTime    -- elapsed time of the run in milliseconds
    outputFileName -- file that receives output plus an elapsed-time trailer
    fileName       -- unused; only referenced by the disabled archiving
                      commands kept at the end of this function

    ``verbose`` is a module-level name (presumably provided by
    ``from config import *``; confirm).
    """
    serie.write("\n***Elapsed time: %s ms. ***\n" % (elapsedTime))
    if verbose:
        print("%s " % (output) )
    # NOTE(review): the output is sent to the serial port regardless of
    # ``verbose``; this mirrors the unconditional elapsed-time write above,
    # confirm against the original indentation.
    serie.write("%s " % (output) )
    # repr() is what the Python-2-only backtick syntax evaluated to.
    output += "\n\n***Elapsed time: " + repr(elapsedTime) + " ms. ***\n\n"
    # The with-block flushes and closes the file even if the write fails.
    with open(outputFileName, "w") as outputFile:
        outputFile.write(output)
    # os.system("tar -rvf results/" + fileName + ".tar " + outputFileName)
    # os.system("rm " + outputFileName)
def _saveLastFiNumber(fiNumber):
    """Overwrite the ``lastFiNumber`` file with the given iteration number."""
    with open("lastFiNumber", "w") as lastFile:
        lastFile.write("%d" % fiNumber)


def runBenchmark(fiIni, fileName, tIni):
    """Run the benchmark executable repeatedly and log every outcome.

    Each iteration spawns ``./<exeFile> <inputParameters>`` through pexpect,
    waits for it to finish and reports the result to stdout (when
    ``verbose``), to ``results/<fileName>_<tIni>.log`` and to the serial
    port. The current iteration number is stored in ``lastFiNumber`` after
    every iteration.

    Args:
        fiIni: number of the first iteration to run.
        fileName: base name used for the log and ``.err`` files.
        tIni: time stamp string appended to the log file name.

    Exits the process with status 2 on KeyboardInterrupt and with status 3
    after three consecutive timeouts.

    NOTE(review): ``totalFiNumber``, ``exeFile``, ``inputParameters``,
    ``timeout`` and ``verbose`` are not defined in this file; presumably they
    come from ``from config import *`` -- confirm.
    """
    serie=configurate_serie()
    numTimeouts = 0
    forever = (totalFiNumber == -1) # -1 means: loop forever
    fiNumber = fiIni - 1
    # The with-block closes the log on every exit path, including sys.exit().
    with open("results/"+fileName+"_"+tIni+".log", "w") as stdLogFile:
        while forever or (fiNumber < totalFiNumber):
            fiNumber = fiNumber + 1
            KeepWatchdog()
            try:
                x = datetime.datetime.now()
                outputFileName = "results/"+fileName + x.strftime("_%H%M%S.err")
                ti = x.strftime("%m/%d/%Y %H:%M:%S")
                # Measure execution time
                startTime = time.time()
                # os.system("sudo /home/ubuntu/bin/gpufreq-info")
                # os.system("cpufreq-info | grep 'current CPU' | cut -d's' -f2 | cut -d'(' -f1")
                # os.system("/home/ubuntu/bin/gpufreq-info >> /home/ubuntu/rodinia/lud/rad/results/res.out")
                p = pexpect.spawn("./"+exeFile+" "+inputParameters, timeout=timeout)
                p.expect(pexpect.EOF)
                # Elapsed time in milliseconds
                elapsedTime = 1000*(time.time() - startTime)
                p.close()
                if (p.exitstatus == 0):
                    if verbose:
                        print("(%d) %s; PASS in %.2f ms. " % (fiNumber, ti, elapsedTime) )
                        print(p.before)
                        sys.stdout.flush()
                    stdLogFile.write("(%d) %s; PASS in %.2f ms.\n" % (fiNumber, ti, elapsedTime) )
                    stdLogFile.write("%s" % (p.before) )
                    stdLogFile.flush()
                    serie.write("(%d) %s; PASS in %.2f ms.\n\r" % (fiNumber, ti, elapsedTime) )
                    serie.write("%s" % (p.before) )
                elif (p.exitstatus == 1):
                    if verbose:
                        print("(%d) %s; FAIL in %.2f ms." % (fiNumber, ti, elapsedTime) )
                        sys.stdout.flush()
                    stdLogFile.write("(%d) %s; FAIL in %.2f ms.; %s\n" % (fiNumber, ti, elapsedTime, outputFileName) )
                    stdLogFile.write("%s" % (p.before) )
                    stdLogFile.flush()
                    serie.write("(%d) %s; FAIL in %.2f ms.; %s\n\r" % (fiNumber, ti, elapsedTime, outputFileName) )
                    writeOutput(serie, p.before, elapsedTime, outputFileName, fileName+"_"+tIni)
                else:
                    # exitstatus is None when the child was killed by a
                    # signal, so it is formatted with %s: %d would raise
                    # TypeError and the run would go unlogged. Integer
                    # statuses print exactly as before.
                    if verbose:
                        print("(%d) %s; Exit status: %s in %.2f ms." % (fiNumber, ti, p.exitstatus, elapsedTime) )
                        sys.stdout.flush()
                    stdLogFile.write("(%d) %s; Exit status: %s in %.2f ms.\n" % (fiNumber, ti, p.exitstatus, elapsedTime) )
                    stdLogFile.write("%s" % (p.before) )
                    stdLogFile.flush()
                    serie.write("(%d) %s; Exit status: %s in %.2f ms.\n\r" % (fiNumber, ti, p.exitstatus, elapsedTime) )
                    writeOutput(serie, p.before, elapsedTime, outputFileName, fileName+"_"+tIni)
                stdLogFile.flush()
                numTimeouts = 0
            except KeyboardInterrupt:
                print("KeyboardInterrupt")
                sys.stdout.flush()
                _saveLastFiNumber(fiNumber)
                # Flush while the log is still open. The old code flushed
                # after close(), which raised ValueError and prevented the
                # exit status 2 below from ever being returned.
                stdLogFile.flush()
                sys.exit(2)
            except pexpect.EOF:
                print("Fin de archivo")
            except pexpect.TIMEOUT:
                p.terminate(force=True)
                # Elapsed time in seconds here, not milliseconds
                elapsedTime = time.time() - startTime
                if verbose:
                    print("(%d) %s; Timeout after %.2f s." % (fiNumber, ti, elapsedTime) )
                    sys.stdout.flush()
                stdLogFile.write("(%d) %s; Timeout after %.2f s.\n" % (fiNumber, ti, elapsedTime) )
                stdLogFile.write("%s" % (p.before) )
                stdLogFile.flush()
                serie.write("(%d) %s; Timeout after %.2f s.\n\r" % (fiNumber, ti, elapsedTime) )
                sys.stdout.flush()
                numTimeouts += 1
                # After 3 timeouts in a row we assume the board will not
                # recover and ask for a reboot through exit status 3.
                if (numTimeouts == 3):
                    # The last three consecutive timeouts count as a single
                    # unrecoverable error.
                    fiNumber = fiNumber - 2
                    _saveLastFiNumber(fiNumber)
                    time.sleep(2)
                    sys.exit(3)
                # Otherwise the finally clause below stores fiNumber.
            finally:
                _saveLastFiNumber(fiNumber)
            time.sleep(1)
        # end while
#end runBenchmark
def main(argv):
    """Parse the command line and start the benchmark loop.

    Args:
        argv: argument vector laid out like ``sys.argv``:
            ``[program, fileName, firstIteration, startTime]``.

    Prints a usage message and exits with status 1 when arguments are
    missing. The values are read from ``argv`` itself; previously the
    parameter was ignored in favour of the global ``sys.argv``.
    """
    if len(argv) < 4:
        program = argv[0] if argv else "test.py"
        sys.stderr.write("Usage: %s <fileName> <firstIteration> <startTime>\n" % program)
        sys.exit(1)
    fileName = argv[1]
    fiIni = int( argv[2] )
    tIni = argv[3]
    runBenchmark(fiIni, fileName, tIni)

##############################################################################
# Script entry point
if __name__ == "__main__":
    main(sys.argv)
#!/bin/bash
# Set-up phase of the test campaign: read the experiment settings from
# config.py, link the benchmark, fix the CPU/GPU frequencies, start the
# "I am alive" reporter on the serial line and do one warm-up run.
# NOTE(review): MYHOME is never assigned in this script; it presumably comes
# from the caller's environment -- confirm.

# Quoted settings: take the text between the first pair of double quotes.
dir=$(grep ^"dir = " config.py | cut -d'"' -f2)
name=$(grep ^"exeFile = " config.py | cut -d'"' -f2)
size=$(grep ^"size = " config.py | cut -d'"' -f2)
#blk=`grep ^"blk =" config.py | cut -d"\"" -f2`
# Numeric settings: everything after '='. The values keep surrounding blanks,
# so they are deliberately used unquoted below to let word splitting trim them.
totalFiNumber=$(grep ^"totalFiNumber = " config.py | cut -d"=" -f2)
cpufreq=$(grep ^"cpufreq = " config.py | cut -d"=" -f2)
gpufreq=$(grep ^"gpufreq = " config.py | cut -d"=" -f2)

PATH=${MYHOME}/bin:$PATH

# Refresh the links to the golden outputs and to the benchmark executable.
rm -f goldens
ln -s ${dir}/goldens goldens
rm -f ${name}
ln -s ${dir}/${name} ${name}

echo >> results/res.out
echo "==================================" >> results/res.out
echo $(date) >> results/res.out
echo "Experiment: " $name " size: " $size " cpufreq: " $cpufreq "gpufreq: " $gpufreq >> results/res.out
echo "==================================" >> results/res.out
echo >> results/res.out

# A frequency of -1 means "leave it alone".
if [ ${cpufreq} -ne -1 ]
then
    # echo "initial CPUfreq: " `cpufreq-info | grep "current CPU" | cut -d"s" -f2 | cut -d"(" -f1` > ./results/res.out
    sudo cpufreq-set -c 0 -d ${cpufreq} -u ${cpufreq}
    echo "CPUfreq set to: " $(cpufreq-info | grep "current CPU" | cut -d"s" -f2 | cut -d"(" -f1) >> results/res.out
    echo "CPUfreq set to: " $(cpufreq-info | grep "current CPU" | cut -d"s" -f2 | cut -d"(" -f1) > /dev/ttyS0
fi

if [ ${gpufreq} -ne -1 ]
then
    # echo "initial GPUfreq: " `/home/ubuntu/bin/gpufreq-info` >> ./results/res.out
    echo "Setting GPUFreq to: " ${gpufreq} >> results/res.out
    sudo ${MYHOME}/bin/gpufreq-set ${gpufreq}
    echo "GPUfreq set to: " $(gpufreq-info) >> results/res.out
fi

if [ -f "lastFiNumber" ]
then # test.sh was restarted before the previous campaign finished: resume
    fiNumber=$(($(cat lastFiNumber) + 1))
else
    fiNumber=1
fi
#if [ -f "lastFiNumber" ]
#then
# rm lastFiNumber
#fi

sudo killall iamalive_ini
sudo ${MYHOME}/bin/iamalive > /dev/ttyS0 &

# totalFiNumber == -1 means: run forever.
if [ $totalFiNumber -eq -1 ]
then
    forever=1
else
    forever=0
fi
tIni=$(date +%H%M%S)

########### WARM-UP RUN ################
./${name} -s ${size} -v
########### WARM-UP RUN ################

# Set the GPU frequency again after the warm-up run.
if [ ${gpufreq} -ne -1 ]
then
    # echo "initial GPUfreq: " `/home/ubuntu/bin/gpufreq-info` >> ./results/res.out
    echo "After warming. Setting GPUFreq to: " ${gpufreq} >> results/res.out
    sudo ${MYHOME}/bin/gpufreq-set ${gpufreq}
    echo "GPUfreq set to: " $(gpufreq-info) >> results/res.out
fi
# Main campaign loop: (re)launch the Python harness until all iterations are
# done, the user interrupts it, or the board has to be rebooted.
while [ $forever -eq 1 ] || [ $fiNumber -le $totalFiNumber ]
do
    tIni=$(date +%H%M%S)
    if [ $gpufreq -eq -1 ]
    then
        sufix=""
    else
        # Tag the result files with the current GPU frequency, dropping a
        # trailing "000000" from the reported value.
        g=$(${MYHOME}/bin/gpufreq-info)
        sufix="_"${g%"000000"}
    fi
    fileName=${name}"_"${size}${sufix}
    echo $fileName >> results/res.out
    echo "===========" >> results/res.out
    # fileName=${name}"_"${size}"_b"${blk}
    # gzip -f results/*.log
    # gzip -f results/*.tar
    if [ $# -eq 1 ] # one argument given: run under gdb
    then
        ./test_gdb.py $fileName $fiNumber $tIni #| tee ./results/${fileName}_${tIni}.out
    else
        ./test.py $fileName $fiNumber $tIni #| tee ./results/${fileName}_${tIni}.out
    fi
    # Capture the harness exit status right away: reading $? after the echo
    # commands below always gave 0, so the reboot and keyboard-interrupt
    # branches could never run.
    status=$?
    echo "GPUfreq after test: " $(gpufreq-info) >> results/res.out
    echo "===========" >> results/res.out
    if [ $status -eq 3 ] # three timeouts in a row
    then
        sudo reboot now
        sleep 30
        sudo reboot now
        exit 1
    elif [ $status -eq 2 ] # the test was interrupted from the keyboard
    then
        # Force the loop to end. Arithmetic expansion is required here:
        # "fiNumber=$totalFiNumber + 1" tried to run a command named "+".
        fiNumber=$((totalFiNumber + 1))
        forever=0
    else
        fiNumber=$(($(cat lastFiNumber) + 1))
        # pgrep $name
        # kill -9 `pgrep $name`
        # echo "Relaunching test after " $fiNumber " iterations."
    fi
done # end while
# Tear-down: restore the CPU frequency, swap the alive reporters, clear the
# resume index and disable the start-up script.
if [ $cpufreq -ne -1 ]; then
    # Put the CPU back at its maximum frequency.
    sudo cpufreq-set -c 0 -d 2065500 -u 2065500
fi

sudo killall iamalive
sudo iamalive_ini 120 10 &
sudo killall -2 reset

echo "Status after while loop: " $status

if [ -f "lastFiNumber" ]; then
    echo "Delete lastFiNumber. Reset index" > /dev/ttyS0
    sudo rm lastFiNumber
fi

#if [ $status != 2 ] # only if the run was not interrupted from the keyboard
#then
mv ${MYHOME}/startup_script.sh ${MYHOME}/startup_script_noactivo.sh
#fi
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment