--- lib/CUDAKernels/kernels.cu +++ @@ -206,7 +206,7 @@ // template<> __device__ __forceinline__ double RSQRT(double val) { return 1.0/sqrt(val); } - +#if (CUDART_VERSION < 8000) || (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600)) /* software fallback: native atomicAdd(double*, double) needs compute capability 6.x+ */ __device__ double atomicAdd(double* address, double val) { unsigned long long int* address_as_ull = @@ -220,7 +220,7 @@ } while (assumed != old); return __longlong_as_double(old); } - +#endif /* software atomicAdd(double*, double) fallback */ __device__ __forceinline__ double atomicMin(double *address, double val) { --- lib/include/sapdevclass.h +++ @@ -33,7 +33,10 @@ #include #include -#include +#define omp_get_num_procs() 1 +#define omp_get_thread_num() 0 +#define omp_get_num_threads() 1 +#define omp_get_max_threads() 1 #include --- lib/interfaces/sapporoG6lib.cpp +++ @@ -45,34 +45,9 @@ int g6_open_(int *id) { - //Check for a config file if its there use it - id = id; //Make the compiler happy - int *devList = NULL; - int how_many = 0; - FILE *fd; - if ((fd = fopen("sapporo2.config", "r"))) { - char line[256]; - fprintf(stderr, "sapporo2::open - config file is found\n"); - if(fgets(line, 256, fd) != NULL) - sscanf(line, "%d", &how_many); - - //Read the devices we want to use - if(how_many > 0) - { - devList = new int[how_many]; - for (int i = 0; i < how_many; i++) { - if(fgets(line, 256, fd) != NULL) - sscanf(line, "%d", &devList[i]); - } - } - } else { - fprintf(stderr," sapporo2::open - no config file is found \n"); - how_many = 0; - } - int res = g6_open_special(how_many, devList); - - delete[] devList; - + int devList = *id; + int how_many = 1; + int res = g6_open_special(how_many, &devList); return res; } --- lib/Makefile +++ @@ -1,11 +1,13 @@ -CXX = g++ -CC = gcc -LD = g++ -F90 = ifort +CXX ?= g++ +CC ?= gcc +LD ?= g++ +F90 ?= ifort .SUFFIXES: .o .cpp .ptx .cu -CUDA_TK ?= /usr/local/cuda +CUDA_HOME ?= /usr/local/cuda +CUDA_TK = $(CUDA_HOME) +OPTIMIZATION ?= 3 testRunFlags1= @@ -24,7 +26,7 @@ testRunFlags3="-D TIMING_STATS=1" endif -OFLAGS = -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp +OFLAGS = -g -O$(OPTIMIZATION) -Wall 
-Wextra -Wstrict-aliasing=2 #Use below if compiling with CPU_SUPPORT (SSE) #CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include -msse4 @@ -36,23 +38,14 @@ NVCC = $(CUDA_TK)/bin/nvcc -# Support older CUDA versions out of the box -NVCCVERSION=$(shell "${NVCC}" --version | grep ^Cuda | sed 's/^.* //g') -ifeq "${NVCCVERSION}" "V5.5.22" - NVCCFLAGS ?= -arch sm_20 -else - NVCCFLAGS ?= -arch sm_30 -endif - -#NVCCFLAGS = -arch sm_35 -#NVCCFLAGS ?= -arch sm_30 -#NVCCFLAGS = -arch sm_20 +GPUARCH ?= sm_75 +NVCCFLAGS = -arch $(GPUARCH) -ccbin $(CXX) NVCCFLAGS += ${testRunFlags} # Use with Mac OS X # NVCCFLAGS = -arch sm_12 -Xcompiler="-Duint=unsigned\ int" -LDFLAGS = -lcuda -fopenmp +LDFLAGS = -lcuda INCLUDEPATH = ./include