--- lib/CUDAKernels/kernels.cu +++ lib/CUDAKernels/kernels.cu @@ -206,7 +206,7 @@ // template<> __device__ __forceinline__ double RSQRT(double val) { return 1.0/sqrt(val); } - +#if (CUDART_VERSION <= 8000) __device__ double atomicAdd(double* address, double val) { unsigned long long int* address_as_ull = @@ -220,7 +220,7 @@ } while (assumed != old); return __longlong_as_double(old); } - +#endif __device__ __forceinline__ double atomicMin(double *address, double val) { --- lib/include/sapdevclass.h +++ lib/include/sapdevclass.h @@ -33,7 +33,10 @@ #include #include -#include +#define omp_get_num_procs() 1 +#define omp_get_thread_num() 0 +#define omp_get_num_threads() 1 +#define omp_get_max_threads() 1 #include --- lib/interfaces/sapporoG6lib.cpp +++ lib/interfaces/sapporoG6lib.cpp @@ -1,3 +1,5 @@ +#include +#include #include "sapporohostclass.h" sapporo grav; @@ -10,11 +12,6 @@ extern "C" { -#ifdef _OCL_ - const char *kernelFile = "OpenCL/kernels4th.cl"; -#else - const char *kernelFile = "CUDA/kernels.ptx"; -#endif double *dsmin_i; //Distance of nearest neighbour double acc_i[3]; //To store the multiplied acc @@ -29,6 +26,19 @@ //devices to use. Otherwise they should be specified in the config file //Open the GPUs + + char kernelFile[1024]; + if (const char* kernelFile_env = std::getenv("SAPPORO2_KERNEL_FILE")) + strncpy(kernelFile, kernelFile_env, 1024); + else + strcpy(kernelFile, "CUDA/kernels.ptx"); + + std::ifstream file(kernelFile); + if (!file.is_open()) { + std::cout << "Kernel file " << kernelFile << " not found! Please set the SAPPORO2_KERNEL_FILE environment variable."; + exit(1); + } + //Double single, default int res = grav.open(kernelFile, list, ndev, FOURTH, DOUBLESINGLE); @@ -45,34 +55,9 @@ int g6_open_(int *id) { - //Check for a config file if its there use it - id = id; //Make the compiler happy - int *devList = NULL; - int how_many = 0; - FILE *fd; - if ((fd = fopen("sapporo2.config", "r"))) { - char line[256]; - fprintf(stderr, "sapporo2::open - config file is found\n"); - if(fgets(line, 256, fd) != NULL) - sscanf(line, "%d", &how_many); - - //Read the devices we want to use - if(how_many > 0) - { - devList = new int[how_many]; - for (int i = 0; i < how_many; i++) { - if(fgets(line, 256, fd) != NULL) - sscanf(line, "%d", &devList[i]); - } - } - } else { - fprintf(stderr," sapporo2::open - no config file is found \n"); - how_many = 0; - } - int res = g6_open_special(how_many, devList); - - delete[] devList; - + int devList = *id; + int how_many = 1; + int res = g6_open_special(how_many, &devList); return res; } --- lib/Makefile +++ lib/Makefile @@ -1,84 +1,27 @@ -CXX = g++ -CC = gcc -LD = g++ -F90 = ifort - -.SUFFIXES: .o .cpp .ptx .cu - -CUDA_TK ?= /usr/local/cuda - - -testRunFlags1= -testRunFlags2= -testRunFlags3= - -#Check for the defines - -ifdef NTHREADS - testRunFlags1="-D NTHREADS=$(NTHREADS)" - testRunFlags3="-D TIMING_STATS=1" -endif - -ifdef NBLOCKS_PER_MULTI - testRunFlags2="-D NBLOCKS_PER_MULTI=$(NBLOCKS_PER_MULTI)" - testRunFlags3="-D TIMING_STATS=1" -endif - -OFLAGS = -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp - -#Use below if compiling with CPU_SUPPORT (SSE) -#CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include -msse4 -CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include - -testRunFlags= $(testRunFlags1) $(testRunFlags2) $(testRunFlags3) -$(info $(testRunFlags)) - -NVCC = $(CUDA_TK)/bin/nvcc - - -# Support older CUDA versions out of the box -NVCCVERSION=$(shell "${NVCC}" --version | grep ^Cuda | sed 's/^.* //g') -ifeq "${NVCCVERSION}" "V5.5.22" - NVCCFLAGS ?= -arch sm_20 -else - NVCCFLAGS ?= -arch sm_30 -endif - -#NVCCFLAGS = -arch sm_35 -#NVCCFLAGS ?= -arch sm_30 -#NVCCFLAGS = -arch sm_20 -NVCCFLAGS += ${testRunFlags} - -# Use with Mac OS X -# NVCCFLAGS = -arch sm_12 -Xcompiler="-Duint=unsigned\ int" - -LDFLAGS = -lcuda -fopenmp - - +NVCC = nvcc +GPUARCH ?= sm_75 +OPTIMIZATION ?= 3 + +CUDA_HOME ?= /usr/local/cuda +OFLAGS = -g -O$(OPTIMIZATION) -Wall -Wextra -Wstrict-aliasing=2 +CXXFLAGS += -fPIC $(OFLAGS) -I$(CUDA_HOME)/include +NVCCFLAGS = -arch $(GPUARCH) -ccbin $(CXX) INCLUDEPATH = ./include CXXFLAGS += -I$(INCLUDEPATH) -I./ NVCCFLAGS += -I$(INCLUDEPATH) -I./ - INTERFACEPATH =./interfaces - CUDAKERNELSPATH = ./CUDAKernels CUDAKERNELS = kernels.cu - CUDAPTX = $(CUDAKERNELS:%.cu=$(CUDAKERNELSPATH)/%.ptx) - SRCPATH = src -SRC = sapporohostclass.cpp sapporoG6lib.cpp sapporoYeblib.cpp sapporoG5lib.cpp sapporo6thlib.cpp +SRC = sapporohostclass.cpp sapporoG6lib.cpp OBJ = $(SRC:%.cpp=%.o) - -LIBOBJ = sapporohostclass.o $(INTERFACEPATH)/sapporoG6lib.o $(INTERFACEPATH)/sapporoYeblib.o -LIBOBJ += $(INTERFACEPATH)/sapporoG5lib.o +LIBOBJ = sapporohostclass.o $(INTERFACEPATH)/sapporoG6lib.o TARGET = libsapporo.a - all: $(OBJ) $(CUDAPTX) $(TARGET) kernels: $(CUDAPTX) - $(TARGET): $(LIBOBJ) ar qv $@ $^ @@ -99,16 +42,9 @@ $(OBJ): $(INCLUDEPATH)/*.h - sapporohostclass.o : $(INCLUDEPATH)/kernels.ptxh $(INCLUDEPATH)/sapporohostclass.h $(INCLUDEPATH)/sapdevclass.h $(INCLUDEPATH)/defines.h $(CUDAKERNELSPATH)/kernels.ptx : $(INCLUDEPATH)/defines.h libsapporo.a : sapporohostclass.o - - - - - - - +.SUFFIXES: .o .cpp .ptx .cu \ No newline at end of file