sapporo2: trim the fetched source tree and make the kernel file configurable

get_sapporo.sh keeps .gitignore across the untar and, after extraction, removes
the files listed under "Cleanup unused bits". patch.diff now names the target
file on its four "+++" lines, replaces the compile-time kernelFile constant in
lib/interfaces/sapporoG6lib.cpp with a lookup of the SAPPORO2_KERNEL_FILE
environment variable (falling back to CUDA/kernels.ptx), and swaps the
lib/Makefile preamble for a shorter one.

NOTE(review): this diff was rebuilt from a copy whose newlines and indentation
had been collapsed. Hunk line counts were re-checked, but indentation inside
hunk lines, the position of whitespace-only context lines, the bare "#include"
context lines (header names lost) and the two added "#include" targets are
reconstructions -- confirm against the upstream tree before applying.

diff --git a/sapporo2/get_sapporo.sh b/sapporo2/get_sapporo.sh
index 1a45a67..f5eea23 100755
--- a/sapporo2/get_sapporo.sh
+++ b/sapporo2/get_sapporo.sh
@@ -3,7 +3,29 @@ commit=7c3f80acf1df5a8907118706a2260184cfccc6a1
 archive_md5sum=b7b17eeded2cb5dfb9e174424b3f7ecb
 url=https://github.com/treecode/sapporo2/archive/$commit.tar.gz
 wget -O sapporo2.tar.gz $url
+
+# Verify source.
 md5sum --check <<<"$archive_md5sum sapporo2.tar.gz"
+
+# Protect .gitignore.
+cp -rp .gitignore .gitignore~
+
+# Untar original source.
 tar --strip-components=1 -xvf sapporo2.tar.gz
-rm -f sapporo2.tar.gz
+
+# Restore .gitignore
+mv .gitignore~ .gitignore
+
+# Cleanup unused bits
+rm -rf sapporo2.tar.gz \
+    lib/include/vec.h \
+    lib/include/ocldev.h* \
+    lib/include/SSE_AVX \
+    lib/interfaces/sapporo6thlib.cpp \
+    lib/interfaces/sapporoG5lib.cpp \
+    lib/interfaces/sapporoYeblib.cpp \
+    lib/Makefile_ocl \
+    lib/OpenCLKernels
+
+# Patch source.
 patch -p0 < patch.diff
diff --git a/sapporo2/patch.diff b/sapporo2/patch.diff
index 89a0747..1b565be 100644
--- a/sapporo2/patch.diff
+++ b/sapporo2/patch.diff
@@ -1,5 +1,5 @@
 --- lib/CUDAKernels/kernels.cu
-+++ 
++++ lib/CUDAKernels/kernels.cu
 @@ -206,7 +206,7 @@
  // template<> __device__ __forceinline__ double RSQRT(double val) { return 1.0/sqrt(val); }
  
@@ -19,7 +19,7 @@
  __device__ __forceinline__ double atomicMin(double *address, double val)
  {
 --- lib/include/sapdevclass.h
-+++ 
++++ lib/include/sapdevclass.h
 @@ -33,7 +33,10 @@
  #include
  
@@ -33,8 +33,46 @@
  #include
  
 --- lib/interfaces/sapporoG6lib.cpp
-+++ 
--@@ -45,34 +45,9 @@
++++ lib/interfaces/sapporoG6lib.cpp
+@@ -1,3 +1,5 @@
++#include <cstdlib>
++#include <fstream>
+ #include "sapporohostclass.h"
+ 
+ sapporo grav;
+@@ -10,11 +12,6 @@
+ 
+ extern "C" {
+ 
+-#ifdef _OCL_
+-  const char *kernelFile = "OpenCL/kernels4th.cl";
+-#else
+-  const char *kernelFile = "CUDA/kernels.ptx";
+-#endif
+   double *dsmin_i; //Distance of nearest neighbour
+ 
+   double acc_i[3]; //To store the multiplied acc
+@@ -29,6 +26,19 @@
+   //devices to use. Otherwise they should be specified in the config file
+ 
+   //Open the GPUs
++
++  char kernelFile[1024] = {0};  // zero-filled so the bounded copy below always stays NUL-terminated
++  if (const char* kernelFile_env = std::getenv("SAPPORO2_KERNEL_FILE"))
++    strncpy(kernelFile, kernelFile_env, sizeof(kernelFile) - 1);  // leave the last byte as the terminator
++  else
++    strcpy(kernelFile, "CUDA/kernels.ptx");
++
++  std::ifstream file(kernelFile);
++  if (!file.is_open()) {
++    std::cout << "Kernel file " << kernelFile << " not found! Please set the SAPPORO2_KERNEL_FILE environment variable." << std::endl;
++    exit(1);
++  }
++
+ 
+   //Double single, default
+   int res = grav.open(kernelFile, list, ndev, FOURTH, DOUBLESINGLE);
+@@ -45,34 +55,9 @@
  
   int g6_open_(int *id)
   {
@@ -73,39 +111,46 @@
   }
  
 --- lib/Makefile
-+++ 
--@@ -1,11 +1,13 @@
++++ lib/Makefile
+@@ -1,84 +1,27 @@
 -CXX = g++
 -CC = gcc
 -LD = g++
 -F90 = ifort
-+CXX ?= g++
-+CC ?= gcc
-+LD ?= g++
-+F90 ?= ifort
- 
- .SUFFIXES: .o .cpp .ptx .cu
- 
+-
+-.SUFFIXES: .o .cpp .ptx .cu
+-
 -CUDA_TK ?= /usr/local/cuda
-+CUDA_HOME ?= /usr/local/cuda
-+CUDA_TK = $(CUDA_HOME)
-+OPTIMIZATION ?= 3
- 
- 
- testRunFlags1=
-@@ -24,7 +26,7 @@
-   testRunFlags3="-D TIMING_STATS=1"
- endif
- 
+-
+-
+-testRunFlags1=
+-testRunFlags2=
+-testRunFlags3=
+-
+-#Check for the defines
+-
+-ifdef NTHREADS
+-  testRunFlags1="-D NTHREADS=$(NTHREADS)"
+-  testRunFlags3="-D TIMING_STATS=1"
+-endif
+-
+-ifdef NBLOCKS_PER_MULTI
+-  testRunFlags2="-D NBLOCKS_PER_MULTI=$(NBLOCKS_PER_MULTI)"
+-  testRunFlags3="-D TIMING_STATS=1"
+-endif
+-
 -OFLAGS = -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp
-+OFLAGS = -g -O$(OPTIMIZATION) -Wall -Wextra -Wstrict-aliasing=2
- 
- #Use below if compiling with CPU_SUPPORT (SSE)
- #CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include -msse4
-@@ -36,23 +38,14 @@
- NVCC = $(CUDA_TK)/bin/nvcc
- 
- 
+-
+-#Use below if compiling with CPU_SUPPORT (SSE)
+-#CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include -msse4
+-CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include
+-
+-testRunFlags= $(testRunFlags1) $(testRunFlags2) $(testRunFlags3)
+-$(info $(testRunFlags))
+-
+-NVCC = $(CUDA_TK)/bin/nvcc
+-
+-
 -# Support older CUDA versions out of the box
 -NVCCVERSION=$(shell "${NVCC}" --version | grep ^Cuda | sed 's/^.* //g')
 -ifeq "${NVCCVERSION}" "V5.5.22"
@@ -117,15 +162,67 @@
 -#NVCCFLAGS = -arch sm_35
 -#NVCCFLAGS ?= -arch sm_30
 -#NVCCFLAGS = -arch sm_20
-+GPUARCH ?= sm_75
-+NVCCFLAGS = -arch $(GPUARCH) -ccbin $(CXX)
- NVCCFLAGS += ${testRunFlags}
- 
- # Use with Mac OS X
- # NVCCFLAGS = -arch sm_12 -Xcompiler="-Duint=unsigned\ int"
- 
+-NVCCFLAGS += ${testRunFlags}
+-
+-# Use with Mac OS X
+-# NVCCFLAGS = -arch sm_12 -Xcompiler="-Duint=unsigned\ int"
+-
 -LDFLAGS = -lcuda -fopenmp
-+LDFLAGS = -lcuda
- 
- 
+-
+-
++NVCC = nvcc
++GPUARCH ?= sm_75
++OPTIMIZATION ?= 3
++
++CUDA_HOME ?= /usr/local/cuda
++OFLAGS = -g -O$(OPTIMIZATION) -Wall -Wextra -Wstrict-aliasing=2
++CXXFLAGS += -fPIC $(OFLAGS) -I$(CUDA_HOME)/include
++NVCCFLAGS = -arch $(GPUARCH) -ccbin $(CXX)
  INCLUDEPATH = ./include
+ CXXFLAGS += -I$(INCLUDEPATH) -I./
+ NVCCFLAGS += -I$(INCLUDEPATH) -I./
+-
+ INTERFACEPATH =./interfaces
+-
+ CUDAKERNELSPATH = ./CUDAKernels
+ CUDAKERNELS = kernels.cu
+-
+ CUDAPTX = $(CUDAKERNELS:%.cu=$(CUDAKERNELSPATH)/%.ptx)
+-
+ SRCPATH = src
+-SRC = sapporohostclass.cpp sapporoG6lib.cpp sapporoYeblib.cpp sapporoG5lib.cpp sapporo6thlib.cpp
++SRC = sapporohostclass.cpp sapporoG6lib.cpp
+ OBJ = $(SRC:%.cpp=%.o)
+-
+-LIBOBJ = sapporohostclass.o $(INTERFACEPATH)/sapporoG6lib.o $(INTERFACEPATH)/sapporoYeblib.o
+-LIBOBJ += $(INTERFACEPATH)/sapporoG5lib.o
++LIBOBJ = sapporohostclass.o $(INTERFACEPATH)/sapporoG6lib.o
+ TARGET = libsapporo.a
+ 
+-
+ all: $(OBJ) $(CUDAPTX) $(TARGET)
+ kernels: $(CUDAPTX)
+ 
+-
+ $(TARGET): $(LIBOBJ)
+ 	ar qv $@ $^
+ 
+@@ -99,16 +42,9 @@
+ 
+ $(OBJ): $(INCLUDEPATH)/*.h
+ 
+-
+ sapporohostclass.o : $(INCLUDEPATH)/kernels.ptxh $(INCLUDEPATH)/sapporohostclass.h $(INCLUDEPATH)/sapdevclass.h $(INCLUDEPATH)/defines.h
+ $(CUDAKERNELSPATH)/kernels.ptx : $(INCLUDEPATH)/defines.h
+ 
+ libsapporo.a : sapporohostclass.o
+ 
+-
+-
+-
+-
+-
+-
+-
++.SUFFIXES: .o .cpp .ptx .cu
+\ No newline at end of file
\ No newline at end of file