Sapporo will now use an environment variable to find kernels; unused files were also removed.

This commit is contained in:
Yohai Meiron 2021-10-15 15:29:35 -04:00
parent 74e8cbdd03
commit f68cd8773f
2 changed files with 161 additions and 42 deletions

View file

@ -3,7 +3,29 @@ commit=7c3f80acf1df5a8907118706a2260184cfccc6a1
archive_md5sum=b7b17eeded2cb5dfb9e174424b3f7ecb archive_md5sum=b7b17eeded2cb5dfb9e174424b3f7ecb
url=https://github.com/treecode/sapporo2/archive/$commit.tar.gz url=https://github.com/treecode/sapporo2/archive/$commit.tar.gz
wget -O sapporo2.tar.gz $url wget -O sapporo2.tar.gz $url
# Verify source.
md5sum --check <<<"$archive_md5sum sapporo2.tar.gz" md5sum --check <<<"$archive_md5sum sapporo2.tar.gz"
# Protect .gitignore.
cp -rp .gitignore .gitignore~
# Untar original source.
tar --strip-components=1 -xvf sapporo2.tar.gz tar --strip-components=1 -xvf sapporo2.tar.gz
rm -f sapporo2.tar.gz
# Restore .gitignore.
mv .gitignore~ .gitignore
# Clean up unused bits.
rm -rf sapporo2.tar.gz \
lib/include/vec.h \
lib/include/ocldev.h* \
lib/include/SSE_AVX \
lib/interfaces/sapporo6thlib.cpp \
lib/interfaces/sapporoG5lib.cpp \
lib/interfaces/sapporoYeblib.cpp \
lib/Makefile_ocl \
lib/OpenCLKernels
# Patch source.
patch -p0 < patch.diff patch -p0 < patch.diff

View file

@ -1,5 +1,5 @@
--- lib/CUDAKernels/kernels.cu --- lib/CUDAKernels/kernels.cu
+++ +++ lib/CUDAKernels/kernels.cu
@@ -206,7 +206,7 @@ @@ -206,7 +206,7 @@
// template<> __device__ __forceinline__ double RSQRT(double val) { return 1.0/sqrt(val); } // template<> __device__ __forceinline__ double RSQRT(double val) { return 1.0/sqrt(val); }
@ -19,7 +19,7 @@
__device__ __forceinline__ double atomicMin(double *address, double val) __device__ __forceinline__ double atomicMin(double *address, double val)
{ {
--- lib/include/sapdevclass.h --- lib/include/sapdevclass.h
+++ +++ lib/include/sapdevclass.h
@@ -33,7 +33,10 @@ @@ -33,7 +33,10 @@
#include <cassert> #include <cassert>
@ -33,8 +33,46 @@
#include <sys/time.h> #include <sys/time.h>
--- lib/interfaces/sapporoG6lib.cpp --- lib/interfaces/sapporoG6lib.cpp
+++ +++ lib/interfaces/sapporoG6lib.cpp
@@ -45,34 +45,9 @@ @@ -1,3 +1,5 @@
+#include <cstdlib>
+#include <fstream>
#include "sapporohostclass.h"
sapporo grav;
@@ -10,11 +12,6 @@
extern "C" {
-#ifdef _OCL_
- const char *kernelFile = "OpenCL/kernels4th.cl";
-#else
- const char *kernelFile = "CUDA/kernels.ptx";
-#endif
double *dsmin_i; //Distance of nearest neighbour
double acc_i[3]; //To store the multiplied acc
@@ -29,6 +26,19 @@
//devices to use. Otherwise they should be specified in the config file
//Open the GPUs
+
+ char kernelFile[1024];
+ if (const char* kernelFile_env = std::getenv("SAPPORO2_KERNEL_FILE"))
+ strncpy(kernelFile, kernelFile_env, 1024);
+ else
+ strcpy(kernelFile, "CUDA/kernels.ptx");
+
+ std::ifstream file(kernelFile);
+ if (!file.is_open()) {
+ std::cout << "Kernel file " << kernelFile << " not found! Please set the SAPPORO2_KERNEL_FILE environment variable.";
+ exit(1);
+ }
+
//Double single, default
int res = grav.open(kernelFile, list, ndev, FOURTH, DOUBLESINGLE);
@@ -45,34 +55,9 @@
int g6_open_(int *id) int g6_open_(int *id)
{ {
@ -73,39 +111,46 @@
} }
--- lib/Makefile --- lib/Makefile
+++ +++ lib/Makefile
@@ -1,11 +1,13 @@ @@ -1,84 +1,27 @@
-CXX = g++ -CXX = g++
-CC = gcc -CC = gcc
-LD = g++ -LD = g++
-F90 = ifort -F90 = ifort
+CXX ?= g++ -
+CC ?= gcc -.SUFFIXES: .o .cpp .ptx .cu
+LD ?= g++ -
+F90 ?= ifort
.SUFFIXES: .o .cpp .ptx .cu
-CUDA_TK ?= /usr/local/cuda -CUDA_TK ?= /usr/local/cuda
+CUDA_HOME ?= /usr/local/cuda -
+CUDA_TK = $(CUDA_HOME) -
+OPTIMIZATION ?= 3 -testRunFlags1=
-testRunFlags2=
-testRunFlags3=
testRunFlags1= -
@@ -24,7 +26,7 @@ -#Check for the defines
testRunFlags3="-D TIMING_STATS=1" -
endif -ifdef NTHREADS
- testRunFlags1="-D NTHREADS=$(NTHREADS)"
- testRunFlags3="-D TIMING_STATS=1"
-endif
-
-ifdef NBLOCKS_PER_MULTI
- testRunFlags2="-D NBLOCKS_PER_MULTI=$(NBLOCKS_PER_MULTI)"
- testRunFlags3="-D TIMING_STATS=1"
-endif
-
-OFLAGS = -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp -OFLAGS = -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp
+OFLAGS = -g -O$(OPTIMIZATION) -Wall -Wextra -Wstrict-aliasing=2 -
-#Use below if compiling with CPU_SUPPORT (SSE)
#Use below if compiling with CPU_SUPPORT (SSE) -#CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include -msse4
#CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include -msse4 -CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include
@@ -36,23 +38,14 @@ -
NVCC = $(CUDA_TK)/bin/nvcc -testRunFlags= $(testRunFlags1) $(testRunFlags2) $(testRunFlags3)
-$(info $(testRunFlags))
-
-NVCC = $(CUDA_TK)/bin/nvcc
-
-
-# Support older CUDA versions out of the box -# Support older CUDA versions out of the box
-NVCCVERSION=$(shell "${NVCC}" --version | grep ^Cuda | sed 's/^.* //g') -NVCCVERSION=$(shell "${NVCC}" --version | grep ^Cuda | sed 's/^.* //g')
-ifeq "${NVCCVERSION}" "V5.5.22" -ifeq "${NVCCVERSION}" "V5.5.22"
@ -117,15 +162,67 @@
-#NVCCFLAGS = -arch sm_35 -#NVCCFLAGS = -arch sm_35
-#NVCCFLAGS ?= -arch sm_30 -#NVCCFLAGS ?= -arch sm_30
-#NVCCFLAGS = -arch sm_20 -#NVCCFLAGS = -arch sm_20
+GPUARCH ?= sm_75 -NVCCFLAGS += ${testRunFlags}
+NVCCFLAGS = -arch $(GPUARCH) -ccbin $(CXX) -
NVCCFLAGS += ${testRunFlags} -# Use with Mac OS X
-# NVCCFLAGS = -arch sm_12 -Xcompiler="-Duint=unsigned\ int"
# Use with Mac OS X -
# NVCCFLAGS = -arch sm_12 -Xcompiler="-Duint=unsigned\ int"
-LDFLAGS = -lcuda -fopenmp -LDFLAGS = -lcuda -fopenmp
+LDFLAGS = -lcuda -
-
+NVCC = nvcc
+GPUARCH ?= sm_75
+OPTIMIZATION ?= 3
+
+CUDA_HOME ?= /usr/local/cuda
+OFLAGS = -g -O$(OPTIMIZATION) -Wall -Wextra -Wstrict-aliasing=2
+CXXFLAGS += -fPIC $(OFLAGS) -I$(CUDA_HOME)/include
+NVCCFLAGS = -arch $(GPUARCH) -ccbin $(CXX)
INCLUDEPATH = ./include INCLUDEPATH = ./include
CXXFLAGS += -I$(INCLUDEPATH) -I./
NVCCFLAGS += -I$(INCLUDEPATH) -I./
-
INTERFACEPATH =./interfaces
-
CUDAKERNELSPATH = ./CUDAKernels
CUDAKERNELS = kernels.cu
-
CUDAPTX = $(CUDAKERNELS:%.cu=$(CUDAKERNELSPATH)/%.ptx)
-
SRCPATH = src
-SRC = sapporohostclass.cpp sapporoG6lib.cpp sapporoYeblib.cpp sapporoG5lib.cpp sapporo6thlib.cpp
+SRC = sapporohostclass.cpp sapporoG6lib.cpp
OBJ = $(SRC:%.cpp=%.o)
-
-LIBOBJ = sapporohostclass.o $(INTERFACEPATH)/sapporoG6lib.o $(INTERFACEPATH)/sapporoYeblib.o
-LIBOBJ += $(INTERFACEPATH)/sapporoG5lib.o
+LIBOBJ = sapporohostclass.o $(INTERFACEPATH)/sapporoG6lib.o
TARGET = libsapporo.a
-
all: $(OBJ) $(CUDAPTX) $(TARGET)
kernels: $(CUDAPTX)
-
$(TARGET): $(LIBOBJ)
ar qv $@ $^
@@ -99,16 +42,9 @@
$(OBJ): $(INCLUDEPATH)/*.h
-
sapporohostclass.o : $(INCLUDEPATH)/kernels.ptxh $(INCLUDEPATH)/sapporohostclass.h $(INCLUDEPATH)/sapdevclass.h $(INCLUDEPATH)/defines.h
$(CUDAKERNELSPATH)/kernels.ptx : $(INCLUDEPATH)/defines.h
libsapporo.a : sapporohostclass.o
-
-
-
-
-
-
-
+.SUFFIXES: .o .cpp .ptx .cu
\ No newline at end of file