Sapporo now uses an environment variable (SAPPORO2_KERNEL_FILE) to find its kernel file; unused files were also removed.
This commit is contained in:
parent
74e8cbdd03
commit
f68cd8773f
2 changed files with 161 additions and 42 deletions
|
|
@ -3,7 +3,29 @@ commit=7c3f80acf1df5a8907118706a2260184cfccc6a1
|
||||||
archive_md5sum=b7b17eeded2cb5dfb9e174424b3f7ecb
|
archive_md5sum=b7b17eeded2cb5dfb9e174424b3f7ecb
|
||||||
url=https://github.com/treecode/sapporo2/archive/$commit.tar.gz
|
url=https://github.com/treecode/sapporo2/archive/$commit.tar.gz
|
||||||
wget -O sapporo2.tar.gz $url
|
wget -O sapporo2.tar.gz $url
|
||||||
|
|
||||||
|
# Verify source.
|
||||||
md5sum --check <<<"$archive_md5sum sapporo2.tar.gz"
|
md5sum --check <<<"$archive_md5sum sapporo2.tar.gz"
|
||||||
|
|
||||||
|
# Protect .gitignore.
|
||||||
|
cp -rp .gitignore .gitignore~
|
||||||
|
|
||||||
|
# Untar original source.
|
||||||
tar --strip-components=1 -xvf sapporo2.tar.gz
|
tar --strip-components=1 -xvf sapporo2.tar.gz
|
||||||
rm -f sapporo2.tar.gz
|
|
||||||
|
# Restore .gitignore
|
||||||
|
mv .gitignore~ .gitignore
|
||||||
|
|
||||||
|
# Cleanup unused bits
|
||||||
|
rm -rf sapporo2.tar.gz \
|
||||||
|
lib/include/vec.h \
|
||||||
|
lib/include/ocldev.h* \
|
||||||
|
lib/include/SSE_AVX \
|
||||||
|
lib/interfaces/sapporo6thlib.cpp \
|
||||||
|
lib/interfaces/sapporoG5lib.cpp \
|
||||||
|
lib/interfaces/sapporoYeblib.cpp \
|
||||||
|
lib/Makefile_ocl \
|
||||||
|
lib/OpenCLKernels
|
||||||
|
|
||||||
|
# Patch source.
|
||||||
patch -p0 < patch.diff
|
patch -p0 < patch.diff
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
--- lib/CUDAKernels/kernels.cu
|
--- lib/CUDAKernels/kernels.cu
|
||||||
+++
|
+++ lib/CUDAKernels/kernels.cu
|
||||||
@@ -206,7 +206,7 @@
|
@@ -206,7 +206,7 @@
|
||||||
// template<> __device__ __forceinline__ double RSQRT(double val) { return 1.0/sqrt(val); }
|
// template<> __device__ __forceinline__ double RSQRT(double val) { return 1.0/sqrt(val); }
|
||||||
|
|
||||||
|
|
@ -19,7 +19,7 @@
|
||||||
__device__ __forceinline__ double atomicMin(double *address, double val)
|
__device__ __forceinline__ double atomicMin(double *address, double val)
|
||||||
{
|
{
|
||||||
--- lib/include/sapdevclass.h
|
--- lib/include/sapdevclass.h
|
||||||
+++
|
+++ lib/include/sapdevclass.h
|
||||||
@@ -33,7 +33,10 @@
|
@@ -33,7 +33,10 @@
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
@ -33,8 +33,46 @@
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
|
|
||||||
--- lib/interfaces/sapporoG6lib.cpp
|
--- lib/interfaces/sapporoG6lib.cpp
|
||||||
+++
|
+++ lib/interfaces/sapporoG6lib.cpp
|
||||||
@@ -45,34 +45,9 @@
|
@@ -1,3 +1,5 @@
|
||||||
|
+#include <cstdlib>
|
||||||
|
+#include <fstream>
|
||||||
|
#include "sapporohostclass.h"
|
||||||
|
|
||||||
|
sapporo grav;
|
||||||
|
@@ -10,11 +12,6 @@
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
|
||||||
|
-#ifdef _OCL_
|
||||||
|
- const char *kernelFile = "OpenCL/kernels4th.cl";
|
||||||
|
-#else
|
||||||
|
- const char *kernelFile = "CUDA/kernels.ptx";
|
||||||
|
-#endif
|
||||||
|
double *dsmin_i; //Distance of nearest neighbour
|
||||||
|
|
||||||
|
double acc_i[3]; //To store the multiplied acc
|
||||||
|
@@ -29,6 +26,19 @@
|
||||||
|
//devices to use. Otherwise they should be specified in the config file
|
||||||
|
|
||||||
|
//Open the GPUs
|
||||||
|
+
|
||||||
|
+ char kernelFile[1024];
|
||||||
|
+ if (const char* kernelFile_env = std::getenv("SAPPORO2_KERNEL_FILE"))
|
||||||
|
+ strncpy(kernelFile, kernelFile_env, 1024);
|
||||||
|
+ else
|
||||||
|
+ strcpy(kernelFile, "CUDA/kernels.ptx");
|
||||||
|
+
|
||||||
|
+ std::ifstream file(kernelFile);
|
||||||
|
+ if (!file.is_open()) {
|
||||||
|
+ std::cout << "Kernel file " << kernelFile << " not found! Please set the SAPPORO2_KERNEL_FILE environment variable.";
|
||||||
|
+ exit(1);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
|
||||||
|
//Double single, default
|
||||||
|
int res = grav.open(kernelFile, list, ndev, FOURTH, DOUBLESINGLE);
|
||||||
|
@@ -45,34 +55,9 @@
|
||||||
|
|
||||||
int g6_open_(int *id)
|
int g6_open_(int *id)
|
||||||
{
|
{
|
||||||
|
|
@ -73,39 +111,46 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
--- lib/Makefile
|
--- lib/Makefile
|
||||||
+++
|
+++ lib/Makefile
|
||||||
@@ -1,11 +1,13 @@
|
@@ -1,84 +1,27 @@
|
||||||
-CXX = g++
|
-CXX = g++
|
||||||
-CC = gcc
|
-CC = gcc
|
||||||
-LD = g++
|
-LD = g++
|
||||||
-F90 = ifort
|
-F90 = ifort
|
||||||
+CXX ?= g++
|
-
|
||||||
+CC ?= gcc
|
-.SUFFIXES: .o .cpp .ptx .cu
|
||||||
+LD ?= g++
|
-
|
||||||
+F90 ?= ifort
|
|
||||||
|
|
||||||
.SUFFIXES: .o .cpp .ptx .cu
|
|
||||||
|
|
||||||
-CUDA_TK ?= /usr/local/cuda
|
-CUDA_TK ?= /usr/local/cuda
|
||||||
+CUDA_HOME ?= /usr/local/cuda
|
-
|
||||||
+CUDA_TK = $(CUDA_HOME)
|
-
|
||||||
+OPTIMIZATION ?= 3
|
-testRunFlags1=
|
||||||
|
-testRunFlags2=
|
||||||
|
-testRunFlags3=
|
||||||
testRunFlags1=
|
-
|
||||||
@@ -24,7 +26,7 @@
|
-#Check for the defines
|
||||||
testRunFlags3="-D TIMING_STATS=1"
|
-
|
||||||
endif
|
-ifdef NTHREADS
|
||||||
|
- testRunFlags1="-D NTHREADS=$(NTHREADS)"
|
||||||
|
- testRunFlags3="-D TIMING_STATS=1"
|
||||||
|
-endif
|
||||||
|
-
|
||||||
|
-ifdef NBLOCKS_PER_MULTI
|
||||||
|
- testRunFlags2="-D NBLOCKS_PER_MULTI=$(NBLOCKS_PER_MULTI)"
|
||||||
|
- testRunFlags3="-D TIMING_STATS=1"
|
||||||
|
-endif
|
||||||
|
-
|
||||||
-OFLAGS = -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp
|
-OFLAGS = -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp
|
||||||
+OFLAGS = -g -O$(OPTIMIZATION) -Wall -Wextra -Wstrict-aliasing=2
|
-
|
||||||
|
-#Use below if compiling with CPU_SUPPORT (SSE)
|
||||||
#Use below if compiling with CPU_SUPPORT (SSE)
|
-#CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include -msse4
|
||||||
#CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include -msse4
|
-CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include
|
||||||
@@ -36,23 +38,14 @@
|
-
|
||||||
NVCC = $(CUDA_TK)/bin/nvcc
|
-testRunFlags= $(testRunFlags1) $(testRunFlags2) $(testRunFlags3)
|
||||||
|
-$(info $(testRunFlags))
|
||||||
|
-
|
||||||
|
-NVCC = $(CUDA_TK)/bin/nvcc
|
||||||
|
-
|
||||||
|
-
|
||||||
-# Support older CUDA versions out of the box
|
-# Support older CUDA versions out of the box
|
||||||
-NVCCVERSION=$(shell "${NVCC}" --version | grep ^Cuda | sed 's/^.* //g')
|
-NVCCVERSION=$(shell "${NVCC}" --version | grep ^Cuda | sed 's/^.* //g')
|
||||||
-ifeq "${NVCCVERSION}" "V5.5.22"
|
-ifeq "${NVCCVERSION}" "V5.5.22"
|
||||||
|
|
@ -117,15 +162,67 @@
|
||||||
-#NVCCFLAGS = -arch sm_35
|
-#NVCCFLAGS = -arch sm_35
|
||||||
-#NVCCFLAGS ?= -arch sm_30
|
-#NVCCFLAGS ?= -arch sm_30
|
||||||
-#NVCCFLAGS = -arch sm_20
|
-#NVCCFLAGS = -arch sm_20
|
||||||
+GPUARCH ?= sm_75
|
-NVCCFLAGS += ${testRunFlags}
|
||||||
+NVCCFLAGS = -arch $(GPUARCH) -ccbin $(CXX)
|
-
|
||||||
NVCCFLAGS += ${testRunFlags}
|
-# Use with Mac OS X
|
||||||
|
-# NVCCFLAGS = -arch sm_12 -Xcompiler="-Duint=unsigned\ int"
|
||||||
# Use with Mac OS X
|
-
|
||||||
# NVCCFLAGS = -arch sm_12 -Xcompiler="-Duint=unsigned\ int"
|
|
||||||
|
|
||||||
-LDFLAGS = -lcuda -fopenmp
|
-LDFLAGS = -lcuda -fopenmp
|
||||||
+LDFLAGS = -lcuda
|
-
|
||||||
|
-
|
||||||
|
+NVCC = nvcc
|
||||||
|
+GPUARCH ?= sm_75
|
||||||
|
+OPTIMIZATION ?= 3
|
||||||
|
+
|
||||||
|
+CUDA_HOME ?= /usr/local/cuda
|
||||||
|
+OFLAGS = -g -O$(OPTIMIZATION) -Wall -Wextra -Wstrict-aliasing=2
|
||||||
|
+CXXFLAGS += -fPIC $(OFLAGS) -I$(CUDA_HOME)/include
|
||||||
|
+NVCCFLAGS = -arch $(GPUARCH) -ccbin $(CXX)
|
||||||
INCLUDEPATH = ./include
|
INCLUDEPATH = ./include
|
||||||
|
CXXFLAGS += -I$(INCLUDEPATH) -I./
|
||||||
|
NVCCFLAGS += -I$(INCLUDEPATH) -I./
|
||||||
|
-
|
||||||
|
INTERFACEPATH =./interfaces
|
||||||
|
-
|
||||||
|
CUDAKERNELSPATH = ./CUDAKernels
|
||||||
|
CUDAKERNELS = kernels.cu
|
||||||
|
-
|
||||||
|
CUDAPTX = $(CUDAKERNELS:%.cu=$(CUDAKERNELSPATH)/%.ptx)
|
||||||
|
-
|
||||||
|
SRCPATH = src
|
||||||
|
-SRC = sapporohostclass.cpp sapporoG6lib.cpp sapporoYeblib.cpp sapporoG5lib.cpp sapporo6thlib.cpp
|
||||||
|
+SRC = sapporohostclass.cpp sapporoG6lib.cpp
|
||||||
|
OBJ = $(SRC:%.cpp=%.o)
|
||||||
|
-
|
||||||
|
-LIBOBJ = sapporohostclass.o $(INTERFACEPATH)/sapporoG6lib.o $(INTERFACEPATH)/sapporoYeblib.o
|
||||||
|
-LIBOBJ += $(INTERFACEPATH)/sapporoG5lib.o
|
||||||
|
+LIBOBJ = sapporohostclass.o $(INTERFACEPATH)/sapporoG6lib.o
|
||||||
|
TARGET = libsapporo.a
|
||||||
|
|
||||||
|
-
|
||||||
|
all: $(OBJ) $(CUDAPTX) $(TARGET)
|
||||||
|
kernels: $(CUDAPTX)
|
||||||
|
|
||||||
|
-
|
||||||
|
$(TARGET): $(LIBOBJ)
|
||||||
|
ar qv $@ $^
|
||||||
|
|
||||||
|
@@ -99,16 +42,9 @@
|
||||||
|
|
||||||
|
$(OBJ): $(INCLUDEPATH)/*.h
|
||||||
|
|
||||||
|
-
|
||||||
|
sapporohostclass.o : $(INCLUDEPATH)/kernels.ptxh $(INCLUDEPATH)/sapporohostclass.h $(INCLUDEPATH)/sapdevclass.h $(INCLUDEPATH)/defines.h
|
||||||
|
$(CUDAKERNELSPATH)/kernels.ptx : $(INCLUDEPATH)/defines.h
|
||||||
|
|
||||||
|
libsapporo.a : sapporohostclass.o
|
||||||
|
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-
|
||||||
|
+.SUFFIXES: .o .cpp .ptx .cu
|
||||||
|
\ No newline at end of file
|
||||||
Loading…
Add table
Add a link
Reference in a new issue