phigrape-hybrid/sapporo2/patch.diff

--- lib/CUDAKernels/kernels.cu
+++ lib/CUDAKernels/kernels.cu
@@ -206,7 +206,7 @@
 // template<>           __device__ __forceinline__  double RSQRT(double val) { return 1.0/sqrt(val); }


-
+#if (CUDART_VERSION < 8000) || (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600)) // fallback only where no native double atomicAdd exists
 __device__ double atomicAdd(double* address, double val)
 {
     unsigned long long int* address_as_ull =
@@ -220,7 +220,7 @@
     } while (assumed != old);
     return __longlong_as_double(old);
 }
-
+#endif

 __device__ __forceinline__ double atomicMin(double *address, double val)
 {
--- lib/include/sapdevclass.h
+++ lib/include/sapdevclass.h
@@ -33,7 +33,10 @@

 #include <cassert>
 #include <iostream>
-#include <omp.h>
+#define omp_get_num_procs()   1
+#define omp_get_thread_num()  0
+#define omp_get_num_threads() 1
+#define omp_get_max_threads() 1

 #include <sys/time.h>

--- lib/interfaces/sapporoG6lib.cpp
+++ lib/interfaces/sapporoG6lib.cpp
@@ -1,3 +1,5 @@
+#include <cstdlib>
+#include <fstream>
 #include "sapporohostclass.h"

 sapporo grav;
@@ -10,11 +12,6 @@

 extern "C" {

-#ifdef _OCL_
-  const char *kernelFile = "OpenCL/kernels4th.cl";
-#else
-  const char *kernelFile = "CUDA/kernels.ptx";
-#endif
   double *dsmin_i;        //Distance of nearest neighbour

   double acc_i[3];      //To store the multiplied acc
@@ -29,6 +26,19 @@
     //devices to use. Otherwise they should be specified in the config file

     //Open the GPUs
+    //Resolve the kernel file: SAPPORO2_KERNEL_FILE overrides the built-in default
+    char kernelFile[1024] = {0};
+    if (const char* kernelFile_env = std::getenv("SAPPORO2_KERNEL_FILE"))
+      strncpy(kernelFile, kernelFile_env, sizeof(kernelFile) - 1); //last byte stays '\0'
+    else
+      strcpy(kernelFile, "CUDA/kernels.ptx");
+    //Fail early with a clear message when the kernel file cannot be opened
+    std::ifstream file(kernelFile);
+    if (!file.is_open()) {
+      std::cerr << "Kernel file " << kernelFile << " not found! Please set the SAPPORO2_KERNEL_FILE environment variable.\n";
+      exit(1);
+    }
+

     //Double single, default
     int res     = grav.open(kernelFile, list, ndev, FOURTH, DOUBLESINGLE);
@@ -45,34 +55,9 @@

   int g6_open_(int *id)
   {
-    //Check for a config file if its there use it
-    id = id;    //Make the compiler happy
-    int *devList = NULL;
-    int how_many = 0;
-    FILE *fd;
-    if ((fd = fopen("sapporo2.config", "r"))) {
-      char line[256];
-      fprintf(stderr, "sapporo2::open - config file is found\n");
-      if(fgets(line, 256, fd) != NULL)
-        sscanf(line, "%d", &how_many);
-
-      //Read the devices we want to use
-      if(how_many > 0)
-      {
-        devList = new int[how_many];
-        for (int i = 0; i < how_many; i++) {
-            if(fgets(line, 256, fd) != NULL)
-              sscanf(line, "%d", &devList[i]);
-        }
-      }
-    } else {
-      fprintf(stderr," sapporo2::open - no config file is found \n");
-      how_many = 0;
-    }
-    int res = g6_open_special(how_many, devList);
-
-    delete[] devList;
-
+    //Open the single device *id; a NULL id passes an empty device list instead of crashing
+    int devList = id ? *id : 0;
+    int res = id ? g6_open_special(1, &devList) : g6_open_special(0, NULL);
     return res;
   }

--- lib/Makefile
+++ lib/Makefile
@@ -1,84 +1,27 @@
-CXX = g++
-CC  = gcc
-LD  = g++
-F90  = ifort
-
-.SUFFIXES: .o .cpp .ptx .cu
-
-CUDA_TK  ?= /usr/local/cuda
-
-
-testRunFlags1=
-testRunFlags2=
-testRunFlags3=
-
-#Check for the defines
-
-ifdef NTHREADS
-	testRunFlags1="-D NTHREADS=$(NTHREADS)"
-	testRunFlags3="-D TIMING_STATS=1"
-endif
-
-ifdef NBLOCKS_PER_MULTI
-	testRunFlags2="-D NBLOCKS_PER_MULTI=$(NBLOCKS_PER_MULTI)"
-	testRunFlags3="-D TIMING_STATS=1"
-endif
-
-OFLAGS = -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp
-
-#Use below if compiling with CPU_SUPPORT (SSE)
-#CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include -msse4
-CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include
-
-testRunFlags= $(testRunFlags1) $(testRunFlags2) $(testRunFlags3)
-$(info $(testRunFlags))
-
-NVCC      = $(CUDA_TK)/bin/nvcc
-
-
-# Support older CUDA versions out of the box
-NVCCVERSION=$(shell "${NVCC}" --version | grep ^Cuda | sed 's/^.* //g')
-ifeq "${NVCCVERSION}" "V5.5.22"
-	NVCCFLAGS ?= -arch sm_20
-else
-	NVCCFLAGS ?= -arch sm_30
-endif
-
-#NVCCFLAGS = -arch sm_35
-#NVCCFLAGS ?= -arch sm_30
-#NVCCFLAGS = -arch sm_20
-NVCCFLAGS +=  ${testRunFlags}
-
-# Use with Mac OS X
-# NVCCFLAGS = -arch sm_12 -Xcompiler="-Duint=unsigned\ int"
-
-LDFLAGS = -lcuda -fopenmp
-
-
+NVCC     = nvcc
+GPUARCH ?= sm_75
+OPTIMIZATION ?= 3
+
+CUDA_HOME ?= /usr/local/cuda
+OFLAGS = -g -O$(OPTIMIZATION) -Wall -Wextra -Wstrict-aliasing=2
+CXXFLAGS += -fPIC $(OFLAGS) -I$(CUDA_HOME)/include
+NVCCFLAGS  = -arch $(GPUARCH) -ccbin $(CXX)
 INCLUDEPATH = ./include
 CXXFLAGS  += -I$(INCLUDEPATH) -I./
 NVCCFLAGS += -I$(INCLUDEPATH) -I./
-
 INTERFACEPATH =./interfaces
-
 CUDAKERNELSPATH = ./CUDAKernels
 CUDAKERNELS = kernels.cu
-
 CUDAPTX = $(CUDAKERNELS:%.cu=$(CUDAKERNELSPATH)/%.ptx)
-
 SRCPATH = src
-SRC = sapporohostclass.cpp sapporoG6lib.cpp sapporoYeblib.cpp sapporoG5lib.cpp sapporo6thlib.cpp
+SRC = sapporohostclass.cpp sapporoG6lib.cpp
 OBJ = $(SRC:%.cpp=%.o)
-
-LIBOBJ = sapporohostclass.o $(INTERFACEPATH)/sapporoG6lib.o $(INTERFACEPATH)/sapporoYeblib.o
-LIBOBJ += $(INTERFACEPATH)/sapporoG5lib.o
+LIBOBJ = sapporohostclass.o $(INTERFACEPATH)/sapporoG6lib.o
 TARGET = libsapporo.a

-
 all:	  $(OBJ) $(CUDAPTX) $(TARGET)
 kernels:  $(CUDAPTX)

-
 $(TARGET): $(LIBOBJ)
 	ar qv $@ $^

@@ -99,16 +42,9 @@

 $(OBJ): $(INCLUDEPATH)/*.h

-
 sapporohostclass.o : $(INCLUDEPATH)/kernels.ptxh $(INCLUDEPATH)/sapporohostclass.h $(INCLUDEPATH)/sapdevclass.h $(INCLUDEPATH)/defines.h
 $(CUDAKERNELSPATH)/kernels.ptx : $(INCLUDEPATH)/defines.h

 libsapporo.a : sapporohostclass.o

-
-
-
-
-
-
-
+.SUFFIXES: .o .cpp .ptx .cu
\ No newline at end of file