147 lines
3.2 KiB
Diff
147 lines
3.2 KiB
Diff
diff -ruN
|
|
--- lib/CUDAKernels/kernels.cu
|
|
+++
|
|
@@ -206,7 +206,7 @@
|
|
// template<> __device__ __forceinline__ double RSQRT(double val) { return 1.0/sqrt(val); }
|
|
|
|
|
|
-
|
|
+#if (CUDART_VERSION < 8000) || (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600)) // CAS fallback: native double atomicAdd exists only with CUDA >= 8 on sm_60+
|
|
__device__ double atomicAdd(double* address, double val)
|
|
{
|
|
unsigned long long int* address_as_ull =
|
|
@@ -220,7 +220,7 @@
|
|
} while (assumed != old);
|
|
return __longlong_as_double(old);
|
|
}
|
|
-
|
|
+#endif
|
|
|
|
__device__ __forceinline__ double atomicMin(double *address, double val)
|
|
{
|
|
diff -ruN
|
|
--- lib/include/cudadev.h
|
|
+++
|
|
@@ -80,7 +80,7 @@
|
|
assert(false);\
|
|
} }
|
|
|
|
-#define cuSafeCall( call ) CU_SAFE_CALL_NO_SYNC(call);
|
|
+#define cuSafeCall( call ) call;
|
|
|
|
class context {
|
|
protected:
|
|
diff -ruN
|
|
--- lib/include/sapdevclass.h
|
|
+++
|
|
@@ -33,7 +33,10 @@
|
|
|
|
#include <cassert>
|
|
#include <iostream>
|
|
-#include <omp.h>
|
|
+#define omp_get_num_procs() 1
|
|
+#define omp_get_thread_num() 0
|
|
+#define omp_get_num_threads() 1
|
|
+#define omp_get_max_threads() 1
|
|
|
|
#include <sys/time.h>
|
|
|
|
diff -ruN
|
|
--- lib/interfaces/sapporoG6lib.cpp
|
|
+++
|
|
@@ -45,34 +45,9 @@
|
|
|
|
int g6_open_(int *id)
|
|
{
|
|
- //Check for a config file if its there use it
|
|
- id = id; //Make the compiler happy
|
|
- int *devList = NULL;
|
|
- int how_many = 0;
|
|
- FILE *fd;
|
|
- if ((fd = fopen("sapporo2.config", "r"))) {
|
|
- char line[256];
|
|
- fprintf(stderr, "sapporo2::open - config file is found\n");
|
|
- if(fgets(line, 256, fd) != NULL)
|
|
- sscanf(line, "%d", &how_many);
|
|
-
|
|
- //Read the devices we want to use
|
|
- if(how_many > 0)
|
|
- {
|
|
- devList = new int[how_many];
|
|
- for (int i = 0; i < how_many; i++) {
|
|
- if(fgets(line, 256, fd) != NULL)
|
|
- sscanf(line, "%d", &devList[i]);
|
|
- }
|
|
- }
|
|
- } else {
|
|
- fprintf(stderr," sapporo2::open - no config file is found \n");
|
|
- how_many = 0;
|
|
- }
|
|
- int res = g6_open_special(how_many, devList);
|
|
-
|
|
- delete[] devList;
|
|
-
|
|
+ int devList = *id;
|
|
+ int how_many = 1;
|
|
+ int res = g6_open_special(how_many, &devList);
|
|
return res;
|
|
}
|
|
|
|
diff -ruN
|
|
--- lib/Makefile
|
|
+++
|
|
@@ -1,11 +1,13 @@
|
|
-CXX = g++
|
|
-CC = gcc
|
|
-LD = g++
|
|
-F90 = ifort
|
|
+CXX ?= g++
|
|
+CC ?= gcc
|
|
+LD ?= g++
|
|
+F90 ?= ifort
|
|
|
|
.SUFFIXES: .o .cpp .ptx .cu
|
|
|
|
-CUDA_TK ?= /usr/local/cuda
|
|
+CUDAHOME ?= /usr/local/cuda
|
|
+CUDA_TK = $(CUDAHOME)
|
|
+OPTIMIZATION ?= 3
|
|
|
|
|
|
testRunFlags1=
|
|
@@ -24,7 +26,7 @@
|
|
testRunFlags3="-D TIMING_STATS=1"
|
|
endif
|
|
|
|
-OFLAGS = -g -O3 -Wall -Wextra -Wstrict-aliasing=2 -fopenmp
|
|
+OFLAGS = -g -O$(OPTIMIZATION) -Wall -Wextra -Wstrict-aliasing=2
|
|
|
|
#Use below if compiling with CPU_SUPPORT (SSE)
|
|
#CXXFLAGS += ${testRunFlags} -fPIC $(OFLAGS) -I$(CUDA_TK)/include -msse4
|
|
@@ -36,23 +38,14 @@
|
|
NVCC = $(CUDA_TK)/bin/nvcc
|
|
|
|
|
|
-# Support older CUDA versions out of the box
|
|
-NVCCVERSION=$(shell "${NVCC}" --version | grep ^Cuda | sed 's/^.* //g')
|
|
-ifeq "${NVCCVERSION}" "V5.5.22"
|
|
- NVCCFLAGS ?= -arch sm_20
|
|
-else
|
|
- NVCCFLAGS ?= -arch sm_30
|
|
-endif
|
|
-
|
|
-#NVCCFLAGS = -arch sm_35
|
|
-#NVCCFLAGS ?= -arch sm_30
|
|
-#NVCCFLAGS = -arch sm_20
|
|
+GPUARCH ?= sm_75
|
|
+NVCCFLAGS = -arch $(GPUARCH) -ccbin $(CXX)
|
|
NVCCFLAGS += ${testRunFlags}
|
|
|
|
# Use with Mac OS X
|
|
# NVCCFLAGS = -arch sm_12 -Xcompiler="-Duint=unsigned\ int"
|
|
|
|
-LDFLAGS = -lcuda -fopenmp
|
|
+LDFLAGS = -lcuda
|
|
|
|
|
|
INCLUDEPATH = ./include
|