added project files
This commit is contained in:
commit
1718e6287b
24 changed files with 3045 additions and 0 deletions
49
Makefile
Normal file
49
Makefile
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
# standard amuse configuration include
|
||||||
|
# config.mk will be made after ./configure has run
|
||||||
|
AMUSE_DIR?=../../../..
|
||||||
|
-include $(AMUSE_DIR)/config.mk
|
||||||
|
GPUARCH ?= 60
|
||||||
|
CUDAFLAGS += -arch=sm_$(GPUARCH)
|
||||||
|
######### ugly!!!! should be in the amuse base make file
|
||||||
|
|
||||||
|
|
||||||
|
MPICXX ?= mpicxx
|
||||||
|
|
||||||
|
CFLAGS += -Wall -g
|
||||||
|
CXXFLAGS += $(CFLAGS)
|
||||||
|
LDFLAGS += -lm $(MUSE_LD_FLAGS)
|
||||||
|
|
||||||
|
OBJS = interface.o
|
||||||
|
|
||||||
|
CODELIB = src/libetics.a
|
||||||
|
|
||||||
|
CODE_GENERATOR = $(AMUSE_DIR)/build.py
|
||||||
|
|
||||||
|
all: etics_worker
|
||||||
|
|
||||||
|
clean:
|
||||||
|
$(RM) *.so *.o *.pyc worker_code.cc worker_code.h
|
||||||
|
$(RM) *~ etics_worker worker_code.cc
|
||||||
|
make -C src clean
|
||||||
|
|
||||||
|
$(CODELIB):
|
||||||
|
make -C src libetics.a
|
||||||
|
|
||||||
|
worker_code.cc: interface.py
|
||||||
|
$(CODE_GENERATOR) --type=c interface.py EticsInterface -o $@
|
||||||
|
|
||||||
|
worker_code.h: interface.py
|
||||||
|
$(CODE_GENERATOR) --type=H interface.py EticsInterface -o $@
|
||||||
|
|
||||||
|
etics_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS)
|
||||||
|
$(MPICXX) $(CXXFLAGS) -L/usr/local/cuda/lib64 $< $(OBJS) $(CODELIB) -o $@ -lcudart
|
||||||
|
echo the above compilation directive is not nice
|
||||||
|
|
||||||
|
.SUFFIXES: .cu .o
|
||||||
|
|
||||||
|
.cu.o: $<
|
||||||
|
$(NVCC) -Xcompiler="$(CXXFLAGS)" -c -o $@ $<
|
||||||
|
|
||||||
|
|
||||||
|
# .cc.o: $<
|
||||||
|
# $(CXX) $(CXXFLAGS) -c -o $@ $<
|
||||||
102
README.md
Normal file
102
README.md
Normal file
|
|
@ -0,0 +1,102 @@
|
||||||
|
ETICS
|
||||||
|
=====
|
||||||
|
|
||||||
|
This is ETICS (Expansion Techniques In Collisionless Systems), a GPU (currently
|
||||||
|
**CUDA only**) N-body code which uses series expansion to calculate the
|
||||||
|
gravitational field. See more details in this publication:
|
||||||
|
|
||||||
|
Meiron, Y., Li, B., Holley-Bockelmann, K., & Spurzem, R. 2014, ApJ, 792, 98
|
||||||
|
|
||||||
|
http://adsabs.harvard.edu/abs/2014ApJ...792...98M
|
||||||
|
|
||||||
|
|
||||||
|
What's inside
|
||||||
|
-------------
|
||||||
|
|
||||||
|
- ETICS standalone program
|
||||||
|
- ETICS static library
|
||||||
|
- ETICS module for AMUSE
|
||||||
|
|
||||||
|
|
||||||
|
Prerequisites
|
||||||
|
-------------
|
||||||
|
|
||||||
|
- CUDA (>= 6; mandatory)
|
||||||
|
- HDF5 (optional)
|
||||||
|
- Boost (optional)
|
||||||
|
- AMUSE (mandatory only for AMUSE module)
|
||||||
|
|
||||||
|
To disable HDF5 [insert explanation here]
|
||||||
|
To disable Boost [insert explanation here]
|
||||||
|
|
||||||
|
|
||||||
|
Compilation
|
||||||
|
-----------
|
||||||
|
|
||||||
|
### Standalone program
|
||||||
|
|
||||||
|
make standalone
|
||||||
|
|
||||||
|
builds in `src/`.
|
||||||
|
|
||||||
|
|
||||||
|
### Static library
|
||||||
|
|
||||||
|
make library
|
||||||
|
|
||||||
|
builds in `src/`.
|
||||||
|
|
||||||
|
|
||||||
|
### AMUSE module
|
||||||
|
|
||||||
|
make
|
||||||
|
|
||||||
|
builds in top level directory. The whole `etics` directory has to be placed (or
|
||||||
|
linked) inside:
|
||||||
|
|
||||||
|
$AMUSE_DIR/src/amuse/community/
|
||||||
|
|
||||||
|
|
||||||
|
How to use
|
||||||
|
----------
|
||||||
|
|
||||||
|
The `file.ini` is a self-explanatory input file; if compiled without Boost, fill
|
||||||
|
in the relevant variables in file `noboost.inc` which is compiled into the
|
||||||
|
executable (any change of parameters requires re-compilation). Start simulation
|
||||||
|
with:
|
||||||
|
|
||||||
|
./etics file.ini
|
||||||
|
|
||||||
|
Any input file name is acceptable.
|
||||||
|
|
||||||
|
|
||||||
|
Known issues
|
||||||
|
------------
|
||||||
|
|
||||||
|
* No MEX
|
||||||
|
|
||||||
|
The MEX (Multipole Expansion) method is not available in this version; thus, the
|
||||||
|
SCF (Self-Consistent Field) method is the only expansion technique available.
|
||||||
|
The ETICS program has been heavily restructured and the MEX routines are no
|
||||||
|
longer compatible. Hopefully this will be fixed.
|
||||||
|
|
||||||
|
* Hardcoded launch configuration
|
||||||
|
|
||||||
|
For at least one of the CUDA kernels, for various reasons, it seems that "brute
|
||||||
|
force" search is needed to find the optimal launch configuration. Currently it
|
||||||
|
is hardcoded, and a primitive search routine is provided.
|
||||||
|
|
||||||
|
* Problem for particles with |θ| << 1
|
||||||
|
|
||||||
|
Due to using an unstable recursion relation to calculate the Associated Legendre
|
||||||
|
polynomials, particles in a narrow cone around the z-axis cannot be considered
|
||||||
|
accurately. This means that they give an erroneous contribution to the
|
||||||
|
gravitational field and also are assigned erroneous force and potential. The
|
||||||
|
size of this cone increases with the angular term of the expansion. To partly
|
||||||
|
solve this, the current (ugly) fix is to only consider particles with cos(θ) >
|
||||||
|
0.999 at the monopole level. This is not so bad because the monopole is always
|
||||||
|
the most dominant term (and is error free) and the number of particles in this
|
||||||
|
cone is small and they are transient (i.e. they come out of it usually in a
|
||||||
|
small number of steps). A somewhat better solution is to make this arbitrary
|
||||||
|
cutoff of 0.999 l-dependent, and an even better solution would be to use an
|
||||||
|
asymptotic expression for the Associated Legendre polynomials.
|
||||||
26
WORKPLAN.md
Normal file
26
WORKPLAN.md
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
The idea is to be able to handle mergers and sinking galactic satellites by
|
||||||
|
having each subsystem centered on one (massless?) particle, and have the
|
||||||
|
expansion calculated separately in each center of mass system (only for
|
||||||
|
particles belonging to it). After the internal forces are calculated for each
|
||||||
|
subsystem, the mutual forces are calculated as well, and all the particles are
|
||||||
|
integrated in a global coordinate system.
|
||||||
|
|
||||||
|
Technically, most of the SCF module has to become a class. Some of the arrays
|
||||||
|
like RadCoeff and AngCoeff (anything else?) should remain global and would be
|
||||||
|
initialized by some global initialization routine. Then each subsystem would be
|
||||||
|
a different instance of the SCF class, with its own list of coefficients and
|
||||||
|
particle cache. It seems that all subsystems can (should?) share the integrator
|
||||||
|
object. Maybe (probably?) the integrator should be agnostic to how particles are
|
||||||
|
divided into subsystems, and the call to CalculateGravity() would take care of
|
||||||
|
everything. In this case we are talking about a class for the current SCF
|
||||||
|
routines but also some wrapper that knows how to CalculateGravity() by calling
|
||||||
|
the appropriate methods for each subsystem's object, and also work out the
|
||||||
|
mutual gravity. Need a good name for this intermediate level between the
|
||||||
|
integrator and the SCF core.
|
||||||
|
|
||||||
|
This intermediate level or even a different class would deal how to associate
|
||||||
|
particles to the different subsystems and how to figure out the initial
|
||||||
|
conditions. Also, the central particles might need some special treatment
|
||||||
|
(possibly the integrator considers them normal particles but this other level
|
||||||
|
knows not to use them for force calculation; I should think whether those should
|
||||||
|
be omitted from output, but it might be helpful to keep them).
|
||||||
1
__init__.py
Normal file
1
__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
# generated file
|
||||||
126
format-convert.py
Normal file
126
format-convert.py
Normal file
|
|
@ -0,0 +1,126 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# This script converts phiGRAPE snapshot files to HDF5.
|
||||||
|
# Author: Yohai Meiron
|
||||||
|
|
||||||
|
import argparse, os, sys, numpy, re
|
||||||
|
|
||||||
|
# Set commandline parameters
|
||||||
|
parser = argparse.ArgumentParser(description='Convert phiGRPAE snapshot files into HDF5 (h5part) format.')
|
||||||
|
parser.add_argument('InputFile', metavar='InputFile', type=str, help='first input file (numbers in the file name must represet numerical order)')
|
||||||
|
parser.add_argument('--num', type=int, help='number of snapshot to convert (by default, all are converted)')
|
||||||
|
parser.add_argument('-o', '--output', default='output', help='output file name (or pattern)')
|
||||||
|
parser.add_argument('--particles', type=int, help='number of particles (if not the whole thing)')
|
||||||
|
parser.add_argument('--maxfs', type=float, help='maximum size in MB of a single HDF5 output')
|
||||||
|
parser.add_argument('--single', action='store_true', help='use single precision instead of double')
|
||||||
|
parser.add_argument('--bh', type=int, nargs='?', const=1, default=0, help='number of black holes to grab from the end of the file')
|
||||||
|
|
||||||
|
# Print message and parse input
|
||||||
|
print 'Welcome to Yohai\'s format converter! (phiGRAPE ---> HDF5)\n'
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Parameter conflict tests
|
||||||
|
if (args.bh > 0) and (args.particles==None):
|
||||||
|
parser.print_usage()
|
||||||
|
print '%s: error: cannot --bh without --particles' % sys.argv[0]
|
||||||
|
sys.exit(2)
|
||||||
|
Nbh = args.bh
|
||||||
|
|
||||||
|
# Verify h5py
|
||||||
|
try:
|
||||||
|
import h5py
|
||||||
|
except:
|
||||||
|
print '%s: error: booooo, can\'t find h5py.' % sys.argv[0]
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Check if input file exists, separate name from path and get file name pattern
|
||||||
|
if not os.path.exists(args.InputFile):
|
||||||
|
print 'File `%s` not found!' % args.InputFile
|
||||||
|
sys.exit(1)
|
||||||
|
InputFileName = os.path.basename(args.InputFile)
|
||||||
|
InputFilePath = os.path.dirname(os.path.realpath(args.InputFile))
|
||||||
|
Pattern = re.sub(r'\d', r'\d', InputFileName)
|
||||||
|
Pattern = '^' + Pattern + "$"
|
||||||
|
r = re.compile(Pattern)
|
||||||
|
|
||||||
|
# Look for the other files
|
||||||
|
ListDir = os.listdir(InputFilePath)
|
||||||
|
FileList = []
|
||||||
|
for Filename in ListDir:
|
||||||
|
m = r.search(Filename)
|
||||||
|
if not m is None:
|
||||||
|
FileList.append(Filename)
|
||||||
|
FileList.sort()
|
||||||
|
FileList = FileList[FileList.index(InputFileName):]
|
||||||
|
|
||||||
|
# Verify that enough files were found; shorten list if needed
|
||||||
|
if (not args.num==None) and args.num > len(FileList):
|
||||||
|
print 'Requested to convert %d files, but only %d found!' % (args.num, len(FileList))
|
||||||
|
sys.exit(1)
|
||||||
|
if (not args.num==None): num = args.num
|
||||||
|
else: num = len(FileList)
|
||||||
|
FileList = FileList[:num]
|
||||||
|
|
||||||
|
print 'Will convert %d files in the directory `%s`.' % (num, InputFilePath)
|
||||||
|
|
||||||
|
NumperOfStepsPerFile = 2147483647
|
||||||
|
OutputFileNum = 2
|
||||||
|
StepCounter = 0
|
||||||
|
FirstTime = True
|
||||||
|
|
||||||
|
for Filename in FileList:
|
||||||
|
if StepCounter > NumperOfStepsPerFile:
|
||||||
|
OutputFile.close()
|
||||||
|
OutputFile = h5py.File('output(%d).h5part' % OutputFileNum, 'w')
|
||||||
|
StepCounter = 0
|
||||||
|
OutputFileNum += 1
|
||||||
|
FullFilename = os.path.join(InputFilePath, Filename)
|
||||||
|
with open(FullFilename, 'r') as File:
|
||||||
|
Line = File.readline()
|
||||||
|
try:
|
||||||
|
StepNumer = int(Line)
|
||||||
|
except:
|
||||||
|
print 'Error to read snapshot number (line 1 in file `%s`)' % Filename
|
||||||
|
sys.exit(1)
|
||||||
|
File.readline()
|
||||||
|
Line = File.readline()
|
||||||
|
try:
|
||||||
|
Time = numpy.double(Line)
|
||||||
|
except:
|
||||||
|
print 'Error to read snapshot time (line 3 in file `%s`)' % Filename
|
||||||
|
sys.exit(1)
|
||||||
|
Data = numpy.loadtxt(FullFilename, skiprows=3)
|
||||||
|
if (not args.particles==None) and (args.particles > Data.shape[0]):
|
||||||
|
print 'Requested to use %d paticles, but file %s has only %d!' % (args.particles, Filename, Data.shape[0])
|
||||||
|
sys.exit(1)
|
||||||
|
if (not args.particles==None): N = args.particles
|
||||||
|
else: N = Data.shape[0]
|
||||||
|
if FirstTime:
|
||||||
|
if Nbh == 0: print 'Reading %d particles.' % (N)
|
||||||
|
else: print 'Reading %d normal particles + %d black hole(s).' % (N, Nbh)
|
||||||
|
if args.single: EstimatedDataSize = 32*N/1024.0**2
|
||||||
|
else: EstimatedDataSize = 60*N/1024.0**2
|
||||||
|
if not args.maxfs==None: NumperOfStepsPerFile = (int)(args.maxfs/EstimatedDataSize)
|
||||||
|
FirstTime = False
|
||||||
|
if NumperOfStepsPerFile > num: OutputFile = h5py.File('output.h5part', 'w')
|
||||||
|
else: OutputFile = h5py.File('output(1).h5part', 'w')
|
||||||
|
|
||||||
|
DataBH = Data[-Nbh-1:-1]
|
||||||
|
Data = Data[:N]
|
||||||
|
Data = numpy.vstack((Data, DataBH))
|
||||||
|
Group = OutputFile.create_group('Step#%d' % (StepNumer))
|
||||||
|
Group.attrs['Time'] = Time
|
||||||
|
Group.attrs['TotalN'] = N + Nbh
|
||||||
|
if args.single: T='f'
|
||||||
|
else: T='d'
|
||||||
|
DataSet = Group.create_dataset('ID', (N+Nbh,), dtype='i'); DataSet[...] = Data[:,0]
|
||||||
|
DataSet = Group.create_dataset('Mass', (N+Nbh,), dtype=T); DataSet[...] = Data[:,1]
|
||||||
|
DataSet = Group.create_dataset('X', (N+Nbh,), dtype=T); DataSet[...] = Data[:,2]
|
||||||
|
DataSet = Group.create_dataset('Y', (N+Nbh,), dtype=T); DataSet[...] = Data[:,3]
|
||||||
|
DataSet = Group.create_dataset('Z', (N+Nbh,), dtype=T); DataSet[...] = Data[:,4]
|
||||||
|
DataSet = Group.create_dataset('VX', (N+Nbh,), dtype=T); DataSet[...] = Data[:,5]
|
||||||
|
DataSet = Group.create_dataset('VY', (N+Nbh,), dtype=T); DataSet[...] = Data[:,6]
|
||||||
|
DataSet = Group.create_dataset('VZ', (N+Nbh,), dtype=T); DataSet[...] = Data[:,7]
|
||||||
|
OutputFile.flush()
|
||||||
|
print 'Completed /Step#%d' % (StepNumer)
|
||||||
|
StepCounter += 1
|
||||||
|
OutputFile.close()
|
||||||
378
interface.cu
Normal file
378
interface.cu
Normal file
|
|
@ -0,0 +1,378 @@
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "src/common.hpp"
|
||||||
|
#include "src/scf.hpp"
|
||||||
|
#include "src/integrate.hpp"
|
||||||
|
#include <thrust/host_vector.h>
|
||||||
|
#include <thrust/device_vector.h>
|
||||||
|
#include <thrust/inner_product.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace etics;
|
||||||
|
|
||||||
|
Integrator IntegratorObj;
|
||||||
|
|
||||||
|
// GLOBAL VARIABLES
|
||||||
|
Real ConstantStep = 0.001953125;
|
||||||
|
Real T, Step, dT1, dT2, Tcrit;
|
||||||
|
int N;
|
||||||
|
extern Real mass;
|
||||||
|
extern int k3gs, k3bs, k4gs, k4bs;
|
||||||
|
|
||||||
|
/*extern*/ Particle *hostP;
|
||||||
|
thrust::host_vector<Particle> PPP;
|
||||||
|
/*extern*/ thrust::device_vector<Particle> PPPPP;
|
||||||
|
|
||||||
|
// /*extern*/ thrust::device_vector<vec3> F0xxxxxx;
|
||||||
|
// /*extern*/ thrust::device_vector<Real> PotPotPot; // ugly name
|
||||||
|
// /*extern*/ thrust::device_vector<vec3> F1xxxxxx;
|
||||||
|
/*extern*/ vec3 *F1_ptr;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// extern Particle *P_h;
|
||||||
|
// extern thrust::device_vector<Particle> P;
|
||||||
|
//
|
||||||
|
// extern thrust::device_vector<vec3> F0;
|
||||||
|
// extern thrust::device_vector<Real> Potential;
|
||||||
|
// extern thrust::device_vector<vec3> F1;
|
||||||
|
|
||||||
|
|
||||||
|
void CommitParticles();
|
||||||
|
// void InitSCF(int N);
|
||||||
|
// void ForceSCF(int N, Real *Potential, Particle *PPPPP, vec3 *F);
|
||||||
|
void DriftStep();
|
||||||
|
void KickStep();
|
||||||
|
void CommitForces();
|
||||||
|
int InitilizeIntegratorMemory();
|
||||||
|
#define PTR(x) (thrust::raw_pointer_cast((x).data()))
|
||||||
|
|
||||||
|
|
||||||
|
int initialize_code() {
|
||||||
|
#warning initscf should be here!!! just the problem is that N is requires, so fix it
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int recommit_parameters() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int commit_parameters() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int new_particle(int *id, double mass, double x, double y, double z, double vx, double vy, double vz, double radius) {
|
||||||
|
Particle p;
|
||||||
|
p.m = mass;
|
||||||
|
p.pos = vec3(x, y, z);
|
||||||
|
p.vel = vec3(vx, vy, vz);
|
||||||
|
PPP.push_back(p);
|
||||||
|
*id = N;
|
||||||
|
N++;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int commit_particles() {
|
||||||
|
// cerr << "calling commit_particles" << endl;
|
||||||
|
cerr << "we did commit_particles()" << endl;
|
||||||
|
etics::scf::Init(N, 180, 64, 2605, 384);
|
||||||
|
#warning hardcoded launch configuration
|
||||||
|
IntegratorObj = Integrator(&PPP[0], N);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CenterMassFunctor {
|
||||||
|
Real ConstantMass;
|
||||||
|
__host__ __device__ CenterMassFunctor(Real _ConstantMass) : ConstantMass(_ConstantMass) {}
|
||||||
|
__host__ __device__ vec3 operator() (const Particle &p) const {return p.pos;}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ShiftFunctor {
|
||||||
|
vec3 Shift;
|
||||||
|
__host__ __device__ ShiftFunctor(vec3 _Shift) : Shift(_Shift) {}
|
||||||
|
__host__ __device__ Particle operator() (Particle &p) const {
|
||||||
|
p.pos += Shift;
|
||||||
|
p.CalculateR2();
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int counttt = 0;
|
||||||
|
bool FirstStep = true;
|
||||||
|
int evolve_model(double t) {
|
||||||
|
// PPPPP = PPP;
|
||||||
|
cerr << "call evolve_model t_end = " << t << " dt = " << t - T << "****************" << endl;
|
||||||
|
|
||||||
|
// vec3 CenterMass = thrust::transform_reduce(PPPPP.begin(), PPPPP.end(), CenterMassFunctor(mass), vec3(0,0,0), thrust::plus<vec3>());
|
||||||
|
// CenterMass = CenterMass * (1.0/N); //ugly should divide by the total mass
|
||||||
|
// cerr << "CENTER OF MASS " << CenterMass.x << endl;
|
||||||
|
//
|
||||||
|
// // thrust::transform(PPPPP.begin(), PPPPP.end(), PPPPP.begin(), ShiftFunctor(-CenterMass));
|
||||||
|
//
|
||||||
|
// vec3 CenterMass2 = thrust::transform_reduce(PPPPP.begin(), PPPPP.end(), CenterMassFunctor(mass), vec3(0,0,0), thrust::plus<vec3>());
|
||||||
|
// CenterMass2 = CenterMass2 * (1.0/N); //ugly should divide by the total mass
|
||||||
|
// cerr << "CENTER OF MASS after correction " << CenterMass2.x << endl;
|
||||||
|
//
|
||||||
|
|
||||||
|
Step = ConstantStep;
|
||||||
|
while (T <= t) {
|
||||||
|
// Take the drift step.
|
||||||
|
IntegratorObj.DriftStep(Step);
|
||||||
|
|
||||||
|
// Calculate the forces in the new positions.
|
||||||
|
// ForceSCF(N, PTR(PotPotPot), PTR(PPPPP), PTR(F1xxxxxx));
|
||||||
|
IntegratorObj.CalculateGravity();
|
||||||
|
|
||||||
|
// Finish by taking the kick step.
|
||||||
|
// The kick functor also "commits" the predicted forces into the "acc" member.
|
||||||
|
IntegratorObj.KickStep(Step);
|
||||||
|
|
||||||
|
// N particles were implicitly propagated in this iteration.
|
||||||
|
|
||||||
|
// Advance global time.
|
||||||
|
T += Step;
|
||||||
|
}
|
||||||
|
//
|
||||||
|
// vec3 CenterMass3 = thrust::transform_reduce(PPPPP.begin(), PPPPP.end(), CenterMassFunctor(mass), vec3(0,0,0), thrust::plus<vec3>());
|
||||||
|
// CenterMass3 = CenterMass3 * (1.0/N); //ugly should divide by the total mass
|
||||||
|
// cerr << "CENTER OF MASS after evolve " << CenterMass3.x << endl;
|
||||||
|
//
|
||||||
|
// cerr << "done evolve; transform" << endl;
|
||||||
|
// // thrust::transform(PPPPP.begin(), PPPPP.end(), PPPPP.begin(), ShiftFunctor(+CenterMass)); // antishift
|
||||||
|
//
|
||||||
|
// vec3 CenterMass4 = thrust::transform_reduce(PPPPP.begin(), PPPPP.end(), CenterMassFunctor(mass), vec3(0,0,0), thrust::plus<vec3>());
|
||||||
|
// CenterMass4 = CenterMass4 * (1.0/N); //ugly should divide by the total mass
|
||||||
|
// cerr << "CENTER OF MASS after antishift " << CenterMass4.x << endl;
|
||||||
|
//
|
||||||
|
// cerr << "done transform; download to RAM" << endl;
|
||||||
|
IntegratorObj.CopyParticlesToHost(&PPP[0]);
|
||||||
|
//
|
||||||
|
// cerr << "done download; return" << endl;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int set_begin_time(double time_begin) {
|
||||||
|
// cerr << "called set_begin_time(" << time_begin << endl;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int get_begin_time(double *time_begin) {
|
||||||
|
*time_begin = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_mass(int index_of_the_particle, double *mass) {
|
||||||
|
*mass = PPP[index_of_the_particle].m;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_time(double *time) {
|
||||||
|
*time = T;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int set_mass(int index_of_the_particle, double mass) {
|
||||||
|
// cerr << "calling set_mass" << endl;
|
||||||
|
PPP[index_of_the_particle].m = mass;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_index_of_first_particle(int *index_of_the_particle) {
|
||||||
|
// cerr << "calling get_index_of_first_particle" << endl;
|
||||||
|
*index_of_the_particle = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_total_radius(double *radius) {
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_potential_at_point(double soft, double x, double y, double z, double *phi) {
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_total_mass(double *mass) {
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int set_eps2(double epsilon_squared) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_eps2(double *epsilon_squared) {
|
||||||
|
*epsilon_squared = 0;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_number_of_particles(int *number_of_particles) {
|
||||||
|
// cerr << "calling get_number_of_particles" << endl;
|
||||||
|
*number_of_particles = PPP.size();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_index_of_next_particle(int index_of_the_particle, int *index_of_the_next_particle) {
|
||||||
|
*index_of_the_next_particle = index_of_the_particle + 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int delete_particle(int index_of_the_particle) {
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_potential(int index_of_the_particle, double *potential) {
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int synchronize_model() {
|
||||||
|
// cerr << "calling synchronize_model" << endl;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int set_state(int index_of_the_particle, double mass, double radius, double x, double y, double z, double vx, double vy, double vz) {
|
||||||
|
cerr << "calling set_state" << endl;
|
||||||
|
// cerr << "calling set_state" << endl;
|
||||||
|
PPP[index_of_the_particle].pos = vec3(x, y, z);
|
||||||
|
PPP[index_of_the_particle].vel = vec3(vx, vy, vz);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_state(int index_of_the_particle, double *mass, double *radius, double *x, double *y, double *z, double *vx, double *vy, double *vz) {
|
||||||
|
// cerr << "calling get_state" << endl;
|
||||||
|
Particle p = PPP[index_of_the_particle];
|
||||||
|
*mass = index_of_the_particle;
|
||||||
|
*x = p.pos.x;
|
||||||
|
*y = p.pos.y;
|
||||||
|
*z = p.pos.z;
|
||||||
|
*vx = p.vel.x;
|
||||||
|
*vy = p.vel.y;
|
||||||
|
*vz = p.vel.z;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_time_step(double *time_step) {
|
||||||
|
// cerr << "calling get_time_step" << endl;
|
||||||
|
*time_step = ConstantStep;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int set_time_step(double time_step) {
|
||||||
|
cerr << "calling set_time_step" << endl;
|
||||||
|
ConstantStep = time_step;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_launch_config(int **launch_config) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int set_launch_config(int *launch_config) {
|
||||||
|
// k3gs = launch_config[0];
|
||||||
|
// k3bs = launch_config[1];
|
||||||
|
// k4gs = launch_config[2];
|
||||||
|
// k4bs = launch_config[3];
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int recommit_particles() {
|
||||||
|
// cerr << "calling recommit_particles" << endl;
|
||||||
|
#warning put something here
|
||||||
|
cerr << "hhhhhhhhhhhhhhhhhhhhhhhhhhhhhh" << endl;
|
||||||
|
PPPPP = PPP;
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int set_acceleration(int index_of_the_particle, double ax, double ay, double az) {
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_center_of_mass_position(double *x, double *y, double *z) {
|
||||||
|
// vec3 CenterMass = thrust::transform_reduce(PPPPP.begin(), PPPPP.end(), CenterMassFunctor(mass), vec3(0,0,0), thrust::plus<vec3>());
|
||||||
|
// CenterMass = CenterMass * (1.0/N); //ugly should divide by the total mass
|
||||||
|
// *x = CenterMass.x;
|
||||||
|
// *y = CenterMass.y;
|
||||||
|
// *z = CenterMass.z;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_center_of_mass_velocity(double *vx, double *vy, double *vz) {
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_radius(int index_of_the_particle, double *radius) {
|
||||||
|
*radius = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int set_radius(int index_of_the_particle, double radius) {
|
||||||
|
// should store the radius somewhere but completely ignored by code
|
||||||
|
// cerr << "calling set_radius" << endl;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int cleanup_code() {
|
||||||
|
IntegratorObj.~Integrator();
|
||||||
|
cerr << "bye" << endl;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_gravity_at_point(double soft, double x, double y, double z, double *forcex, double *forcey, double *forcez) {
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_velocity(int index_of_the_particle, double *vx, double *vy, double *vz) {
|
||||||
|
*vx = PPP[index_of_the_particle].vel.x;
|
||||||
|
*vy = PPP[index_of_the_particle].vel.y;
|
||||||
|
*vz = PPP[index_of_the_particle].vel.z;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_position(int index_of_the_particle, double *x, double *y, double *z) {
|
||||||
|
*x = PPP[index_of_the_particle].pos.x;
|
||||||
|
*y = PPP[index_of_the_particle].pos.y;
|
||||||
|
*z = PPP[index_of_the_particle].pos.z;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool already_printed = false;
|
||||||
|
|
||||||
|
int set_position(int index_of_the_particle, double x, double y, double z) {
|
||||||
|
if (already_printed == false) {
|
||||||
|
cerr << "calling set_position" << endl;
|
||||||
|
cerr << "---------index_of_the_particle=" << index_of_the_particle << endl;
|
||||||
|
cerr << "--------- x" << PPP[index_of_the_particle].pos.x << "--->" << x << endl;
|
||||||
|
cerr << "--------- y" << PPP[index_of_the_particle].pos.y << "--->" << y << endl;
|
||||||
|
cerr << "--------- z" << PPP[index_of_the_particle].pos.z << "--->" << z << endl;
|
||||||
|
already_printed = true;
|
||||||
|
}
|
||||||
|
PPP[index_of_the_particle].pos = vec3(x, y, z);
|
||||||
|
counttt++;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_acceleration(int index_of_the_particle, double *ax, double *ay, double *az) {
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int set_velocity(int index_of_the_particle, double vx, double vy, double vz) {
|
||||||
|
// cerr << "calling set_velocity" << endl;
|
||||||
|
PPP[index_of_the_particle].vel = vec3(vx, vy, vz);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int get_kinetic_energy(double *kinetic_energy) {
|
||||||
|
*kinetic_energy = IntegratorObj.KineticEnergy();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_potential_energy(double *potential_energy) {
|
||||||
|
*potential_energy = IntegratorObj.PotentialEnergy();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int update_force_potential_arrays(double tttt) {
|
||||||
|
#warning time shouldnt be a parameter to this one
|
||||||
|
// ForceSCF(N, PTR(PotPotPot), PTR(PPPPP), PTR(F0xxxxxx));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
160
interface.py
Normal file
160
interface.py
Normal file
|
|
@ -0,0 +1,160 @@
|
||||||
|
from amuse.community import *
|
||||||
|
from amuse.community.interface.gd import GravitationalDynamics
|
||||||
|
from amuse.community.interface.gd import GravitationalDynamicsInterface
|
||||||
|
|
||||||
|
class EticsInterface(CodeInterface, GravitationalDynamicsInterface, LiteratureReferencesMixIn):
|
||||||
|
"""
|
||||||
|
.. [#] Meiron, Y., Li, B., Holley-Bockelmann, K., & Spurzem, R. 2014, ApJ, 792, 98:
|
||||||
|
.. [#] ... "Expansion techniques for collisionless stellar dynamical simulations"
|
||||||
|
"""
|
||||||
|
|
||||||
|
include_headers = ['worker_code.h']
|
||||||
|
|
||||||
|
def __init__(self, **keyword_arguments):
|
||||||
|
CodeInterface.__init__(self, name_of_the_worker='etics_worker', **keyword_arguments)
|
||||||
|
LiteratureReferencesMixIn.__init__(self)
|
||||||
|
|
||||||
|
@legacy_function
|
||||||
|
def new_particle():
|
||||||
|
function = LegacyFunctionSpecification()
|
||||||
|
function.can_handle_array = True
|
||||||
|
function.addParameter('index_of_the_particle', dtype='int32', direction=function.OUT)
|
||||||
|
function.addParameter('mass', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The mass of the particle')
|
||||||
|
function.addParameter('x', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The initial position vector of the particle')
|
||||||
|
function.addParameter('y', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The initial position vector of the particle')
|
||||||
|
function.addParameter('z', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The initial position vector of the particle')
|
||||||
|
function.addParameter('vx', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The initial velocity vector of the particle')
|
||||||
|
function.addParameter('vy', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The initial velocity vector of the particle')
|
||||||
|
function.addParameter('vz', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The initial velocity vector of the particle')
|
||||||
|
function.addParameter('radius', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The radius of the particle', default = 0)
|
||||||
|
function.result_type = 'int32'
|
||||||
|
return function
|
||||||
|
|
||||||
|
@legacy_function
|
||||||
|
def set_state():
|
||||||
|
function = LegacyFunctionSpecification()
|
||||||
|
function.can_handle_array = True
|
||||||
|
function.addParameter('index_of_the_particle', dtype='int32', direction=function.IN)
|
||||||
|
function.addParameter('mass', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The mass of the particle')
|
||||||
|
function.addParameter('radius', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The radius of the particle')
|
||||||
|
function.addParameter('x', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The initial position vector of the particle')
|
||||||
|
function.addParameter('y', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The initial position vector of the particle')
|
||||||
|
function.addParameter('z', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The initial position vector of the particle')
|
||||||
|
function.addParameter('vx', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The initial velocity vector of the particle')
|
||||||
|
function.addParameter('vy', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The initial velocity vector of the particle')
|
||||||
|
function.addParameter('vz', dtype='float64', direction=function.IN,
|
||||||
|
description = 'The initial velocity vector of the particle')
|
||||||
|
function.result_type = 'int32'
|
||||||
|
return function
|
||||||
|
|
||||||
|
@legacy_function
|
||||||
|
def get_state():
|
||||||
|
function = LegacyFunctionSpecification()
|
||||||
|
function.can_handle_array = True
|
||||||
|
function.addParameter('index_of_the_particle', dtype='int32', direction=function.IN)
|
||||||
|
function.addParameter('mass', dtype='float64', direction=function.OUT,
|
||||||
|
description = 'The mass of the particle')
|
||||||
|
function.addParameter('radius', dtype='float64', direction=function.OUT,
|
||||||
|
description = 'The radius of the particle')
|
||||||
|
function.addParameter('x', dtype='float64', direction=function.OUT,
|
||||||
|
description = 'The initial position vector of the particle')
|
||||||
|
function.addParameter('y', dtype='float64', direction=function.OUT,
|
||||||
|
description = 'The initial position vector of the particle')
|
||||||
|
function.addParameter('z', dtype='float64', direction=function.OUT,
|
||||||
|
description = 'The initial position vector of the particle')
|
||||||
|
function.addParameter('vx', dtype='float64', direction=function.OUT,
|
||||||
|
description = 'The initial velocity vector of the particle')
|
||||||
|
function.addParameter('vy', dtype='float64', direction=function.OUT,
|
||||||
|
description = 'The initial velocity vector of the particle')
|
||||||
|
function.addParameter('vz', dtype='float64', direction=function.OUT,
|
||||||
|
description = 'The initial velocity vector of the particle')
|
||||||
|
function.result_type = 'int32'
|
||||||
|
return function
|
||||||
|
|
||||||
|
@legacy_function
|
||||||
|
def evolve_model():
|
||||||
|
function = LegacyFunctionSpecification()
|
||||||
|
function.can_handle_array = True
|
||||||
|
function.addParameter('dt', dtype='float64', direction=function.IN)
|
||||||
|
function.result_type = 'int32'
|
||||||
|
return function
|
||||||
|
|
||||||
|
@legacy_function
|
||||||
|
def get_number_of_particles():
|
||||||
|
function = LegacyFunctionSpecification()
|
||||||
|
function.addParameter('number_of_particles', dtype='int32', direction=function.OUT, description = 'number of particles')
|
||||||
|
function.result_type = 'int32'
|
||||||
|
return function
|
||||||
|
|
||||||
|
@legacy_function
|
||||||
|
def set_time_step():
|
||||||
|
function = LegacyFunctionSpecification()
|
||||||
|
function.addParameter('time_step', dtype='float64', direction=function.IN, description = 'time step')
|
||||||
|
function.result_type = 'int32'
|
||||||
|
return function
|
||||||
|
|
||||||
|
@legacy_function
|
||||||
|
def update_force_potential_arrays():
|
||||||
|
function = LegacyFunctionSpecification()
|
||||||
|
function.can_handle_array = True
|
||||||
|
function.addParameter('dt', dtype='float64', direction=function.IN)
|
||||||
|
function.result_type = 'int32'
|
||||||
|
return function
|
||||||
|
|
||||||
|
class Etics(GravitationalDynamics):
|
||||||
|
def __init__(self, convert_nbody = None, **keyword_arguments):
|
||||||
|
legacy_interface = EticsInterface(**keyword_arguments)
|
||||||
|
GravitationalDynamics.__init__(self, legacy_interface, convert_nbody, **keyword_arguments)
|
||||||
|
|
||||||
|
def define_parameters(self, object):
|
||||||
|
object.add_method_parameter(
|
||||||
|
'get_begin_time',
|
||||||
|
'set_begin_time',
|
||||||
|
'begin_time',
|
||||||
|
'model time to start the simulation at',
|
||||||
|
default_value = 0.0 | nbody_system.time
|
||||||
|
)
|
||||||
|
object.add_method_parameter(
|
||||||
|
'get_time_step',
|
||||||
|
'set_time_step',
|
||||||
|
'time_step',
|
||||||
|
'constant timestep for iteration',
|
||||||
|
default_value = 0.001953125 | nbody_system.time
|
||||||
|
)
|
||||||
|
|
||||||
|
def define_methods(self, object):
|
||||||
|
GravitationalDynamics.define_methods(self, object)
|
||||||
|
# Define some shortcuts for better readability.
|
||||||
|
M = nbody_system.mass
|
||||||
|
L = nbody_system.length
|
||||||
|
V = nbody_system.speed
|
||||||
|
T = nbody_system.time
|
||||||
|
object.add_method('new_particle', (M,L,L,L,V,V,V,L), (object.INDEX, object.ERROR_CODE))
|
||||||
|
object.add_method('set_state', (object.INDEX, M,L,L,L,L,V,V,V), (object.ERROR_CODE))
|
||||||
|
object.add_method('get_state', (object.INDEX), (M,L,L,L,L,V,V,V, object.ERROR_CODE))
|
||||||
|
object.add_method('set_time_begin', (T), (object.ERROR_CODE))
|
||||||
|
object.add_method('get_time_begin', (), (T, object.ERROR_CODE))
|
||||||
|
object.add_method('get_number_of_particles', (), (units.none, object.ERROR_CODE))
|
||||||
|
object.add_method('get_time_step', (), (T, object.ERROR_CODE))
|
||||||
|
object.add_method('set_time_step', (T), (object.ERROR_CODE))
|
||||||
|
object.add_method('update_force_potential_arrays', (T), (object.ERROR_CODE))
|
||||||
|
|
||||||
|
def define_particle_sets(self, object):
|
||||||
|
GravitationalDynamics.define_particle_sets(self, object)
|
||||||
|
|
||||||
|
|
||||||
72
src/Makefile
Normal file
72
src/Makefile
Normal file
|
|
@ -0,0 +1,72 @@
|
||||||
|
ETICS_LMAX ?= 2
|
||||||
|
ETICS_NMAX ?= 10
|
||||||
|
GPUARCH ?= sm_75
|
||||||
|
CUDAHOME ?= /usr/local/cuda
|
||||||
|
OPTIMIZATION ?= 3
|
||||||
|
|
||||||
|
#Optional CPPFLAGS: -DETICS_HDF5 -DETICS_BOOST -DETICS_MPI
|
||||||
|
#Note that -DETICS_MPI is /not/ needed if ETICS is used as a library
|
||||||
|
CPPFLAGS = -DSCF -DLMAX=$(ETICS_LMAX) -DNMAX=$(ETICS_NMAX)
|
||||||
|
|
||||||
|
CXX ?= g++
|
||||||
|
NVCC ?= $(CUDAHOME)/bin/nvcc
|
||||||
|
LIB += -lm -L$(CUDAHOME)/lib64 -lcudadevrt -lcudart -lcuda
|
||||||
|
CXXFLAGS += -O$(OPTIMIZATION)
|
||||||
|
CUDAFLAGS += -arch=$(GPUARCH)
|
||||||
|
|
||||||
|
# If -DETICS_HDF5 is used, please specify the following
|
||||||
|
#H5HOME = /home/meiron/local
|
||||||
|
#INC += -I$(H5HOME)/include
|
||||||
|
#LIB += -L$(H5HOME)/lib $(H5HOME)/lib/libhdf5_hl_cpp.a $(H5HOME)/lib/libhdf5_cpp.a $(H5HOME)/lib/libhdf5_hl.a $(H5HOME)/lib/libhdf5.a -lz -ldl -lm
|
||||||
|
#LDFLAGS += -Wl,-rpath -Wl,$(H5HOME)/lib
|
||||||
|
|
||||||
|
# If -DETICS_BOOST is used, please specify the following
|
||||||
|
#BOOSTHOME ?= /home/meiron/local
|
||||||
|
#INC += -I$(BOOSTHOME)/include
|
||||||
|
|
||||||
|
# If -DETICS_MPI is used, please specify the following
|
||||||
|
#MPIHOME ?= /home/meiron/local
|
||||||
|
#INC += -I$(MPIHOME)/include
|
||||||
|
#LIB += -L$(MPIHOME)/lib -lmpi
|
||||||
|
#CXXFLAGS += -pthread
|
||||||
|
#LDFLAGS += -Wl,-rpath -Wl,$(MPIHOME) -Wl,--enable-new-dtags
|
||||||
|
|
||||||
|
CODELIB = libetics.a
|
||||||
|
|
||||||
|
# object files needed for the library and the standalone
|
||||||
|
CODEOBJS1 = integrate.o mathaux.o scf.o ic.o
|
||||||
|
|
||||||
|
# object files only needed for the standalone
|
||||||
|
CODEOBJS2 = io.o main.o
|
||||||
|
|
||||||
|
AR = ar ruv
|
||||||
|
RANLIB = ranlib
|
||||||
|
|
||||||
|
all: $(CODELIB) standalone
|
||||||
|
|
||||||
|
clean:
|
||||||
|
$(RM) *.o *.a etics
|
||||||
|
|
||||||
|
$(CODELIB): $(CODEOBJS1)
|
||||||
|
$(NVCC) $(CUDAFLAGS) -dlink $(CODEOBJS1) -o dlink.o
|
||||||
|
$(AR) $@ $(CODEOBJS1) dlink.o
|
||||||
|
$(RANLIB) $@
|
||||||
|
|
||||||
|
library: $(CODELIB)
|
||||||
|
|
||||||
|
standalone: $(CODEOBJS1) $(CODEOBJS2)
|
||||||
|
echo -e "\e[31mWARNING! \e[0mThe standalone program has not been tested for a long time. Use extreme caution."
|
||||||
|
$(NVCC) $(CUDAFLAGS) -dlink $(CODEOBJS1) $(CODEOBJS2) -o dlink.o
|
||||||
|
$(CXX) $(CXXFLAGS) -o etics $(CODEOBJS1) $(CODEOBJS2) dlink.o $(LIB) $(LDFLAGS)
|
||||||
|
|
||||||
|
.SUFFIXES: .cu .cpp .o
|
||||||
|
|
||||||
|
# only io.cpp should be compiled with all the weird HDF5 flags
|
||||||
|
io.o: io.cpp
|
||||||
|
$(CXX) $(CPPFLAGS) $(INC) $(CXXFLAGS) -c -o io.o io.cpp
|
||||||
|
|
||||||
|
.cu.o: $<
|
||||||
|
$(NVCC) $(CPPFLAGS) $(INC) -Xcompiler "$(CXXFLAGS)" $(CUDAFLAGS) -dc -o $@ $<
|
||||||
|
|
||||||
|
.cpp.o: $<
|
||||||
|
$(CXX) $(CPPFLAGS) $(INC) $(CXXFLAGS) -c -o $@ $<
|
||||||
117
src/common.hpp
Normal file
117
src/common.hpp
Normal file
|
|
@ -0,0 +1,117 @@
|
||||||
|
#pragma once
|
||||||
|
#include <string>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
#ifndef __CUDACC__
|
||||||
|
#define __host__
|
||||||
|
#define __device__
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined ETICS_SINGLE_PRECISION && defined ETICS_DOUBLE_PRECISION
|
||||||
|
#error Contradictory precision flags!
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef ETICS_SINGLE_PRECISION
|
||||||
|
#define ETICS_DOUBLE_PRECISION
|
||||||
|
#define Real double
|
||||||
|
#define MPI_ETICS_REAL MPI_DOUBLE
|
||||||
|
#else
|
||||||
|
#define Real float
|
||||||
|
#define MPI_ETICS_REAL MPI_FLOAT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined MEX && defined SCF
|
||||||
|
#error Contradictory method flags!
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined MEX && !defined LMAX
|
||||||
|
#error LMAX not defined.
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined SCF && ((!defined NMAX) || (!defined LMAX))
|
||||||
|
#error Both NMAX and LMAX should be defined!
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined MEX && defined NMAX
|
||||||
|
#warning NMAX is defined, but will be ignored since the method is MEX!
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct vec3 {
|
||||||
|
Real x, y, z;
|
||||||
|
|
||||||
|
__host__ __device__ vec3() : x(0), y(0), z(0) {}
|
||||||
|
__host__ __device__ vec3(Real _x, Real _y, Real _z) : x(_x), y(_y), z(_z) {}
|
||||||
|
__host__ __device__ Real abs2() const {return x*x + y*y + z*z;}
|
||||||
|
__host__ __device__ vec3 operator+ (const vec3& V) const {return vec3(this->x + V.x, this->y + V.y, this->z + V.z);}
|
||||||
|
__host__ __device__ vec3 operator- (const vec3& V) const {return vec3(this->x - V.x, this->y - V.y, this->z - V.z);}
|
||||||
|
__host__ __device__ vec3 operator* (const Real& C) const {return vec3(this->x*C, this->y*C, this->z*C);}
|
||||||
|
__host__ __device__ vec3& operator+= (const vec3& V) {this->x += V.x; this->y += V.y; this->z += V.z; return *this;}
|
||||||
|
__host__ __device__ vec3& operator-= (const vec3& V) {this->x -= V.x; this->y -= V.y; this->z -= V.z; return *this;}
|
||||||
|
__host__ __device__ vec3& operator/= (const Real& C) {this->x /= C; this->y /= C; this->z /= C; return *this;}
|
||||||
|
__host__ __device__ vec3 operator- () const {return vec3(-this->x, -this->y, -this->z);}
|
||||||
|
__host__ __device__ vec3 operator+ () const {return vec3(this->x, this->y, this->z);}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class Particle {
|
||||||
|
public:
|
||||||
|
int ID;
|
||||||
|
unsigned char Status;
|
||||||
|
Real m;
|
||||||
|
vec3 pos, vel, acc;
|
||||||
|
Real R2;
|
||||||
|
|
||||||
|
__host__ __device__ Particle() {}
|
||||||
|
__host__ __device__ void CalculateR2() {R2 = pos.abs2();}
|
||||||
|
__host__ __device__ bool operator< (const Particle& p) const {return (this->R2 < p.R2);}
|
||||||
|
};
|
||||||
|
|
||||||
|
#define ETICS_MY_CLOCK CLOCK_MONOTONIC
|
||||||
|
class HostTimer {
|
||||||
|
timespec StartTime, StopTime;
|
||||||
|
public:
|
||||||
|
void Start() {
|
||||||
|
clock_gettime(ETICS_MY_CLOCK, &StartTime);
|
||||||
|
}
|
||||||
|
void Stop() {
|
||||||
|
clock_gettime(ETICS_MY_CLOCK, &StopTime);
|
||||||
|
}
|
||||||
|
double Difference() {
|
||||||
|
timespec TimeDiff;
|
||||||
|
if ((StopTime.tv_nsec - StartTime.tv_nsec) < 0) {
|
||||||
|
TimeDiff.tv_sec = StopTime.tv_sec - StartTime.tv_sec - 1;
|
||||||
|
TimeDiff.tv_nsec = 1000000000 + StopTime.tv_nsec - StartTime.tv_nsec;
|
||||||
|
} else {
|
||||||
|
TimeDiff.tv_sec = StopTime.tv_sec - StartTime.tv_sec;
|
||||||
|
TimeDiff.tv_nsec = StopTime.tv_nsec - StartTime.tv_nsec;
|
||||||
|
}
|
||||||
|
return (double)TimeDiff.tv_sec + ((double)TimeDiff.tv_nsec)*1.0e-9;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef __CUDACC__
|
||||||
|
class DeviceTimer {
|
||||||
|
cudaEvent_t StartTime, StopTime;
|
||||||
|
public:
|
||||||
|
DeviceTimer() {
|
||||||
|
cudaEventCreate(&StartTime);
|
||||||
|
cudaEventCreate(&StopTime);
|
||||||
|
}
|
||||||
|
~DeviceTimer() {
|
||||||
|
cudaEventDestroy(StartTime);
|
||||||
|
cudaEventDestroy(StopTime);
|
||||||
|
}
|
||||||
|
void Start() {
|
||||||
|
cudaEventRecord(StartTime);
|
||||||
|
}
|
||||||
|
void Stop() {
|
||||||
|
cudaEventRecord(StopTime);
|
||||||
|
cudaEventSynchronize(StopTime);
|
||||||
|
}
|
||||||
|
double Difference(){
|
||||||
|
float TimeDiff;
|
||||||
|
cudaEventElapsedTime(&TimeDiff, StartTime, StopTime);
|
||||||
|
return (double)TimeDiff * 1.0e-3;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#endif
|
||||||
33
src/file.ini
Normal file
33
src/file.ini
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; This is an ETICS parameter file
|
||||||
|
; Required parameters are:
|
||||||
|
; Filename, N, Tcrit, StepSize, dT1, dT2
|
||||||
|
;
|
||||||
|
; Special values for Filename:
|
||||||
|
; _nofile_ or _hernquist_ Generates a Hernquist sphere
|
||||||
|
; _plummer_ Generates a Plummer sphere
|
||||||
|
; The _xxx_ can be followed by a integer indicating the random seed, otherwise,
|
||||||
|
; the system time is used. The generated initial conditions are only saved at
|
||||||
|
; the t=0 snapshot.
|
||||||
|
;
|
||||||
|
; Optional parameters are:
|
||||||
|
; device (CUDA device ID; by default or if negative, cudaSetDevice is skipped)
|
||||||
|
; Prefix (="", prefix for the snapshot file names)
|
||||||
|
; OutputFormat (="ascii", the output format, either ascii or hdf5)
|
||||||
|
|
||||||
|
|
||||||
|
Filename = _hernquist_ 0
|
||||||
|
;N = 1000000
|
||||||
|
;Tcrit = 0.03125
|
||||||
|
;StepSize = 0.001953125
|
||||||
|
;dT1 = 0.001953125
|
||||||
|
;dT2 = 0.25
|
||||||
|
|
||||||
|
N = 1000000
|
||||||
|
Tcrit = 500
|
||||||
|
StepSize = 0.03125
|
||||||
|
dT1 = 1.0
|
||||||
|
dT2 = 5.0
|
||||||
|
|
||||||
|
Prefix = /home/ym/tmp/merger
|
||||||
|
OutputFormat = ascii
|
||||||
269
src/ic.cpp
Normal file
269
src/ic.cpp
Normal file
|
|
@ -0,0 +1,269 @@
|
||||||
|
/**
|
||||||
|
* @file ic.cpp
|
||||||
|
* @author Yohai Meiron <ymeiron@pku.edu.cn>
|
||||||
|
* @brief Functions to generate initial conditions.
|
||||||
|
*
|
||||||
|
* These are functions to generate a Hernquist or a Plummer sphere. In both
|
||||||
|
* cases the model has a total mass of 1 and the total energy is -0.25 Henon
|
||||||
|
* energy units. The Hernquist model generator was adapted from a program by
|
||||||
|
* John Dubinski (1999, original version) from the NEMO toolbox; the accessory
|
||||||
|
* functions are from Numerical Recipes in C (Press et al. 1992). The Plummer
|
||||||
|
* generator is adapted from the gen-plum program by Peter Berczik.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cmath>
|
||||||
|
#include "common.hpp"
|
||||||
|
#include "ic.hpp"
|
||||||
|
|
||||||
|
// Internal functions
|
||||||
|
namespace etics {
|
||||||
|
namespace ic {
|
||||||
|
double E;
|
||||||
|
|
||||||
|
void polint(double xa[], double ya[], int n, double x, double *y, double *dy);
|
||||||
|
double trapzd(double (*func)(double), double a, double b, int n);
|
||||||
|
double qromb(double (*func)(double), double a, double b);
|
||||||
|
double xrandom(double a, double b);
|
||||||
|
double xrandom();
|
||||||
|
double f(double E2), drho2(double u);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double etics::ic::trapzd(double (*func)(double), double a, double b, int n) {
|
||||||
|
double x, tnm, sum, del;
|
||||||
|
static double s;
|
||||||
|
int it, j;
|
||||||
|
|
||||||
|
if (n == 1) {
|
||||||
|
return (s=0.5*(b-a)*((*func)(a) + (*func)(b)));
|
||||||
|
} else {
|
||||||
|
for (it = 1, j = 1; j < n-1; j++) it <<= 1;
|
||||||
|
tnm = it;
|
||||||
|
del = (b-a)/tnm;
|
||||||
|
x = a + 0.5*del;
|
||||||
|
for (sum = 0, j = 1; j <= it; j++, x += del) sum += (*func)(x);
|
||||||
|
s = 0.5*(s+(b-a)*sum/tnm);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::ic::polint(double xa[], double ya[], int n, double x, double *y, double *dy) {
|
||||||
|
int ns = 1;
|
||||||
|
double den, dif, dift, ho, hp, w;
|
||||||
|
double *c, *d;
|
||||||
|
|
||||||
|
dif = fabs(x-xa[1]);
|
||||||
|
c = new double[n+1];
|
||||||
|
d = new double[n+1];
|
||||||
|
for (int i = 1; i <= n; i++) {
|
||||||
|
if ((dift=fabs(x-xa[i])) < dif) {
|
||||||
|
ns = i;
|
||||||
|
dif = dift;
|
||||||
|
}
|
||||||
|
c[i] = ya[i];
|
||||||
|
d[i] = ya[i];
|
||||||
|
}
|
||||||
|
*y = ya[ns--];
|
||||||
|
for (int m = 1; m < n; m++) {
|
||||||
|
for (int i = 1; i <= n-m; i++) {
|
||||||
|
ho = xa[i]-x;
|
||||||
|
hp = xa[i+m]-x;
|
||||||
|
w = c[i+1]-d[i];
|
||||||
|
if ((den=ho-hp) == 0.0) {
|
||||||
|
std::cerr << "Error in routine polint" << std::endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
den = w/den;
|
||||||
|
d[i] = hp*den;
|
||||||
|
c[i] = ho*den;
|
||||||
|
}
|
||||||
|
*y += (*dy=(2*ns < (n-m) ? c[ns+1] : d[ns--]));
|
||||||
|
}
|
||||||
|
delete[] c;
|
||||||
|
delete[] d;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define EPS 1.0e-6
|
||||||
|
#define JMAX 20
|
||||||
|
#define JMAXP (JMAX+1)
|
||||||
|
#define K 5
|
||||||
|
double etics::ic::qromb(double (*func)(double), double a, double b) {
|
||||||
|
double ss, dss;
|
||||||
|
double s[JMAXP+1], h[JMAXP+1];
|
||||||
|
|
||||||
|
h[1] = 1.0;
|
||||||
|
for (int j = 1; j <= JMAX; j++) {
|
||||||
|
s[j] = trapzd(func, a, b, j);
|
||||||
|
if (j >= K) {
|
||||||
|
polint(&h[j-K], &s[j-K], K, 0, &ss, &dss);
|
||||||
|
if (fabs(dss) < EPS*fabs(ss)) return ss;
|
||||||
|
}
|
||||||
|
s[j+1] = s[j];
|
||||||
|
h[j+1] = 0.25*h[j];
|
||||||
|
}
|
||||||
|
std::cerr << "Too many steps in routine qromb" << std::endl;
|
||||||
|
exit(1);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#undef EPS
|
||||||
|
#undef JMAX
|
||||||
|
#undef JMAXP
|
||||||
|
#undef K
|
||||||
|
|
||||||
|
double etics::ic::xrandom(double a, double b) {
|
||||||
|
double Result = ((double)rand())/((double)RAND_MAX);
|
||||||
|
Result *= (b - a);
|
||||||
|
Result += a;
|
||||||
|
return Result;
|
||||||
|
}
|
||||||
|
|
||||||
|
double etics::ic::xrandom() {
|
||||||
|
return xrandom(0, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Distribution function of a Hernquist model */
|
||||||
|
double etics::ic::f(double E2) {
|
||||||
|
E = E2;
|
||||||
|
return qromb(drho2, 0.0, 2.0*sqrt(E))*(sqrt(32.)*(M_PI*M_PI*M_PI));
|
||||||
|
}
|
||||||
|
|
||||||
|
double etics::ic::drho2(double u) {
|
||||||
|
double p = E - 0.25*u*u;
|
||||||
|
double r;
|
||||||
|
if (p <= 0) return 0;
|
||||||
|
else {
|
||||||
|
double p1 = 1/p;
|
||||||
|
double b0 = 0.5*(2-p1);
|
||||||
|
double c0 = (1-p1);
|
||||||
|
r = -b0 + sqrt(b0*b0 - c0);
|
||||||
|
}
|
||||||
|
|
||||||
|
double r2 = r*r, r3 = r2*r;
|
||||||
|
|
||||||
|
double c0 = 1.0 + r;
|
||||||
|
double c02 = c0*c0;
|
||||||
|
double c03 = c02*c0;
|
||||||
|
double c04 = c02*c02;
|
||||||
|
double dp1 = -1/c02;
|
||||||
|
double dp2 = 2/c03;
|
||||||
|
|
||||||
|
double drho1r = -(1+4*r)/(r2*c04);
|
||||||
|
double drho2r = 2.0*(14*r2 + 5*r + 1)/(r3*c04*c0);
|
||||||
|
|
||||||
|
return drho2r/(dp1*dp1) - drho1r/(dp1*dp1*dp1)*dp2;
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::ic::hernquist(int N, int Seed, Particle **ParticleList) {
|
||||||
|
using namespace etics::ic;
|
||||||
|
double x, y, z, vx, vy, vz;
|
||||||
|
double fmax, f0, f1, v2, vmax, vmax2, Ep=0, Ek=0;
|
||||||
|
|
||||||
|
srand(Seed);
|
||||||
|
|
||||||
|
*ParticleList = new Particle[N];
|
||||||
|
|
||||||
|
for(int i = 0; i < N; i++) {
|
||||||
|
double eta = xrandom(0.0, 1.0);
|
||||||
|
double radius = sqrt(eta)/(1-sqrt(eta));
|
||||||
|
double phi = xrandom(0.0, 2*M_PI);
|
||||||
|
double cth = xrandom(-1.0, 1.0);
|
||||||
|
double sth = sqrt(1.0 - cth*cth);
|
||||||
|
x = radius*sth*cos(phi);
|
||||||
|
y = radius*sth*sin(phi);
|
||||||
|
z = radius*cth;
|
||||||
|
double psi0 = 1/(1 + radius);
|
||||||
|
Ep += -psi0;
|
||||||
|
vmax2 = 2.0*psi0;
|
||||||
|
vmax = sqrt(vmax2);
|
||||||
|
fmax = f(psi0);
|
||||||
|
f0 = fmax; f1 = 1.1*fmax; // just some initial values
|
||||||
|
while(f1 > f0) {
|
||||||
|
// pick a velocity
|
||||||
|
v2 = 2.0*vmax2;
|
||||||
|
while(v2 > vmax2) { // rejection technique
|
||||||
|
vx = vmax*xrandom(-1.0, 1.0);
|
||||||
|
vy = vmax*xrandom(-1.0, 1.0);
|
||||||
|
vz = vmax*xrandom(-1.0, 1.0);
|
||||||
|
v2 = vx*vx + vy*vy + vz*vz;
|
||||||
|
}
|
||||||
|
f0 = f(psi0 - 0.5*v2);
|
||||||
|
f1 = fmax*xrandom(0.0, 1.0);
|
||||||
|
}
|
||||||
|
Ek += 0.5*v2;
|
||||||
|
Particle p;
|
||||||
|
p.m = 1.0/N;
|
||||||
|
p.pos = vec3(x, y, z);
|
||||||
|
p.vel = vec3(vx, vy, vz);
|
||||||
|
p.ID = i;
|
||||||
|
p.Status = 0;
|
||||||
|
p.CalculateR2();
|
||||||
|
(*ParticleList)[i] = p;
|
||||||
|
}
|
||||||
|
Ep /= 2*N; Ek /= N;
|
||||||
|
double E = Ep + Ek;
|
||||||
|
double RadiusFactor = -4*E; // should be close to 1/3
|
||||||
|
double VelocityFactor = 1/sqrt(-4*E); // should be close to sqrt(3)
|
||||||
|
for(int i = 0; i < N; i++) {
|
||||||
|
(*ParticleList)[i].pos = (*ParticleList)[i].pos * RadiusFactor;
|
||||||
|
(*ParticleList)[i].vel = (*ParticleList)[i].vel * VelocityFactor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#define RMAX 10.0
|
||||||
|
void etics::ic::plummer(int N, int Seed, Particle **ParticleList) {
|
||||||
|
double X, Y, Z, Vx, Vy, Vz,
|
||||||
|
X1, X2, X3, X4, X5, X6, X7,
|
||||||
|
R, Ve, V;
|
||||||
|
|
||||||
|
srand(Seed);
|
||||||
|
|
||||||
|
*ParticleList = new Particle[N];
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
while(i < N) {
|
||||||
|
X1 = xrandom(); X2 = xrandom(); X3 = xrandom();
|
||||||
|
R = 1.0/sqrt( (pow(X1,-2.0/3.0) - 1.0) );
|
||||||
|
if (R > RMAX) continue;
|
||||||
|
Z = (1.0 - 2.0*X2)*R;
|
||||||
|
X = sqrt(R*R - Z*Z) * cos(2.0*M_PI*X3);
|
||||||
|
Y = sqrt(R*R - Z*Z) * sin(2.0*M_PI*X3);
|
||||||
|
|
||||||
|
Ve = sqrt(2.0)*pow( (1.0 + R*R), -0.25 );
|
||||||
|
|
||||||
|
X4 = 0.0;
|
||||||
|
X5 = 0.0;
|
||||||
|
while( 0.1*X5 >= X4*X4*pow( (1.0-X4*X4), 3.5) ) {
|
||||||
|
X4 = xrandom();
|
||||||
|
X5 = xrandom();
|
||||||
|
}
|
||||||
|
|
||||||
|
V = Ve*X4;
|
||||||
|
|
||||||
|
X6 = xrandom();
|
||||||
|
X7 = xrandom();
|
||||||
|
|
||||||
|
Vz = (1.0 - 2.0*X6)*V;
|
||||||
|
Vx = sqrt(V*V - Vz*Vz) * cos(2.0*M_PI*X7);
|
||||||
|
Vy = sqrt(V*V - Vz*Vz) * sin(2.0*M_PI*X7);
|
||||||
|
|
||||||
|
#define CONV (3.0*M_PI/16.0)
|
||||||
|
X *= CONV; Y *= CONV; Z *= CONV;
|
||||||
|
Vx /= sqrt(CONV); Vy /= sqrt(CONV); Vz /= sqrt(CONV);
|
||||||
|
#undef CONV
|
||||||
|
|
||||||
|
Particle p;
|
||||||
|
p.m = 1.0/N;
|
||||||
|
p.pos = vec3(X, Y, Z);
|
||||||
|
p.vel = vec3(Vx, Vy, Vz);
|
||||||
|
p.ID = i;
|
||||||
|
p.Status = 0;
|
||||||
|
p.CalculateR2();
|
||||||
|
(*ParticleList)[i] = p;
|
||||||
|
|
||||||
|
i++;
|
||||||
|
} /* while(i < N) */
|
||||||
|
}
|
||||||
|
#undef RMAX
|
||||||
7
src/ic.hpp
Normal file
7
src/ic.hpp
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
#pragma once
|
||||||
|
namespace etics {
|
||||||
|
namespace ic {
|
||||||
|
void hernquist(int N, int Seed, Particle **ParticleList);
|
||||||
|
void plummer(int N, int Seed, Particle **ParticleList);
|
||||||
|
}
|
||||||
|
}
|
||||||
127
src/integrate.cu
Normal file
127
src/integrate.cu
Normal file
|
|
@ -0,0 +1,127 @@
|
||||||
|
#include <thrust/device_vector.h>
|
||||||
|
#include <thrust/inner_product.h>
|
||||||
|
#include "common.hpp"
|
||||||
|
#include "integrate.hpp"
|
||||||
|
|
||||||
|
#ifdef MEX
|
||||||
|
#define method mex
|
||||||
|
#include "mex.hpp"
|
||||||
|
#elif defined(SCF)
|
||||||
|
#define method scf
|
||||||
|
#include "scf.hpp"
|
||||||
|
#elif defined(MULTICENTER)
|
||||||
|
#define method multicenter
|
||||||
|
#include "multicenter.hpp"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define PTR(x) (thrust::raw_pointer_cast((x).data()))
|
||||||
|
|
||||||
|
namespace etics
|
||||||
|
{
|
||||||
|
|
||||||
|
// The 'drift' step is performed using the 'acc' member.
|
||||||
|
struct DriftFunctor {
|
||||||
|
Real Step, Step2;
|
||||||
|
__host__ __device__ DriftFunctor(Real _Step) : Step(_Step), Step2(_Step*_Step) {}
|
||||||
|
__host__ __device__ Particle operator() (Particle &p) const {
|
||||||
|
p.pos += p.vel*Step + p.acc*0.5*Step2;
|
||||||
|
p.CalculateR2(); // needed for both MEX and SCF, but not generally needed in leapfrog
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// The 'kick' step is performed using the 'acc' member and also the force F,
|
||||||
|
// calculated at the new (predicted) position.
|
||||||
|
struct KickFunctor {
|
||||||
|
Real Step;
|
||||||
|
__host__ __device__ KickFunctor(Real _Step) : Step(_Step) {}
|
||||||
|
__host__ __device__ Particle operator() (Particle& p, const vec3& F) const {
|
||||||
|
p.vel += (p.acc + F)*0.5*Step;
|
||||||
|
p.acc = F;
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct KineticEnergyFunctor {
|
||||||
|
__host__ __device__ Real operator() (const Particle &p) const {return 0.5*p.m*p.vel.abs2();}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PotentialEnergyFunctor {
|
||||||
|
__host__ __device__ Real operator() (const Particle &p, const Real &Potential) const {return p.m*Potential;}
|
||||||
|
};
|
||||||
|
|
||||||
|
Integrator::Integrator() {
|
||||||
|
N = 0;
|
||||||
|
Time = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Integrator::Integrator(Particle *P_h, int _N) {
|
||||||
|
N = _N;
|
||||||
|
Time = 0;
|
||||||
|
P = thrust::device_vector<Particle>(P_h, P_h+N);
|
||||||
|
Potential = thrust::device_vector<Real>(N);
|
||||||
|
Force = thrust::device_vector<vec3>(N);
|
||||||
|
Method = &etics::method::CalculateGravity;
|
||||||
|
CalculateGravity();
|
||||||
|
KickStep(0); // Just to "commit" the forces to the particle list.
|
||||||
|
}
|
||||||
|
|
||||||
|
Integrator::~Integrator() {
|
||||||
|
N = 0;
|
||||||
|
// Weird way to free Thrust memory.
|
||||||
|
P.clear(); P.shrink_to_fit();
|
||||||
|
Potential.clear(); Potential.shrink_to_fit();
|
||||||
|
Force.clear(); Force.shrink_to_fit();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Integrator::CalculateGravity() {
|
||||||
|
(*Method)(PTR(P), N, PTR(Potential), PTR(Force));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Integrator::DriftStep(Real Step) {
|
||||||
|
thrust::transform(P.begin(), P.end(), P.begin(), DriftFunctor(Step));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Integrator::KickStep(Real Step) {
|
||||||
|
thrust::transform(P.begin(), P.end(), Force.begin(), P.begin(), KickFunctor(Step));
|
||||||
|
}
|
||||||
|
|
||||||
|
Real Integrator::GetTime() {
|
||||||
|
return Time;
|
||||||
|
}
|
||||||
|
|
||||||
|
int Integrator::GetN() {
|
||||||
|
return N;
|
||||||
|
}
|
||||||
|
|
||||||
|
Real Integrator::KineticEnergy() {
|
||||||
|
return thrust::transform_reduce(
|
||||||
|
P.begin(), P.end(),
|
||||||
|
KineticEnergyFunctor(),
|
||||||
|
(Real)0, // It must be clear to the function that this zero is a Real.
|
||||||
|
thrust::plus<Real>()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Real Integrator::PotentialEnergy() {
|
||||||
|
return 0.5*thrust::inner_product(
|
||||||
|
P.begin(), P.end(),
|
||||||
|
Potential.begin(),
|
||||||
|
(Real)0,
|
||||||
|
thrust::plus<Real>(),
|
||||||
|
PotentialEnergyFunctor()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Integrator::CopyParticlesToHost(Particle *P_h) {
|
||||||
|
thrust::copy(P.begin(), P.end(), P_h);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Integrator::CopyParticlesToHost(Particle **P_h, int *_N) {
|
||||||
|
Particle *LocalList = new Particle[N];
|
||||||
|
thrust::copy(P.begin(), P.end(), LocalList);
|
||||||
|
*P_h = LocalList;
|
||||||
|
*_N = N;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace etics
|
||||||
27
src/integrate.hpp
Normal file
27
src/integrate.hpp
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
#pragma once
|
||||||
|
#include <thrust/device_vector.h>
|
||||||
|
|
||||||
|
namespace etics {
|
||||||
|
class Integrator {
|
||||||
|
public:
|
||||||
|
Integrator();
|
||||||
|
Integrator(Particle *P_h, int _N);
|
||||||
|
~Integrator();
|
||||||
|
void CalculateGravity();
|
||||||
|
void DriftStep(Real Step);
|
||||||
|
void KickStep(Real Step);
|
||||||
|
Real GetTime();
|
||||||
|
int GetN();
|
||||||
|
Real KineticEnergy();
|
||||||
|
Real PotentialEnergy();
|
||||||
|
void CopyParticlesToHost(Particle *P_h);
|
||||||
|
void CopyParticlesToHost(Particle **P_h, int *_N);
|
||||||
|
private:
|
||||||
|
int N;
|
||||||
|
double Time;
|
||||||
|
thrust::device_vector<Particle> P;
|
||||||
|
thrust::device_vector<Real> Potential;
|
||||||
|
thrust::device_vector<vec3> Force;
|
||||||
|
void (*Method)(Particle*, int, Real*, vec3*);
|
||||||
|
};
|
||||||
|
}
|
||||||
367
src/io.cpp
Normal file
367
src/io.cpp
Normal file
|
|
@ -0,0 +1,367 @@
|
||||||
|
#include <iostream>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <climits>
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include "common.hpp"
|
||||||
|
#include "io.hpp"
|
||||||
|
|
||||||
|
#ifdef ETICS_BOOST
|
||||||
|
#include <boost/property_tree/ptree.hpp>
|
||||||
|
#include <boost/property_tree/ini_parser.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
void ReadICsASCII(string Filename, int N, Particle **P_h, int *FileSnapshotNum, Real *FileTime) {
|
||||||
|
*P_h = new Particle[N];
|
||||||
|
string Line;
|
||||||
|
ifstream InputFile(Filename.c_str());
|
||||||
|
int LineNum;
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
if (InputFile.is_open()) {
|
||||||
|
getline(InputFile, Line); // First line is the snapshot number.
|
||||||
|
int Count = sscanf(Line.c_str(), "%d", FileSnapshotNum);
|
||||||
|
if (Count != 1) {
|
||||||
|
cerr << "Problem in input file, line 1." << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
getline(InputFile, Line); // Second line is the expected number of particles.
|
||||||
|
int ExpectedN;
|
||||||
|
Count = sscanf(Line.c_str(), "%d", &ExpectedN); // We ignore it actually.
|
||||||
|
if (Count != 1) {
|
||||||
|
cerr << "Problem in input file, line 2." << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
getline(InputFile, Line);
|
||||||
|
double T0;
|
||||||
|
Count = sscanf(Line.c_str(), "%lf", &T0);
|
||||||
|
if (Count != 1) {
|
||||||
|
cerr << "Problem in input file, line 3." << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
*FileTime = (Real)T0;
|
||||||
|
LineNum = 3;
|
||||||
|
while (getline(InputFile, Line))
|
||||||
|
{
|
||||||
|
double m, x, y, z, vx, vy, vz;
|
||||||
|
int id;
|
||||||
|
Particle p;
|
||||||
|
Count = sscanf(Line.c_str(), "%d %lf %lf %lf %lf %lf %lf %lf", &id, &m, &x, &y, &z, &vx, &vy, &vz);
|
||||||
|
if (Count != 8) {
|
||||||
|
cerr << "Problem in input file, line " << LineNum + 1 << "." << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
p.ID = id;
|
||||||
|
p.m = (Real)m;
|
||||||
|
p.pos = vec3((Real)x, (Real)y, (Real)z); // This is to ensure proper casting in the case of single prec.
|
||||||
|
p.vel = vec3((Real)vx, (Real)vy, (Real)vz);
|
||||||
|
p.Status = 0;
|
||||||
|
p.CalculateR2();
|
||||||
|
(*P_h)[i] = p;
|
||||||
|
i++;
|
||||||
|
LineNum++;
|
||||||
|
if (i == N) break;
|
||||||
|
}
|
||||||
|
InputFile.close();
|
||||||
|
} else {
|
||||||
|
cerr << "Can't open file" << Filename << "." << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (i < N) {
|
||||||
|
cerr << "Was only able to read " << i << " particles while " << N << " were requested." << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void WriteSnapshotASCII(string Prefix, int SnapNumber, Particle *P_h, int N, Real T) {
|
||||||
|
char S[512];
|
||||||
|
sprintf(S, "%s%04d.dat", Prefix.c_str(), SnapNumber);
|
||||||
|
ofstream SnapshotFile;
|
||||||
|
SnapshotFile.open(S);
|
||||||
|
sprintf(S, "%06d\n", SnapNumber); SnapshotFile << S;
|
||||||
|
sprintf(S, "%06d\n", N); SnapshotFile << S;
|
||||||
|
sprintf(S, "%.16E\n", T); SnapshotFile << S;
|
||||||
|
for (int i = 0; i < N; i++) {
|
||||||
|
sprintf(S, "%06d%14.6e%14.6e%14.6e%14.6e%14.6e%14.6e%14.6e\n", P_h[i].ID, P_h[i].m, P_h[i].pos.x, P_h[i].pos.y, P_h[i].pos.z, P_h[i].vel.x, P_h[i].vel.y, P_h[i].vel.z); SnapshotFile << S;
|
||||||
|
}
|
||||||
|
SnapshotFile.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
#define THROW_EXCEPTION(str, stat) {cerr << str << endl; exit((stat));}
|
||||||
|
void ParseInput(int argc, char *argv[], ParametersStruct *Params) {
|
||||||
|
ParametersStruct P;
|
||||||
|
#ifdef ETICS_BOOST
|
||||||
|
if (argc < 2) THROW_EXCEPTION("Usage: etics [FILE]...", 1)
|
||||||
|
ifstream TestFileObject(argv[1]);
|
||||||
|
if (!TestFileObject) THROW_EXCEPTION("Problem reading file...", 1)
|
||||||
|
boost::property_tree::ptree pt;
|
||||||
|
boost::property_tree::ini_parser::read_ini(argv[1], pt); // what if file doesn't exist?
|
||||||
|
|
||||||
|
P.N = pt.get<int>("N", 0);
|
||||||
|
if (P.N <= 0) THROW_EXCEPTION("Could not read number of particles (N) from ini file.", 1)
|
||||||
|
|
||||||
|
P.Tcrit = pt.get<Real>("Tcrit", -1);
|
||||||
|
if (P.Tcrit < 0) THROW_EXCEPTION("Could not read finish time (Tcrit) from ini file.", 1)
|
||||||
|
|
||||||
|
P.dT1 = pt.get<Real>("dT1", 0);
|
||||||
|
if (P.dT1 <= 0) THROW_EXCEPTION("Could output time interval (dT1) from ini file.", 1)
|
||||||
|
|
||||||
|
P.dT2 = pt.get<Real>("dT2", 0);
|
||||||
|
if (P.dT2 <= 0) THROW_EXCEPTION("Could snapshot time interval (dT2) from ini file.", 1)
|
||||||
|
|
||||||
|
P.ConstantStep = pt.get<Real>("StepSize", -1);
|
||||||
|
if (P.ConstantStep < 0) THROW_EXCEPTION("Could not read step size (StepSize) from ini file.", 1)
|
||||||
|
|
||||||
|
P.Filename = pt.get<string>("Filename", "\n");
|
||||||
|
if (P.Filename == "\n") THROW_EXCEPTION("Could not read initial condition file name (Filename) from ini file.", 1)
|
||||||
|
|
||||||
|
P.Prefix = pt.get<string>("Prefix", "");
|
||||||
|
P.OutputFormat = pt.get<string>("OutputFormat", "ascii");
|
||||||
|
P.DeviceID = pt.get<int>("device", -1);
|
||||||
|
if (argc >= 3) { // If there is a argument after the file, it must be either --device or -d
|
||||||
|
int Version1 = strncmp(argv[2], "--device", 8);
|
||||||
|
int Version2 = strncmp(argv[2], "-d", 2);
|
||||||
|
if ((Version1!=0) && (Version2!=0)) THROW_EXCEPTION("Commandline argument after input file must be device number.", 1)
|
||||||
|
char *DeviceIDStr = strchr(argv[2], '=');
|
||||||
|
if (DeviceIDStr!=NULL) DeviceIDStr = DeviceIDStr + 1;
|
||||||
|
else {
|
||||||
|
if (argc <= 3) THROW_EXCEPTION("Device number must follow.", 1)
|
||||||
|
DeviceIDStr = argv[3];
|
||||||
|
}
|
||||||
|
P.DeviceID = atoi(DeviceIDStr);
|
||||||
|
if ((P.DeviceID==0) && (strcmp(DeviceIDStr, "0")!=0)) THROW_EXCEPTION("Error understanding device number.", 1)
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// If no BOOST, we include a file with the parameters and compile it.
|
||||||
|
int N, DeviceID = -1;
|
||||||
|
Real dT1, dT2, Tcrit, StepSize;
|
||||||
|
std::string Filename, Prefix = "", OutputFormat="";
|
||||||
|
#include "noboost.inc"
|
||||||
|
P.N = N;
|
||||||
|
P.Filename = Filename;
|
||||||
|
P.dT1 = dT1;
|
||||||
|
P.dT2 = dT2;
|
||||||
|
P.Tcrit = Tcrit;
|
||||||
|
P.ConstantStep = StepSize;
|
||||||
|
P.DeviceID = DeviceID;
|
||||||
|
P.OutputFormat = OutputFormat;
|
||||||
|
#endif
|
||||||
|
P.Seed = INT_MIN;
|
||||||
|
if ((P.Filename[0]=='_') && (P.Filename.rfind("_") > 0)) {
|
||||||
|
int Break = P.Filename.rfind("_") + 1;
|
||||||
|
string SeedStr = P.Filename.substr(Break, P.Filename.length() - Break);
|
||||||
|
int SeedInt;
|
||||||
|
istringstream iss(SeedStr);
|
||||||
|
iss >> ws >> P.Seed >> ws;
|
||||||
|
if(!iss.eof()) THROW_EXCEPTION("Could not understand random seed (in Filename).", 1)
|
||||||
|
P.Filename = P.Filename.substr(0, Break);
|
||||||
|
}
|
||||||
|
if (P.Seed == INT_MIN) P.Seed = (int)time(NULL);
|
||||||
|
*Params = P;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// make safety: if single precision, fail to compile
|
||||||
|
#ifdef ETICS_HDF5
|
||||||
|
#include "H5Cpp.h"
|
||||||
|
|
||||||
|
int H5IterNum;
|
||||||
|
int *H5IterSnapNumbers;
|
||||||
|
string *H5IterGroupNames;
|
||||||
|
|
||||||
|
herr_t file_info(hid_t loc_id, const char *name, void *opdata) {
|
||||||
|
int res = sscanf(name, "Step#%d", H5IterSnapNumbers + H5IterNum);
|
||||||
|
if (res != 1) {
|
||||||
|
cerr << "Problem understanding group \"" << name << "\" in HDF5 file" << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
H5IterGroupNames[H5IterNum] = name;
|
||||||
|
H5IterNum++;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ReadICsHDF5(string Filename, int N, Particle **P_h, int *FileSnapshotNum, Real *FileTime) {
|
||||||
|
double *Mass, *X, *Y, *Z, *VX, *VY, *VZ;
|
||||||
|
int *ID;
|
||||||
|
Mass = new double[N];
|
||||||
|
X = new double[N];
|
||||||
|
Y = new double[N];
|
||||||
|
Z = new double[N];
|
||||||
|
VX = new double[N];
|
||||||
|
VY = new double[N];
|
||||||
|
VZ = new double[N];
|
||||||
|
ID = new int[N];
|
||||||
|
|
||||||
|
H5::H5File file;
|
||||||
|
file = H5::H5File(Filename, H5F_ACC_RDONLY);
|
||||||
|
|
||||||
|
H5::Group group = file.openGroup("/");
|
||||||
|
int NumberOfGroups = group.getNumObjs();
|
||||||
|
H5IterSnapNumbers = new int[NumberOfGroups];
|
||||||
|
H5IterGroupNames = new string[NumberOfGroups];
|
||||||
|
H5IterNum = 0;
|
||||||
|
int ret = file.iterateElems("/", NULL, file_info, NULL);
|
||||||
|
int MaxSnapIndex = 0;
|
||||||
|
for (int i=1; i < NumberOfGroups; i++) MaxSnapIndex = (H5IterSnapNumbers[i]>H5IterSnapNumbers[MaxSnapIndex])?i:MaxSnapIndex;
|
||||||
|
string GroupName = H5IterGroupNames[MaxSnapIndex];
|
||||||
|
|
||||||
|
|
||||||
|
group = file.openGroup("/" + GroupName);
|
||||||
|
H5::Attribute attribute = group.openAttribute("Time");
|
||||||
|
attribute.read(H5::PredType::NATIVE_DOUBLE, FileTime);
|
||||||
|
*FileSnapshotNum = H5IterSnapNumbers[MaxSnapIndex];
|
||||||
|
|
||||||
|
H5::DataSet dataset = file.openDataSet("/" + GroupName + "/ID");
|
||||||
|
int NfromFile = dataset.getStorageSize()/sizeof(int);
|
||||||
|
if (NfromFile < N) {
|
||||||
|
cerr << "Was only able to read " << NfromFile << " particles while " << N << " were requested." << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
dataset.read(ID, H5::PredType::NATIVE_UINT32);
|
||||||
|
|
||||||
|
dataset = file.openDataSet("/" + GroupName + "/Mass"); dataset.read(Mass, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset = file.openDataSet("/" + GroupName + "/X"); dataset.read(X, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset = file.openDataSet("/" + GroupName + "/Y"); dataset.read(Y, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset = file.openDataSet("/" + GroupName + "/Z"); dataset.read(Z, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset = file.openDataSet("/" + GroupName + "/VX"); dataset.read(VX, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset = file.openDataSet("/" + GroupName + "/VY"); dataset.read(VY, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset = file.openDataSet("/" + GroupName + "/VZ"); dataset.read(VZ, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
|
||||||
|
dataset.close();
|
||||||
|
group.close();
|
||||||
|
file.close();
|
||||||
|
|
||||||
|
*P_h = new Particle[N];
|
||||||
|
for (int i = 0; i < N; i++) {
|
||||||
|
Particle p;
|
||||||
|
p.ID = ID[i];
|
||||||
|
p.m = (Real)Mass[i];
|
||||||
|
p.pos = vec3((Real)X[i], (Real)Y[i], (Real)Z[i]); // This is to ensure proper casting in the case of single prec.
|
||||||
|
p.vel = vec3((Real)VX[i], (Real)VY[i], (Real)VZ[i]);
|
||||||
|
p.Status = 0;
|
||||||
|
p.CalculateR2();
|
||||||
|
(*P_h)[i] = p;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool H5FirstSnapshot = true;
|
||||||
|
|
||||||
|
void WriteSnapshotHDF5(string Prefix, int SnapNumber, Particle *P_h, int N, Real T) {
|
||||||
|
string Filename = Prefix + ".h5part";
|
||||||
|
|
||||||
|
H5::H5File file;
|
||||||
|
|
||||||
|
if (H5FirstSnapshot && std::ifstream(Filename.c_str())) {
|
||||||
|
cerr << "File \"" << Filename << "\" already exist!" << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
H5FirstSnapshot = false;
|
||||||
|
|
||||||
|
if (std::ifstream(Filename.c_str())) {
|
||||||
|
file = H5::H5File(Filename, H5F_ACC_RDWR);
|
||||||
|
}
|
||||||
|
else file = H5::H5File(Filename, H5F_ACC_TRUNC);
|
||||||
|
|
||||||
|
char GroupName[64];
|
||||||
|
sprintf(GroupName, "/Step#%d", SnapNumber);
|
||||||
|
|
||||||
|
H5::Group group;
|
||||||
|
try {
|
||||||
|
H5::Exception::dontPrint();
|
||||||
|
group = H5::Group(file.createGroup(GroupName));
|
||||||
|
}
|
||||||
|
catch ( H5::FileIException error ) {
|
||||||
|
cerr << "Couldn't create group \"" << GroupName << "\" in HDF5 file, maybe it already exists?" << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
double *Mass, *X, *Y, *Z, *VX, *VY, *VZ, *AX, *AY, *AZ;
|
||||||
|
Mass = new double[N];
|
||||||
|
X = new double[N];
|
||||||
|
Y = new double[N];
|
||||||
|
Z = new double[N];
|
||||||
|
VX = new double[N];
|
||||||
|
VY = new double[N];
|
||||||
|
VZ = new double[N];
|
||||||
|
AX = new double[N];
|
||||||
|
AY = new double[N];
|
||||||
|
AZ = new double[N];
|
||||||
|
int *ID;
|
||||||
|
ID = new int[N];
|
||||||
|
|
||||||
|
for (int i = 0; i < N; i++) {
|
||||||
|
Particle p = P_h[i];
|
||||||
|
ID[i] = p.ID;
|
||||||
|
Mass[i] = p.m;
|
||||||
|
X[i] = p.pos.x;
|
||||||
|
Y[i] = p.pos.y;
|
||||||
|
Z[i] = p.pos.z;
|
||||||
|
VX[i] = p.vel.x;
|
||||||
|
VY[i] = p.vel.y;
|
||||||
|
VZ[i] = p.vel.z;
|
||||||
|
AX[i] = p.acc.x;
|
||||||
|
AY[i] = p.acc.y;
|
||||||
|
AZ[i] = p.acc.z;
|
||||||
|
}
|
||||||
|
|
||||||
|
H5::DataSpace attr_dataspace = H5::DataSpace(H5S_SCALAR);
|
||||||
|
|
||||||
|
H5::Attribute attribute = group.createAttribute("Time", H5::PredType::NATIVE_DOUBLE, attr_dataspace);
|
||||||
|
attribute.write( H5::PredType::NATIVE_DOUBLE, &T);
|
||||||
|
|
||||||
|
attribute = group.createAttribute("TotalN", H5::PredType::NATIVE_UINT32, attr_dataspace);
|
||||||
|
attribute.write( H5::PredType::NATIVE_UINT32, &N);
|
||||||
|
|
||||||
|
hsize_t dimsxxx = N;
|
||||||
|
H5::DataSpace dataspacexxx(1, &dimsxxx);
|
||||||
|
H5::DataSet dataset3;
|
||||||
|
dataset3 = H5::DataSet(group.createDataSet("ID", H5::PredType::NATIVE_UINT32, dataspacexxx));
|
||||||
|
dataset3.write(ID, H5::PredType::NATIVE_UINT32);
|
||||||
|
dataset3 = H5::DataSet(group.createDataSet("Mass", H5::PredType::NATIVE_DOUBLE, dataspacexxx));
|
||||||
|
dataset3.write(Mass, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset3 = H5::DataSet(group.createDataSet("X", H5::PredType::NATIVE_DOUBLE, dataspacexxx));
|
||||||
|
dataset3.write(X, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset3 = H5::DataSet(group.createDataSet("Y", H5::PredType::NATIVE_DOUBLE, dataspacexxx));
|
||||||
|
dataset3.write(Y, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset3 = H5::DataSet(group.createDataSet("Z", H5::PredType::NATIVE_DOUBLE, dataspacexxx));
|
||||||
|
dataset3.write(Z, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset3 = H5::DataSet(group.createDataSet("VX", H5::PredType::NATIVE_DOUBLE, dataspacexxx));
|
||||||
|
dataset3.write(VX, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset3 = H5::DataSet(group.createDataSet("VY", H5::PredType::NATIVE_DOUBLE, dataspacexxx));
|
||||||
|
dataset3.write(VY, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset3 = H5::DataSet(group.createDataSet("VZ", H5::PredType::NATIVE_DOUBLE, dataspacexxx));
|
||||||
|
dataset3.write(VZ, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset3 = H5::DataSet(group.createDataSet("AX", H5::PredType::NATIVE_DOUBLE, dataspacexxx));
|
||||||
|
dataset3.write(AX, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset3 = H5::DataSet(group.createDataSet("AY", H5::PredType::NATIVE_DOUBLE, dataspacexxx));
|
||||||
|
dataset3.write(AY, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
dataset3 = H5::DataSet(group.createDataSet("AZ", H5::PredType::NATIVE_DOUBLE, dataspacexxx));
|
||||||
|
dataset3.write(AZ, H5::PredType::NATIVE_DOUBLE);
|
||||||
|
|
||||||
|
dataset3.close();
|
||||||
|
dataspacexxx.close();
|
||||||
|
|
||||||
|
attr_dataspace.close();
|
||||||
|
attribute.close();
|
||||||
|
group.close();
|
||||||
|
file.close();
|
||||||
|
|
||||||
|
delete [] Mass;
|
||||||
|
delete [] X;
|
||||||
|
delete [] Y;
|
||||||
|
delete [] Z;
|
||||||
|
delete [] VX;
|
||||||
|
delete [] VY;
|
||||||
|
delete [] VZ;
|
||||||
|
delete [] AX;
|
||||||
|
delete [] AY;
|
||||||
|
delete [] AZ;
|
||||||
|
delete [] ID;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
19
src/io.hpp
Normal file
19
src/io.hpp
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
struct ParametersStruct {
|
||||||
|
int N;
|
||||||
|
std::string Filename;
|
||||||
|
int Seed;
|
||||||
|
Real dT1, dT2, Tcrit, ConstantStep;
|
||||||
|
std::string Prefix;
|
||||||
|
std::string OutputFormat;
|
||||||
|
int DeviceID;
|
||||||
|
};
|
||||||
|
|
||||||
|
void ReadICsASCII(std::string Filename, int N, Particle **P_h, int *FileSnapshotNum, Real *FileTime);
|
||||||
|
void WriteSnapshotASCII(std::string Prefix, int SnapNumber, Particle *hostP, int N, Real T);
|
||||||
|
void ParseInput(int argc, char *argv[], ParametersStruct *Params);
|
||||||
|
#ifdef ETICS_HDF5
|
||||||
|
void ReadICsHDF5(std::string Filename, int N, Particle **P_h, int *FileSnapshotNum, Real *FileTime);
|
||||||
|
void WriteSnapshotHDF5(std::string Prefix, int SnapNumber, Particle *hostP, int N, Real T);
|
||||||
|
#endif
|
||||||
347
src/main.cu
Normal file
347
src/main.cu
Normal file
|
|
@ -0,0 +1,347 @@
|
||||||
|
/**
|
||||||
|
* @file main.cu
|
||||||
|
* @author Yohai Meiron <ymeiron@pku.edu.cn>
|
||||||
|
* @version 1.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
// If you have CUDA then compile with:
|
||||||
|
// nvcc main.cu -lm -O3 -arch=sm_20 -o output
|
||||||
|
// Otherwise enable OpenMP and compile with GCC:
|
||||||
|
// g++ -x c++ -O3 -o output main.cu -DOMP -fopenmp -lgomp -I/home/ym
|
||||||
|
// The -I is the path to the parent directory where thrust is.
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
#ifdef ETICS_MPI
|
||||||
|
#include <mpi.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define CUDA
|
||||||
|
|
||||||
|
#ifdef OMP
|
||||||
|
#error Sorry, OpenMP is currently disabled.
|
||||||
|
#define THRUST_DEVICE_SYSTEM THRUST_DEVICE_BACKEND_OMP
|
||||||
|
#undef CUDA
|
||||||
|
#define PARALLEL_GET_TID omp_get_thread_num()
|
||||||
|
#define PARALLEL_ADVANCE omp_get_num_threads()
|
||||||
|
#define __global__
|
||||||
|
// #include <complex>
|
||||||
|
#else
|
||||||
|
#define PARALLEL_GET_TID threadIdx.x + blockIdx.x * blockDim.x
|
||||||
|
#define PARALLEL_ADVANCE blockDim.x * gridDim.x
|
||||||
|
// #include "cuda_complex.hpp"
|
||||||
|
#endif
|
||||||
|
// #define complex complex<Real>
|
||||||
|
|
||||||
|
#include <thrust/device_vector.h>
|
||||||
|
#include <thrust/host_vector.h>
|
||||||
|
#include <thrust/sort.h>
|
||||||
|
#include <thrust/partition.h>
|
||||||
|
#include <thrust/inner_product.h>
|
||||||
|
|
||||||
|
#include "common.hpp"
|
||||||
|
#include "io.hpp"
|
||||||
|
#include "ic.hpp"
|
||||||
|
#include "integrate.hpp"
|
||||||
|
|
||||||
|
#ifdef MEX
|
||||||
|
#define method mex
|
||||||
|
#include "mex.hpp"
|
||||||
|
#elif defined(SCF)
|
||||||
|
#define method scf
|
||||||
|
#include "scf.hpp"
|
||||||
|
#elif defined(MULTICENTER)
|
||||||
|
#define method multicenter
|
||||||
|
#include "multicenter.hpp"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace etics;
|
||||||
|
|
||||||
|
// GLOBAL VARIABLES
|
||||||
|
int MyRank, NumProcs;
|
||||||
|
Real ConstantStep = 0.001953125;
|
||||||
|
Real T, Step, dT1, dT2, Tcrit, FileTime;
|
||||||
|
int NSteps = 0, FileSnapshotNum;
|
||||||
|
|
||||||
|
struct ReorderingFunctor {
|
||||||
|
__host__ __device__ bool operator() (const Particle &lhs, const Particle &rhs) {
|
||||||
|
return (lhs.ID <= rhs.ID);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Real CalculateStepSize() {
|
||||||
|
return ConstantStep;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DisplayInformation(Integrator IntegratorObj) {
|
||||||
|
Real Ek = IntegratorObj.KineticEnergy();
|
||||||
|
Real Ep = IntegratorObj.PotentialEnergy();
|
||||||
|
Real Energy = Ek + Ep;
|
||||||
|
int N=IntegratorObj.GetN();
|
||||||
|
|
||||||
|
Real TotalEnergy;
|
||||||
|
int TotalN;
|
||||||
|
|
||||||
|
#ifdef ETICS_MPI
|
||||||
|
MPI_Reduce(&Energy, &TotalEnergy, 1, MPI_ETICS_REAL, MPI_SUM, 0, MPI_COMM_WORLD);
|
||||||
|
MPI_Reduce(&N, &TotalN, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
|
||||||
|
#else
|
||||||
|
TotalEnergy = Energy;
|
||||||
|
TotalN = N;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (MyRank==0) {
|
||||||
|
printf(" TIME =%6.2f NSTEPS =%6d ENERGY =%20.16f N = %d\n", T, NSteps, TotalEnergy, TotalN);
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void PrepareSnapshot(Integrator IntegratorObj, Particle **ParticleList, int *CurrentTotalN) {
|
||||||
|
Particle *LocalList;
|
||||||
|
int LocalBufferSize;
|
||||||
|
IntegratorObj.CopyParticlesToHost(&LocalList, &LocalBufferSize);
|
||||||
|
int TotalN = 0;
|
||||||
|
|
||||||
|
#ifdef ETICS_MPI
|
||||||
|
LocalBufferSize *= sizeof(Particle);
|
||||||
|
int BufferSizes[NumProcs];
|
||||||
|
MPI_Gather(&LocalBufferSize, 1, MPI_INT, BufferSizes, 1, MPI_INT, 0, MPI_COMM_WORLD);
|
||||||
|
int Displacements[NumProcs];
|
||||||
|
if (MyRank==0) {
|
||||||
|
for (int p = 0; p < NumProcs; p++) TotalN += BufferSizes[p]/sizeof(Particle);
|
||||||
|
Displacements[0] = 0;
|
||||||
|
for (int p = 1; p < NumProcs; p++) Displacements[p] = Displacements[p-1] + BufferSizes[p-1];
|
||||||
|
*ParticleList = new Particle[TotalN];
|
||||||
|
}
|
||||||
|
MPI_Gatherv(LocalList, LocalBufferSize, MPI_BYTE, *ParticleList, BufferSizes, Displacements, MPI_BYTE, 0, MPI_COMM_WORLD);
|
||||||
|
#else
|
||||||
|
TotalN = LocalBufferSize/sizeof(Particle);
|
||||||
|
*ParticleList = new Particle[TotalN];
|
||||||
|
std::copy(LocalList, LocalList+TotalN, *ParticleList);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef MEX
|
||||||
|
thrust::sort(*ParticleList, (*ParticleList)+TotalN, ReorderingFunctor());
|
||||||
|
#endif
|
||||||
|
*CurrentTotalN = TotalN;
|
||||||
|
free(LocalList);
|
||||||
|
//WARNING Honestly I dont get why I use allocate memory inside the fuction, its very confusing and makes it harder to find leaks.
|
||||||
|
//Future Yohai (2019): Yeah, it's done both here for **ParticleList and internally for *LocalList in Integrator::CopyParticlesToHost that does it, pretty ugly.
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
#ifdef ETICS_MPI
|
||||||
|
MPI_Init(&argc, &argv);
|
||||||
|
MPI_Comm_rank(MPI_COMM_WORLD, &MyRank);
|
||||||
|
MPI_Comm_size(MPI_COMM_WORLD, &NumProcs);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (MyRank==0) {
|
||||||
|
cerr << "Welcome to ETICS..." << endl;
|
||||||
|
#ifdef MEX
|
||||||
|
cerr << "Using method: MEX" << endl;
|
||||||
|
cerr << "LMAX=" << LMAX << endl;
|
||||||
|
#elif defined(SCF)
|
||||||
|
cerr << "Using method: SCF" << endl;
|
||||||
|
cerr << "LMAX=" << LMAX << endl;
|
||||||
|
cerr << "NMAX=" << NMAX << endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
string Filename;
|
||||||
|
int DeviceID = 0;
|
||||||
|
|
||||||
|
ParametersStruct Params;
|
||||||
|
// Instead of reading the input file with MyRank=0 and broadcast the result, we let every rank read the file. This probably saves ~20 lines of ugly MPI code.
|
||||||
|
ParseInput(argc, argv, &Params);
|
||||||
|
int N = Params.N; // total; will be divided by number of processes
|
||||||
|
Filename = Params.Filename;
|
||||||
|
Tcrit = Params.Tcrit;
|
||||||
|
ConstantStep = Params.ConstantStep;
|
||||||
|
DeviceID = Params.DeviceID;
|
||||||
|
dT1 = Params.dT1;
|
||||||
|
dT2 = Params.dT2;
|
||||||
|
|
||||||
|
if (DeviceID >= 0) {
|
||||||
|
if (cudaSetDevice(DeviceID) != cudaSuccess) {
|
||||||
|
cerr << "Problem opening device (ID=" << DeviceID << ")" << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cerr << "Skipping call to cudaSetDevice." << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read an input file and initialize the global particle structure.
|
||||||
|
Particle *FullList;
|
||||||
|
if (MyRank==0) {
|
||||||
|
if ((Filename == "_nofile_") || (Filename == "_hernquist_")) {
|
||||||
|
cout << "Generating a Hernquist sphere..." << endl;
|
||||||
|
etics::ic::hernquist(N, Params.Seed, &FullList);
|
||||||
|
FileSnapshotNum = 0;
|
||||||
|
FileTime = 0;
|
||||||
|
cout << "Done." << endl;
|
||||||
|
} else if (Filename == "_plummer_") {
|
||||||
|
cout << "Generating a Plummer sphere..." << endl;
|
||||||
|
etics::ic::plummer(N, Params.Seed, &FullList);
|
||||||
|
FileSnapshotNum = 0;
|
||||||
|
FileTime = 0;
|
||||||
|
cout << "Done." << endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
string InputFileSuffix = Filename.substr(Filename.find_last_of("."), Filename.length()-Filename.find_last_of("."));
|
||||||
|
|
||||||
|
if ((InputFileSuffix==".h5part") || (InputFileSuffix==".hdf5") || (InputFileSuffix==".h5")) {
|
||||||
|
#ifndef ETICS_HDF5
|
||||||
|
cerr << "Compiled without the \"ETICS_HDF5\" flag; cannot read input in this format." << endl;
|
||||||
|
exit(1);
|
||||||
|
#else
|
||||||
|
ReadICsHDF5(Filename, N, &FullList, &FileSnapshotNum, &FileTime);
|
||||||
|
#endif
|
||||||
|
} else ReadICsASCII(Filename, N, &FullList, &FileSnapshotNum, &FileTime);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef ETICS_HDF5
|
||||||
|
if (Params.OutputFormat == "hdf5") {
|
||||||
|
cerr << "Compiled without the \"ETICS_HDF5\" flag; cannot output in requested format." << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (!(Params.OutputFormat == "hdf5") && !(Params.OutputFormat == "ascii")) {
|
||||||
|
cerr << "Requested output format unrecognized." << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef ETICS_MPI
|
||||||
|
int LocalN = N / NumProcs;
|
||||||
|
|
||||||
|
int Remainder = N - LocalN*NumProcs;
|
||||||
|
if (MyRank==NumProcs-1) LocalN += Remainder;
|
||||||
|
Particle *LocalList = new Particle[LocalN];
|
||||||
|
int BufferSizes[NumProcs];
|
||||||
|
int Displacements[NumProcs];
|
||||||
|
if (MyRank==0) {
|
||||||
|
for (int p = 0; p < NumProcs; p++) BufferSizes[p] = (N / NumProcs)*sizeof(Particle);
|
||||||
|
BufferSizes[NumProcs-1] += Remainder*sizeof(Particle);
|
||||||
|
Displacements[0] = 0;
|
||||||
|
for (int p = 1; p < NumProcs; p++) Displacements[p] = Displacements[p-1] + BufferSizes[p-1];
|
||||||
|
}
|
||||||
|
MPI_Scatterv(FullList, BufferSizes, Displacements, MPI_BYTE, LocalList, LocalN*sizeof(Particle), MPI_BYTE, 0, MPI_COMM_WORLD);
|
||||||
|
|
||||||
|
if (MyRank==0) free(FullList);
|
||||||
|
N = LocalN;
|
||||||
|
#else
|
||||||
|
Particle *LocalList = new Particle[N];
|
||||||
|
copy(FullList, FullList+N, LocalList);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef ETICS_MPI
|
||||||
|
// This code block is just to ask each MPI process to report, and find if there is a conflict (i.e. multiple processes using the same physical device)
|
||||||
|
cudaDeviceProp DeviceProperties;
|
||||||
|
const int etics_str_len = 256;
|
||||||
|
cudaGetDeviceProperties(&DeviceProperties, 0);
|
||||||
|
char ProcessorName[etics_str_len];
|
||||||
|
int tmp;
|
||||||
|
MPI_Get_processor_name(ProcessorName, &tmp);
|
||||||
|
char UniqueDeviceID[etics_str_len];
|
||||||
|
sprintf(UniqueDeviceID, "%d$$$%s", DeviceProperties.pciBusID, ProcessorName);
|
||||||
|
char Message[etics_str_len];
|
||||||
|
sprintf(Message, "Hello from rank %d (of %d) on %s, using \"%s\" with PCI bus ID %d; this rank has %d particles.\n", MyRank, NumProcs, ProcessorName, DeviceProperties.name, DeviceProperties.pciBusID, LocalN);
|
||||||
|
if (MyRank == 0) {
|
||||||
|
printf("%s", Message);
|
||||||
|
fflush(stdout);
|
||||||
|
for (int Rank = 1; Rank < NumProcs; Rank++) {
|
||||||
|
MPI_Recv(Message, etics_str_len, MPI_CHAR, Rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
||||||
|
printf("%s", Message);
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
MPI_Send(Message, etics_str_len, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Here we collect the GPU IDs from all MPI processes and print a warning if one GPU is assigned to more than one process.
|
||||||
|
char *StrBuf;
|
||||||
|
StrBuf = (char*)malloc(NumProcs*etics_str_len*sizeof(char));
|
||||||
|
MPI_Gather( UniqueDeviceID, etics_str_len, MPI_CHAR, StrBuf, etics_str_len, MPI_CHAR, 0, MPI_COMM_WORLD);
|
||||||
|
if (MyRank == 0) {
|
||||||
|
bool DuplicateFound = false;
|
||||||
|
for (int i = 0; i < NumProcs; i++) {
|
||||||
|
for (int j = i+1; j < NumProcs; j++) {
|
||||||
|
if (strcmp(StrBuf+i*etics_str_len, StrBuf+j*etics_str_len) == 0) {
|
||||||
|
DuplicateFound = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (DuplicateFound) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (DuplicateFound) {
|
||||||
|
printf("\x1B[31m!!SEVERE WARNING!!\x1B[0m It seems the same physical GPU device was assigned to multiple processes; check the submission script.\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(StrBuf);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// Now initiate the code
|
||||||
|
method::Init(N, 0, 0, 0, 0);
|
||||||
|
|
||||||
|
// Initiate the integrator
|
||||||
|
Integrator IntegratorObj(LocalList, N);
|
||||||
|
|
||||||
|
// More initializations.
|
||||||
|
Real NextOutput = 0, NextSnapshot = 0;
|
||||||
|
T = FileTime;
|
||||||
|
int SnapNumber = FileSnapshotNum;
|
||||||
|
Step = CalculateStepSize();
|
||||||
|
|
||||||
|
while (T <= Tcrit) {
|
||||||
|
if (T >= NextOutput) {
|
||||||
|
DisplayInformation(IntegratorObj);
|
||||||
|
NextOutput += dT1;
|
||||||
|
}
|
||||||
|
if (T >= NextSnapshot) {
|
||||||
|
// cerr << "TESTING MODE!!! NOT WRITING SNAPSHOTS!!!" << endl;
|
||||||
|
int CurrentTotalN;
|
||||||
|
PrepareSnapshot(IntegratorObj, &FullList, &CurrentTotalN);
|
||||||
|
if (MyRank==0) {
|
||||||
|
if (Params.OutputFormat == "ascii") WriteSnapshotASCII(Params.Prefix, SnapNumber, FullList, CurrentTotalN, T);
|
||||||
|
#ifdef ETICS_HDF5
|
||||||
|
else if (Params.OutputFormat == "hdf5") WriteSnapshotHDF5(Params.Prefix, SnapNumber, FullList, CurrentTotalN, T);
|
||||||
|
#endif
|
||||||
|
else {cerr << "Error" << endl; exit(1);}
|
||||||
|
free(FullList);
|
||||||
|
}
|
||||||
|
SnapNumber++;
|
||||||
|
NextSnapshot += dT2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Take the drift step.
|
||||||
|
IntegratorObj.DriftStep(Step);
|
||||||
|
|
||||||
|
// Calculate the forces in the new positions.
|
||||||
|
IntegratorObj.CalculateGravity();
|
||||||
|
|
||||||
|
// Finish by taking the kick step.
|
||||||
|
// The kick functor also "commits" the predicted forces into the "acc" member.
|
||||||
|
IntegratorObj.KickStep(Step);
|
||||||
|
|
||||||
|
// N particles were implicitly propagated in this iteration.
|
||||||
|
NSteps += 1;
|
||||||
|
|
||||||
|
// Advance global time.
|
||||||
|
T += Step;
|
||||||
|
|
||||||
|
// Calculate the next step.
|
||||||
|
Step = CalculateStepSize();
|
||||||
|
}
|
||||||
|
IntegratorObj.~Integrator();
|
||||||
|
#ifdef ETICS_MPI
|
||||||
|
MPI_Finalize();
|
||||||
|
#endif
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
57
src/mathaux.cu
Normal file
57
src/mathaux.cu
Normal file
|
|
@ -0,0 +1,57 @@
|
||||||
|
#include "common.hpp"
|
||||||
|
// #include "mathaux.hpp"
|
||||||
|
|
||||||
|
double FactorialSCF_tmpname(int x) {
|
||||||
|
return (x <= 1) ? 1 : x*FactorialSCF_tmpname(x - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
double GammaPlusHalf(int x) {
|
||||||
|
return pow(0.25, x) * sqrt(M_PI) * FactorialSCF_tmpname(2*x) / FactorialSCF_tmpname(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RadialCoefficients(Real *Result) {
|
||||||
|
for (int n = 0; n <= NMAX; n++) {
|
||||||
|
for (int l = 0; l <= LMAX; l++) {
|
||||||
|
double AnlTilde = - pow(2.0, 8*l + 6) * FactorialSCF_tmpname(n) * (n + 2*l + 1.5);
|
||||||
|
AnlTilde *= GammaPlusHalf(2*l + 1) * GammaPlusHalf(2*l + 1);
|
||||||
|
AnlTilde /= (4*M_PI * (n*(n + 3 + 4*l)/2 + (l+1)*(2*l+1)) * FactorialSCF_tmpname(n+4*l+2));
|
||||||
|
Result[(LMAX+1)*n+l] = (Real)AnlTilde;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ Real Pl(int l, Real x) {
|
||||||
|
// No range check or anything... The first 10 polynomials are hardcoded
|
||||||
|
switch (l) {
|
||||||
|
case 0 : return 1;
|
||||||
|
case 1 : return x;
|
||||||
|
case 2 : return (1.5*x*x - 0.5);
|
||||||
|
case 3 : return (2.5*x*x*x - 1.5*x);
|
||||||
|
case 4 : {Real x2=x*x; return (4.375*x2*x2 - 3.75*x2 + 0.375);}
|
||||||
|
case 5 : {Real x2=x*x, x3=x2*x; return (7.875*x3*x2 - 8.75*x3 + 1.875*x);}
|
||||||
|
case 6 : {Real x2=x*x, x4=x2*x2; return (14.4375*x4*x2 - 19.6875*x4 + 6.5625*x2 - 0.3125);}
|
||||||
|
case 7 : {Real x2=x*x, x3=x2*x, x5=x3*x2; return (26.8125*x5*x2 - 43.3125*x5 + 19.6875*x3 - 2.1875*x);}
|
||||||
|
case 8 : {Real x2=x*x, x4=x2*x2, x6=x4*x2, x8=x4*x4; return (50.2734375*x8 - 93.84375*x6 + 54.140625*x4 - 9.84375*x2 + 0.2734375);}
|
||||||
|
case 9 : {Real x2=x*x, x3=x2*x, x5=x3*x2, x7=x5*x2, x9=x7*x2;
|
||||||
|
return (94.9609375*x9 - 201.09375*x7 + 140.765625*x5 - 36.09375*x3 + 2.4609375*x);}
|
||||||
|
case 10 : {Real x2=x*x, x4=x2*x2, x6=x4*x2, x8=x4*x4, x10=x6*x4;
|
||||||
|
return (180.42578125*x10 - 427.32421875*x8 + 351.9140625*x6 - 117.3046875*x4 + 13.53515625*x2 - 0.24609375);}
|
||||||
|
case 11 : {Real x2=x*x, x3=x2*x, x5=x3*x2, x7=x5*x2, x9=x7*x2, x11=x9*x2;
|
||||||
|
return (344.44921875*x11 - 902.12890625*x9 + 854.6484375*x7 - 351.9140625*x5 + 58.65234375*x3 - 2.70703125*x);}
|
||||||
|
case 12 : {Real x2=x*x, x4=x2*x2, x6=x4*x2, x8=x4*x4, x10=x6*x4, x12=x6*x6;
|
||||||
|
return (660.1943359375*x12 - 1894.470703125*x10 + 2029.7900390625*x8 - 997.08984375*x6 + 219.9462890625*x4 - 17.595703125*x2 + 0.2255859375);}
|
||||||
|
case 13 : {Real x2=x*x, x3=x2*x, x5=x3*x2, x7=x5*x2, x9=x7*x2, x11=x9*x2, x13=x9*x2;
|
||||||
|
return (1269.6044921875*x13 - 3961.166015625*x11 + 4736.1767578125*x9 - 2706.38671875*x7 + 747.8173828125*x5 -87.978515625*x3 + 2.9326171875*x);}
|
||||||
|
case 14 : {Real x2=x*x, x4=x2*x2, x6=x4*x2, x8=x4*x4, x10=x6*x4, x12=x6*x6, x14=x8*x6;
|
||||||
|
return (2448.52294921875*x14 - 8252.42919921875*x12 + 10893.2065429688*x10 - 7104.26513671875*x8 + 2368.08837890625*x6 - 373.90869140625*x4 + 21.99462890625*x2 - 0.20947265625);}
|
||||||
|
default : return -1e30;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AngularCoefficients(Real *Result) {
|
||||||
|
for (int l = 0; l <= LMAX; l++) {
|
||||||
|
for (int m = 0; m <= l; m++) {
|
||||||
|
Result[(l+1)*l/2+m] = (sqrt((2*l+1)/(4*M_PI)) * sqrt(FactorialSCF_tmpname(l-m)/FactorialSCF_tmpname(l+m)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
30
src/mathaux.hpp
Normal file
30
src/mathaux.hpp
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
#pragma once
|
||||||
|
#include "common.hpp"
|
||||||
|
#include <cuComplex.h>
|
||||||
|
#ifdef ETICS_DOUBLE_PRECISION
|
||||||
|
#define Complex cuDoubleComplex
|
||||||
|
#define make_Complex make_cuDoubleComplex
|
||||||
|
#define Complex_add cuCadd
|
||||||
|
#define Complex_mul cuCmul
|
||||||
|
#define Complex_imag cuCimag
|
||||||
|
#define Complex_real cuCreal
|
||||||
|
#define Complex_conj cuConj
|
||||||
|
#else
|
||||||
|
#define Complex cuFloatComplex
|
||||||
|
#define make_Complex make_cuFloatComplex
|
||||||
|
#define Complex_add cuCaddf
|
||||||
|
#define Complex_mul cuCmulf
|
||||||
|
#define Complex_imag cuCimagf
|
||||||
|
#define Complex_real cuCrealf
|
||||||
|
#define Complex_conj cuConjf
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define SQRT_4_PI 3.5449077018110320545963349666822903655950989122447742564276
|
||||||
|
|
||||||
|
|
||||||
|
double FactorialSCF_tmpname(int x);
|
||||||
|
double GammaPlusHalf(int x);
|
||||||
|
void RadialCoefficients(Real *Result);
|
||||||
|
#define MAX_HARDCODED_PL 14
|
||||||
|
__host__ __device__ Real Pl(int l, Real x);
|
||||||
|
void AngularCoefficients(Real *Result);
|
||||||
131
src/multicenter.cu
Normal file
131
src/multicenter.cu
Normal file
|
|
@ -0,0 +1,131 @@
|
||||||
|
#include "common.hpp"
|
||||||
|
#include "mathaux.hpp"
|
||||||
|
#include "scf.hpp"
|
||||||
|
#include "integrate.hpp"
|
||||||
|
#include "multicenter.hpp"
|
||||||
|
#include <thrust/transform.h>
|
||||||
|
#include <thrust/functional.h>
|
||||||
|
#include <thrust/execution_policy.h>
|
||||||
|
|
||||||
|
etics::scf::scfclass SCFObject1;
|
||||||
|
etics::scf::scfclass SCFObject2;
|
||||||
|
|
||||||
|
vec3 *F_tmp;
|
||||||
|
vec3 *F_tmp2;
|
||||||
|
Real *Potential_tmp;
|
||||||
|
Real *Potential_tmp2;
|
||||||
|
Particle Origins[2];
|
||||||
|
|
||||||
|
void etics::multicenter::Init(int Nmax, int k3gs_new, int k3bs_new, int k4gs_new, int k4bs_new) { // to be removed!!
|
||||||
|
cudaMalloc((void**)&F_tmp, Nmax*sizeof(vec3));
|
||||||
|
cudaMalloc((void**)&F_tmp2, Nmax*sizeof(vec3));
|
||||||
|
cudaMalloc((void**)&Potential_tmp, Nmax*sizeof(Real));
|
||||||
|
cudaMalloc((void**)&Potential_tmp2, Nmax*sizeof(Real));
|
||||||
|
SCFObject1.Init(Nmax, k3gs_new, k3bs_new, k4gs_new, k4bs_new);
|
||||||
|
// SCFObject1.OriginPos = vec3(5,0,0);
|
||||||
|
// SCFObject1.OriginVel = vec3(0,0.1,0);
|
||||||
|
|
||||||
|
SCFObject2.Init(Nmax, k3gs_new, k3bs_new, k4gs_new, k4bs_new);
|
||||||
|
Particle p;
|
||||||
|
p.m = 0;
|
||||||
|
p.pos = vec3(-6.000000e-01 , 0 , 0);
|
||||||
|
p.vel = vec3(0 , -3.803629e-03 , 0);
|
||||||
|
Origins[0] = p;
|
||||||
|
p.pos = vec3(2.940000e+01 , 0 , 0);
|
||||||
|
p.vel = vec3(0 , 1.863778e-01 , 0);
|
||||||
|
Origins[1] = p;
|
||||||
|
// OriginsIntegrator = etics::Integrator(Origins, 2);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::multicenter::CalculateGravity(Particle *P, int N, Real *Potential, vec3 *F) {
|
||||||
|
Particle PP[2];
|
||||||
|
cudaMemcpy(PP, P+N-2, 2*sizeof(Particle), cudaMemcpyDeviceToHost);
|
||||||
|
// std::cerr << PP[0].pos.x << std::endl;
|
||||||
|
// std::cerr << PP[0].pos.y << std::endl;
|
||||||
|
// std::cerr << PP[0].pos.z << std::endl;
|
||||||
|
// std::cerr << PP[1].pos.x << std::endl;
|
||||||
|
// std::cerr << PP[1].pos.y << std::endl;
|
||||||
|
// std::cerr << PP[1].pos.z << std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
// #error all we did last time was to add the offset to the subroutines, now the energy is nan. must be some kind of self force
|
||||||
|
|
||||||
|
SCFObject1.GetGpuLock();
|
||||||
|
SCFObject1.SendCachePointersToGPU();
|
||||||
|
SCFObject1.LoadParticlesToCache(P, 800000, PP[0].pos);
|
||||||
|
SCFObject1.CalculateCoefficients();
|
||||||
|
SCFObject1.ReleaseGpuLock();
|
||||||
|
|
||||||
|
|
||||||
|
SCFObject2.GetGpuLock();
|
||||||
|
SCFObject2.SendCachePointersToGPU();
|
||||||
|
SCFObject2.LoadParticlesToCache(P+800000, 200000-2, PP[1].pos);
|
||||||
|
SCFObject2.CalculateCoefficients();
|
||||||
|
SCFObject2.ReleaseGpuLock();
|
||||||
|
|
||||||
|
|
||||||
|
SCFObject1.GetGpuLock();
|
||||||
|
SCFObject1.SendCoeffsToGPU();
|
||||||
|
SCFObject1.SendCachePointersToGPU();
|
||||||
|
SCFObject1.LoadParticlesToCache(P, N-2, PP[0].pos);
|
||||||
|
SCFObject1.CalculateGravityFromCoefficients(Potential_tmp, F_tmp);
|
||||||
|
SCFObject1.ReleaseGpuLock();
|
||||||
|
|
||||||
|
SCFObject2.GetGpuLock();
|
||||||
|
SCFObject2.SendCoeffsToGPU();
|
||||||
|
SCFObject2.SendCachePointersToGPU();
|
||||||
|
SCFObject2.LoadParticlesToCache(P, N-2, PP[1].pos);
|
||||||
|
SCFObject2.CalculateGravityFromCoefficients(Potential_tmp2, F_tmp2);
|
||||||
|
SCFObject2.ReleaseGpuLock();
|
||||||
|
|
||||||
|
// now deal just with the central particles
|
||||||
|
SCFObject1.GetGpuLock();
|
||||||
|
SCFObject1.SendCoeffsToGPU();
|
||||||
|
SCFObject1.SendCachePointersToGPU();
|
||||||
|
SCFObject1.LoadParticlesToCache(P+N-1, 1, PP[0].pos); //instead of doing that, you need to "push" it at the end of the list
|
||||||
|
SCFObject1.CalculateGravityFromCoefficients(Potential_tmp+N-1, F_tmp+N-1); // the last particle is the second center, so should feel the first's force
|
||||||
|
SCFObject1.ReleaseGpuLock();
|
||||||
|
|
||||||
|
SCFObject2.GetGpuLock();
|
||||||
|
SCFObject2.SendCoeffsToGPU();
|
||||||
|
SCFObject2.SendCachePointersToGPU();
|
||||||
|
SCFObject2.LoadParticlesToCache(P+N-2, 1, PP[1].pos);
|
||||||
|
SCFObject2.CalculateGravityFromCoefficients(Potential_tmp+N-2, F_tmp+N-2);
|
||||||
|
SCFObject2.ReleaseGpuLock();
|
||||||
|
|
||||||
|
|
||||||
|
thrust::plus<Real> op;
|
||||||
|
thrust::transform(thrust::device, Potential_tmp, Potential_tmp+N-2, Potential_tmp2, Potential_tmp, thrust::plus<Real>());
|
||||||
|
thrust::transform(thrust::device, F_tmp, F_tmp+N-2, F_tmp2, F_tmp, thrust::plus<vec3>());
|
||||||
|
|
||||||
|
/*
|
||||||
|
vec3 FFF;
|
||||||
|
Real PPP;
|
||||||
|
SCFObject1.CalculateGravityAtPoint(vec3(100,0.001,0.001), &PPP, &FFF);
|
||||||
|
|
||||||
|
std::cerr << PPP << std::endl;
|
||||||
|
std::cerr << FFF.x << std::endl;
|
||||||
|
std::cerr << FFF.y << std::endl;
|
||||||
|
std::cerr << FFF.z << std::endl;*/
|
||||||
|
|
||||||
|
// vec3 FFF;
|
||||||
|
// Real PPP;
|
||||||
|
// cudaMemcpy(&PPP, Potential_tmp+N-2, 1*sizeof(Real), cudaMemcpyDeviceToHost);
|
||||||
|
// cudaMemcpy(&FFF, F_tmp+N-2, 1*sizeof(vec3), cudaMemcpyDeviceToHost);
|
||||||
|
// std::cerr << "===" << std::endl;
|
||||||
|
// std::cerr << PPP << std::endl;
|
||||||
|
// std::cerr << FFF.x << std::endl;
|
||||||
|
// std::cerr << FFF.y << std::endl;
|
||||||
|
// std::cerr << FFF.z << std::endl;
|
||||||
|
// cudaMemcpy(&PPP, Potential_tmp+N-1, 1*sizeof(Real), cudaMemcpyDeviceToHost);
|
||||||
|
// cudaMemcpy(&FFF, F_tmp+N-1, 1*sizeof(vec3), cudaMemcpyDeviceToHost);
|
||||||
|
// std::cerr << "+++" << std::endl;
|
||||||
|
// std::cerr << PPP << std::endl;
|
||||||
|
// std::cerr << FFF.x << std::endl;
|
||||||
|
// std::cerr << FFF.y << std::endl;
|
||||||
|
// std::cerr << FFF.z << std::endl;
|
||||||
|
|
||||||
|
cudaMemcpy(Potential, Potential_tmp, N*sizeof(Real), cudaMemcpyDeviceToDevice);
|
||||||
|
cudaMemcpy(F, F_tmp, N*sizeof(vec3), cudaMemcpyDeviceToDevice);
|
||||||
|
}
|
||||||
6
src/multicenter.hpp
Normal file
6
src/multicenter.hpp
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
namespace etics {
|
||||||
|
namespace multicenter {
|
||||||
|
void Init(int Nmax, int k3gs_new, int k3bs_new, int k4gs_new, int k4bs_new);
|
||||||
|
void CalculateGravity(Particle *P, int N, Real *Potential, vec3 *F);
|
||||||
|
}
|
||||||
|
}
|
||||||
15
src/noboost.inc
Normal file
15
src/noboost.inc
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
* This is an ETICS parameter file used when the BOOST library is not available.
|
||||||
|
* When changing a parameter, the code has to be recompiled; if using BOOST, this
|
||||||
|
* is not required. The syntax is C++, just fill in the variables used in the
|
||||||
|
* regular initialization file. No error messages will be shown if input is bad.
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
Filename = "_nofile_ 0";
|
||||||
|
N = 1048576;
|
||||||
|
Tcrit = 16;
|
||||||
|
StepSize = 0.001953125;
|
||||||
|
dT1 = 0.001953125;
|
||||||
|
dT2 = 0.1;
|
||||||
|
Prefix = "test_del_me";
|
||||||
|
OutputFormat = "ascii";
|
||||||
518
src/scf.cu
Normal file
518
src/scf.cu
Normal file
|
|
@ -0,0 +1,518 @@
|
||||||
|
/**
|
||||||
|
* @file
|
||||||
|
* @author Yohai Meiron <ymeiron@pku.edu.cn>
|
||||||
|
* @brief Functions to calculate gravitational force using the SCF method.
|
||||||
|
*/
|
||||||
|
#include "common.hpp"
|
||||||
|
#include "mathaux.hpp"
|
||||||
|
#include "scf.hpp"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
using std::cout;
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cmath>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
// Use the MPI flag only for the standalone program
|
||||||
|
#ifdef ETICS_MPI
|
||||||
|
#include <mpi.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace etics {
|
||||||
|
namespace scf {
|
||||||
|
scfclass SCFObject;
|
||||||
|
|
||||||
|
// These are truly global variables that are initialized once and can be read by every scfclass object.
|
||||||
|
bool ScfGloballyInitialized = false;
|
||||||
|
__constant__ Real RadCoeff[(NMAX+1)*(LMAX+1)];
|
||||||
|
__constant__ Real AngCoeff[(LMAX+1)*(LMAX+2)/2];
|
||||||
|
Real RadCoeff_h[(NMAX+1)*(LMAX+1)];
|
||||||
|
Real AngCoeff_h[(LMAX+1)*(LMAX+2)/2];
|
||||||
|
__constant__ Real LengthScale;
|
||||||
|
Real LengthScale_h;
|
||||||
|
|
||||||
|
// These are global variables that are read/written by each scfclass object; they must be locked before use.
|
||||||
|
__constant__ Complex A[(NMAX+1)*(LMAX+1)*(LMAX+2)/2];
|
||||||
|
__constant__ CacheStruct Cache;
|
||||||
|
|
||||||
|
// These variables provide the mechanism to lock and release the global constant memory variables above.
|
||||||
|
scfclass *GpuLockOwner = NULL;
|
||||||
|
#define GPU_LOCK_PANIC \
|
||||||
|
{ \
|
||||||
|
std::cerr << "Error: GPU lock failed; lock not owned by this object!" << endl; \
|
||||||
|
exit(1); \
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace etics {
|
||||||
|
namespace scf {
|
||||||
|
int blockSizeToDynamicSMemSize(int BlockSize) { // Should be a lambda function
|
||||||
|
return (LMAX+1)*sizeof(Complex)*BlockSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Class implementation
|
||||||
|
etics::scf::scfclass::scfclass() {
|
||||||
|
N=0;
|
||||||
|
Nmax=0;
|
||||||
|
k3gs=-1; k3bs=-1; k4gs=-1; k4bs=-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
etics::scf::scfclass::~scfclass() {
|
||||||
|
if (etics::scf::GpuLockOwner == this) ReleaseGpuLock();
|
||||||
|
cudaFree(Cache_h.xi);
|
||||||
|
cudaFree(Cache_h.Phi0l);
|
||||||
|
cudaFree(Cache_h.Wprev1);
|
||||||
|
cudaFree(Cache_h.Wprev2);
|
||||||
|
cudaFree(Cache_h.costheta);
|
||||||
|
cudaFree(Cache_h.sintheta_I);
|
||||||
|
cudaFree(Cache_h.Exponent);
|
||||||
|
cudaFree(Cache_h.mass);
|
||||||
|
free(PartialSum_h);
|
||||||
|
cudaFree(PartialSum);
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::scfclass::GetGpuLock() {
|
||||||
|
if (etics::scf::GpuLockOwner == this) return;
|
||||||
|
if (etics::scf::GpuLockOwner == NULL) etics::scf::GpuLockOwner = this;
|
||||||
|
else GPU_LOCK_PANIC;
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::scfclass::ReleaseGpuLock() {
|
||||||
|
if (etics::scf::GpuLockOwner == NULL) return;
|
||||||
|
if (etics::scf::GpuLockOwner == this) etics::scf::GpuLockOwner = NULL;
|
||||||
|
else GPU_LOCK_PANIC;
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::scfclass::CalculateCoefficients(int n, int l) {
|
||||||
|
int BaseAddress = n*(LMAX+1)*(LMAX+2)/2 + l*(l+1)/2;
|
||||||
|
etics::scf::CalculateCoefficientsPartial<<<k3gs,k3bs,k3bs*sizeof(Complex)*(LMAX+1)>>>(N, n, l, PartialSum);
|
||||||
|
cudaMemcpy(PartialSum_h, PartialSum, k3gs*(l+1)*sizeof(Complex), cudaMemcpyDeviceToHost);
|
||||||
|
for (int m = 0; m <= l; m++)
|
||||||
|
for (int Block=0; Block<k3gs; Block++)
|
||||||
|
A_h[BaseAddress + m] = Complex_add(A_h[BaseAddress + m], PartialSum_h[Block*(l+1) + m]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::scfclass::CalculateCoefficients() {
|
||||||
|
memset(A_h, 0, (NMAX+1)*(LMAX+1)*(LMAX+2)/2 * sizeof(Complex));
|
||||||
|
for (int l = 0; l <= LMAX; l++) {
|
||||||
|
if (l > 0) etics::scf::CalculatePhi0l<<<128,128>>>(N, l); // wouldn't it make sense just putting it after the n-loop finishes? Probably not becasue then we need to skip at the last iter
|
||||||
|
for (int n = 0; n <= NMAX; n++) {
|
||||||
|
CalculateCoefficients(n, l);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::scfclass::GetCoefficients(Complex *A) {
|
||||||
|
std::copy(A_h, A_h+(NMAX+1)*(LMAX+1)*(LMAX+2)/2, A);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void etics::scf::scfclass::SendCoeffsToGPU() {
|
||||||
|
if (etics::scf::GpuLockOwner != this) GPU_LOCK_PANIC;
|
||||||
|
cudaMemcpyToSymbol(etics::scf::A, A_h, (NMAX+1)*(LMAX+1)*(LMAX+2)/2 * sizeof(Complex));
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::scfclass::SendCachePointersToGPU() {
|
||||||
|
if (etics::scf::GpuLockOwner != this) GPU_LOCK_PANIC;
|
||||||
|
cudaMemcpyToSymbol(etics::scf::Cache, &Cache_h, sizeof(etics::scf::CacheStruct));
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::scfclass::LoadParticlesToCache(Particle *P, int N, vec3 Offset) {
|
||||||
|
if (N > Nmax) {
|
||||||
|
fprintf(stderr, "You tried to load too many particles! (%d > %d)", N, Nmax);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
this->N = N;
|
||||||
|
etics::scf::LoadParticlesToCache<<<128,128>>>(P, N, Offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::scfclass::CalculateGravityFromCoefficients(Real *Potential, vec3 *F) {
|
||||||
|
etics::scf::CalculateGravityFromCoefficients<<<(N+k4bs-1)/k4bs,k4bs>>>(N, Potential, F);
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::scfclass::CalculateGravityAtPoint(vec3 Pos, Real *Potential, vec3 *F) {
|
||||||
|
static bool first = true;
|
||||||
|
if (first) {
|
||||||
|
printf("WARNING!!! the code has been modified and CalculateGravityAtPoint may not work as planned.\n");
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
Real r = sqrt(Pos.x*Pos.x + Pos.y*Pos.y + Pos.z*Pos.z);
|
||||||
|
Real xi = (r-1)/(r+1);
|
||||||
|
Real costheta = Pos.z/r;
|
||||||
|
Real Normal_I = rsqrt(Pos.x*Pos.x + Pos.y*Pos.y);
|
||||||
|
Complex Exponent = make_Complex(Pos.x*Normal_I, Pos.y*Normal_I);
|
||||||
|
|
||||||
|
etics::scf::CalculateGravityTemplate<0>(xi, costheta, Exponent, A_h, F, Potential);
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::scfclass::CalculateGravity(Particle *P, int N, Real *Potential, vec3 *F) {
|
||||||
|
GetGpuLock();
|
||||||
|
SendCachePointersToGPU();
|
||||||
|
LoadParticlesToCache(P, N);
|
||||||
|
CalculateCoefficients();
|
||||||
|
#ifdef ETICS_MPI
|
||||||
|
Complex ATotal[(NMAX+1)*(LMAX+1)*(LMAX+2)/2];
|
||||||
|
MPI_Allreduce(&A_h, &ATotal, (NMAX+1)*(LMAX+1)*(LMAX+2)/2*2, MPI_ETICS_REAL, MPI_SUM, MPI_COMM_WORLD);
|
||||||
|
std::copy(ATotal, ATotal+(NMAX+1)*(LMAX+1)*(LMAX+2)/2, A_h);
|
||||||
|
// not really need this copy, just calculate the coefficients in some local array, then sum it into a global array (A_h or somthing) and copy it to GPUs
|
||||||
|
#endif
|
||||||
|
SendCoeffsToGPU();
|
||||||
|
CalculateGravityFromCoefficients(Potential, F);
|
||||||
|
ReleaseGpuLock();
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::scfclass::SetLaunchConfiguration(int k3gs_new, int k3bs_new, int k4gs_new, int k4bs_new) {
|
||||||
|
if (k3gs_new != k3gs) {
|
||||||
|
if (PartialSum_h != NULL) free(PartialSum_h);
|
||||||
|
if (PartialSum != NULL) cudaFree(PartialSum_h);
|
||||||
|
PartialSum_h = (Complex*)malloc(k3gs_new*(LMAX+1)*sizeof(Complex));
|
||||||
|
cudaMalloc((void**)&PartialSum, k3gs_new*(LMAX+1)*sizeof(Complex));
|
||||||
|
}
|
||||||
|
k3gs = k3gs_new;
|
||||||
|
k3bs = k3bs_new;
|
||||||
|
k4gs = k4gs_new;
|
||||||
|
k4bs = k4bs_new;
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::scfclass::Init(int Nmax, int k3gs_new, int k3bs_new, int k4gs_new, int k4bs_new) {
|
||||||
|
// First, initialize global arrays if not already initialized.
|
||||||
|
if (!etics::scf::ScfGloballyInitialized) etics::scf::GlobalInit();
|
||||||
|
|
||||||
|
// Next, set the launch configuation (either guess it or use what the user specified).
|
||||||
|
if ((k3gs_new<=0) || (k3bs_new<=0) || (k4gs_new<=0) || (k4bs_new<=0)) {
|
||||||
|
int k3gs_new, k3bs_new, k4gs_new, k4bs_new;
|
||||||
|
etics::scf::GuessLaunchConfiguration(Nmax, &k3gs_new, &k3bs_new, &k4gs_new, &k4bs_new);
|
||||||
|
SetLaunchConfiguration(k3gs_new, k3bs_new, k4gs_new, k4bs_new);
|
||||||
|
}
|
||||||
|
else SetLaunchConfiguration(k3gs_new, k3bs_new, k4gs_new, k4bs_new);
|
||||||
|
|
||||||
|
// If Nmax > 0 then initialize the cahche memory.
|
||||||
|
if (Nmax > 0) {
|
||||||
|
this->Nmax = Nmax;
|
||||||
|
cudaMalloc((void**)&Cache_h.xi, Nmax * sizeof(Real));
|
||||||
|
cudaMalloc((void**)&Cache_h.Phi0l, Nmax * sizeof(Real));
|
||||||
|
cudaMalloc((void**)&Cache_h.Wprev1, Nmax * sizeof(Real));
|
||||||
|
cudaMalloc((void**)&Cache_h.Wprev2, Nmax * sizeof(Real));
|
||||||
|
cudaMalloc((void**)&Cache_h.costheta, Nmax * sizeof(Real));
|
||||||
|
cudaMalloc((void**)&Cache_h.sintheta_I, Nmax * sizeof(Real));
|
||||||
|
cudaMalloc((void**)&Cache_h.Exponent, Nmax * sizeof(Complex));
|
||||||
|
cudaMalloc((void**)&Cache_h.mass, Nmax * sizeof(Real));
|
||||||
|
} else {
|
||||||
|
Cache_h.xi = NULL;
|
||||||
|
Cache_h.Phi0l = NULL;
|
||||||
|
Cache_h.Wprev1 = NULL;
|
||||||
|
Cache_h.Wprev2 = NULL;
|
||||||
|
Cache_h.costheta = NULL;
|
||||||
|
Cache_h.sintheta_I = NULL;
|
||||||
|
Cache_h.Exponent = NULL;
|
||||||
|
Cache_h.mass = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Kernels
|
||||||
|
|
||||||
|
__global__ void etics::scf::LoadParticlesToCache(Particle *P, int N, vec3 Offset) { // formerly "Kernel1"
|
||||||
|
int i = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
|
while (i < N) {
|
||||||
|
vec3 Pos = (P[i].pos - Offset)*etics::scf::LengthScale;
|
||||||
|
Real r = sqrt(Pos.x*Pos.x + Pos.y*Pos.y + Pos.z*Pos.z);
|
||||||
|
Real xi = (r-1)/(r+1);
|
||||||
|
Real costheta = Pos.z/r;
|
||||||
|
Real sintheta_I = rsqrt(1-costheta*costheta);
|
||||||
|
|
||||||
|
Cache.xi[i] = xi;
|
||||||
|
Cache.Phi0l[i] = 0.5 * (1 - xi);
|
||||||
|
Cache.costheta[i] = costheta;
|
||||||
|
Cache.sintheta_I[i] = sintheta_I;
|
||||||
|
|
||||||
|
Real Normal_I = rsqrt(Pos.x*Pos.x + Pos.y*Pos.y);
|
||||||
|
Complex Exponent = make_Complex(Pos.x*Normal_I, -Pos.y*Normal_I);
|
||||||
|
Cache.Exponent[i] = Exponent;
|
||||||
|
Cache.mass[i] = P[i].m;
|
||||||
|
|
||||||
|
i += blockDim.x * gridDim.x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__global__ void etics::scf::CalculatePhi0l(int N, int l) { // formerly "Kernel2"
|
||||||
|
int i = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
|
while (i < N) {
|
||||||
|
Real xi = Cache.xi[i];
|
||||||
|
Cache.Phi0l[i] *= 0.25*(1-xi*xi);
|
||||||
|
|
||||||
|
i += blockDim.x * gridDim.x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__global__ void etics::scf::CalculateCoefficientsPartial(int N, int n, int l, Complex *PartialSum) { // formerly "Kernel3"
|
||||||
|
extern __shared__ Complex ReductionCache[]; // size is determined in kernel launch
|
||||||
|
int tid = threadIdx.x;
|
||||||
|
for (int m = 0; m <= l; m++) ReductionCache[m*blockDim.x+tid] = make_Complex(0, 0);
|
||||||
|
int i = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
|
while (i < N) {
|
||||||
|
Real xi = Cache.xi[i];
|
||||||
|
Real Wnl;
|
||||||
|
if (n == 0) Wnl = 1;
|
||||||
|
else if (n == 1) {Wnl = (4*l+3)*xi; Cache.Wprev2[i] = Wnl;}
|
||||||
|
else if (n == 2) {Wnl = -(2*l+1.5)+( 8*l*(l+2) +7.5)*xi*xi; Cache.Wprev1[i] = Wnl;}
|
||||||
|
else {
|
||||||
|
Real Wprev1 = Cache.Wprev1[i];
|
||||||
|
Wnl = (xi*(2*n+4*l+1)*Wprev1 - (n+4*l+1)*Cache.Wprev2[i])/(Real)n;
|
||||||
|
if (n < NMAX) { // Writing is expensive, avoid if possible.
|
||||||
|
Cache.Wprev2[i] = Wprev1;
|
||||||
|
Cache.Wprev1[i] = Wnl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Real RadialPart = - Cache.mass[i] * SQRT_4_PI * Cache.Phi0l[i] * Wnl * RadCoeff[(LMAX+1)*n+l];
|
||||||
|
Real costheta = Cache.costheta[i];
|
||||||
|
Real Plm = Pl(l, costheta);
|
||||||
|
ReductionCache[tid] = Complex_add(ReductionCache[tid], make_Complex(RadialPart * Plm * AngCoeff[(l+1)*l/2],0));
|
||||||
|
if (l == 0) {i += blockDim.x * gridDim.x; continue;}
|
||||||
|
|
||||||
|
//////////////////////////////// ugly fix
|
||||||
|
if ((costheta < -0.999) || (costheta > +0.999)) {
|
||||||
|
i += blockDim.x * gridDim.x;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
//////////////////////////////// ugly fix
|
||||||
|
|
||||||
|
Real Plm_prev1 = Plm;
|
||||||
|
Real sintheta_I = Cache.sintheta_I[i];
|
||||||
|
Plm = (costheta*Plm - Pl(l-1, costheta))*l*sintheta_I;
|
||||||
|
Complex Exponent = Cache.Exponent[i];
|
||||||
|
Real tmp0 = RadialPart * Plm * AngCoeff[(l+1)*l/2+1];
|
||||||
|
ReductionCache[blockDim.x+tid] = Complex_add(ReductionCache[blockDim.x+tid], make_Complex(tmp0 * Exponent.x, tmp0 * Exponent.y));
|
||||||
|
|
||||||
|
if (l == 1) {i += blockDim.x * gridDim.x; continue;}
|
||||||
|
|
||||||
|
Complex TorodialPart = Exponent;
|
||||||
|
for (int m = 2; m <= l; m++) { // make sure no redundancy at the end of the loop
|
||||||
|
Real Plm_prev2 = Plm_prev1;
|
||||||
|
Plm_prev1 = Plm;
|
||||||
|
Plm = - 2*(m-1)*costheta*sintheta_I*Plm_prev1 - (l+m-1)*(l-m+2)*Plm_prev2;
|
||||||
|
TorodialPart = Complex_mul(TorodialPart, Exponent);
|
||||||
|
tmp0 = RadialPart * Plm * AngCoeff[(l+1)*l/2+m];
|
||||||
|
ReductionCache[m*blockDim.x+tid] = Complex_add(ReductionCache[m*blockDim.x+tid], make_Complex(tmp0 * TorodialPart.x, tmp0 * TorodialPart.y));
|
||||||
|
}
|
||||||
|
i += blockDim.x * gridDim.x;
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
for (int m = 0; m <= l; m++) {
|
||||||
|
i = blockDim.x/2;
|
||||||
|
while (i != 0) {
|
||||||
|
if (tid < i)
|
||||||
|
ReductionCache[m*blockDim.x+tid] = Complex_add(ReductionCache[m*blockDim.x+tid], ReductionCache[m*blockDim.x+tid+i]);
|
||||||
|
__syncthreads();
|
||||||
|
i /= 2;
|
||||||
|
}
|
||||||
|
if (tid == 0)
|
||||||
|
PartialSum[blockIdx.x*(l+1) + m] = ReductionCache[m*blockDim.x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int Mode>
|
||||||
|
__host__ __device__ void etics::scf::CalculateGravityTemplate(Real xi, Real costheta, Complex Exponent, Complex *A, vec3 *F, Real *Potential) {
|
||||||
|
// it gets A as parameter because it can be either on host or device
|
||||||
|
// 0 = both force and potential, 1 = only force, 2 = only pot
|
||||||
|
//WARNING !!! This cannot really be a host function because it needs device cahce, angular coefficients which are on device !!!
|
||||||
|
#define A(n,l,m) A[n*(LMAX+1)*(LMAX+2)/2 + l*(l+1)/2 + m]
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
|
#define AngCoeff AngCoeff_h
|
||||||
|
#define LengthScale LengthScale_h
|
||||||
|
#endif
|
||||||
|
Real dPhiLeft;
|
||||||
|
Real dPhiLeftMul;
|
||||||
|
Real dPhiRight;
|
||||||
|
Real dPhiRightAdd;
|
||||||
|
Real dPhi;
|
||||||
|
Real RadialPart2;
|
||||||
|
Real PlmDerivTheta;
|
||||||
|
|
||||||
|
Real Pot = 0;
|
||||||
|
Real Fr = 0, Ftheta = 0, Fphi = 0;
|
||||||
|
Real OneOverXiPlusOne = 1/(1+xi);
|
||||||
|
Real r_I = (1-xi)*OneOverXiPlusOne;
|
||||||
|
Real r = 1/r_I; // It's quite likely we can do everything without r.
|
||||||
|
Real sintheta_I = rsqrt(1-costheta*costheta); // faster than using cachei // You sure??? in K3 it's the opposite
|
||||||
|
|
||||||
|
Complex ExponentTmp[LMAX];
|
||||||
|
ExponentTmp[0] = Exponent;
|
||||||
|
for (int m = 1; m < LMAX; m++) ExponentTmp[m] = Complex_mul(ExponentTmp[m-1],Exponent);
|
||||||
|
if (Mode != 2) {
|
||||||
|
Real xi2 = xi*xi;
|
||||||
|
Real xi3 = xi2*xi;
|
||||||
|
dPhiLeft = -0.25*OneOverXiPlusOne;
|
||||||
|
dPhiLeftMul = 0.25*(1-xi2);
|
||||||
|
dPhiRight = xi3 - xi2 - xi + 1;
|
||||||
|
dPhiRightAdd = 2*(xi3 - 2*xi2 + xi);
|
||||||
|
}
|
||||||
|
Real Phi0l = 1/(1+r);
|
||||||
|
Real tmp1 = Phi0l*Phi0l*r;
|
||||||
|
for (int l = 0; l <= LMAX; l++) {
|
||||||
|
if (Mode != 2) {
|
||||||
|
if (l > 0) {
|
||||||
|
dPhiLeft *= dPhiLeftMul;
|
||||||
|
dPhiRight += dPhiRightAdd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (Mode != 2) dPhi = dPhiLeft * dPhiRight;
|
||||||
|
for (int n = 0; n <= NMAX; n++) {
|
||||||
|
Real Wnl, Wprev1, Wprev2;
|
||||||
|
if (n == 0) Wnl = 1;
|
||||||
|
else if (n == 1) {Wnl = (4*l+3)*xi; Wprev2 = Wnl;}
|
||||||
|
else if (n == 2) {Wnl = -(2*l+1.5)+( 8*l*(l+2) +7.5)*xi*xi; Wprev1 = Wnl;}
|
||||||
|
else {
|
||||||
|
Wnl = (xi*(2*n+4*l+1)*Wprev1 - (n+4*l+1)*Wprev2)/(Real)n;
|
||||||
|
Wprev2 = Wprev1;
|
||||||
|
Wprev1 = Wnl;
|
||||||
|
}
|
||||||
|
|
||||||
|
Real Wderiv = 0;
|
||||||
|
if (n == 1) {Wderiv = 4*l + 3;}
|
||||||
|
else if (n > 1) {
|
||||||
|
Wderiv = (-n*xi*Wnl + (n+4*l+2)*Wprev2)/(1-xi*xi);
|
||||||
|
} // From an unknown reason it's faster to have this Block separate from the previous one.
|
||||||
|
|
||||||
|
Real RadialPart = - SQRT_4_PI * Phi0l * Wnl;
|
||||||
|
if (Mode != 2) RadialPart2 = SQRT_4_PI * (dPhi*Wnl + Phi0l*Wderiv*2/pow(1+r,2));
|
||||||
|
|
||||||
|
Real Plm = Pl(l, costheta);
|
||||||
|
Real tmp2 = Complex_real(A(n,l,0)) * AngCoeff[(l+1)*l/2] * Plm;
|
||||||
|
if (Mode != 1) Pot += RadialPart * tmp2;
|
||||||
|
if (Mode != 2) Fr += RadialPart2 * tmp2;
|
||||||
|
|
||||||
|
if (l == 0) continue;
|
||||||
|
|
||||||
|
//////////////////////////////// ugly fix
|
||||||
|
if ((costheta < -0.999) || (costheta > +0.999)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
//////////////////////////////// ugly fix
|
||||||
|
|
||||||
|
// The Block below is l>=1, m=0.
|
||||||
|
if (Mode != 2) {
|
||||||
|
PlmDerivTheta = (costheta*Plm - Pl(l-1, costheta))*l*sintheta_I; //TODO check if storing Pl(l-1) somewhere makes it faster
|
||||||
|
Ftheta += - PlmDerivTheta * AngCoeff[(l+1)*l/2] * Complex_real(A(n,l,0)) * RadialPart * r_I;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The Block below is l>=1, m=1.
|
||||||
|
if (Mode == 2) PlmDerivTheta = (costheta*Plm - Pl(l-1, costheta))*l*sintheta_I; //TODO see above regarding storing Pl(l-1)
|
||||||
|
Real Plm_prev1 = Plm;
|
||||||
|
Plm = PlmDerivTheta; // PlmDerivTheta equals Plm for m=1.
|
||||||
|
if (Mode != 2) PlmDerivTheta = - Plm*costheta*sintheta_I - l*(l+1)*Plm_prev1;
|
||||||
|
tmp2 = 2 * AngCoeff[(l+1)*l/2+1];
|
||||||
|
Complex tmp3 = Complex_mul(ExponentTmp[0], A(n,l,1));
|
||||||
|
Complex tmp4 = make_Complex(tmp2 * tmp3.x, tmp2 * tmp3.y);
|
||||||
|
Complex tmp5 = make_Complex(Plm * tmp4.x, Plm * tmp4.y);
|
||||||
|
Complex tmp6 = make_Complex(RadialPart * tmp5.x, RadialPart * tmp5.y);
|
||||||
|
if (Mode != 1) Pot += Complex_real(tmp6);
|
||||||
|
if (Mode != 2) {
|
||||||
|
Fr += RadialPart2 * Complex_real(tmp5);
|
||||||
|
Fphi += Complex_imag(tmp6) * sintheta_I * r_I;
|
||||||
|
Ftheta += - RadialPart * PlmDerivTheta * Complex_real(tmp4) * r_I;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (l == 1) continue;
|
||||||
|
|
||||||
|
for (int m = 2; m <= l; m++) {
|
||||||
|
Real Plm_prev2 = Plm_prev1;
|
||||||
|
Plm_prev1 = Plm;
|
||||||
|
Plm = - 2*(m-1)*costheta*sintheta_I*Plm_prev1 - (l+m-1)*(l-m+2)*Plm_prev2;
|
||||||
|
tmp2 = 2 * AngCoeff[(l+1)*l/2+m];
|
||||||
|
tmp3 = Complex_mul(ExponentTmp[m-1], A(n,l,m));
|
||||||
|
tmp4 = make_Complex(tmp2 * tmp3.x, tmp2 * tmp3.y);
|
||||||
|
tmp5 = make_Complex(Plm * tmp4.x, Plm * tmp4.y);
|
||||||
|
tmp6 = make_Complex(RadialPart * tmp5.x, RadialPart * tmp5.y);
|
||||||
|
if (Mode != 1) Pot += Complex_real(tmp6);
|
||||||
|
if (Mode != 2) {
|
||||||
|
PlmDerivTheta = - m*Plm*costheta*sintheta_I - (l+m)*(l-m+1)*Plm_prev1;
|
||||||
|
Fr += RadialPart2 * Complex_real(tmp5);
|
||||||
|
Fphi += m * Complex_imag(tmp6) * sintheta_I * r_I;
|
||||||
|
Ftheta += - RadialPart * PlmDerivTheta * Complex_real(tmp4) * r_I;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Phi0l *= tmp1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Mode != 2) {
|
||||||
|
Real sintheta = 1/sintheta_I;
|
||||||
|
Real tanphi = Exponent.y/Exponent.x;
|
||||||
|
Real cosphi = ((Exponent.x >= 0)?(+1):(-1)) * rsqrt(1+tanphi*tanphi); // no simpler way to get sign bit?
|
||||||
|
Real sinphi = tanphi*cosphi;
|
||||||
|
*F = vec3(sintheta*cosphi*Fr + costheta*cosphi*Ftheta - sinphi*Fphi, sintheta*sinphi*Fr + costheta*sinphi*Ftheta + cosphi*Fphi, costheta*Fr - sintheta*Ftheta);
|
||||||
|
*F = (*F)*LengthScale*LengthScale;
|
||||||
|
}
|
||||||
|
if (Mode != 1) *Potential = Pot*LengthScale;
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
|
#undef AngCoeff
|
||||||
|
#undef LengthScale
|
||||||
|
#endif
|
||||||
|
#undef A
|
||||||
|
}
|
||||||
|
|
||||||
|
__global__ void etics::scf::CalculateGravityFromCoefficients(int N, Real *Potential, vec3 *F) { // formerly "Kernel4"
|
||||||
|
int i = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
|
while (i < N) {
|
||||||
|
CalculateGravityTemplate<0>(Cache.xi[i], Cache.costheta[i], Complex_conj(Cache.Exponent[i]), A, &F[i], &Potential[i]);
|
||||||
|
i += blockDim.x * gridDim.x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Global initialization
|
||||||
|
void etics::scf::GlobalInit() {
|
||||||
|
int pid = getpid();
|
||||||
|
int devid;
|
||||||
|
cudaGetDevice(&devid);
|
||||||
|
char hostname[256];
|
||||||
|
gethostname(hostname, 256);
|
||||||
|
char output_string[256];
|
||||||
|
sprintf(output_string, "ETICS: pid %05d using GPU device number %d on %s", pid, devid, hostname);
|
||||||
|
std::cout << output_string << std::endl;
|
||||||
|
|
||||||
|
RadialCoefficients(etics::scf::RadCoeff_h);
|
||||||
|
cudaMemcpyToSymbol(etics::scf::RadCoeff, etics::scf::RadCoeff_h, (NMAX+1)*(LMAX+1) * sizeof(Real));
|
||||||
|
AngularCoefficients(etics::scf::AngCoeff_h);
|
||||||
|
cudaMemcpyToSymbol(etics::scf::AngCoeff, etics::scf::AngCoeff_h, (LMAX+1)*(LMAX+2)/2 * sizeof(Real));
|
||||||
|
etics::scf::LengthScale_h = 1;
|
||||||
|
cudaMemcpyToSymbol(etics::scf::LengthScale, &etics::scf::LengthScale_h, sizeof(Real));
|
||||||
|
ScfGloballyInitialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::SetLengthScale(Real NewLengthScale) {
|
||||||
|
etics::scf::LengthScale_h = NewLengthScale;
|
||||||
|
cudaMemcpyToSymbol(etics::scf::LengthScale, &etics::scf::LengthScale_h, sizeof(Real));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Old API
|
||||||
|
void etics::scf::GuessLaunchConfiguration(int N, int *k3gs_new, int *k3bs_new, int *k4gs_new, int *k4bs_new) {
|
||||||
|
int blockSize;
|
||||||
|
int minGridSize;
|
||||||
|
int gridSize;
|
||||||
|
//WARNING setting blockSizeLimit=128 for cudaOccupancyMaxPotentialBlockSizeVariableSMem.
|
||||||
|
cudaOccupancyMaxPotentialBlockSizeVariableSMem(&minGridSize, &blockSize, CalculateCoefficientsPartial, blockSizeToDynamicSMemSize, 128);
|
||||||
|
gridSize = minGridSize;
|
||||||
|
*k3gs_new = gridSize;
|
||||||
|
*k3bs_new = blockSize;
|
||||||
|
|
||||||
|
cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, CalculateGravityFromCoefficients, 0, N);
|
||||||
|
gridSize = (N + blockSize - 1) / blockSize;
|
||||||
|
*k4gs_new = gridSize;
|
||||||
|
*k4bs_new = blockSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::Init(int N, int k3gs_new, int k3bs_new, int k4gs_new, int k4bs_new) { // to be removed!!
|
||||||
|
SCFObject.Init(N, k3gs_new, k3bs_new, k4gs_new, k4bs_new);
|
||||||
|
}
|
||||||
|
|
||||||
|
void etics::scf::CalculateGravity(Particle *P, int N, Real *Potential, vec3 *F) {
|
||||||
|
SCFObject.CalculateGravity(P, N, Potential, F);
|
||||||
|
}
|
||||||
61
src/scf.hpp
Normal file
61
src/scf.hpp
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
#pragma once
|
||||||
|
#include "common.hpp"
|
||||||
|
#include "mathaux.hpp"
|
||||||
|
namespace etics {
|
||||||
|
namespace scf {
|
||||||
|
// CUDA Kernels and device functions cannot be in class space
|
||||||
|
__global__ void LoadParticlesToCache(Particle *P, int N, vec3 Offset=vec3(0,0,0));
|
||||||
|
__global__ void CalculatePhi0l(int N, int l);
|
||||||
|
__global__ void CalculateCoefficientsPartial(int N, int n, int l, Complex *PartialSum);
|
||||||
|
template<int Mode> __host__ __device__ void CalculateGravityTemplate(Real xi, Real costheta, Complex Exponent, Complex *A, vec3 *F, Real *Potential);
|
||||||
|
__global__ void CalculateGravityFromCoefficients(int N, Real *Potential, vec3 *F);
|
||||||
|
|
||||||
|
void CalculateGravity(Particle *P, int N, Real *Potential, vec3 *F);
|
||||||
|
void Init(int Nmax, int k3gs_new=0, int k3bs_new=0, int k4gs_new=0, int k4bs_new=0); // to be removed!
|
||||||
|
void GlobalInit();
|
||||||
|
void GuessLaunchConfiguration(int N, int *k3gs_new, int *k3bs_new, int *k4gs_new, int *k4bs_new);
|
||||||
|
void SetLengthScale(Real NewLengthScale);
|
||||||
|
|
||||||
|
struct CacheStruct {
|
||||||
|
Real *xi;
|
||||||
|
Real *Phi0l;
|
||||||
|
Real *Wprev1;
|
||||||
|
Real *Wprev2;
|
||||||
|
Real *costheta;
|
||||||
|
Real *sintheta_I;
|
||||||
|
Complex *Exponent;
|
||||||
|
Real *mass;
|
||||||
|
};
|
||||||
|
|
||||||
|
class scfclass {
|
||||||
|
public:
|
||||||
|
scfclass();
|
||||||
|
~scfclass();
|
||||||
|
void SendCachePointersToGPU();
|
||||||
|
void SendCoeffsToGPU();
|
||||||
|
void LoadParticlesToCache(Particle *P, int N, vec3 Offset=vec3(0,0,0));
|
||||||
|
void CalculateCoefficients();
|
||||||
|
void CalculateGravityFromCoefficients(Real *Potential, vec3 *F);
|
||||||
|
void CalculateGravity(Particle *P, int N, Real *Potential, vec3 *F);
|
||||||
|
void CalculateGravityAtPoint(vec3 Pos, Real *Potential, vec3 *F);
|
||||||
|
void Init(int N, int k3gs_new, int k3bs_new, int k4gs_new, int k4bs_new);
|
||||||
|
void SetLaunchConfiguration(int k3gs_new, int k3bs_new, int k4gs_new, int k4bs_new);
|
||||||
|
void GetCoefficients(Complex *A);
|
||||||
|
void GetGpuLock();
|
||||||
|
void ReleaseGpuLock();
|
||||||
|
//WARNING: the following two functions are not implemented but they should be
|
||||||
|
int GetNMAX();
|
||||||
|
int GetLMAX();
|
||||||
|
//TODO We'll make them private in the final version, now we need access for debugging
|
||||||
|
int N;
|
||||||
|
int Nmax;
|
||||||
|
//WARNING we just hardcode 1024 as the maximum size of the coefficient structure. This is because having NMAX and LMAX here causes problems.
|
||||||
|
Complex A_h[1024];
|
||||||
|
CacheStruct Cache_h;
|
||||||
|
Complex *PartialSum = NULL;
|
||||||
|
Complex *PartialSum_h = NULL;
|
||||||
|
int k3gs, k3bs, k4gs, k4bs;
|
||||||
|
void CalculateCoefficients(int n, int l);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue