343 lines
7.9 KiB
C
343 lines
7.9 KiB
C
//#define EPS_RED
|
|
//#define EPS_MUL 0.0001f
|
|
|
|
static inline __host__
|
|
float2 float2_split(const double x)
|
|
{
|
|
const float fx = float(x);
|
|
const float fy = float(x - double(fx));
|
|
return make_float2(fx, fy);
|
|
}
|
|
|
|
static inline __host__
|
|
double float2_todouble(const float2 f)
|
|
{
|
|
return double(f.x) + double(f.y);
|
|
}
|
|
|
|
static inline __device__
|
|
float2 float2_accum(const float2 acc, const float x)
|
|
{
|
|
const float ax = acc.x + x;
|
|
const float ay = acc.y - ((ax - acc.x) - x);
|
|
return make_float2(ax, ay);
|
|
}
|
|
|
|
static inline __device__
|
|
float float2_diff(const float2 xj, const float2 xi){
|
|
return (xj.x - xi.x) + (xj.y - xi.y);
|
|
}
|
|
|
|
struct Iparticle{
|
|
float2 pos[3]; // 6
|
|
float vel[3]; // 9
|
|
float eps2; // 10
|
|
float h2; // 11
|
|
int id; // 12
|
|
|
|
__host__ void read(
|
|
const double h_pos[],
|
|
const double h_vel[],
|
|
const double h_eps2,
|
|
const double h_h2,
|
|
const int h_id)
|
|
{
|
|
for(int k=0; k<3; k++){
|
|
pos[k] = float2_split(h_pos[k]);
|
|
vel[k] = float(h_vel[k]);
|
|
}
|
|
eps2 = float(h_eps2);
|
|
h2 = float(h_h2);
|
|
id = h_id;
|
|
}
|
|
};
|
|
|
|
struct Jparticle{
|
|
float2 pos [3]; // 6
|
|
float vel [3]; // 9
|
|
float acc2[3]; // 12
|
|
float jrk6[3]; // 15
|
|
float mass; // 16
|
|
float2 tj; // 18
|
|
int id; // 19
|
|
int addr; // 20
|
|
|
|
__host__ void read(
|
|
const double h_pos[],
|
|
const double h_vel[],
|
|
const double h_acc2[],
|
|
const double h_jrk6[],
|
|
const double h_mass,
|
|
const double h_tj,
|
|
const int h_id,
|
|
const int h_addr)
|
|
{
|
|
for(int k=0; k<3; k++){
|
|
pos [k] = float2_split(h_pos[k]);
|
|
vel [k] = float(h_vel [k]);
|
|
acc2[k] = float(h_acc2[k]);
|
|
jrk6[k] = float(h_jrk6[k]);
|
|
}
|
|
mass = float(h_mass);
|
|
tj = float2_split(h_tj);
|
|
id = h_id;
|
|
addr = h_addr;
|
|
assert(addr < NBODY_MAX);
|
|
}
|
|
};
|
|
|
|
struct Jppred{
|
|
float2 pos[3]; // 6
|
|
float vel[3]; // 9
|
|
float mass; // 10
|
|
int id; // 11
|
|
int pad; // 12
|
|
|
|
enum{
|
|
SIZE_F4 = 3,
|
|
};
|
|
|
|
__device__ void predict(
|
|
const Jparticle &jp,
|
|
const float2 ti)
|
|
{
|
|
const float dt = float2_diff(ti, jp.tj);
|
|
#pragma unroll
|
|
for(int k=0; k<3; k++){
|
|
pos[k].x = jp.pos[k].x;
|
|
pos[k].y = jp.pos[k].y + dt*(jp.vel[k] + dt*(jp.acc2[k] + dt*jp.jrk6[k]));
|
|
vel[k] = jp.vel[k] + (2.f*dt)*(jp.acc2[k] + (1.5f*dt)*(jp.jrk6[k]));
|
|
}
|
|
mass = jp.mass;
|
|
id = jp.id ;
|
|
}
|
|
};
|
|
|
|
struct Interaction{
|
|
float3 acc;
|
|
float3 jrk;
|
|
float pot;
|
|
|
|
__device__ Interaction(
|
|
const Iparticle &ip,
|
|
const Jppred &jp){
|
|
const float dx = float2_diff(jp.pos[0], ip.pos[0]);
|
|
const float dy = float2_diff(jp.pos[1], ip.pos[1]);
|
|
const float dz = float2_diff(jp.pos[2], ip.pos[2]);
|
|
const float dvx = jp.vel[0] - ip.vel[0];
|
|
const float dvy = jp.vel[1] - ip.vel[1];
|
|
const float dvz = jp.vel[2] - ip.vel[2];
|
|
|
|
#ifdef EPS_RED
|
|
float r2, tmp_eps2;
|
|
|
|
tmp_eps2 = ip.eps2; // default value ----> eps = 1e-5 !!!
|
|
|
|
// larger ----> 10 x eps if mass > 1e-6 (i.e. high mass part.)
|
|
|
|
if( (jp.id < 99998) || (ip.id < 99998) ) tmp_eps2 *= 100.0f;
|
|
|
|
// if i or j is a BH's ----> 1e-2 * eps
|
|
if( (ip.id == 999998) || (jp.id == 999998) || (ip.id == 999999) || (jp.id == 999999) )
|
|
{
|
|
r2 = EPS_MUL*tmp_eps2 + dx*dx + dy*dy + dz*dz;
|
|
}
|
|
else
|
|
{
|
|
r2 = tmp_eps2 + dx*dx + dy*dy + dz*dz;
|
|
}
|
|
#else
|
|
const float r2 = ip.eps2 + dx*dx + dy*dy + dz*dz;
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/*
|
|
#ifdef EPS_RED
|
|
float r2;
|
|
|
|
// if i or j is a BH's
|
|
|
|
if( (ip.id == 0) || (jp.id == 0) || (ip.id == 1) || (jp.id == 1) )
|
|
{
|
|
r2 = EPS_MUL*ip.eps2 + dx*dx + dy*dy + dz*dz;
|
|
}
|
|
else
|
|
{
|
|
r2 = ip.eps2 + dx*dx + dy*dy + dz*dz;
|
|
}
|
|
#else
|
|
const float r2 = ip.eps2 + dx*dx + dy*dy + dz*dz;
|
|
#endif
|
|
*/
|
|
|
|
// const float r2 = ip.eps2 + dx*dx + dy*dy + dz*dz;
|
|
const float rv = dx*dvx + dy*dvy + dz*dvz;
|
|
|
|
const float rinv1 = (jp.id == ip.id) ? 0.0f
|
|
: rsqrtf(r2);
|
|
|
|
const float rinv2 = rinv1 * rinv1;
|
|
const float mrinv1 = jp.mass * rinv1;
|
|
const float mrinv3 = mrinv1 * rinv2;
|
|
const float alpha = -3.f * rv * rinv2;
|
|
|
|
acc.x = mrinv3 * dx;
|
|
acc.y = mrinv3 * dy;
|
|
acc.z = mrinv3 * dz;
|
|
jrk.x = mrinv3 * (dvx + alpha * dx);
|
|
jrk.y = mrinv3 * (dvy + alpha * dy);
|
|
jrk.z = mrinv3 * (dvz + alpha * dz);
|
|
pot = mrinv1; // use positive definition here
|
|
}
|
|
__device__ void set_neib(int &dst) const{
|
|
// do nothing
|
|
}
|
|
};
|
|
|
|
struct Interaction_NB{
|
|
float3 acc;
|
|
float3 jrk;
|
|
float pot;
|
|
float nb_rinv;
|
|
int jid;
|
|
bool is_neib;
|
|
|
|
__device__ Interaction_NB(
|
|
const Iparticle &ip,
|
|
const Jppred &jp)
|
|
{
|
|
const float dx = float2_diff(jp.pos[0], ip.pos[0]);
|
|
const float dy = float2_diff(jp.pos[1], ip.pos[1]);
|
|
const float dz = float2_diff(jp.pos[2], ip.pos[2]);
|
|
const float dvx = jp.vel[0] - ip.vel[0];
|
|
const float dvy = jp.vel[1] - ip.vel[1];
|
|
const float dvz = jp.vel[2] - ip.vel[2];
|
|
|
|
// if( (jp.id) > 1 && (jp.id < 200000) ) ip.eps2 *= 100;
|
|
|
|
const float r2 = ip.eps2 + dx*dx + dy*dy + dz*dz;
|
|
const float rv = dx*dvx + dy*dvy + dz*dvz;
|
|
|
|
const float rinv1 = (jp.id == ip.id) ? 0.0f
|
|
: rsqrtf(r2);
|
|
|
|
const float rinv2 = rinv1 * rinv1;
|
|
const float mrinv1 = jp.mass * rinv1;
|
|
const float mrinv3 = mrinv1 * rinv2;
|
|
const float alpha = -3.f * rv * rinv2;
|
|
|
|
acc.x = mrinv3 * dx;
|
|
acc.y = mrinv3 * dy;
|
|
acc.z = mrinv3 * dz;
|
|
jrk.x = mrinv3 * (dvx + alpha * dx);
|
|
jrk.y = mrinv3 * (dvy + alpha * dy);
|
|
jrk.z = mrinv3 * (dvz + alpha * dz);
|
|
pot = mrinv1; // use positive definition here
|
|
nb_rinv = rinv1;
|
|
jid = jp.id;
|
|
is_neib = (r2 < ip.h2) && (jp.id != ip.id);
|
|
}
|
|
__device__ void set_neib(int &dst) const{
|
|
if(is_neib) dst = jid;
|
|
}
|
|
};
|
|
|
|
struct Force{
|
|
float2 acc[3]; // 6
|
|
float jrk[3]; // 9
|
|
float2 pot; // 11
|
|
int nnb_id; // 12 ID of nearest neighbor
|
|
float nnb_rinv; // 13 rinv of nearest neighbor
|
|
int num_neib; // 14
|
|
|
|
__host__ void write(
|
|
double h_acc[],
|
|
double h_jrk[],
|
|
double &h_pot,
|
|
int &h_nnb_id,
|
|
int &h_num_neib) const
|
|
{
|
|
for(int k=0; k<3; k++){
|
|
h_acc[k] = float2_todouble(acc[k]);
|
|
h_jrk[k] = double(jrk[k]);
|
|
}
|
|
h_pot = - float2_todouble(pot);
|
|
h_nnb_id = nnb_id;
|
|
h_num_neib = num_neib;
|
|
}
|
|
|
|
__device__ void clear()
|
|
{
|
|
#pragma unroll
|
|
for(int k=0; k<3; k++){
|
|
acc[k] = make_float2(0.0f, 0.0f);
|
|
jrk[k] = 0.0f;
|
|
}
|
|
pot = make_float2(0.0f, 0.0f);
|
|
nnb_id = -1;
|
|
nnb_rinv = 0.0f;
|
|
num_neib = 0;
|
|
}
|
|
|
|
__device__ void check_overflow(){
|
|
if(num_neib > NB_MAX) num_neib = -1;
|
|
}
|
|
|
|
// for the redction kernel
|
|
__device__ void operator+=(
|
|
const Force &fo)
|
|
{
|
|
#pragma unroll
|
|
for(int k=0; k<3; k++){
|
|
acc[k] = float2_accum(acc[k], fo.acc[k].x);
|
|
acc[k] = float2_accum(acc[k], fo.acc[k].y);
|
|
jrk[k] += fo.jrk[k];
|
|
}
|
|
pot = float2_accum(pot, fo.pot.x);
|
|
pot = float2_accum(pot, fo.pot.y);
|
|
if(num_neib>=0 && fo.num_neib>=0){
|
|
num_neib += fo.num_neib;
|
|
}else{ // overflow
|
|
num_neib = -1;
|
|
}
|
|
// nearest neighbor
|
|
if(nnb_rinv < fo.nnb_rinv){
|
|
nnb_id = fo.nnb_id;
|
|
nnb_rinv = fo.nnb_rinv;
|
|
}
|
|
}
|
|
|
|
// for the gravity kernel
|
|
__device__ void operator+=(
|
|
const Interaction &fo)
|
|
{
|
|
acc[0] = float2_accum(acc[0], fo.acc.x);
|
|
acc[1] = float2_accum(acc[1], fo.acc.y);
|
|
acc[2] = float2_accum(acc[2], fo.acc.z);
|
|
pot = float2_accum(pot, fo.pot);
|
|
jrk[0] += fo.jrk.x;
|
|
jrk[1] += fo.jrk.y;
|
|
jrk[2] += fo.jrk.z;
|
|
}
|
|
|
|
__device__ void operator+=(
|
|
const Interaction_NB &fo)
|
|
{
|
|
acc[0] = float2_accum(acc[0], fo.acc.x);
|
|
acc[1] = float2_accum(acc[1], fo.acc.y);
|
|
acc[2] = float2_accum(acc[2], fo.acc.z);
|
|
jrk[0] += fo.jrk.x;
|
|
jrk[1] += fo.jrk.y;
|
|
jrk[2] += fo.jrk.z;
|
|
pot = float2_accum(pot, fo.pot);
|
|
// neighbor list counter
|
|
if(fo.is_neib) num_neib++;
|
|
// nearest neighbor
|
|
if(nnb_rinv < fo.nb_rinv){
|
|
nnb_id = fo.jid;
|
|
nnb_rinv = fo.nb_rinv;
|
|
}
|
|
}
|
|
};
|