yebisu/particle.h
2019-08-25 16:12:39 +08:00

343 lines
7.9 KiB
C

//#define EPS_RED
//#define EPS_MUL 0.0001f
static inline __host__
float2 float2_split(const double x)
{
const float fx = float(x);
const float fy = float(x - double(fx));
return make_float2(fx, fy);
}
static inline __host__
double float2_todouble(const float2 f)
{
return double(f.x) + double(f.y);
}
static inline __device__
float2 float2_accum(const float2 acc, const float x)
{
const float ax = acc.x + x;
const float ay = acc.y - ((ax - acc.x) - x);
return make_float2(ax, ay);
}
static inline __device__
float float2_diff(const float2 xj, const float2 xi){
return (xj.x - xi.x) + (xj.y - xi.y);
}
struct Iparticle{
float2 pos[3]; // 6
float vel[3]; // 9
float eps2; // 10
float h2; // 11
int id; // 12
__host__ void read(
const double h_pos[],
const double h_vel[],
const double h_eps2,
const double h_h2,
const int h_id)
{
for(int k=0; k<3; k++){
pos[k] = float2_split(h_pos[k]);
vel[k] = float(h_vel[k]);
}
eps2 = float(h_eps2);
h2 = float(h_h2);
id = h_id;
}
};
struct Jparticle{
float2 pos [3]; // 6
float vel [3]; // 9
float acc2[3]; // 12
float jrk6[3]; // 15
float mass; // 16
float2 tj; // 18
int id; // 19
int addr; // 20
__host__ void read(
const double h_pos[],
const double h_vel[],
const double h_acc2[],
const double h_jrk6[],
const double h_mass,
const double h_tj,
const int h_id,
const int h_addr)
{
for(int k=0; k<3; k++){
pos [k] = float2_split(h_pos[k]);
vel [k] = float(h_vel [k]);
acc2[k] = float(h_acc2[k]);
jrk6[k] = float(h_jrk6[k]);
}
mass = float(h_mass);
tj = float2_split(h_tj);
id = h_id;
addr = h_addr;
assert(addr < NBODY_MAX);
}
};
struct Jppred{
float2 pos[3]; // 6
float vel[3]; // 9
float mass; // 10
int id; // 11
int pad; // 12
enum{
SIZE_F4 = 3,
};
__device__ void predict(
const Jparticle &jp,
const float2 ti)
{
const float dt = float2_diff(ti, jp.tj);
#pragma unroll
for(int k=0; k<3; k++){
pos[k].x = jp.pos[k].x;
pos[k].y = jp.pos[k].y + dt*(jp.vel[k] + dt*(jp.acc2[k] + dt*jp.jrk6[k]));
vel[k] = jp.vel[k] + (2.f*dt)*(jp.acc2[k] + (1.5f*dt)*(jp.jrk6[k]));
}
mass = jp.mass;
id = jp.id ;
}
};
struct Interaction{
float3 acc;
float3 jrk;
float pot;
__device__ Interaction(
const Iparticle &ip,
const Jppred &jp){
const float dx = float2_diff(jp.pos[0], ip.pos[0]);
const float dy = float2_diff(jp.pos[1], ip.pos[1]);
const float dz = float2_diff(jp.pos[2], ip.pos[2]);
const float dvx = jp.vel[0] - ip.vel[0];
const float dvy = jp.vel[1] - ip.vel[1];
const float dvz = jp.vel[2] - ip.vel[2];
#ifdef EPS_RED
float r2, tmp_eps2;
tmp_eps2 = ip.eps2; // default value ----> eps = 1e-5 !!!
// larger ----> 10 x eps if mass > 1e-6 (i.e. high mass part.)
if( (jp.id < 99998) || (ip.id < 99998) ) tmp_eps2 *= 100.0f;
// if i or j is a BH's ----> 1e-2 * eps
if( (ip.id == 999998) || (jp.id == 999998) || (ip.id == 999999) || (jp.id == 999999) )
{
r2 = EPS_MUL*tmp_eps2 + dx*dx + dy*dy + dz*dz;
}
else
{
r2 = tmp_eps2 + dx*dx + dy*dy + dz*dz;
}
#else
const float r2 = ip.eps2 + dx*dx + dy*dy + dz*dz;
#endif
/*
#ifdef EPS_RED
float r2;
// if i or j is a BH's
if( (ip.id == 0) || (jp.id == 0) || (ip.id == 1) || (jp.id == 1) )
{
r2 = EPS_MUL*ip.eps2 + dx*dx + dy*dy + dz*dz;
}
else
{
r2 = ip.eps2 + dx*dx + dy*dy + dz*dz;
}
#else
const float r2 = ip.eps2 + dx*dx + dy*dy + dz*dz;
#endif
*/
// const float r2 = ip.eps2 + dx*dx + dy*dy + dz*dz;
const float rv = dx*dvx + dy*dvy + dz*dvz;
const float rinv1 = (jp.id == ip.id) ? 0.0f
: rsqrtf(r2);
const float rinv2 = rinv1 * rinv1;
const float mrinv1 = jp.mass * rinv1;
const float mrinv3 = mrinv1 * rinv2;
const float alpha = -3.f * rv * rinv2;
acc.x = mrinv3 * dx;
acc.y = mrinv3 * dy;
acc.z = mrinv3 * dz;
jrk.x = mrinv3 * (dvx + alpha * dx);
jrk.y = mrinv3 * (dvy + alpha * dy);
jrk.z = mrinv3 * (dvz + alpha * dz);
pot = mrinv1; // use positive definition here
}
__device__ void set_neib(int &dst) const{
// do nothing
}
};
struct Interaction_NB{
float3 acc;
float3 jrk;
float pot;
float nb_rinv;
int jid;
bool is_neib;
__device__ Interaction_NB(
const Iparticle &ip,
const Jppred &jp)
{
const float dx = float2_diff(jp.pos[0], ip.pos[0]);
const float dy = float2_diff(jp.pos[1], ip.pos[1]);
const float dz = float2_diff(jp.pos[2], ip.pos[2]);
const float dvx = jp.vel[0] - ip.vel[0];
const float dvy = jp.vel[1] - ip.vel[1];
const float dvz = jp.vel[2] - ip.vel[2];
// if( (jp.id) > 1 && (jp.id < 200000) ) ip.eps2 *= 100;
const float r2 = ip.eps2 + dx*dx + dy*dy + dz*dz;
const float rv = dx*dvx + dy*dvy + dz*dvz;
const float rinv1 = (jp.id == ip.id) ? 0.0f
: rsqrtf(r2);
const float rinv2 = rinv1 * rinv1;
const float mrinv1 = jp.mass * rinv1;
const float mrinv3 = mrinv1 * rinv2;
const float alpha = -3.f * rv * rinv2;
acc.x = mrinv3 * dx;
acc.y = mrinv3 * dy;
acc.z = mrinv3 * dz;
jrk.x = mrinv3 * (dvx + alpha * dx);
jrk.y = mrinv3 * (dvy + alpha * dy);
jrk.z = mrinv3 * (dvz + alpha * dz);
pot = mrinv1; // use positive definition here
nb_rinv = rinv1;
jid = jp.id;
is_neib = (r2 < ip.h2) && (jp.id != ip.id);
}
__device__ void set_neib(int &dst) const{
if(is_neib) dst = jid;
}
};
struct Force{
float2 acc[3]; // 6
float jrk[3]; // 9
float2 pot; // 11
int nnb_id; // 12 ID of nearest neighbor
float nnb_rinv; // 13 rinv of nearest neighbor
int num_neib; // 14
__host__ void write(
double h_acc[],
double h_jrk[],
double &h_pot,
int &h_nnb_id,
int &h_num_neib) const
{
for(int k=0; k<3; k++){
h_acc[k] = float2_todouble(acc[k]);
h_jrk[k] = double(jrk[k]);
}
h_pot = - float2_todouble(pot);
h_nnb_id = nnb_id;
h_num_neib = num_neib;
}
__device__ void clear()
{
#pragma unroll
for(int k=0; k<3; k++){
acc[k] = make_float2(0.0f, 0.0f);
jrk[k] = 0.0f;
}
pot = make_float2(0.0f, 0.0f);
nnb_id = -1;
nnb_rinv = 0.0f;
num_neib = 0;
}
__device__ void check_overflow(){
if(num_neib > NB_MAX) num_neib = -1;
}
// for the redction kernel
__device__ void operator+=(
const Force &fo)
{
#pragma unroll
for(int k=0; k<3; k++){
acc[k] = float2_accum(acc[k], fo.acc[k].x);
acc[k] = float2_accum(acc[k], fo.acc[k].y);
jrk[k] += fo.jrk[k];
}
pot = float2_accum(pot, fo.pot.x);
pot = float2_accum(pot, fo.pot.y);
if(num_neib>=0 && fo.num_neib>=0){
num_neib += fo.num_neib;
}else{ // overflow
num_neib = -1;
}
// nearest neighbor
if(nnb_rinv < fo.nnb_rinv){
nnb_id = fo.nnb_id;
nnb_rinv = fo.nnb_rinv;
}
}
// for the gravity kernel
__device__ void operator+=(
const Interaction &fo)
{
acc[0] = float2_accum(acc[0], fo.acc.x);
acc[1] = float2_accum(acc[1], fo.acc.y);
acc[2] = float2_accum(acc[2], fo.acc.z);
pot = float2_accum(pot, fo.pot);
jrk[0] += fo.jrk.x;
jrk[1] += fo.jrk.y;
jrk[2] += fo.jrk.z;
}
__device__ void operator+=(
const Interaction_NB &fo)
{
acc[0] = float2_accum(acc[0], fo.acc.x);
acc[1] = float2_accum(acc[1], fo.acc.y);
acc[2] = float2_accum(acc[2], fo.acc.z);
jrk[0] += fo.jrk.x;
jrk[1] += fo.jrk.y;
jrk[2] += fo.jrk.z;
pot = float2_accum(pot, fo.pot);
// neighbor list counter
if(fo.is_neib) num_neib++;
// nearest neighbor
if(nnb_rinv < fo.nb_rinv){
nnb_id = fo.jid;
nnb_rinv = fo.nb_rinv;
}
}
};