756 lines
29 KiB
C++
756 lines
29 KiB
C++
#define ETA_S_CORR 4.0
|
|
#define ETA_BH_CORR 4.0
|
|
|
|
#define DTMAXPOWER -3.0
|
|
#define DTMINPOWER -36.0
|
|
|
|
#include <algorithm>
|
|
#include <chrono>
|
|
#include <math.h>
|
|
#include <mpi.h>
|
|
#include <numeric>
|
|
#include <stdexcept>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/time.h>
|
|
#include <sys/resource.h>
|
|
#include <time.h>
|
|
|
|
#include "black_holes.h"
|
|
#include "config.h"
|
|
#include "double3.h"
|
|
#include "external.h"
|
|
#include "grape6.h"
|
|
#include "io.h"
|
|
|
|
#ifdef ETICS
|
|
#include "grapite.h"
|
|
#endif
|
|
|
|
Config *config;
|
|
//chrono::steady_clock::time_point walltime_start;
|
|
|
|
namespace std::chrono {
|
|
struct Timer {
|
|
void start()
|
|
{
|
|
t_start = steady_clock::now();
|
|
}
|
|
void stop()
|
|
{
|
|
t_stop = steady_clock::now();
|
|
time = duration_cast<nanoseconds>(t_stop - t_start).count()*1E-9;
|
|
}
|
|
double time; // seconds
|
|
steady_clock::time_point t_start, t_stop;
|
|
};
|
|
}
|
|
std::chrono::Timer timer;
|
|
|
|
class Calc_self_grav {
|
|
public:
|
|
Calc_self_grav(const int N, const int n_loc, const int clusterid, const int npipe, const double eps)
|
|
: g6_calls(0), n_loc(n_loc), clusterid(clusterid), npipe(npipe), eps2(eps*eps)
|
|
{
|
|
h2.assign(N, eps2);
|
|
pot_loc.resize(N);
|
|
acc_loc.resize(N);
|
|
jrk_loc.resize(N);
|
|
}
|
|
void operator()(const double t, const int n_act, std::vector<int>& ind_act, const double3 x_act[], const double3 v_act[],
|
|
std::vector<double>& pot, double3 acc[], double3 jrk[])
|
|
{
|
|
g6_set_ti(clusterid, t);
|
|
for (int i=0; i<n_act; i+=npipe) {
|
|
int nn = npipe;
|
|
if (n_act-i < npipe) nn = n_act - i;
|
|
//TODO any way we can clean up this ugly casting?
|
|
g6calc_firsthalf(clusterid, n_loc, nn, ind_act.data()+i, (double(*)[3])&x_act[i], (double(*)[3])&v_act[i], (double(*)[3])&acc_loc[i], (double(*)[3])&jrk_loc[i], &pot_loc[i], eps2, h2.data());
|
|
g6calc_lasthalf( clusterid, n_loc, nn, ind_act.data()+i, (double(*)[3])&x_act[i], (double(*)[3])&v_act[i], eps2, h2.data(), (double(*)[3])&acc_loc[i], (double(*)[3])&jrk_loc[i], &pot_loc[i]);
|
|
g6_calls++;
|
|
} /* i */
|
|
/* Reduce the "global" vectors from "local" on all the nodes */
|
|
MPI_Allreduce(pot_loc.data(), pot.data(), n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
|
MPI_Allreduce(acc_loc.data(), acc, 3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
|
MPI_Allreduce(jrk_loc.data(), jrk, 3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
|
}
|
|
double g6_calls;
|
|
private:
|
|
int n_loc, clusterid, npipe;
|
|
double eps2;
|
|
std::vector<double> h2;
|
|
std::vector<double> pot_loc; // the _loc variables are for this node only.
|
|
std::vector<double3> acc_loc, jrk_loc;
|
|
};
|
|
|
|
void calc_ext_grav(std::vector<External_gravity*> &external_gravity_components, int n, double3 *x, double3 *v, double *pot, double3 *acc, double3* jrk)
|
|
// TODO should just be a class that has this pointer array as a member
|
|
{
|
|
std::fill(pot, pot+n, 0.);
|
|
for (auto component : external_gravity_components) {
|
|
if (component->is_active)
|
|
component->apply(n, x, v, pot, acc, jrk);
|
|
}
|
|
}
|
|
|
|
void energy_contr(const double time_cur, const double timesteps, const double n_act_sum, const double g6_calls, int N, double m[], double3 x[], double3 v[], const std::vector<double>& pot, double pot_ext[])
|
|
{
|
|
double E_pot = 0;
|
|
for (int i=0; i<N; i++) E_pot += m[i]*pot[i];
|
|
E_pot *= 0.5;
|
|
|
|
double E_kin = 0;
|
|
for (int i=0; i<N; i++) E_kin += m[i]*v[i].norm2();
|
|
E_kin *= 0.5;
|
|
|
|
double E_pot_ext = 0;
|
|
for (int i=0; i<N; i++) E_pot_ext += m[i]*pot_ext[i];
|
|
|
|
double m_tot = 0;
|
|
double3 xcm = {0, 0, 0};
|
|
double3 vcm = {0, 0, 0};
|
|
for (int i=0; i<N; i++) {
|
|
m_tot += m[i];
|
|
xcm += m[i] * x[i];
|
|
vcm += m[i] * v[i];
|
|
}
|
|
xcm /= m_tot;
|
|
vcm /= m_tot;
|
|
double rcm_mod = xcm.norm();
|
|
double vcm_mod = vcm.norm();
|
|
|
|
double3 mom = {0, 0, 0};
|
|
for (int i=0; i<N; i++) {
|
|
mom[0] += m[i] * (x[i][1]* v[i][2] - x[i][2]*v[i][1]);
|
|
mom[1] += m[i] * (x[i][2]* v[i][0] - x[i][0]*v[i][2]);
|
|
mom[2] += m[i] * (x[i][0]* v[i][1] - x[i][1]*v[i][0]);
|
|
}
|
|
|
|
timer.stop();
|
|
|
|
double E_tot = E_pot + E_kin + E_pot_ext;
|
|
|
|
static double E_tot_prev;
|
|
if (timesteps == 0.0) E_tot_prev = E_tot;
|
|
|
|
double DE_tot = E_tot - E_tot_prev;
|
|
|
|
/* This is the only output to screen */
|
|
printf("%.3E %.3E % .4E %.4E % .4E % .4E % .4E %.2E\n",
|
|
time_cur, timesteps,
|
|
E_pot, E_kin, E_pot_ext, E_tot, DE_tot,
|
|
timer.time);
|
|
|
|
fflush(stdout);
|
|
|
|
auto out = fopen("contr.dat", "a");
|
|
fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E\n",
|
|
time_cur, timesteps, n_act_sum, g6_calls,
|
|
E_pot, E_kin, E_pot_ext,
|
|
E_tot, DE_tot,
|
|
rcm_mod, vcm_mod,
|
|
mom[0], mom[1], mom[2],
|
|
timer.time);
|
|
fclose(out);
|
|
|
|
E_tot_prev = E_tot;
|
|
}
|
|
|
|
class Active_search {
|
|
// TODO you can add pointers to t and dt at the constructor, no point giving them at get_minimum_time but without the size.
|
|
public:
|
|
Active_search(const int myRank, const int n_proc, const int n_loc, const int N)
|
|
: myRank(myRank), n_proc(n_proc), n_loc(n_loc), N(N)
|
|
{
|
|
ind_act_loc = new int[n_loc];
|
|
}
|
|
~Active_search() { delete[] ind_act_loc; };
|
|
double get_minimum_time(const double t[], const double dt[])
|
|
{
|
|
double min_t_loc, min_t;
|
|
#ifdef ETICS
|
|
if (config->grapite_active_search) {
|
|
min_t_loc = grapite_get_minimum_time();
|
|
} else
|
|
#endif
|
|
{
|
|
min_t_loc = t[myRank*n_loc]+dt[myRank*n_loc];
|
|
for (int j=myRank*n_loc+1; j<(myRank+1)*n_loc; j++) {
|
|
double tmp = t[j] + dt[j];
|
|
if (tmp < min_t_loc) min_t_loc = tmp;
|
|
}
|
|
}
|
|
/* Reduce the "global" min_t from min_t_loc "local" on all processors) */
|
|
MPI_Allreduce(&min_t_loc, &min_t, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
|
|
return min_t;
|
|
}
|
|
void get_active_indices(const double min_t, const double t[], const double dt[], int ind_act[], int& n_act)
|
|
{
|
|
#ifdef ETICS
|
|
if (config->grapite_active_search) {
|
|
int n_act_loc;
|
|
grapite_active_search(min_t, ind_act_loc, &n_act_loc);
|
|
if (myRank > 0)
|
|
for (int i=0; i<n_act_loc; i++)
|
|
ind_act_loc[i] += myRank*n_loc;
|
|
int n_act_arr[256], displs[256]; // Assuming maximum of 256 processes... seems safe.
|
|
MPI_Allgather(&n_act_loc, 1, MPI_INT, n_act_arr, 1, MPI_INT, MPI_COMM_WORLD);
|
|
n_act = n_act_arr[0];
|
|
for (int i=1; i<n_proc; i++)
|
|
n_act += n_act_arr[i];
|
|
displs[0] = 0;
|
|
for (int i=1; i<n_proc; i++)
|
|
displs[i]=displs[i-1]+n_act_arr[i-1];
|
|
MPI_Allgatherv(ind_act_loc, n_act_loc, MPI_INT, ind_act, n_act_arr, displs, MPI_INT, MPI_COMM_WORLD);
|
|
} else
|
|
#endif
|
|
{
|
|
n_act = 0;
|
|
for (int i=0; i<N; i++) {
|
|
if (t[i]+dt[i] == min_t) ind_act[n_act++] = i;
|
|
} /* i */
|
|
}
|
|
}
|
|
private:
|
|
int myRank, n_proc, n_loc, N;
|
|
int *ind_act_loc;
|
|
};
|
|
|
|
inline void calc_high_derivatives(const double dt_tmp, const double3 a_old, const double3 a_new, const double3 a1_old, const double3 a1_new, double3& a2, double3& a3)
|
|
{
|
|
double dtinv = 1/dt_tmp;
|
|
double dt2inv = dtinv*dtinv;
|
|
double dt3inv = dt2inv*dtinv;
|
|
|
|
double3 a0mia1 = a_old-a_new;
|
|
double3 ad04plad12 = 4*a1_old + 2*a1_new;
|
|
double3 ad0plad1 = a1_old + a1_new;
|
|
|
|
a2 = -6*a0mia1*dt2inv - ad04plad12*dtinv;
|
|
a3 = 12*a0mia1*dt3inv + 6*ad0plad1*dt2inv;
|
|
}
|
|
|
|
inline void corrector(const double dt_tmp, const double3 a2, const double3 a3, double3& x, double3& v)
|
|
{
|
|
double dt3over6 = dt_tmp*dt_tmp*dt_tmp/6.0;
|
|
double dt4over24 = dt3over6*dt_tmp/4.0;
|
|
double dt5over120 = dt4over24*dt_tmp/5.0;
|
|
|
|
x += dt4over24*a2 + dt5over120*a3;
|
|
v += dt3over6*a2 + dt4over24*a3;
|
|
}
|
|
|
|
inline double aarseth_step(const double eta, const double dt, const double3 a, const double3 a1, const double3 a2, const double3 a3)
|
|
{
|
|
double a1abs = a.norm();
|
|
double adot1abs = a1.norm();
|
|
double3 a2dot1 = a2 + dt*a3;
|
|
double a2dot1abs = a2dot1.norm();
|
|
double a3dot1abs = a3.norm();
|
|
return sqrt(eta*(a1abs*a2dot1abs+adot1abs*adot1abs)/(adot1abs*a3dot1abs+a2dot1abs*a2dot1abs));
|
|
}
|
|
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
timer.start();
|
|
|
|
double timesteps=0.0, n_act_sum=0.0;
|
|
|
|
double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface.
|
|
|
|
/* INIT the rand() !!! */
|
|
srand(19640916); /* it is just my birthday :-) */
|
|
|
|
/* Init MPI */
|
|
MPI_Init(&argc, &argv);
|
|
|
|
/* Define the total number of processors and the Rank of each processors */
|
|
int n_proc, myRank;
|
|
const int rootRank = 0;
|
|
MPI_Comm_size(MPI_COMM_WORLD, &n_proc);
|
|
MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
|
|
|
|
/* Define the processors names */
|
|
int name_proc;
|
|
char processor_name[MPI_MAX_PROCESSOR_NAME];
|
|
MPI_Get_processor_name(processor_name, &name_proc);
|
|
|
|
/* Print the Rank and the names of processors */
|
|
printf("Rank of the processor %03d on %s \n", myRank, processor_name);
|
|
|
|
/* read the input parameters */
|
|
config = new Config("phigrape.conf");
|
|
|
|
int diskstep, N;
|
|
double time_cur;
|
|
// The memory for m, x, and v is allocated inside h5_read or ascii_read
|
|
double *m;
|
|
double3 *x, *v;
|
|
if (is_hdf5(config->input_file_name)) {
|
|
#ifndef HAS_HDF5
|
|
fprintf(stderr, "ERROR: input file is in HDF5 format, but the code was compiled without HDF5 support\n");
|
|
return -1;
|
|
#endif
|
|
h5_read(config->input_file_name, &diskstep, &N, &time_cur, m, x, v);
|
|
}
|
|
else
|
|
ascii_read(config->input_file_name, diskstep, N, time_cur, &m, &x, &v);
|
|
|
|
std::vector<int> ind(N);
|
|
std::iota(begin(ind), end(ind), 0);
|
|
double3 *a = new double3[N], *adot = new double3[N];
|
|
std::vector<double> pot(N);
|
|
double *pot_ext = new double[N], *t = new double[N], *dt = new double[N];
|
|
|
|
/* data for active particles */
|
|
int n_act;
|
|
std::vector<int> ind_act(N);
|
|
std::vector<double> pot_act_new(N);
|
|
double *pot_act_ext = new double[N];
|
|
double3 *x_act_new = new double3[N], *v_act_new = new double3[N],
|
|
*a_act_new = new double3[N], *adot_act_new = new double3[N];
|
|
|
|
double eps = config->eps;
|
|
double eta = config->eta;
|
|
double t_end = config->t_end;
|
|
double dt_disk = config->dt_disk;
|
|
double dt_contr = config->dt_contr;
|
|
double dt_bh = config->dt_bh;
|
|
|
|
if (myRank == rootRank) {
|
|
printf("\n");
|
|
printf("Begin the calculation of phi-GRAPE program on %03d processors\n", n_proc);
|
|
printf("\n");
|
|
printf("N = %07d \t eps = %.6E \n", N, eps);
|
|
printf("t_beg = %.6E \t t_end = %.6E \n", time_cur, t_end);
|
|
printf("dt_disk = %.6E \t dt_contr = %.6E \n", dt_disk, dt_contr);
|
|
printf("dt_bh = %.6E \n", dt_bh);
|
|
printf("eta = %.6E \n", eta);
|
|
printf("\n");
|
|
|
|
if ((diskstep == 0) && (time_cur == 0)) {
|
|
FILE *out = fopen("contr.dat", "w");
|
|
fclose(out);
|
|
if (config->live_smbh_output && (config->live_smbh_count > 0)) {
|
|
out = fopen("bh.dat", "w");
|
|
fclose(out);
|
|
}
|
|
if ((config->live_smbh_neighbor_output) && (config->live_smbh_count > 0)) {
|
|
out = fopen("bh_neighbors.dat", "w");
|
|
fclose(out);
|
|
}
|
|
}
|
|
} /* if (myRank == rootRank) */
|
|
|
|
double normalization_mass=1, normalization_length=1, normalization_velocity=1;
|
|
if (config->ext_units_physical) {
|
|
normalization_mass = 1/config->unit_mass;
|
|
normalization_length = 1000/config->unit_length;
|
|
normalization_velocity = 1.52484071426404437233e+01*sqrt(config->unit_length/config->unit_mass);
|
|
}
|
|
Plummer ext_bulge(config->ext_m_bulge*normalization_mass, config->ext_b_bulge*normalization_length);
|
|
Miyamoto_Nagai ext_disk(config->ext_m_disk*normalization_mass, config->ext_a_disk*normalization_length, config->ext_b_disk*normalization_length);
|
|
Plummer ext_halo_plummer(config->ext_m_halo_plummer*normalization_mass, config->ext_b_halo_plummer*normalization_length);
|
|
Logarithmic_halo ext_log_halo(config->ext_log_halo_v*normalization_velocity, config->ext_log_halo_r*normalization_length);
|
|
Dehnen ext_dehnen(config->ext_dehnen_m*normalization_mass, config->ext_dehnen_r*normalization_length, config->ext_dehnen_gamma);
|
|
|
|
std::vector<External_gravity*> external_gravity_components;
|
|
external_gravity_components.push_back(&ext_bulge);
|
|
external_gravity_components.push_back(&ext_disk);
|
|
external_gravity_components.push_back(&ext_halo_plummer);
|
|
external_gravity_components.push_back(&ext_log_halo);
|
|
external_gravity_components.push_back(&ext_dehnen);
|
|
bool has_external_gravity = false;
|
|
for (auto component : external_gravity_components) {
|
|
if (component->is_active) {
|
|
has_external_gravity = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (has_external_gravity) printf("External Potential: \n\n");
|
|
if (ext_bulge.is_active) printf("m_bulge = %.4E b_bulge = %.4E\n", config->ext_m_bulge*normalization_mass, config->ext_b_bulge*normalization_length);
|
|
if (ext_disk.is_active) printf("m_disk = %.4E a_disk = %.4E b_disk = %.4E\n", config->ext_m_disk*normalization_mass, config->ext_a_disk*normalization_length, config->ext_b_disk*normalization_length);
|
|
if (ext_halo_plummer.is_active) printf("m_halo = %.4E b_halo = %.4E\n", config->ext_m_halo_plummer*normalization_mass, config->ext_b_halo_plummer*normalization_length);
|
|
if (ext_log_halo.is_active) printf("v_halo = %.6E r_halo = %.6E \n", config->ext_log_halo_v*normalization_velocity, config->ext_log_halo_r*normalization_length);
|
|
if (ext_dehnen.is_active) printf("m_ext = %.6E r_ext = %.6E \t g_ext = %.3E \n", config->ext_dehnen_m*normalization_mass, config->ext_dehnen_r*normalization_length, config->ext_dehnen_gamma);
|
|
printf("\n");
|
|
fflush(stdout);
|
|
|
|
double eta_bh = eta/ETA_BH_CORR;
|
|
|
|
const double dt_min = pow(2, DTMINPOWER);
|
|
const double dt_max = std::min({dt_disk, dt_contr, pow(2, DTMAXPOWER)});
|
|
|
|
double t_disk = dt_disk*(1+floor(time_cur/dt_disk));
|
|
double t_contr = dt_contr*(1+floor(time_cur/dt_contr));
|
|
double t_bh = dt_bh*(1+floor(time_cur/dt_bh));
|
|
|
|
if (myRank == rootRank) {
|
|
printf("t_disk = %.6E t_contr = %.6E t_bh = %.6E \n", t_disk, t_contr, t_bh);
|
|
printf("\n");
|
|
fflush(stdout);
|
|
} /* if (myRank == rootRank) */
|
|
|
|
std::fill(t, t+N, time_cur);
|
|
std::fill(dt, dt+N, dt_min);
|
|
|
|
/* some local settings for G6a boards */
|
|
int clusterid, numGPU;
|
|
if (config->devices_per_node==0) {
|
|
MPI_Comm shmcomm;
|
|
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm);
|
|
MPI_Comm_size(shmcomm, &numGPU);
|
|
MPI_Comm_rank(shmcomm, &clusterid);
|
|
} else {
|
|
numGPU = config->devices_per_node;
|
|
clusterid = myRank % numGPU;
|
|
}
|
|
printf("Rank of the processor %03d : Number of GPUs %01d : Cluster ID %01d \n", myRank, numGPU, clusterid);
|
|
fflush(stdout);
|
|
|
|
/* init the local GRAPEs */
|
|
g6_open(clusterid);
|
|
int npipe = g6_npipes();
|
|
g6_set_tunit(51);
|
|
g6_set_xunit(51);
|
|
|
|
#ifdef ETICS
|
|
grapite_set_dev_exec_threshold(config->grapite_dev_exec_threshold);
|
|
#endif
|
|
|
|
int n_loc = N/n_proc;
|
|
Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps);
|
|
Active_search active_search(myRank, n_proc, n_loc, N);
|
|
Black_hole_physics black_hole_physics;
|
|
if (config->live_smbh_count == 1)
|
|
black_hole_physics = Black_hole_physics(m[0], 0, myRank, rootRank);
|
|
else if (config->live_smbh_count == 2)
|
|
black_hole_physics = Black_hole_physics(m[0], m[1], myRank, rootRank);
|
|
if (config->binary_smbh_pn) {
|
|
black_hole_physics.set_post_newtonian(config->pn_c, config->pn_usage.data());
|
|
if (config->pn_usage[6]) black_hole_physics.set_spins(config->smbh1_spin.data(), config->smbh2_spin.data());
|
|
}
|
|
black_hole_physics.set_softening(eps, config->live_smbh_custom_eps);
|
|
|
|
Binary_smbh_influence_sphere_output binary_smbh_influence_sphere_output(config->binary_smbh_influence_radius_factor, N, m, x, v, pot, dt);
|
|
|
|
Write_bh_nb_data write_bh_nb_data(config->live_smbh_neighbor_number, config->live_smbh_count, N, m, x, v);
|
|
|
|
#ifdef ETICS
|
|
grapite_read_particle_tags(N, config->grapite_mask_file_name.c_str(), myRank, n_loc);
|
|
grapite_set_dt_exp(config->dt_scf);
|
|
grapite_set_t_exp(time_cur);
|
|
#endif
|
|
|
|
/* load the nj particles to the G6 */
|
|
for (int k=0; k<n_loc; k++) {
|
|
int j = k + myRank*n_loc;
|
|
g6_set_j_particle(clusterid, k, ind[j], t[j], dt[j], m[j], zeros, zeros, zeros, v[j], x[j]);
|
|
} /* k */
|
|
|
|
#ifdef ETICS
|
|
double etics_length_scale;
|
|
if (myRank == rootRank) etics_length_scale = grapite_get_length_scale(N, m, (double(*)[3])x, (double(*)[3])v); // We don't want all ranks to do it, because they need to write a file and might confuse each other
|
|
MPI_Bcast(&etics_length_scale, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
|
|
grapite_set_length_scale(etics_length_scale);
|
|
|
|
int grapite_cep_index = grapite_get_cep_index();
|
|
if (grapite_cep_index >= 0) {
|
|
double3 xcm, vcm, xdc, vdc;
|
|
grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc);
|
|
x[grapite_cep_index] = xdc;
|
|
v[grapite_cep_index] = vdc;
|
|
grapite_update_cep(time_cur, xdc, vdc, zeros, zeros);
|
|
}
|
|
#endif
|
|
|
|
/* define the all particles as a active on all the processors for the first time grav calc. */
|
|
calc_self_grav(time_cur, N, ind, x, v, pot, a, adot);
|
|
|
|
if (config->live_smbh_count == 2) {
|
|
black_hole_physics.set_xv(x[0], x[1], v[0], v[1]);
|
|
if (config->live_smbh_custom_eps >= 0) black_hole_physics.adjust_softening(pot[0], pot[1], a[0], a[1], adot[0], adot[1]);
|
|
if (config->binary_smbh_pn) black_hole_physics.adjust_post_newtonian(dt[0], a[0], a[1], adot[0], adot[1]);
|
|
}
|
|
|
|
calc_ext_grav(external_gravity_components, N, x, v, pot_ext, a, adot);
|
|
|
|
/* Energy control... */
|
|
if (myRank == rootRank) energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, N, m, x, v, pot, pot_ext);
|
|
|
|
#ifdef ETICS
|
|
if (config->etics_dump_coeffs && (diskstep==0)) {
|
|
char out_fname[256];
|
|
sprintf(out_fname, "coeffs.%06d.%02d.dat", 0, myRank);
|
|
grapite_dump(out_fname, 2);
|
|
}
|
|
|
|
if (grapite_cep_index >= 0) {
|
|
double3 xcm, vcm, xdc, vdc;
|
|
grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc);
|
|
x[grapite_cep_index] = xdc;
|
|
v[grapite_cep_index] = vdc;
|
|
grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]);
|
|
}
|
|
#endif
|
|
|
|
/* Define initial timestep for all particles on all nodes */
|
|
for (int j=0; j<N; j++) {
|
|
double a2_mod = a[j].norm2();
|
|
double adot2_mod = adot[j].norm2();
|
|
|
|
double dt_tmp, eta_s = eta/ETA_S_CORR;
|
|
if (adot2_mod==0) dt_tmp = eta_s; // That's weird, when will we have such a case?
|
|
else dt_tmp = eta_s*sqrt(a2_mod/adot2_mod);
|
|
|
|
int power = log(dt_tmp)/log(2.0) - 1;
|
|
|
|
dt_tmp = pow(2.0, (double)power);
|
|
|
|
if (dt_tmp < dt_min) dt_tmp = dt_min;
|
|
if (dt_tmp > dt_max) dt_tmp = dt_max;
|
|
|
|
dt[j] = dt_tmp;
|
|
|
|
if (config->dt_min_warning && (myRank == 0)) {
|
|
if (dt[j] == dt_min) {
|
|
printf("!!! Warning0: dt = dt_min = %.6E \t ind = %07d \n", dt[j], ind[j]);
|
|
fflush(stdout);
|
|
}
|
|
}
|
|
} /* j */
|
|
|
|
if (config->live_smbh_count > 0) {
|
|
double min_dt = *std::min_element(dt, dt+N);
|
|
for (int i=0; i<config->live_smbh_count; i++) dt[i] = min_dt;
|
|
}
|
|
|
|
/* load the new values for particles to the local GRAPEs */
|
|
for (int k=0; k<n_loc; k++) {
|
|
int j = k + myRank*n_loc;
|
|
g6_set_j_particle(clusterid, k, ind[j], t[j], dt[j], m[j], zeros, adot[j]*(1./6.), a[j]*0.5, v[j], x[j]);
|
|
} /* k */
|
|
|
|
if (myRank == rootRank) {
|
|
/* Write BH data... */
|
|
if (config->live_smbh_output) black_hole_physics.write_bh_data(time_cur, m, x, v, pot, a, adot, dt);
|
|
|
|
/* Write BH NB data... */
|
|
if (config->live_smbh_neighbor_output) write_bh_nb_data(time_cur);
|
|
|
|
} /* if (myRank == rootRank) */
|
|
|
|
timesteps = 0.0; // Why won't those two be long long instead of double + should include the zeroth step
|
|
n_act_sum = 0.0;
|
|
|
|
/* The main integration loop */
|
|
while (time_cur <= t_end) {
|
|
|
|
/* Define the minimal time and the active particles on all the nodes */
|
|
double min_t = active_search.get_minimum_time(t, dt);
|
|
|
|
/* Get indices of all particles that will be active in this bunch */
|
|
active_search.get_active_indices(min_t, t, dt, ind_act.data(), n_act);
|
|
|
|
/* Find the BH(s) indices in the active list */
|
|
int i_bh1=0, i_bh2=1;
|
|
#ifdef ETICS
|
|
int n_bh = config->live_smbh_count;
|
|
if (config->grapite_active_search && (n_bh>0)) {
|
|
int act_def_grapite_bh_count = 0;
|
|
int i_bh[n_bh];
|
|
for (int i=0; i<n_act; i++) {
|
|
if (ind_act[i]<n_bh) {
|
|
i_bh[ind_act[i]] = i;
|
|
if (++act_def_grapite_bh_count == n_bh) break;
|
|
}
|
|
}
|
|
i_bh1 = i_bh[0];
|
|
if (n_bh == 2) i_bh2 = i_bh[1];
|
|
}
|
|
#endif
|
|
|
|
/* predict the active particles positions etc... on all the nodes */
|
|
for (int i=0; i<n_act; i++) {
|
|
int j_act = ind_act[i];
|
|
double dt = min_t - t[j_act];
|
|
double dt2half = 0.5*dt*dt;
|
|
double dt3over6 = (1./3.)*dt*dt2half;
|
|
x_act_new[i] = x[j_act] + v[j_act]*dt + a[j_act]*dt2half + adot[j_act]*dt3over6;
|
|
v_act_new[i] = v[j_act] + a[j_act]*dt + adot[j_act]*dt2half;
|
|
} /* i */
|
|
|
|
/* Calculate gravity on active particles */
|
|
calc_self_grav(min_t, n_act, ind_act, x_act_new, v_act_new, pot_act_new, a_act_new, adot_act_new);
|
|
|
|
if (config->live_smbh_count == 2) {
|
|
black_hole_physics.set_xv(x_act_new[i_bh1], x_act_new[i_bh2], v_act_new[i_bh1], v_act_new[i_bh2]);
|
|
if (config->live_smbh_custom_eps >= 0) black_hole_physics.adjust_softening(pot_act_new[i_bh1], pot_act_new[i_bh2], a_act_new[i_bh1], a_act_new[i_bh2], adot_act_new[i_bh1], adot_act_new[i_bh2]);
|
|
if (config->binary_smbh_pn) black_hole_physics.adjust_post_newtonian(dt[i_bh1], a_act_new[i_bh1], a_act_new[i_bh2], adot_act_new[i_bh1], adot_act_new[i_bh2]);
|
|
}
|
|
|
|
/* Calculate gravity on active particles due to external forces */
|
|
calc_ext_grav(external_gravity_components, n_act, x_act_new, v_act_new, pot_act_ext, a_act_new, adot_act_new);
|
|
|
|
/* correct the active particles positions etc... on all the nodes */
|
|
double min_dt = dt_max;
|
|
for (int i=0; i<n_act; i++) {
|
|
int j_act = ind_act[i];
|
|
double dt_tmp = min_t - t[j_act];
|
|
|
|
double3 a2, a3;
|
|
calc_high_derivatives(dt_tmp, a[j_act], a_act_new[i], adot[j_act], adot_act_new[i], a2, a3);
|
|
|
|
corrector(dt_tmp, a2, a3, x_act_new[i], v_act_new[i]);
|
|
|
|
//TODO make beautiful
|
|
double eta_curr;
|
|
if ((config->live_smbh_count > 0) && (ind_act[i] < config->live_smbh_count)) eta_curr = eta_bh;
|
|
else eta_curr = eta;
|
|
|
|
double dt_new = aarseth_step(eta_curr, dt_tmp, a_act_new[i], adot_act_new[i], a2, a3);
|
|
|
|
//TODO the below should be moved to a function
|
|
if (dt_new < dt_min) dt_tmp = dt_min;
|
|
if ((dt_new < dt_tmp) && (dt_new > dt_min)) {
|
|
int power = log(dt_new)/log(2.0) - 1;
|
|
dt_tmp = pow(2.0, (double)power); // TODO why is this casting needed here?
|
|
}
|
|
|
|
if ((dt_new > 2*dt_tmp) && (fmod(min_t, 2*dt_tmp) == 0) && (2*dt_tmp <= dt_max)) {
|
|
dt_tmp *= 2;
|
|
}
|
|
|
|
if (config->dt_min_warning && (myRank == 0)) {
|
|
if (dt_tmp == dt_min) {
|
|
printf("!!! Warning1: dt_act = dt_min = %.6E \t ind_act = %07d \n", dt_tmp, ind_act[i]);
|
|
fflush(stdout);
|
|
}
|
|
}
|
|
if (dt_tmp < min_dt) min_dt = dt_tmp;
|
|
|
|
x[j_act] = x_act_new[i];
|
|
v[j_act] = v_act_new[i];
|
|
t[j_act] = min_t;
|
|
dt[j_act] = dt_tmp;
|
|
pot[j_act] = pot_act_new[i];
|
|
pot_ext[j_act] = pot_act_ext[i];
|
|
a[j_act] = a_act_new[i];
|
|
adot[j_act] = adot_act_new[i];
|
|
} /* i */
|
|
|
|
/* define the min. dt over all the act. part. and set it also for the BH... */
|
|
if (config->live_smbh_count > 0) {
|
|
if (config->live_smbh_count>=1) dt[0] = min_dt;
|
|
if (config->live_smbh_count==2) dt[1] = min_dt;
|
|
}
|
|
|
|
if (config->binary_smbh_influence_sphere_output && (myRank == rootRank)) {
|
|
//TODO clean up this mass. We don't want to have all these _act arrays; they are only needed for this lame function.
|
|
binary_smbh_influence_sphere_output(ind_act, n_act, timesteps, time_cur);
|
|
}
|
|
|
|
/* load the new values for active particles to the local GRAPE's */
|
|
for (int i=0; i<n_act; i++) {
|
|
#ifdef ETICS
|
|
if (ind_act[i] == grapite_cep_index) grapite_update_cep(t[grapite_cep_index], x[grapite_cep_index], v[grapite_cep_index], a[grapite_cep_index], adot[grapite_cep_index]); // All ranks should do it.
|
|
#endif
|
|
int cur_rank = ind_act[i]/n_loc;
|
|
if (myRank == cur_rank) {
|
|
int j_act = ind_act[i];
|
|
int address = ind_act[i] - myRank*n_loc;
|
|
g6_set_j_particle(clusterid, address, ind_act[i], t[j_act], dt[j_act], m[j_act], zeros, adot[j_act]*(1./6.), a[j_act]*0.5, v[j_act], x[j_act]);
|
|
|
|
} /* if (myRank == cur_rank) */
|
|
} /* i */
|
|
|
|
/* Current time set to min_t */
|
|
time_cur = min_t;
|
|
timesteps += 1.0;
|
|
n_act_sum += n_act;
|
|
|
|
if (time_cur >= t_bh) {
|
|
if (myRank == rootRank) {
|
|
/* Write BH data... */
|
|
if (config->live_smbh_output) black_hole_physics.write_bh_data(time_cur, m, x, v, pot, a, adot, dt);
|
|
|
|
/* Write BH NB data... */
|
|
if (config->live_smbh_neighbor_output) write_bh_nb_data(time_cur);
|
|
|
|
} /* if (myRank == rootRank) */
|
|
|
|
t_bh += dt_bh;
|
|
} /* if (time_cur >= t_bh) */
|
|
|
|
if (time_cur >= t_contr) {
|
|
if (myRank == rootRank) {
|
|
energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, N, m, x, v, pot, pot_ext);
|
|
/* write cont data */
|
|
if (config->output_hdf5) h5_write("data.con", diskstep, N, time_cur, m, x, v, pot, a, adot, 0, true);
|
|
else ascii_write("data.con", diskstep, N, time_cur, m, x, v, 16);
|
|
} /* if (myRank == rootRank) */
|
|
|
|
#ifdef ETICS
|
|
// We are /inside/ a control step, so all particles must be
|
|
// synchronized; we can safely calculate their density centre. The
|
|
// acceleration and jerk currently in the memory are for the
|
|
// predicted position of the CEP, by calling grapite_calc_center we
|
|
// "correct" the position and velocity, but not the gravity at that
|
|
// point.
|
|
if (grapite_cep_index >= 0) {
|
|
double3 xcm, vcm, xdc, vdc;
|
|
grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc);
|
|
x[grapite_cep_index] = xdc;
|
|
v[grapite_cep_index] = vdc;
|
|
grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]);
|
|
}
|
|
#endif
|
|
|
|
t_contr += dt_contr;
|
|
} /* if (time_cur >= t_contr) */
|
|
|
|
if (time_cur >= t_disk) {
|
|
char out_fname[256];
|
|
diskstep++;
|
|
if (myRank == rootRank) {
|
|
sprintf(out_fname, "%06d", diskstep);
|
|
if (config->output_hdf5) h5_write(std::string(out_fname) + ".h5", diskstep, N, time_cur, m, x, v, pot, a, adot, config->output_extra_mode, config->output_hdf5_double_precision);
|
|
else ascii_write(std::string(out_fname) + ".dat", diskstep, N, time_cur, m, x, v, config->output_ascii_precision);
|
|
} /* if (myRank == rootRank) */
|
|
|
|
#ifdef ETICS
|
|
if (config->etics_dump_coeffs) {
|
|
sprintf(out_fname, "coeffs.%06d.%02d.dat", diskstep, myRank);
|
|
grapite_dump(out_fname, 2);
|
|
}
|
|
#endif
|
|
t_disk += dt_disk;
|
|
} /* if (time_cur >= t_disk) */
|
|
} /* while (time_cur < t_end) */
|
|
|
|
/* close the local GRAPEs */
|
|
timer.stop();
|
|
g6_close(clusterid);
|
|
|
|
double g6_calls_sum;
|
|
MPI_Reduce(&calc_self_grav.g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD);
|
|
if (myRank == rootRank) {
|
|
/* Write some output for the timestep annalize... */
|
|
printf("\n");
|
|
printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls_sum);
|
|
printf("\n");
|
|
printf("Real Speed = %.3f GFlops \n", 57.0*N*n_act_sum/(timer.time)/1.0E+09);
|
|
fflush(stdout);
|
|
} /* if (myRank == rootRank) */
|
|
|
|
delete config;
|
|
delete[] m; delete[] x; delete[] v; delete[] a; delete[] adot; delete[] pot_ext; delete[] t; delete[] dt; delete[] x_act_new; delete[] v_act_new; delete[] a_act_new; delete[] adot_act_new; delete[] pot_act_ext;
|
|
|
|
/* Finalize the MPI work */
|
|
MPI_Finalize();
|
|
|
|
return 0;
|
|
}
|