1705 lines
60 KiB
C++
1705 lines
60 KiB
C++
/*****************************************************************************
|
|
File Name : "phi-GRAPE/GPU.c" // BH (1 || 2) + ACC + EJECT
|
|
:
|
|
Contents : N-body code with integration by individual block time step
|
|
: together with the parallel using of GRAPE6a board's.
|
|
:
|
|
: Added the GPU support via SAPPORO library.
|
|
:
|
|
: Normalization to the physical units!!!
|
|
:
|
|
: External Potential added
|
|
: Plummer-Kuzmin: Bulge, Disk, Halo
|
|
: Kharchenko+Andreas...
|
|
:
|
|
: SC extra POT for Bek SC test runs...
|
|
:
|
|
: Rebuced to the Single BH -> Plummer
|
|
: Andreas+Fazeel...
|
|
:
|
|
: Stellar evolution added
|
|
: Stellar lifetimes: Raiteri, Villata & Navarro (1996)
|
|
: IMS mass loss: van den Hoeg & Groenewegen (1997)
|
|
:
|
|
: STARDESTR_EXT: Tidal disruption of stars by external BH...
|
|
: Chingis, Denis & Maxim...
|
|
:
|
|
: STARDESTR: Tidal disruption of stars by BH...
|
|
: Jose, Li Shuo & Shiyan Zhong
|
|
:
|
|
: STARDISK: Drag force...
|
|
: Chingis, Denis & Maxim...
|
|
:
|
|
: STARDISK: variable hz = HZ*(R/R_crit) up to R_crit...
|
|
: Taras, Andreas...
|
|
:
|
|
: Live BH (1 || 2) + ACC + EJECT...
|
|
: Li Shuo & Shiyan Zhong
|
|
:
|
|
: dt_min for BH (1 || 2)...
|
|
:
|
|
: added the PN calculus for the BBH
|
|
: PN0, PN1, PN2, PN2.5 (coded on the base of
|
|
: Gabor Kupi original routine)
|
|
:
|
|
: added the "name" array...
|
|
:
|
|
: added the GMC's calculus (GMC on CPU; GMC2 on GPU)
|
|
: for Alexey SC runs... and also for Fazeel Zurich runs...
|
|
:
|
|
: CPU_TIMELIMIT added for the Julich MW cluster runs...
|
|
:
|
|
Coded by : Peter Berczik
|
|
Version number : 19.04
|
|
Last redaction : 2019.04.16 12:55
|
|
*****************************************************************************/
|
|
#include "double3.h"
|
|
#include "config.h"
|
|
#include "io.h"
|
|
#include "external.h"
|
|
|
|
Config *config;
|
|
|
|
#ifdef ETICS
|
|
#include "grapite.h"
|
|
// why do we need CEP as a compilaion flag... just have it always on when ETICS is on. IF there is no CEP, there should be a graceful skipping of those operations.
|
|
//#define ETICS_CEP
|
|
#ifndef ETICS_DTSCF
|
|
#error "ETICS_DTSCF must be defined"
|
|
#endif
|
|
#endif
|
|
|
|
#define TIMING
|
|
|
|
#define ETA_S_CORR 4.0
|
|
#define ETA_BH_CORR 4.0
|
|
|
|
#define DTMAXPOWER -3.0
|
|
#define DTMINPOWER -36.0
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <math.h>
|
|
#include <string.h>
|
|
#include <time.h>
|
|
#include <sys/time.h>
|
|
#include <sys/resource.h>
|
|
|
|
#include <algorithm>
|
|
#include <numeric>
|
|
#include <stdexcept>
|
|
|
|
#define G6_NPIPE 2048
|
|
#include "grape6.h"
|
|
|
|
#include <mpi.h>
|
|
|
|
/* Some "good" functions and constants... */
|
|
// TODO replace with inline functions
|
|
#define MIN(a,b) (((a)<(b)) ? (a):(b) )
|
|
#define SQR(x) ((x)*(x) )
|
|
|
|
|
|
#define KB 1024
|
|
#define MB (KB*KB)
|
|
|
|
#define N_MAX (6*MB)
|
|
|
|
double L[3]; // needed in pn_bh_spin.c
|
|
// Needed for things related to BHs
|
|
#include "debug.h"
|
|
|
|
double CPU_time_real0, CPU_time_user0, CPU_time_syst0;
|
|
double CPU_time_real, CPU_time_user, CPU_time_syst;
|
|
|
|
#ifdef TIMING
|
|
|
|
double CPU_tmp_real0, CPU_tmp_user0, CPU_tmp_syst0;
|
|
double CPU_tmp_real, CPU_tmp_user, CPU_tmp_syst;
|
|
|
|
double DT_TOT,
|
|
DT_ACT_DEF1, DT_ACT_DEF2, DT_ACT_DEF3, DT_ACT_PRED,
|
|
DT_ACT_GRAV, DT_EXT_GRAV,
|
|
DT_GMC_GRAV, DT_GMC_GMC_GRAV, DT_EXT_GMC_GRAV,
|
|
DT_ACT_CORR, DT_ACT_LOAD,
|
|
DT_STEVOL, DT_STARDISK, DT_STARDESTR;
|
|
|
|
double DT_ACT_REDUCE;
|
|
|
|
#endif
|
|
|
|
/* some local settings for G6a board's */
|
|
|
|
int clusterid, ii, nn, numGPU;
|
|
|
|
int npipe=G6_NPIPE, index_i[G6_NPIPE];
|
|
double h2_i[G6_NPIPE], p_i[G6_NPIPE];
|
|
double3 x_i[G6_NPIPE], v_i[G6_NPIPE],
|
|
a_i[G6_NPIPE], jerk_i[G6_NPIPE];
|
|
int new_tunit=51, new_xunit=51;
|
|
|
|
double ti=0.0;
|
|
|
|
double eps_BH=0.0;
|
|
|
|
/* external potential... */
|
|
|
|
double3 x_bh1, x_bh2, v_bh1, v_bh2;
|
|
|
|
double pot_bh1, pot_bh2;
|
|
double3 a_bh1, adot_bh1, a_bh2, adot_bh2;
|
|
|
|
#include "n_bh.c"
|
|
|
|
double3 a_pn1[7], adot_pn1[7], a_pn2[7], adot_pn2[7];
|
|
|
|
#include "pn_bh_spin.c"
|
|
|
|
#ifdef ETICS
|
|
double t_exp, dt_exp=ETICS_DTSCF; // t_exp is just the initial value
|
|
#ifdef ETICS_CEP
|
|
int grapite_cep_index;
|
|
#endif
|
|
#endif
|
|
|
|
void get_CPU_time(double *time_real, double *time_user, double *time_syst)
|
|
{
|
|
struct rusage xxx;
|
|
double sec_u, microsec_u, sec_s, microsec_s;
|
|
struct timeval tv;
|
|
|
|
getrusage(RUSAGE_SELF,&xxx);
|
|
|
|
sec_u = xxx.ru_utime.tv_sec;
|
|
sec_s = xxx.ru_stime.tv_sec;
|
|
|
|
microsec_u = xxx.ru_utime.tv_usec;
|
|
microsec_s = xxx.ru_stime.tv_usec;
|
|
|
|
*time_user = sec_u + microsec_u * 1.0E-06;
|
|
*time_syst = sec_s + microsec_s * 1.0E-06;
|
|
|
|
gettimeofday(&tv, NULL);
|
|
*time_real = tv.tv_sec + 1.0E-06 * tv.tv_usec;
|
|
|
|
*time_user = *time_real;
|
|
}
|
|
|
|
void write_bh_data(double time_cur, double m[], double3 x[], double3 v[], double pot[], double3 a[], double3 adot[], double dt[])
|
|
{
|
|
if (config->live_smbh_count == 2) {
|
|
|
|
auto out = fopen("bh.dat", "a");
|
|
|
|
for (int i=0; i < 2; i++) {
|
|
double3 *a_pn, *adot_pn, a_bh, adot_bh;
|
|
double pot_bh;
|
|
if (i==0) {
|
|
a_pn = a_pn1;
|
|
adot_pn = adot_pn1;
|
|
pot_bh = pot_bh1;
|
|
a_bh = a_bh1;
|
|
adot_bh = adot_bh1;
|
|
} else {
|
|
a_pn = a_pn2;
|
|
adot_pn = adot_pn2;
|
|
pot_bh = pot_bh2;
|
|
a_bh = a_bh2;
|
|
adot_bh = adot_bh2;
|
|
}
|
|
if (config->live_smbh_custom_eps >= 0) {
|
|
fprintf(out, "%.16E \t %.8E \t % .16E % .16E % .16E \t %.16E \t % .16E % .16E % .16E \t %.16E \t % .8E \t % .8E % .8E % .8E \t %.8E \t % .8E % .8E % .8E \t %.8E \t %.8E \t\t % .8E \t % .8E % .8E % .8E \t %.8E \t % .8E % .8E % .8E \t %.8E ",
|
|
time_cur, m[i],
|
|
x[i][0], x[i][1], x[i][2], x[i].norm(),
|
|
v[i][0], v[i][1], v[i][2], v[i].norm(),
|
|
pot[i],
|
|
a[i][0], a[i][1], a[i][2], a[i].norm(),
|
|
adot[i][0], adot[i][1], adot[i][2], adot[i].norm(),
|
|
dt[i],
|
|
pot_bh,
|
|
a_bh[0], a_bh[1], a_bh[2], a_bh.norm(),
|
|
adot_bh[0], adot_bh[1], adot_bh[2], adot_bh.norm());
|
|
if (config->binary_smbh_pn) {
|
|
fprintf(out, "\t");
|
|
for (int pn_idx=0; pn_idx < 7; pn_idx++) {
|
|
fprintf(out, "\t % .8E % .8E % .8E \t %.8E \t % .8E % .8E % .8E \t %.8E ", a_pn[pn_idx][0], a_pn[pn_idx][1], a_pn[pn_idx][2], a_pn[pn_idx].norm(), adot_pn[pn_idx][0], adot_pn[pn_idx][1], adot_pn[pn_idx][2], adot_pn[pn_idx].norm());
|
|
}
|
|
}
|
|
fprintf(out, "\n");
|
|
} else {
|
|
fprintf(out,"%.16E \t %.8E \t % .16E % .16E % .16E \t %.16E \t % .16E % .16E % .16E \t %.16E \t % .8E \t % .8E % .8E % .8E \t %.8E \t % .8E % .8E % .8E \t %.8E \t %.8E \n",
|
|
time_cur, m[i],
|
|
x[i][0], x[i][1], x[i][2], x[i].norm(),
|
|
v[i][0], v[i][1], v[i][2], v[i].norm(),
|
|
pot[i],
|
|
a[i][0], a[i][1], a[i][2], a[i].norm(),
|
|
adot[i][0], adot[i][1], adot[i][2], adot[i].norm(),
|
|
dt[i]);
|
|
}
|
|
}
|
|
fprintf(out, "\n");
|
|
fclose(out);
|
|
} else if (config->live_smbh_count == 1) {
|
|
auto out = fopen("bh.dat", "a");
|
|
double tmp_r = sqrt( SQR(x[0][0]) + SQR(x[0][1]) + SQR(x[0][2]) );
|
|
double tmp_v = sqrt( SQR(v[0][0]) + SQR(v[0][1]) + SQR(v[0][2]) );
|
|
double tmp_a = sqrt( SQR(a[0][0]) + SQR(a[0][1]) + SQR(a[0][2]) );
|
|
double tmp_adot = sqrt( SQR(adot[0][0]) + SQR(adot[0][1]) + SQR(adot[0][2]) );
|
|
fprintf(out,"%.16E \t %.8E \t % .16E % .16E % .16E \t %.16E \t % .16E % .16E % .16E \t %.16E \t % .8E \t % .8E % .8E % .8E \t %.8E \t % .8E % .8E % .8E \t %.8E \t %.8E \n",
|
|
time_cur, m[0],
|
|
x[0][0], x[0][1], x[0][2], tmp_r,
|
|
v[0][0], v[0][1], v[0][2], tmp_v,
|
|
pot[0],
|
|
a[0][0], a[0][1], a[0][2], tmp_a,
|
|
adot[0][0], adot[0][1], adot[0][2], tmp_adot,
|
|
dt[0]);
|
|
fprintf(out,"\n");
|
|
fclose(out);
|
|
}
|
|
}
|
|
|
|
void write_bh_nb_data(double time_cur, int N, double m[], double3 x[], double3 v[])
|
|
{
|
|
int nb = config->live_smbh_neighbor_number;
|
|
int ind_sort[N_MAX];
|
|
double var_sort[N_MAX];
|
|
//TODO you don't want and probably don't need to allocate these here. Maybe just need size nb, or maybe use as private variables.
|
|
|
|
auto out = fopen("bh_neighbors.dat", "a");
|
|
|
|
/* 1st BH */
|
|
|
|
for (int i_bh=0; i_bh < config->live_smbh_count; i_bh++) {
|
|
for (int i=0; i<N; i++) var_sort[i] = (x[i]-x[i_bh]).norm();
|
|
std::iota(ind_sort, ind_sort+N, 0);
|
|
std::partial_sort(ind_sort, ind_sort + nb, ind_sort + N, [&](int i, int j) {return var_sort[i] < var_sort[j];});
|
|
|
|
fprintf(out,"%.16E \t %07d \t %.8E \t % .8E % .8E % .8E \t % .8E % .8E % .8E \t",
|
|
time_cur,
|
|
i_bh,
|
|
m[i_bh],
|
|
x[i_bh][0], x[i_bh][1], x[i_bh][2],
|
|
v[i_bh][0], v[i_bh][1], v[i_bh][2]);
|
|
|
|
for (int j=1; j < nb; j++) {
|
|
int i = ind_sort[j];
|
|
fprintf(out,"%02d %07d %.8E % .8E % .8E % .8E %.8E % .8E % .8E % .8E %.8E \t",
|
|
j, i,
|
|
m[i],
|
|
x[i][0], x[i][1], x[i][2], (x[i]-x[i_bh]).norm(),
|
|
v[i][0], v[i][1], v[i][2], (v[i]-v[i_bh]).norm());
|
|
}
|
|
|
|
fprintf(out,"\n");
|
|
}
|
|
fprintf(out, "\n"); // this is redundant
|
|
fclose(out);
|
|
}
|
|
|
|
void calc_self_grav(double t, double eps2, double &g6_calls, int n_loc,
|
|
int n_act, int ind_act[],
|
|
double3 x_act_new[], double3 v_act_new[],
|
|
double pot_act_tmp[],
|
|
double3 a_act_tmp[],
|
|
double3 adot_act_tmp[],
|
|
double h2_i[])
|
|
{
|
|
/* calc the new grav for the active particles */
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
|
#endif
|
|
|
|
g6_set_ti(clusterid, t);
|
|
|
|
int ni = n_act; // TODO why is this needed?
|
|
|
|
/* define the local phi, a, adot for these active particles */
|
|
|
|
for (int i=0; i<ni; i+=npipe) {
|
|
nn = npipe;
|
|
if (ni-i < npipe) nn = ni - i;
|
|
for (ii=0; ii<nn; ii++) {
|
|
h2_i[ii] = eps2; // TODO This should be a global or something
|
|
} /* ii */
|
|
//TODO any way we can clean up this ugly casting?
|
|
g6calc_firsthalf(clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act_new+i, (double(*)[3])v_act_new+i, (double(*)[3])a_act_tmp+i, (double(*)[3])adot_act_tmp+i, pot_act_tmp+i, eps2, h2_i);
|
|
g6calc_lasthalf( clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act_new+i, (double(*)[3])v_act_new+i, eps2, h2_i, (double(*)[3])a_act_tmp+i, (double(*)[3])adot_act_tmp+i, pot_act_tmp+i);
|
|
g6_calls++;
|
|
} /* i */
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
|
DT_ACT_GRAV += (CPU_tmp_user - CPU_tmp_user0);
|
|
#endif
|
|
}
|
|
|
|
void calc_ext_grav(std::vector<External_gravity*> &external_gravity_components, int n_act, double3 *x_act_new, double3 *v_act_new, double *pot_act_ext, double3 *a_act_new, double3* adot_act_new)
|
|
{
|
|
|
|
/* Define the external potential for all active particles on all nodes */
|
|
|
|
int ni = n_act; // TODO redundant?
|
|
|
|
std::fill(pot_act_ext, pot_act_ext+n_act, 0.);
|
|
|
|
for (auto component : external_gravity_components) {
|
|
if (component->is_active)
|
|
component->apply(n_act, x_act_new, v_act_new, pot_act_ext, a_act_new, adot_act_new);
|
|
}
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
|
#endif
|
|
|
|
/* For simple Plummer potential... */
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
|
DT_EXT_GRAV += (CPU_tmp_user - CPU_tmp_user0);
|
|
#endif
|
|
|
|
}
|
|
|
|
void energy_contr(const double time_cur, const double timesteps, const double n_act_sum, const double g6_calls, double& rcm_sum, double& vcm_sum, double& E_tot_0, double& E_tot_corr_0, double& E_tot_corr_sd_0, int &skip_con, int N, double m[], double3 x[], double3 v[], double pot[], double pot_ext[])
|
|
{
|
|
// TODO maybe use static variables here for the previous step energy?
|
|
double E_pot = 0.0;
|
|
for (int i=0; i<N; i++) E_pot += m[i]*pot[i];
|
|
E_pot *= 0.5;
|
|
|
|
double E_kin = 0.0;
|
|
for (int i=0; i<N; i++) E_kin += m[i]*( SQR(v[i][0]) + SQR(v[i][1]) + SQR(v[i][2]) );
|
|
E_kin *= 0.5;
|
|
|
|
double E_pot_ext = 0.0;
|
|
|
|
for (int i=0; i<N; i++) E_pot_ext += m[i]*pot_ext[i];
|
|
|
|
if (timesteps == 0.0) { //TODO what is this thing?
|
|
rcm_sum = 0.0;
|
|
vcm_sum = 0.0;
|
|
}
|
|
|
|
double mcm = 0.0;
|
|
double rcm_mod = 0.0;
|
|
double vcm_mod = 0.0;
|
|
|
|
double3 xcm = {0, 0, 0};
|
|
double3 vcm = {0, 0, 0};
|
|
|
|
for (int i=0; i<N; i++) {
|
|
mcm += m[i];
|
|
xcm += m[i] * x[i];
|
|
vcm += m[i] * v[i];
|
|
} /* i */
|
|
|
|
xcm /= mcm;
|
|
vcm /= mcm;
|
|
|
|
rcm_mod = xcm.norm();
|
|
vcm_mod = vcm.norm();
|
|
|
|
rcm_sum += rcm_mod;
|
|
vcm_sum += vcm_mod;
|
|
|
|
double3 mom = {0, 0, 0};
|
|
|
|
for (int i=0; i<N; i++) {
|
|
mom[0] += m[i] * ( x[i][1] * v[i][2] - x[i][2] * v[i][1] );
|
|
mom[1] += m[i] * ( x[i][2] * v[i][0] - x[i][0] * v[i][2] );
|
|
mom[2] += m[i] * ( x[i][0] * v[i][1] - x[i][1] * v[i][0] );
|
|
} /* i */
|
|
|
|
get_CPU_time(&CPU_time_real, &CPU_time_user, &CPU_time_syst);
|
|
|
|
double E_tot = E_pot + E_kin + E_pot_ext;
|
|
double E_tot_corr = E_tot /* + E_corr */; // TODO what is this?
|
|
|
|
if (timesteps == 0.0) {
|
|
/* initialize the E_tot_0 + etc... */
|
|
|
|
E_tot_0 = E_tot;
|
|
E_tot_corr_0 = E_tot;
|
|
E_tot_corr_sd_0 = E_tot;
|
|
|
|
skip_con = 1;
|
|
} else {
|
|
/* read the E_tot_0 + etc... */
|
|
if (skip_con == 0) {
|
|
auto inp = fopen("contr.con","r");
|
|
double tmp;
|
|
fscanf(inp,"%lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE",
|
|
&tmp, &tmp, &tmp, &tmp,
|
|
&tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp,
|
|
&E_tot_0, &tmp,
|
|
&E_tot_corr_0, &tmp,
|
|
&E_tot_corr_sd_0, &tmp,
|
|
&tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp);
|
|
fclose(inp);
|
|
skip_con = 1;
|
|
}
|
|
}
|
|
double DE_tot = E_tot - E_tot_0;
|
|
|
|
/* This is the only output to screen */
|
|
printf("%.3E %.3E % .4E %.4E % .4E % .4E % .4E %.2E\n",
|
|
time_cur, timesteps,
|
|
E_pot, E_kin, E_pot_ext, E_tot, DE_tot,
|
|
CPU_time_user-CPU_time_user0);
|
|
|
|
fflush(stdout);
|
|
|
|
auto out = fopen("contr.dat", "a");
|
|
fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E %.8E %.8E \n",
|
|
time_cur, timesteps, n_act_sum, g6_calls,
|
|
E_pot, E_kin, E_pot_ext,
|
|
E_tot, DE_tot,
|
|
rcm_mod, vcm_mod,
|
|
mom[0], mom[1], mom[2],
|
|
CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0);
|
|
fclose(out);
|
|
|
|
out = fopen("contr.con", "w");
|
|
fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E %.8E %.8E \n",
|
|
time_cur, timesteps, n_act_sum, g6_calls,
|
|
E_pot, E_kin, E_pot_ext,
|
|
E_tot, DE_tot,
|
|
rcm_mod, vcm_mod,
|
|
mom[0], mom[1], mom[2],
|
|
CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0);
|
|
fclose(out);
|
|
|
|
E_tot_0 = E_tot;
|
|
E_tot_corr_0 = E_tot_corr;
|
|
E_tot_corr_sd_0 = 0;
|
|
}
|
|
|
|
class Active_search {
|
|
// TODO you can add pointers to t and dt at the constructor, no point giving them at get_minimum_time but without the size.
|
|
public:
|
|
Active_search(const int myRank, const int n_proc, const int n_loc, const int N)
|
|
: myRank(myRank), n_proc(n_proc), n_loc(n_loc), N(N)
|
|
{
|
|
ind_act_loc = new int[n_loc];
|
|
}
|
|
~Active_search() { delete[] ind_act_loc; };
|
|
double get_minimum_time(const double t[], const double dt[])
|
|
{
|
|
double min_t_loc, min_t;
|
|
#ifdef ACT_DEF_GRAPITE
|
|
min_t_loc = grapite_get_minimum_time();
|
|
#else
|
|
min_t_loc = t[myRank*n_loc]+dt[myRank*n_loc];
|
|
for (int j=myRank*n_loc+1; j<(myRank+1)*n_loc; j++) {
|
|
double tmp = t[j] + dt[j];
|
|
if (tmp < min_t_loc) min_t_loc = tmp;
|
|
}
|
|
#endif
|
|
/* Reduce the "global" min_t from min_t_loc "local" on all processors) */
|
|
MPI_Allreduce(&min_t_loc, &min_t, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
|
|
return min_t;
|
|
}
|
|
void get_active_indices(const double min_t, const double t[], const double dt[], int ind_act[], int *n_act)
|
|
{
|
|
#ifdef ACT_DEF_GRAPITE
|
|
int n_act_loc;
|
|
grapite_active_search(min_t, ind_act_loc, &n_act_loc);
|
|
if (myRank > 0)
|
|
for (int i=0; i<n_act_loc; i++)
|
|
ind_act_loc[i] += myRank*n_loc;
|
|
int n_act_arr[256], displs[256]; // Assuming maximum of 256 processes... seems safe.
|
|
MPI_Allgather(&n_act_loc, 1, MPI_INT, n_act_arr, 1, MPI_INT, MPI_COMM_WORLD);
|
|
*n_act = n_act_arr[0];
|
|
for (int i=1; i<n_proc; i++)
|
|
*n_act += n_act_arr[i];
|
|
displs[0] = 0;
|
|
for (int i=1; i<n_proc; i++)
|
|
displs[i]=displs[i-1]+n_act_arr[i-1];
|
|
MPI_Allgatherv(ind_act_loc, n_act_loc, MPI_INT, ind_act, n_act_arr, displs, MPI_INT, MPI_COMM_WORLD);
|
|
#else
|
|
*n_act = 0;
|
|
for (int i=0; i<N; i++) {
|
|
if (t[i]+dt[i] == min_t) ind_act[(*n_act)++] = i;
|
|
} /* i */
|
|
#endif // ACT_DEF_GRAPITE
|
|
}
|
|
private:
|
|
int myRank, n_proc, n_loc, N;
|
|
int *ind_act_loc;
|
|
};
|
|
|
|
class Source_particle_list {
|
|
public:
|
|
Source_particle_list(int N)
|
|
: N(N)
|
|
{
|
|
ind = new int[N];
|
|
m = new double[N];
|
|
x = new double3[N];
|
|
v = new double3[N];
|
|
pot = new double[N];
|
|
a = new double3[N];
|
|
adot = new double3[N];
|
|
t = new double[N];
|
|
dt = new double[N];
|
|
}
|
|
~Source_particle_list()
|
|
{
|
|
delete[] ind;
|
|
delete[] m;
|
|
delete[] x;
|
|
delete[] v;
|
|
delete[] pot;
|
|
delete[] a;
|
|
delete[] adot;
|
|
delete[] t;
|
|
delete[] dt;
|
|
}
|
|
int N;
|
|
int *ind;
|
|
double3 *x, *v, *a, *adot;
|
|
double *m, *t, *dt, *pot;
|
|
};
|
|
|
|
inline void calc_high_derivatives(const double dt_tmp, const double3 a_act, const double3 a_act_new, const double3 adot_act, const double3 adot_act_new, double3 *a2, double3 *a3)
|
|
{
|
|
double dtinv = 1.0/dt_tmp;
|
|
double dt2inv = dtinv*dtinv;
|
|
double dt3inv = dt2inv*dtinv;
|
|
|
|
double3 a0mia1 = a_act-a_act_new;
|
|
double3 ad04plad12 = 4.0*adot_act + 2.0*adot_act_new;
|
|
double3 ad0plad1 = adot_act + adot_act_new;
|
|
|
|
*a2 = -6.0*a0mia1*dt2inv - ad04plad12*dtinv;
|
|
*a3 = 12.0*a0mia1*dt3inv + 6.0*ad0plad1*dt2inv;
|
|
}
|
|
|
|
inline void corrector(const double dt_tmp, const double3 a2, const double3 a3, double3* x_act_new, double3* v_act_new)
|
|
{
|
|
double dt3over6 = dt_tmp*dt_tmp*dt_tmp/6.0;
|
|
double dt4over24 = dt3over6*dt_tmp/4.0;
|
|
double dt5over120 = dt4over24*dt_tmp/5.0;
|
|
|
|
*x_act_new += dt4over24*a2 + dt5over120*a3;
|
|
*v_act_new += dt3over6*a2 + dt4over24*a3;
|
|
}
|
|
|
|
inline double aarseth_step(const double eta, const double dt_tmp, const double3 a_act_new, const double3 adot_act_new, const double3 a2, const double3 a3)
|
|
{
|
|
double a1abs = a_act_new.norm();
|
|
double adot1abs = adot_act_new.norm();
|
|
double3 a2dot1 = a2 + dt_tmp*a3;
|
|
double a2dot1abs = a2dot1.norm();
|
|
double a3dot1abs = a3.norm();
|
|
|
|
return sqrt(eta*(a1abs*a2dot1abs+adot1abs*adot1abs)/(adot1abs*a3dot1abs+a2dot1abs*a2dot1abs));
|
|
}
|
|
|
|
void binary_smbh_influence_sphere_output(int i_bh1, int i_bh2, int ind_act[], double m_act[], double3 x_act[], double3 v_act[], double pot_act[], double dt_act[], int n_act, double timesteps, double time_cur, double factor, int inf_event[])
|
|
{
|
|
//TODO !!IMPORTANT!! only open the file IF THERE IS ANYTHING TO WRITE!!!
|
|
//TODO inf_event to be static or something?
|
|
auto out = fopen("bbh_inf.dat", "a");
|
|
|
|
double m_bh1 = m_act[i_bh1];
|
|
double m_bh2 = m_act[i_bh2];
|
|
|
|
double3 x_bh1 = x_act[i_bh1];
|
|
double3 x_bh2 = x_act[i_bh2];
|
|
|
|
double3 v_bh1 = v_act[i_bh1];
|
|
double3 v_bh2 = v_act[i_bh2];
|
|
|
|
double3 x_bbhc = (m_bh1*x_bh1 + m_bh2*x_bh2)/(m_bh1 + m_bh2);
|
|
double3 v_bbhc = (m_bh1*v_bh1 + m_bh2*v_bh2)/(m_bh1 + m_bh2);
|
|
|
|
double DR2 = SQR(x_bh1[0] - x_bh2[0]) + SQR(x_bh1[1] - x_bh2[1]) + SQR(x_bh1[2] - x_bh2[2]);
|
|
double DV2 = SQR(v_bh1[0] - v_bh2[0]) + SQR(v_bh1[1] - v_bh2[1]) + SQR(v_bh1[2] - v_bh2[2]);
|
|
|
|
double EB = -(m_bh1 + m_bh2) / sqrt(DR2) + 0.5 * DV2;
|
|
|
|
double SEMI_a = -0.5 * (m_bh1 + m_bh2)/EB;
|
|
double SEMI_a2 = SQR(SEMI_a);
|
|
|
|
for (int i=2; i<n_act; i++) {
|
|
|
|
double tmp_r2 = SQR(x_act[i][0] - x_bbhc[0]) + SQR(x_act[i][1] - x_bbhc[1]) + SQR(x_act[i][2] - x_bbhc[2]);
|
|
|
|
int iii = ind_act[i];
|
|
|
|
if (tmp_r2 < SEMI_a2*SQR(factor)) {
|
|
|
|
if (inf_event[iii] == 0) {
|
|
|
|
fprintf(out,"INF1 %.6E %.16E %07d %07d %.6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E % .6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E % .6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E %.6E % .6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E \n",
|
|
timesteps, time_cur, i, ind_act[i],
|
|
sqrt(DR2), x_bbhc[0], x_bbhc[1], x_bbhc[2], v_bbhc[0], v_bbhc[1], v_bbhc[2],
|
|
m_bh1, x_bh1[0], x_bh1[1], x_bh1[2], v_bh1[0], v_bh1[1], v_bh1[2], pot_act[0],
|
|
m_bh2, x_bh2[0], x_bh2[1], x_bh2[2], v_bh2[0], v_bh2[1], v_bh2[2], pot_act[1],
|
|
sqrt(tmp_r2),
|
|
m_act[i], x_act[i][0], x_act[i][1], x_act[i][2], v_act[i][0], v_act[i][1], v_act[i][2], pot_act[i],
|
|
dt_act[i]);
|
|
|
|
inf_event[iii] = 1;
|
|
}
|
|
|
|
} else {
|
|
if (inf_event[iii] == 1) {
|
|
fprintf(out,"INF2 %.6E %.16E %07d %07d %.6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E % .6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E % .6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E %.6E % .6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E \n",
|
|
timesteps, time_cur, i, ind_act[i],
|
|
sqrt(DR2), x_bbhc[0], x_bbhc[1], x_bbhc[2], v_bbhc[0], v_bbhc[1], v_bbhc[2],
|
|
m_bh1, x_bh1[0], x_bh1[1], x_bh1[2], v_bh1[0], v_bh1[1], v_bh1[2], pot_act[0],
|
|
m_bh2, x_bh2[0], x_bh2[1], x_bh2[2], v_bh2[0], v_bh2[1], v_bh2[2], pot_act[1],
|
|
sqrt(tmp_r2),
|
|
m_act[i], x_act[i][0], x_act[i][1], x_act[i][2], v_act[i][0], v_act[i][1], v_act[i][2], pot_act[i],
|
|
dt_act[i]);
|
|
}
|
|
|
|
inf_event[iii] = 0;
|
|
|
|
} /* if (tmp_r2 < DR2*R_INF2) */
|
|
|
|
} /* i */
|
|
fclose(out);
|
|
|
|
}
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
int name_proc, n_proc=1, myRank=0, rootRank=0, cur_rank,
|
|
nj, diskstep=0, power, jjj, iii,
|
|
skip_con=0, tmp_i;
|
|
|
|
double dt_disk, dt_contr, t_disk=0.0, t_contr=0.0,
|
|
dt_bh, t_bh=0.0, dt_bh_tmp,
|
|
t_end, time_cur, dt_min, dt_max, min_t, min_t_loc, dt_new,
|
|
eta_s, eta, eta_bh,
|
|
E_tot_0, E_tot_corr_0, E_tot_corr_sd_0,
|
|
rcm_sum=0.0, vcm_sum=0.0,
|
|
eps=0.0, eps2,
|
|
a2_mod, adot2_mod,
|
|
dt_tmp, dt2half, dt3over6, dt4over24, dt5over120,
|
|
dtinv, dt2inv, dt3inv,
|
|
a1abs, adot1abs, a2dot1abs, a3dot1abs,
|
|
timesteps=0.0, n_act_sum=0.0, n_act_distr[N_MAX], g6_calls=0.0, g6_calls_sum=0.0,
|
|
tmp;
|
|
|
|
double3 xcm, vcm, mom,
|
|
xdc, vdc,
|
|
a2, a3, a2dot1;
|
|
|
|
char processor_name[MPI_MAX_PROCESSOR_NAME],
|
|
inp_fname[30];
|
|
|
|
/* global variables */
|
|
|
|
Source_particle_list source_particle_list(N_MAX);
|
|
|
|
int N;
|
|
int *ind = source_particle_list.ind;
|
|
double *m = source_particle_list.m;
|
|
double3 *x = source_particle_list.x;
|
|
double3 *v = source_particle_list.v;
|
|
double *pot = source_particle_list.pot;
|
|
double3 *a = source_particle_list.a;
|
|
double3 *adot = source_particle_list.adot;
|
|
double *t = source_particle_list.t;
|
|
double *dt = source_particle_list.dt;
|
|
|
|
/* local variables */
|
|
int n_loc;
|
|
|
|
/* data for active particles */
|
|
|
|
int n_act, ind_act[N_MAX];
|
|
double m_act[N_MAX],
|
|
pot_act[N_MAX], t_act[N_MAX], dt_act[N_MAX],
|
|
pot_act_new[N_MAX],
|
|
pot_act_tmp[N_MAX];
|
|
|
|
double3 x_act[N_MAX], v_act[N_MAX],
|
|
a_act[N_MAX], adot_act[N_MAX],
|
|
x_act_new[N_MAX], v_act_new[N_MAX],
|
|
a_act_new[N_MAX], adot_act_new[N_MAX],
|
|
a_act_tmp[N_MAX], adot_act_tmp[N_MAX];;
|
|
|
|
FILE *out;
|
|
|
|
double C_NB;
|
|
int usedOrNot[7];
|
|
|
|
double pot_ext[N_MAX], pot_act_ext[N_MAX]; // for all EXTPOT
|
|
|
|
int i_bh1, i_bh2;
|
|
|
|
double m_bh1, m_bh2;
|
|
|
|
int inf_event[N_MAX];
|
|
double DR2, tmp_r2;
|
|
double3 x_bbhc, v_bbhc;
|
|
double DV2, EB, SEMI_a, SEMI_a2;
|
|
|
|
double s_bh1[3] = {0.0, 0.0, 1.0};
|
|
double s_bh2[3] = {0.0, 0.0, 1.0};
|
|
|
|
double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface.
|
|
|
|
/* INIT the rand() !!! */
|
|
srand(19640916); /* it is just my birthday :-) */
|
|
|
|
/* Init MPI */
|
|
MPI_Init(&argc, &argv);
|
|
|
|
/* Define the total number of processors */
|
|
MPI_Comm_size(MPI_COMM_WORLD, &n_proc);
|
|
|
|
/* Define of the Rank of each processors */
|
|
MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
|
|
|
|
/* Define the processors names */
|
|
MPI_Get_processor_name(processor_name, &name_proc);
|
|
|
|
/* Print the Rank and the names of processors */
|
|
printf("Rank of the processor %03d on %s \n", myRank, processor_name);
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
/* read the input parameters to the rootRank */
|
|
|
|
config = new Config("phigrape.conf");
|
|
C_NB = config->pn_c;
|
|
std::copy(config->pn_usage.begin(), config->pn_usage.end(), usedOrNot);
|
|
|
|
if (is_hdf5(config->input_file_name)) {
|
|
#ifndef HAS_HDF5
|
|
fprintf(stderr, "ERROR: input file is in HDF5 format, but the code was compiled without HDF5 support\n")
|
|
return -1;
|
|
#endif
|
|
h5_read(config->input_file_name, &diskstep, &N, &time_cur, m, x, v);
|
|
}
|
|
else
|
|
ascii_read(config->input_file_name, &diskstep, &N, &time_cur, m, x, v);
|
|
std::iota(ind, ind+N, 0);
|
|
|
|
if (myRank == rootRank) {
|
|
|
|
//TODO move it out of (myRank == rootRank) so you don't need to communicate them.
|
|
eps = config->eps;
|
|
t_end = config->t_end;
|
|
dt_disk = config->dt_disk;
|
|
dt_contr = config->dt_contr;
|
|
dt_bh = config->dt_bh;
|
|
eta = config->eta;
|
|
strcpy(inp_fname, config->input_file_name.c_str());
|
|
|
|
if (config->binary_smbh_influence_sphere_output) for (int i=0; i<N; i++) inf_event[i] = 0; // WARNING N wasn't set yet!
|
|
|
|
/*
|
|
eps : Plummer softening parameter (can be even 0)
|
|
t_end : end time of calculation
|
|
dt_disk : interval of snapshot files output (0xxx.dat)
|
|
dt_contr : interval for the energy control output (contr.dat)
|
|
dt_bh : interval for BH output (bh.dat & bh_neighbors.dat)
|
|
eta : parameter for timestep determination
|
|
inp_data : name of the input file (data.inp)
|
|
*/
|
|
|
|
printf("\n");
|
|
printf("Begin the calculation of phi-GRAPE program on %03d processors\n", n_proc);
|
|
printf("\n");
|
|
printf("N = %07d \t eps = %.6E \n", N, eps);
|
|
printf("t_beg = %.6E \t t_end = %.6E \n", time_cur, t_end);
|
|
printf("dt_disk = %.6E \t dt_contr = %.6E \n", dt_disk, dt_contr);
|
|
printf("dt_bh = %.6E \n", dt_bh);
|
|
printf("eta = %.6E \n", eta);
|
|
printf("\n");
|
|
|
|
printf("External Potential: \n");
|
|
printf("\n");
|
|
|
|
if ((diskstep == 0) && (time_cur == 0.0)) {
|
|
out = fopen("contr.dat", "w");
|
|
fclose(out);
|
|
|
|
#ifdef TIMING
|
|
out = fopen("timing.dat", "w");
|
|
fclose(out);
|
|
#endif
|
|
|
|
if (config->live_smbh_output && (config->live_smbh_count > 0)) {
|
|
out = fopen("bh.dat", "w");
|
|
fclose(out);
|
|
}
|
|
if ((config->live_smbh_neighbor_output) && (config->live_smbh_count > 0)) {
|
|
out = fopen("bh_neighbors.dat", "w");
|
|
fclose(out);
|
|
}
|
|
if (config->binary_smbh_influence_sphere_output) {
|
|
out = fopen("bbh_inf.dat", "w");
|
|
fclose(out);
|
|
}
|
|
|
|
} else { // if (diskstep == 0)
|
|
|
|
} // if (diskstep == 0)
|
|
|
|
get_CPU_time(&CPU_time_real0, &CPU_time_user0, &CPU_time_syst0);
|
|
|
|
} /* if (myRank == rootRank) */
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
/* Broadcast all useful values to all processors... */
|
|
MPI_Bcast(&N, 1, MPI_INT, rootRank, MPI_COMM_WORLD);
|
|
MPI_Bcast(&eps, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
|
|
MPI_Bcast(&eta, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
|
|
MPI_Bcast(&t_end, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
|
|
MPI_Bcast(&dt_disk, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
|
|
MPI_Bcast(&dt_contr, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
|
|
MPI_Bcast(&dt_bh, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
|
|
MPI_Bcast(&time_cur, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
double normalization_mass=1, normalization_length=1, normalization_velocity=1;
|
|
if (config->ext_units_physical) {
|
|
normalization_mass = 1/config->unit_mass;
|
|
normalization_length = 1000/config->unit_length;
|
|
normalization_velocity = 1.52484071426404437233e+01*sqrt(config->unit_length/config->unit_mass);
|
|
}
|
|
Plummer ext_bulge(config->ext_m_bulge*normalization_mass, config->ext_b_bulge*normalization_length);
|
|
Miyamoto_Nagai ext_disk(config->ext_m_disk*normalization_mass, config->ext_a_disk*normalization_length, config->ext_b_disk*normalization_length);
|
|
Plummer ext_halo_plummer(config->ext_m_halo_plummer*normalization_mass, config->ext_b_halo_plummer*normalization_length);
|
|
Logarithmic_halo ext_log_halo(config->ext_log_halo_v*normalization_velocity, config->ext_log_halo_r*normalization_length);
|
|
Dehnen ext_dehnen(config->ext_dehnen_m*normalization_mass, config->ext_dehnen_r*normalization_length, config->ext_dehnen_gamma);
|
|
|
|
std::vector<External_gravity*> external_gravity_components;
|
|
external_gravity_components.push_back(&ext_bulge);
|
|
external_gravity_components.push_back(&ext_disk);
|
|
external_gravity_components.push_back(&ext_halo_plummer);
|
|
external_gravity_components.push_back(&ext_log_halo);
|
|
external_gravity_components.push_back(&ext_dehnen);
|
|
|
|
if (ext_bulge.is_active) printf("m_bulge = %.4E b_bulge = %.4E\n", config->ext_m_bulge*normalization_mass, config->ext_b_bulge*normalization_length);
|
|
if (ext_disk.is_active) printf("m_disk = %.4E a_disk = %.4E b_disk = %.4E\n", config->ext_m_disk*normalization_mass, config->ext_a_disk*normalization_length, config->ext_b_disk*normalization_length);
|
|
if (ext_halo_plummer.is_active) printf("m_halo = %.4E b_halo = %.4E\n", config->ext_m_halo_plummer*normalization_mass, config->ext_b_halo_plummer*normalization_length);
|
|
if (ext_log_halo.is_active) printf("v_halo = %.6E r_halo = %.6E \n", config->ext_log_halo_v*normalization_velocity, config->ext_log_halo_r*normalization_length);
|
|
if (ext_dehnen.is_active) printf("m_ext = %.6E r_ext = %.6E \t g_ext = %.3E \n", config->ext_dehnen_m*normalization_mass, config->ext_dehnen_r*normalization_length, config->ext_dehnen_gamma);
|
|
printf("\n");
|
|
fflush(stdout);
|
|
|
|
eta_s = eta/ETA_S_CORR;
|
|
eta_bh = eta/ETA_BH_CORR;
|
|
|
|
eps2 = SQR(eps);
|
|
|
|
dt_min = 1.0*pow(2.0, DTMINPOWER);
|
|
dt_max = 1.0*pow(2.0, DTMAXPOWER);
|
|
|
|
if (dt_disk == dt_contr)
|
|
dt_max = dt_contr;
|
|
else
|
|
dt_max = MIN(dt_disk, dt_contr);
|
|
|
|
if (dt_max > 1.0) dt_max = 1.0;
|
|
|
|
t_disk = dt_disk*(1.0+floor(time_cur/dt_disk));
|
|
t_contr = dt_contr*(1.0+floor(time_cur/dt_contr));
|
|
t_bh = dt_bh*(1.0+floor(time_cur/dt_bh));
|
|
|
|
if (myRank == rootRank) {
|
|
printf("t_disk = %.6E t_contr = %.6E t_bh = %.6E \n", t_disk, t_contr, t_bh);
|
|
printf("\n");
|
|
fflush(stdout);
|
|
} /* if (myRank == rootRank) */
|
|
|
|
std::fill(t, t+N, time_cur);
|
|
std::fill(dt, dt+N, dt_min);
|
|
|
|
n_loc = N/n_proc;
|
|
|
|
Active_search active_search(myRank, n_proc, n_loc, N);
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
/* Broadcast the values of all particles to all processors... */
|
|
MPI_Bcast(ind, N, MPI_INT, rootRank, MPI_COMM_WORLD);
|
|
MPI_Bcast(m, N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
|
|
MPI_Bcast(x, 3*N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
|
|
MPI_Bcast(v, 3*N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
/* init the local GRAPE's */
|
|
|
|
if (config->devices_per_node==0) {
|
|
MPI_Comm shmcomm;
|
|
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm);
|
|
MPI_Comm_size(shmcomm, &numGPU);
|
|
MPI_Comm_rank(shmcomm, &clusterid);
|
|
} else {
|
|
numGPU = config->devices_per_node;
|
|
clusterid = myRank % numGPU;
|
|
}
|
|
printf("Rank of the processor %03d : Number of GPUs %01d : Cluster ID %01d \n", myRank, numGPU, clusterid);
|
|
fflush(stdout);
|
|
|
|
g6_open(clusterid);
|
|
npipe = g6_npipes();
|
|
g6_set_tunit(new_tunit);
|
|
g6_set_xunit(new_xunit);
|
|
|
|
#ifdef ETICS
|
|
grapite_read_particle_tags(N, "grapite.mask", myRank, n_loc);
|
|
grapite_set_dt_exp(dt_exp);
|
|
dt_exp = time_cur; // if we don't have a binary restart then we don't remember the coefficients, and we need to calculate them now.
|
|
grapite_set_t_exp(t_exp);
|
|
#endif
|
|
|
|
/* load the nj particles to the G6 */
|
|
|
|
for (int j=0; j<n_loc; j++) {
|
|
int kkk = j + myRank*n_loc;
|
|
g6_set_j_particle(clusterid, j, ind[kkk], t[kkk], dt[kkk], m[kkk], zeros, zeros, zeros, v[kkk], x[kkk]);
|
|
} /* j */
|
|
|
|
#ifdef ETICS
|
|
double etics_length_scale;
|
|
if (myRank == rootRank) etics_length_scale = grapite_get_length_scale(N, m, x, v); // We don't want all ranks to do it, because they need to write a file and might confuse each other
|
|
MPI_Bcast(&etics_length_scale, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
|
|
grapite_set_length_scale(etics_length_scale);
|
|
#endif
|
|
|
|
#ifdef ETICS_CEP
|
|
// First time only: get the CEP index
|
|
grapite_cep_index = grapite_get_cep_index();
|
|
|
|
// First calculate the DC
|
|
grapite_calc_center(N, m, x, v, xcm, vcm, xdc, vdc);
|
|
|
|
// Now copy it to the global particle list
|
|
memcpy(x[grapite_cep_index], xdc, 3*sizeof(double));
|
|
memcpy(v[grapite_cep_index], vdc, 3*sizeof(double));
|
|
|
|
double zeros[3] = {0., 0., 0.}; // We haven't calculated the force yet!
|
|
grapite_update_cep(time_cur, xdc, vdc, zeros, zeros);
|
|
#endif
|
|
|
|
/* define the all particles as a active on all the processors for the first time grav calc. */
|
|
|
|
n_act = N;
|
|
|
|
for (int i=0; i<n_act; i++) {
|
|
ind_act[i] = ind[i];
|
|
iii = ind_act[i];
|
|
m_act[i] = m[iii];
|
|
|
|
x_act[i] = x[iii];
|
|
v_act[i] = v[iii];
|
|
|
|
t_act[i] = t[iii];
|
|
dt_act[i] = dt[iii];
|
|
|
|
} /* i */
|
|
|
|
// NOTE this is where calc_self_grav_zero() used to be.
|
|
calc_self_grav(time_cur, eps2, g6_calls, n_loc,
|
|
n_act, ind_act,
|
|
x_act, v_act,
|
|
pot_act_tmp,
|
|
a_act_tmp,
|
|
adot_act_tmp,
|
|
h2_i);
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
/* Reduce the "global" vectors from "local" on all processors) */
|
|
MPI_Allreduce(pot_act_tmp, pot, n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
|
MPI_Allreduce(a_act_tmp, a, 3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
|
MPI_Allreduce(adot_act_tmp, adot, 3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
if (config->live_smbh_count == 2) {
|
|
i_bh1 = 0;
|
|
i_bh2 = 1;
|
|
|
|
if (config->live_smbh_custom_eps >= 0) {
|
|
|
|
m_bh1 = m[i_bh1];
|
|
m_bh2 = m[i_bh2];
|
|
|
|
x_bh1 = x[i_bh1];
|
|
v_bh1 = v[i_bh1];
|
|
|
|
x_bh2 = x[i_bh2];
|
|
v_bh2 = v[i_bh2];
|
|
|
|
// calculate and "minus" the BH <-> BH _softened_ pot, acc & jerk
|
|
|
|
tmp_i = calc_force_n_BH(m_bh1, x_bh1, v_bh1,
|
|
m_bh2, x_bh2, v_bh2,
|
|
eps,
|
|
&pot_bh1, a_bh1, adot_bh1,
|
|
&pot_bh2, a_bh2, adot_bh2);
|
|
|
|
pot[i_bh1] -= pot_bh1;
|
|
pot[i_bh2] -= pot_bh2;
|
|
|
|
a[i_bh1] -= a_bh1;
|
|
a[i_bh2] -= a_bh2;
|
|
|
|
adot[i_bh1] -= adot_bh1;
|
|
adot[i_bh2] -= adot_bh2;
|
|
|
|
// calculate and "plus" the new BH <-> BH _unsoftened_ pot, acc, jerk
|
|
|
|
tmp_i = calc_force_n_BH(m_bh1, x_bh1, v_bh1,
|
|
m_bh2, x_bh2, v_bh2,
|
|
config->live_smbh_custom_eps,
|
|
&pot_bh1, a_bh1, adot_bh1,
|
|
&pot_bh2, a_bh2, adot_bh2);
|
|
|
|
pot[i_bh1] += pot_bh1;
|
|
pot[i_bh2] += pot_bh2;
|
|
|
|
a[i_bh1] += a_bh1;
|
|
a[i_bh2] += a_bh2;
|
|
|
|
adot[i_bh1] += adot_bh1;
|
|
adot[i_bh2] += adot_bh2;
|
|
}
|
|
|
|
if (config->binary_smbh_pn) {
|
|
|
|
// calculate and "plus" the new BH <-> BH : PN1, PN2, PN2.5, PN3, PN3.5 : acc, jerk
|
|
|
|
dt_bh_tmp = dt[0];
|
|
|
|
tmp_i = calc_force_pn_BH(m_bh1, x_bh1, v_bh1, s_bh1,
|
|
m_bh2, x_bh2, v_bh2, s_bh2,
|
|
C_NB, dt_bh_tmp, usedOrNot,
|
|
(double(*)[3])a_pn1, (double(*)[3])adot_pn1,
|
|
(double(*)[3])a_pn2, (double(*)[3])adot_pn2, myRank, rootRank);
|
|
|
|
a[i_bh1] += a_pn1[1] + a_pn1[2] + a_pn1[3] + a_pn1[4] + a_pn1[5] + a_pn1[6];
|
|
a[i_bh2] += a_pn2[1] + a_pn2[2] + a_pn2[3] + a_pn2[4] + a_pn2[5] + a_pn2[6];
|
|
|
|
adot[i_bh1] += adot_pn1[1] + adot_pn1[2] + adot_pn1[3] + adot_pn1[4] + adot_pn1[5] + adot_pn1[6];
|
|
adot[i_bh2] += adot_pn2[1] + adot_pn2[2] + adot_pn2[3] + adot_pn2[4] + adot_pn2[5] + adot_pn2[6];
|
|
|
|
if (myRank == rootRank) {
|
|
if (tmp_i == 505) {
|
|
printf("PN RSDIST: %.8E \t %.8E \n", timesteps, time_cur);
|
|
fflush(stdout);
|
|
exit(-1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
calc_ext_grav(external_gravity_components, n_act, x_act, v_act, pot_ext, a, adot);
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
/* Energy control... */
|
|
if (myRank == rootRank) {
|
|
energy_contr(time_cur, timesteps, n_act_sum, g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
|
|
} /* if (myRank == rootRank) */
|
|
|
|
#ifdef ETICS_DUMP
|
|
if (diskstep==0) {
|
|
sprintf(out_fname, "coeffs.%06d.%02d.dat", 0, myRank);
|
|
grapite_dump(out_fname, 2);
|
|
}
|
|
#endif
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
#ifdef ETICS_CEP
|
|
// First calculate the DC
|
|
grapite_calc_center(N, m, x, v, xcm, vcm, xdc, vdc);
|
|
|
|
// Now copy it to the global particle list
|
|
memcpy(x[grapite_cep_index], xdc, 3*sizeof(double));
|
|
memcpy(v[grapite_cep_index], vdc, 3*sizeof(double));
|
|
|
|
grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]);
|
|
#endif
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
/* Define initial timestep for all particles on all nodes */
|
|
|
|
for (int i=0; i<N; i++) {
|
|
a2_mod = a[i][0]*a[i][0]+a[i][1]*a[i][1]+a[i][2]*a[i][2];
|
|
adot2_mod = adot[i][0]*adot[i][0]+adot[i][1]*adot[i][1]+adot[i][2]*adot[i][2];
|
|
|
|
if (adot2_mod == 0)
|
|
dt_tmp = eta_s;
|
|
else
|
|
dt_tmp = eta_s*sqrt(a2_mod/adot2_mod);
|
|
|
|
power = log(dt_tmp)/log(2.0) - 1;
|
|
|
|
dt_tmp = pow(2.0, (double)power);
|
|
|
|
if (dt_tmp < dt_min) dt_tmp = dt_min;
|
|
if (dt_tmp > dt_max) dt_tmp = dt_max;
|
|
|
|
dt[i] = dt_tmp;
|
|
|
|
if (config->dt_min_warning && (myRank == 0)) {
|
|
if (dt[i] == dt_min) {
|
|
printf("!!! Warning0: dt = dt_min = %.6E \t ind = %07d \n", dt[i], ind[i]);
|
|
fflush(stdout);
|
|
}
|
|
}
|
|
} /* i */
|
|
|
|
if (config->live_smbh_count > 0) {
|
|
double min_dt = *std::min_element(dt, dt+N);
|
|
for (int i=0; i<config->live_smbh_count; i++) dt[i] = min_dt;
|
|
}
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
/* load the new values for particles to the local GRAPE's */
|
|
/* load the nj particles to the G6 */
|
|
|
|
for (int j=0; j<n_loc; j++) {
|
|
int kkk = j + myRank*n_loc;
|
|
g6_set_j_particle(clusterid, j, ind[kkk], t[kkk], dt[kkk], m[kkk], zeros, adot[kkk]*(1./6.), a[kkk]*0.5, v[kkk], x[kkk]);
|
|
} /* j */
|
|
|
|
if (myRank == rootRank) {
|
|
|
|
/* Write BH data... */
|
|
if (config->live_smbh_output) write_bh_data(time_cur, m, x, v, pot, a, adot, dt);
|
|
|
|
/* Write BH NB data... */
|
|
if (config->live_smbh_neighbor_output) write_bh_nb_data(time_cur, N, m, x, v);
|
|
|
|
} /* if (myRank == rootRank) */
|
|
|
|
/* Get the Starting time on rootRank */
|
|
|
|
if (myRank == rootRank) {
|
|
get_CPU_time(&CPU_time_real0, &CPU_time_user0, &CPU_time_syst0);
|
|
get_CPU_time(&CPU_time_real, &CPU_time_user, &CPU_time_syst);
|
|
} /* if (myRank == rootRank) */
|
|
|
|
timesteps = 0.0; // Why won't those two be long long instead of double + should include the zeroth step
|
|
n_act_sum = 0.0;
|
|
|
|
for (int i=1; i<N+1; i++) { // TODO why like this?
|
|
n_act_distr[i-1] = 0.0;
|
|
}
|
|
|
|
g6_calls = 0.0; //TODO this should include the calls at the zeroth step, so move it further up.
|
|
|
|
#ifdef TIMING
|
|
DT_TOT = 0.0;
|
|
|
|
DT_ACT_DEF1 = 0.0;
|
|
DT_ACT_DEF2 = 0.0;
|
|
DT_ACT_DEF3 = 0.0;
|
|
DT_ACT_PRED = 0.0;
|
|
DT_ACT_GRAV = 0.0;
|
|
DT_EXT_GRAV = 0.0;
|
|
DT_EXT_GMC_GRAV = 0.0;
|
|
DT_GMC_GMC_GRAV = 0.0;
|
|
DT_ACT_CORR = 0.0;
|
|
DT_ACT_LOAD = 0.0;
|
|
|
|
DT_STEVOL = 0.0;
|
|
DT_STARDISK = 0.0;
|
|
DT_STARDESTR = 0.0;
|
|
|
|
DT_ACT_REDUCE = 0.0;
|
|
#endif
|
|
|
|
/* The main integration loop */
|
|
|
|
while (time_cur <= t_end) {
|
|
|
|
/* Define the minimal time and the active particles on all the nodes (exclude the ZERO masses!!!) */
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
|
#endif
|
|
|
|
min_t = active_search.get_minimum_time(t, dt);
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
|
DT_ACT_DEF1 += (CPU_tmp_user - CPU_tmp_user0);
|
|
#endif
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
|
#endif
|
|
|
|
active_search.get_active_indices(min_t, t, dt, ind_act, &n_act);
|
|
|
|
// TODO deal with it below
|
|
#ifdef ACT_DEF_GRAPITE
|
|
#error please fix here
|
|
#endif
|
|
#if defined(ACT_DEF_GRAPITE) && (defined(ADD_BH1) || defined(ADD_BH2))
|
|
#ifdef ADD_BH1
|
|
#define ACT_DEF_GRAPITE_NUMBH 1
|
|
#else
|
|
#define ACT_DEF_GRAPITE_NUMBH 2
|
|
#endif
|
|
int acf_def_grapite_bh_count = 0;
|
|
for (i=0; i<n_act; i++) {
|
|
if (ind_act[i]==0) {
|
|
i_bh1 = i;
|
|
acf_def_grapite_bh_count++;
|
|
}
|
|
#ifdef ADD_BH2
|
|
else if (ind_act[i]==1) {
|
|
i_bh2 = i;
|
|
acf_def_grapite_bh_count++;
|
|
}
|
|
#endif
|
|
if (acf_def_grapite_bh_count==ACT_DEF_GRAPITE_NUMBH) break;
|
|
}
|
|
if (i==n_act) {
|
|
fprintf(stderr, "ERROR: black holes were not found in the active particle list");
|
|
exit(1);
|
|
}
|
|
#else
|
|
if (config->live_smbh_count > 0) {
|
|
i_bh1 = 0;
|
|
i_bh2 = 1;
|
|
}
|
|
#endif
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
|
DT_ACT_DEF2 += (CPU_tmp_user - CPU_tmp_user0);
|
|
#endif
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
|
#endif
|
|
for (int i=0; i<n_act; i++) {
|
|
int j_act = ind_act[i];
|
|
m_act[i] = m[j_act];
|
|
x_act[i] = x[j_act];
|
|
v_act[i] = v[j_act];
|
|
t_act[i] = t[j_act];
|
|
dt_act[i] = dt[j_act];
|
|
// NOTICE Why do we need pot_act and pot_act_ext? Probably redundant.
|
|
pot_act[i] = pot[j_act];
|
|
pot_act_ext[i] = pot_ext[j_act];
|
|
a_act[i] = a[j_act];
|
|
adot_act[i] = adot[j_act];
|
|
} /* i */
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
|
DT_ACT_DEF3 += (CPU_tmp_user - CPU_tmp_user0);
|
|
#endif
|
|
|
|
/* predict the active particles positions etc... on all the nodes */
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
|
#endif
|
|
|
|
for (int i=0; i<n_act; i++) {
|
|
dt_tmp = min_t - t_act[i];
|
|
dt2half = 0.5*dt_tmp*dt_tmp;
|
|
dt3over6 = (1./3.)*dt_tmp*dt2half;
|
|
x_act_new[i] = x_act[i] + v_act[i]*dt_tmp + a_act[i]*dt2half + adot_act[i]*dt3over6;
|
|
v_act_new[i] = v_act[i] + a_act[i]*dt_tmp + adot_act[i]*dt2half;
|
|
} /* i */
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
|
DT_ACT_PRED += (CPU_tmp_user - CPU_tmp_user0);
|
|
#endif
|
|
|
|
calc_self_grav(min_t, eps2, g6_calls, n_loc,
|
|
n_act, ind_act,
|
|
x_act_new, v_act_new,
|
|
pot_act_tmp,
|
|
a_act_tmp,
|
|
adot_act_tmp,
|
|
h2_i);
|
|
|
|
/* Reduce the "global" vectors from "local" on all the nodes */
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
|
#endif
|
|
|
|
MPI_Allreduce(pot_act_tmp, pot_act_new, n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
|
MPI_Allreduce(a_act_tmp, a_act_new, 3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
|
MPI_Allreduce(adot_act_tmp, adot_act_new, 3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
|
DT_ACT_REDUCE += (CPU_tmp_user - CPU_tmp_user0);
|
|
#endif
|
|
|
|
if (config->live_smbh_count == 2) {
|
|
if (config->live_smbh_custom_eps >= 0) {
|
|
m_bh1 = m_act[i_bh1];
|
|
m_bh2 = m_act[i_bh2];
|
|
|
|
x_bh1 = x_act_new[i_bh1];
|
|
v_bh1 = v_act_new[i_bh1];
|
|
|
|
x_bh2 = x_act_new[i_bh2];
|
|
v_bh2 = v_act_new[i_bh2];
|
|
|
|
// calculate and "minus" the BH <-> BH softened pot, acc & jerk
|
|
|
|
tmp_i = calc_force_n_BH(m_bh1, x_bh1, v_bh1,
|
|
m_bh2, x_bh2, v_bh2,
|
|
eps,
|
|
&pot_bh1, a_bh1, adot_bh1,
|
|
&pot_bh2, a_bh2, adot_bh2);
|
|
|
|
pot_act_new[i_bh1] -= pot_bh1;
|
|
pot_act_new[i_bh2] -= pot_bh2;
|
|
|
|
a_act_new[i_bh1] -= a_bh1;
|
|
a_act_new[i_bh2] -= a_bh2;
|
|
|
|
adot_act_new[i_bh1] -= adot_bh1;
|
|
adot_act_new[i_bh2] -= adot_bh2;
|
|
|
|
// calculate and "plus" the new BH <-> BH unsoftened pot, acc, jerk
|
|
|
|
tmp_i = calc_force_n_BH(m_bh1, x_bh1, v_bh1,
|
|
m_bh2, x_bh2, v_bh2,
|
|
config->live_smbh_custom_eps,
|
|
&pot_bh1, a_bh1, adot_bh1,
|
|
&pot_bh2, a_bh2, adot_bh2);
|
|
|
|
pot_act_new[i_bh1] += pot_bh1;
|
|
pot_act_new[i_bh2] += pot_bh2;
|
|
|
|
a_act_new[i_bh1] += a_bh1;
|
|
a_act_new[i_bh2] += a_bh2;
|
|
|
|
adot_act_new[i_bh1] += adot_bh1;
|
|
adot_act_new[i_bh2] += adot_bh2;
|
|
}
|
|
|
|
if (config->binary_smbh_pn) {
|
|
// calculate and "plus" the new BH <-> BH : PN1, PN2, PN2.5, PN3, PN3.5 : acc, jerk
|
|
|
|
dt_bh_tmp = dt[0];
|
|
|
|
tmp_i = calc_force_pn_BH(m_bh1, x_bh1, v_bh1, s_bh1,
|
|
m_bh2, x_bh2, v_bh2, s_bh2,
|
|
C_NB, dt_bh_tmp, usedOrNot,
|
|
(double(*)[3])a_pn1, (double(*)[3])adot_pn1,
|
|
(double(*)[3])a_pn2, (double(*)[3])adot_pn2, myRank, rootRank);
|
|
|
|
a_act_new[i_bh1] += a_pn1[1] + a_pn1[2] + a_pn1[3] + a_pn1[4] + a_pn1[5] + a_pn1[6];
|
|
a_act_new[i_bh2] += a_pn2[1] + a_pn2[2] + a_pn2[3] + a_pn2[4] + a_pn2[5] + a_pn2[6];
|
|
|
|
adot_act_new[i_bh1] += adot_pn1[1] + adot_pn1[2] + adot_pn1[3] + adot_pn1[4] + adot_pn1[5] + adot_pn1[6];
|
|
adot_act_new[i_bh2] += adot_pn2[1] + adot_pn2[2] + adot_pn2[3] + adot_pn2[4] + adot_pn2[5] + adot_pn2[6];
|
|
|
|
if (myRank == rootRank) {
|
|
if (tmp_i == 505) {
|
|
printf("PN RSDIST: TS = %.8E \t t = %.8E \n", timesteps, time_cur);
|
|
fflush(stdout);
|
|
exit(-1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
calc_ext_grav(external_gravity_components, n_act, x_act_new, v_act_new, pot_act_ext, a_act_new, adot_act_new);
|
|
|
|
/* correct the active particles positions etc... on all the nodes */
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
|
#endif
|
|
|
|
for (int i=0; i<n_act; i++) {
|
|
// NOTICE looks like we're doing three unrelated things in this loop: (1) correcting positions and velocities (2) calculating new steps, and (3) putting the corrected values from the _act_new back in the _act arrays.
|
|
// After going back to the _act arrays they don't do much before they go back to the main arrays, so this copy seems redundant (the SMBH influence sphere printout needs these values but it should be a function anyway).
|
|
// TODO split this loop into the three tasks it is doing, and remove the redundancy.
|
|
dt_tmp = min_t - t_act[i];
|
|
|
|
double3 a2, a3;
|
|
calc_high_derivatives(dt_tmp, a_act[i], a_act_new[i], adot_act[i], adot_act_new[i], &a2, &a3);
|
|
|
|
corrector(dt_tmp, a2, a3, &x_act_new[i], &v_act_new[i]);
|
|
|
|
|
|
//TODO make beautiful
|
|
double eta_curr;
|
|
if ((config->live_smbh_count > 0) && (ind_act[i] < config->live_smbh_count)) eta_curr = eta_bh;
|
|
else eta_curr = eta;
|
|
|
|
double dt_new = aarseth_step(eta_curr, dt_tmp, a_act_new[i], adot_act_new[i], a2, a3);
|
|
|
|
//TODO the below should be moved to a function
|
|
if (dt_new < dt_min) dt_tmp = dt_min;
|
|
|
|
if ((dt_new < dt_tmp) && (dt_new > dt_min)) {
|
|
power = log(dt_new)/log(2.0) - 1;
|
|
dt_tmp = pow(2.0, (double)power); // TODO why is this casting needed here?
|
|
}
|
|
|
|
if ((dt_new > 2*dt_tmp) && (fmod(min_t, 2*dt_tmp) == 0) && (2*dt_tmp <= dt_max)) {
|
|
dt_tmp *= 2;
|
|
}
|
|
|
|
if (config->dt_min_warning && (myRank == 0)) {
|
|
if (dt_act[i] == dt_min) {
|
|
printf("!!! Warning1: dt_act = dt_min = %.6E \t ind_act = %07d \n", dt[i], ind_act[i]);
|
|
fflush(stdout);
|
|
}
|
|
}
|
|
|
|
/* BEGIN copy of everything */
|
|
|
|
dt_act[i] = dt_tmp;
|
|
t_act[i] = min_t;
|
|
|
|
pot_act[i] = pot_act_new[i];
|
|
|
|
x_act[i] = x_act_new[i];
|
|
v_act[i] = v_act_new[i];
|
|
a_act[i] = a_act_new[i];
|
|
adot_act[i] = adot_act_new[i];
|
|
|
|
/* END copy of everything */
|
|
|
|
|
|
|
|
} /* i */
|
|
|
|
/* define the min. dt over all the act. part. and set it also for the BH... */
|
|
|
|
if (config->live_smbh_count > 0) {
|
|
double min_dt = *std::min_element(dt_act, dt_act+n_act);
|
|
if (config->live_smbh_count>=1) dt_act[i_bh1] = min_dt;
|
|
if (config->live_smbh_count==2) dt_act[i_bh2] = min_dt;
|
|
}
|
|
|
|
if (config->binary_smbh_influence_sphere_output && (myRank == rootRank)) {
|
|
//TODO clean up this mass. We don't want to have all these _act arrays; they are only needed for this lame function.
|
|
binary_smbh_influence_sphere_output(i_bh1, i_bh2, ind_act, m_act, x_act, v_act, pot_act, dt_act, n_act, timesteps, time_cur, config->binary_smbh_influence_radius_factor, inf_event);
|
|
}
|
|
|
|
/* Return back the new coordinates + etc... of active part. to the global data... */
|
|
|
|
for (int i=0; i<n_act; i++) {
|
|
|
|
iii = ind_act[i];
|
|
|
|
m[iii] = m_act[i];
|
|
|
|
x[iii] = x_act[i];
|
|
v[iii] = v_act[i];
|
|
|
|
t[iii] = t_act[i];
|
|
dt[iii] = dt_act[i];
|
|
|
|
pot[iii] = pot_act[i];
|
|
|
|
pot_ext[iii] = pot_act_ext[i];
|
|
|
|
a[iii] = a_act[i];
|
|
adot[iii] = adot_act[i];
|
|
} /* i */
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
|
DT_ACT_CORR += (CPU_tmp_user - CPU_tmp_user0);
|
|
#endif
|
|
|
|
/* load the new values for active particles to the local GRAPE's */
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
|
#endif
|
|
|
|
for (int i=0; i<n_act; i++) { // TODO would be nicer to use i instead of j here
|
|
#ifdef ETICS_CEP
|
|
if (ind_act[i] == grapite_cep_index) grapite_update_cep(t_act[i], x_act[i], v_act[i], a_act[i], adot_act[i]); // All ranks should do it.
|
|
#endif
|
|
cur_rank = ind_act[i]/n_loc;
|
|
|
|
if (myRank == cur_rank) {
|
|
|
|
jjj = ind_act[i] - myRank*n_loc;
|
|
|
|
g6_set_j_particle(clusterid, jjj, ind_act[i], t_act[i], dt_act[i], m_act[i], zeros, adot_act[i]*(1./6.), a_act[i]*0.5, v_act[i], x_act[i]);
|
|
|
|
} /* if (myRank == cur_rank) */
|
|
|
|
} /* i */
|
|
|
|
#ifdef TIMING
|
|
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
|
DT_ACT_LOAD += (CPU_tmp_user - CPU_tmp_user0);
|
|
#endif
|
|
|
|
/* Current time set to min_t */
|
|
|
|
time_cur = min_t;
|
|
|
|
timesteps += 1.0;
|
|
n_act_sum += n_act;
|
|
n_act_distr[n_act-1]++;
|
|
|
|
if (time_cur >= t_bh) {
|
|
if (myRank == rootRank) {
|
|
/* Write BH data... */
|
|
if (config->live_smbh_output) write_bh_data(time_cur, m, x, v, pot, a, adot, dt);
|
|
|
|
/* Write BH NB data... */
|
|
if (config->live_smbh_neighbor_output) write_bh_nb_data(time_cur, N, m, x, v);
|
|
|
|
} /* if (myRank == rootRank) */
|
|
|
|
t_bh += dt_bh;
|
|
} /* if (time_cur >= t_bh) */
|
|
|
|
if (time_cur >= t_contr) {
|
|
if (myRank == rootRank) {
|
|
|
|
energy_contr(time_cur, timesteps, n_act_sum, g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
|
|
|
|
/* write cont data */
|
|
|
|
if (config->output_hdf5) h5_write("data.con", diskstep, N, time_cur, m, x, v, pot, a, adot, 0, true);
|
|
else ascii_write("data.con", diskstep, N, time_cur, m, x, v, 16);
|
|
|
|
/* possible OUT for timing !!! */
|
|
|
|
#ifdef TIMING
|
|
out = fopen("timing.dat", "a");
|
|
|
|
DT_TOT = DT_ACT_DEF1 + DT_ACT_DEF2 + DT_ACT_DEF3 + DT_ACT_PRED +
|
|
DT_ACT_GRAV + DT_EXT_GRAV + DT_GMC_GRAV +
|
|
DT_GMC_GMC_GRAV + DT_EXT_GMC_GRAV +
|
|
DT_ACT_CORR + DT_ACT_LOAD +
|
|
DT_STEVOL + DT_STARDISK + DT_STARDESTR +
|
|
DT_ACT_REDUCE;
|
|
|
|
fprintf(out,"%.8E \t %.6E \t %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f \t %.3f \t %.8E %.8E %.8E \t %.8E %.8E %.8E \n",
|
|
time_cur, DT_TOT,
|
|
100.0*DT_ACT_DEF1/DT_TOT, 100.0*DT_ACT_DEF2/DT_TOT, 100.0*DT_ACT_DEF3/DT_TOT, 100.0*DT_ACT_PRED/DT_TOT,
|
|
100.0*DT_ACT_GRAV/DT_TOT, 100.0*DT_EXT_GRAV/DT_TOT, 100.0*DT_GMC_GRAV/DT_TOT,
|
|
100.0*DT_GMC_GMC_GRAV/DT_TOT, 100.0*DT_EXT_GMC_GRAV/DT_TOT,
|
|
100.0*DT_ACT_CORR/DT_TOT, 100.0*DT_ACT_LOAD/DT_TOT,
|
|
100.0*DT_STEVOL/DT_TOT, 100.0*DT_STARDISK/DT_TOT, 100.0*DT_STARDESTR/DT_TOT,
|
|
100.0*DT_ACT_REDUCE/DT_TOT,
|
|
CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0,
|
|
timesteps, n_act_sum, 57.0*N*n_act_sum/(CPU_time_user-CPU_time_user0)/1.0E+09);
|
|
|
|
fclose(out);
|
|
#endif
|
|
|
|
} /* if (myRank == rootRank) */
|
|
|
|
#ifdef ETICS_CEP
|
|
// We are /inside/ a control step, so all particles must be synchronized; we can safely calculate their density centre. The acceleration and jerk currently in the memory are for the predicted position of the CEP, by calling grapite_calc_center we "correct" the position and velocity, but not the gravity at that point.
|
|
|
|
// First calculate the DC
|
|
grapite_calc_center(N, m, x, v, xcm, vcm, xdc, vdc);
|
|
|
|
// Now copy it to the global particle list
|
|
memcpy(x[grapite_cep_index], xdc, 3*sizeof(double));
|
|
memcpy(v[grapite_cep_index], vdc, 3*sizeof(double));
|
|
|
|
// Now copy it to the local particle list for tha appropriate rank
|
|
if (myRank == grapite_cep_index/n_loc) {
|
|
memcpy(x_loc[grapite_cep_index - myRank*n_loc], xdc, 3*sizeof(double));
|
|
memcpy(v_loc[grapite_cep_index - myRank*n_loc], vdc, 3*sizeof(double));
|
|
}
|
|
grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]);
|
|
#endif
|
|
|
|
t_contr += dt_contr;
|
|
} /* if (time_cur >= t_contr) */
|
|
|
|
if (time_cur >= t_disk) {
|
|
if (myRank == rootRank) {
|
|
diskstep++;
|
|
char out_fname[256];
|
|
sprintf(out_fname, "%06d", diskstep);
|
|
if (config->output_hdf5) h5_write(std::string(out_fname) + ".h5", diskstep, N, time_cur, m, x, v, pot, a, adot, config->output_extra_mode, config->output_hdf5_double_precision);
|
|
else ascii_write(std::string(out_fname) + ".dat", diskstep, N, time_cur, m, x, v, config->output_ascii_precision);
|
|
} /* if (myRank == rootRank) */
|
|
|
|
#ifdef ETICS_DUMP
|
|
sprintf(out_fname, "coeffs.%06d.%02d.dat", diskstep, myRank);
|
|
grapite_dump(out_fname, 2);
|
|
#endif
|
|
|
|
t_disk += dt_disk;
|
|
} /* if (time_cur >= t_disk) */
|
|
|
|
} /* while (time_cur < t_end) */
|
|
|
|
/* close the local GRAPEs */
|
|
|
|
g6_close(clusterid);
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
MPI_Reduce(&g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD);
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
if (myRank == rootRank) {
|
|
|
|
/* Write some output for the timestep annalize... */
|
|
|
|
printf("\n");
|
|
printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls);
|
|
printf("\n");
|
|
printf("Real Speed = %.3f GFlops \n", 57.0*N*n_act_sum/(CPU_time_user-CPU_time_user0)/1.0E+09);
|
|
fflush(stdout);
|
|
|
|
} /* if (myRank == rootRank) */
|
|
|
|
/* Wait to all processors to finish his works... */
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
/* Finalize the MPI work */
|
|
MPI_Finalize();
|
|
|
|
return 0;
|
|
}
|