New timing, moved MPI reduction into function call, started converting some of the pointers into std::vectors
parent 1a438449a8
commit 329dd2ca4d
5 changed files with 88 additions and 312 deletions
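Note: most of the pointer-to-vector changes in the hunks below follow one pattern: an array that used to be allocated with new[] becomes a std::vector, and .data() is passed wherever a C-style interface (GRAPE/SAPPORO, MPI, HDF5) still expects a raw pointer. A minimal, self-contained sketch of that pattern (legacy_sum and the values here are illustrative only, not from this repository):

#include <cstdio>
#include <numeric>
#include <vector>

// Stand-in for a C-style interface (MPI, GRAPE, HDF5) that expects raw pointers.
void legacy_sum(const double *values, int n, double *result)
{
    *result = 0.0;
    for (int i = 0; i < n; i++) *result += values[i];
}

int main()
{
    const int N = 8;

    // Before: double *pot = new double[N]; ...; delete[] pot;
    // After:  the vector owns the storage and releases it automatically.
    std::vector<double> pot(N);
    std::iota(pot.begin(), pot.end(), 1.0);   // fill with 1, 2, ..., N

    double total = 0.0;
    legacy_sum(pot.data(), static_cast<int>(pot.size()), &total); // .data() bridges to the C API
    std::printf("total = %g\n", total);
    return 0;
}

Because the vector frees its own storage, pot, ind, ind_act and pot_act_new drop out of the delete[] list at the end of phigrape.cpp, and the *_act_tmp scratch arrays disappear altogether.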
@@ -99,7 +99,7 @@ void Black_hole_physics::adjust_post_newtonian(
     jrk2 += jrk2_corr;
 }
 
-void Black_hole_physics::write_bh_data(double time_cur, double m[], double3 x[], double3 v[], double pot[], double3 a[], double3 adot[], double dt[])
+void Black_hole_physics::write_bh_data(double time_cur, double m[], double3 x[], double3 v[], const std::vector<double>& pot, double3 a[], double3 adot[], double dt[])
 {
     // This function logs data on the black hole(s). It uses both external data
     // (the arguments to this function) and optionall internal data to this
@@ -195,7 +195,7 @@ void Write_bh_nb_data::operator()(double time_cur)
     fflush(out);
 }
 
-void Binary_smbh_influence_sphere_output::operator()(int ind_act[], int n_act, double timesteps, double time_cur)
+void Binary_smbh_influence_sphere_output::operator()(const std::vector<int>& ind_act, int n_act, double timesteps, double time_cur)
 {
     double m_bh1 = m[0];
     double m_bh2 = m[1];
@@ -216,13 +216,13 @@ void Binary_smbh_influence_sphere_output::operator()(int ind_act[], int n_act, d
     for (int i=0; i<n_act; i++) {
         int j_act = ind_act[i];
         if (j_act<2) continue;
-        double& pot_bh1 = pot[0];
-        double& pot_bh2 = pot[1];
-        double& m_act = m[j_act];
-        double3& x_act = x[j_act];
-        double3& v_act = v[j_act];
-        double& dt_act = dt[j_act];
-        double& pot_act = pot[j_act];
+        const double& pot_bh1 = pot[0];
+        const double& pot_bh2 = pot[1];
+        const double& m_act = m[j_act];
+        const double3& x_act = x[j_act];
+        const double3& v_act = v[j_act];
+        const double& dt_act = dt[j_act];
+        const double& pot_act = pot[j_act];
         double tmp_r2 = (x_act - x_bbhc).norm2();
         if (tmp_r2 < SEMI_a2*factor*factor) {
             if (inf_event[j_act] == 0) {
@@ -46,7 +46,7 @@ public:
         const double dt_bh, // pn_usage should be const
         double3& acc1, double3& acc2,
         double3& jrk1, double3& jrk2);
-    void write_bh_data(double time_cur, double m[], double3 x[], double3 v[], double pot[], double3 a[], double3 adot[], double dt[]);
+    void write_bh_data(double time_cur, double m[], double3 x[], double3 v[], const std::vector<double>& pot, double3 a[], double3 adot[], double dt[]);
 public: //TODO make private
     double m1, m2;
     int count;
@@ -85,7 +85,7 @@ private:
 
 class Binary_smbh_influence_sphere_output {
 public:
-    Binary_smbh_influence_sphere_output(double factor, int N, double *m, double3 *x, double3 *v, double *pot, double *dt)
+    Binary_smbh_influence_sphere_output(double factor, int N, double *m, double3 *x, double3 *v, const std::vector<double>& pot, double *dt)
     : factor(factor), m(m), x(x), v(v), pot(pot), dt(dt)
     {
         inf_event.assign(N, 0);
@@ -96,10 +96,11 @@ public:
     {
         fclose(out);
     }
-    void operator()(int ind_act[], int n_act, double timesteps, double time_cur);
+    void operator()(const std::vector<int>& ind_act, int n_act, double timesteps, double time_cur);
 private:
     double factor;
-    double *m, *pot, *dt;
+    const std::vector<double>& pot;
+    double *m, /**pot,*/ *dt;
     double3 *x, *v;
     std::vector<int> inf_event;
     FILE *out;
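Note: Binary_smbh_influence_sphere_output now keeps pot as a const std::vector<double>& member. A reference member only observes the caller's vector, so nothing is copied and the vector passed to the constructor must outlive the object. A small standalone sketch of that idiom (Potential_logger is a made-up name, not from the code):

#include <cstdio>
#include <vector>

// Hypothetical observer class mirroring the pattern above: it holds a
// reference to a vector it does not own; the vector must outlive the object.
class Potential_logger {
public:
    explicit Potential_logger(const std::vector<double>& pot) : pot(pot) {}
    void print_first() const
    {
        if (!pot.empty()) std::printf("pot[0] = %g\n", pot[0]);
    }
private:
    const std::vector<double>& pot; // reference member: no copy, read-only access
};

int main()
{
    std::vector<double> pot = {-1.5, -0.7, -0.2};
    Potential_logger logger(pot); // 'pot' outlives 'logger' in this scope
    logger.print_first();
    return 0;
}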
io.cpp (4 changes)
@@ -146,7 +146,7 @@ void h5_read(const std::string file_name, int *step_num, int *N, double *t, doub
 #endif
 }
 
-void h5_write(const std::string file_name, const int step_num, const int N, const double t, const double *m, const double3 *x, const double3 *v, const double *pot, const double3 *acc, const double3 *jrk, const int extra_mode=0, const bool use_double_precision=true)
+void h5_write(const std::string file_name, const int step_num, const int N, const double t, const double *m, const double3 *x, const double3 *v, const std::vector<double>& pot, const double3 *acc, const double3 *jrk, const int extra_mode=0, const bool use_double_precision=true)
 {
 #ifdef HAS_HDF5
     hid_t file_id, group_id, attribute_id, dataspace_id;
@@ -181,7 +181,7 @@ void h5_write(const std::string file_name, const int step_num, const int N, cons
     bool write_pot = (extra_mode ) & 1;
     bool write_acc = (extra_mode >> 1) & 1;
     bool write_jrk = (extra_mode >> 2) & 1;
-    if (write_pot) write_dataset("POT", 1, (double*)pot);
+    if (write_pot) write_dataset("POT", 1, (double*)pot.data());
     if (write_acc) write_dataset("ACC", 2, (double*)acc);
     if (write_jrk) write_dataset("JRK", 2, (double*)jrk);
io.h (2 changes)
@@ -12,5 +12,5 @@ void ascii_write(const std::string file_name, const int step_num, const int N, c
 void h5_read(const std::string file_name, int *step_num, int *N, double *t, double m[], double3 x[], double3 v[]);
 // In case the code is compiled without HDF5 support, the implementation of this function just throws an error
 
-void h5_write(const std::string file_name, const int step_num, const int N, const double t, const double *m, const double3 *x, const double3 *v, const double *pot, const double3 *acc, const double3 *jrk, const int write_mode=0, const bool use_double_precision=true);
+void h5_write(const std::string file_name, const int step_num, const int N, const double t, const double *m, const double3 *x, const double3 *v, const std::vector<double>& pot, const double3 *acc, const double3 *jrk, const int write_mode=0, const bool use_double_precision=true);
 // In case the code is compiled without HDF5 support, the implementation of this function just throws an error
phigrape.cpp (367 changes)
@@ -1,60 +1,3 @@
-/*****************************************************************************
-File Name      : "phi-GRAPE/GPU.c" // BH (1 || 2) + ACC + EJECT
-               :
-Contents       : N-body code with integration by individual block time step
-               : together with the parallel using of GRAPE6a board's.
-               :
-               : Added the GPU support via SAPPORO library.
-               :
-               : Normalization to the physical units!!!
-               :
-               : External Potential added
-               : Plummer-Kuzmin: Bulge, Disk, Halo
-               : Kharchenko+Andreas...
-               :
-               : SC extra POT for Bek SC test runs...
-               :
-               : Rebuced to the Single BH -> Plummer
-               : Andreas+Fazeel...
-               :
-               : Stellar evolution added
-               : Stellar lifetimes: Raiteri, Villata & Navarro (1996)
-               : IMS mass loss: van den Hoeg & Groenewegen (1997)
-               :
-               : STARDESTR_EXT: Tidal disruption of stars by external BH...
-               : Chingis, Denis & Maxim...
-               :
-               : STARDESTR: Tidal disruption of stars by BH...
-               : Jose, Li Shuo & Shiyan Zhong
-               :
-               : STARDISK: Drag force...
-               : Chingis, Denis & Maxim...
-               :
-               : STARDISK: variable hz = HZ*(R/R_crit) up to R_crit...
-               : Taras, Andreas...
-               :
-               : Live BH (1 || 2) + ACC + EJECT...
-               : Li Shuo & Shiyan Zhong
-               :
-               : dt_min for BH (1 || 2)...
-               :
-               : added the PN calculus for the BBH
-               : PN0, PN1, PN2, PN2.5 (coded on the base of
-               : Gabor Kupi original routine)
-               :
-               : added the "name" array...
-               :
-               : added the GMC's calculus (GMC on CPU; GMC2 on GPU)
-               : for Alexey SC runs... and also for Fazeel Zurich runs...
-               :
-               : CPU_TIMELIMIT added for the Julich MW cluster runs...
-               :
-Coded by       : Peter Berczik
-Version number : 19.04
-Last redaction : 2019.04.16 12:55
-*****************************************************************************/
-#define TIMING
-
 #define ETA_S_CORR 4.0
 #define ETA_BH_CORR 4.0
 
@@ -62,6 +5,7 @@ Last redaction : 2019.04.16 12:55
 #define DTMINPOWER -36.0
 
 #include <algorithm>
+#include <chrono>
 #include <math.h>
 #include <mpi.h>
 #include <numeric>
@@ -85,47 +29,24 @@ Last redaction : 2019.04.16 12:55
 #endif
 
 Config *config;
+//chrono::steady_clock::time_point walltime_start;
 
-// These are used in the energy control, could be static but will probably be removed in the end anyway
-double CPU_time_real0, CPU_time_user0, CPU_time_syst0;
-double CPU_time_real, CPU_time_user, CPU_time_syst;
-
-#ifdef TIMING
-// TODO clean up here
-double CPU_tmp_real0, CPU_tmp_user0, CPU_tmp_syst0;
-double CPU_tmp_real, CPU_tmp_user, CPU_tmp_syst;
-
-double DT_TOT,
-       DT_ACT_DEF1, DT_ACT_DEF2, DT_ACT_DEF3, DT_ACT_PRED,
-       DT_ACT_GRAV, DT_EXT_GRAV,
-       DT_GMC_GRAV, DT_GMC_GMC_GRAV, DT_EXT_GMC_GRAV,
-       DT_ACT_CORR, DT_ACT_LOAD,
-       DT_STEVOL, DT_STARDISK, DT_STARDESTR;
-double DT_ACT_REDUCE;
-#endif
-
-void get_CPU_time(double *time_real, double *time_user, double *time_syst)
-{
-    struct rusage xxx;
-    double sec_u, microsec_u, sec_s, microsec_s;
-    struct timeval tv;
-
-    getrusage(RUSAGE_SELF,&xxx);
-
-    sec_u = xxx.ru_utime.tv_sec;
-    sec_s = xxx.ru_stime.tv_sec;
-
-    microsec_u = xxx.ru_utime.tv_usec;
-    microsec_s = xxx.ru_stime.tv_usec;
-
-    *time_user = sec_u + microsec_u * 1.0E-06;
-    *time_syst = sec_s + microsec_s * 1.0E-06;
-
-    gettimeofday(&tv, NULL);
-    *time_real = tv.tv_sec + 1.0E-06 * tv.tv_usec;
-
-    *time_user = *time_real;
+namespace std::chrono {
+struct Timer {
+    void start()
+    {
+        t_start = steady_clock::now();
+    }
+    void stop()
+    {
+        t_stop = steady_clock::now();
+        time = duration_cast<nanoseconds>(t_stop - t_start).count()*1E-9;
+    }
+    double time; // seconds
+    steady_clock::time_point t_start, t_stop;
+};
 }
+std::chrono::Timer timer;
 
 class Calc_self_grav {
 public:
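Note: the replacement Timer measures wall-clock time with std::chrono::steady_clock instead of getrusage()/gettimeofday(). The same struct works unchanged outside of namespace std::chrono (the standard reserves std and its sub-namespaces for the implementation, so a project namespace is the safer spelling). A self-contained sketch of that variant, with util as a made-up namespace name, not the committed code:

#include <chrono>
#include <cstdio>
#include <thread>

namespace util {
struct Timer {
    void start() { t_start = std::chrono::steady_clock::now(); }
    void stop()
    {
        t_stop = std::chrono::steady_clock::now();
        time = std::chrono::duration<double>(t_stop - t_start).count(); // seconds
    }
    double time = 0.0; // elapsed seconds after stop()
    std::chrono::steady_clock::time_point t_start, t_stop;
};
}

int main()
{
    util::Timer timer;
    timer.start();
    std::this_thread::sleep_for(std::chrono::milliseconds(50)); // stand-in for work
    timer.stop();
    std::printf("elapsed = %.3f s\n", timer.time);
    return 0;
}

Usage mirrors the committed code: timer.start() at the top of main(), timer.stop() before reading timer.time.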
@@ -133,49 +54,49 @@ public:
     : g6_calls(0), n_loc(n_loc), clusterid(clusterid), npipe(npipe), eps2(eps*eps)
     {
         h2.assign(N, eps2);
+        pot_loc.resize(N);
+        acc_loc.resize(N);
+        jrk_loc.resize(N);
     }
-    void operator()(const double t, const int n_act, int ind_act[], const double3 x_act[], const double3 v_act[],
-                    double pot[], double3 acc[], double3 jrk[])
+    void operator()(const double t, const int n_act, std::vector<int>& ind_act, const double3 x_act[], const double3 v_act[],
+                    std::vector<double>& pot, double3 acc[], double3 jrk[])
     {
         g6_set_ti(clusterid, t);
         for (int i=0; i<n_act; i+=npipe) {
             int nn = npipe;
             if (n_act-i < npipe) nn = n_act - i;
             //TODO any way we can clean up this ugly casting?
-            g6calc_firsthalf(clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act+i, (double(*)[3])v_act+i, (double(*)[3])acc+i, (double(*)[3])jrk+i, pot+i, eps2, h2.data());
-            g6calc_lasthalf( clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act+i, (double(*)[3])v_act+i, eps2, h2.data(), (double(*)[3])acc+i, (double(*)[3])jrk+i, pot+i);
+            g6calc_firsthalf(clusterid, n_loc, nn, ind_act.data()+i, (double(*)[3])&x_act[i], (double(*)[3])&v_act[i], (double(*)[3])&acc_loc[i], (double(*)[3])&jrk_loc[i], &pot_loc[i], eps2, h2.data());
+            g6calc_lasthalf( clusterid, n_loc, nn, ind_act.data()+i, (double(*)[3])&x_act[i], (double(*)[3])&v_act[i], eps2, h2.data(), (double(*)[3])&acc_loc[i], (double(*)[3])&jrk_loc[i], &pot_loc[i]);
             g6_calls++;
         } /* i */
+        /* Reduce the "global" vectors from "local" on all the nodes */
+        MPI_Allreduce(pot_loc.data(), pot.data(), n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+        MPI_Allreduce(acc_loc.data(), acc, 3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+        MPI_Allreduce(jrk_loc.data(), jrk, 3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
     }
     double g6_calls;
 private:
     int n_loc, clusterid, npipe;
     double eps2;
     std::vector<double> h2;
+    std::vector<double> pot_loc; // the _loc variables are for this node only.
+    std::vector<double3> acc_loc, jrk_loc;
 };
 
 void calc_ext_grav(std::vector<External_gravity*> &external_gravity_components, int n, double3 *x, double3 *v, double *pot, double3 *acc, double3* jrk)
 // TODO should just be a class that has this pointer array as a member
 {
-#ifdef TIMING
-    get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
-#endif
 
     std::fill(pot, pot+n, 0.);
     for (auto component : external_gravity_components) {
         if (component->is_active)
             component->apply(n, x, v, pot, acc, jrk);
     }
-
-#ifdef TIMING
-    get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
-    DT_EXT_GRAV += (CPU_tmp_user - CPU_tmp_user0);
-#endif
 }
 
-void energy_contr(const double time_cur, const double timesteps, const double n_act_sum, const double g6_calls, int N, double m[], double3 x[], double3 v[], double pot[], double pot_ext[])
+void energy_contr(const double time_cur, const double timesteps, const double n_act_sum, const double g6_calls, int N, double m[], double3 x[], double3 v[], const std::vector<double>& pot, double pot_ext[])
 {
     double E_pot = 0;
     for (int i=0; i<N; i++) E_pot += m[i]*pot[i];
     E_pot *= 0.5;
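Note: with the MPI_Allreduce calls moved inside Calc_self_grav::operator(), each rank fills its *_loc buffers and the caller receives the already-reduced pot/acc/jrk, so main() no longer needs the *_act_tmp arrays. A minimal standalone sketch of that shape (compute_and_reduce and the fill values are invented; only the MPI calls themselves are real API):

#include <cstdio>
#include <mpi.h>
#include <vector>

// Each rank computes a partial result into a local buffer, then the function
// itself reduces into the caller's vector, so the caller never sees the _loc copy.
void compute_and_reduce(int rank, std::vector<double>& global)
{
    std::vector<double> local(global.size());
    for (std::size_t i = 0; i < local.size(); i++)
        local[i] = static_cast<double>(rank + 1); // stand-in for a real partial sum
    MPI_Allreduce(local.data(), global.data(), static_cast<int>(global.size()),
                  MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
}

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int rank = 0;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    std::vector<double> pot(4, 0.0);
    compute_and_reduce(rank, pot);

    if (rank == 0) std::printf("pot[0] = %g\n", pot[0]); // sum over ranks of (rank+1)
    MPI_Finalize();
    return 0;
}

Run with several ranks, each element ends up as the sum of the per-rank partial values, which is what the reduction inside the member function now delivers to the integrator.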
@@ -206,7 +127,7 @@ void energy_contr(const double time_cur, const double timesteps, const double n_
         mom[2] += m[i] * (x[i][0]* v[i][1] - x[i][1]*v[i][0]);
     }
 
-    get_CPU_time(&CPU_time_real, &CPU_time_user, &CPU_time_syst);
+    timer.stop();
 
     double E_tot = E_pot + E_kin + E_pot_ext;
 
@@ -219,18 +140,18 @@ void energy_contr(const double time_cur, const double timesteps, const double n_
     printf("%.3E %.3E % .4E %.4E % .4E % .4E % .4E %.2E\n",
            time_cur, timesteps,
            E_pot, E_kin, E_pot_ext, E_tot, DE_tot,
-           CPU_time_user-CPU_time_user0);
+           timer.time);
 
     fflush(stdout);
 
     auto out = fopen("contr.dat", "a");
-    fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E %.8E %.8E \n",
+    fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E\n",
            time_cur, timesteps, n_act_sum, g6_calls,
            E_pot, E_kin, E_pot_ext,
            E_tot, DE_tot,
            rcm_mod, vcm_mod,
            mom[0], mom[1], mom[2],
-           CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0);
+           timer.time);
     fclose(out);
 
     E_tot_prev = E_tot;
@@ -330,14 +251,13 @@ inline double aarseth_step(const double eta, const double dt, const double3 a, c
     return sqrt(eta*(a1abs*a2dot1abs+adot1abs*adot1abs)/(adot1abs*a3dot1abs+a2dot1abs*a2dot1abs));
 }
 
 int main(int argc, char *argv[])
 {
+    timer.start();
 
     double timesteps=0.0, n_act_sum=0.0;
 
-    double3 xcm, vcm, xdc, vdc; // these should go away
-
-    double3 x_bbhc, v_bbhc;
 
     double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface.
 
     /* INIT the rand() !!! */
@@ -365,6 +285,7 @@ int main(int argc, char *argv[])
 
     int diskstep, N;
     double time_cur;
+    // The memory for m, x, and v is allocated inside h5_read or ascii_read
     double *m;
     double3 *x, *v;
     if (is_hdf5(config->input_file_name)) {
@@ -377,18 +298,19 @@ int main(int argc, char *argv[])
     else
         ascii_read(config->input_file_name, diskstep, N, time_cur, &m, &x, &v);
 
-    int *ind = new int[N];
-    std::iota(ind, ind+N, 0);
+    std::vector<int> ind(N);
+    std::iota(begin(ind), end(ind), 0);
     double3 *a = new double3[N], *adot = new double3[N];
-    double *pot = new double[N], *pot_ext = new double[N], *t = new double[N], *dt = new double[N];
+    std::vector<double> pot(N);
+    double *pot_ext = new double[N], *t = new double[N], *dt = new double[N];
 
     /* data for active particles */
-    // x_act_new and v_act_new arrays hold the predicted position and velocity of i-particles, which is later corrected before moving into the j-particle memory. The [pot,a,adot]_act_tmp arrays hold the calculation results from each node. The [pot,a,adot]_act_new arrays hold the reduced calculation results from all nodes.
-    int n_act, *ind_act = new int[N];
-    double *pot_act_new = new double[N], *pot_act_tmp = new double[N], *pot_act_ext = new double[N];
-    double3 *x_act_new = new double3[N], *v_act_new = new double3[N],
-            *a_act_tmp = new double3[N], *adot_act_tmp = new double3[N],
+    int n_act;
+    std::vector<int> ind_act(N);
+    std::vector<double> pot_act_new(N);
+    double *pot_act_ext = new double[N];
+    double3 *x_act_new = new double3[N], *v_act_new = new double3[N],
             *a_act_new = new double3[N], *adot_act_new = new double3[N];
 
     double eps = config->eps;
     double eta = config->eta;
@@ -411,10 +333,6 @@ int main(int argc, char *argv[])
     if ((diskstep == 0) && (time_cur == 0)) {
         FILE *out = fopen("contr.dat", "w");
         fclose(out);
-#ifdef TIMING
-        out = fopen("timing.dat", "w");
-        fclose(out);
-#endif
         if (config->live_smbh_output && (config->live_smbh_count > 0)) {
             out = fopen("bh.dat", "w");
             fclose(out);
@@ -424,9 +342,6 @@ int main(int argc, char *argv[])
             fclose(out);
         }
     }
-
-    get_CPU_time(&CPU_time_real0, &CPU_time_user0, &CPU_time_syst0);
-
     } /* if (myRank == rootRank) */
 
     double normalization_mass=1, normalization_length=1, normalization_velocity=1;
@@ -531,7 +446,6 @@ int main(int argc, char *argv[])
 #endif
 
     /* load the nj particles to the G6 */
-
     for (int k=0; k<n_loc; k++) {
         int j = k + myRank*n_loc;
         g6_set_j_particle(clusterid, k, ind[j], t[j], dt[j], m[j], zeros, zeros, zeros, v[j], x[j]);
@@ -545,6 +459,7 @@ int main(int argc, char *argv[])
 
     int grapite_cep_index = grapite_get_cep_index();
     if (grapite_cep_index >= 0) {
+        double3 xcm, vcm, xdc, vdc;
         grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc);
         x[grapite_cep_index] = xdc;
         v[grapite_cep_index] = vdc;
@@ -553,13 +468,7 @@ int main(int argc, char *argv[])
 #endif
 
     /* define the all particles as a active on all the processors for the first time grav calc. */
-    calc_self_grav(time_cur, N, ind, x, v, pot_act_tmp, a_act_tmp, adot_act_tmp);
-
-    /* Reduce the "global" vectors from "local" on all processors) */
-    // TODO why won't we do the MPI_Allreduce inside the calc_self_grav function, and get rid of these _tmp arrays?
-    MPI_Allreduce(pot_act_tmp, pot, N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-    MPI_Allreduce(a_act_tmp, a, 3*N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-    MPI_Allreduce(adot_act_tmp, adot, 3*N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+    calc_self_grav(time_cur, N, ind, x, v, pot, a, adot);
 
     if (config->live_smbh_count == 2) {
         black_hole_physics.set_xv(x[0], x[1], v[0], v[1]);
@@ -580,6 +489,7 @@ int main(int argc, char *argv[])
     }
 
     if (grapite_cep_index >= 0) {
+        double3 xcm, vcm, xdc, vdc;
         grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc);
         x[grapite_cep_index] = xdc;
         v[grapite_cep_index] = vdc;
@@ -588,7 +498,6 @@ int main(int argc, char *argv[])
 #endif
 
     /* Define initial timestep for all particles on all nodes */
-
     for (int j=0; j<N; j++) {
         double a2_mod = a[j].norm2();
         double adot2_mod = adot[j].norm2();
@@ -620,14 +529,12 @@ int main(int argc, char *argv[])
     }
 
     /* load the new values for particles to the local GRAPEs */
-
     for (int k=0; k<n_loc; k++) {
         int j = k + myRank*n_loc;
         g6_set_j_particle(clusterid, k, ind[j], t[j], dt[j], m[j], zeros, adot[j]*(1./6.), a[j]*0.5, v[j], x[j]);
     } /* k */
 
     if (myRank == rootRank) {
-
         /* Write BH data... */
         if (config->live_smbh_output) black_hole_physics.write_bh_data(time_cur, m, x, v, pot, a, adot, dt);
 
@@ -636,60 +543,19 @@ int main(int argc, char *argv[])
     } /* if (myRank == rootRank) */
 
-    /* Get the Starting time on rootRank */
-
-    if (myRank == rootRank) {
-        get_CPU_time(&CPU_time_real0, &CPU_time_user0, &CPU_time_syst0);
-        get_CPU_time(&CPU_time_real, &CPU_time_user, &CPU_time_syst);
-    } /* if (myRank == rootRank) */
-
     timesteps = 0.0; // Why won't those two be long long instead of double + should include the zeroth step
     n_act_sum = 0.0;
 
-#ifdef TIMING
-    DT_TOT = 0.0;
-
-    DT_ACT_DEF1 = 0.0;
-    DT_ACT_DEF2 = 0.0;
-    DT_ACT_DEF3 = 0.0;
-    DT_ACT_PRED = 0.0;
-    DT_ACT_GRAV = 0.0;
-    DT_EXT_GRAV = 0.0;
-    DT_EXT_GMC_GRAV = 0.0;
-    DT_GMC_GMC_GRAV = 0.0;
-    DT_ACT_CORR = 0.0;
-    DT_ACT_LOAD = 0.0;
-
-    DT_STEVOL = 0.0;
-    DT_STARDISK = 0.0;
-    DT_STARDESTR = 0.0;
-
-    DT_ACT_REDUCE = 0.0;
-#endif
-
     /* The main integration loop */
 
     while (time_cur <= t_end) {
 
-        /* Define the minimal time and the active particles on all the nodes (exclude the ZERO masses!!!) */
+        /* Define the minimal time and the active particles on all the nodes */
 
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
-#endif
-
         double min_t = active_search.get_minimum_time(t, dt);
 
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
-        DT_ACT_DEF1 += (CPU_tmp_user - CPU_tmp_user0);
-#endif
-
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
-#endif
-
-        active_search.get_active_indices(min_t, t, dt, ind_act, n_act);
+        /* Get indices of all particles that will be active in this bunch */
+        active_search.get_active_indices(min_t, t, dt, ind_act.data(), n_act);
 
+        /* Find the BH(s) indices in the active list */
         int i_bh1=0, i_bh2=1;
 #ifdef ETICS
         int n_bh = config->live_smbh_count;
@@ -707,18 +573,7 @@ int main(int argc, char *argv[])
         }
 #endif
 
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
-        DT_ACT_DEF2 += (CPU_tmp_user - CPU_tmp_user0);
-#endif
-
         /* predict the active particles positions etc... on all the nodes */
-
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
-#endif
-
         for (int i=0; i<n_act; i++) {
             int j_act = ind_act[i];
             double dt = min_t - t[j_act];
@@ -728,36 +583,8 @@ int main(int argc, char *argv[])
             v_act_new[i] = v[j_act] + a[j_act]*dt + adot[j_act]*dt2half;
         } /* i */
 
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
-        DT_ACT_PRED += (CPU_tmp_user - CPU_tmp_user0);
-#endif
-
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
-#endif
-        calc_self_grav(min_t, n_act, ind_act, x_act_new, v_act_new,
-                       pot_act_tmp, a_act_tmp, adot_act_tmp);
-
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
-        DT_ACT_GRAV += (CPU_tmp_user - CPU_tmp_user0);
-#endif
-
-        /* Reduce the "global" vectors from "local" on all the nodes */
-
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
-#endif
-
-        MPI_Allreduce(pot_act_tmp, pot_act_new, n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-        MPI_Allreduce(a_act_tmp, a_act_new, 3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-        MPI_Allreduce(adot_act_tmp, adot_act_new, 3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
-        DT_ACT_REDUCE += (CPU_tmp_user - CPU_tmp_user0);
-#endif
+        /* Calculate gravity on active particles */
+        calc_self_grav(min_t, n_act, ind_act, x_act_new, v_act_new, pot_act_new, a_act_new, adot_act_new);
 
         if (config->live_smbh_count == 2) {
             black_hole_physics.set_xv(x_act_new[i_bh1], x_act_new[i_bh2], v_act_new[i_bh1], v_act_new[i_bh2]);
@@ -765,19 +592,12 @@ int main(int argc, char *argv[])
             if (config->binary_smbh_pn) black_hole_physics.adjust_post_newtonian(dt[i_bh1], a_act_new[i_bh1], a_act_new[i_bh2], adot_act_new[i_bh1], adot_act_new[i_bh2]);
         }
 
+        /* Calculate gravity on active particles due to external forces */
         calc_ext_grav(external_gravity_components, n_act, x_act_new, v_act_new, pot_act_ext, a_act_new, adot_act_new);
 
         /* correct the active particles positions etc... on all the nodes */
-
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
-#endif
-
         double min_dt = dt_max;
         for (int i=0; i<n_act; i++) {
-            // NOTICE looks like we're doing three unrelated things in this loop: (1) correcting positions and velocities (2) calculating new steps, and (3) putting the corrected values from the _act_new back in the _act arrays.
-            // After going back to the _act arrays they don't do much before they go back to the main arrays, so this copy seems redundant (the SMBH influence sphere printout needs these values but it should be a function anyway).
-            // TODO split this loop into the three tasks it is doing, and remove the redundancy.
             int j_act = ind_act[i];
             double dt_tmp = min_t - t[j_act];
@@ -823,11 +643,9 @@ int main(int argc, char *argv[])
         } /* i */
 
         /* define the min. dt over all the act. part. and set it also for the BH... */
-
         if (config->live_smbh_count > 0) {
             if (config->live_smbh_count>=1) dt[0] = min_dt;
             if (config->live_smbh_count==2) dt[1] = min_dt;
-
         }
 
         if (config->binary_smbh_influence_sphere_output && (myRank == rootRank)) {
@@ -835,41 +653,22 @@ int main(int argc, char *argv[])
             binary_smbh_influence_sphere_output(ind_act, n_act, timesteps, time_cur);
         }
 
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
-        DT_ACT_CORR += (CPU_tmp_user - CPU_tmp_user0);
-#endif
-
         /* load the new values for active particles to the local GRAPE's */
-
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
-#endif
-
         for (int i=0; i<n_act; i++) {
 #ifdef ETICS
             if (ind_act[i] == grapite_cep_index) grapite_update_cep(t[grapite_cep_index], x[grapite_cep_index], v[grapite_cep_index], a[grapite_cep_index], adot[grapite_cep_index]); // All ranks should do it.
 #endif
             int cur_rank = ind_act[i]/n_loc;
 
             if (myRank == cur_rank) {
                 int j_act = ind_act[i];
                 int address = ind_act[i] - myRank*n_loc;
                 g6_set_j_particle(clusterid, address, ind_act[i], t[j_act], dt[j_act], m[j_act], zeros, adot[j_act]*(1./6.), a[j_act]*0.5, v[j_act], x[j_act]);
 
             } /* if (myRank == cur_rank) */
 
         } /* i */
 
-#ifdef TIMING
-        get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
-        DT_ACT_LOAD += (CPU_tmp_user - CPU_tmp_user0);
-#endif
-
         /* Current time set to min_t */
-
         time_cur = min_t;
 
         timesteps += 1.0;
         n_act_sum += n_act;
@@ -888,43 +687,21 @@ int main(int argc, char *argv[])
 
         if (time_cur >= t_contr) {
             if (myRank == rootRank) {
 
                 energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, N, m, x, v, pot, pot_ext);
 
                 /* write cont data */
                 if (config->output_hdf5) h5_write("data.con", diskstep, N, time_cur, m, x, v, pot, a, adot, 0, true);
                 else ascii_write("data.con", diskstep, N, time_cur, m, x, v, 16);
 
-                /* possible OUT for timing !!! */
-#ifdef TIMING
-                FILE *out = fopen("timing.dat", "a");
-
-                DT_TOT = DT_ACT_DEF1 + DT_ACT_DEF2 + DT_ACT_DEF3 + DT_ACT_PRED +
-                         DT_ACT_GRAV + DT_EXT_GRAV + DT_GMC_GRAV +
-                         DT_GMC_GMC_GRAV + DT_EXT_GMC_GRAV +
-                         DT_ACT_CORR + DT_ACT_LOAD +
-                         DT_STEVOL + DT_STARDISK + DT_STARDESTR +
-                         DT_ACT_REDUCE;
-
-                fprintf(out,"%.8E \t %.6E \t %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f \t %.3f \t %.8E %.8E %.8E \t %.8E %.8E %.8E \n",
-                        time_cur, DT_TOT,
-                        100.0*DT_ACT_DEF1/DT_TOT, 100.0*DT_ACT_DEF2/DT_TOT, 100.0*DT_ACT_DEF3/DT_TOT, 100.0*DT_ACT_PRED/DT_TOT,
-                        100.0*DT_ACT_GRAV/DT_TOT, 100.0*DT_EXT_GRAV/DT_TOT, 100.0*DT_GMC_GRAV/DT_TOT,
-                        100.0*DT_GMC_GMC_GRAV/DT_TOT, 100.0*DT_EXT_GMC_GRAV/DT_TOT,
-                        100.0*DT_ACT_CORR/DT_TOT, 100.0*DT_ACT_LOAD/DT_TOT,
-                        100.0*DT_STEVOL/DT_TOT, 100.0*DT_STARDISK/DT_TOT, 100.0*DT_STARDESTR/DT_TOT,
-                        100.0*DT_ACT_REDUCE/DT_TOT,
-                        CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0,
-                        timesteps, n_act_sum, 57.0*N*n_act_sum/(CPU_time_user-CPU_time_user0)/1.0E+09);
-
-                fclose(out);
-#endif
-
             } /* if (myRank == rootRank) */
 
 #ifdef ETICS
-            // We are /inside/ a control step, so all particles must be synchronized; we can safely calculate their density centre. The acceleration and jerk currently in the memory are for the predicted position of the CEP, by calling grapite_calc_center we "correct" the position and velocity, but not the gravity at that point.
+            // We are /inside/ a control step, so all particles must be
+            // synchronized; we can safely calculate their density centre. The
+            // acceleration and jerk currently in the memory are for the
+            // predicted position of the CEP, by calling grapite_calc_center we
+            // "correct" the position and velocity, but not the gravity at that
+            // point.
             if (grapite_cep_index >= 0) {
+                double3 xcm, vcm, xdc, vdc;
                 grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc);
                 x[grapite_cep_index] = xdc;
                 v[grapite_cep_index] = vdc;
@@ -955,24 +732,22 @@ int main(int argc, char *argv[])
     } /* while (time_cur < t_end) */
 
     /* close the local GRAPEs */
+    timer.stop();
     g6_close(clusterid);
 
     double g6_calls_sum;
     MPI_Reduce(&calc_self_grav.g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD);
 
     if (myRank == rootRank) {
 
        /* Write some output for the timestep annalize... */
 
        printf("\n");
        printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls_sum);
        printf("\n");
-       printf("Real Speed = %.3f GFlops \n", 57.0*N*n_act_sum/(CPU_time_user-CPU_time_user0)/1.0E+09);
+       printf("Real Speed = %.3f GFlops \n", 57.0*N*n_act_sum/(timer.time)/1.0E+09);
        fflush(stdout);
 
     } /* if (myRank == rootRank) */
 
-    delete[] m; delete[] x; delete[] v; delete[] ind; delete[] a; delete[] adot; delete[] pot; delete[] pot_ext; delete[] t; delete[] dt; delete[] ind_act; delete[] pot_act_new; delete[] pot_act_tmp; delete[] x_act_new; delete[] v_act_new; delete[] a_act_tmp; delete[] adot_act_tmp; delete[] a_act_new; delete[] adot_act_new; delete[] pot_act_ext;
+    delete config;
+    delete[] m; delete[] x; delete[] v; delete[] a; delete[] adot; delete[] pot_ext; delete[] t; delete[] dt; delete[] x_act_new; delete[] v_act_new; delete[] a_act_new; delete[] adot_act_new; delete[] pot_act_ext;
 
     /* Finalize the MPI work */
     MPI_Finalize();