diff --git a/black_holes.cpp b/black_holes.cpp index 0db9a13..4f5ffce 100644 --- a/black_holes.cpp +++ b/black_holes.cpp @@ -99,7 +99,7 @@ void Black_hole_physics::adjust_post_newtonian( jrk2 += jrk2_corr; } -void Black_hole_physics::write_bh_data(double time_cur, double m[], double3 x[], double3 v[], double pot[], double3 a[], double3 adot[], double dt[]) +void Black_hole_physics::write_bh_data(double time_cur, double m[], double3 x[], double3 v[], const std::vector& pot, double3 a[], double3 adot[], double dt[]) { // This function logs data on the black hole(s). It uses both external data // (the arguments to this function) and optionall internal data to this @@ -195,7 +195,7 @@ void Write_bh_nb_data::operator()(double time_cur) fflush(out); } -void Binary_smbh_influence_sphere_output::operator()(int ind_act[], int n_act, double timesteps, double time_cur) +void Binary_smbh_influence_sphere_output::operator()(const std::vector& ind_act, int n_act, double timesteps, double time_cur) { double m_bh1 = m[0]; double m_bh2 = m[1]; @@ -216,13 +216,13 @@ void Binary_smbh_influence_sphere_output::operator()(int ind_act[], int n_act, d for (int i=0; i& pot, double3 a[], double3 adot[], double dt[]); public: //TODO make private double m1, m2; int count; @@ -85,7 +85,7 @@ private: class Binary_smbh_influence_sphere_output { public: - Binary_smbh_influence_sphere_output(double factor, int N, double *m, double3 *x, double3 *v, double *pot, double *dt) + Binary_smbh_influence_sphere_output(double factor, int N, double *m, double3 *x, double3 *v, const std::vector& pot, double *dt) : factor(factor), m(m), x(x), v(v), pot(pot), dt(dt) { inf_event.assign(N, 0); @@ -96,10 +96,11 @@ public: { fclose(out); } - void operator()(int ind_act[], int n_act, double timesteps, double time_cur); + void operator()(const std::vector& ind_act, int n_act, double timesteps, double time_cur); private: double factor; - double *m, *pot, *dt; + const std::vector& pot; + double *m, /**pot,*/ *dt; double3 *x, *v; std::vector inf_event; FILE *out; diff --git a/io.cpp b/io.cpp index 2ca64c8..3a1b086 100644 --- a/io.cpp +++ b/io.cpp @@ -146,7 +146,7 @@ void h5_read(const std::string file_name, int *step_num, int *N, double *t, doub #endif } -void h5_write(const std::string file_name, const int step_num, const int N, const double t, const double *m, const double3 *x, const double3 *v, const double *pot, const double3 *acc, const double3 *jrk, const int extra_mode=0, const bool use_double_precision=true) +void h5_write(const std::string file_name, const int step_num, const int N, const double t, const double *m, const double3 *x, const double3 *v, const std::vector& pot, const double3 *acc, const double3 *jrk, const int extra_mode=0, const bool use_double_precision=true) { #ifdef HAS_HDF5 hid_t file_id, group_id, attribute_id, dataspace_id; @@ -181,7 +181,7 @@ void h5_write(const std::string file_name, const int step_num, const int N, cons bool write_pot = (extra_mode ) & 1; bool write_acc = (extra_mode >> 1) & 1; bool write_jrk = (extra_mode >> 2) & 1; - if (write_pot) write_dataset("POT", 1, (double*)pot); + if (write_pot) write_dataset("POT", 1, (double*)pot.data()); if (write_acc) write_dataset("ACC", 2, (double*)acc); if (write_jrk) write_dataset("JRK", 2, (double*)jrk); diff --git a/io.h b/io.h index 770a0f5..8e8d2ad 100644 --- a/io.h +++ b/io.h @@ -12,5 +12,5 @@ void ascii_write(const std::string file_name, const int step_num, const int N, c void h5_read(const std::string file_name, int *step_num, int *N, double *t, double m[], double3 x[], double3 v[]); // In case the code is compiled without HDF5 support, the implementation of this function just throws an error -void h5_write(const std::string file_name, const int step_num, const int N, const double t, const double *m, const double3 *x, const double3 *v, const double *pot, const double3 *acc, const double3 *jrk, const int write_mode=0, const bool use_double_precision=true); +void h5_write(const std::string file_name, const int step_num, const int N, const double t, const double *m, const double3 *x, const double3 *v, const std::vector& pot, const double3 *acc, const double3 *jrk, const int write_mode=0, const bool use_double_precision=true); // In case the code is compiled without HDF5 support, the implementation of this function just throws an error diff --git a/phigrape.cpp b/phigrape.cpp index 548468e..f8cc761 100644 --- a/phigrape.cpp +++ b/phigrape.cpp @@ -1,60 +1,3 @@ -/***************************************************************************** -File Name : "phi-GRAPE/GPU.c" // BH (1 || 2) + ACC + EJECT - : -Contents : N-body code with integration by individual block time step - : together with the parallel using of GRAPE6a board's. - : - : Added the GPU support via SAPPORO library. - : - : Normalization to the physical units!!! - : - : External Potential added - : Plummer-Kuzmin: Bulge, Disk, Halo - : Kharchenko+Andreas... - : - : SC extra POT for Bek SC test runs... - : - : Rebuced to the Single BH -> Plummer - : Andreas+Fazeel... - : - : Stellar evolution added - : Stellar lifetimes: Raiteri, Villata & Navarro (1996) - : IMS mass loss: van den Hoeg & Groenewegen (1997) - : - : STARDESTR_EXT: Tidal disruption of stars by external BH... - : Chingis, Denis & Maxim... - : - : STARDESTR: Tidal disruption of stars by BH... - : Jose, Li Shuo & Shiyan Zhong - : - : STARDISK: Drag force... - : Chingis, Denis & Maxim... - : - : STARDISK: variable hz = HZ*(R/R_crit) up to R_crit... - : Taras, Andreas... - : - : Live BH (1 || 2) + ACC + EJECT... - : Li Shuo & Shiyan Zhong - : - : dt_min for BH (1 || 2)... - : - : added the PN calculus for the BBH - : PN0, PN1, PN2, PN2.5 (coded on the base of - : Gabor Kupi original routine) - : - : added the "name" array... - : - : added the GMC's calculus (GMC on CPU; GMC2 on GPU) - : for Alexey SC runs... and also for Fazeel Zurich runs... - : - : CPU_TIMELIMIT added for the Julich MW cluster runs... - : -Coded by : Peter Berczik -Version number : 19.04 -Last redaction : 2019.04.16 12:55 -*****************************************************************************/ -#define TIMING - #define ETA_S_CORR 4.0 #define ETA_BH_CORR 4.0 @@ -62,6 +5,7 @@ Last redaction : 2019.04.16 12:55 #define DTMINPOWER -36.0 #include +#include #include #include #include @@ -85,47 +29,24 @@ Last redaction : 2019.04.16 12:55 #endif Config *config; +//chrono::steady_clock::time_point walltime_start; -// These are used in the energy control, could be static but will probably be removed in the end anyway -double CPU_time_real0, CPU_time_user0, CPU_time_syst0; -double CPU_time_real, CPU_time_user, CPU_time_syst; - -#ifdef TIMING -// TODO clean up here -double CPU_tmp_real0, CPU_tmp_user0, CPU_tmp_syst0; -double CPU_tmp_real, CPU_tmp_user, CPU_tmp_syst; - -double DT_TOT, - DT_ACT_DEF1, DT_ACT_DEF2, DT_ACT_DEF3, DT_ACT_PRED, - DT_ACT_GRAV, DT_EXT_GRAV, - DT_GMC_GRAV, DT_GMC_GMC_GRAV, DT_EXT_GMC_GRAV, - DT_ACT_CORR, DT_ACT_LOAD, - DT_STEVOL, DT_STARDISK, DT_STARDESTR; -double DT_ACT_REDUCE; -#endif - -void get_CPU_time(double *time_real, double *time_user, double *time_syst) -{ - struct rusage xxx; - double sec_u, microsec_u, sec_s, microsec_s; - struct timeval tv; - - getrusage(RUSAGE_SELF,&xxx); - - sec_u = xxx.ru_utime.tv_sec; - sec_s = xxx.ru_stime.tv_sec; - - microsec_u = xxx.ru_utime.tv_usec; - microsec_s = xxx.ru_stime.tv_usec; - - *time_user = sec_u + microsec_u * 1.0E-06; - *time_syst = sec_s + microsec_s * 1.0E-06; - - gettimeofday(&tv, NULL); - *time_real = tv.tv_sec + 1.0E-06 * tv.tv_usec; - - *time_user = *time_real; +namespace std::chrono { +struct Timer { + void start() + { + t_start = steady_clock::now(); + } + void stop() + { + t_stop = steady_clock::now(); + time = duration_cast(t_stop - t_start).count()*1E-9; + } + double time; // seconds + steady_clock::time_point t_start, t_stop; +}; } +std::chrono::Timer timer; class Calc_self_grav { public: @@ -133,49 +54,49 @@ public: : g6_calls(0), n_loc(n_loc), clusterid(clusterid), npipe(npipe), eps2(eps*eps) { h2.assign(N, eps2); + pot_loc.resize(N); + acc_loc.resize(N); + jrk_loc.resize(N); } - void operator()(const double t, const int n_act, int ind_act[], const double3 x_act[], const double3 v_act[], - double pot[], double3 acc[], double3 jrk[]) + void operator()(const double t, const int n_act, std::vector& ind_act, const double3 x_act[], const double3 v_act[], + std::vector& pot, double3 acc[], double3 jrk[]) { g6_set_ti(clusterid, t); for (int i=0; i h2; + std::vector pot_loc; // the _loc variables are for this node only. + std::vector acc_loc, jrk_loc; }; void calc_ext_grav(std::vector &external_gravity_components, int n, double3 *x, double3 *v, double *pot, double3 *acc, double3* jrk) // TODO should just be a class that has this pointer array as a member { -#ifdef TIMING - get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); -#endif - std::fill(pot, pot+n, 0.); for (auto component : external_gravity_components) { if (component->is_active) component->apply(n, x, v, pot, acc, jrk); } - -#ifdef TIMING - get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst); - DT_EXT_GRAV += (CPU_tmp_user - CPU_tmp_user0); -#endif } -void energy_contr(const double time_cur, const double timesteps, const double n_act_sum, const double g6_calls, int N, double m[], double3 x[], double3 v[], double pot[], double pot_ext[]) +void energy_contr(const double time_cur, const double timesteps, const double n_act_sum, const double g6_calls, int N, double m[], double3 x[], double3 v[], const std::vector& pot, double pot_ext[]) { - double E_pot = 0; + double E_pot = 0; for (int i=0; iinput_file_name)) { @@ -377,18 +298,19 @@ int main(int argc, char *argv[]) else ascii_read(config->input_file_name, diskstep, N, time_cur, &m, &x, &v); - int *ind = new int[N]; - std::iota(ind, ind+N, 0); + std::vector ind(N); + std::iota(begin(ind), end(ind), 0); double3 *a = new double3[N], *adot = new double3[N]; - double *pot = new double[N], *pot_ext = new double[N], *t = new double[N], *dt = new double[N]; + std::vector pot(N); + double *pot_ext = new double[N], *t = new double[N], *dt = new double[N]; /* data for active particles */ - // x_act_new and v_act_new arrays hold the predicted position and velocity of i-particles, which is later corrected before moving into the j-particle memory. The [pot,a,adot]_act_tmp arrays hold the calculation results from each node. The [pot,a,adot]_act_new arrays hold the reduced calculation results from all nodes. - int n_act, *ind_act = new int[N]; - double *pot_act_new = new double[N], *pot_act_tmp = new double[N], *pot_act_ext = new double[N]; - double3 *x_act_new = new double3[N], *v_act_new = new double3[N], - *a_act_tmp = new double3[N], *adot_act_tmp = new double3[N], - *a_act_new = new double3[N], *adot_act_new = new double3[N]; + int n_act; + std::vector ind_act(N); + std::vector pot_act_new(N); + double *pot_act_ext = new double[N]; + double3 *x_act_new = new double3[N], *v_act_new = new double3[N], + *a_act_new = new double3[N], *adot_act_new = new double3[N]; double eps = config->eps; double eta = config->eta; @@ -411,10 +333,6 @@ int main(int argc, char *argv[]) if ((diskstep == 0) && (time_cur == 0)) { FILE *out = fopen("contr.dat", "w"); fclose(out); -#ifdef TIMING - out = fopen("timing.dat", "w"); - fclose(out); -#endif if (config->live_smbh_output && (config->live_smbh_count > 0)) { out = fopen("bh.dat", "w"); fclose(out); @@ -424,9 +342,6 @@ int main(int argc, char *argv[]) fclose(out); } } - - get_CPU_time(&CPU_time_real0, &CPU_time_user0, &CPU_time_syst0); - } /* if (myRank == rootRank) */ double normalization_mass=1, normalization_length=1, normalization_velocity=1; @@ -531,7 +446,6 @@ int main(int argc, char *argv[]) #endif /* load the nj particles to the G6 */ - for (int k=0; k= 0) { + double3 xcm, vcm, xdc, vdc; grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc); x[grapite_cep_index] = xdc; v[grapite_cep_index] = vdc; @@ -553,13 +468,7 @@ int main(int argc, char *argv[]) #endif /* define the all particles as a active on all the processors for the first time grav calc. */ - calc_self_grav(time_cur, N, ind, x, v, pot_act_tmp, a_act_tmp, adot_act_tmp); - - /* Reduce the "global" vectors from "local" on all processors) */ - // TODO why won't we do the MPI_Allreduce inside the calc_self_grav function, and get rid of these _tmp arrays? - MPI_Allreduce(pot_act_tmp, pot, N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(a_act_tmp, a, 3*N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(adot_act_tmp, adot, 3*N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + calc_self_grav(time_cur, N, ind, x, v, pot, a, adot); if (config->live_smbh_count == 2) { black_hole_physics.set_xv(x[0], x[1], v[0], v[1]); @@ -580,6 +489,7 @@ int main(int argc, char *argv[]) } if (grapite_cep_index >= 0) { + double3 xcm, vcm, xdc, vdc; grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc); x[grapite_cep_index] = xdc; v[grapite_cep_index] = vdc; @@ -588,7 +498,6 @@ int main(int argc, char *argv[]) #endif /* Define initial timestep for all particles on all nodes */ - for (int j=0; jlive_smbh_output) black_hole_physics.write_bh_data(time_cur, m, x, v, pot, a, adot, dt); @@ -636,60 +543,19 @@ int main(int argc, char *argv[]) } /* if (myRank == rootRank) */ - /* Get the Starting time on rootRank */ - - if (myRank == rootRank) { - get_CPU_time(&CPU_time_real0, &CPU_time_user0, &CPU_time_syst0); - get_CPU_time(&CPU_time_real, &CPU_time_user, &CPU_time_syst); - } /* if (myRank == rootRank) */ - timesteps = 0.0; // Why won't those two be long long instead of double + should include the zeroth step n_act_sum = 0.0; -#ifdef TIMING - DT_TOT = 0.0; - - DT_ACT_DEF1 = 0.0; - DT_ACT_DEF2 = 0.0; - DT_ACT_DEF3 = 0.0; - DT_ACT_PRED = 0.0; - DT_ACT_GRAV = 0.0; - DT_EXT_GRAV = 0.0; - DT_EXT_GMC_GRAV = 0.0; - DT_GMC_GMC_GRAV = 0.0; - DT_ACT_CORR = 0.0; - DT_ACT_LOAD = 0.0; - - DT_STEVOL = 0.0; - DT_STARDISK = 0.0; - DT_STARDESTR = 0.0; - - DT_ACT_REDUCE = 0.0; -#endif - /* The main integration loop */ - while (time_cur <= t_end) { - /* Define the minimal time and the active particles on all the nodes (exclude the ZERO masses!!!) */ - -#ifdef TIMING - get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); -#endif - + /* Define the minimal time and the active particles on all the nodes */ double min_t = active_search.get_minimum_time(t, dt); -#ifdef TIMING - get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst); - DT_ACT_DEF1 += (CPU_tmp_user - CPU_tmp_user0); -#endif - -#ifdef TIMING - get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); -#endif - - active_search.get_active_indices(min_t, t, dt, ind_act, n_act); + /* Get indices of all particles that will be active in this bunch */ + active_search.get_active_indices(min_t, t, dt, ind_act.data(), n_act); + /* Find the BH(s) indices in the active list */ int i_bh1=0, i_bh2=1; #ifdef ETICS int n_bh = config->live_smbh_count; @@ -707,18 +573,7 @@ int main(int argc, char *argv[]) } #endif - -#ifdef TIMING - get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst); - DT_ACT_DEF2 += (CPU_tmp_user - CPU_tmp_user0); -#endif - /* predict the active particles positions etc... on all the nodes */ - -#ifdef TIMING - get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); -#endif - for (int i=0; ilive_smbh_count == 2) { black_hole_physics.set_xv(x_act_new[i_bh1], x_act_new[i_bh2], v_act_new[i_bh1], v_act_new[i_bh2]); @@ -765,19 +592,12 @@ int main(int argc, char *argv[]) if (config->binary_smbh_pn) black_hole_physics.adjust_post_newtonian(dt[i_bh1], a_act_new[i_bh1], a_act_new[i_bh2], adot_act_new[i_bh1], adot_act_new[i_bh2]); } + /* Calculate gravity on active particles due to external forces */ calc_ext_grav(external_gravity_components, n_act, x_act_new, v_act_new, pot_act_ext, a_act_new, adot_act_new); /* correct the active particles positions etc... on all the nodes */ - -#ifdef TIMING - get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); -#endif - double min_dt = dt_max; for (int i=0; ilive_smbh_count > 0) { if (config->live_smbh_count>=1) dt[0] = min_dt; if (config->live_smbh_count==2) dt[1] = min_dt; - } if (config->binary_smbh_influence_sphere_output && (myRank == rootRank)) { @@ -835,41 +653,22 @@ int main(int argc, char *argv[]) binary_smbh_influence_sphere_output(ind_act, n_act, timesteps, time_cur); } -#ifdef TIMING - get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst); - DT_ACT_CORR += (CPU_tmp_user - CPU_tmp_user0); -#endif - /* load the new values for active particles to the local GRAPE's */ - -#ifdef TIMING - get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); -#endif - for (int i=0; i= t_contr) { if (myRank == rootRank) { - energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, N, m, x, v, pot, pot_ext); - /* write cont data */ if (config->output_hdf5) h5_write("data.con", diskstep, N, time_cur, m, x, v, pot, a, adot, 0, true); else ascii_write("data.con", diskstep, N, time_cur, m, x, v, 16); - - /* possible OUT for timing !!! */ -#ifdef TIMING - FILE *out = fopen("timing.dat", "a"); - - DT_TOT = DT_ACT_DEF1 + DT_ACT_DEF2 + DT_ACT_DEF3 + DT_ACT_PRED + - DT_ACT_GRAV + DT_EXT_GRAV + DT_GMC_GRAV + - DT_GMC_GMC_GRAV + DT_EXT_GMC_GRAV + - DT_ACT_CORR + DT_ACT_LOAD + - DT_STEVOL + DT_STARDISK + DT_STARDESTR + - DT_ACT_REDUCE; - - fprintf(out,"%.8E \t %.6E \t %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f \t %.3f \t %.8E %.8E %.8E \t %.8E %.8E %.8E \n", - time_cur, DT_TOT, - 100.0*DT_ACT_DEF1/DT_TOT, 100.0*DT_ACT_DEF2/DT_TOT, 100.0*DT_ACT_DEF3/DT_TOT, 100.0*DT_ACT_PRED/DT_TOT, - 100.0*DT_ACT_GRAV/DT_TOT, 100.0*DT_EXT_GRAV/DT_TOT, 100.0*DT_GMC_GRAV/DT_TOT, - 100.0*DT_GMC_GMC_GRAV/DT_TOT, 100.0*DT_EXT_GMC_GRAV/DT_TOT, - 100.0*DT_ACT_CORR/DT_TOT, 100.0*DT_ACT_LOAD/DT_TOT, - 100.0*DT_STEVOL/DT_TOT, 100.0*DT_STARDISK/DT_TOT, 100.0*DT_STARDESTR/DT_TOT, - 100.0*DT_ACT_REDUCE/DT_TOT, - CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0, - timesteps, n_act_sum, 57.0*N*n_act_sum/(CPU_time_user-CPU_time_user0)/1.0E+09); - - fclose(out); -#endif - } /* if (myRank == rootRank) */ #ifdef ETICS - // We are /inside/ a control step, so all particles must be synchronized; we can safely calculate their density centre. The acceleration and jerk currently in the memory are for the predicted position of the CEP, by calling grapite_calc_center we "correct" the position and velocity, but not the gravity at that point. + // We are /inside/ a control step, so all particles must be + // synchronized; we can safely calculate their density centre. The + // acceleration and jerk currently in the memory are for the + // predicted position of the CEP, by calling grapite_calc_center we + // "correct" the position and velocity, but not the gravity at that + // point. if (grapite_cep_index >= 0) { + double3 xcm, vcm, xdc, vdc; grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc); x[grapite_cep_index] = xdc; v[grapite_cep_index] = vdc; @@ -955,24 +732,22 @@ int main(int argc, char *argv[]) } /* while (time_cur < t_end) */ /* close the local GRAPEs */ + timer.stop(); g6_close(clusterid); double g6_calls_sum; MPI_Reduce(&calc_self_grav.g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD); - if (myRank == rootRank) { - /* Write some output for the timestep annalize... */ - printf("\n"); printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls_sum); printf("\n"); - printf("Real Speed = %.3f GFlops \n", 57.0*N*n_act_sum/(CPU_time_user-CPU_time_user0)/1.0E+09); + printf("Real Speed = %.3f GFlops \n", 57.0*N*n_act_sum/(timer.time)/1.0E+09); fflush(stdout); - } /* if (myRank == rootRank) */ - delete[] m; delete[] x; delete[] v; delete[] ind; delete[] a; delete[] adot; delete[] pot; delete[] pot_ext; delete[] t; delete[] dt; delete[] ind_act; delete[] pot_act_new; delete[] pot_act_tmp; delete[] x_act_new; delete[] v_act_new; delete[] a_act_tmp; delete[] adot_act_tmp; delete[] a_act_new; delete[] adot_act_new; delete[] pot_act_ext; + delete config; + delete[] m; delete[] x; delete[] v; delete[] a; delete[] adot; delete[] pot_ext; delete[] t; delete[] dt; delete[] x_act_new; delete[] v_act_new; delete[] a_act_new; delete[] adot_act_new; delete[] pot_act_ext; /* Finalize the MPI work */ MPI_Finalize();