#include #include #include #include #include "black_holes.h" #include "config.h" #include "double3.h" #include "external.h" #include "grape6.h" #include "io.h" #ifdef ETICS #include "grapite.h" #endif namespace std::chrono { struct Timer { void start() { t_start = steady_clock::now(); } void stop() { t_stop = steady_clock::now(); time = duration_cast(t_stop - t_start).count()*1E-9; } double time; // seconds steady_clock::time_point t_start, t_stop; }; } std::chrono::Timer timer; class Calc_self_grav { public: Calc_self_grav(const int N, const int n_loc, const int clusterid, const int npipe, const double eps) : g6_calls(0), n_loc(n_loc), clusterid(clusterid), npipe(npipe), eps2(eps*eps) { h2.assign(N, eps2); pot_loc.resize(N); acc_loc.resize(N); jrk_loc.resize(N); } void operator()(const double t, const int n_act, std::vector &ind_act, std::vector &x_act, std::vector &v_act, std::vector& pot, std::vector &acc, std::vector &jrk) { g6_set_ti(clusterid, t); for (int i=0; i h2; std::vector pot_loc; // the _loc variables are for this node only. std::vector acc_loc, jrk_loc; }; class Calc_ext_grav { public: void add_component(External_gravity &component) { components.push_back(&component); } bool any_active() { for (auto component : components) if (component->is_active) return true; return false; } void operator()(int n, const std::vector &x, const std::vector &v, std::vector &pot, std::vector &acc, std::vector &jrk) { for (auto component : components) { if (component->is_active) component->apply(n, x, v, pot, acc, jrk); } } private: std::vector components; }; void energy_contr(const double time_cur, const double timesteps, const double n_act_sum, const double g6_calls, int N, const std::vector &m, const std::vector &x, const std::vector &v, const std::vector &pot, const std::vector &pot_ext) { double E_pot = 0; for (int i=0; i &t, std::vector &dt) { double min_t_loc, min_t; #ifdef ETICS if (grapite_active_search_flag) { min_t_loc = grapite_get_minimum_time(); } else #endif { min_t_loc = t[myRank*n_loc]+dt[myRank*n_loc]; for (int j=myRank*n_loc+1; j<(myRank+1)*n_loc; j++) { double tmp = t[j] + dt[j]; if (tmp < min_t_loc) min_t_loc = tmp; } } /* Reduce the "global" min_t from min_t_loc "local" on all processors) */ MPI_Allreduce(&min_t_loc, &min_t, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); return min_t; } void get_active_indices(const double min_t, const std::vector &t, const std::vector &dt, std::vector &ind_act, int &n_act) { #ifdef ETICS if (grapite_active_search_flag) { int n_act_loc; grapite_active_search(min_t, ind_act_loc.data(), &n_act_loc); if (myRank > 0) for (int i=0; i ind_act_loc; bool grapite_active_search_flag; }; inline void calc_high_derivatives(const double dt_tmp, const double3 &a_old, const double3 &a_new, const double3 &a1_old, const double3 &a1_new, double3 &a2, double3 &a3) { double dtinv = 1/dt_tmp; double dt2inv = dtinv*dtinv; double dt3inv = dt2inv*dtinv; double3 a0mia1 = a_old-a_new; double3 ad04plad12 = 4*a1_old + 2*a1_new; double3 ad0plad1 = a1_old + a1_new; a2 = -6*a0mia1*dt2inv - ad04plad12*dtinv; a3 = 12*a0mia1*dt3inv + 6*ad0plad1*dt2inv; } inline void corrector(const double dt_tmp, const double3 &a2, const double3 &a3, double3 &x, double3 &v) { double dt3over6 = dt_tmp*dt_tmp*dt_tmp/6.0; double dt4over24 = dt3over6*dt_tmp/4.0; double dt5over120 = dt4over24*dt_tmp/5.0; x += dt4over24*a2 + dt5over120*a3; v += dt3over6*a2 + dt4over24*a3; } inline double aarseth_step(const double eta, const double dt, const double3 &a, const double3 &a1, const double3 &a2, const double3 &a3) { double a1abs = a.norm(); double adot1abs = a1.norm(); double3 a2dot1 = a2 + dt*a3; double a2dot1abs = a2dot1.norm(); double a3dot1abs = a3.norm(); return sqrt(eta*(a1abs*a2dot1abs+adot1abs*adot1abs)/(adot1abs*a3dot1abs+a2dot1abs*a2dot1abs)); } int main(int argc, char *argv[]) { /* read the input parameters */ const Config config("phigrape.conf"); timer.start(); double timesteps=0.0, n_act_sum=0.0; /* INIT the rand() !!! */ srand(19640916); /* it is just my birthday :-) */ /* Init MPI */ MPI_Init(&argc, &argv); /* Define the total number of processors and the Rank of each processors */ int n_proc, myRank; const int rootRank = 0; MPI_Comm_size(MPI_COMM_WORLD, &n_proc); MPI_Comm_rank(MPI_COMM_WORLD, &myRank); /* Define the processors names */ int name_proc; char processor_name[MPI_MAX_PROCESSOR_NAME]; MPI_Get_processor_name(processor_name, &name_proc); /* Print the Rank and the names of processors */ printf("Rank of the processor %03d on %s \n", myRank, processor_name); Input_data input_data; if (is_hdf5(config.input_file_name)) { #ifndef HAS_HDF5 fprintf(stderr, "ERROR: input file is in HDF5 format, but the code was compiled without HDF5 support\n"); return -1; #endif input_data = h5_read(config.input_file_name); } else input_data = ascii_read(config.input_file_name); int N = input_data.N; int diskstep = input_data.step_num; double time_cur = input_data.t; auto &m = input_data.m; auto &x = input_data.x; auto &v = input_data.v; if (myRank == rootRank) { printf("\n"); printf("Begin the calculation of phi-GRAPE program on %03d processors\n", n_proc); printf("\n"); printf("N = %07d \t eps = %.6E \n", N, config.eps); printf("t_beg = %.6E \t t_end = %.6E \n", time_cur, config.t_end); printf("dt_disk = %.6E \t dt_contr = %.6E \n", config.dt_disk, config.dt_contr); printf("dt_bh = %.6E \n", config.dt_bh); printf("eta = %.6E \n", config.eta); printf("\n"); if ((diskstep == 0) && (time_cur == 0)) { FILE *out = fopen("contr.dat", "w"); fclose(out); if (config.live_smbh_output && (config.live_smbh_count > 0)) { out = fopen("bh.dat", "w"); fclose(out); } if ((config.live_smbh_neighbor_output) && (config.live_smbh_count > 0)) { out = fopen("bh_neighbors.dat", "w"); fclose(out); } } } /* if (myRank == rootRank) */ double normalization_mass=1, normalization_length=1, normalization_velocity=1; if (config.ext_units_physical) { normalization_mass = 1/config.unit_mass; normalization_length = 1000/config.unit_length; normalization_velocity = 1.52484071426404437233e+01*sqrt(config.unit_length/config.unit_mass); } Calc_ext_grav calc_ext_grav; Plummer ext_bulge(config.ext_m_bulge*normalization_mass, config.ext_b_bulge*normalization_length); calc_ext_grav.add_component(ext_bulge); Miyamoto_Nagai ext_disk(config.ext_m_disk*normalization_mass, config.ext_a_disk*normalization_length, config.ext_b_disk*normalization_length); calc_ext_grav.add_component(ext_disk); Plummer ext_halo_plummer(config.ext_m_halo_plummer*normalization_mass, config.ext_b_halo_plummer*normalization_length); calc_ext_grav.add_component(ext_halo_plummer); Logarithmic_halo ext_log_halo(config.ext_log_halo_v*normalization_velocity, config.ext_log_halo_r*normalization_length); calc_ext_grav.add_component(ext_log_halo); Dehnen ext_dehnen(config.ext_dehnen_m*normalization_mass, config.ext_dehnen_r*normalization_length, config.ext_dehnen_gamma); calc_ext_grav.add_component(ext_dehnen); bool has_external_gravity = calc_ext_grav.any_active(); if (has_external_gravity) printf("External Potential: \n\n"); if (ext_bulge.is_active) printf("m_bulge = %.4E b_bulge = %.4E\n", config.ext_m_bulge*normalization_mass, config.ext_b_bulge*normalization_length); if (ext_disk.is_active) printf("m_disk = %.4E a_disk = %.4E b_disk = %.4E\n", config.ext_m_disk*normalization_mass, config.ext_a_disk*normalization_length, config.ext_b_disk*normalization_length); if (ext_halo_plummer.is_active) printf("m_halo = %.4E b_halo = %.4E\n", config.ext_m_halo_plummer*normalization_mass, config.ext_b_halo_plummer*normalization_length); if (ext_log_halo.is_active) printf("v_halo = %.6E r_halo = %.6E \n", config.ext_log_halo_v*normalization_velocity, config.ext_log_halo_r*normalization_length); if (ext_dehnen.is_active) printf("m_ext = %.6E r_ext = %.6E \t g_ext = %.3E \n\n", config.ext_dehnen_m*normalization_mass, config.ext_dehnen_r*normalization_length, config.ext_dehnen_gamma); fflush(stdout); double t_disk = config.dt_disk*(1+floor(time_cur/config.dt_disk)); double t_contr = config.dt_contr*(1+floor(time_cur/config.dt_contr)); double t_bh = config.dt_bh*(1+floor(time_cur/config.dt_bh)); if (myRank == rootRank) { printf("t_disk = %.6E t_contr = %.6E t_bh = %.6E \n\n", t_disk, t_contr, t_bh); fflush(stdout); } /* if (myRank == rootRank) */ /* some local settings for G6a boards */ int clusterid, numGPU; if (config.devices_per_node==0) { MPI_Comm shmcomm; MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm); MPI_Comm_size(shmcomm, &numGPU); MPI_Comm_rank(shmcomm, &clusterid); } else { numGPU = config.devices_per_node; clusterid = myRank % numGPU; } printf("Rank of the processor %03d : Number of GPUs %01d : Cluster ID %01d \n", myRank, numGPU, clusterid); fflush(stdout); /* init the local GRAPEs */ g6_open(clusterid); int npipe = g6_npipes(); g6_set_tunit(51); g6_set_xunit(51); bool grapite_active_search_flag = false; #ifdef ETICS grapite_set_dev_exec_threshold(config.grapite_dev_exec_threshold); grapite_active_search_flag = config.grapite_active_search; #endif int n_loc = N/n_proc; #ifdef ETICS grapite_read_particle_tags(N, config.grapite_mask_file_name.c_str(), myRank, n_loc); grapite_set_dt_exp(config.dt_scf); grapite_set_t_exp(time_cur); #endif const double dt_min = pow(2, config.dt_min_power); std::vector ind(N); std::iota(begin(ind), end(ind), 0); /* load the nj particles to the G6 */ double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface. for (int k=0; k= 0) { double3 xcm, vcm, xdc, vdc; grapite_calc_center(N, m.data(), (double(*)[3])x.data(), (double(*)[3])v.data(), xcm, vcm, xdc, vdc); x[grapite_cep_index] = xdc; v[grapite_cep_index] = vdc; grapite_update_cep(time_cur, xdc, vdc, zeros, zeros); } #endif std::vector a(N), adot(N); std::vector pot(N); /* define the all particles as a active on all the processors for the first time grav calc. */ Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, config.eps); calc_self_grav(time_cur, N, ind, x, v, pot, a, adot); Black_hole_physics black_hole_physics; if (config.live_smbh_count == 1) black_hole_physics = Black_hole_physics(m[0], 0, myRank, rootRank); else if (config.live_smbh_count == 2) { black_hole_physics = Black_hole_physics(m[0], m[1], myRank, rootRank); black_hole_physics.set_xv(x[0], x[1], v[0], v[1]); if (config.live_smbh_custom_eps >= 0) black_hole_physics.adjust_softening(pot[0], pot[1], a[0], a[1], adot[0], adot[1]); if (config.binary_smbh_pn) black_hole_physics.adjust_post_newtonian(dt_min, a[0], a[1], adot[0], adot[1]); } if (config.binary_smbh_pn) { black_hole_physics.set_post_newtonian(config.pn_c, config.pn_usage.data()); if (config.pn_usage[6]) black_hole_physics.set_spins(config.smbh1_spin.data(), config.smbh2_spin.data()); } black_hole_physics.set_softening(config.eps, config.live_smbh_custom_eps); std::vector pot_ext(N, 0.); calc_ext_grav(N, x, v, pot_ext, a, adot); /* Energy control... */ if (myRank == rootRank) energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, N, m, x, v, pot, pot_ext); #ifdef ETICS if (config.etics_dump_coeffs && (diskstep==0)) { char out_fname[256]; sprintf(out_fname, "coeffs.%06d.%02d.dat", 0, myRank); grapite_dump(out_fname, 2); } if (grapite_cep_index >= 0) { double3 xcm, vcm, xdc, vdc; grapite_calc_center(N, m.data(), (double(*)[3])x.data(), (double(*)[3])v.data(), xcm, vcm, xdc, vdc); x[grapite_cep_index] = xdc; v[grapite_cep_index] = vdc; grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]); } #endif const double dt_max = std::min({config.dt_disk, config.dt_contr, pow(2, config.dt_max_power)}); std::vector dt(N); /* Define initial timestep for all particles on all nodes */ for (int j=0; j dt_max) dt_tmp = dt_max; dt[j] = dt_tmp; if (config.dt_min_warning && (myRank == 0)) { if (dt[j] == dt_min) { printf("!!! Warning0: dt = dt_min = %.6E \t ind = %07d \n", dt[j], ind[j]); fflush(stdout); } } } /* j */ if (config.live_smbh_count > 0) { double min_dt = *std::min_element(begin(dt), end(dt)); for (int i=0; i ind_act(N); std::vector x_act_new(N), v_act_new(N), a_act_new(N), adot_act_new(N); std::vector t(N, time_cur), pot_act_new(N); // Functors for the main integration loop Active_search active_search(myRank, n_proc, n_loc, N, grapite_active_search_flag); Binary_smbh_influence_sphere_output binary_smbh_influence_sphere_output(config.binary_smbh_influence_radius_factor, N, m, x, v, pot, dt); Write_bh_nb_data write_bh_nb_data(config.live_smbh_neighbor_number, config.live_smbh_count, N, m, x, v); if (myRank == rootRank) { if (config.live_smbh_output) black_hole_physics.write_bh_data(time_cur, m, x, v, pot, a, adot, dt); if (config.live_smbh_neighbor_output) write_bh_nb_data(time_cur); } /* if (myRank == rootRank) */ /* The main integration loop */ while (time_cur <= config.t_end) { /* Define the minimal time and the active particles on all the nodes */ double min_t = active_search.get_minimum_time(t, dt); /* Get indices of all particles that will be active in this bunch */ int n_act; active_search.get_active_indices(min_t, t, dt, ind_act, n_act); /* Find the BH(s) indices in the active list */ int i_bh1=0, i_bh2=1; #ifdef ETICS int n_bh = config.live_smbh_count; if (config.grapite_active_search && (n_bh>0)) { int act_def_grapite_bh_count = 0; int i_bh[n_bh]; for (int i=0; i= 0) black_hole_physics.adjust_softening(pot_act_new[i_bh1], pot_act_new[i_bh2], a_act_new[i_bh1], a_act_new[i_bh2], adot_act_new[i_bh1], adot_act_new[i_bh2]); if (config.binary_smbh_pn) black_hole_physics.adjust_post_newtonian(dt[i_bh1], a_act_new[i_bh1], a_act_new[i_bh2], adot_act_new[i_bh1], adot_act_new[i_bh2]); } /* Calculate gravity on active particles due to external forces */ std::vector pot_act_ext(N, 0.); calc_ext_grav(n_act, x_act_new, v_act_new, pot_act_ext, a_act_new, adot_act_new); /* correct the active particles positions etc... on all the nodes */ double min_dt = dt_max; for (int i=0; i 0) && (ind_act[i] < config.live_smbh_count)) eta_curr = config.eta/config.eta_bh_corr; else eta_curr = config.eta; double dt_new = aarseth_step(eta_curr, dt_tmp, a_act_new[i], adot_act_new[i], a2, a3); //TODO the below should be moved to a function if (dt_new < dt_min) dt_tmp = dt_min; if ((dt_new < dt_tmp) && (dt_new > dt_min)) { int power = log(dt_new)/log(2.0) - 1; dt_tmp = pow(2.0, (double)power); // TODO why is this casting needed here? } if ((dt_new > 2*dt_tmp) && (fmod(min_t, 2*dt_tmp) == 0) && (2*dt_tmp <= dt_max)) { dt_tmp *= 2; } if (config.dt_min_warning && (myRank == 0)) { if (dt_tmp == dt_min) { printf("!!! Warning1: dt_act = dt_min = %.6E \t ind_act = %07d \n", dt_tmp, ind_act[i]); fflush(stdout); } } if (dt_tmp < min_dt) min_dt = dt_tmp; x[j_act] = x_act_new[i]; v[j_act] = v_act_new[i]; t[j_act] = min_t; dt[j_act] = dt_tmp; pot[j_act] = pot_act_new[i]; pot_ext[j_act] = pot_act_ext[i]; a[j_act] = a_act_new[i]; adot[j_act] = adot_act_new[i]; } /* i */ /* define the min. dt over all the act. part. and set it also for the BH... */ if (config.live_smbh_count > 0) { if (config.live_smbh_count>=1) dt[0] = min_dt; if (config.live_smbh_count==2) dt[1] = min_dt; } if (config.binary_smbh_influence_sphere_output && (myRank == rootRank)) { //TODO clean up this mass. We don't want to have all these _act arrays; they are only needed for this lame function. binary_smbh_influence_sphere_output(ind_act, n_act, timesteps, time_cur); } /* load the new values for active particles to the local GRAPE's */ for (int i=0; i= t_bh) { if (myRank == rootRank) { /* Write BH data... */ if (config.live_smbh_output) black_hole_physics.write_bh_data(time_cur, m, x, v, pot, a, adot, dt); /* Write BH NB data... */ if (config.live_smbh_neighbor_output) write_bh_nb_data(time_cur); } /* if (myRank == rootRank) */ t_bh += config.dt_bh; } /* if (time_cur >= t_bh) */ if (time_cur >= t_contr) { if (myRank == rootRank) { energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, N, m, x, v, pot, pot_ext); /* write cont data */ if (config.output_hdf5) h5_write("data.con", diskstep, N, time_cur, m, x, v, pot, a, adot, 0, true); else ascii_write("data.con", diskstep, N, time_cur, m, x, v, 16); } /* if (myRank == rootRank) */ #ifdef ETICS // We are /inside/ a control step, so all particles must be // synchronized; we can safely calculate their density centre. The // acceleration and jerk currently in the memory are for the // predicted position of the CEP, by calling grapite_calc_center we // "correct" the position and velocity, but not the gravity at that // point. if (grapite_cep_index >= 0) { double3 xcm, vcm, xdc, vdc; grapite_calc_center(N, m.data(), (double(*)[3])x.data(), (double(*)[3])v.data(), xcm, vcm, xdc, vdc); x[grapite_cep_index] = xdc; v[grapite_cep_index] = vdc; grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]); } #endif t_contr += config.dt_contr; } /* if (time_cur >= t_contr) */ if (time_cur >= t_disk) { char out_fname[256]; diskstep++; if (myRank == rootRank) { sprintf(out_fname, "%06d", diskstep); if (config.output_hdf5) h5_write(std::string(out_fname) + ".h5", diskstep, N, time_cur, m, x, v, pot, a, adot, config.output_extra_mode, config.output_hdf5_double_precision); else ascii_write(std::string(out_fname) + ".dat", diskstep, N, time_cur, m, x, v, config.output_ascii_precision); } /* if (myRank == rootRank) */ #ifdef ETICS if (config.etics_dump_coeffs) { sprintf(out_fname, "coeffs.%06d.%02d.dat", diskstep, myRank); grapite_dump(out_fname, 2); } #endif t_disk += config.dt_disk; } /* if (time_cur >= t_disk) */ } /* while (time_cur < t_end) */ /* close the local GRAPEs */ timer.stop(); g6_close(clusterid); double g6_calls_sum; MPI_Reduce(&calc_self_grav.g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD); if (myRank == rootRank) { /* Write some output for the timestep annalize... */ printf("\n"); printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls_sum); printf("\n"); printf("Real Speed = %.3f GFlops \n", 57.0*N*n_act_sum/(timer.time)/1.0E+09); fflush(stdout); } /* if (myRank == rootRank) */ /* Finalize the MPI work */ MPI_Finalize(); }