/***************************************************************************** File Name : "phi-GRAPE/GPU.c" // BH (1 || 2) + ACC + EJECT : Contents : N-body code with integration by individual block time step : together with the parallel using of GRAPE6a board's. : : Added the GPU support via SAPPORO library. : : Normalization to the physical units!!! : : External Potential added : Plummer-Kuzmin: Bulge, Disk, Halo : Kharchenko+Andreas... : : SC extra POT for Bek SC test runs... : : Rebuced to the Single BH -> Plummer : Andreas+Fazeel... : : Stellar evolution added : Stellar lifetimes: Raiteri, Villata & Navarro (1996) : IMS mass loss: van den Hoeg & Groenewegen (1997) : : STARDESTR_EXT: Tidal disruption of stars by external BH... : Chingis, Denis & Maxim... : : STARDESTR: Tidal disruption of stars by BH... : Jose, Li Shuo & Shiyan Zhong : : STARDISK: Drag force... : Chingis, Denis & Maxim... : : STARDISK: variable hz = HZ*(R/R_crit) up to R_crit... : Taras, Andreas... : : Live BH (1 || 2) + ACC + EJECT... : Li Shuo & Shiyan Zhong : : dt_min for BH (1 || 2)... : : added the PN calculus for the BBH : PN0, PN1, PN2, PN2.5 (coded on the base of : Gabor Kupi original routine) : : added the "name" array... : : added the GMC's calculus (GMC on CPU; GMC2 on GPU) : for Alexey SC runs... and also for Fazeel Zurich runs... : : CPU_TIMELIMIT added for the Julich MW cluster runs... : Coded by : Peter Berczik Version number : 19.04 Last redaction : 2019.04.16 12:55 *****************************************************************************/ #include "double3.h" #include "black_holes.h" #include "config.h" #include "io.h" #include "external.h" Config *config; #ifdef ETICS #include "grapite.h" #endif #define TIMING #define ETA_S_CORR 4.0 #define ETA_BH_CORR 4.0 #define DTMAXPOWER -3.0 #define DTMINPOWER -36.0 #include #include #include #include #include #include #include #include #include #include #include "grape6.h" #include /* Some "good" functions and constants... */ // TODO replace with inline functions #define MIN(a,b) (((a)<(b)) ? (a):(b) ) #define SQR(x) ((x)*(x) ) #define KB 1024 #define MB (KB*KB) #define N_MAX (32*KB) // double L[3]; // needed in pn_bh_spin.c // Needed for things related to BHs #include "debug.h" double CPU_time_real0, CPU_time_user0, CPU_time_syst0; double CPU_time_real, CPU_time_user, CPU_time_syst; #ifdef TIMING double CPU_tmp_real0, CPU_tmp_user0, CPU_tmp_syst0; double CPU_tmp_real, CPU_tmp_user, CPU_tmp_syst; double DT_TOT, DT_ACT_DEF1, DT_ACT_DEF2, DT_ACT_DEF3, DT_ACT_PRED, DT_ACT_GRAV, DT_EXT_GRAV, DT_GMC_GRAV, DT_GMC_GMC_GRAV, DT_EXT_GMC_GRAV, DT_ACT_CORR, DT_ACT_LOAD, DT_STEVOL, DT_STARDISK, DT_STARDESTR; double DT_ACT_REDUCE; #endif /* external potential... */ double3 x_bh1, x_bh2, v_bh1, v_bh2; double pot_bh1, pot_bh2; double3 a_bh1, adot_bh1, a_bh2, adot_bh2; //#include "n_bh.c" //double3 a_pn1[7], adot_pn1[7], a_pn2[7], adot_pn2[7]; //#include "pn_bh_spin.c" #ifdef ETICS int grapite_cep_index; #endif void get_CPU_time(double *time_real, double *time_user, double *time_syst) { struct rusage xxx; double sec_u, microsec_u, sec_s, microsec_s; struct timeval tv; getrusage(RUSAGE_SELF,&xxx); sec_u = xxx.ru_utime.tv_sec; sec_s = xxx.ru_stime.tv_sec; microsec_u = xxx.ru_utime.tv_usec; microsec_s = xxx.ru_stime.tv_usec; *time_user = sec_u + microsec_u * 1.0E-06; *time_syst = sec_s + microsec_s * 1.0E-06; gettimeofday(&tv, NULL); *time_real = tv.tv_sec + 1.0E-06 * tv.tv_usec; *time_user = *time_real; } class Calc_self_grav { public: Calc_self_grav(const int N, const int n_loc, const int clusterid, const int npipe, const double eps) : g6_calls(0), n_loc(n_loc), clusterid(clusterid), npipe(npipe), eps2(eps*eps) { h2.assign(N, eps2); } void operator()(const double t, const int n_act, int ind_act[], const double3 x_act[], const double3 v_act[], double pot[], double3 acc[], double3 jrk[]) { g6_set_ti(clusterid, t); for (int i=0; i h2; }; void calc_ext_grav(std::vector &external_gravity_components, int n_act, double3 *x_act_new, double3 *v_act_new, double *pot_act_ext, double3 *a_act_new, double3* adot_act_new) { #ifdef TIMING get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); #endif /* Define the external potential for all active particles on all nodes */ std::fill(pot_act_ext, pot_act_ext+n_act, 0.); for (auto component : external_gravity_components) { if (component->is_active) component->apply(n_act, x_act_new, v_act_new, pot_act_ext, a_act_new, adot_act_new); } /* For simple Plummer potential... */ #ifdef TIMING get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst); DT_EXT_GRAV += (CPU_tmp_user - CPU_tmp_user0); #endif } void energy_contr(const double time_cur, const double timesteps, const double n_act_sum, const double g6_calls, double& rcm_sum, double& vcm_sum, double& E_tot_0, double& E_tot_corr_0, double& E_tot_corr_sd_0, int &skip_con, int N, double m[], double3 x[], double3 v[], double pot[], double pot_ext[]) { // TODO maybe use static variables here for the previous step energy? double E_pot = 0.0; for (int i=0; i 0) for (int i=0; im[0]; double m_bh2 = source_particle_list->m[1]; double3 x_bh1 = source_particle_list->x[0]; double3 x_bh2 = source_particle_list->x[1]; double3 v_bh1 = source_particle_list->v[0]; double3 v_bh2 = source_particle_list->v[1]; double3 x_bbhc = (m_bh1*x_bh1 + m_bh2*x_bh2)/(m_bh1 + m_bh2); double3 v_bbhc = (m_bh1*v_bh1 + m_bh2*v_bh2)/(m_bh1 + m_bh2); double DR2 = (x_bh1 - x_bh2).norm2(); double DV2 = (v_bh1 - v_bh2).norm2(); double EB = -(m_bh1 + m_bh2) / sqrt(DR2) + 0.5 * DV2; double SEMI_a = -0.5 * (m_bh1 + m_bh2)/EB; double SEMI_a2 = SQR(SEMI_a); for (int i=0; ipot[0]; double& pot_bh2 = source_particle_list->pot[1]; double& m_act = source_particle_list->m[j_act]; double3& x_act = source_particle_list->x[j_act]; double3& v_act = source_particle_list->v[j_act]; double& dt_act = source_particle_list->dt[j_act]; double& pot_act = source_particle_list->pot[j_act]; double tmp_r2 = (x_act - x_bbhc).norm2(); if (tmp_r2 < SEMI_a2*SQR(factor)) { if (inf_event[j_act] == 0) { fprintf(out,"INF1 %.6E %.16E %07d %07d %.6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E % .6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E % .6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E %.6E % .6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E \n", timesteps, time_cur, i, j_act, sqrt(DR2), x_bbhc[0], x_bbhc[1], x_bbhc[2], v_bbhc[0], v_bbhc[1], v_bbhc[2], m_bh1, x_bh1[0], x_bh1[1], x_bh1[2], v_bh1[0], v_bh1[1], v_bh1[2], pot_bh1, m_bh2, x_bh2[0], x_bh2[1], x_bh2[2], v_bh2[0], v_bh2[1], v_bh2[2], pot_bh2, sqrt(tmp_r2), m_act, x_act[0], x_act[1], x_act[2], v_act[0], v_act[1], v_act[2], pot_act, dt_act); inf_event[j_act] = 1; } } else { if (inf_event[j_act] == 1) { fprintf(out,"INF2 %.6E %.16E %07d %07d %.6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E % .6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E % .6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E %.6E % .6E % .6E % .6E % .6E % .6E % .6E % .6E %.6E \n", timesteps, time_cur, i, ind_act[i], sqrt(DR2), x_bbhc[0], x_bbhc[1], x_bbhc[2], v_bbhc[0], v_bbhc[1], v_bbhc[2], m_bh1, x_bh1[0], x_bh1[1], x_bh1[2], v_bh1[0], v_bh1[1], v_bh1[2], pot_bh1, m_bh2, x_bh2[0], x_bh2[1], x_bh2[2], v_bh2[0], v_bh2[1], v_bh2[2], pot_bh2, sqrt(tmp_r2), m_act, x_act[0], x_act[1], x_act[2], v_act[0], v_act[1], v_act[2], pot_act, dt_act); } inf_event[j_act] = 0; } /* if (tmp_r2 < DR2*R_INF2) */ } /* i */ fclose(out); } int main(int argc, char *argv[]) { int skip_con=0; // skip_con should just be a static in the energy control function double E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, rcm_sum=0.0, vcm_sum=0.0, timesteps=0.0, n_act_sum=0.0, n_act_distr[N_MAX]; double3 xcm, vcm, xdc, vdc; // these should go away /* data for active particles */ int n_act, ind_act[N_MAX]; double t_act[N_MAX], pot_act_new[N_MAX], pot_act_tmp[N_MAX]; // x_act_new and v_act_new arrays hold the predicted position and velocity of i-particles, which is later corrected before moving into the j-particle memory. The _old arrays hold the previous values which are needed to calculate the high derivatives. The _old arrays hold the previous values which are needed to calculate the high derivatives. The [a,adot]_act_tmp arrays hold the calculation results from each node. The [a,adot]_act_new arrays hold the reduced calculation results from all nodes. double3 x_act_new[N_MAX], v_act_new[N_MAX], a_act_old[N_MAX], adot_act_old[N_MAX], a_act_tmp[N_MAX], adot_act_tmp[N_MAX], a_act_new[N_MAX], adot_act_new[N_MAX]; double pot_ext[N_MAX], pot_act_ext[N_MAX]; // for all EXTPOT int i_bh1, i_bh2; double m_bh1, m_bh2; int inf_event[N_MAX]; double3 x_bbhc, v_bbhc; int new_tunit=51, new_xunit=51; double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface. /* INIT the rand() !!! */ srand(19640916); /* it is just my birthday :-) */ /* Init MPI */ MPI_Init(&argc, &argv); /* Define the total number of processors and the Rank of each processors */ int n_proc, myRank; const int rootRank = 0; MPI_Comm_size(MPI_COMM_WORLD, &n_proc); MPI_Comm_rank(MPI_COMM_WORLD, &myRank); /* Define the processors names */ int name_proc; char processor_name[MPI_MAX_PROCESSOR_NAME]; MPI_Get_processor_name(processor_name, &name_proc); /* Print the Rank and the names of processors */ printf("Rank of the processor %03d on %s \n", myRank, processor_name); /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); /* read the input parameters */ config = new Config("phigrape.conf"); Source_particle_list source_particle_list(N_MAX); int *ind = source_particle_list.ind; double *m = source_particle_list.m; double3 *x = source_particle_list.x; double3 *v = source_particle_list.v; double *pot = source_particle_list.pot; double3 *a = source_particle_list.a; double3 *adot = source_particle_list.adot; double *t = source_particle_list.t; double *dt = source_particle_list.dt; int diskstep, N; double time_cur; if (is_hdf5(config->input_file_name)) { #ifndef HAS_HDF5 fprintf(stderr, "ERROR: input file is in HDF5 format, but the code was compiled without HDF5 support\n"); return -1; #endif h5_read(config->input_file_name, &diskstep, &N, &time_cur, m, x, v); } else ascii_read(config->input_file_name, &diskstep, &N, &time_cur, m, x, v); std::iota(ind, ind+N, 0); double eps = config->eps; double eta = config->eta; double t_end = config->t_end; double dt_disk = config->dt_disk; double dt_contr = config->dt_contr; double dt_bh = config->dt_bh; if (myRank == rootRank) { if (config->binary_smbh_influence_sphere_output) for (int i=0; ilive_smbh_output && (config->live_smbh_count > 0)) { out = fopen("bh.dat", "w"); fclose(out); } if ((config->live_smbh_neighbor_output) && (config->live_smbh_count > 0)) { out = fopen("bh_neighbors.dat", "w"); fclose(out); } if (config->binary_smbh_influence_sphere_output) { out = fopen("bbh_inf.dat", "w"); fclose(out); } } get_CPU_time(&CPU_time_real0, &CPU_time_user0, &CPU_time_syst0); } /* if (myRank == rootRank) */ /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); double normalization_mass=1, normalization_length=1, normalization_velocity=1; if (config->ext_units_physical) { normalization_mass = 1/config->unit_mass; normalization_length = 1000/config->unit_length; normalization_velocity = 1.52484071426404437233e+01*sqrt(config->unit_length/config->unit_mass); } Plummer ext_bulge(config->ext_m_bulge*normalization_mass, config->ext_b_bulge*normalization_length); Miyamoto_Nagai ext_disk(config->ext_m_disk*normalization_mass, config->ext_a_disk*normalization_length, config->ext_b_disk*normalization_length); Plummer ext_halo_plummer(config->ext_m_halo_plummer*normalization_mass, config->ext_b_halo_plummer*normalization_length); Logarithmic_halo ext_log_halo(config->ext_log_halo_v*normalization_velocity, config->ext_log_halo_r*normalization_length); Dehnen ext_dehnen(config->ext_dehnen_m*normalization_mass, config->ext_dehnen_r*normalization_length, config->ext_dehnen_gamma); std::vector external_gravity_components; external_gravity_components.push_back(&ext_bulge); external_gravity_components.push_back(&ext_disk); external_gravity_components.push_back(&ext_halo_plummer); external_gravity_components.push_back(&ext_log_halo); external_gravity_components.push_back(&ext_dehnen); bool has_external_gravity = false; for (auto component : external_gravity_components) { if (component->is_active) { has_external_gravity = true; break; } } if (has_external_gravity) printf("External Potential: \n\n"); if (ext_bulge.is_active) printf("m_bulge = %.4E b_bulge = %.4E\n", config->ext_m_bulge*normalization_mass, config->ext_b_bulge*normalization_length); if (ext_disk.is_active) printf("m_disk = %.4E a_disk = %.4E b_disk = %.4E\n", config->ext_m_disk*normalization_mass, config->ext_a_disk*normalization_length, config->ext_b_disk*normalization_length); if (ext_halo_plummer.is_active) printf("m_halo = %.4E b_halo = %.4E\n", config->ext_m_halo_plummer*normalization_mass, config->ext_b_halo_plummer*normalization_length); if (ext_log_halo.is_active) printf("v_halo = %.6E r_halo = %.6E \n", config->ext_log_halo_v*normalization_velocity, config->ext_log_halo_r*normalization_length); if (ext_dehnen.is_active) printf("m_ext = %.6E r_ext = %.6E \t g_ext = %.3E \n", config->ext_dehnen_m*normalization_mass, config->ext_dehnen_r*normalization_length, config->ext_dehnen_gamma); printf("\n"); fflush(stdout); double eta_bh = eta/ETA_BH_CORR; const double dt_min = pow(2, DTMINPOWER); const double dt_max = std::min({dt_disk, dt_contr, pow(2, DTMAXPOWER)}); double t_disk = dt_disk*(1+floor(time_cur/dt_disk)); double t_contr = dt_contr*(1+floor(time_cur/dt_contr)); double t_bh = dt_bh*(1+floor(time_cur/dt_bh)); if (myRank == rootRank) { printf("t_disk = %.6E t_contr = %.6E t_bh = %.6E \n", t_disk, t_contr, t_bh); printf("\n"); fflush(stdout); } /* if (myRank == rootRank) */ std::fill(t, t+N, time_cur); std::fill(dt, dt+N, dt_min); /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); /* some local settings for G6a boards */ int clusterid, numGPU; if (config->devices_per_node==0) { MPI_Comm shmcomm; MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm); MPI_Comm_size(shmcomm, &numGPU); MPI_Comm_rank(shmcomm, &clusterid); } else { numGPU = config->devices_per_node; clusterid = myRank % numGPU; } printf("Rank of the processor %03d : Number of GPUs %01d : Cluster ID %01d \n", myRank, numGPU, clusterid); fflush(stdout); /* init the local GRAPEs */ g6_open(clusterid); int npipe = g6_npipes(); g6_set_tunit(new_tunit); g6_set_xunit(new_xunit); int n_loc = N/n_proc; Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps); Active_search active_search(myRank, n_proc, n_loc, N); Black_hole_physics black_hole_physics; if (config->live_smbh_count == 1) black_hole_physics = Black_hole_physics(m[0], 0, myRank, rootRank); else if (config->live_smbh_count == 2) black_hole_physics = Black_hole_physics(m[0], m[1], myRank, rootRank); if (config->binary_smbh_pn) { black_hole_physics.set_post_newtonian(config->pn_c, config->pn_usage.data()); if (config->pn_usage[6]) black_hole_physics.set_spins(config->smbh1_spin.data(), config->smbh2_spin.data()); } black_hole_physics.set_softening(eps, config->live_smbh_custom_eps); #ifdef ETICS grapite_read_particle_tags(N, config->grapite_mask_file_name.c_str(), myRank, n_loc); grapite_set_dt_exp(config->dt_scf); grapite_set_t_exp(time_cur); #endif /* load the nj particles to the G6 */ for (int k=0; k= 0) { grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc); x[grapite_cep_index] = xdc; v[grapite_cep_index] = vdc; grapite_update_cep(time_cur, xdc, vdc, zeros, zeros); } #endif /* define the all particles as a active on all the processors for the first time grav calc. */ calc_self_grav(time_cur, N, ind, x, v, pot_act_tmp, a_act_tmp, adot_act_tmp); /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); /* Reduce the "global" vectors from "local" on all processors) */ MPI_Allreduce(pot_act_tmp, pot, N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(a_act_tmp, a, 3*N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(adot_act_tmp, adot, 3*N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); if (config->live_smbh_count == 2) { black_hole_physics.set_xv(x[0], x[1], v[0], v[1]); if (config->live_smbh_custom_eps >= 0) black_hole_physics.adjust_softening(pot[0], pot[1], a[0], a[1], adot[0], adot[1]); if (config->binary_smbh_pn) black_hole_physics.adjust_post_newtonian(dt[0], a[0], a[1], adot[0], adot[1]); } calc_ext_grav(external_gravity_components, N, x, v, pot_ext, a, adot); /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); /* Energy control... */ if (myRank == rootRank) { energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext); } /* if (myRank == rootRank) */ #ifdef ETICS if (config->etics_dump_coeffs && (diskstep==0)) { char out_fname[256]; sprintf(out_fname, "coeffs.%06d.%02d.dat", 0, myRank); grapite_dump(out_fname, 2); } if (grapite_cep_index >= 0) { grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc); x[grapite_cep_index] = xdc; v[grapite_cep_index] = vdc; grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]); } #endif /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); /* Define initial timestep for all particles on all nodes */ for (int j=0; j dt_max) dt_tmp = dt_max; dt[j] = dt_tmp; if (config->dt_min_warning && (myRank == 0)) { if (dt[j] == dt_min) { printf("!!! Warning0: dt = dt_min = %.6E \t ind = %07d \n", dt[j], ind[j]); fflush(stdout); } } } /* j */ if (config->live_smbh_count > 0) { double min_dt = *std::min_element(dt, dt+N); for (int i=0; ilive_smbh_count; i++) dt[i] = min_dt; } /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); /* load the new values for particles to the local GRAPE's */ /* load the nj particles to the G6 */ for (int k=0; klive_smbh_output) black_hole_physics.write_bh_data(time_cur, m, x, v, pot, a, adot, dt); /* Write BH NB data... */ if (config->live_smbh_neighbor_output) write_bh_nb_data(config->live_smbh_neighbor_number, config->live_smbh_count, time_cur, N, m, x, v); } /* if (myRank == rootRank) */ /* Get the Starting time on rootRank */ if (myRank == rootRank) { get_CPU_time(&CPU_time_real0, &CPU_time_user0, &CPU_time_syst0); get_CPU_time(&CPU_time_real, &CPU_time_user, &CPU_time_syst); } /* if (myRank == rootRank) */ timesteps = 0.0; // Why won't those two be long long instead of double + should include the zeroth step n_act_sum = 0.0; for (int i=1; ilive_smbh_count > 0) { i_bh1 = 0; i_bh2 = 1; } #endif #ifdef TIMING get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst); DT_ACT_DEF2 += (CPU_tmp_user - CPU_tmp_user0); #endif #ifdef TIMING get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); #endif for (int i=0; ilive_smbh_count == 2) { black_hole_physics.set_xv(x_act_new[i_bh1], x_act_new[i_bh2], v_act_new[i_bh1], v_act_new[i_bh2]); if (config->live_smbh_custom_eps >= 0) black_hole_physics.adjust_softening(pot_act_new[i_bh1], pot_act_new[i_bh2], a_act_new[i_bh1], a_act_new[i_bh2], adot_act_new[i_bh1], adot_act_new[i_bh2]); if (config->binary_smbh_pn) black_hole_physics.adjust_post_newtonian(dt[i_bh1], a_act_new[i_bh1], a_act_new[i_bh2], adot_act_new[i_bh1], adot_act_new[i_bh2]); } calc_ext_grav(external_gravity_components, n_act, x_act_new, v_act_new, pot_act_ext, a_act_new, adot_act_new); /* correct the active particles positions etc... on all the nodes */ #ifdef TIMING get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); #endif double min_dt = dt_max; for (int i=0; ilive_smbh_count > 0) && (ind_act[i] < config->live_smbh_count)) eta_curr = eta_bh; else eta_curr = eta; double dt_new = aarseth_step(eta_curr, dt_tmp, a_act_new[i], adot_act_new[i], a2, a3); //TODO the below should be moved to a function if (dt_new < dt_min) dt_tmp = dt_min; if ((dt_new < dt_tmp) && (dt_new > dt_min)) { int power = log(dt_new)/log(2.0) - 1; dt_tmp = pow(2.0, (double)power); // TODO why is this casting needed here? } if ((dt_new > 2*dt_tmp) && (fmod(min_t, 2*dt_tmp) == 0) && (2*dt_tmp <= dt_max)) { dt_tmp *= 2; } if (config->dt_min_warning && (myRank == 0)) { if (dt_tmp == dt_min) { printf("!!! Warning1: dt_act = dt_min = %.6E \t ind_act = %07d \n", dt_tmp, ind_act[i]); fflush(stdout); } } if (dt_tmp < min_dt) min_dt = dt_tmp; int j_act = ind_act[i]; x[j_act] = x_act_new[i]; v[j_act] = v_act_new[i]; t[j_act] = min_t; dt[j_act] = dt_tmp; pot[j_act] = pot_act_new[i]; pot_ext[j_act] = pot_act_ext[i]; // ??? a[j_act] = a_act_new[i]; adot[j_act] = adot_act_new[i]; } /* i */ /* define the min. dt over all the act. part. and set it also for the BH... */ if (config->live_smbh_count > 0) { if (config->live_smbh_count>=1) dt[0] = min_dt; if (config->live_smbh_count==2) dt[1] = min_dt; } if (config->binary_smbh_influence_sphere_output && (myRank == rootRank)) { //TODO clean up this mass. We don't want to have all these _act arrays; they are only needed for this lame function. binary_smbh_influence_sphere_output(i_bh1, i_bh2, ind_act, n_act, timesteps, time_cur, config->binary_smbh_influence_radius_factor, inf_event, &source_particle_list); } #ifdef TIMING get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst); DT_ACT_CORR += (CPU_tmp_user - CPU_tmp_user0); #endif /* load the new values for active particles to the local GRAPE's */ #ifdef TIMING get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); #endif for (int i=0; i= t_bh) { if (myRank == rootRank) { /* Write BH data... */ if (config->live_smbh_output) black_hole_physics.write_bh_data(time_cur, m, x, v, pot, a, adot, dt); /* Write BH NB data... */ if (config->live_smbh_neighbor_output) write_bh_nb_data(config->live_smbh_neighbor_number, config->live_smbh_count, time_cur, N, m, x, v); } /* if (myRank == rootRank) */ t_bh += dt_bh; } /* if (time_cur >= t_bh) */ if (time_cur >= t_contr) { if (myRank == rootRank) { energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext); /* write cont data */ if (config->output_hdf5) h5_write("data.con", diskstep, N, time_cur, m, x, v, pot, a, adot, 0, true); else ascii_write("data.con", diskstep, N, time_cur, m, x, v, 16); /* possible OUT for timing !!! */ #ifdef TIMING FILE *out = fopen("timing.dat", "a"); DT_TOT = DT_ACT_DEF1 + DT_ACT_DEF2 + DT_ACT_DEF3 + DT_ACT_PRED + DT_ACT_GRAV + DT_EXT_GRAV + DT_GMC_GRAV + DT_GMC_GMC_GRAV + DT_EXT_GMC_GRAV + DT_ACT_CORR + DT_ACT_LOAD + DT_STEVOL + DT_STARDISK + DT_STARDESTR + DT_ACT_REDUCE; fprintf(out,"%.8E \t %.6E \t %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f \t %.3f \t %.8E %.8E %.8E \t %.8E %.8E %.8E \n", time_cur, DT_TOT, 100.0*DT_ACT_DEF1/DT_TOT, 100.0*DT_ACT_DEF2/DT_TOT, 100.0*DT_ACT_DEF3/DT_TOT, 100.0*DT_ACT_PRED/DT_TOT, 100.0*DT_ACT_GRAV/DT_TOT, 100.0*DT_EXT_GRAV/DT_TOT, 100.0*DT_GMC_GRAV/DT_TOT, 100.0*DT_GMC_GMC_GRAV/DT_TOT, 100.0*DT_EXT_GMC_GRAV/DT_TOT, 100.0*DT_ACT_CORR/DT_TOT, 100.0*DT_ACT_LOAD/DT_TOT, 100.0*DT_STEVOL/DT_TOT, 100.0*DT_STARDISK/DT_TOT, 100.0*DT_STARDESTR/DT_TOT, 100.0*DT_ACT_REDUCE/DT_TOT, CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0, timesteps, n_act_sum, 57.0*N*n_act_sum/(CPU_time_user-CPU_time_user0)/1.0E+09); fclose(out); #endif } /* if (myRank == rootRank) */ #ifdef ETICS // We are /inside/ a control step, so all particles must be synchronized; we can safely calculate their density centre. The acceleration and jerk currently in the memory are for the predicted position of the CEP, by calling grapite_calc_center we "correct" the position and velocity, but not the gravity at that point. if (grapite_cep_index >= 0) { grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc); x[grapite_cep_index] = xdc; v[grapite_cep_index] = vdc; grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]); } #endif t_contr += dt_contr; } /* if (time_cur >= t_contr) */ if (time_cur >= t_disk) { char out_fname[256]; diskstep++; if (myRank == rootRank) { sprintf(out_fname, "%06d", diskstep); if (config->output_hdf5) h5_write(std::string(out_fname) + ".h5", diskstep, N, time_cur, m, x, v, pot, a, adot, config->output_extra_mode, config->output_hdf5_double_precision); else ascii_write(std::string(out_fname) + ".dat", diskstep, N, time_cur, m, x, v, config->output_ascii_precision); } /* if (myRank == rootRank) */ #ifdef ETICS if (config->etics_dump_coeffs) { sprintf(out_fname, "coeffs.%06d.%02d.dat", diskstep, myRank); grapite_dump(out_fname, 2); } #endif t_disk += dt_disk; } /* if (time_cur >= t_disk) */ } /* while (time_cur < t_end) */ /* close the local GRAPEs */ g6_close(clusterid); /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); double g6_calls_sum; MPI_Reduce(&calc_self_grav.g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD); /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); if (myRank == rootRank) { /* Write some output for the timestep annalize... */ printf("\n"); printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls_sum); printf("\n"); printf("Real Speed = %.3f GFlops \n", 57.0*N*n_act_sum/(CPU_time_user-CPU_time_user0)/1.0E+09); fflush(stdout); } /* if (myRank == rootRank) */ /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); /* Finalize the MPI work */ MPI_Finalize(); return 0; }