diff --git a/phigrape.cpp b/phigrape.cpp index d8bbd04..8e43da3 100644 --- a/phigrape.cpp +++ b/phigrape.cpp @@ -123,17 +123,7 @@ double DT_ACT_REDUCE; #endif -/* some local settings for G6a board's */ -int clusterid, ii, nn, numGPU; - -int npipe=G6_NPIPE, index_i[G6_NPIPE]; -double h2_i[G6_NPIPE], p_i[G6_NPIPE]; -double3 x_i[G6_NPIPE], v_i[G6_NPIPE], - a_i[G6_NPIPE], jerk_i[G6_NPIPE]; -int new_tunit=51, new_xunit=51; - -double ti=0.0; /* external potential... */ @@ -286,43 +276,32 @@ void write_bh_nb_data(double time_cur, int N, double m[], double3 x[], double3 v fclose(out); } -void calc_self_grav(double t, double eps2, double &g6_calls, int n_loc, - int n_act, int ind_act[], - double3 x_act_new[], double3 v_act_new[], - double pot_act_tmp[], - double3 a_act_tmp[], - double3 adot_act_tmp[], - double h2_i[]) -{ - /* calc the new grav for the active particles */ - -#ifdef TIMING - get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); -#endif - - g6_set_ti(clusterid, t); - - int ni = n_act; // TODO why is this needed? - - /* define the local phi, a, adot for these active particles */ - - for (int i=0; i h2; +}; void calc_ext_grav(std::vector &external_gravity_components, int n_act, double3 *x_act_new, double3 *v_act_new, double *pot_act_ext, double3 *a_act_new, double3* adot_act_new) { @@ -705,15 +684,15 @@ int main(int argc, char *argv[]) skip_con=0, tmp_i; double dt_disk, dt_contr, t_disk=0.0, t_contr=0.0, - dt_bh, t_bh=0.0, dt_bh_tmp, + dt_bh, t_bh=0.0, t_end, time_cur, dt_min, dt_max, min_t, min_t_loc, eta_s, eta, eta_bh, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, rcm_sum=0.0, vcm_sum=0.0, - eps=0.0, eps2, + eps, a2_mod, adot2_mod, dt_tmp, dt2half, dt3over6, - timesteps=0.0, n_act_sum=0.0, n_act_distr[N_MAX], g6_calls=0.0, g6_calls_sum=0.0; + timesteps=0.0, n_act_sum=0.0, n_act_distr[N_MAX]; double3 xcm, vcm, mom, xdc, vdc, @@ -743,13 +722,11 @@ int main(int argc, char *argv[]) /* data for active particles */ int n_act, ind_act[N_MAX]; - double m_act[N_MAX], - pot_act[N_MAX], t_act[N_MAX], dt_act[N_MAX], + double t_act[N_MAX], pot_act_new[N_MAX], pot_act_tmp[N_MAX]; - double3 x_act[N_MAX], v_act[N_MAX], - a_act[N_MAX], adot_act[N_MAX], + double3 a_act[N_MAX], adot_act[N_MAX], x_act_new[N_MAX], v_act_new[N_MAX], a_act_new[N_MAX], adot_act_new[N_MAX], a_act_tmp[N_MAX], adot_act_tmp[N_MAX];; @@ -765,6 +742,12 @@ int main(int argc, char *argv[]) int inf_event[N_MAX]; double3 x_bbhc, v_bbhc; + /* some local settings for G6a board's */ + + int clusterid, numGPU, npipe=G6_NPIPE; + + int new_tunit=51, new_xunit=51; + double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface. /* INIT the rand() !!! */ @@ -801,31 +784,19 @@ int main(int argc, char *argv[]) } else ascii_read(config->input_file_name, &diskstep, &N, &time_cur, m, x, v); + std::iota(ind, ind+N, 0); + eps = config->eps; + eta = config->eta; + t_end = config->t_end; + dt_disk = config->dt_disk; + dt_contr = config->dt_contr; + dt_bh = config->dt_bh; + if (myRank == rootRank) { - - //TODO move it out of (myRank == rootRank) so you don't need to communicate them. - eps = config->eps; - t_end = config->t_end; - dt_disk = config->dt_disk; - dt_contr = config->dt_contr; - dt_bh = config->dt_bh; - eta = config->eta; - strcpy(inp_fname, config->input_file_name.c_str()); - if (config->binary_smbh_influence_sphere_output) for (int i=0; iext_units_physical) { normalization_mass = 1/config->unit_mass; @@ -915,8 +873,6 @@ int main(int argc, char *argv[]) eta_s = eta/ETA_S_CORR; eta_bh = eta/ETA_BH_CORR; - eps2 = SQR(eps); - dt_min = 1.0*pow(2.0, DTMINPOWER); dt_max = 1.0*pow(2.0, DTMAXPOWER); @@ -942,25 +898,13 @@ int main(int argc, char *argv[]) n_loc = N/n_proc; + Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps); + Active_search active_search(myRank, n_proc, n_loc, N); /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); - /* Broadcast the values of all particles to all processors... */ - MPI_Bcast(ind, N, MPI_INT, rootRank, MPI_COMM_WORLD); - MPI_Bcast(m, N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD); - MPI_Bcast(x, 3*N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD); - MPI_Bcast(v, 3*N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD); - - /* Wait to all processors to finish his works... */ - MPI_Barrier(MPI_COMM_WORLD); - - /* Wait to all processors to finish his works... */ - MPI_Barrier(MPI_COMM_WORLD); - - /* init the local GRAPE's */ - if (config->devices_per_node==0) { MPI_Comm shmcomm; MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm); @@ -973,6 +917,7 @@ int main(int argc, char *argv[]) printf("Rank of the processor %03d : Number of GPUs %01d : Cluster ID %01d \n", myRank, numGPU, clusterid); fflush(stdout); + /* init the local GRAPEs */ g6_open(clusterid); npipe = g6_npipes(); g6_set_tunit(new_tunit); @@ -1011,26 +956,14 @@ int main(int argc, char *argv[]) n_act = N; for (int i=0; i= t_contr) { if (myRank == rootRank) { - energy_contr(time_cur, timesteps, n_act_sum, g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext); + energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext); /* write cont data */ @@ -1600,7 +1534,8 @@ int main(int argc, char *argv[]) /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); - MPI_Reduce(&g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD); + double g6_calls_sum; + MPI_Reduce(&calc_self_grav.g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD); /* Wait to all processors to finish his works... */ MPI_Barrier(MPI_COMM_WORLD); @@ -1610,7 +1545,7 @@ int main(int argc, char *argv[]) /* Write some output for the timestep annalize... */ printf("\n"); - printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls); + printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls_sum); printf("\n"); printf("Real Speed = %.3f GFlops \n", 57.0*N*n_act_sum/(CPU_time_user-CPU_time_user0)/1.0E+09); fflush(stdout);