Moved calc_self_grav to a class and cleaned up a little

Yohai Meiron 2020-04-14 20:20:39 -04:00
parent b51613695f
commit 30ae8631a9


@@ -123,17 +123,7 @@ double DT_ACT_REDUCE;
 #endif
 
-/* some local settings for G6a board's */
-int clusterid, ii, nn, numGPU;
-int npipe=G6_NPIPE, index_i[G6_NPIPE];
-double h2_i[G6_NPIPE], p_i[G6_NPIPE];
-double3 x_i[G6_NPIPE], v_i[G6_NPIPE],
-        a_i[G6_NPIPE], jerk_i[G6_NPIPE];
-int new_tunit=51, new_xunit=51;
-double ti=0.0;
-
 /* external potential... */
@@ -286,43 +276,32 @@ void write_bh_nb_data(double time_cur, int N, double m[], double3 x[], double3 v
     fclose(out);
 }
 
-void calc_self_grav(double t, double eps2, double &g6_calls, int n_loc,
-                    int n_act, int ind_act[],
-                    double3 x_act_new[], double3 v_act_new[],
-                    double pot_act_tmp[],
-                    double3 a_act_tmp[],
-                    double3 adot_act_tmp[],
-                    double h2_i[])
-{
-    /* calc the new grav for the active particles */
-
-#ifdef TIMING
-    get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
-#endif
-
-    g6_set_ti(clusterid, t);
-
-    int ni = n_act; // TODO why is this needed?
-
-    /* define the local phi, a, adot for these active particles */
-
-    for (int i=0; i<ni; i+=npipe) {
-        nn = npipe;
-        if (ni-i < npipe) nn = ni - i;
-        for (ii=0; ii<nn; ii++) {
-            h2_i[ii] = eps2; // TODO This should be a global or something
-        } /* ii */
-        //TODO any way we can clean up this ugly casting?
-        g6calc_firsthalf(clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act_new+i, (double(*)[3])v_act_new+i, (double(*)[3])a_act_tmp+i, (double(*)[3])adot_act_tmp+i, pot_act_tmp+i, eps2, h2_i);
-        g6calc_lasthalf( clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act_new+i, (double(*)[3])v_act_new+i, eps2, h2_i, (double(*)[3])a_act_tmp+i, (double(*)[3])adot_act_tmp+i, pot_act_tmp+i);
-        g6_calls++;
-    } /* i */
-
-#ifdef TIMING
-    get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
-    DT_ACT_GRAV += (CPU_tmp_user - CPU_tmp_user0);
-#endif
-}
+class Calc_self_grav {
+public:
+    Calc_self_grav(const int N, const int n_loc, const int clusterid, const int npipe, const double eps)
+        : g6_calls(0), n_loc(n_loc), clusterid(clusterid), npipe(npipe), eps2(eps*eps)
+    {
+        h2.assign(N, eps2);
+    }
+    void operator()(const double t, const int n_act, int ind_act[], const double3 x_act[], const double3 v_act[],
+                    double pot[], double3 acc[], double3 jrk[])
+    {
+        g6_set_ti(clusterid, t);
+        for (int i=0; i<n_act; i+=npipe) {
+            int nn = npipe;
+            if (n_act-i < npipe) nn = n_act - i;
+            //TODO any way we can clean up this ugly casting?
+            g6calc_firsthalf(clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act+i, (double(*)[3])v_act+i, (double(*)[3])acc+i, (double(*)[3])jrk+i, pot+i, eps2, h2.data());
+            g6calc_lasthalf( clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act+i, (double(*)[3])v_act+i, eps2, h2.data(), (double(*)[3])acc+i, (double(*)[3])jrk+i, pot+i);
+            g6_calls++;
+        } /* i */
+    }
+    double g6_calls;
+private:
+    int n_loc, clusterid, npipe;
+    double eps2;
+    std::vector<double> h2;
+};
 
 void calc_ext_grav(std::vector<External_gravity*> &external_gravity_components, int n_act, double3 *x_act_new, double3 *v_act_new, double *pot_act_ext, double3 *a_act_new, double3* adot_act_new)
 {
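For reference, this is how the new functor is wired up in the rest of this commit: it is constructed once per rank after n_loc is known, invoked where the old free function used to be called, and its public g6_calls counter replaces the old global-style accumulator. The snippet below only collects lines from the hunks further down; it is illustrative, not additional code.

    Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps);  // eps is squared internally; h2 is pre-filled with eps^2

    // gravity for the currently active particles
    calc_self_grav(min_t, n_act, ind_act, x_act_new, v_act_new,
                   pot_act_tmp, a_act_tmp, adot_act_tmp);

    // the per-rank call counter is now a public member; summed on the root rank at the end
    MPI_Reduce(&calc_self_grav.g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD);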
@@ -705,15 +684,15 @@ int main(int argc, char *argv[])
         skip_con=0, tmp_i;
     double dt_disk, dt_contr, t_disk=0.0, t_contr=0.0,
-           dt_bh, t_bh=0.0, dt_bh_tmp,
+           dt_bh, t_bh=0.0,
            t_end, time_cur, dt_min, dt_max, min_t, min_t_loc,
            eta_s, eta, eta_bh,
            E_tot_0, E_tot_corr_0, E_tot_corr_sd_0,
            rcm_sum=0.0, vcm_sum=0.0,
-           eps=0.0, eps2,
+           eps,
            a2_mod, adot2_mod,
            dt_tmp, dt2half, dt3over6,
-           timesteps=0.0, n_act_sum=0.0, n_act_distr[N_MAX], g6_calls=0.0, g6_calls_sum=0.0;
+           timesteps=0.0, n_act_sum=0.0, n_act_distr[N_MAX];
     double3 xcm, vcm, mom,
             xdc, vdc,
@@ -743,13 +722,11 @@ int main(int argc, char *argv[])
     /* data for active particles */
     int n_act, ind_act[N_MAX];
-    double m_act[N_MAX],
-           pot_act[N_MAX], t_act[N_MAX], dt_act[N_MAX],
+    double t_act[N_MAX],
            pot_act_new[N_MAX],
            pot_act_tmp[N_MAX];
-    double3 x_act[N_MAX], v_act[N_MAX],
-            a_act[N_MAX], adot_act[N_MAX],
+    double3 a_act[N_MAX], adot_act[N_MAX],
             x_act_new[N_MAX], v_act_new[N_MAX],
             a_act_new[N_MAX], adot_act_new[N_MAX],
             a_act_tmp[N_MAX], adot_act_tmp[N_MAX];;
@@ -765,6 +742,12 @@ int main(int argc, char *argv[])
     int inf_event[N_MAX];
     double3 x_bbhc, v_bbhc;
 
+    /* some local settings for G6a board's */
+    int clusterid, numGPU, npipe=G6_NPIPE;
+    int new_tunit=51, new_xunit=51;
+
     double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface.
 
     /* INIT the rand() !!! */
@@ -801,31 +784,19 @@ int main(int argc, char *argv[])
     }
     else
         ascii_read(config->input_file_name, &diskstep, &N, &time_cur, m, x, v);
     std::iota(ind, ind+N, 0);
 
+    eps = config->eps;
+    eta = config->eta;
+    t_end = config->t_end;
+    dt_disk = config->dt_disk;
+    dt_contr = config->dt_contr;
+    dt_bh = config->dt_bh;
+
     if (myRank == rootRank) {
-        //TODO move it out of (myRank == rootRank) so you don't need to communicate them.
-        eps = config->eps;
-        t_end = config->t_end;
-        dt_disk = config->dt_disk;
-        dt_contr = config->dt_contr;
-        dt_bh = config->dt_bh;
-        eta = config->eta;
-        strcpy(inp_fname, config->input_file_name.c_str());
         if (config->binary_smbh_influence_sphere_output) for (int i=0; i<N; i++) inf_event[i] = 0; // WARNING N wasn't set yet!
-        /*
-        eps      : Plummer softening parameter (can be even 0)
-        t_end    : end time of calculation
-        dt_disk  : interval of snapshot files output (0xxx.dat)
-        dt_contr : interval for the energy control output (contr.dat)
-        dt_bh    : interval for BH output (bh.dat & bh_neighbors.dat)
-        eta      : parameter for timestep determination
-        inp_data : name of the input file (data.inp)
-        */
 
         printf("\n");
         printf("Begin the calculation of phi-GRAPE program on %03d processors\n", n_proc);
         printf("\n");
@@ -872,19 +843,6 @@ int main(int argc, char *argv[])
     /* Wait to all processors to finish his works... */
     MPI_Barrier(MPI_COMM_WORLD);
 
-    /* Broadcast all useful values to all processors... */
-    MPI_Bcast(&N, 1, MPI_INT, rootRank, MPI_COMM_WORLD);
-    MPI_Bcast(&eps, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
-    MPI_Bcast(&eta, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
-    MPI_Bcast(&t_end, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
-    MPI_Bcast(&dt_disk, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
-    MPI_Bcast(&dt_contr, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
-    MPI_Bcast(&dt_bh, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
-    MPI_Bcast(&time_cur, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
-
-    /* Wait to all processors to finish his works... */
-    MPI_Barrier(MPI_COMM_WORLD);
-
     double normalization_mass=1, normalization_length=1, normalization_velocity=1;
     if (config->ext_units_physical) {
         normalization_mass = 1/config->unit_mass;
@@ -915,8 +873,6 @@ int main(int argc, char *argv[])
     eta_s = eta/ETA_S_CORR;
     eta_bh = eta/ETA_BH_CORR;
 
-    eps2 = SQR(eps);
-
     dt_min = 1.0*pow(2.0, DTMINPOWER);
     dt_max = 1.0*pow(2.0, DTMAXPOWER);
@@ -942,25 +898,13 @@ int main(int argc, char *argv[])
     n_loc = N/n_proc;
 
+    Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps);
     Active_search active_search(myRank, n_proc, n_loc, N);
 
     /* Wait to all processors to finish his works... */
     MPI_Barrier(MPI_COMM_WORLD);
 
-    /* Broadcast the values of all particles to all processors... */
-    MPI_Bcast(ind, N, MPI_INT, rootRank, MPI_COMM_WORLD);
-    MPI_Bcast(m, N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
-    MPI_Bcast(x, 3*N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
-    MPI_Bcast(v, 3*N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
-
-    /* Wait to all processors to finish his works... */
-    MPI_Barrier(MPI_COMM_WORLD);
-
-    /* Wait to all processors to finish his works... */
-    MPI_Barrier(MPI_COMM_WORLD);
-
-    /* init the local GRAPE's */
     if (config->devices_per_node==0) {
         MPI_Comm shmcomm;
         MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm);
@@ -973,6 +917,7 @@ int main(int argc, char *argv[])
     printf("Rank of the processor %03d : Number of GPUs %01d : Cluster ID %01d \n", myRank, numGPU, clusterid);
     fflush(stdout);
 
+    /* init the local GRAPEs */
     g6_open(clusterid);
     npipe = g6_npipes();
     g6_set_tunit(new_tunit);
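As context for the hunk above (not part of the diff), the per-rank GRAPE/G6 setup now reads roughly as below; the g6_set_xunit(new_xunit) line and the exact ordering are assumed from the new_tunit/new_xunit declarations added earlier, since they fall outside the hunk shown here.

    /* init the local GRAPEs */
    g6_open(clusterid);        // attach this rank to its GRAPE/GPU cluster
    npipe = g6_npipes();       // query the number of hardware pipelines
    g6_set_tunit(new_tunit);   // time unit exponent (51)
    g6_set_xunit(new_xunit);   // position unit exponent (assumed counterpart call)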
@@ -1011,26 +956,14 @@ int main(int argc, char *argv[])
     n_act = N;
     for (int i=0; i<n_act; i++) {
-        ind_act[i] = ind[i];
+        ind_act[i] = ind[i]; // isn't it just i?
         iii = ind_act[i];
-        m_act[i] = m[iii];
-        x_act[i] = x[iii];
-        v_act[i] = v[iii];
         t_act[i] = t[iii];
-        dt_act[i] = dt[iii];
     } /* i */
 
-    // NOTE this is where calc_self_grav_zero() used to be.
-    calc_self_grav(time_cur, eps2, g6_calls, n_loc,
-                   n_act, ind_act,
-                   x_act, v_act,
-                   pot_act_tmp,
-                   a_act_tmp,
-                   adot_act_tmp,
-                   h2_i);
+    calc_self_grav(time_cur, n_act, ind_act, x, v,
+                   pot_act_tmp, a_act_tmp, adot_act_tmp);
 
     /* Wait to all processors to finish his works... */
     MPI_Barrier(MPI_COMM_WORLD);
@@ -1119,7 +1052,7 @@ int main(int argc, char *argv[])
         }
     }
 
-    calc_ext_grav(external_gravity_components, n_act, x_act, v_act, pot_ext, a, adot);
+    calc_ext_grav(external_gravity_components, n_act, x, v, pot_ext, a, adot);
 
     /* Wait to all processors to finish his works... */
     MPI_Barrier(MPI_COMM_WORLD);
@@ -1129,7 +1062,7 @@ int main(int argc, char *argv[])
     /* Energy control... */
     if (myRank == rootRank) {
-        energy_contr(time_cur, timesteps, n_act_sum, g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
+        energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
     } /* if (myRank == rootRank) */
 
 #ifdef ETICS
@@ -1216,8 +1149,6 @@ int main(int argc, char *argv[])
         n_act_distr[i-1] = 0.0;
     }
 
-    g6_calls = 0.0; //TODO this should include the calls at the zeroth step, so move it further up.
-
 #ifdef TIMING
     DT_TOT = 0.0;
@@ -1345,13 +1276,16 @@ int main(int argc, char *argv[])
         DT_ACT_PRED += (CPU_tmp_user - CPU_tmp_user0);
 #endif
 
-        calc_self_grav(min_t, eps2, g6_calls, n_loc,
-                       n_act, ind_act,
-                       x_act_new, v_act_new,
-                       pot_act_tmp,
-                       a_act_tmp,
-                       adot_act_tmp,
-                       h2_i);
+#ifdef TIMING
+        get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
+#endif
+
+        calc_self_grav(min_t, n_act, ind_act, x_act_new, v_act_new,
+                       pot_act_tmp, a_act_tmp, adot_act_tmp);
+
+#ifdef TIMING
+        get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
+        DT_ACT_GRAV += (CPU_tmp_user - CPU_tmp_user0);
+#endif
 
         /* Reduce the "global" vectors from "local" on all the nodes */
@@ -1527,7 +1461,7 @@ int main(int argc, char *argv[])
         if (time_cur >= t_contr) {
             if (myRank == rootRank) {
-                energy_contr(time_cur, timesteps, n_act_sum, g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
+                energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
 
                 /* write cont data */
@@ -1600,7 +1534,8 @@ int main(int argc, char *argv[])
     /* Wait to all processors to finish his works... */
     MPI_Barrier(MPI_COMM_WORLD);
 
-    MPI_Reduce(&g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD);
+    double g6_calls_sum;
+    MPI_Reduce(&calc_self_grav.g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD);
 
     /* Wait to all processors to finish his works... */
     MPI_Barrier(MPI_COMM_WORLD);
@@ -1610,7 +1545,7 @@ int main(int argc, char *argv[])
     /* Write some output for the timestep annalize... */
     printf("\n");
-    printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls);
+    printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls_sum);
     printf("\n");
     printf("Real Speed = %.3f GFlops \n", 57.0*N*n_act_sum/(CPU_time_user-CPU_time_user0)/1.0E+09);
     fflush(stdout);