Moved calc_self_grav to a class and cleaned up a little

Yohai Meiron 2020-04-14 20:20:39 -04:00
parent b51613695f
commit 30ae8631a9

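Summary: the free function calc_self_grav() is replaced by a function object, Calc_self_grav, that owns the per-particle softening array (h2) and the GRAPE call counter (g6_calls), so eps2, g6_calls, n_loc and h2_i no longer have to be threaded through every call. A minimal usage sketch, assembled from the call sites in the diff below (names such as time_cur, n_act, ind_act and rootRank are taken from main(); this is an illustration, not code from the commit):

// Construct once per MPI rank, after N, n_loc, clusterid, npipe and eps are known.
Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps);

// Call as a functor wherever the old calc_self_grav() was called.
calc_self_grav(time_cur, n_act, ind_act, x_act_new, v_act_new,
               pot_act_tmp, a_act_tmp, adot_act_tmp);

// The per-rank GRAPE call counter now lives on the object and is reduced at the end of the run.
double g6_calls_sum;
MPI_Reduce(&calc_self_grav.g6_calls, &g6_calls_sum, 1, MPI_DOUBLE,
           MPI_SUM, rootRank, MPI_COMM_WORLD);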

@@ -123,17 +123,7 @@ double DT_ACT_REDUCE;
#endif
/* some local settings for G6a board's */
int clusterid, ii, nn, numGPU;
int npipe=G6_NPIPE, index_i[G6_NPIPE];
double h2_i[G6_NPIPE], p_i[G6_NPIPE];
double3 x_i[G6_NPIPE], v_i[G6_NPIPE],
a_i[G6_NPIPE], jerk_i[G6_NPIPE];
int new_tunit=51, new_xunit=51;
double ti=0.0;
/* external potential... */
@@ -286,43 +276,32 @@ void write_bh_nb_data(double time_cur, int N, double m[], double3 x[], double3 v
fclose(out);
}
void calc_self_grav(double t, double eps2, double &g6_calls, int n_loc,
int n_act, int ind_act[],
double3 x_act_new[], double3 v_act_new[],
double pot_act_tmp[],
double3 a_act_tmp[],
double3 adot_act_tmp[],
double h2_i[])
{
/* calc the new grav for the active particles */
#ifdef TIMING
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
#endif
class Calc_self_grav {
public:
Calc_self_grav(const int N, const int n_loc, const int clusterid, const int npipe, const double eps)
: g6_calls(0), n_loc(n_loc), clusterid(clusterid), npipe(npipe), eps2(eps*eps)
{
h2.assign(N, eps2);
}
void operator()(const double t, const int n_act, int ind_act[], const double3 x_act[], const double3 v_act[],
double pot[], double3 acc[], double3 jrk[])
{
g6_set_ti(clusterid, t);
int ni = n_act; // TODO why is this needed?
/* define the local phi, a, adot for these active particles */
for (int i=0; i<ni; i+=npipe) {
nn = npipe;
if (ni-i < npipe) nn = ni - i;
for (ii=0; ii<nn; ii++) {
h2_i[ii] = eps2; // TODO This should be a global or something
} /* ii */
for (int i=0; i<n_act; i+=npipe) {
int nn = npipe;
if (n_act-i < npipe) nn = n_act - i;
//TODO any way we can clean up this ugly casting?
g6calc_firsthalf(clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act_new+i, (double(*)[3])v_act_new+i, (double(*)[3])a_act_tmp+i, (double(*)[3])adot_act_tmp+i, pot_act_tmp+i, eps2, h2_i);
g6calc_lasthalf( clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act_new+i, (double(*)[3])v_act_new+i, eps2, h2_i, (double(*)[3])a_act_tmp+i, (double(*)[3])adot_act_tmp+i, pot_act_tmp+i);
g6calc_firsthalf(clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act+i, (double(*)[3])v_act+i, (double(*)[3])acc+i, (double(*)[3])jrk+i, pot+i, eps2, h2.data());
g6calc_lasthalf( clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act+i, (double(*)[3])v_act+i, eps2, h2.data(), (double(*)[3])acc+i, (double(*)[3])jrk+i, pot+i);
g6_calls++;
} /* i */
#ifdef TIMING
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
DT_ACT_GRAV += (CPU_tmp_user - CPU_tmp_user0);
#endif
}
}
double g6_calls;
private:
int n_loc, clusterid, npipe;
double eps2;
std::vector<double> h2;
};
void calc_ext_grav(std::vector<External_gravity*> &external_gravity_components, int n_act, double3 *x_act_new, double3 *v_act_new, double *pot_act_ext, double3 *a_act_new, double3* adot_act_new)
{
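Aside: the g6calc_* calls above still carry the raw (double(*)[3]) casts flagged by the TODO. One possible cleanup, sketched here as a suggestion rather than as part of the commit, is a small hypothetical helper (as_g6) that centralizes the cast and documents the layout assumption it relies on, namely that double3 is a plain struct of exactly three doubles with no padding, which the existing casts already assume:

// Hypothetical helper, not in this commit: reinterpret a double3 array for the GRAPE interface.
static inline auto as_g6(double3 *p) -> double (*)[3]
{
    static_assert(sizeof(double3) == 3*sizeof(double), "double3 must be tightly packed");
    return reinterpret_cast<double (*)[3]>(p);
}

// Possible use at the call site, keeping the same pointer arithmetic as the raw casts:
// g6calc_firsthalf(clusterid, n_loc, nn, ind_act+i, as_g6(x_act+i), as_g6(v_act+i),
//                  as_g6(acc+i), as_g6(jrk+i), pot+i, eps2, h2.data());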
@@ -705,15 +684,15 @@ int main(int argc, char *argv[])
skip_con=0, tmp_i;
double dt_disk, dt_contr, t_disk=0.0, t_contr=0.0,
dt_bh, t_bh=0.0, dt_bh_tmp,
dt_bh, t_bh=0.0,
t_end, time_cur, dt_min, dt_max, min_t, min_t_loc,
eta_s, eta, eta_bh,
E_tot_0, E_tot_corr_0, E_tot_corr_sd_0,
rcm_sum=0.0, vcm_sum=0.0,
eps=0.0, eps2,
eps,
a2_mod, adot2_mod,
dt_tmp, dt2half, dt3over6,
timesteps=0.0, n_act_sum=0.0, n_act_distr[N_MAX], g6_calls=0.0, g6_calls_sum=0.0;
timesteps=0.0, n_act_sum=0.0, n_act_distr[N_MAX];
double3 xcm, vcm, mom,
xdc, vdc,
@@ -743,13 +722,11 @@ int main(int argc, char *argv[])
/* data for active particles */
int n_act, ind_act[N_MAX];
double m_act[N_MAX],
pot_act[N_MAX], t_act[N_MAX], dt_act[N_MAX],
double t_act[N_MAX],
pot_act_new[N_MAX],
pot_act_tmp[N_MAX];
double3 x_act[N_MAX], v_act[N_MAX],
a_act[N_MAX], adot_act[N_MAX],
double3 a_act[N_MAX], adot_act[N_MAX],
x_act_new[N_MAX], v_act_new[N_MAX],
a_act_new[N_MAX], adot_act_new[N_MAX],
a_act_tmp[N_MAX], adot_act_tmp[N_MAX];;
@@ -765,6 +742,12 @@ int main(int argc, char *argv[])
int inf_event[N_MAX];
double3 x_bbhc, v_bbhc;
/* some local settings for G6a board's */
int clusterid, numGPU, npipe=G6_NPIPE;
int new_tunit=51, new_xunit=51;
double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface.
/* INIT the rand() !!! */
@@ -801,31 +784,19 @@ int main(int argc, char *argv[])
}
else
ascii_read(config->input_file_name, &diskstep, &N, &time_cur, m, x, v);
std::iota(ind, ind+N, 0);
if (myRank == rootRank) {
//TODO move it out of (myRank == rootRank) so you don't need to communicate them.
eps = config->eps;
eta = config->eta;
t_end = config->t_end;
dt_disk = config->dt_disk;
dt_contr = config->dt_contr;
dt_bh = config->dt_bh;
eta = config->eta;
strcpy(inp_fname, config->input_file_name.c_str());
if (myRank == rootRank) {
if (config->binary_smbh_influence_sphere_output) for (int i=0; i<N; i++) inf_event[i] = 0; // WARNING N wasn't set yet!
/*
eps : Plummer softening parameter (can be even 0)
t_end : end time of calculation
dt_disk : interval of snapshot files output (0xxx.dat)
dt_contr : interval for the energy control output (contr.dat)
dt_bh : interval for BH output (bh.dat & bh_neighbors.dat)
eta : parameter for timestep determination
inp_data : name of the input file (data.inp)
*/
printf("\n");
printf("Begin the calculation of phi-GRAPE program on %03d processors\n", n_proc);
printf("\n");
@@ -872,19 +843,6 @@ int main(int argc, char *argv[])
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
/* Broadcast all useful values to all processors... */
MPI_Bcast(&N, 1, MPI_INT, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&eps, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&eta, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&t_end, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&dt_disk, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&dt_contr, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&dt_bh, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&time_cur, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
double normalization_mass=1, normalization_length=1, normalization_velocity=1;
if (config->ext_units_physical) {
normalization_mass = 1/config->unit_mass;
@@ -915,8 +873,6 @@ int main(int argc, char *argv[])
eta_s = eta/ETA_S_CORR;
eta_bh = eta/ETA_BH_CORR;
eps2 = SQR(eps);
dt_min = 1.0*pow(2.0, DTMINPOWER);
dt_max = 1.0*pow(2.0, DTMAXPOWER);
@@ -942,25 +898,13 @@ int main(int argc, char *argv[])
n_loc = N/n_proc;
Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps);
Active_search active_search(myRank, n_proc, n_loc, N);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
/* Broadcast the values of all particles to all processors... */
MPI_Bcast(ind, N, MPI_INT, rootRank, MPI_COMM_WORLD);
MPI_Bcast(m, N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(x, 3*N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(v, 3*N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
/* init the local GRAPE's */
if (config->devices_per_node==0) {
MPI_Comm shmcomm;
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm);
@@ -973,6 +917,7 @@ int main(int argc, char *argv[])
printf("Rank of the processor %03d : Number of GPUs %01d : Cluster ID %01d \n", myRank, numGPU, clusterid);
fflush(stdout);
/* init the local GRAPEs */
g6_open(clusterid);
npipe = g6_npipes();
g6_set_tunit(new_tunit);
@@ -1011,26 +956,14 @@ int main(int argc, char *argv[])
n_act = N;
for (int i=0; i<n_act; i++) {
ind_act[i] = ind[i];
ind_act[i] = ind[i]; // isn't it just i?
iii = ind_act[i];
m_act[i] = m[iii];
x_act[i] = x[iii];
v_act[i] = v[iii];
t_act[i] = t[iii];
dt_act[i] = dt[iii];
} /* i */
// NOTE this is where calc_self_grav_zero() used to be.
calc_self_grav(time_cur, eps2, g6_calls, n_loc,
n_act, ind_act,
x_act, v_act,
pot_act_tmp,
a_act_tmp,
adot_act_tmp,
h2_i);
calc_self_grav(time_cur, n_act, ind_act, x, v,
pot_act_tmp, a_act_tmp, adot_act_tmp);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
@@ -1119,7 +1052,7 @@ int main(int argc, char *argv[])
}
}
calc_ext_grav(external_gravity_components, n_act, x_act, v_act, pot_ext, a, adot);
calc_ext_grav(external_gravity_components, n_act, x, v, pot_ext, a, adot);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
@@ -1129,7 +1062,7 @@ int main(int argc, char *argv[])
/* Energy control... */
if (myRank == rootRank) {
energy_contr(time_cur, timesteps, n_act_sum, g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
} /* if (myRank == rootRank) */
#ifdef ETICS
@@ -1216,8 +1149,6 @@ int main(int argc, char *argv[])
n_act_distr[i-1] = 0.0;
}
g6_calls = 0.0; //TODO this should include the calls at the zeroth step, so move it further up.
#ifdef TIMING
DT_TOT = 0.0;
@@ -1345,13 +1276,16 @@ int main(int argc, char *argv[])
DT_ACT_PRED += (CPU_tmp_user - CPU_tmp_user0);
#endif
calc_self_grav(min_t, eps2, g6_calls, n_loc,
n_act, ind_act,
x_act_new, v_act_new,
pot_act_tmp,
a_act_tmp,
adot_act_tmp,
h2_i);
#ifdef TIMING
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
#endif
calc_self_grav(min_t, n_act, ind_act, x_act_new, v_act_new,
pot_act_tmp, a_act_tmp, adot_act_tmp);
#ifdef TIMING
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
DT_ACT_GRAV += (CPU_tmp_user - CPU_tmp_user0);
#endif
/* Reduce the "global" vectors from "local" on all the nodes */
@@ -1527,7 +1461,7 @@ int main(int argc, char *argv[])
if (time_cur >= t_contr) {
if (myRank == rootRank) {
energy_contr(time_cur, timesteps, n_act_sum, g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
/* write cont data */
@@ -1600,7 +1534,8 @@ int main(int argc, char *argv[])
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
MPI_Reduce(&g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD);
double g6_calls_sum;
MPI_Reduce(&calc_self_grav.g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
@@ -1610,7 +1545,7 @@ int main(int argc, char *argv[])
/* Write some output for the timestep annalize... */
printf("\n");
printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls);
printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls_sum);
printf("\n");
printf("Real Speed = %.3f GFlops \n", 57.0*N*n_act_sum/(CPU_time_user-CPU_time_user0)/1.0E+09);
fflush(stdout);