Moved the active search outside of the main loop
This commit is contained in:
parent
c79cef895a
commit
ea94dbb626
1 changed files with 81 additions and 72 deletions
153
phigrape.cpp
153
phigrape.cpp
|
|
@ -529,6 +529,60 @@ void energy_contr(const double time_cur, const double timesteps, const double n_
|
|||
E_tot_corr_sd_0 = 0;
|
||||
}
|
||||
|
||||
class Active_search {
|
||||
// TODO you can add pointers to t and dt at the constructor, no point giving them at get_minimum_time but without the size.
|
||||
public:
|
||||
Active_search(const int myRank, const int n_proc, const int n_loc, const int N)
|
||||
: myRank(myRank), n_proc(n_proc), n_loc(n_loc), N(N)
|
||||
{
|
||||
ind_act_loc = new int[n_loc];
|
||||
}
|
||||
~Active_search() { delete[] ind_act_loc; };
|
||||
double get_minimum_time(const double t[], const double dt[])
|
||||
{
|
||||
double min_t_loc, min_t;
|
||||
#ifdef ACT_DEF_GRAPITE
|
||||
min_t_loc = grapite_get_minimum_time();
|
||||
#else
|
||||
min_t_loc = t[myRank*n_loc]+dt[myRank*n_loc];
|
||||
for (int j=myRank*n_loc+1; j<(myRank+1)*n_loc; j++) {
|
||||
double tmp = t[j] + dt[j];
|
||||
if (tmp < min_t_loc) min_t_loc = tmp;
|
||||
}
|
||||
#endif
|
||||
/* Reduce the "global" min_t from min_t_loc "local" on all processors) */
|
||||
MPI_Allreduce(&min_t_loc, &min_t, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
|
||||
return min_t;
|
||||
}
|
||||
void get_active_indices(const double min_t, const double t[], const double dt[], int ind_act[], int *n_act)
|
||||
{
|
||||
#ifdef ACT_DEF_GRAPITE
|
||||
int n_act_loc;
|
||||
grapite_active_search(min_t, ind_act_loc, &n_act_loc);
|
||||
if (myRank > 0)
|
||||
for (int i=0; i<n_act_loc; i++)
|
||||
ind_act_loc[i] += myRank*n_loc;
|
||||
int n_act_arr[256], displs[256]; // Assuming maximum of 256 processes... seems safe.
|
||||
MPI_Allgather(&n_act_loc, 1, MPI_INT, n_act_arr, 1, MPI_INT, MPI_COMM_WORLD);
|
||||
*n_act = n_act_arr[0];
|
||||
for (int i=1; i<n_proc; i++)
|
||||
*n_act += n_act_arr[i];
|
||||
displs[0] = 0;
|
||||
for (int i=1; i<n_proc; i++)
|
||||
displs[i]=displs[i-1]+n_act_arr[i-1];
|
||||
MPI_Allgatherv(ind_act_loc, n_act_loc, MPI_INT, ind_act, n_act_arr, displs, MPI_INT, MPI_COMM_WORLD);
|
||||
#else
|
||||
*n_act = 0;
|
||||
for (int i=0; i<N; i++) {
|
||||
if (t[i]+dt[i] == min_t) ind_act[(*n_act)++] = i;
|
||||
} /* i */
|
||||
#endif // ACT_DEF_GRAPITE
|
||||
}
|
||||
private:
|
||||
int myRank, n_proc, n_loc, N;
|
||||
int *ind_act_loc;
|
||||
};
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int name_proc, n_proc=1, myRank=0, rootRank=0, cur_rank,
|
||||
|
|
@ -601,7 +655,7 @@ int main(int argc, char *argv[])
|
|||
|
||||
double s_bh1[3] = {0.0, 0.0, 1.0};
|
||||
double s_bh2[3] = {0.0, 0.0, 1.0};
|
||||
|
||||
|
||||
|
||||
/* INIT the rand() !!! */
|
||||
srand(19640916); /* it is just my birthday :-) */
|
||||
|
|
@ -780,6 +834,8 @@ int main(int argc, char *argv[])
|
|||
|
||||
n_loc = N/n_proc;
|
||||
|
||||
Active_search active_search(myRank, n_proc, n_loc, N);
|
||||
|
||||
    /* Wait for all processors to finish their work... */
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
|
|
@ -1145,26 +1201,7 @@ int main(int argc, char *argv[])
|
|||
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
||||
#endif
|
||||
|
||||
#ifdef ACT_DEF_GRAPITE
|
||||
min_t_loc = grapite_get_minimum_time();
|
||||
#else
|
||||
min_t_loc = t[0]+dt[0];
|
||||
|
||||
for (int j=0; j<n_loc; j++) {
|
||||
jjj = j + myRank*n_loc;
|
||||
tmp = t[jjj] + dt[jjj];
|
||||
if (tmp < min_t_loc) min_t_loc = tmp;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Wait to all processors to finish his works... */
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
/* Reduce the "global" min_t from min_t_loc "local" on all processors) */
|
||||
MPI_Allreduce(&min_t_loc, &min_t, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
|
||||
|
||||
/* Wait to all processors to finish his works... */
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
min_t = active_search.get_minimum_time(t, dt);
|
||||
|
||||
#ifdef TIMING
|
||||
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
||||
|
|
@ -1175,31 +1212,7 @@ int main(int argc, char *argv[])
|
|||
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
||||
#endif
|
||||
|
||||
#ifdef ACT_DEF_GRAPITE
|
||||
int ind_act_loc[N_MAX], n_act_loc;
|
||||
grapite_active_search(min_t, ind_act_loc, &n_act_loc);
|
||||
if (myRank > 0)
|
||||
for (int i=0; i<n_act_loc; i++)
|
||||
ind_act_loc[i] += myRank*n_loc;
|
||||
int n_act_arr[256], displs[256]; // Assuming maximum of 256 processes... seems safe.
|
||||
MPI_Allgather(&n_act_loc, 1, MPI_INT, n_act_arr, 1, MPI_INT, MPI_COMM_WORLD);
|
||||
n_act = n_act_arr[0];
|
||||
for (int i=1; i<n_proc; i++)
|
||||
n_act += n_act_arr[i];
|
||||
displs[0] = 0;
|
||||
for (int i=1; i<n_proc; i++)
|
||||
displs[i]=displs[i-1]+n_act_arr[i-1];
|
||||
MPI_Allgatherv(ind_act_loc, n_act_loc, MPI_INT, ind_act, n_act_arr, displs, MPI_INT, MPI_COMM_WORLD);
|
||||
#else
|
||||
n_act = 0;
|
||||
|
||||
for (int i=0; i<N; i++) {
|
||||
if (t[i]+dt[i] == min_t ) {
|
||||
ind_act[n_act] = i;
|
||||
n_act++;
|
||||
}
|
||||
} /* i */
|
||||
#endif // ACT_DEF_GRAPITE
|
||||
active_search.get_active_indices(min_t, t, dt, ind_act, &n_act);
|
||||
|
||||
// TODO deal with it below
|
||||
#ifdef ACT_DEF_GRAPITE
|
||||
|
|
@ -1245,23 +1258,17 @@ int main(int argc, char *argv[])
|
|||
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
||||
#endif
|
||||
for (int i=0; i<n_act; i++) {
|
||||
iii = ind_act[i];
|
||||
|
||||
m_act[i] = m[iii];
|
||||
|
||||
x_act[i] = x[iii];
|
||||
v_act[i] = v[iii];
|
||||
|
||||
t_act[i] = t[iii];
|
||||
dt_act[i] = dt[iii];
|
||||
|
||||
pot_act[i] = pot[iii];
|
||||
|
||||
pot_act_ext[i] = pot_ext[iii];
|
||||
|
||||
a_act[i] = a[iii];
|
||||
adot_act[i] = adot[iii];
|
||||
|
||||
int j_act = ind_act[i];
|
||||
m_act[i] = m[j_act];
|
||||
x_act[i] = x[j_act];
|
||||
v_act[i] = v[j_act];
|
||||
t_act[i] = t[j_act];
|
||||
dt_act[i] = dt[j_act];
|
||||
// NOTICE Why do we need pot_act and pot_act_ext? Probably redundant.
|
||||
pot_act[i] = pot[j_act];
|
||||
pot_act_ext[i] = pot_ext[j_act];
|
||||
a_act[i] = a[j_act];
|
||||
adot_act[i] = adot[j_act];
|
||||
} /* i */
|
||||
|
||||
#ifdef TIMING
|
||||
|
|
@ -1303,7 +1310,6 @@ int main(int argc, char *argv[])
|
|||
#endif
|
||||
|
||||
MPI_Allreduce(pot_act_tmp, pot_act_new, n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
MPI_Allreduce(a_act_tmp, a_act_new, 3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
MPI_Allreduce(adot_act_tmp, adot_act_new, 3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
|
|
@ -1397,6 +1403,9 @@ int main(int argc, char *argv[])
|
|||
#endif
|
||||
|
||||
for (int i=0; i<n_act; i++) {
|
||||
// NOTICE looks like we're doing three unrelated things in this loop: (1) correcting positions and velocities (2) calculating new steps, and (3) putting the corrected values from the _act_new back in the _act arrays.
|
||||
// After going back to the _act arrays they don't do much before they go back to the main arrays, so this copy seems redundant (the SMBH influence sphere printout needs these values but it should be a function anyway).
|
||||
// TODO split this loop into the three tasks it is doing, and remove the redundancy.
|
||||
dt_tmp = min_t - t_act[i];
|
||||
|
||||
dt3over6 = dt_tmp*dt_tmp*dt_tmp/6.0;
|
||||
|
|
@ -1573,25 +1582,25 @@ int main(int argc, char *argv[])
|
|||
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
||||
#endif
|
||||
|
||||
for (int j=0; j<n_act; j++) { // TODO would be nicer to use i instead of j here
|
||||
for (int i=0; i<n_act; i++) { // TODO would be nicer to use i instead of j here
|
||||
#ifdef ETICS_CEP
|
||||
if (ind_act[j] == grapite_cep_index) grapite_update_cep(t_act[j], x_act[j], v_act[j], a_act[j], adot_act[j]); // All ranks should do it.
|
||||
if (ind_act[i] == grapite_cep_index) grapite_update_cep(t_act[i], x_act[i], v_act[i], a_act[i], adot_act[i]); // All ranks should do it.
|
||||
#endif
|
||||
cur_rank = ind_act[j]/n_loc;
|
||||
cur_rank = ind_act[i]/n_loc;
|
||||
|
||||
if (myRank == cur_rank) {
|
||||
|
||||
jjj = ind_act[j] - myRank*n_loc;
|
||||
jjj = ind_act[i] - myRank*n_loc;
|
||||
|
||||
a2by18 = {0, 0, 0};
|
||||
a1by6 = adot_act[j]*(1./6.);
|
||||
aby2 = a_act[j]*0.5;
|
||||
a1by6 = adot_act[i]*(1./6.);
|
||||
aby2 = a_act[i]*0.5;
|
||||
|
||||
g6_set_j_particle(clusterid, jjj, ind_act[j], t_act[j], dt_act[j], m_act[j], a2by18, a1by6, aby2, v_act[j], x_act[j]);
|
||||
g6_set_j_particle(clusterid, jjj, ind_act[i], t_act[i], dt_act[i], m_act[i], a2by18, a1by6, aby2, v_act[i], x_act[i]);
|
||||
|
||||
} /* if (myRank == cur_rank) */
|
||||
|
||||
} /* j */
|
||||
} /* i */
|
||||
|
||||
#ifdef TIMING
|
||||
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue