Now fast active search works, and can be controlled from config file

This commit is contained in:
Yohai Meiron 2020-05-01 20:41:12 -04:00
parent 2f8f8c582c
commit 1a438449a8
4 changed files with 44 additions and 84 deletions

View file

@ -233,6 +233,7 @@ Config::Config(std::string file_name)
grapite_mask_file_name = get_parameter<std::string>(dictionary, "grapite_mask_file_name", "grapite.mask");
etics_dump_coeffs = get_parameter<bool>(dictionary, "etics_dump_coeffs", false);
grapite_active_search = get_parameter<bool>(dictionary, "grapite_active_search", false);
grapite_dev_exec_threshold = get_parameter<int>(dictionary, "grapite_dev_exec_threshold", 32);
#endif
error_checking();

View file

@ -57,6 +57,7 @@ public:
std::string grapite_mask_file_name;
bool etics_dump_coeffs;
bool grapite_active_search;
int grapite_dev_exec_threshold;
#endif
private:

View file

@ -31,7 +31,7 @@ eta = 0.01
# processes on a machine with a single device, set the value to 1 and use the
# mpirun utility (or whatever is used in your job scheduler) to launch as many
# processes as you like.
devices_per_node = 1
#devices_per_node = 1
##########
@ -168,7 +168,7 @@ devices_per_node = 1
# always included.
#pn_usage = {1, 1, 1, 1, 0, 0, 0}
# The speed of light in N-body units [default: 500]
# The speed of light in N-body units
#pn_c = 477.12
# The spin vectors of the two SMBHs. Only define these if the last component of
@ -188,22 +188,26 @@ devices_per_node = 1
dt_scf = 0.015625
# Name of the mask file for GRAPite [default: grapite.mask]
grapite_mask_file_name = grapite.mask
#grapite_mask_file_name = grapite.mask
# Whether to write to disk a list of SCF coefficients at every dt_disk. [default: false]
etics_dump_coeffs = true
#etics_dump_coeffs = true
# Whether to use the alternative active-particle search procedure that is
# available in the GRAPite library. This requires the number of particles in
# each MPI process to be exactly divisible by 32. It can substantially
# accelerate the calculation in some circumstances. [default: false]
grapite_active_search = true
#grapite_active_search = true
# If the number of active particles in a particular bunch is greater than this
# threshold, the SCF gravity is calculated on the GPU; otherwise it is
# calculated on the CPU. When the active bunch is small, the overhead of using
# the GPU makes the operation more expensive than doing it on the CPU.
# [default: 32]
#grapite_dev_exec_threshold = 512
# TODO
########
# etics dump mode
# threshold for execution on device for grapite
# scaling parameter override

View file

@ -82,9 +82,7 @@ Last redaction : 2019.04.16 12:55
#ifdef ETICS
#include "grapite.h"
//#define ACT_DEF_GRAPITE
#endif
const bool act_def_grapite = true;
Config *config;
@ -250,10 +248,12 @@ public:
double get_minimum_time(const double t[], const double dt[])
{
double min_t_loc, min_t;
if (act_def_grapite) {
#ifdef ETICS
if (config->grapite_active_search) {
min_t_loc = grapite_get_minimum_time();
printf("gggggggggggg min_t_loc=%.10e\n", min_t_loc);
} else {
} else
#endif
{
min_t_loc = t[myRank*n_loc]+dt[myRank*n_loc];
for (int j=myRank*n_loc+1; j<(myRank+1)*n_loc; j++) {
double tmp = t[j] + dt[j];
@ -264,10 +264,10 @@ public:
MPI_Allreduce(&min_t_loc, &min_t, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
return min_t;
}
void get_active_indices(const double min_t, const double t[], const double dt[], int ind_act[], int *n_act)
#warning refrence not pointer
void get_active_indices(const double min_t, const double t[], const double dt[], int ind_act[], int& n_act)
{
if (act_def_grapite) {
#ifdef ETICS
if (config->grapite_active_search) {
int n_act_loc;
grapite_active_search(min_t, ind_act_loc, &n_act_loc);
if (myRank > 0)
@ -275,17 +275,19 @@ public:
ind_act_loc[i] += myRank*n_loc;
int n_act_arr[256], displs[256]; // Assuming maximum of 256 processes... seems safe.
MPI_Allgather(&n_act_loc, 1, MPI_INT, n_act_arr, 1, MPI_INT, MPI_COMM_WORLD);
*n_act = n_act_arr[0];
n_act = n_act_arr[0];
for (int i=1; i<n_proc; i++)
*n_act += n_act_arr[i];
n_act += n_act_arr[i];
displs[0] = 0;
for (int i=1; i<n_proc; i++)
displs[i]=displs[i-1]+n_act_arr[i-1];
MPI_Allgatherv(ind_act_loc, n_act_loc, MPI_INT, ind_act, n_act_arr, displs, MPI_INT, MPI_COMM_WORLD);
} else {
*n_act = 0;
} else
#endif
{
n_act = 0;
for (int i=0; i<N; i++) {
if (t[i]+dt[i] == min_t) ind_act[(*n_act)++] = i;
if (t[i]+dt[i] == min_t) ind_act[n_act++] = i;
} /* i */
}
}
@ -334,8 +336,6 @@ int main(int argc, char *argv[])
double3 xcm, vcm, xdc, vdc; // these should go away
int i_bh1, i_bh2;
double3 x_bbhc, v_bbhc;
double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface.
@ -502,6 +502,10 @@ int main(int argc, char *argv[])
g6_set_tunit(51);
g6_set_xunit(51);
#ifdef ETICS
grapite_set_dev_exec_threshold(config->grapite_dev_exec_threshold);
#endif
int n_loc = N/n_proc;
Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps);
Active_search active_search(myRank, n_proc, n_loc, N);
@ -674,7 +678,6 @@ int main(int argc, char *argv[])
#endif
double min_t = active_search.get_minimum_time(t, dt);
printf("zzzzzzzzzzzzzzzzz %.10e\n", min_t);
#ifdef TIMING
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
@ -685,17 +688,12 @@ int main(int argc, char *argv[])
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
#endif
active_search.get_active_indices(min_t, t, dt, ind_act, &n_act);
active_search.get_active_indices(min_t, t, dt, ind_act, n_act);
// TODO deal with it below
// #ifdef ACT_DEF_GRAPITE
// #error please fix here
// #endif
static int printouts = 0;
int i_bh1=0, i_bh2=1;
#ifdef ETICS
int n_bh = config->live_smbh_count;
if (n_bh>0) {
if (act_def_grapite) {
if (config->grapite_active_search && (n_bh>0)) {
int act_def_grapite_bh_count = 0;
int i_bh[n_bh];
for (int i=0; i<n_act; i++) {
@ -706,54 +704,10 @@ int main(int argc, char *argv[])
}
i_bh1 = i_bh[0];
if (n_bh == 2) i_bh2 = i_bh[1];
} else {
i_bh1 == 0;
if (n_bh == 2) i_bh2 = 1;
}
}
printf("previously got i_bh1=%d and i_bh2=%d\n", i_bh1, i_bh2);
for (int i=0; i<n_act; i++) {
if (ind_act[i]==0) i_bh1=i;
if (ind_act[i]==1) i_bh2=i;
}
printf("now finding i_bh1=%d and i_bh2=%d\n", i_bh1, i_bh2);
if (++printouts >= 10) return 0;
#endif
// #if defined(ACT_DEF_GRAPITE) && (defined(ADD_BH1) || defined(ADD_BH2))
// #ifdef ADD_BH1
// #define ACT_DEF_GRAPITE_NUMBH 1
// #else
// #define ACT_DEF_GRAPITE_NUMBH 2
// #endif
// int act_def_grapite_bh_count = 0;
// for (i=0; i<n_act; i++) {
// if (ind_act[i]==0) {
// i_bh1 = i;
// act_def_grapite_bh_count++;
// }
// #ifdef ADD_BH2
// else if (ind_act[i]==1) {
// i_bh2 = i;
// act_def_grapite_bh_count++;
// }
// #endif
// if (act_def_grapite_bh_count==config->live_smbh_count) break;
// }
// if (i==n_act) {
// fprintf(stderr, "ERROR: black holes were not found in the active particle list");
// return -1;
// }
// #else
// if (config->live_smbh_count > 0) {
// i_bh1 = 0;
// i_bh2 = 1;
// }
// #endif
#ifdef TIMING
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
DT_ACT_DEF2 += (CPU_tmp_user - CPU_tmp_user0);