Fast active search now works and can be controlled from the config file

This commit is contained in:
Yohai Meiron 2020-05-01 20:41:12 -04:00
parent 2f8f8c582c
commit 1a438449a8
4 changed files with 44 additions and 84 deletions

View file

@ -233,6 +233,7 @@ Config::Config(std::string file_name)
grapite_mask_file_name = get_parameter<std::string>(dictionary, "grapite_mask_file_name", "grapite.mask"); grapite_mask_file_name = get_parameter<std::string>(dictionary, "grapite_mask_file_name", "grapite.mask");
etics_dump_coeffs = get_parameter<bool>(dictionary, "etics_dump_coeffs", false); etics_dump_coeffs = get_parameter<bool>(dictionary, "etics_dump_coeffs", false);
grapite_active_search = get_parameter<bool>(dictionary, "grapite_active_search", false); grapite_active_search = get_parameter<bool>(dictionary, "grapite_active_search", false);
grapite_dev_exec_threshold = get_parameter<int>(dictionary, "grapite_dev_exec_threshold", 32);
#endif #endif
error_checking(); error_checking();

View file

@ -57,6 +57,7 @@ public:
std::string grapite_mask_file_name; std::string grapite_mask_file_name;
bool etics_dump_coeffs; bool etics_dump_coeffs;
bool grapite_active_search; bool grapite_active_search;
int grapite_dev_exec_threshold;
#endif #endif
private: private:

View file

@ -31,7 +31,7 @@ eta = 0.01
# processes on a machine with a single device, set the value to 1 and use the # processes on a machine with a single device, set the value to 1 and use the
# mpirun utility (or whatever is used in your job scheduler) to launch as many # mpirun utility (or whatever is used in your job scheduler) to launch as many
# processes as you like. # processes as you like.
devices_per_node = 1 #devices_per_node = 1
########## ##########
@ -168,7 +168,7 @@ devices_per_node = 1
# always included. # always included.
#pn_usage = {1, 1, 1, 1, 0, 0, 0} #pn_usage = {1, 1, 1, 1, 0, 0, 0}
# The speed of light in N-body units [default: 500] # The speed of light in N-body units
#pn_c = 477.12 #pn_c = 477.12
# The spin vectors of the two SMBHs. Only define these if the last component of # The spin vectors of the two SMBHs. Only define these if the last component of
@ -188,22 +188,26 @@ devices_per_node = 1
dt_scf = 0.015625 dt_scf = 0.015625
# Name of the mask file for GRAPite [default: grapite.mask] # Name of the mask file for GRAPite [default: grapite.mask]
grapite_mask_file_name = grapite.mask #grapite_mask_file_name = grapite.mask
# Whether to write to disk a list of SCF coefficients at every dt_disk. [default: false] # Whether to write to disk a list of SCF coefficients at every dt_disk. [default: false]
etics_dump_coeffs = true #etics_dump_coeffs = true
# Whether to use an alternative procedure for active particle search that is # Whether to use an alternative procedure for active particle search that is
# available in the GRAPite library. This requires the number of particles in # available in the GRAPite library. This requires the number of particles in
# each MPI process to be exactly divisible by 32. This can substantially # each MPI process to be exactly divisible by 32. This can substantially
# accelerate the calculation in some circumstances. [default: false] # accelerate the calculation in some circumstances. [default: false]
grapite_active_search = true #grapite_active_search = true
# If the number of active particles in a particular bunch is bigger than this
# threshold, the SCF gravity is calculated on the GPU; otherwise it is
# calculated on the CPU. When the active bunch is small, the overhead of
# calculating the SCF gravity on the GPU makes the operation more expensive
# than if it is done on the CPU. [default: 32]
#grapite_dev_exec_threshold = 512
# TODO # TODO
######## ########
# etics dump mode # etics dump mode
# threshold for execution on device for grapite
# scaling parameter override # scaling parameter override

View file

@ -82,9 +82,7 @@ Last redaction : 2019.04.16 12:55
#ifdef ETICS #ifdef ETICS
#include "grapite.h" #include "grapite.h"
//#define ACT_DEF_GRAPITE
#endif #endif
const bool act_def_grapite = true;
Config *config; Config *config;
@ -250,10 +248,12 @@ public:
double get_minimum_time(const double t[], const double dt[]) double get_minimum_time(const double t[], const double dt[])
{ {
double min_t_loc, min_t; double min_t_loc, min_t;
if (act_def_grapite) { #ifdef ETICS
if (config->grapite_active_search) {
min_t_loc = grapite_get_minimum_time(); min_t_loc = grapite_get_minimum_time();
printf("gggggggggggg min_t_loc=%.10e\n", min_t_loc); } else
} else { #endif
{
min_t_loc = t[myRank*n_loc]+dt[myRank*n_loc]; min_t_loc = t[myRank*n_loc]+dt[myRank*n_loc];
for (int j=myRank*n_loc+1; j<(myRank+1)*n_loc; j++) { for (int j=myRank*n_loc+1; j<(myRank+1)*n_loc; j++) {
double tmp = t[j] + dt[j]; double tmp = t[j] + dt[j];
@ -264,10 +264,10 @@ public:
MPI_Allreduce(&min_t_loc, &min_t, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); MPI_Allreduce(&min_t_loc, &min_t, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
return min_t; return min_t;
} }
void get_active_indices(const double min_t, const double t[], const double dt[], int ind_act[], int *n_act) void get_active_indices(const double min_t, const double t[], const double dt[], int ind_act[], int& n_act)
#warning refrence not pointer
{ {
if (act_def_grapite) { #ifdef ETICS
if (config->grapite_active_search) {
int n_act_loc; int n_act_loc;
grapite_active_search(min_t, ind_act_loc, &n_act_loc); grapite_active_search(min_t, ind_act_loc, &n_act_loc);
if (myRank > 0) if (myRank > 0)
@ -275,17 +275,19 @@ public:
ind_act_loc[i] += myRank*n_loc; ind_act_loc[i] += myRank*n_loc;
int n_act_arr[256], displs[256]; // Assuming maximum of 256 processes... seems safe. int n_act_arr[256], displs[256]; // Assuming maximum of 256 processes... seems safe.
MPI_Allgather(&n_act_loc, 1, MPI_INT, n_act_arr, 1, MPI_INT, MPI_COMM_WORLD); MPI_Allgather(&n_act_loc, 1, MPI_INT, n_act_arr, 1, MPI_INT, MPI_COMM_WORLD);
*n_act = n_act_arr[0]; n_act = n_act_arr[0];
for (int i=1; i<n_proc; i++) for (int i=1; i<n_proc; i++)
*n_act += n_act_arr[i]; n_act += n_act_arr[i];
displs[0] = 0; displs[0] = 0;
for (int i=1; i<n_proc; i++) for (int i=1; i<n_proc; i++)
displs[i]=displs[i-1]+n_act_arr[i-1]; displs[i]=displs[i-1]+n_act_arr[i-1];
MPI_Allgatherv(ind_act_loc, n_act_loc, MPI_INT, ind_act, n_act_arr, displs, MPI_INT, MPI_COMM_WORLD); MPI_Allgatherv(ind_act_loc, n_act_loc, MPI_INT, ind_act, n_act_arr, displs, MPI_INT, MPI_COMM_WORLD);
} else { } else
*n_act = 0; #endif
{
n_act = 0;
for (int i=0; i<N; i++) { for (int i=0; i<N; i++) {
if (t[i]+dt[i] == min_t) ind_act[(*n_act)++] = i; if (t[i]+dt[i] == min_t) ind_act[n_act++] = i;
} /* i */ } /* i */
} }
} }
@ -334,8 +336,6 @@ int main(int argc, char *argv[])
double3 xcm, vcm, xdc, vdc; // these should go away double3 xcm, vcm, xdc, vdc; // these should go away
int i_bh1, i_bh2;
double3 x_bbhc, v_bbhc; double3 x_bbhc, v_bbhc;
double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface. double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface.
@ -502,6 +502,10 @@ int main(int argc, char *argv[])
g6_set_tunit(51); g6_set_tunit(51);
g6_set_xunit(51); g6_set_xunit(51);
#ifdef ETICS
grapite_set_dev_exec_threshold(config->grapite_dev_exec_threshold);
#endif
int n_loc = N/n_proc; int n_loc = N/n_proc;
Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps); Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps);
Active_search active_search(myRank, n_proc, n_loc, N); Active_search active_search(myRank, n_proc, n_loc, N);
@ -674,7 +678,6 @@ int main(int argc, char *argv[])
#endif #endif
double min_t = active_search.get_minimum_time(t, dt); double min_t = active_search.get_minimum_time(t, dt);
printf("zzzzzzzzzzzzzzzzz %.10e\n", min_t);
#ifdef TIMING #ifdef TIMING
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst); get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
@ -685,75 +688,26 @@ int main(int argc, char *argv[])
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
#endif #endif
active_search.get_active_indices(min_t, t, dt, ind_act, &n_act); active_search.get_active_indices(min_t, t, dt, ind_act, n_act);
// TODO deal with it below int i_bh1=0, i_bh2=1;
// #ifdef ACT_DEF_GRAPITE #ifdef ETICS
// #error please fix here
// #endif
static int printouts = 0;
int n_bh = config->live_smbh_count; int n_bh = config->live_smbh_count;
if (n_bh>0) { if (config->grapite_active_search && (n_bh>0)) {
if (act_def_grapite) { int act_def_grapite_bh_count = 0;
int act_def_grapite_bh_count = 0; int i_bh[n_bh];
int i_bh[n_bh]; for (int i=0; i<n_act; i++) {
for (int i=0; i<n_act; i++) { if (ind_act[i]<n_bh) {
if (ind_act[i]<n_bh) { i_bh[ind_act[i]] = i;
i_bh[ind_act[i]] = i; if (++act_def_grapite_bh_count == n_bh) break;
if (++act_def_grapite_bh_count == n_bh) break;
}
} }
i_bh1 = i_bh[0];
if (n_bh == 2) i_bh2 = i_bh[1];
} else {
i_bh1 == 0;
if (n_bh == 2) i_bh2 = 1;
} }
i_bh1 = i_bh[0];
if (n_bh == 2) i_bh2 = i_bh[1];
} }
printf("previously got i_bh1=%d and i_bh2=%d\n", i_bh1, i_bh2); #endif
for (int i=0; i<n_act; i++) {
if (ind_act[i]==0) i_bh1=i;
if (ind_act[i]==1) i_bh2=i;
}
printf("now finding i_bh1=%d and i_bh2=%d\n", i_bh1, i_bh2);
if (++printouts >= 10) return 0;
// #if defined(ACT_DEF_GRAPITE) && (defined(ADD_BH1) || defined(ADD_BH2))
// #ifdef ADD_BH1
// #define ACT_DEF_GRAPITE_NUMBH 1
// #else
// #define ACT_DEF_GRAPITE_NUMBH 2
// #endif
// int act_def_grapite_bh_count = 0;
// for (i=0; i<n_act; i++) {
// if (ind_act[i]==0) {
// i_bh1 = i;
// act_def_grapite_bh_count++;
// }
// #ifdef ADD_BH2
// else if (ind_act[i]==1) {
// i_bh2 = i;
// act_def_grapite_bh_count++;
// }
// #endif
// if (act_def_grapite_bh_count==config->live_smbh_count) break;
// }
// if (i==n_act) {
// fprintf(stderr, "ERROR: black holes were not found in the active particle list");
// return -1;
// }
// #else
// if (config->live_smbh_count > 0) {
// i_bh1 = 0;
// i_bh2 = 1;
// }
// #endif
#ifdef TIMING #ifdef TIMING
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst); get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
DT_ACT_DEF2 += (CPU_tmp_user - CPU_tmp_user0); DT_ACT_DEF2 += (CPU_tmp_user - CPU_tmp_user0);