Fast active search now works and can be controlled from the config file
This commit is contained in:
parent
2f8f8c582c
commit
1a438449a8
4 changed files with 44 additions and 84 deletions
|
|
@ -233,6 +233,7 @@ Config::Config(std::string file_name)
|
|||
grapite_mask_file_name = get_parameter<std::string>(dictionary, "grapite_mask_file_name", "grapite.mask");
|
||||
etics_dump_coeffs = get_parameter<bool>(dictionary, "etics_dump_coeffs", false);
|
||||
grapite_active_search = get_parameter<bool>(dictionary, "grapite_active_search", false);
|
||||
grapite_dev_exec_threshold = get_parameter<int>(dictionary, "grapite_dev_exec_threshold", 32);
|
||||
#endif
|
||||
|
||||
error_checking();
|
||||
|
|
|
|||
1
config.h
1
config.h
|
|
@ -57,6 +57,7 @@ public:
|
|||
std::string grapite_mask_file_name;
|
||||
bool etics_dump_coeffs;
|
||||
bool grapite_active_search;
|
||||
int grapite_dev_exec_threshold;
|
||||
#endif
|
||||
|
||||
private:
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ eta = 0.01
|
|||
# processes on a machine with a single device, set the value to 1 and use the
|
||||
# mpirun utility (or whatever is used in your job scheduler) to launch as many
|
||||
# processes as you like.
|
||||
devices_per_node = 1
|
||||
#devices_per_node = 1
|
||||
|
||||
|
||||
##########
|
||||
|
|
@ -168,7 +168,7 @@ devices_per_node = 1
|
|||
# always included.
|
||||
#pn_usage = {1, 1, 1, 1, 0, 0, 0}
|
||||
|
||||
# The speed of light in N-body units [default: 500]
|
||||
# The speed of light in N-body units
|
||||
#pn_c = 477.12
|
||||
|
||||
# The spin vectors of the two SMBHs. Only define these if the last component of
|
||||
|
|
@ -188,22 +188,26 @@ devices_per_node = 1
|
|||
dt_scf = 0.015625
|
||||
|
||||
# Name of the mask file for GRAPite [default: grapite.mask]
|
||||
grapite_mask_file_name = grapite.mask
|
||||
#grapite_mask_file_name = grapite.mask
|
||||
|
||||
# Whether to write to disk a list of SCF coefficients at every dt_disk. [default: false]
|
||||
etics_dump_coeffs = true
|
||||
#etics_dump_coeffs = true
|
||||
|
||||
# Whether to use an alternative procedure for active particle search that is
|
||||
# available in the GRAPite library. This requires the number of particles in
|
||||
# each MPI process to be exactly divisible by 32. This can substantially
|
||||
# accelerate the calculation in some circumstances. [default: false]
|
||||
grapite_active_search = true
|
||||
#grapite_active_search = true
|
||||
|
||||
# If the number of active particles in a particular bunch is bigger than this
|
||||
# threshold, execution takes place on the GPU; otherwise, on the CPU. When the
|
||||
# active bunch is small, the overhead of calculating the SCF gravity on the GPU
|
||||
# makes the operation more expensive than if it is done on the CPU. [default: 32]
|
||||
#grapite_dev_exec_threshold = 512
|
||||
|
||||
# TODO
|
||||
########
|
||||
# etics dump mode
|
||||
# threshold for execution on device for grapite
|
||||
# scaling parameter override
|
||||
|
||||
|
||||
|
|
|
|||
94
phigrape.cpp
94
phigrape.cpp
|
|
@ -82,9 +82,7 @@ Last redaction : 2019.04.16 12:55
|
|||
|
||||
#ifdef ETICS
|
||||
#include "grapite.h"
|
||||
//#define ACT_DEF_GRAPITE
|
||||
#endif
|
||||
const bool act_def_grapite = true;
|
||||
|
||||
Config *config;
|
||||
|
||||
|
|
@ -250,10 +248,12 @@ public:
|
|||
double get_minimum_time(const double t[], const double dt[])
|
||||
{
|
||||
double min_t_loc, min_t;
|
||||
if (act_def_grapite) {
|
||||
#ifdef ETICS
|
||||
if (config->grapite_active_search) {
|
||||
min_t_loc = grapite_get_minimum_time();
|
||||
printf("gggggggggggg min_t_loc=%.10e\n", min_t_loc);
|
||||
} else {
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
min_t_loc = t[myRank*n_loc]+dt[myRank*n_loc];
|
||||
for (int j=myRank*n_loc+1; j<(myRank+1)*n_loc; j++) {
|
||||
double tmp = t[j] + dt[j];
|
||||
|
|
@ -264,10 +264,10 @@ public:
|
|||
MPI_Allreduce(&min_t_loc, &min_t, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
|
||||
return min_t;
|
||||
}
|
||||
void get_active_indices(const double min_t, const double t[], const double dt[], int ind_act[], int *n_act)
|
||||
#warning refrence not pointer
|
||||
void get_active_indices(const double min_t, const double t[], const double dt[], int ind_act[], int& n_act)
|
||||
{
|
||||
if (act_def_grapite) {
|
||||
#ifdef ETICS
|
||||
if (config->grapite_active_search) {
|
||||
int n_act_loc;
|
||||
grapite_active_search(min_t, ind_act_loc, &n_act_loc);
|
||||
if (myRank > 0)
|
||||
|
|
@ -275,17 +275,19 @@ public:
|
|||
ind_act_loc[i] += myRank*n_loc;
|
||||
int n_act_arr[256], displs[256]; // Assuming maximum of 256 processes... seems safe.
|
||||
MPI_Allgather(&n_act_loc, 1, MPI_INT, n_act_arr, 1, MPI_INT, MPI_COMM_WORLD);
|
||||
*n_act = n_act_arr[0];
|
||||
n_act = n_act_arr[0];
|
||||
for (int i=1; i<n_proc; i++)
|
||||
*n_act += n_act_arr[i];
|
||||
n_act += n_act_arr[i];
|
||||
displs[0] = 0;
|
||||
for (int i=1; i<n_proc; i++)
|
||||
displs[i]=displs[i-1]+n_act_arr[i-1];
|
||||
MPI_Allgatherv(ind_act_loc, n_act_loc, MPI_INT, ind_act, n_act_arr, displs, MPI_INT, MPI_COMM_WORLD);
|
||||
} else {
|
||||
*n_act = 0;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
n_act = 0;
|
||||
for (int i=0; i<N; i++) {
|
||||
if (t[i]+dt[i] == min_t) ind_act[(*n_act)++] = i;
|
||||
if (t[i]+dt[i] == min_t) ind_act[n_act++] = i;
|
||||
} /* i */
|
||||
}
|
||||
}
|
||||
|
|
@ -334,8 +336,6 @@ int main(int argc, char *argv[])
|
|||
|
||||
double3 xcm, vcm, xdc, vdc; // these should go away
|
||||
|
||||
int i_bh1, i_bh2;
|
||||
|
||||
double3 x_bbhc, v_bbhc;
|
||||
|
||||
double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface.
|
||||
|
|
@ -502,6 +502,10 @@ int main(int argc, char *argv[])
|
|||
g6_set_tunit(51);
|
||||
g6_set_xunit(51);
|
||||
|
||||
#ifdef ETICS
|
||||
grapite_set_dev_exec_threshold(config->grapite_dev_exec_threshold);
|
||||
#endif
|
||||
|
||||
int n_loc = N/n_proc;
|
||||
Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps);
|
||||
Active_search active_search(myRank, n_proc, n_loc, N);
|
||||
|
|
@ -674,7 +678,6 @@ int main(int argc, char *argv[])
|
|||
#endif
|
||||
|
||||
double min_t = active_search.get_minimum_time(t, dt);
|
||||
printf("zzzzzzzzzzzzzzzzz %.10e\n", min_t);
|
||||
|
||||
#ifdef TIMING
|
||||
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
||||
|
|
@ -685,17 +688,12 @@ int main(int argc, char *argv[])
|
|||
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
||||
#endif
|
||||
|
||||
active_search.get_active_indices(min_t, t, dt, ind_act, &n_act);
|
||||
active_search.get_active_indices(min_t, t, dt, ind_act, n_act);
|
||||
|
||||
// TODO deal with it below
|
||||
// #ifdef ACT_DEF_GRAPITE
|
||||
// #error please fix here
|
||||
// #endif
|
||||
|
||||
static int printouts = 0;
|
||||
int i_bh1=0, i_bh2=1;
|
||||
#ifdef ETICS
|
||||
int n_bh = config->live_smbh_count;
|
||||
if (n_bh>0) {
|
||||
if (act_def_grapite) {
|
||||
if (config->grapite_active_search && (n_bh>0)) {
|
||||
int act_def_grapite_bh_count = 0;
|
||||
int i_bh[n_bh];
|
||||
for (int i=0; i<n_act; i++) {
|
||||
|
|
@ -706,54 +704,10 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
i_bh1 = i_bh[0];
|
||||
if (n_bh == 2) i_bh2 = i_bh[1];
|
||||
} else {
|
||||
i_bh1 == 0;
|
||||
if (n_bh == 2) i_bh2 = 1;
|
||||
}
|
||||
}
|
||||
printf("previously got i_bh1=%d and i_bh2=%d\n", i_bh1, i_bh2);
|
||||
for (int i=0; i<n_act; i++) {
|
||||
if (ind_act[i]==0) i_bh1=i;
|
||||
if (ind_act[i]==1) i_bh2=i;
|
||||
}
|
||||
printf("now finding i_bh1=%d and i_bh2=%d\n", i_bh1, i_bh2);
|
||||
if (++printouts >= 10) return 0;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// #if defined(ACT_DEF_GRAPITE) && (defined(ADD_BH1) || defined(ADD_BH2))
|
||||
|
||||
// #ifdef ADD_BH1
|
||||
// #define ACT_DEF_GRAPITE_NUMBH 1
|
||||
// #else
|
||||
// #define ACT_DEF_GRAPITE_NUMBH 2
|
||||
// #endif
|
||||
|
||||
// int act_def_grapite_bh_count = 0;
|
||||
// for (i=0; i<n_act; i++) {
|
||||
// if (ind_act[i]==0) {
|
||||
// i_bh1 = i;
|
||||
// act_def_grapite_bh_count++;
|
||||
// }
|
||||
// #ifdef ADD_BH2
|
||||
// else if (ind_act[i]==1) {
|
||||
// i_bh2 = i;
|
||||
// act_def_grapite_bh_count++;
|
||||
// }
|
||||
// #endif
|
||||
// if (act_def_grapite_bh_count==config->live_smbh_count) break;
|
||||
// }
|
||||
// if (i==n_act) {
|
||||
// fprintf(stderr, "ERROR: black holes were not found in the active particle list");
|
||||
// return -1;
|
||||
// }
|
||||
// #else
|
||||
// if (config->live_smbh_count > 0) {
|
||||
// i_bh1 = 0;
|
||||
// i_bh2 = 1;
|
||||
// }
|
||||
// #endif
|
||||
|
||||
#ifdef TIMING
|
||||
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
||||
DT_ACT_DEF2 += (CPU_tmp_user - CPU_tmp_user0);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue