Fast active search now works and can be controlled from the config file
This commit is contained in:
parent
2f8f8c582c
commit
1a438449a8
4 changed files with 44 additions and 84 deletions
|
|
@ -233,6 +233,7 @@ Config::Config(std::string file_name)
|
||||||
grapite_mask_file_name = get_parameter<std::string>(dictionary, "grapite_mask_file_name", "grapite.mask");
|
grapite_mask_file_name = get_parameter<std::string>(dictionary, "grapite_mask_file_name", "grapite.mask");
|
||||||
etics_dump_coeffs = get_parameter<bool>(dictionary, "etics_dump_coeffs", false);
|
etics_dump_coeffs = get_parameter<bool>(dictionary, "etics_dump_coeffs", false);
|
||||||
grapite_active_search = get_parameter<bool>(dictionary, "grapite_active_search", false);
|
grapite_active_search = get_parameter<bool>(dictionary, "grapite_active_search", false);
|
||||||
|
grapite_dev_exec_threshold = get_parameter<int>(dictionary, "grapite_dev_exec_threshold", 32);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
error_checking();
|
error_checking();
|
||||||
|
|
|
||||||
1
config.h
1
config.h
|
|
@ -57,6 +57,7 @@ public:
|
||||||
std::string grapite_mask_file_name;
|
std::string grapite_mask_file_name;
|
||||||
bool etics_dump_coeffs;
|
bool etics_dump_coeffs;
|
||||||
bool grapite_active_search;
|
bool grapite_active_search;
|
||||||
|
int grapite_dev_exec_threshold;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ eta = 0.01
|
||||||
# processes on a machine with a single device, set the value to 1 and use the
|
# processes on a machine with a single device, set the value to 1 and use the
|
||||||
# mpirun utility (or whatever is used in your job scheduler) to launch as many
|
# mpirun utility (or whatever is used in your job scheduler) to launch as many
|
||||||
# processes as you like.
|
# processes as you like.
|
||||||
devices_per_node = 1
|
#devices_per_node = 1
|
||||||
|
|
||||||
|
|
||||||
##########
|
##########
|
||||||
|
|
@ -168,7 +168,7 @@ devices_per_node = 1
|
||||||
# always included.
|
# always included.
|
||||||
#pn_usage = {1, 1, 1, 1, 0, 0, 0}
|
#pn_usage = {1, 1, 1, 1, 0, 0, 0}
|
||||||
|
|
||||||
# The speed of light in N-body units [default: 500]
|
# The speed of light in N-body units
|
||||||
#pn_c = 477.12
|
#pn_c = 477.12
|
||||||
|
|
||||||
# The spin vectors of the two SMBHs. Only define these if the last component of
|
# The spin vectors of the two SMBHs. Only define these if the last component of
|
||||||
|
|
@ -188,22 +188,26 @@ devices_per_node = 1
|
||||||
dt_scf = 0.015625
|
dt_scf = 0.015625
|
||||||
|
|
||||||
# Name of the mask file for GRAPite [default: grapite.mask]
|
# Name of the mask file for GRAPite [default: grapite.mask]
|
||||||
grapite_mask_file_name = grapite.mask
|
#grapite_mask_file_name = grapite.mask
|
||||||
|
|
||||||
# Whether to write to disk a list of SCF coefficients at every dt_disk. [default: false]
|
# Whether to write to disk a list of SCF coefficients at every dt_disk. [default: false]
|
||||||
etics_dump_coeffs = true
|
#etics_dump_coeffs = true
|
||||||
|
|
||||||
# Whether to use an alternative procedure for active particle search that is
|
# Whether to use an alternative procedure for active particle search that is
|
||||||
# available in the GRAPite library. This requires the number of particles in
|
# available in the GRAPite library. This requires the number of particles in
|
||||||
# each MPI process to be exactly divisible by 32. This can substantially
|
# each MPI process to be exactly divisible by 32. This can substantially
|
||||||
# accelerate the calculation in some circumstances. [default: false]
|
# accelerate the calculation in some circumstances. [default: false]
|
||||||
grapite_active_search = true
|
#grapite_active_search = true
|
||||||
|
|
||||||
|
# If the number of active particles in a particular bunch is bigger than this
|
||||||
|
# threshold, the SCF gravity is calculated on the GPU; otherwise on the CPU. When the
|
||||||
|
# active bunch is small, the overhead of calculating the SCF gravity on the GPU
|
||||||
|
# makes the operation more expensive than doing it on the CPU. [default: 32]
|
||||||
|
#grapite_dev_exec_threshold = 512
|
||||||
|
|
||||||
# TODO
|
# TODO
|
||||||
########
|
########
|
||||||
# etics dump mode
|
# etics dump mode
|
||||||
# threshold for execution on device for grapite
|
|
||||||
# scaling parameter override
|
# scaling parameter override
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
94
phigrape.cpp
94
phigrape.cpp
|
|
@ -82,9 +82,7 @@ Last redaction : 2019.04.16 12:55
|
||||||
|
|
||||||
#ifdef ETICS
|
#ifdef ETICS
|
||||||
#include "grapite.h"
|
#include "grapite.h"
|
||||||
//#define ACT_DEF_GRAPITE
|
|
||||||
#endif
|
#endif
|
||||||
const bool act_def_grapite = true;
|
|
||||||
|
|
||||||
Config *config;
|
Config *config;
|
||||||
|
|
||||||
|
|
@ -250,10 +248,12 @@ public:
|
||||||
double get_minimum_time(const double t[], const double dt[])
|
double get_minimum_time(const double t[], const double dt[])
|
||||||
{
|
{
|
||||||
double min_t_loc, min_t;
|
double min_t_loc, min_t;
|
||||||
if (act_def_grapite) {
|
#ifdef ETICS
|
||||||
|
if (config->grapite_active_search) {
|
||||||
min_t_loc = grapite_get_minimum_time();
|
min_t_loc = grapite_get_minimum_time();
|
||||||
printf("gggggggggggg min_t_loc=%.10e\n", min_t_loc);
|
} else
|
||||||
} else {
|
#endif
|
||||||
|
{
|
||||||
min_t_loc = t[myRank*n_loc]+dt[myRank*n_loc];
|
min_t_loc = t[myRank*n_loc]+dt[myRank*n_loc];
|
||||||
for (int j=myRank*n_loc+1; j<(myRank+1)*n_loc; j++) {
|
for (int j=myRank*n_loc+1; j<(myRank+1)*n_loc; j++) {
|
||||||
double tmp = t[j] + dt[j];
|
double tmp = t[j] + dt[j];
|
||||||
|
|
@ -264,10 +264,10 @@ public:
|
||||||
MPI_Allreduce(&min_t_loc, &min_t, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
|
MPI_Allreduce(&min_t_loc, &min_t, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
|
||||||
return min_t;
|
return min_t;
|
||||||
}
|
}
|
||||||
void get_active_indices(const double min_t, const double t[], const double dt[], int ind_act[], int *n_act)
|
void get_active_indices(const double min_t, const double t[], const double dt[], int ind_act[], int& n_act)
|
||||||
#warning refrence not pointer
|
|
||||||
{
|
{
|
||||||
if (act_def_grapite) {
|
#ifdef ETICS
|
||||||
|
if (config->grapite_active_search) {
|
||||||
int n_act_loc;
|
int n_act_loc;
|
||||||
grapite_active_search(min_t, ind_act_loc, &n_act_loc);
|
grapite_active_search(min_t, ind_act_loc, &n_act_loc);
|
||||||
if (myRank > 0)
|
if (myRank > 0)
|
||||||
|
|
@ -275,17 +275,19 @@ public:
|
||||||
ind_act_loc[i] += myRank*n_loc;
|
ind_act_loc[i] += myRank*n_loc;
|
||||||
int n_act_arr[256], displs[256]; // Assuming maximum of 256 processes... seems safe.
|
int n_act_arr[256], displs[256]; // Assuming maximum of 256 processes... seems safe.
|
||||||
MPI_Allgather(&n_act_loc, 1, MPI_INT, n_act_arr, 1, MPI_INT, MPI_COMM_WORLD);
|
MPI_Allgather(&n_act_loc, 1, MPI_INT, n_act_arr, 1, MPI_INT, MPI_COMM_WORLD);
|
||||||
*n_act = n_act_arr[0];
|
n_act = n_act_arr[0];
|
||||||
for (int i=1; i<n_proc; i++)
|
for (int i=1; i<n_proc; i++)
|
||||||
*n_act += n_act_arr[i];
|
n_act += n_act_arr[i];
|
||||||
displs[0] = 0;
|
displs[0] = 0;
|
||||||
for (int i=1; i<n_proc; i++)
|
for (int i=1; i<n_proc; i++)
|
||||||
displs[i]=displs[i-1]+n_act_arr[i-1];
|
displs[i]=displs[i-1]+n_act_arr[i-1];
|
||||||
MPI_Allgatherv(ind_act_loc, n_act_loc, MPI_INT, ind_act, n_act_arr, displs, MPI_INT, MPI_COMM_WORLD);
|
MPI_Allgatherv(ind_act_loc, n_act_loc, MPI_INT, ind_act, n_act_arr, displs, MPI_INT, MPI_COMM_WORLD);
|
||||||
} else {
|
} else
|
||||||
*n_act = 0;
|
#endif
|
||||||
|
{
|
||||||
|
n_act = 0;
|
||||||
for (int i=0; i<N; i++) {
|
for (int i=0; i<N; i++) {
|
||||||
if (t[i]+dt[i] == min_t) ind_act[(*n_act)++] = i;
|
if (t[i]+dt[i] == min_t) ind_act[n_act++] = i;
|
||||||
} /* i */
|
} /* i */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -334,8 +336,6 @@ int main(int argc, char *argv[])
|
||||||
|
|
||||||
double3 xcm, vcm, xdc, vdc; // these should go away
|
double3 xcm, vcm, xdc, vdc; // these should go away
|
||||||
|
|
||||||
int i_bh1, i_bh2;
|
|
||||||
|
|
||||||
double3 x_bbhc, v_bbhc;
|
double3 x_bbhc, v_bbhc;
|
||||||
|
|
||||||
double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface.
|
double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface.
|
||||||
|
|
@ -502,6 +502,10 @@ int main(int argc, char *argv[])
|
||||||
g6_set_tunit(51);
|
g6_set_tunit(51);
|
||||||
g6_set_xunit(51);
|
g6_set_xunit(51);
|
||||||
|
|
||||||
|
#ifdef ETICS
|
||||||
|
grapite_set_dev_exec_threshold(config->grapite_dev_exec_threshold);
|
||||||
|
#endif
|
||||||
|
|
||||||
int n_loc = N/n_proc;
|
int n_loc = N/n_proc;
|
||||||
Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps);
|
Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps);
|
||||||
Active_search active_search(myRank, n_proc, n_loc, N);
|
Active_search active_search(myRank, n_proc, n_loc, N);
|
||||||
|
|
@ -674,7 +678,6 @@ int main(int argc, char *argv[])
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
double min_t = active_search.get_minimum_time(t, dt);
|
double min_t = active_search.get_minimum_time(t, dt);
|
||||||
printf("zzzzzzzzzzzzzzzzz %.10e\n", min_t);
|
|
||||||
|
|
||||||
#ifdef TIMING
|
#ifdef TIMING
|
||||||
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
||||||
|
|
@ -685,17 +688,12 @@ int main(int argc, char *argv[])
|
||||||
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
active_search.get_active_indices(min_t, t, dt, ind_act, &n_act);
|
active_search.get_active_indices(min_t, t, dt, ind_act, n_act);
|
||||||
|
|
||||||
// TODO deal with it below
|
int i_bh1=0, i_bh2=1;
|
||||||
// #ifdef ACT_DEF_GRAPITE
|
#ifdef ETICS
|
||||||
// #error please fix here
|
|
||||||
// #endif
|
|
||||||
|
|
||||||
static int printouts = 0;
|
|
||||||
int n_bh = config->live_smbh_count;
|
int n_bh = config->live_smbh_count;
|
||||||
if (n_bh>0) {
|
if (config->grapite_active_search && (n_bh>0)) {
|
||||||
if (act_def_grapite) {
|
|
||||||
int act_def_grapite_bh_count = 0;
|
int act_def_grapite_bh_count = 0;
|
||||||
int i_bh[n_bh];
|
int i_bh[n_bh];
|
||||||
for (int i=0; i<n_act; i++) {
|
for (int i=0; i<n_act; i++) {
|
||||||
|
|
@ -706,54 +704,10 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
i_bh1 = i_bh[0];
|
i_bh1 = i_bh[0];
|
||||||
if (n_bh == 2) i_bh2 = i_bh[1];
|
if (n_bh == 2) i_bh2 = i_bh[1];
|
||||||
} else {
|
|
||||||
i_bh1 == 0;
|
|
||||||
if (n_bh == 2) i_bh2 = 1;
|
|
||||||
}
|
}
|
||||||
}
|
#endif
|
||||||
printf("previously got i_bh1=%d and i_bh2=%d\n", i_bh1, i_bh2);
|
|
||||||
for (int i=0; i<n_act; i++) {
|
|
||||||
if (ind_act[i]==0) i_bh1=i;
|
|
||||||
if (ind_act[i]==1) i_bh2=i;
|
|
||||||
}
|
|
||||||
printf("now finding i_bh1=%d and i_bh2=%d\n", i_bh1, i_bh2);
|
|
||||||
if (++printouts >= 10) return 0;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// #if defined(ACT_DEF_GRAPITE) && (defined(ADD_BH1) || defined(ADD_BH2))
|
|
||||||
|
|
||||||
// #ifdef ADD_BH1
|
|
||||||
// #define ACT_DEF_GRAPITE_NUMBH 1
|
|
||||||
// #else
|
|
||||||
// #define ACT_DEF_GRAPITE_NUMBH 2
|
|
||||||
// #endif
|
|
||||||
|
|
||||||
// int act_def_grapite_bh_count = 0;
|
|
||||||
// for (i=0; i<n_act; i++) {
|
|
||||||
// if (ind_act[i]==0) {
|
|
||||||
// i_bh1 = i;
|
|
||||||
// act_def_grapite_bh_count++;
|
|
||||||
// }
|
|
||||||
// #ifdef ADD_BH2
|
|
||||||
// else if (ind_act[i]==1) {
|
|
||||||
// i_bh2 = i;
|
|
||||||
// act_def_grapite_bh_count++;
|
|
||||||
// }
|
|
||||||
// #endif
|
|
||||||
// if (act_def_grapite_bh_count==config->live_smbh_count) break;
|
|
||||||
// }
|
|
||||||
// if (i==n_act) {
|
|
||||||
// fprintf(stderr, "ERROR: black holes were not found in the active particle list");
|
|
||||||
// return -1;
|
|
||||||
// }
|
|
||||||
// #else
|
|
||||||
// if (config->live_smbh_count > 0) {
|
|
||||||
// i_bh1 = 0;
|
|
||||||
// i_bh2 = 1;
|
|
||||||
// }
|
|
||||||
// #endif
|
|
||||||
|
|
||||||
#ifdef TIMING
|
#ifdef TIMING
|
||||||
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
|
||||||
DT_ACT_DEF2 += (CPU_tmp_user - CPU_tmp_user0);
|
DT_ACT_DEF2 += (CPU_tmp_user - CPU_tmp_user0);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue