diff --git a/config.cpp b/config.cpp index 6223df7..9a7b9ce 100644 --- a/config.cpp +++ b/config.cpp @@ -233,6 +233,7 @@ Config::Config(std::string file_name) grapite_mask_file_name = get_parameter(dictionary, "grapite_mask_file_name", "grapite.mask"); etics_dump_coeffs = get_parameter(dictionary, "etics_dump_coeffs", false); grapite_active_search = get_parameter(dictionary, "grapite_active_search", false); + grapite_dev_exec_threshold = get_parameter(dictionary, "grapite_dev_exec_threshold", 32); #endif error_checking(); diff --git a/config.h b/config.h index 875d626..9305e34 100644 --- a/config.h +++ b/config.h @@ -57,6 +57,7 @@ public: std::string grapite_mask_file_name; bool etics_dump_coeffs; bool grapite_active_search; + int grapite_dev_exec_threshold; #endif private: diff --git a/phigrape.conf b/phigrape.conf index 3c7411b..13080de 100644 --- a/phigrape.conf +++ b/phigrape.conf @@ -31,7 +31,7 @@ eta = 0.01 # processes on a machine with a single device, set the value to 1 and use the # mpirun utility (or whatever is used in your job scheduler) to launch as many # processes as you like. -devices_per_node = 1 +#devices_per_node = 1 ########## @@ -168,7 +168,7 @@ devices_per_node = 1 # always included. #pn_usage = {1, 1, 1, 1, 0, 0, 0} -# The speed of light in N-body units [default: 500] +# The speed of light in N-body units #pn_c = 477.12 # The spin vectors of the two SMBHs. Only define these if the last component of @@ -188,22 +188,26 @@ devices_per_node = 1 dt_scf = 0.015625 # Name of the mask file for GRAPite [default: grapite.mask] -grapite_mask_file_name = grapite.mask +#grapite_mask_file_name = grapite.mask # Whether to write to disk a list of SCF coefficients at every dt_disk. [default: false] -etics_dump_coeffs = true +#etics_dump_coeffs = true # Whether to use an alternative procedure for active particle search that is # available in the GRAPite library. This requires the number of particles in # each MPI process to be exactly divisible by 32. 
This can substantially # accelerate the calculation in some circumstances [default: false] -grapite_active_search = true +#grapite_active_search = true +# If the number of active particles in a particular bunch exceeds this +# threshold, the SCF gravity is calculated on the GPU, otherwise on the CPU. When the +# active bunch is small, the overhead of calculating the SCF gravity on the GPU +# makes the operation more expensive than if it is done on the CPU. [default: 32] +#grapite_dev_exec_threshold = 512 # TODO ######## # etics dump mode -# threshold for execution on device for grapite # scaling parameter override diff --git a/phigrape.cpp b/phigrape.cpp index 8d176d8..548468e 100644 --- a/phigrape.cpp +++ b/phigrape.cpp @@ -82,9 +82,7 @@ Last redaction : 2019.04.16 12:55 #ifdef ETICS #include "grapite.h" -//#define ACT_DEF_GRAPITE #endif -const bool act_def_grapite = true; Config *config; @@ -250,10 +248,12 @@ public: double get_minimum_time(const double t[], const double dt[]) { double min_t_loc, min_t; - if (act_def_grapite) { +#ifdef ETICS + if (config->grapite_active_search) { min_t_loc = grapite_get_minimum_time(); - printf("gggggggggggg min_t_loc=%.10e\n", min_t_loc); - } else { + } else +#endif + { min_t_loc = t[myRank*n_loc]+dt[myRank*n_loc]; for (int j=myRank*n_loc+1; j<(myRank+1)*n_loc; j++) { double tmp = t[j] + dt[j]; @@ -264,10 +264,10 @@ public: MPI_Allreduce(&min_t_loc, &min_t, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); return min_t; } - void get_active_indices(const double min_t, const double t[], const double dt[], int ind_act[], int *n_act) -#warning refrence not pointer + void get_active_indices(const double min_t, const double t[], const double dt[], int ind_act[], int& n_act) { - if (act_def_grapite) { +#ifdef ETICS + if (config->grapite_active_search) { int n_act_loc; grapite_active_search(min_t, ind_act_loc, &n_act_loc); if (myRank > 0) @@ -275,17 +275,19 @@ public: ind_act_loc[i] += myRank*n_loc; int n_act_arr[256], displs[256]; // Assuming 
maximum of 256 processes... seems safe. MPI_Allgather(&n_act_loc, 1, MPI_INT, n_act_arr, 1, MPI_INT, MPI_COMM_WORLD); - *n_act = n_act_arr[0]; + n_act = n_act_arr[0]; for (int i=1; igrapite_dev_exec_threshold); +#endif + int n_loc = N/n_proc; Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps); Active_search active_search(myRank, n_proc, n_loc, N); @@ -674,7 +678,6 @@ int main(int argc, char *argv[]) #endif double min_t = active_search.get_minimum_time(t, dt); - printf("zzzzzzzzzzzzzzzzz %.10e\n", min_t); #ifdef TIMING get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst); @@ -685,75 +688,26 @@ int main(int argc, char *argv[]) get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); #endif - active_search.get_active_indices(min_t, t, dt, ind_act, &n_act); + active_search.get_active_indices(min_t, t, dt, ind_act, n_act); -// TODO deal with it below -// #ifdef ACT_DEF_GRAPITE -// #error please fix here -// #endif - - static int printouts = 0; + int i_bh1=0, i_bh2=1; +#ifdef ETICS int n_bh = config->live_smbh_count; - if (n_bh>0) { - if (act_def_grapite) { - int act_def_grapite_bh_count = 0; - int i_bh[n_bh]; - for (int i=0; igrapite_active_search && (n_bh>0)) { + int act_def_grapite_bh_count = 0; + int i_bh[n_bh]; + for (int i=0; i= 10) return 0; +#endif - -// #if defined(ACT_DEF_GRAPITE) && (defined(ADD_BH1) || defined(ADD_BH2)) - -// #ifdef ADD_BH1 -// #define ACT_DEF_GRAPITE_NUMBH 1 -// #else -// #define ACT_DEF_GRAPITE_NUMBH 2 -// #endif - -// int act_def_grapite_bh_count = 0; -// for (i=0; ilive_smbh_count) break; -// } -// if (i==n_act) { -// fprintf(stderr, "ERROR: black holes were not found in the active particle list"); -// return -1; -// } -// #else -// if (config->live_smbh_count > 0) { -// i_bh1 = 0; -// i_bh2 = 1; -// } -// #endif - #ifdef TIMING get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst); DT_ACT_DEF2 += (CPU_tmp_user - CPU_tmp_user0);