diff --git a/Makefile b/Makefile index 319f579..6e4538f 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,6 @@ CUDAHOME ?= /usr/local/cuda CPPFLAGS += -DETICS OPTIMIZATION ?= 3 -ETICS_DTSCF ?= 0.015625 - CUDAINC = -I$(CUDAHOME)/include -I$(CUDAHOME)/samples/common/inc/ CUDALIB = -L$(CUDAHOME)/lib64 -lcudart -lcudadevrt @@ -25,7 +23,7 @@ CPPFLAGS += -DHAS_HDF5 LIB += -lhdf5 -lz -ldl default: - $(MPICXX) $(CPPFLAGS) $(CXXFLAGS) -DETICS_DTSCF=$(ETICS_DTSCF) $(INC) external.cpp io.cpp config.cpp phigrape.cpp -o $(EXECUTABLE) $(LIB) + $(MPICXX) $(CPPFLAGS) $(CXXFLAGS) $(INC) external.cpp io.cpp config.cpp phigrape.cpp -o $(EXECUTABLE) $(LIB) yebisu: CPPFLAGS := $(filter-out -DETICS, $(CPPFLAGS)) yebisu: default diff --git a/config.cpp b/config.cpp index 36d2b16..6223df7 100644 --- a/config.cpp +++ b/config.cpp @@ -228,5 +228,12 @@ Config::Config(std::string file_name) ext_dehnen_r = get_parameter(dictionary, "ext_dehnen_r", -1); ext_dehnen_gamma = get_parameter(dictionary, "ext_dehnen_gamma", -1); +#ifdef ETICS + dt_scf = get_parameter(dictionary, "dt_scf"); + grapite_mask_file_name = get_parameter(dictionary, "grapite_mask_file_name", "grapite.mask"); + etics_dump_coeffs = get_parameter(dictionary, "etics_dump_coeffs", false); + grapite_active_search = get_parameter(dictionary, "grapite_active_search", false); +#endif + error_checking(); } diff --git a/config.h b/config.h index 6805646..875d626 100644 --- a/config.h +++ b/config.h @@ -52,6 +52,13 @@ public: double ext_dehnen_r; double ext_dehnen_gamma; +#ifdef ETICS + double dt_scf; + std::string grapite_mask_file_name; + bool etics_dump_coeffs; + bool grapite_active_search; +#endif + private: using Dictionary = std::unordered_map; Dictionary read_config_file(const std::string file_name); diff --git a/phigrape.conf b/phigrape.conf index 707b597..3c7411b 100644 --- a/phigrape.conf +++ b/phigrape.conf @@ -6,7 +6,7 @@ eps = 1E-4 # End time of the calculation -t_end = 0.25 +t_end = 1 # Interval of snapshot files output dt_disk = 0.125 @@ -25,33 +25,44 @@ eta = 0.01 # Number of devices (GRAPEs or GPUs per node) [default: 0] -# By default, each MPI process will attempt to bind to one device. To change this behaviour, set the value to the number of available devices in the system. For example, if from some reason you want to run multiple MPI processes on a machine with a single device, set the value to 1 and use the mpirun utility (or whatever is used in your job scheduler) to launch as many processes as you like. -#devices_per_node = 1 +# By default, each MPI process will attempt to bind to one device. To change +# this behaviour, set the value to the number of available devices in the +# system. For example, if from some reason you want to run multiple MPI +# processes on a machine with a single device, set the value to 1 and use the +# mpirun utility (or whatever is used in your job scheduler) to launch as many +# processes as you like. +devices_per_node = 1 ########## # OUTPUT # ########## -# Whether to use HDF5 format for snapshot and restart; regular ASCII snapshorts are saved if false [default: false] +# Whether to use HDF5 format for snapshot and restart; regular ASCII snapshorts +# are saved if false [default: false] #output_hdf5 = true -# If using HDF5 output, use double precision or not [default: true] -# Consider setting to false to save disk space. Restart file is always saved in double precision. +# If using HDF5 output, use double precision or not [default: true] Consider +# setting to false to save disk space. Restart file is always saved in double +# precision. #output_hdf5_double_precision = true # If using ASCII output, the number of digits after the decimal point [default: 10] # Restart file is saved with 16 digits after the decimal point. #output_ascii_precision = 6 -# Extra output: optionally save potential, acceleration and jerk in snapshot files [default: 0] -# This is a number between 0 and 7 that encodes the output options in the following way: +# Extra output: optionally save potential, acceleration and jerk in snapshot +# files [default: 0] +# This is a number between 0 and 7 that encodes the output options in the +# following way: # [value] = [save jerk]*4 + [save acceleration]*2 + [save potential] -# Example: choose 5 if it is needed to save the jerk and the potential, but not the acceleration for some reason. +# Example: choose 5 if it is needed to save the jerk and the potential, but not +# the acceleration for some reason. # Currently implemented in HDF5 output only. #output_extra_mode = 7 -# Whether to output a warning on the screen when the minimum time step is encountered. [default: false] +# Whether to output a warning on the screen when the minimum time step is +# encountered. [default: false] #dt_min_warning = false @@ -59,12 +70,18 @@ eta = 0.01 # EXTERNAL GRAVITY # #################### -# Remember that external gravity models are applied at the same coordinate system as the particles. If the idea is to simulate a globular cluster orbiting in an external field, be sure to set the initial conditions appropriately (applying a shift to the coordinates and velocities). +# Remember that external gravity models are applied at the same coordinate +# system as the particles. If the idea is to simulate a globular cluster +# orbiting in an external field, be sure to set the initial conditions +# appropriately (applying a shift to the coordinates and velocities). -# Whether the parameters for the external gravitational field given below are in physical units or Hénon units. If true, the system used is {kiloparsec, solar mass, kilometre per second} [default: false] +# Whether the parameters for the external gravitational field given below are in +# physical units or Hénon units. If true, the system used is {kiloparsec, solar +# mass, kilometre per second} [default: false] #ext_units_physical = true -# If Physical units were selected, specify the simulation's unit mass (is solar masses) and unit lenght (in parsec; not kiloparsec) +# If Physical units were selected, specify the simulation's unit mass (is solar +# masses) and unit lenght (in parsec; not kiloparsec) # TODO: add the option to normalize using other units. #unit_mass = 4E5 # MSun #unit_length = 15 # pc @@ -75,18 +92,21 @@ eta = 0.01 #ext_b_bulge = 1.9 # kpc -# The disk is a Miyamoto-Nagai potential with the following total mass, scale length, and scale height +# The disk is a Miyamoto-Nagai potential with the following total mass, scale +# length, and scale height #ext_m_disk = 6.8E10 # MSun #ext_a_disk = 3.00 # kpc #ext_b_disk = 0.28 # kpc -# This halo option is yet another Plummer potential with the following total mass and radius. +# This halo option is yet another Plummer potential with the following total +# mass and radius. #ext_m_halo_plummer = 8E11 # MSun #ext_b_halo_plummer = 245 # kpc -# This halo option is a logarithmic potential with the following velocity and radius parameters. +# This halo option is a logarithmic potential with the following velocity and +# radius parameters. #ext_log_halo_v = 240 # km/s #ext_log_halo_r = 1 # kpc @@ -100,18 +120,24 @@ eta = 0.01 # LIVE SUPERMASSIVE BLACK HOLE(S) # ################################### -# There is special treatment for particles representing supermassive black holes (SMBHs): they are integrated at every time step, they can have custom softening in SMBH-SMBH interactions, and post Newtonian terms can be added to the gravity. +# There is special treatment for particles representing supermassive black holes +# (SMBHs): they are integrated at every time step, they can have custom +# softening in SMBH-SMBH interactions, and post Newtonian terms can be added to +# the gravity. # The number of SMBH particles. Can be 0 (no SMBH), 1, or 2. [default: 0] #live_smbh_count = 2 -# Custom softening length for SMBH-SMBH interactions (can also be zero). If non-negative, the custom softening is applied. [default: -1] +# Custom softening length for SMBH-SMBH interactions (can also be zero). If +# non-negative, the custom softening is applied. [default: -1] #live_smbh_custom_eps = 0 +#TODO this is actually related only to BINARY smbh! # Output additional diagnostics about live SMBHs. [default: false] #live_smbh_output = true -# Output additional diagnostics about the SMBH's (or SMBHs') nearest neighbours (number could be set as shown below). [default: false] +# Output additional diagnostics about the SMBH's (or SMBHs') nearest neighbours +# (number could be set as shown below). [default: false] #live_smbh_neighbor_output = true # Number of nearest neighbours to the SMBH (or SMBHs) to include in output. [default: 10] @@ -124,22 +150,62 @@ eta = 0.01 # The following parameters can be set when live_smbh_count is 2. -# Output additional diagnostics about the sphere of influence (size could be set as shown below). [default: false] +# Output additional diagnostics about the sphere of influence (size could be set +# as shown below). [default: false] #binary_smbh_influence_sphere_output = true -# The influence sphere is centred at the binary SMBH's centre of mass, and its radius is the semi-major axis of the binary times the factor below. [default: 10] -#binary_smbh_influence_radius_factor = 20 +# The influence sphere is centred at the binary SMBH's centre of mass, and its +# radius is the semi-major axis of the binary times the factor below. [default: 10] +#binary_smbh_influence_radius_factor = 3.162277660168379497918067e+03 # Add post Newtonian terms to SMBH-SMBH gravity. [default: false] #binary_smbh_pn = true -# A mask array (zeros and ones) determining whether or not to use specific post-Newtonian terms. -# The elements represent {0, 1, 2, 2.5, 3, 3.5, spin} [default: {1,1,1,1,1,1,1}] +# A mask array (zeros and ones) determining whether or not to use specific +# post-Newtonian terms. +# The elements represent {Newtonian, 1, 2, 2.5, 3, 3.5, spin} +# Note: the first element in the array has no effect, the Newtonian force is +# always included. #pn_usage = {1, 1, 1, 1, 0, 0, 0} # The speed of light in N-body units [default: 500] #pn_c = 477.12 +# The spin vectors of the two SMBHs. Only define these if the last component of +# pn_usage is set to one. +#smbh1_spin = {0, 0, 1} +#smbh2_spin = {0, 0, 1} + + +############### +# HYBRID CODE # +############### + +# The hybridization with the SCF code is enabled if the ETICS preprocessor flag +# is defined in the when compiling. + +# Time intervals to calculate the SCF series expansion. +dt_scf = 0.015625 + +# Name of the mask file for GRAPite [default: grapite.mask] +grapite_mask_file_name = grapite.mask + +# Whether to write to disk a list of SCF coefficients at every dt_disk. [default: false] +etics_dump_coeffs = true + +# Whether to use an alternative procedure for active particle search that is +# available in the GRAPite library. This requires the number of particles in +# each MPI process to be exactly divisible by 32. This can substantially +# accelerate the calculation in some circumstances [default: false] +grapite_active_search = true + + +# TODO +######## +# etics dump mode +# threshold for execution on device for grapite +# scaling parameter override + #################################### # Negative powers of two # diff --git a/phigrape.cpp b/phigrape.cpp index eae49f2..d8bbd04 100644 --- a/phigrape.cpp +++ b/phigrape.cpp @@ -62,11 +62,6 @@ Config *config; #ifdef ETICS #include "grapite.h" -// why do we need CEP as a compilaion flag... just have it always on when ETICS is on. IF there is no CEP, there should be a graceful skipping of those operations. -//#define ETICS_CEP -#ifndef ETICS_DTSCF -#error "ETICS_DTSCF must be defined" -#endif #endif #define TIMING @@ -140,8 +135,6 @@ int new_tunit=51, new_xunit=51; double ti=0.0; -double eps_BH=0.0; - /* external potential... */ double3 x_bh1, x_bh2, v_bh1, v_bh2; @@ -156,11 +149,8 @@ double3 a_pn1[7], adot_pn1[7], a_pn2[7], adot_pn2[7]; #include "pn_bh_spin.c" #ifdef ETICS -double t_exp, dt_exp=ETICS_DTSCF; // t_exp is just the initial value -#ifdef ETICS_CEP int grapite_cep_index; #endif -#endif void get_CPU_time(double *time_real, double *time_user, double *time_syst) { @@ -989,10 +979,9 @@ int main(int argc, char *argv[]) g6_set_xunit(new_xunit); #ifdef ETICS - grapite_read_particle_tags(N, "grapite.mask", myRank, n_loc); - grapite_set_dt_exp(dt_exp); - dt_exp = time_cur; // if we don't have a binary restart then we don't remember the coefficients, and we need to calculate them now. - grapite_set_t_exp(t_exp); + grapite_read_particle_tags(N, config->grapite_mask_file_name.c_str(), myRank, n_loc); + grapite_set_dt_exp(config->dt_scf); + grapite_set_t_exp(time_cur); #endif /* load the nj particles to the G6 */ @@ -1004,24 +993,17 @@ int main(int argc, char *argv[]) #ifdef ETICS double etics_length_scale; - if (myRank == rootRank) etics_length_scale = grapite_get_length_scale(N, m, x, v); // We don't want all ranks to do it, because they need to write a file and might confuse each other + if (myRank == rootRank) etics_length_scale = grapite_get_length_scale(N, m, (double(*)[3])x, (double(*)[3])v); // We don't want all ranks to do it, because they need to write a file and might confuse each other MPI_Bcast(&etics_length_scale, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD); grapite_set_length_scale(etics_length_scale); -#endif -#ifdef ETICS_CEP - // First time only: get the CEP index grapite_cep_index = grapite_get_cep_index(); - - // First calculate the DC - grapite_calc_center(N, m, x, v, xcm, vcm, xdc, vdc); - - // Now copy it to the global particle list - memcpy(x[grapite_cep_index], xdc, 3*sizeof(double)); - memcpy(v[grapite_cep_index], vdc, 3*sizeof(double)); - - double zeros[3] = {0., 0., 0.}; // We haven't calculated the force yet! - grapite_update_cep(time_cur, xdc, vdc, zeros, zeros); + if (grapite_cep_index >= 0) { + grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc); + memcpy(x[grapite_cep_index], xdc, 3*sizeof(double)); + memcpy(v[grapite_cep_index], vdc, 3*sizeof(double)); + grapite_update_cep(time_cur, xdc, vdc, zeros, zeros); + } #endif /* define the all particles as a active on all the processors for the first time grav calc. */ @@ -1150,25 +1132,19 @@ int main(int argc, char *argv[]) energy_contr(time_cur, timesteps, n_act_sum, g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext); } /* if (myRank == rootRank) */ -#ifdef ETICS_DUMP - if (diskstep==0) { +#ifdef ETICS + if (config->etics_dump_coeffs && (diskstep==0)) { + char out_fname[256]; sprintf(out_fname, "coeffs.%06d.%02d.dat", 0, myRank); grapite_dump(out_fname, 2); } -#endif - /* Wait to all processors to finish his works... */ - MPI_Barrier(MPI_COMM_WORLD); - -#ifdef ETICS_CEP - // First calculate the DC - grapite_calc_center(N, m, x, v, xcm, vcm, xdc, vdc); - - // Now copy it to the global particle list - memcpy(x[grapite_cep_index], xdc, 3*sizeof(double)); - memcpy(v[grapite_cep_index], vdc, 3*sizeof(double)); - - grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]); + if (grapite_cep_index >= 0) { + grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc); + memcpy(x[grapite_cep_index], xdc, 3*sizeof(double)); + memcpy(v[grapite_cep_index], vdc, 3*sizeof(double)); + grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]); + } #endif /* Wait to all processors to finish his works... */ @@ -1506,9 +1482,9 @@ int main(int argc, char *argv[]) get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0); #endif - for (int i=0; i= 0) { + grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc); + memcpy(x[grapite_cep_index], xdc, 3*sizeof(double)); + memcpy(v[grapite_cep_index], vdc, 3*sizeof(double)); + grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]); } - grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]); #endif t_contr += dt_contr; } /* if (time_cur >= t_contr) */ if (time_cur >= t_disk) { + char out_fname[256]; + diskstep++; if (myRank == rootRank) { - diskstep++; - char out_fname[256]; sprintf(out_fname, "%06d", diskstep); if (config->output_hdf5) h5_write(std::string(out_fname) + ".h5", diskstep, N, time_cur, m, x, v, pot, a, adot, config->output_extra_mode, config->output_hdf5_double_precision); else ascii_write(std::string(out_fname) + ".dat", diskstep, N, time_cur, m, x, v, config->output_ascii_precision); } /* if (myRank == rootRank) */ -#ifdef ETICS_DUMP - sprintf(out_fname, "coeffs.%06d.%02d.dat", diskstep, myRank); - grapite_dump(out_fname, 2); +#ifdef ETICS + if (config->etics_dump_coeffs) { + sprintf(out_fname, "coeffs.%06d.%02d.dat", diskstep, myRank); + grapite_dump(out_fname, 2); + } #endif - t_disk += dt_disk; } /* if (time_cur >= t_disk) */ - } /* while (time_cur < t_end) */ /* close the local GRAPEs */ - g6_close(clusterid); /* Wait to all processors to finish his works... */