Now working as the hybrid code

Removed the need for the annoying ETICS_CEP flag; now dt_scf is read from the config file.
2020-04-13 23:08:42 -04:00 · 2020-04-13 23:08:42 -04:00 · b51613695f
commit b51613695f
parent 75ad0f0e89
5 changed files with 140 additions and 95 deletions
--- a/4
+++ b/4
@ -2,8 +2,6 @@ CUDAHOME     ?= /usr/local/cuda
 CPPFLAGS     += -DETICS
 OPTIMIZATION ?= 3
 ETICS_DTSCF  ?= 0.015625
 CUDAINC       = -I$(CUDAHOME)/include -I$(CUDAHOME)/samples/common/inc/
 CUDALIB       = -L$(CUDAHOME)/lib64 -lcudart -lcudadevrt
@ -25,7 +23,7 @@ CPPFLAGS     += -DHAS_HDF5
 LIB          += -lhdf5 -lz -ldl
 default:
-	$(MPICXX) $(CPPFLAGS) $(CXXFLAGS) -DETICS_DTSCF=$(ETICS_DTSCF) $(INC) external.cpp io.cpp config.cpp phigrape.cpp -o $(EXECUTABLE) $(LIB)
+	$(MPICXX) $(CPPFLAGS) $(CXXFLAGS) $(INC) external.cpp io.cpp config.cpp phigrape.cpp -o $(EXECUTABLE) $(LIB)
 yebisu: CPPFLAGS := $(filter-out -DETICS, $(CPPFLAGS))
 yebisu: default
--- a/config.cpp
+++ b/config.cpp
@ -228,5 +228,12 @@ Config::Config(std::string file_name)
    ext_dehnen_r = get_parameter<double>(dictionary, "ext_dehnen_r", -1);
    ext_dehnen_gamma = get_parameter<double>(dictionary, "ext_dehnen_gamma", -1);
 #ifdef ETICS
    dt_scf = get_parameter<double>(dictionary, "dt_scf");
    grapite_mask_file_name = get_parameter<std::string>(dictionary, "grapite_mask_file_name", "grapite.mask");
    etics_dump_coeffs = get_parameter<bool>(dictionary, "etics_dump_coeffs", false);
    grapite_active_search = get_parameter<bool>(dictionary, "grapite_active_search", false);
 #endif
    error_checking();
 }
--- a/config.h
+++ b/config.h
@ -52,6 +52,13 @@ public:
    double              ext_dehnen_r;
    double              ext_dehnen_gamma;
 #ifdef ETICS
    double              dt_scf;
    std::string         grapite_mask_file_name;
    bool                etics_dump_coeffs;
    bool                grapite_active_search;
 #endif
 private:
    using Dictionary = std::unordered_map<std::string,std::string>;
    Dictionary read_config_file(const std::string file_name);
--- a/phigrape.conf
+++ b/phigrape.conf
@ -6,7 +6,7 @@
 eps =                                   1E-4
 # End time of the calculation
-t_end =                                 0.25
+t_end =                                 1
 # Interval of snapshot files output
 dt_disk =                               0.125
@ -25,33 +25,44 @@ eta =                                   0.01
 # Number of devices (GRAPEs or GPUs per node) [default: 0]
-# By default, each MPI process will attempt to bind to one device. To change this behaviour, set the value to the number of available devices in the system. For example, if from some reason you want to run multiple MPI processes on a machine with a single device, set the value to 1 and use the mpirun utility (or whatever is used in your job scheduler) to launch as many processes as you like.
+# By default, each MPI process will attempt to bind to one device. To change
-#devices_per_node =                      1
+# this behaviour, set the value to the number of available devices in the
 # system. For example, if from some reason you want to run multiple MPI
 # processes on a machine with a single device, set the value to 1 and use the
 # mpirun utility (or whatever is used in your job scheduler) to launch as many
 # processes as you like.
 devices_per_node =                      1
 ##########
 # OUTPUT #
 ##########
-# Whether to use HDF5 format for snapshot and restart; regular ASCII snapshorts are saved if false [default: false]
+# Whether to use HDF5 format for snapshot and restart; regular ASCII snapshorts
 # are saved if false [default: false]
 #output_hdf5 =                           true
-# If using HDF5 output, use double precision or not [default: true]
+# If using HDF5 output, use double precision or not [default: true] Consider
-# Consider setting to false to save disk space. Restart file is always saved in double precision.
+# setting to false to save disk space. Restart file is always saved in double
 # precision.
 #output_hdf5_double_precision =          true
 # If using ASCII output, the number of digits after the decimal point [default: 10]
 # Restart file is saved with 16 digits after the decimal point.
 #output_ascii_precision =                6
-# Extra output: optionally save potential, acceleration and jerk in snapshot files [default: 0]
+# Extra output: optionally save potential, acceleration and jerk in snapshot
-# This is a number between 0 and 7 that encodes the output options in the following way:
+# files [default: 0]
 # This is a number between 0 and 7 that encodes the output options in the
 # following way:
 # [value] = [save jerk]*4 + [save acceleration]*2 + [save potential]
-# Example: choose 5 if it is needed to save the jerk and the potential, but not the acceleration for some reason.
+# Example: choose 5 if it is needed to save the jerk and the potential, but not
 # the acceleration for some reason.
 # Currently implemented in HDF5 output only.
 #output_extra_mode =                     7
-# Whether to output a warning on the screen when the minimum time step is encountered. [default: false]
+# Whether to output a warning on the screen when the minimum time step is
 # encountered. [default: false]
 #dt_min_warning =                        false
@ -59,12 +70,18 @@ eta =                                   0.01
 # EXTERNAL GRAVITY #
 ####################
-# Remember that external gravity models are applied at the same coordinate system as the particles. If the idea is to simulate a globular cluster orbiting in an external field, be sure to set the initial conditions appropriately (applying a shift to the coordinates and velocities).
+# Remember that external gravity models are applied at the same coordinate
 # system as the particles. If the idea is to simulate a globular cluster
 # orbiting in an external field, be sure to set the initial conditions
 # appropriately (applying a shift to the coordinates and velocities).
-# Whether the parameters for the external gravitational field given below are in physical units or Hénon units. If true, the system used is {kiloparsec, solar mass, kilometre per second} [default: false]
+# Whether the parameters for the external gravitational field given below are in
 # physical units or Hénon units. If true, the system used is {kiloparsec, solar
 # mass, kilometre per second} [default: false]
 #ext_units_physical =                    true
-# If Physical units were selected, specify the simulation's unit mass (is solar masses) and unit lenght (in parsec; not kiloparsec)
+# If Physical units were selected, specify the simulation's unit mass (is solar
 # masses) and unit lenght (in parsec; not kiloparsec)
 # TODO: add the option to normalize using other units.
 #unit_mass   =                           4E5 # MSun
 #unit_length =                           15  # pc
@ -75,18 +92,21 @@ eta =                                   0.01
 #ext_b_bulge =                           1.9 # kpc
-# The disk is a Miyamoto-Nagai potential with the following total mass, scale length, and scale height
+# The disk is a Miyamoto-Nagai potential with the following total mass, scale
 # length, and scale height
 #ext_m_disk =                            6.8E10 # MSun
 #ext_a_disk =                            3.00   # kpc
 #ext_b_disk =                            0.28   # kpc
-# This halo option is yet another Plummer potential with the following total mass and radius.
+# This halo option is yet another Plummer potential with the following total
 # mass and radius.
 #ext_m_halo_plummer =                    8E11 # MSun
 #ext_b_halo_plummer =                    245  # kpc
-# This halo option is a logarithmic potential with the following velocity and radius parameters.
+# This halo option is a logarithmic potential with the following velocity and
 # radius parameters.
 #ext_log_halo_v =                        240 # km/s
 #ext_log_halo_r =                        1   # kpc
@ -100,18 +120,24 @@ eta =                                   0.01
 # LIVE SUPERMASSIVE BLACK HOLE(S) #
 ###################################
-# There is special treatment for particles representing supermassive black holes (SMBHs): they are integrated at every time step, they can have custom softening in SMBH-SMBH interactions, and post Newtonian terms can be added to the gravity.
+# There is special treatment for particles representing supermassive black holes
 # (SMBHs): they are integrated at every time step, they can have custom
 # softening in SMBH-SMBH interactions, and post Newtonian terms can be added to
 # the gravity.
 # The number of SMBH particles. Can be 0 (no SMBH), 1, or 2. [default: 0]
 #live_smbh_count =                       2
-# Custom softening length for SMBH-SMBH interactions (can also be zero). If non-negative, the custom softening is applied. [default: -1]
+# Custom softening length for SMBH-SMBH interactions (can also be zero). If
 # non-negative, the custom softening is applied. [default: -1]
 #live_smbh_custom_eps =                  0
 #TODO this is actually related only to BINARY smbh!
 # Output additional diagnostics about live SMBHs. [default: false]
 #live_smbh_output =                      true
-# Output additional diagnostics about the SMBH's (or SMBHs') nearest neighbours (number could be set as shown below). [default: false]
+# Output additional diagnostics about the SMBH's (or SMBHs') nearest neighbours
 # (number could be set as shown below). [default: false]
 #live_smbh_neighbor_output =             true
 # Number of nearest neighbours to the SMBH (or SMBHs) to include in output. [default: 10]
@ -124,22 +150,62 @@ eta =                                   0.01
 # The following parameters can be set when live_smbh_count is 2.
-# Output additional diagnostics about the sphere of influence (size could be set as shown below). [default: false]
+# Output additional diagnostics about the sphere of influence (size could be set
 # as shown below). [default: false]
 #binary_smbh_influence_sphere_output =   true
-# The influence sphere is centred at the binary SMBH's centre of mass, and its radius is the semi-major axis of the binary times the factor below. [default: 10]
+# The influence sphere is centred at the binary SMBH's centre of mass, and its
-#binary_smbh_influence_radius_factor =   20
+# radius is the semi-major axis of the binary times the factor below. [default: 10]
 #binary_smbh_influence_radius_factor =   3.162277660168379497918067e+03
 # Add post Newtonian terms to SMBH-SMBH gravity. [default: false]
 #binary_smbh_pn =                        true
-# A mask array (zeros and ones) determining whether or not to use specific post-Newtonian terms.
+# A mask array (zeros and ones) determining whether or not to use specific
-# The elements represent {0, 1, 2, 2.5, 3, 3.5, spin} [default: {1,1,1,1,1,1,1}]
+# post-Newtonian terms.
 # The elements represent {Newtonian, 1, 2, 2.5, 3, 3.5, spin}
 # Note: the first element in the array has no effect, the Newtonian force is
 # always included.
 #pn_usage =                              {1, 1, 1, 1, 0, 0, 0}
 # The speed of light in N-body units [default: 500]
 #pn_c =                                  477.12
 # The spin vectors of the two SMBHs. Only define these if the last component of
 # pn_usage is set to one.
 #smbh1_spin =                            {0, 0, 1}
 #smbh2_spin =                            {0, 0, 1}
 ###############
 # HYBRID CODE #
 ###############
 # The hybridization with the SCF code is enabled if the ETICS preprocessor flag
 # is defined in the when compiling.
 # Time intervals to calculate the SCF series expansion.
 dt_scf =                                0.015625
 # Name of the mask file for GRAPite [default: grapite.mask]
 grapite_mask_file_name =                grapite.mask
 # Whether to write to disk a list of SCF coefficients at every dt_disk. [default: false]
 etics_dump_coeffs =                     true
 # Whether to use an alternative procedure for active particle search that is
 # available in the GRAPite library. This requires the number of particles in
 # each MPI process to be exactly divisible by 32. This can substantially
 # accelerate the calculation in some circumstances [default: false]
 grapite_active_search =                 true
 # TODO
 ########
 # etics dump mode
 # threshold for execution on device for grapite
 # scaling parameter override
 ####################################
 # Negative powers of two           #
--- a/phigrape.cpp
+++ b/phigrape.cpp
@ -62,11 +62,6 @@ Config *config;
 #ifdef ETICS
 #include "grapite.h"
 // why do we need CEP as a compilaion flag... just have it always on when ETICS is on. IF there is no CEP, there should be a graceful skipping of those operations.
 //#define ETICS_CEP
 #ifndef ETICS_DTSCF
 #error "ETICS_DTSCF must be defined"
 #endif
 #endif
 #define TIMING
@ -140,8 +135,6 @@ int    new_tunit=51, new_xunit=51;
 double ti=0.0;
 double eps_BH=0.0;
 /* external potential... */
 double3 x_bh1, x_bh2, v_bh1, v_bh2;
@ -156,11 +149,8 @@ double3 a_pn1[7], adot_pn1[7], a_pn2[7], adot_pn2[7];
 #include "pn_bh_spin.c"
 #ifdef ETICS
 double t_exp, dt_exp=ETICS_DTSCF; // t_exp is just the initial value
 #ifdef ETICS_CEP
 int grapite_cep_index;
 #endif
 #endif
 void get_CPU_time(double *time_real, double *time_user, double *time_syst)
 {
@ -989,10 +979,9 @@ int main(int argc, char *argv[])
    g6_set_xunit(new_xunit);
 #ifdef ETICS
-    grapite_read_particle_tags(N, "grapite.mask", myRank, n_loc);
+    grapite_read_particle_tags(N, config->grapite_mask_file_name.c_str(), myRank, n_loc);
-    grapite_set_dt_exp(dt_exp);
+    grapite_set_dt_exp(config->dt_scf);
-    dt_exp = time_cur; // if we don't have a binary restart then we don't remember the coefficients, and we need to calculate them now.
+    grapite_set_t_exp(time_cur);
    grapite_set_t_exp(t_exp);
 #endif
    /* load the nj particles to the G6 */
@ -1004,24 +993,17 @@ int main(int argc, char *argv[])
 #ifdef ETICS
    double etics_length_scale;
-    if (myRank == rootRank) etics_length_scale = grapite_get_length_scale(N, m, x, v); // We don't want all ranks to do it, because they need to write a file and might confuse each other
+    if (myRank == rootRank) etics_length_scale = grapite_get_length_scale(N, m, (double(*)[3])x, (double(*)[3])v); // We don't want all ranks to do it, because they need to write a file and might confuse each other
    MPI_Bcast(&etics_length_scale, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
    grapite_set_length_scale(etics_length_scale);
 #endif
 #ifdef ETICS_CEP
    // First time only: get the CEP index
    grapite_cep_index = grapite_get_cep_index();
-
+    if (grapite_cep_index >= 0) {
-    // First calculate the DC
+        grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc);
-    grapite_calc_center(N, m, x, v, xcm, vcm, xdc, vdc);
+        memcpy(x[grapite_cep_index], xdc, 3*sizeof(double));
-
+        memcpy(v[grapite_cep_index], vdc, 3*sizeof(double));
-    // Now copy it to the global particle list
+        grapite_update_cep(time_cur, xdc, vdc, zeros, zeros);
-    memcpy(x[grapite_cep_index], xdc, 3*sizeof(double));
+    }
    memcpy(v[grapite_cep_index], vdc, 3*sizeof(double));
    double zeros[3] = {0., 0., 0.}; // We haven't calculated the force yet!
    grapite_update_cep(time_cur, xdc, vdc, zeros, zeros);
 #endif
    /* define the all particles as a active on all the processors for the first time grav calc. */
@ -1150,25 +1132,19 @@ int main(int argc, char *argv[])
        energy_contr(time_cur, timesteps, n_act_sum, g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
    } /* if (myRank == rootRank) */
-#ifdef ETICS_DUMP
+#ifdef ETICS
-    if (diskstep==0) {
+    if (config->etics_dump_coeffs && (diskstep==0)) {
        char out_fname[256];
        sprintf(out_fname, "coeffs.%06d.%02d.dat", 0, myRank);
        grapite_dump(out_fname, 2);
    }
 #endif
-    /* Wait to all processors to finish his works... */
+    if (grapite_cep_index >= 0) {
-    MPI_Barrier(MPI_COMM_WORLD);
+        grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc);
-
+        memcpy(x[grapite_cep_index], xdc, 3*sizeof(double));
-#ifdef ETICS_CEP
+        memcpy(v[grapite_cep_index], vdc, 3*sizeof(double));
-    // First calculate the DC
+        grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]);
-    grapite_calc_center(N, m, x, v, xcm, vcm, xdc, vdc);
+    }
    // Now copy it to the global particle list
    memcpy(x[grapite_cep_index], xdc, 3*sizeof(double));
    memcpy(v[grapite_cep_index], vdc, 3*sizeof(double));
    grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]);
 #endif
    /* Wait to all processors to finish his works... */
@ -1506,9 +1482,9 @@ int main(int argc, char *argv[])
        get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
 #endif
-        for (int i=0; i<n_act; i++) { // TODO would be nicer to use i instead of j here
+        for (int i=0; i<n_act; i++) {
-#ifdef ETICS_CEP
+#ifdef ETICS
-            if (ind_act[i] == grapite_cep_index) grapite_update_cep(t_act[i], x_act[i], v_act[i], a_act[i], adot_act[i]); // All ranks should do it.
+            if (ind_act[i] == grapite_cep_index) grapite_update_cep(t[grapite_cep_index], x[grapite_cep_index], v[grapite_cep_index], a[grapite_cep_index], adot[grapite_cep_index]); // All ranks should do it.
 #endif
            cur_rank = ind_act[i]/n_loc;
@ -1586,48 +1562,39 @@ int main(int argc, char *argv[])
            } /* if (myRank == rootRank) */
-#ifdef ETICS_CEP
+#ifdef ETICS
            // We are /inside/ a control step, so all particles must be synchronized; we can safely calculate their density centre. The acceleration and jerk currently in the memory are for the predicted position of the CEP, by calling grapite_calc_center we "correct" the position and velocity, but not the gravity at that point.
-
+            if (grapite_cep_index >= 0) {
-            // First calculate the DC
+                grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc);
-            grapite_calc_center(N, m, x, v, xcm, vcm, xdc, vdc);
+                memcpy(x[grapite_cep_index], xdc, 3*sizeof(double));
-
+                memcpy(v[grapite_cep_index], vdc, 3*sizeof(double));
-            // Now copy it to the global particle list
+                grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]);
            memcpy(x[grapite_cep_index], xdc, 3*sizeof(double));
            memcpy(v[grapite_cep_index], vdc, 3*sizeof(double));
            // Now copy it to the local particle list for tha appropriate rank
            if (myRank == grapite_cep_index/n_loc) {
                memcpy(x_loc[grapite_cep_index - myRank*n_loc], xdc, 3*sizeof(double));
                memcpy(v_loc[grapite_cep_index - myRank*n_loc], vdc, 3*sizeof(double));
            }
            grapite_update_cep(time_cur, xdc, vdc, a[grapite_cep_index], adot[grapite_cep_index]);
 #endif
            t_contr += dt_contr;
        } /* if (time_cur >= t_contr) */
        if (time_cur >= t_disk) {
            char out_fname[256];
            diskstep++;
            if (myRank == rootRank) {
                diskstep++;
                char out_fname[256];
                sprintf(out_fname, "%06d", diskstep);
                if (config->output_hdf5) h5_write(std::string(out_fname) + ".h5", diskstep, N, time_cur, m, x, v, pot, a, adot, config->output_extra_mode, config->output_hdf5_double_precision);
                else ascii_write(std::string(out_fname) + ".dat", diskstep, N, time_cur, m, x, v, config->output_ascii_precision);
            } /* if (myRank == rootRank) */
-#ifdef ETICS_DUMP
+#ifdef ETICS
-            sprintf(out_fname, "coeffs.%06d.%02d.dat", diskstep, myRank);
+            if (config->etics_dump_coeffs) {
-            grapite_dump(out_fname, 2);
+                sprintf(out_fname, "coeffs.%06d.%02d.dat", diskstep, myRank);
                grapite_dump(out_fname, 2);
            }
 #endif
            t_disk += dt_disk;
        } /* if (time_cur >= t_disk) */
    } /* while (time_cur < t_end) */
    /* close the local GRAPEs */
    g6_close(clusterid);
    /* Wait to all processors to finish his works... */