Moved calc_self_grav to a class and cleaned up a little

Yohai Meiron 2020-04-14 20:20:39 -04:00
parent b51613695f
commit 30ae8631a9

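Summary: the free function calc_self_grav() is replaced by a function object, Calc_self_grav, that owns the per-particle softening array (h2) and the GRAPE call counter (g6_calls), so eps2, g6_calls, n_loc and h2_i no longer have to be threaded through every call. A minimal usage sketch, assembled from the call sites in the diff below (names such as time_cur, n_act, ind_act and rootRank are taken from main(); this is an illustration, not code from the commit):

// Construct once per MPI rank, after N, n_loc, clusterid, npipe and eps are known.
Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps);

// Call as a functor wherever the old calc_self_grav() was called.
calc_self_grav(time_cur, n_act, ind_act, x_act_new, v_act_new,
               pot_act_tmp, a_act_tmp, adot_act_tmp);

// The per-rank GRAPE call counter now lives on the object and is reduced at the end of the run.
double g6_calls_sum;
MPI_Reduce(&calc_self_grav.g6_calls, &g6_calls_sum, 1, MPI_DOUBLE,
           MPI_SUM, rootRank, MPI_COMM_WORLD);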

@@ -123,17 +123,7 @@ double DT_ACT_REDUCE;
#endif
/* some local settings for G6a board's */
int clusterid, ii, nn, numGPU;
int npipe=G6_NPIPE, index_i[G6_NPIPE];
double h2_i[G6_NPIPE], p_i[G6_NPIPE];
double3 x_i[G6_NPIPE], v_i[G6_NPIPE],
a_i[G6_NPIPE], jerk_i[G6_NPIPE];
int new_tunit=51, new_xunit=51;
double ti=0.0;
/* external potential... */
@@ -286,43 +276,32 @@ void write_bh_nb_data(double time_cur, int N, double m[], double3 x[], double3 v
fclose(out);
}
void calc_self_grav(double t, double eps2, double &g6_calls, int n_loc,
int n_act, int ind_act[],
double3 x_act_new[], double3 v_act_new[],
double pot_act_tmp[],
double3 a_act_tmp[],
double3 adot_act_tmp[],
double h2_i[])
{
/* calc the new grav for the active particles */
#ifdef TIMING
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
#endif
class Calc_self_grav {
public:
Calc_self_grav(const int N, const int n_loc, const int clusterid, const int npipe, const double eps)
: g6_calls(0), n_loc(n_loc), clusterid(clusterid), npipe(npipe), eps2(eps*eps)
{
h2.assign(N, eps2);
}
void operator()(const double t, const int n_act, int ind_act[], const double3 x_act[], const double3 v_act[],
double pot[], double3 acc[], double3 jrk[])
{
g6_set_ti(clusterid, t);
int ni = n_act; // TODO why is this needed?
/* define the local phi, a, adot for these active particles */
for (int i=0; i<ni; i+=npipe) {
nn = npipe;
if (ni-i < npipe) nn = ni - i;
for (ii=0; ii<nn; ii++) {
h2_i[ii] = eps2; // TODO This should be a global or something
} /* ii */
for (int i=0; i<n_act; i+=npipe) {
int nn = npipe;
if (n_act-i < npipe) nn = n_act - i;
//TODO any way we can clean up this ugly casting?
g6calc_firsthalf(clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act_new+i, (double(*)[3])v_act_new+i, (double(*)[3])a_act_tmp+i, (double(*)[3])adot_act_tmp+i, pot_act_tmp+i, eps2, h2_i);
g6calc_lasthalf( clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act_new+i, (double(*)[3])v_act_new+i, eps2, h2_i, (double(*)[3])a_act_tmp+i, (double(*)[3])adot_act_tmp+i, pot_act_tmp+i);
g6calc_firsthalf(clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act+i, (double(*)[3])v_act+i, (double(*)[3])acc+i, (double(*)[3])jrk+i, pot+i, eps2, h2.data());
g6calc_lasthalf( clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act+i, (double(*)[3])v_act+i, eps2, h2.data(), (double(*)[3])acc+i, (double(*)[3])jrk+i, pot+i);
g6_calls++;
} /* i */
#ifdef TIMING
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
DT_ACT_GRAV += (CPU_tmp_user - CPU_tmp_user0);
#endif
}
}
double g6_calls;
private:
int n_loc, clusterid, npipe;
double eps2;
std::vector<double> h2;
};
void calc_ext_grav(std::vector<External_gravity*> &external_gravity_components, int n_act, double3 *x_act_new, double3 *v_act_new, double *pot_act_ext, double3 *a_act_new, double3* adot_act_new)
{
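Aside: the g6calc_* calls above still carry the raw (double(*)[3]) casts flagged by the TODO. One possible cleanup, sketched here as a suggestion rather than as part of the commit, is a small hypothetical helper (as_g6) that centralizes the cast and documents the layout assumption it relies on, namely that double3 is a plain struct of exactly three doubles with no padding, which the existing casts already assume:

// Hypothetical helper, not in this commit: reinterpret a double3 array for the GRAPE interface.
static inline auto as_g6(double3 *p) -> double (*)[3]
{
    static_assert(sizeof(double3) == 3*sizeof(double), "double3 must be tightly packed");
    return reinterpret_cast<double (*)[3]>(p);
}

// Possible use at the call site, keeping the same pointer arithmetic as the raw casts:
// g6calc_firsthalf(clusterid, n_loc, nn, ind_act+i, as_g6(x_act+i), as_g6(v_act+i),
//                  as_g6(acc+i), as_g6(jrk+i), pot+i, eps2, h2.data());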
@@ -705,15 +684,15 @@ int main(int argc, char *argv[])
skip_con=0, tmp_i;
double dt_disk, dt_contr, t_disk=0.0, t_contr=0.0,
dt_bh, t_bh=0.0, dt_bh_tmp,
dt_bh, t_bh=0.0,
t_end, time_cur, dt_min, dt_max, min_t, min_t_loc,
eta_s, eta, eta_bh,
E_tot_0, E_tot_corr_0, E_tot_corr_sd_0,
rcm_sum=0.0, vcm_sum=0.0,
eps=0.0, eps2,
eps,
a2_mod, adot2_mod,
dt_tmp, dt2half, dt3over6,
timesteps=0.0, n_act_sum=0.0, n_act_distr[N_MAX], g6_calls=0.0, g6_calls_sum=0.0;
timesteps=0.0, n_act_sum=0.0, n_act_distr[N_MAX];
double3 xcm, vcm, mom,
xdc, vdc,
@@ -743,13 +722,11 @@ int main(int argc, char *argv[])
/* data for active particles */
int n_act, ind_act[N_MAX];
double m_act[N_MAX],
pot_act[N_MAX], t_act[N_MAX], dt_act[N_MAX],
double t_act[N_MAX],
pot_act_new[N_MAX],
pot_act_tmp[N_MAX];
double3 x_act[N_MAX], v_act[N_MAX],
a_act[N_MAX], adot_act[N_MAX],
double3 a_act[N_MAX], adot_act[N_MAX],
x_act_new[N_MAX], v_act_new[N_MAX],
a_act_new[N_MAX], adot_act_new[N_MAX],
a_act_tmp[N_MAX], adot_act_tmp[N_MAX];;
@@ -765,6 +742,12 @@ int main(int argc, char *argv[])
int inf_event[N_MAX];
double3 x_bbhc, v_bbhc;
/* some local settings for G6a board's */
int clusterid, numGPU, npipe=G6_NPIPE;
int new_tunit=51, new_xunit=51;
double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface.
/* INIT the rand() !!! */
@@ -801,31 +784,19 @@ int main(int argc, char *argv[])
}
else
ascii_read(config->input_file_name, &diskstep, &N, &time_cur, m, x, v);
std::iota(ind, ind+N, 0);
if (myRank == rootRank) {
//TODO move it out of (myRank == rootRank) so you don't need to communicate them.
eps = config->eps;
eta = config->eta;
t_end = config->t_end;
dt_disk = config->dt_disk;
dt_contr = config->dt_contr;
dt_bh = config->dt_bh;
eta = config->eta;
strcpy(inp_fname, config->input_file_name.c_str());
if (myRank == rootRank) {
if (config->binary_smbh_influence_sphere_output) for (int i=0; i<N; i++) inf_event[i] = 0; // WARNING N wasn't set yet!
/*
eps : Plummer softening parameter (can be even 0)
t_end : end time of calculation
dt_disk : interval of snapshot files output (0xxx.dat)
dt_contr : interval for the energy control output (contr.dat)
dt_bh : interval for BH output (bh.dat & bh_neighbors.dat)
eta : parameter for timestep determination
inp_data : name of the input file (data.inp)
*/
printf("\n");
printf("Begin the calculation of phi-GRAPE program on %03d processors\n", n_proc);
printf("\n");
@@ -872,19 +843,6 @@ int main(int argc, char *argv[])
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
/* Broadcast all useful values to all processors... */
MPI_Bcast(&N, 1, MPI_INT, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&eps, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&eta, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&t_end, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&dt_disk, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&dt_contr, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&dt_bh, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(&time_cur, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
double normalization_mass=1, normalization_length=1, normalization_velocity=1;
if (config->ext_units_physical) {
normalization_mass = 1/config->unit_mass;
@@ -915,8 +873,6 @@ int main(int argc, char *argv[])
eta_s = eta/ETA_S_CORR;
eta_bh = eta/ETA_BH_CORR;
eps2 = SQR(eps);
dt_min = 1.0*pow(2.0, DTMINPOWER);
dt_max = 1.0*pow(2.0, DTMAXPOWER);
@@ -942,25 +898,13 @@ int main(int argc, char *argv[])
n_loc = N/n_proc;
Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps);
Active_search active_search(myRank, n_proc, n_loc, N);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
/* Broadcast the values of all particles to all processors... */
MPI_Bcast(ind, N, MPI_INT, rootRank, MPI_COMM_WORLD);
MPI_Bcast(m, N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(x, 3*N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
MPI_Bcast(v, 3*N, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
/* init the local GRAPE's */
if (config->devices_per_node==0) {
MPI_Comm shmcomm;
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm);
@@ -973,6 +917,7 @@ int main(int argc, char *argv[])
printf("Rank of the processor %03d : Number of GPUs %01d : Cluster ID %01d \n", myRank, numGPU, clusterid);
fflush(stdout);
/* init the local GRAPEs */
g6_open(clusterid);
npipe = g6_npipes();
g6_set_tunit(new_tunit);
@@ -1011,26 +956,14 @@ int main(int argc, char *argv[])
n_act = N;
for (int i=0; i<n_act; i++) {
ind_act[i] = ind[i];
ind_act[i] = ind[i]; // isn't it just i?
iii = ind_act[i];
m_act[i] = m[iii];
x_act[i] = x[iii];
v_act[i] = v[iii];
t_act[i] = t[iii];
dt_act[i] = dt[iii];
} /* i */
// NOTE this is where calc_self_grav_zero() used to be.
calc_self_grav(time_cur, eps2, g6_calls, n_loc,
n_act, ind_act,
x_act, v_act,
pot_act_tmp,
a_act_tmp,
adot_act_tmp,
h2_i);
calc_self_grav(time_cur, n_act, ind_act, x, v,
pot_act_tmp, a_act_tmp, adot_act_tmp);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
@@ -1119,7 +1052,7 @@ int main(int argc, char *argv[])
}
}
calc_ext_grav(external_gravity_components, n_act, x_act, v_act, pot_ext, a, adot);
calc_ext_grav(external_gravity_components, n_act, x, v, pot_ext, a, adot);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
@@ -1129,7 +1062,7 @@ int main(int argc, char *argv[])
/* Energy control... */
if (myRank == rootRank) {
energy_contr(time_cur, timesteps, n_act_sum, g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
} /* if (myRank == rootRank) */
#ifdef ETICS
@@ -1216,8 +1149,6 @@ int main(int argc, char *argv[])
n_act_distr[i-1] = 0.0;
}
g6_calls = 0.0; //TODO this should include the calls at the zeroth step, so move it further up.
#ifdef TIMING
DT_TOT = 0.0;
@@ -1345,13 +1276,16 @@ int main(int argc, char *argv[])
DT_ACT_PRED += (CPU_tmp_user - CPU_tmp_user0);
#endif
calc_self_grav(min_t, eps2, g6_calls, n_loc,
n_act, ind_act,
x_act_new, v_act_new,
pot_act_tmp,
a_act_tmp,
adot_act_tmp,
h2_i);
#ifdef TIMING
get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
#endif
calc_self_grav(min_t, n_act, ind_act, x_act_new, v_act_new,
pot_act_tmp, a_act_tmp, adot_act_tmp);
#ifdef TIMING
get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
DT_ACT_GRAV += (CPU_tmp_user - CPU_tmp_user0);
#endif
/* Reduce the "global" vectors from "local" on all the nodes */
@@ -1527,7 +1461,7 @@ int main(int argc, char *argv[])
if (time_cur >= t_contr) {
if (myRank == rootRank) {
energy_contr(time_cur, timesteps, n_act_sum, g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
/* write cont data */
@@ -1600,7 +1534,8 @@ int main(int argc, char *argv[])
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
MPI_Reduce(&g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD);
double g6_calls_sum;
MPI_Reduce(&calc_self_grav.g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD);
/* Wait to all processors to finish his works... */
MPI_Barrier(MPI_COMM_WORLD);
@@ -1610,7 +1545,7 @@ int main(int argc, char *argv[])
/* Write some output for the timestep annalize... */
printf("\n");
printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls);
printf("timesteps = %.0f Total sum of integrated part. = %.0f g6_calls on all nodes = %.0f \n", timesteps, n_act_sum, g6_calls_sum);
printf("\n");
printf("Real Speed = %.3f GFlops \n", 57.0*N*n_act_sum/(CPU_time_user-CPU_time_user0)/1.0E+09);
fflush(stdout);