Cleaned up energy control as well as many other things

2020-04-29 21:51:05 -04:00 · 2020-04-29 21:51:05 -04:00 · d046c189b3
commit d046c189b3
parent df5d89a0c8
4 changed files with 99 additions and 7556 deletions
--- a/debug.h
+++ b/debug.h
@ -1,143 +0,0 @@
 /***************************************************************/
 void d_swap(double *a, double *b)
 {
 register double tmp;
 tmp = *a; *a = *b; *b = tmp;
 }
 void i_swap(int *a, int *b)
 {
 register int tmp;
 tmp = *a; *a = *b; *b = tmp;
 }
 /***************************************************************/
 /***************************************************************/
 void my_sort(int l, int r, double *arr, int *ind)
 {
 int    i, j, cikl;
 double tmp;
 i = l; j = r;
 tmp = arr[(l+r)/2];
 cikl = 1;
 while (cikl)
  {
    while (arr[i]<tmp) i++;
    while (tmp<arr[j]) j--;
    if (i<=j)
     {
 	d_swap(&arr[i],&arr[j]);
 	i_swap(&ind[i],&ind[j]);
 	i++; j--;
     }
    else
     {
 	cikl = 0;
     }
  }
  if (l<j) my_sort(l, j, arr, ind);
  if (i<r) my_sort(i, r, arr, ind);
 }
 /***************************************************************/
 /***************************************************************/
 double my_select(int n_1, int n_2, int k, double *arr, int *ind)
 {
 int   i, ir, j, l, mid, a_ind;
 double a;
 l  = n_1;
 ir = n_2;
 for(;;)
  {
  if (ir <= l+1) 
    {
    if (ir == l+1 && arr[ir] < arr[l]) 
      {
      d_swap(&arr[l],&arr[ir]);
      i_swap(&ind[l],&ind[ir]);
      }
    return(arr[k]);
    } 
    else 
    {
    mid=(l+ir) >> 1;
    d_swap(&arr[mid],&arr[l+1]);
    i_swap(&ind[mid],&ind[l+1]);
    if (arr[l+1] > arr[ir]) 
      {
      d_swap(&arr[l+1],&arr[ir]);
      i_swap(&ind[l+1],&ind[ir]);
      }
    if (arr[l] > arr[ir]) 
      {
      d_swap(&arr[l],&arr[ir]);
      i_swap(&ind[l],&ind[ir]);
      }
    if (arr[l+1] > arr[l])
      {
      d_swap(&arr[l+1],&arr[l]);
      i_swap(&ind[l+1],&ind[l]);
      }
    i = l+1;
    j = ir;
    a = arr[l];
    a_ind = ind[l];
    for (;;) 
     {
     do i++; while (arr[i] < a);
     do j--; while (arr[j] > a);
     if (j < i) break;
     d_swap(&arr[i],&arr[j]);
     i_swap(&ind[i],&ind[j]);
     }
    arr[l] = arr[j];
    ind[l] = ind[j];
    arr[j] = a;
    ind[j] = a_ind;
    if (j >= k) ir = j-1;
    if (j <= k) l = i;
    }
  }
 }
 /***************************************************************/
 /***************************************************************/
 /*
 double lagrange_radius(double percent)
 { 
 int    Nb;
 double tmp;
 Nb = (int)(percent * N);
 my_sort(0, N-1, d, ind);
 tmp = my_select(0, N-1, Nb-1, d, ind); d[Nb-1] = tmp;
 return(tmp);
 }
 */
 /***************************************************************/
--- a/io.cpp
+++ b/io.cpp
@ -24,7 +24,7 @@ bool is_hdf5(std::string file_name)
    return result;
 }
-void ascii_read(const std::string file_name, int *step_num, int *N, double *t, double *m, double3 *x, double3 *v)
+void ascii_read(const std::string file_name, int& step_num, int& N, double& t, double **m, double3 **x, double3 **v)
 {
    char rest[512];
    int result;
@ -34,21 +34,25 @@ void ascii_read(const std::string file_name, int *step_num, int *N, double *t, d
    std::string str;
    std::getline(file, str);
-    result = sscanf(str.c_str(), "%d%s", step_num, rest);
+    result = sscanf(str.c_str(), "%d%s", &step_num, rest);
    if (result!=1) throw std::runtime_error("Error parsing line 1: expected one integer");
    std::getline(file, str);
-    result = sscanf(str.c_str(), "%d%s", N, rest);
+    result = sscanf(str.c_str(), "%d%s", &N, rest);
    if (result!=1) throw std::runtime_error("Error parsing line 2: expected one integer");
    std::getline(file, str);
-    result = sscanf(str.c_str(), "%lf%s", t, rest);
+    result = sscanf(str.c_str(), "%lf%s", &t, rest);
    if (result!=1) throw std::runtime_error("Error parsing line 3: expected one real number");
    *m = new double[N];
    *x = new double3[N];
    *v = new double3[N];
    int i = -1;
    while (std::getline(file, str)) {
-        if (++i > *N) throw std::runtime_error("Error parsing line " + std::to_string(i+4) + ": particle out of range");
+        if (++i > N) throw std::runtime_error("Error parsing line " + std::to_string(i+4) + ": particle out of range");
-        result = sscanf(str.c_str(), "%*s %lf %lf %lf %lf %lf %lf %lf%s", &m[i], &x[i][0], &x[i][1], &x[i][2], &v[i][0], &v[i][1], &v[i][2], rest);
+        result = sscanf(str.c_str(), "%*s %lf %lf %lf %lf %lf %lf %lf%s", &(*m)[i], &(*x)[i][0], &(*x)[i][1], &(*x)[i][2], &(*v)[i][0], &(*v)[i][1], &(*v)[i][2], rest);
    }
    file.close();
 }
@ -145,7 +149,7 @@ void h5_read(const std::string file_name, int *step_num, int *N, double *t, doub
 void h5_write(const std::string file_name, const int step_num, const int N, const double t, const double *m, const double3 *x, const double3 *v, const double *pot, const double3 *acc, const double3 *jrk, const int extra_mode=0, const bool use_double_precision=true)
 {
 #ifdef HAS_HDF5
-    hid_t file_id, group_id, attribute_id, dataset_id, dataspace_id;
+    hid_t file_id, group_id, attribute_id, dataspace_id;
    hsize_t dims[2] = {(hsize_t)N, 3};
    hid_t h5_float_type;
--- a/phigrape.cpp
+++ b/phigrape.cpp
@ -53,18 +53,6 @@ Coded by       : Peter Berczik
 Version number : 19.04
 Last redaction : 2019.04.16 12:55
 *****************************************************************************/
 #include "double3.h"
 #include "black_holes.h"
 #include "config.h"
 #include "io.h"
 #include "external.h"
 Config *config;
 #ifdef ETICS
 #include "grapite.h"
 #endif
 #define TIMING
 #define ETA_S_CORR   4.0
@ -73,42 +61,37 @@ Config *config;
 #define DTMAXPOWER  -3.0
 #define DTMINPOWER -36.0
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include <string.h>
 #include <time.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <algorithm>
 #include <math.h>
 #include <mpi.h>
 #include <numeric>
 #include <stdexcept>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <time.h>
 #include "black_holes.h"
 #include "config.h"
 #include "double3.h"
 #include "external.h"
 #include "grape6.h"
 #include "io.h"
-#include <mpi.h>
+#ifdef ETICS
 #include "grapite.h"
 #endif
-/* Some "good" functions and constants... */
+Config *config;
 // TODO replace with inline functions
 #define MIN(a,b) (((a)<(b)) ? (a):(b) )
 #define SQR(x)   ((x)*(x) )
 #define KB         1024
 #define MB         (KB*KB)
 #define N_MAX      (32*KB)
 // double L[3]; // needed in pn_bh_spin.c
 // Needed for things related to BHs
 #include "debug.h"
 // These are used in the energy control, could be static but will probably be removed in the end anyway
 double CPU_time_real0, CPU_time_user0, CPU_time_syst0;
 double CPU_time_real,  CPU_time_user,  CPU_time_syst;
 #ifdef TIMING
-
+// TODO clean up here
 double CPU_tmp_real0, CPU_tmp_user0, CPU_tmp_syst0;
 double CPU_tmp_real,  CPU_tmp_user,  CPU_tmp_syst;
@ -118,14 +101,7 @@ double DT_TOT,
    DT_GMC_GRAV, DT_GMC_GMC_GRAV, DT_EXT_GMC_GRAV,
    DT_ACT_CORR, DT_ACT_LOAD,
    DT_STEVOL, DT_STARDISK, DT_STARDESTR;
 double DT_ACT_REDUCE;
 #endif
 #ifdef ETICS
 int grapite_cep_index;
 #endif
 void get_CPU_time(double *time_real, double *time_user, double *time_syst)
@ -178,110 +154,66 @@ private:
    std::vector<double> h2;
 };
-void calc_ext_grav(std::vector<External_gravity*> &external_gravity_components, int n_act, double3 *x_act_new, double3 *v_act_new, double *pot_act_ext, double3 *a_act_new, double3* adot_act_new)
+void calc_ext_grav(std::vector<External_gravity*> &external_gravity_components, int n, double3 *x, double3 *v, double *pot, double3 *acc, double3* jrk)
 // TODO should just be a class that has this pointer array as a member
 {
 #ifdef TIMING
    get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
 #endif
-    /* Define the external potential for all active particles on all nodes */
+    std::fill(pot, pot+n, 0.);
    std::fill(pot_act_ext, pot_act_ext+n_act, 0.);
    for (auto component : external_gravity_components) {
        if (component->is_active)
-            component->apply(n_act, x_act_new, v_act_new, pot_act_ext, a_act_new, adot_act_new);
+            component->apply(n, x, v, pot, acc, jrk);
    }
    /* For simple Plummer potential... */
 #ifdef TIMING
    get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
    DT_EXT_GRAV += (CPU_tmp_user - CPU_tmp_user0);
 #endif
 }
-void energy_contr(const double time_cur, const double timesteps, const double n_act_sum, const double g6_calls, double& rcm_sum, double& vcm_sum, double& E_tot_0, double& E_tot_corr_0, double& E_tot_corr_sd_0, int &skip_con, int N, double m[], double3 x[], double3 v[], double pot[], double pot_ext[])
+void energy_contr(const double time_cur, const double timesteps, const double n_act_sum, const double g6_calls, int N, double m[], double3 x[], double3 v[], double pot[], double pot_ext[])
 {
-    // TODO maybe use static variables here for the previous step energy?
+     double E_pot = 0;
    double E_pot = 0.0;
    for (int i=0; i<N; i++) E_pot += m[i]*pot[i];
    E_pot *= 0.5;
-    double E_kin = 0.0;
+    double E_kin = 0;
-    for (int i=0; i<N; i++) E_kin += m[i]*( SQR(v[i][0]) + SQR(v[i][1]) + SQR(v[i][2]) );
+    for (int i=0; i<N; i++) E_kin += m[i]*v[i].norm2();
    E_kin *= 0.5;
-    double E_pot_ext = 0.0;
+    double E_pot_ext = 0;
    for (int i=0; i<N; i++) E_pot_ext += m[i]*pot_ext[i];
-    if (timesteps == 0.0) { //TODO what is this thing?
+    double m_tot = 0;
        rcm_sum = 0.0;
        vcm_sum = 0.0;
    }
    double mcm     = 0.0;
    double rcm_mod = 0.0;
    double vcm_mod = 0.0;
    double3 xcm = {0, 0, 0};
    double3 vcm = {0, 0, 0};
    for (int i=0; i<N; i++) {
-        mcm += m[i];
+        m_tot += m[i];
        xcm += m[i] * x[i];
        vcm += m[i] * v[i];
-    } /* i */
+    }
-
+    xcm /= m_tot;
-    xcm /= mcm;
+    vcm /= m_tot;
-    vcm /= mcm;
+    double rcm_mod = xcm.norm();
-
+    double vcm_mod = vcm.norm();
    rcm_mod = xcm.norm();
    vcm_mod = vcm.norm();
    rcm_sum += rcm_mod;
    vcm_sum += vcm_mod;
    double3 mom = {0, 0, 0};
    for (int i=0; i<N; i++) {
        mom[0] += m[i] * (x[i][1]* v[i][2] - x[i][2]*v[i][1]);
        mom[1] += m[i] * (x[i][2]* v[i][0] - x[i][0]*v[i][2]);
        mom[2] += m[i] * (x[i][0]* v[i][1] - x[i][1]*v[i][0]);
-    } /* i */
+    }
    get_CPU_time(&CPU_time_real, &CPU_time_user, &CPU_time_syst);
    double E_tot = E_pot + E_kin + E_pot_ext;
    double E_tot_corr    = E_tot /* + E_corr */; // TODO what is this?
-    if (timesteps == 0.0) {
+    static double E_tot_prev;
-        /* initialize the E_tot_0 + etc... */
+    if (timesteps == 0.0) E_tot_prev = E_tot;
-        E_tot_0 = E_tot;
+    double DE_tot = E_tot - E_tot_prev;
        E_tot_corr_0 = E_tot;
        E_tot_corr_sd_0 = E_tot;
        skip_con = 1;
    } else {
        /* read the E_tot_0 + etc... */
        if (skip_con == 0) {
            auto inp = fopen("contr.con","r");
            double tmp;
            fscanf(inp,"%lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE",
                   &tmp, &tmp, &tmp, &tmp,
                   &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp,
                   &E_tot_0, &tmp,
                   &E_tot_corr_0, &tmp,
                   &E_tot_corr_sd_0, &tmp,
                   &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp);
            fclose(inp);
            skip_con = 1;
        }
    }
    double DE_tot         = E_tot         - E_tot_0;
    /* This is the only output to screen */
    printf("%.3E %.3E  % .4E %.4E % .4E  % .4E % .4E  %.2E\n",
@ -301,19 +233,7 @@ void energy_contr(const double time_cur, const double timesteps, const double n_
            CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0);
    fclose(out);
-    out = fopen("contr.con", "w");
+    E_tot_prev = E_tot;
    fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E %.8E %.8E \n",
            time_cur, timesteps, n_act_sum, g6_calls,
            E_pot, E_kin, E_pot_ext,
            E_tot, DE_tot,
            rcm_mod, vcm_mod,
            mom[0], mom[1], mom[2],
            CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0);
    fclose(out);
    E_tot_0 = E_tot;
    E_tot_corr_0 = E_tot_corr;
    E_tot_corr_sd_0 = 0;
 }
 class Active_search {
@ -370,108 +290,50 @@ private:
    int *ind_act_loc;
 };
-class Source_particle_list {
+inline void calc_high_derivatives(const double dt_tmp, const double3 a_old, const double3 a_new, const double3 a1_old, const double3 a1_new, double3& a2, double3& a3)
 public:
    Source_particle_list(int N)
        : N(N)
 {
-        ind  = new int[N];
+    double dtinv = 1/dt_tmp;
        m    = new double[N];
        x    = new double3[N];
        v    = new double3[N];
        pot  = new double[N];
        a    = new double3[N];
        adot = new double3[N];
        t    = new double[N];
        dt   = new double[N];
    }
    ~Source_particle_list()
    {
        delete[] ind;
        delete[] m;
        delete[] x;
        delete[] v;
        delete[] pot;
        delete[] a;
        delete[] adot;
        delete[] t;
        delete[] dt;
    }
    int N;
    int *ind;
    double3 *x, *v, *a, *adot;
    double *m, *t, *dt, *pot;
 };
 inline void calc_high_derivatives(const double dt_tmp, const double3 a_act, const double3 a_act_new, const double3 adot_act, const double3 adot_act_new, double3 *a2, double3 *a3)
 {
    double dtinv = 1.0/dt_tmp;
    double dt2inv = dtinv*dtinv;
    double dt3inv = dt2inv*dtinv;
-    double3 a0mia1 = a_act-a_act_new;
+    double3 a0mia1 = a_old-a_new;
-    double3 ad04plad12 = 4.0*adot_act + 2.0*adot_act_new;
+    double3 ad04plad12 = 4*a1_old + 2*a1_new;
-    double3 ad0plad1 = adot_act + adot_act_new;
+    double3 ad0plad1 = a1_old + a1_new;
-    *a2 = -6.0*a0mia1*dt2inv - ad04plad12*dtinv;
+    a2 = -6*a0mia1*dt2inv - ad04plad12*dtinv;
-    *a3 = 12.0*a0mia1*dt3inv + 6.0*ad0plad1*dt2inv;
+    a3 = 12*a0mia1*dt3inv + 6*ad0plad1*dt2inv;
 }
-inline void corrector(const double dt_tmp, const double3 a2, const double3 a3, double3* x_act_new, double3* v_act_new)
+inline void corrector(const double dt_tmp, const double3 a2, const double3 a3, double3& x, double3& v)
 {
    double dt3over6 = dt_tmp*dt_tmp*dt_tmp/6.0;
    double dt4over24 = dt3over6*dt_tmp/4.0;
    double dt5over120 = dt4over24*dt_tmp/5.0;
-    *x_act_new += dt4over24*a2 + dt5over120*a3;
+    x += dt4over24*a2 + dt5over120*a3;
-    *v_act_new += dt3over6*a2 + dt4over24*a3;
+    v += dt3over6*a2 + dt4over24*a3;
 }
-inline double aarseth_step(const double eta, const double dt_tmp, const double3 a_act_new, const double3 adot_act_new, const double3 a2, const double3 a3)
+inline double aarseth_step(const double eta, const double dt, const double3 a, const double3 a1, const double3 a2, const double3 a3)
 {
-    double a1abs = a_act_new.norm();
+    double a1abs = a.norm();
-    double adot1abs = adot_act_new.norm();
+    double adot1abs = a1.norm();
-    double3 a2dot1 = a2 + dt_tmp*a3;
+    double3 a2dot1 = a2 + dt*a3;
    double a2dot1abs = a2dot1.norm();
    double a3dot1abs = a3.norm();
    return sqrt(eta*(a1abs*a2dot1abs+adot1abs*adot1abs)/(adot1abs*a3dot1abs+a2dot1abs*a2dot1abs));
 }
 int main(int argc, char *argv[])
 {
-    int skip_con=0; // skip_con should just be a static in the energy control function
+    double timesteps=0.0, n_act_sum=0.0;
    double E_tot_0, E_tot_corr_0, E_tot_corr_sd_0,
           rcm_sum=0.0, vcm_sum=0.0,
           timesteps=0.0, n_act_sum=0.0, n_act_distr[N_MAX];
    double3 xcm, vcm, xdc, vdc; // these should go away
    /* data for active particles */
    int    n_act, ind_act[N_MAX];
    double t_act[N_MAX],
           pot_act_new[N_MAX],
           pot_act_tmp[N_MAX];
    // x_act_new and v_act_new arrays hold the predicted position and velocity of i-particles, which is later corrected before moving into the j-particle memory. The _old arrays hold the previous values which are needed to calculate the high derivatives. The _old arrays hold the previous values which are needed to calculate the high derivatives. The [a,adot]_act_tmp arrays hold the calculation results from each node. The [a,adot]_act_new arrays hold the reduced calculation results from all nodes.
    double3 x_act_new[N_MAX], v_act_new[N_MAX],
            a_act_old[N_MAX], adot_act_old[N_MAX],
            a_act_tmp[N_MAX], adot_act_tmp[N_MAX],
            a_act_new[N_MAX], adot_act_new[N_MAX];
    double pot_ext[N_MAX], pot_act_ext[N_MAX];    // for all EXTPOT
    int    i_bh1, i_bh2;
    double m_bh1, m_bh2;
    int    inf_event[N_MAX];
    double3 x_bbhc, v_bbhc;
    int    new_tunit=51, new_xunit=51;
    double3 zeros = {0, 0, 0}; // Dummy; can't really be const because of the GRAPE interface.
    /* INIT the rand() !!! */
@ -494,26 +356,13 @@ int main(int argc, char *argv[])
    /* Print the Rank and the names of processors */
    printf("Rank of the processor %03d on %s \n", myRank, processor_name);
    /* Wait to all processors to finish his works... */
    MPI_Barrier(MPI_COMM_WORLD);
    /* read the input parameters */
    config = new Config("phigrape.conf");
    Source_particle_list source_particle_list(N_MAX);
    int     *ind  = source_particle_list.ind;
    double  *m    = source_particle_list.m;
    double3 *x    = source_particle_list.x;
    double3 *v    = source_particle_list.v;
    double  *pot  = source_particle_list.pot;
    double3 *a    = source_particle_list.a;
    double3 *adot = source_particle_list.adot;
    double  *t    = source_particle_list.t;
    double  *dt   = source_particle_list.dt;
    int diskstep, N;
    double time_cur;
    double *m;
    double3 *x, *v;
    if (is_hdf5(config->input_file_name)) {
 #ifndef HAS_HDF5
        fprintf(stderr, "ERROR: input file is in HDF5 format, but the code was compiled without HDF5 support\n");
@ -522,8 +371,20 @@ int main(int argc, char *argv[])
        h5_read(config->input_file_name, &diskstep, &N, &time_cur, m, x, v);
    }
    else
-        ascii_read(config->input_file_name, &diskstep, &N, &time_cur, m, x, v);
+        ascii_read(config->input_file_name, diskstep, N, time_cur, &m, &x, &v);
    int *ind  = new int[N];
    std::iota(ind, ind+N, 0);
    double3 *a = new double3[N], *adot = new double3[N];
    double *pot  = new double[N], *pot_ext = new double[N], *t = new double[N], *dt = new double[N];
    /* data for active particles */
    // x_act_new and v_act_new arrays hold the predicted position and velocity of i-particles, which is later corrected before moving into the j-particle memory. The [pot,a,adot]_act_tmp arrays hold the calculation results from each node. The [pot,a,adot]_act_new arrays hold the reduced calculation results from all nodes.
    int n_act, *ind_act = new int[N];
    double *pot_act_new = new double[N], *pot_act_tmp = new double[N], *pot_act_ext = new double[N];
    double3 *x_act_new = new double3[N], *v_act_new    = new double3[N],
            *a_act_tmp = new double3[N], *adot_act_tmp = new double3[N],
            *a_act_new = new double3[N], *adot_act_new = new double3[N];
    double eps = config->eps;
    double eta = config->eta;
@ -558,19 +419,12 @@ int main(int argc, char *argv[])
                out = fopen("bh_neighbors.dat", "w");
                fclose(out);
            }
            // if (config->binary_smbh_influence_sphere_output) {
            //     out = fopen("bbh_inf.dat", "w");
            //     fclose(out);
            // }
        }
        get_CPU_time(&CPU_time_real0, &CPU_time_user0, &CPU_time_syst0);
    } /* if (myRank == rootRank) */
    /* Wait to all processors to finish his works... */
    MPI_Barrier(MPI_COMM_WORLD);
    double normalization_mass=1, normalization_length=1, normalization_velocity=1;
    if (config->ext_units_physical) {
        normalization_mass = 1/config->unit_mass;
@ -624,10 +478,6 @@ int main(int argc, char *argv[])
    std::fill(t,  t+N,  time_cur);
    std::fill(dt, dt+N, dt_min);
    /* Wait to all processors to finish his works... */
    MPI_Barrier(MPI_COMM_WORLD);
    /* some local settings for G6a boards */
    int clusterid, numGPU;
    if (config->devices_per_node==0) {
@ -645,8 +495,8 @@ int main(int argc, char *argv[])
    /* init the local GRAPEs */
    g6_open(clusterid);
    int npipe = g6_npipes();
-    g6_set_tunit(new_tunit);
+    g6_set_tunit(51);
-    g6_set_xunit(new_xunit);
+    g6_set_xunit(51);
    int n_loc = N/n_proc;
    Calc_self_grav calc_self_grav(N, n_loc, clusterid, npipe, eps);
@ -685,7 +535,7 @@ int main(int argc, char *argv[])
    MPI_Bcast(&etics_length_scale, 1, MPI_DOUBLE, rootRank, MPI_COMM_WORLD);
    grapite_set_length_scale(etics_length_scale);
-    grapite_cep_index = grapite_get_cep_index();
+    int grapite_cep_index = grapite_get_cep_index();
    if (grapite_cep_index >= 0) {
        grapite_calc_center(N, m, (double(*)[3])x, (double(*)[3])v, xcm, vcm, xdc, vdc);
        x[grapite_cep_index] = xdc;
@ -695,20 +545,14 @@ int main(int argc, char *argv[])
 #endif
    /* define the all particles as a active on all the processors for the first time grav calc. */
    calc_self_grav(time_cur, N, ind, x, v, pot_act_tmp, a_act_tmp, adot_act_tmp);
    /* Wait to all processors to finish his works... */
    MPI_Barrier(MPI_COMM_WORLD);
    /* Reduce the "global" vectors from "local" on all processors) */
    // TODO why won't we do the MPI_Allreduce inside the calc_self_grav function, and get rid of these _tmp arrays?
    MPI_Allreduce(pot_act_tmp,  pot,    N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    MPI_Allreduce(a_act_tmp,    a,    3*N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    MPI_Allreduce(adot_act_tmp, adot, 3*N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    /* Wait to all processors to finish his works... */
    MPI_Barrier(MPI_COMM_WORLD);
    if (config->live_smbh_count == 2) {
        black_hole_physics.set_xv(x[0], x[1], v[0], v[1]);
        if (config->live_smbh_custom_eps >= 0) black_hole_physics.adjust_softening(pot[0], pot[1], a[0], a[1], adot[0], adot[1]);
@ -717,14 +561,8 @@ int main(int argc, char *argv[])
    calc_ext_grav(external_gravity_components, N, x, v, pot_ext, a, adot);
    /* Wait to all processors to finish his works... */
    MPI_Barrier(MPI_COMM_WORLD);
    /* Energy control... */
-    if (myRank == rootRank) {
+    if (myRank == rootRank) energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, N, m, x, v, pot, pot_ext);
        energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
    } /* if (myRank == rootRank) */
 #ifdef ETICS
    if (config->etics_dump_coeffs && (diskstep==0)) {
@ -741,9 +579,6 @@ int main(int argc, char *argv[])
    }
 #endif
    /* Wait to all processors to finish his works... */
    MPI_Barrier(MPI_COMM_WORLD);
    /* Define initial timestep for all particles on all nodes */
    for (int j=0; j<N; j++) {
@ -776,11 +611,7 @@ int main(int argc, char *argv[])
        for (int i=0; i<config->live_smbh_count; i++) dt[i] = min_dt;
    }
-    /* Wait to all processors to finish his works... */
+    /* load the new values for particles to the local GRAPEs */
    MPI_Barrier(MPI_COMM_WORLD);
    /* load the new values for particles to the local GRAPE's */
    /* load the nj particles to the G6 */
    for (int k=0; k<n_loc; k++) {
        int j = k + myRank*n_loc;
@ -807,10 +638,6 @@ int main(int argc, char *argv[])
    timesteps = 0.0; // Why won't those two be long long instead of double + should include the zeroth step
    n_act_sum = 0.0;
    for (int i=1; i<N+1; i++) { // TODO why like this?
        n_act_distr[i-1] = 0.0;
    }
 #ifdef TIMING
    DT_TOT      = 0.0;
@ -895,29 +722,6 @@ int main(int argc, char *argv[])
        DT_ACT_DEF2 += (CPU_tmp_user - CPU_tmp_user0);
 #endif
 #ifdef TIMING
        get_CPU_time(&CPU_tmp_real0, &CPU_tmp_user0, &CPU_tmp_syst0);
 #endif
        for (int i=0; i<n_act; i++) {
            int j_act = ind_act[i];
            // NOTICE much of these are just not needed!
            // m_act[i] = m[j_act];
            // x_act[i] = x[j_act];
            // v_act[i] = v[j_act];
            t_act[i]  = t[j_act];
            //dt_act[i] = dt[j_act];
            // NOTICE Why do we need pot_act and pot_act_ext? Probably redundant.
            //pot_act[i] = pot[j_act];
            pot_act_ext[i] = pot_ext[j_act]; // TODO almost certainly redundant
            a_act_old[i] = a[j_act];
            adot_act_old[i] = adot[j_act];
        } /* i */
 #ifdef TIMING
        get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
        DT_ACT_DEF3 += (CPU_tmp_user - CPU_tmp_user0);
 #endif
        /* predict the active particles positions etc... on all the nodes */
 #ifdef TIMING
@ -959,9 +763,6 @@ int main(int argc, char *argv[])
        MPI_Allreduce(a_act_tmp,    a_act_new,    3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        MPI_Allreduce(adot_act_tmp, adot_act_new, 3*n_act, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        /* Wait to all processors to finish his works... */
        MPI_Barrier(MPI_COMM_WORLD);
 #ifdef TIMING
        get_CPU_time(&CPU_tmp_real, &CPU_tmp_user, &CPU_tmp_syst);
        DT_ACT_REDUCE += (CPU_tmp_user - CPU_tmp_user0);
@ -986,12 +787,13 @@ int main(int argc, char *argv[])
            // NOTICE looks like we're doing three unrelated things in this loop: (1) correcting positions and velocities (2) calculating new steps, and (3) putting the corrected values from the _act_new back in the _act arrays.
            // After going back to the _act arrays they don't do much before they go back to the main arrays, so this copy seems redundant (the SMBH influence sphere printout needs these values but it should be a function anyway).
            // TODO split this loop into the three tasks it is doing, and remove the redundancy.
-            double dt_tmp = min_t - t_act[i];
+            int j_act = ind_act[i];
            double dt_tmp = min_t - t[j_act];
            double3 a2, a3;
-            calc_high_derivatives(dt_tmp, a_act_old[i], a_act_new[i], adot_act_old[i], adot_act_new[i], &a2, &a3);
+            calc_high_derivatives(dt_tmp, a[j_act], a_act_new[i], adot[j_act], adot_act_new[i], a2, a3);
-            corrector(dt_tmp, a2, a3, &x_act_new[i], &v_act_new[i]);
+            corrector(dt_tmp, a2, a3, x_act_new[i], v_act_new[i]);
            //TODO make beautiful
            double eta_curr;
@ -1019,13 +821,12 @@ int main(int argc, char *argv[])
            }
            if (dt_tmp < min_dt) min_dt = dt_tmp;
            int j_act = ind_act[i];
            x[j_act] = x_act_new[i];
            v[j_act] = v_act_new[i];
            t[j_act] = min_t;
            dt[j_act] = dt_tmp;
            pot[j_act] = pot_act_new[i];
-            pot_ext[j_act] = pot_act_ext[i]; // ???
+            pot_ext[j_act] = pot_act_ext[i];
            a[j_act] = a_act_new[i];
            adot[j_act] = adot_act_new[i];
        } /* i */
@ -1080,7 +881,6 @@ int main(int argc, char *argv[])
        timesteps += 1.0;
        n_act_sum += n_act;
        n_act_distr[n_act-1]++;
        if (time_cur >= t_bh) {
            if (myRank == rootRank) {
@ -1098,15 +898,13 @@ int main(int argc, char *argv[])
        if (time_cur >= t_contr) {
            if (myRank == rootRank) {
-                energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, rcm_sum, vcm_sum, E_tot_0, E_tot_corr_0, E_tot_corr_sd_0, skip_con, N, m, x, v, pot, pot_ext);
+                energy_contr(time_cur, timesteps, n_act_sum, calc_self_grav.g6_calls, N, m, x, v, pot, pot_ext);
                /* write cont data */
                if (config->output_hdf5) h5_write("data.con", diskstep, N, time_cur, m, x, v, pot, a, adot, 0, true);
                else ascii_write("data.con", diskstep, N, time_cur, m, x, v, 16);
                /* possible OUT for timing !!! */
 #ifdef TIMING
                FILE *out = fopen("timing.dat", "a");
@ -1168,15 +966,9 @@ int main(int argc, char *argv[])
    /* close the local GRAPEs */
    g6_close(clusterid);
    /* Wait to all processors to finish his works... */
    MPI_Barrier(MPI_COMM_WORLD);
    double g6_calls_sum;
    MPI_Reduce(&calc_self_grav.g6_calls, &g6_calls_sum, 1, MPI_DOUBLE, MPI_SUM, rootRank, MPI_COMM_WORLD);
    /* Wait to all processors to finish his works... */
    MPI_Barrier(MPI_COMM_WORLD);
    if (myRank == rootRank) {
        /* Write some output for the timestep annalize... */
@ -1189,8 +981,7 @@ int main(int argc, char *argv[])
    } /* if (myRank == rootRank) */
-    /* Wait to all processors to finish his works... */
+    delete[] m; delete[] x; delete[] v; delete[] ind; delete[] a; delete[] adot; delete[] pot; delete[] pot_ext; delete[] t; delete[] dt; delete[] ind_act; delete[] pot_act_new; delete[] pot_act_tmp; delete[] x_act_new; delete[] v_act_new; delete[] a_act_tmp; delete[] adot_act_tmp; delete[] a_act_new; delete[] adot_act_new; delete[] pot_act_ext;
    MPI_Barrier(MPI_COMM_WORLD);
    /* Finalize the MPI work */
    MPI_Finalize();
--- a/sse_sse.f
+++ b/sse_sse.f