Introduced double3 structure in preparation of dynamic allocation and getting rid of k-loops

This commit is contained in:
Yohai Meiron 2020-03-13 20:42:09 -04:00
parent 4b8ef0ca61
commit 34030c06d0

View file

@ -220,6 +220,12 @@ extern void qwerty_(double *aaa, double *aaapars);
#define N_MAX (6*MB)
#define N_MAX_loc (2*MB)
struct double3 {
double data[3];
double& operator[](int i) {return data[i];}
operator double*() {return data;}
};
int name_proc, n_proc=1, myRank=0, rootRank=0, cur_rank,
i, j, k, ni, nj, diskstep=0, power, jjj, iii,
skip_con=0, tmp_i;
@ -235,14 +241,10 @@ double dt_disk, dt_contr, t_disk=0.0, t_contr=0.0,
mcm, rcm_mod, vcm_mod,
rcm_sum=0.0, vcm_sum=0.0,
eps=0.0, eps2,
xcm[3], vcm[3], mom[3],
xdc[3], vdc[2],
over2=(1.0/2.0), over3=(1.0/3.0), over6=(1.0/6.0),
a2_mod, adot2_mod,
dt_tmp, dt2half, dt3over6, dt4over24, dt5over120,
dtinv, dt2inv, dt3inv,
a0mia1, ad04plad12, ad0plad1,
a2[3], a3[3], a2dot1[3],
a1abs, adot1abs, a2dot1abs, a3dot1abs,
Timesteps=0.0, n_act_sum=0.0, n_act_distr[N_MAX], g6_calls=0.0, g6_calls_sum=0.0,
tmp, tmp_r, tmp_v, tmp_rv, tmp_cpu,
@ -251,6 +253,10 @@ double dt_disk, dt_contr, t_disk=0.0, t_contr=0.0,
tmp_a_bh_pn0, tmp_a_bh_pn1, tmp_a_bh_pn2, tmp_a_bh_pn2_5, tmp_a_bh_pn3, tmp_a_bh_pn3_5, tmp_a_bh_spin,
tmp_adot_bh_pn0, tmp_adot_bh_pn1, tmp_adot_bh_pn2, tmp_adot_bh_pn2_5, tmp_adot_bh_pn3, tmp_adot_bh_pn3_5, tmp_adot_bh_spin;
double3 xcm, vcm, mom,
xdc, vdc,
a2, a3, a2dot1;
char processor_name[MPI_MAX_PROCESSOR_NAME],
inp_fname[30],
out_fname[30], dbg_fname[30];
@ -259,28 +265,31 @@ char processor_name[MPI_MAX_PROCESSOR_NAME],
int N, N_star, N_bh,
ind[N_MAX], name[N_MAX];
double m[N_MAX], x[N_MAX][3], v[N_MAX][3],
pot[N_MAX], a[N_MAX][3], adot[N_MAX][3],
t[N_MAX], dt[N_MAX];
double m[N_MAX], pot[N_MAX], t[N_MAX], dt[N_MAX];
double3 x[N_MAX], v[N_MAX], a[N_MAX], adot[N_MAX];
/* local variables */
int n_loc, ind_loc[N_MAX_loc];
double m_loc[N_MAX_loc], x_loc[N_MAX_loc][3], v_loc[N_MAX_loc][3],
pot_loc[N_MAX_loc], a_loc[N_MAX_loc][3], adot_loc[N_MAX_loc][3],
t_loc[N_MAX_loc], dt_loc[N_MAX_loc];
double m_loc[N_MAX_loc], pot_loc[N_MAX_loc], t_loc[N_MAX_loc], dt_loc[N_MAX_loc];
double3 x_loc[N_MAX_loc], v_loc[N_MAX_loc],
a_loc[N_MAX_loc], adot_loc[N_MAX_loc];
/* data for active particles */
int n_act, ind_act[N_MAX];
double m_act[N_MAX],
x_act[N_MAX][3], v_act[N_MAX][3],
pot_act[N_MAX], a_act[N_MAX][3], adot_act[N_MAX][3],
t_act[N_MAX], dt_act[N_MAX],
x_act_new[N_MAX][3], v_act_new[N_MAX][3],
pot_act_new[N_MAX], a_act_new[N_MAX][3], adot_act_new[N_MAX][3],
pot_act_tmp[N_MAX], a_act_tmp[N_MAX][3], adot_act_tmp[N_MAX][3],
pot_act_tmp_loc[N_MAX], a_act_tmp_loc[N_MAX][3], adot_act_tmp_loc[N_MAX][3];
pot_act[N_MAX], t_act[N_MAX], dt_act[N_MAX],
pot_act_new[N_MAX],
pot_act_tmp[N_MAX],
pot_act_tmp_loc[N_MAX];
double3 x_act[N_MAX], v_act[N_MAX],
a_act[N_MAX], adot_act[N_MAX],
x_act_new[N_MAX], v_act_new[N_MAX],
a_act_new[N_MAX], adot_act_new[N_MAX],
a_act_tmp[N_MAX], adot_act_tmp[N_MAX],
a_act_tmp_loc[N_MAX], adot_act_tmp_loc[N_MAX];
FILE *inp, *out, *tmp_file, *dbg;
@ -308,13 +317,15 @@ double DT_ACT_REDUCE;
int clusterid, ii, nn, numGPU;
int npipe=G6_NPIPE, index_i[G6_NPIPE];
double h2_i[G6_NPIPE], x_i[G6_NPIPE][3], v_i[G6_NPIPE][3],
p_i[G6_NPIPE], a_i[G6_NPIPE][3], jerk_i[G6_NPIPE][3];
double h2_i[G6_NPIPE], p_i[G6_NPIPE];
double3 x_i[G6_NPIPE], v_i[G6_NPIPE],
a_i[G6_NPIPE], jerk_i[G6_NPIPE];
int new_tunit=51, new_xunit=51;
int aflag=1, jflag=1, pflag=1;
double ti=0.0, a2by18[3], a1by6[3], aby2[3];
double ti=0.0;
double3 a2by18, a1by6, aby2;
/* normalization... */
@ -1223,8 +1234,9 @@ void calc_self_grav(double t)
for (ii=0; ii<nn; ii++) {
h2_i[ii] = eps2; // TODO This should be a global or something
} /* ii */
g6calc_firsthalf(clusterid, n_loc, nn, ind_act+i, x_act_new+i, v_act_new+i, a_act_tmp+i, adot_act_tmp+i, pot_act_tmp+i, eps2, h2_i);
g6calc_lasthalf( clusterid, n_loc, nn, ind_act+i, x_act_new+i, v_act_new+i, eps2, h2_i, a_act_tmp+i, adot_act_tmp+i, pot_act_tmp+i);
//TODO any way we can clean up this ugly casting?
g6calc_firsthalf(clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act_new+i, (double(*)[3])v_act_new+i, (double(*)[3])a_act_tmp+i, (double(*)[3])adot_act_tmp+i, pot_act_tmp+i, eps2, h2_i);
g6calc_lasthalf( clusterid, n_loc, nn, ind_act+i, (double(*)[3])x_act_new+i, (double(*)[3])v_act_new+i, eps2, h2_i, (double(*)[3])a_act_tmp+i, (double(*)[3])adot_act_tmp+i, pot_act_tmp+i);
g6_calls++;
} /* i */
@ -2372,7 +2384,7 @@ int main(int argc, char *argv[])
for (j=0; j<nj; j++) {
for (k=0;k<3;k++) {
a2by18[k] = 0.0; a1by6[k] = adot_loc[j][k]*over6; aby2[k] = a_loc[j][k]*over2;
a2by18[k] = 0.0; a1by6[k] = adot_loc[j][k]*(1./6.); aby2[k] = a_loc[j][k]*0.5;
} /* k */
g6_set_j_particle(clusterid, j, ind_loc[j], t_loc[j], dt_loc[j], m_loc[j], a2by18, a1by6, aby2, v_loc[j], x_loc[j]);
} /* j */
@ -2576,8 +2588,8 @@ int main(int argc, char *argv[])
for (i=0; i<n_act; i++) {
dt_tmp = min_t - t_act[i];
dt2half = over2*dt_tmp*dt_tmp;
dt3over6 = over3*dt_tmp*dt2half;
dt2half = 0.5*dt_tmp*dt_tmp;
dt3over6 = (1./3.)*dt_tmp*dt2half;
for (k=0;k<3;k++) {
x_act_new[i][k] = x_act[i][k] + v_act[i][k]*dt_tmp + a_act[i][k]*dt2half + adot_act[i][k]*dt3over6;
@ -2943,7 +2955,7 @@ int main(int argc, char *argv[])
jjj = ind_act[j] - myRank*n_loc;
for (k=0;k<3;k++) {
a2by18[k] = 0.0; a1by6[k] = adot_act[j][k]*over6; aby2[k] = a_act[j][k]*over2;
a2by18[k] = 0.0; a1by6[k] = adot_act[j][k]*(1./6.); aby2[k] = a_act[j][k]*0.5;
} /* k */
g6_set_j_particle(clusterid, jjj, ind_act[j], t_act[j], dt_act[j], m_act[j], a2by18, a1by6, aby2, v_act[j], x_act[j]);