Got rid of most k-loops

This commit is contained in:
Yohai Meiron 2020-03-14 16:29:00 -04:00
parent 7177148b6c
commit fd33819a7a

View file

@ -222,10 +222,63 @@ extern void qwerty_(double *aaa, double *aaapars);
struct double3 { struct double3 {
double data[3]; double data[3];
double3() {}
double3(const double x, const double y, const double z)
{
data[0] = x;
data[1] = y;
data[2] = z;
}
double& operator[](int i) {return data[i];} double& operator[](int i) {return data[i];}
double3& operator=(const double3& a)
{
data[0] = a.data[0];
data[1] = a.data[1];
data[2] = a.data[2];
return *this;
}
double3& operator+=(const double3& a)
{
data[0] += a.data[0];
data[1] += a.data[1];
data[2] += a.data[2];
return *this;
}
double3& operator/=(const double& c)
{
data[0] /= c;
data[1] /= c;
data[2] /= c;
return *this;
}
double norm()
{
return sqrt(data[0]*data[0]+data[1]*data[1]+data[2]*data[2]);
}
operator double*() {return data;} operator double*() {return data;}
}; };
double3 operator*(const double& c, const double3& a)
{
return double3(a.data[0]*c, a.data[1]*c, a.data[2]*c);
}
double3 operator*(const double3& a, const double& c)
{
return double3(a.data[0]*c, a.data[1]*c, a.data[2]*c);
}
double3 operator+(const double3& a, const double3& b)
{
return double3(a.data[0]+b.data[0], a.data[1]+b.data[1], a.data[2]+b.data[2]);
}
double3 operator-(const double3& a, const double3& b)
{
return double3(a.data[0]-b.data[0], a.data[1]-b.data[1], a.data[2]-b.data[2]);
}
int name_proc, n_proc=1, myRank=0, rootRank=0, cur_rank, int name_proc, n_proc=1, myRank=0, rootRank=0, cur_rank,
i, j, k, ni, nj, diskstep=0, power, jjj, iii, i, j, k, ni, nj, diskstep=0, power, jjj, iii,
skip_con=0, tmp_i; skip_con=0, tmp_i;
@ -1245,10 +1298,8 @@ void calc_self_grav(double t)
for (i=0; i<n_act; i++) { for (i=0; i<n_act; i++) {
iii = ind_act[i]; iii = ind_act[i];
pot_act_tmp_loc[iii] = pot_act_tmp[i]; pot_act_tmp_loc[iii] = pot_act_tmp[i];
for (k=0;k<3;k++) { a_act_tmp_loc[iii] = a_act_tmp[i];
a_act_tmp_loc[iii][k] = a_act_tmp[i][k]; adot_act_tmp_loc[iii] = adot_act_tmp[i];
adot_act_tmp_loc[iii][k] = adot_act_tmp[i][k];
} /* k */
} /* i */ } /* i */
#ifdef TIMING #ifdef TIMING
@ -1563,98 +1614,73 @@ DT_EXT_GRAV += (CPU_tmp_user - CPU_tmp_user0);
void energy_contr() void energy_contr()
{ {
E_pot = 0.0;
for (i=0; i<N; i++) E_pot += m[i]*pot[i];
E_pot *= 0.5;
E_pot = 0.0; E_kin = 0.0;
for (i=0; i<N; i++) E_pot += m[i]*pot[i]; for (i=0; i<N; i++) E_kin += m[i]*( SQR(v[i][0]) + SQR(v[i][1]) + SQR(v[i][2]) );
E_pot *= 0.5; E_kin *= 0.5;
E_kin = 0.0; E_pot_ext = 0.0;
for (i=0; i<N; i++) E_kin += m[i]*( SQR(v[i][0]) + SQR(v[i][1]) + SQR(v[i][2]) );
E_kin *= 0.5;
E_pot_ext = 0.0;
#ifdef EXTPOT #ifdef EXTPOT
for (i=0; i<N; i++) E_pot_ext += m[i]*pot_ext[i]; for (i=0; i<N; i++) E_pot_ext += m[i]*pot_ext[i];
#endif #endif
if (Timesteps == 0.0) if (Timesteps == 0.0) {
{
rcm_sum = 0.0; rcm_sum = 0.0;
vcm_sum = 0.0; vcm_sum = 0.0;
} }
mcm = 0.0; mcm = 0.0;
rcm_mod = 0.0; rcm_mod = 0.0;
vcm_mod = 0.0; vcm_mod = 0.0;
for (k=0;k<3;k++) xcm = {0, 0, 0};
{ vcm = {0, 0, 0};
xcm[k] = 0.0;
vcm[k] = 0.0;
} /* k */
for (i=0; i<N; i++) for (i=0; i<N; i++) {
{
mcm += m[i]; mcm += m[i];
xcm += m[i] * x[i];
for (k=0;k<3;k++) vcm += m[i] * v[i];
{
xcm[k] += m[i] * x[i][k];
vcm[k] += m[i] * v[i][k];
} /* k */
} /* i */ } /* i */
for (k=0;k<3;k++) xcm /= mcm;
{ vcm /= mcm;
xcm[k] /= mcm;
vcm[k] /= mcm;
} /* k */
rcm_mod = sqrt(xcm[0]*xcm[0]+xcm[1]*xcm[1]+xcm[2]*xcm[2]); rcm_mod = xcm.norm();
vcm_mod = sqrt(vcm[0]*vcm[0]+vcm[1]*vcm[1]+vcm[2]*vcm[2]); vcm_mod = vcm.norm();
rcm_sum += rcm_mod; rcm_sum += rcm_mod;
vcm_sum += vcm_mod; vcm_sum += vcm_mod;
for (k=0;k<3;k++) mom[k] = 0.0; mom = {0, 0, 0};
for (i=0; i<N; i++) for (i=0; i<N; i++) {
{
mom[0] += m[i] * ( x[i][1] * v[i][2] - x[i][2] * v[i][1] ); mom[0] += m[i] * ( x[i][1] * v[i][2] - x[i][2] * v[i][1] );
mom[1] += m[i] * ( x[i][2] * v[i][0] - x[i][0] * v[i][2] ); mom[1] += m[i] * ( x[i][2] * v[i][0] - x[i][0] * v[i][2] );
mom[2] += m[i] * ( x[i][0] * v[i][1] - x[i][1] * v[i][0] ); mom[2] += m[i] * ( x[i][0] * v[i][1] - x[i][1] * v[i][0] );
} /* i */ } /* i */
get_CPU_time(&CPU_time_real, &CPU_time_user, &CPU_time_syst); get_CPU_time(&CPU_time_real, &CPU_time_user, &CPU_time_syst);
E_tot = E_pot + E_kin + E_pot_ext; E_tot = E_pot + E_kin + E_pot_ext;
E_tot_corr = E_tot + E_corr; E_tot_corr = E_tot + E_corr;
E_tot_corr_sd = E_tot + E_corr + E_sd; E_tot_corr_sd = E_tot + E_corr + E_sd;
// if ((Timesteps == 0.0) && (time_cur == 0.0) ) if (Timesteps == 0.0) {
if (Timesteps == 0.0)
{
/* initialize the E_tot_0 + etc... */ /* initialize the E_tot_0 + etc... */
E_tot_0 = E_tot; E_tot_0 = E_tot;
E_tot_corr_0 = E_tot; E_tot_corr_0 = E_tot;
E_tot_corr_sd_0 = E_tot; E_tot_corr_sd_0 = E_tot;
// printf("1: %.6E %.6E \t % .8E % .8E \n", time_cur, Timesteps, E_tot, E_tot_0);
skip_con = 1; skip_con = 1;
} } else {
else
{
/* read the E_tot_0 + etc... */ /* read the E_tot_0 + etc... */
if ( skip_con == 0 ) {
if ( skip_con == 0 )
{
inp = fopen("contr.con","r"); inp = fopen("contr.con","r");
fscanf(inp,"%lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE", fscanf(inp,"%lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE",
&tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp,
&tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp,
@ -1662,96 +1688,44 @@ else
&E_tot_corr_0, &tmp, &E_tot_corr_0, &tmp,
&E_tot_corr_sd_0, &tmp, &E_tot_corr_sd_0, &tmp,
&tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp); &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp);
fclose(inp); fclose(inp);
skip_con = 1; skip_con = 1;
} }
} }
DE_tot = E_tot - E_tot_0;
DE_tot_corr = E_tot_corr - E_tot_corr_0;
DE_tot_corr_sd = E_tot_corr_sd - E_tot_corr_sd_0;
DE_tot = E_tot - E_tot_0; printf("%.3E %.3E % .4E %.4E % .4E % .4E % .4E %.2E\n",
// printf("2: %.6E %.6E \t % .8E % .8E \n", time_cur, Timesteps, E_tot, E_tot_0);
DE_tot_corr = E_tot_corr - E_tot_corr_0;
DE_tot_corr_sd = E_tot_corr_sd - E_tot_corr_sd_0;
printf("%.3E %.3E % .4E %.4E % .4E % .4E % .4E %.2E\n",
time_cur, Timesteps, time_cur, Timesteps,
E_pot, E_kin, E_pot_ext, E_tot, DE_tot, E_pot, E_kin, E_pot_ext, E_tot, DE_tot,
CPU_time_user-CPU_time_user0); CPU_time_user-CPU_time_user0);
fflush(stdout); fflush(stdout);
out = fopen("contr.dat","a"); out = fopen("contr.dat","a");
fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E %.8E %.8E \n", fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E %.8E %.8E \n",
time_cur, Timesteps, n_act_sum, g6_calls, time_cur, Timesteps, n_act_sum, g6_calls,
E_pot, E_kin, E_pot_ext, E_pot, E_kin, E_pot_ext,
E_tot, DE_tot, E_tot, DE_tot,
rcm_mod, vcm_mod, rcm_mod, vcm_mod,
mom[0], mom[1], mom[2], mom[0], mom[1], mom[2],
CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0); CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0);
fclose(out); fclose(out);
out = fopen("contr.con","w"); out = fopen("contr.con","w");
fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E %.8E %.8E \n", fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E %.8E %.8E \n",
time_cur, Timesteps, n_act_sum, g6_calls, time_cur, Timesteps, n_act_sum, g6_calls,
E_pot, E_kin, E_pot_ext, E_pot, E_kin, E_pot_ext,
E_tot, DE_tot, E_tot, DE_tot,
rcm_mod, vcm_mod, rcm_mod, vcm_mod,
mom[0], mom[1], mom[2], mom[0], mom[1], mom[2],
CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0); CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0);
fclose(out); fclose(out);
E_tot_0 = E_tot; E_tot_0 = E_tot;
E_tot_corr_0 = E_tot_corr;
// printf("3: %.6E %.6E \t % .8E % .8E \n", time_cur, Timesteps, E_tot, E_tot_0); E_tot_corr_sd_0 = E_tot_corr_sd;
E_tot_corr_0 = E_tot_corr;
E_tot_corr_sd_0 = E_tot_corr_sd;
}
void cm_corr()
{
mcm = 0.0;
for (k=0;k<3;k++)
{
xcm[k] = 0.0;
vcm[k] = 0.0;
} /* k */
for (i=0; i<N; i++)
{
mcm += m[i];
for (k=0;k<3;k++)
{
xcm[k] += m[i] * x[i][k];
vcm[k] += m[i] * v[i][k];
} /* k */
} /* i */
for (k=0;k<3;k++)
{
xcm[k] /= mcm;
vcm[k] /= mcm;
} /* k */
for (i=0; i<N; i++)
{
for (k=0;k<3;k++)
{
x[i][k] -= xcm[k];
v[i][k] -= vcm[k];
} /* k */
} /* i */
} }
int main(int argc, char *argv[]) int main(int argc, char *argv[])
@ -2095,10 +2069,9 @@ int main(int argc, char *argv[])
/* load the nj particles to the G6 */ /* load the nj particles to the G6 */
for (j=0; j<nj; j++) { for (j=0; j<nj; j++) {
for (k=0;k<3;k++) { a2by18 = {0, 0, 0};
a2by18[k] = 0.0; a1by6[k] = 0.0; aby2[k] = 0.0; a1by6 = {0, 0, 0};
} /* k */ aby2 = {0, 0, 0};
g6_set_j_particle(clusterid, j, ind_loc[j], t_loc[j], dt_loc[j], m_loc[j], a2by18, a1by6, aby2, v_loc[j], x_loc[j]); g6_set_j_particle(clusterid, j, ind_loc[j], t_loc[j], dt_loc[j], m_loc[j], a2by18, a1by6, aby2, v_loc[j], x_loc[j]);
} /* j */ } /* j */
@ -2140,9 +2113,8 @@ int main(int argc, char *argv[])
iii = ind_act[i]; iii = ind_act[i];
m_act[i] = m[iii]; m_act[i] = m[iii];
for (k=0;k<3;k++) { x_act[i] = x[iii];
x_act[i][k] = x[iii][k]; v_act[i][k] = v[iii][k]; v_act[i] = v[iii];
} /* k */
t_act[i] = t[iii]; t_act[i] = t[iii];
dt_act[i] = dt[iii]; dt_act[i] = dt[iii];
@ -2383,9 +2355,9 @@ int main(int argc, char *argv[])
/* load the nj particles to the G6 */ /* load the nj particles to the G6 */
for (j=0; j<nj; j++) { for (j=0; j<nj; j++) {
for (k=0;k<3;k++) { a2by18 = {0, 0, 0};
a2by18[k] = 0.0; a1by6[k] = adot_loc[j][k]*(1./6.); aby2[k] = a_loc[j][k]*0.5; a1by6 = adot_loc[j]*(1./6.);
} /* k */ aby2 = a_loc[j]*0.5;
g6_set_j_particle(clusterid, j, ind_loc[j], t_loc[j], dt_loc[j], m_loc[j], a2by18, a1by6, aby2, v_loc[j], x_loc[j]); g6_set_j_particle(clusterid, j, ind_loc[j], t_loc[j], dt_loc[j], m_loc[j], a2by18, a1by6, aby2, v_loc[j], x_loc[j]);
} /* j */ } /* j */
@ -2464,7 +2436,7 @@ int main(int argc, char *argv[])
for (j=0; j<n_loc; j++) { for (j=0; j<n_loc; j++) {
jjj = j + myRank*n_loc; jjj = j + myRank*n_loc;
tmp = t[jjj] + dt[jjj]; tmp = t[jjj] + dt[jjj];
if ( tmp < min_t_loc ) min_t_loc = tmp; if (tmp < min_t_loc) min_t_loc = tmp;
} }
#endif #endif
@ -2554,10 +2526,8 @@ int main(int argc, char *argv[])
m_act[i] = m[iii]; m_act[i] = m[iii];
for (k=0;k<3;k++) { x_act[i] = x[iii];
x_act[i][k] = x[iii][k]; v_act[i] = v[iii];
v_act[i][k] = v[iii][k];
} /* k */
t_act[i] = t[iii]; t_act[i] = t[iii];
dt_act[i] = dt[iii]; dt_act[i] = dt[iii];
@ -2568,10 +2538,8 @@ int main(int argc, char *argv[])
pot_act_ext[i] = pot_ext[iii]; pot_act_ext[i] = pot_ext[iii];
#endif #endif
for (k=0;k<3;k++) { a_act[i] = a[iii];
a_act[i][k] = a[iii][k]; adot_act[i] = adot[iii];
adot_act[i][k] = adot[iii][k];
} /* k */
} /* i */ } /* i */
@ -2590,12 +2558,8 @@ int main(int argc, char *argv[])
dt_tmp = min_t - t_act[i]; dt_tmp = min_t - t_act[i];
dt2half = 0.5*dt_tmp*dt_tmp; dt2half = 0.5*dt_tmp*dt_tmp;
dt3over6 = (1./3.)*dt_tmp*dt2half; dt3over6 = (1./3.)*dt_tmp*dt2half;
x_act_new[i] = x_act[i] + v_act[i]*dt_tmp + a_act[i]*dt2half + adot_act[i]*dt3over6;
for (k=0;k<3;k++) { v_act_new[i] = v_act[i] + a_act[i]*dt_tmp + adot_act[i]*dt2half;
x_act_new[i][k] = x_act[i][k] + v_act[i][k]*dt_tmp + a_act[i][k]*dt2half + adot_act[i][k]*dt3over6;
v_act_new[i][k] = v_act[i][k] + a_act[i][k]*dt_tmp + adot_act[i][k]*dt2half;
} /* k */
} /* i */ } /* i */
#ifdef TIMING #ifdef TIMING
@ -2731,25 +2695,23 @@ int main(int argc, char *argv[])
dt2inv = dtinv*dtinv; dt2inv = dtinv*dtinv;
dt3inv = dt2inv*dtinv; dt3inv = dt2inv*dtinv;
for (k=0; k<3; k++) { double3 a0mia1 = a_act[i]-a_act_new[i];
a0mia1 = a_act[i][k]-a_act_new[i][k]; double3 ad04plad12 = 4.0*adot_act[i] + 2.0*adot_act_new[i];
ad04plad12 = 4.0*adot_act[i][k] + 2.0*adot_act_new[i][k]; double3 ad0plad1 = adot_act[i] + adot_act_new[i];
ad0plad1 = adot_act[i][k] + adot_act_new[i][k];
a2[k] = -6.0*a0mia1*dt2inv - ad04plad12*dtinv; a2 = -6.0*a0mia1*dt2inv - ad04plad12*dtinv;
a3[k] = 12.0*a0mia1*dt3inv + 6.0*ad0plad1*dt2inv; a3 = 12.0*a0mia1*dt3inv + 6.0*ad0plad1*dt2inv;
x_act_new[i][k] += dt4over24*a2[k] + dt5over120*a3[k]; x_act_new[i] += dt4over24*a2 + dt5over120*a3;
v_act_new[i][k] += dt3over6*a2[k] + dt4over24*a3[k]; v_act_new[i] += dt3over6*a2 + dt4over24*a3;
} /* k */
a1abs = sqrt(a_act_new[i][0]*a_act_new[i][0]+a_act_new[i][1]*a_act_new[i][1]+a_act_new[i][2]*a_act_new[i][2]); a1abs = sqrt(a_act_new[i][0]*a_act_new[i][0]+a_act_new[i][1]*a_act_new[i][1]+a_act_new[i][2]*a_act_new[i][2]);
adot1abs = sqrt(adot_act_new[i][0]*adot_act_new[i][0]+adot_act_new[i][1]*adot_act_new[i][1]+adot_act_new[i][2]*adot_act_new[i][2]); adot1abs = sqrt(adot_act_new[i][0]*adot_act_new[i][0]+adot_act_new[i][1]*adot_act_new[i][1]+adot_act_new[i][2]*adot_act_new[i][2]);
for (k=0; k<3; k++) a2dot1[k] = a2[k] + dt_tmp*a3[k]; a2dot1 = a2 + dt_tmp*a3;
a2dot1abs = sqrt(a2dot1[0]*a2dot1[0]+a2dot1[1]*a2dot1[1]+a2dot1[2]*a2dot1[2]); a2dot1abs = a2dot1.norm();
a3dot1abs = sqrt(a3[0]*a3[0]+a3[1]*a3[1]+a3[2]*a3[2]); a3dot1abs = a3.norm();
#ifdef ADD_BH2 #ifdef ADD_BH2
if ((ind_act[i] == 0) || (ind_act[i] == 1)) { if ((ind_act[i] == 0) || (ind_act[i] == 1)) {
@ -2789,12 +2751,10 @@ int main(int argc, char *argv[])
pot_act[i] = pot_act_new[i]; pot_act[i] = pot_act_new[i];
for (k=0; k<3; k++) { x_act[i] = x_act_new[i];
x_act[i][k] = x_act_new[i][k]; v_act[i] = v_act_new[i];
v_act[i][k] = v_act_new[i][k]; a_act[i] = a_act_new[i];
a_act[i][k] = a_act_new[i][k]; adot_act[i] = adot_act_new[i];
adot_act[i][k] = adot_act_new[i][k];
} /* k */
#ifdef DT_MIN_WARNING #ifdef DT_MIN_WARNING
if (myRank == 0) { if (myRank == 0) {
@ -2913,10 +2873,8 @@ int main(int argc, char *argv[])
m[iii] = m_act[i]; m[iii] = m_act[i];
for (k=0;k<3;k++) { x[iii] = x_act[i];
x[iii][k] = x_act[i][k]; v[iii] = v_act[i];
v[iii][k] = v_act[i][k];
} /* k */
t[iii] = t_act[i]; t[iii] = t_act[i];
dt[iii] = dt_act[i]; dt[iii] = dt_act[i];
@ -2927,10 +2885,8 @@ int main(int argc, char *argv[])
pot_ext[iii] = pot_act_ext[i]; pot_ext[iii] = pot_act_ext[i];
#endif #endif
for (k=0;k<3;k++) { a[iii] = a_act[i];
a[iii][k] = a_act[i][k]; adot[iii] = adot_act[i];
adot[iii][k] = adot_act[i][k];
} /* k */
} /* i */ } /* i */
#ifdef TIMING #ifdef TIMING
@ -2954,9 +2910,9 @@ int main(int argc, char *argv[])
jjj = ind_act[j] - myRank*n_loc; jjj = ind_act[j] - myRank*n_loc;
for (k=0;k<3;k++) { a2by18 = {0, 0, 0};
a2by18[k] = 0.0; a1by6[k] = adot_act[j][k]*(1./6.); aby2[k] = a_act[j][k]*0.5; a1by6 = adot_act[j]*(1./6.);
} /* k */ aby2 = a_act[j]*0.5;
g6_set_j_particle(clusterid, jjj, ind_act[j], t_act[j], dt_act[j], m_act[j], a2by18, a1by6, aby2, v_act[j], x_act[j]); g6_set_j_particle(clusterid, jjj, ind_act[j], t_act[j], dt_act[j], m_act[j], a2by18, a1by6, aby2, v_act[j], x_act[j]);