Got rid of most k-loops

This commit is contained in:
Yohai Meiron 2020-03-14 16:29:00 -04:00
parent 7177148b6c
commit fd33819a7a

View file

@ -222,10 +222,63 @@ extern void qwerty_(double *aaa, double *aaapars);
struct double3 {
double data[3];
double3() {}
double3(const double x, const double y, const double z)
{
data[0] = x;
data[1] = y;
data[2] = z;
}
double& operator[](int i) {return data[i];}
double3& operator=(const double3& a)
{
data[0] = a.data[0];
data[1] = a.data[1];
data[2] = a.data[2];
return *this;
}
double3& operator+=(const double3& a)
{
data[0] += a.data[0];
data[1] += a.data[1];
data[2] += a.data[2];
return *this;
}
double3& operator/=(const double& c)
{
data[0] /= c;
data[1] /= c;
data[2] /= c;
return *this;
}
double norm()
{
return sqrt(data[0]*data[0]+data[1]*data[1]+data[2]*data[2]);
}
operator double*() {return data;}
};
double3 operator*(const double& c, const double3& a)
{
return double3(a.data[0]*c, a.data[1]*c, a.data[2]*c);
}
double3 operator*(const double3& a, const double& c)
{
return double3(a.data[0]*c, a.data[1]*c, a.data[2]*c);
}
double3 operator+(const double3& a, const double3& b)
{
return double3(a.data[0]+b.data[0], a.data[1]+b.data[1], a.data[2]+b.data[2]);
}
double3 operator-(const double3& a, const double3& b)
{
return double3(a.data[0]-b.data[0], a.data[1]-b.data[1], a.data[2]-b.data[2]);
}
int name_proc, n_proc=1, myRank=0, rootRank=0, cur_rank,
i, j, k, ni, nj, diskstep=0, power, jjj, iii,
skip_con=0, tmp_i;
@ -1245,10 +1298,8 @@ void calc_self_grav(double t)
for (i=0; i<n_act; i++) {
iii = ind_act[i];
pot_act_tmp_loc[iii] = pot_act_tmp[i];
for (k=0;k<3;k++) {
a_act_tmp_loc[iii][k] = a_act_tmp[i][k];
adot_act_tmp_loc[iii][k] = adot_act_tmp[i][k];
} /* k */
a_act_tmp_loc[iii] = a_act_tmp[i];
adot_act_tmp_loc[iii] = adot_act_tmp[i];
} /* i */
#ifdef TIMING
@ -1563,195 +1614,118 @@ DT_EXT_GRAV += (CPU_tmp_user - CPU_tmp_user0);
void energy_contr()
{
E_pot = 0.0;
for (i=0; i<N; i++) E_pot += m[i]*pot[i];
E_pot *= 0.5;
E_pot = 0.0;
for (i=0; i<N; i++) E_pot += m[i]*pot[i];
E_pot *= 0.5;
E_kin = 0.0;
for (i=0; i<N; i++) E_kin += m[i]*( SQR(v[i][0]) + SQR(v[i][1]) + SQR(v[i][2]) );
E_kin *= 0.5;
E_kin = 0.0;
for (i=0; i<N; i++) E_kin += m[i]*( SQR(v[i][0]) + SQR(v[i][1]) + SQR(v[i][2]) );
E_kin *= 0.5;
E_pot_ext = 0.0;
E_pot_ext = 0.0;
#ifdef EXTPOT
for (i=0; i<N; i++) E_pot_ext += m[i]*pot_ext[i];
for (i=0; i<N; i++) E_pot_ext += m[i]*pot_ext[i];
#endif
if (Timesteps == 0.0)
{
rcm_sum = 0.0;
vcm_sum = 0.0;
if (Timesteps == 0.0) {
rcm_sum = 0.0;
vcm_sum = 0.0;
}
mcm = 0.0;
rcm_mod = 0.0;
vcm_mod = 0.0;
mcm = 0.0;
rcm_mod = 0.0;
vcm_mod = 0.0;
for (k=0;k<3;k++)
{
xcm[k] = 0.0;
vcm[k] = 0.0;
} /* k */
for (i=0; i<N; i++)
{
mcm += m[i];
for (k=0;k<3;k++)
{
xcm[k] += m[i] * x[i][k];
vcm[k] += m[i] * v[i][k];
} /* k */
xcm = {0, 0, 0};
vcm = {0, 0, 0};
for (i=0; i<N; i++) {
mcm += m[i];
xcm += m[i] * x[i];
vcm += m[i] * v[i];
} /* i */
for (k=0;k<3;k++)
{
xcm[k] /= mcm;
vcm[k] /= mcm;
} /* k */
xcm /= mcm;
vcm /= mcm;
rcm_mod = sqrt(xcm[0]*xcm[0]+xcm[1]*xcm[1]+xcm[2]*xcm[2]);
vcm_mod = sqrt(vcm[0]*vcm[0]+vcm[1]*vcm[1]+vcm[2]*vcm[2]);
rcm_mod = xcm.norm();
vcm_mod = vcm.norm();
rcm_sum += rcm_mod;
vcm_sum += vcm_mod;
rcm_sum += rcm_mod;
vcm_sum += vcm_mod;
for (k=0;k<3;k++) mom[k] = 0.0;
mom = {0, 0, 0};
for (i=0; i<N; i++)
{
mom[0] += m[i] * ( x[i][1] * v[i][2] - x[i][2] * v[i][1] );
mom[1] += m[i] * ( x[i][2] * v[i][0] - x[i][0] * v[i][2] );
mom[2] += m[i] * ( x[i][0] * v[i][1] - x[i][1] * v[i][0] );
for (i=0; i<N; i++) {
mom[0] += m[i] * ( x[i][1] * v[i][2] - x[i][2] * v[i][1] );
mom[1] += m[i] * ( x[i][2] * v[i][0] - x[i][0] * v[i][2] );
mom[2] += m[i] * ( x[i][0] * v[i][1] - x[i][1] * v[i][0] );
} /* i */
get_CPU_time(&CPU_time_real, &CPU_time_user, &CPU_time_syst);
get_CPU_time(&CPU_time_real, &CPU_time_user, &CPU_time_syst);
E_tot = E_pot + E_kin + E_pot_ext;
E_tot = E_pot + E_kin + E_pot_ext;
E_tot_corr = E_tot + E_corr;
E_tot_corr_sd = E_tot + E_corr + E_sd;
E_tot_corr = E_tot + E_corr;
E_tot_corr_sd = E_tot + E_corr + E_sd;
// if ((Timesteps == 0.0) && (time_cur == 0.0) )
if (Timesteps == 0.0)
{
/* initialize the E_tot_0 + etc... */
if (Timesteps == 0.0) {
/* initialize the E_tot_0 + etc... */
E_tot_0 = E_tot;
E_tot_corr_0 = E_tot;
E_tot_corr_sd_0 = E_tot;
skip_con = 1;
} else {
/* read the E_tot_0 + etc... */
if ( skip_con == 0 ) {
inp = fopen("contr.con","r");
fscanf(inp,"%lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE",
&tmp, &tmp, &tmp, &tmp,
&tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp,
&E_tot_0, &tmp,
&E_tot_corr_0, &tmp,
&E_tot_corr_sd_0, &tmp,
&tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp);
fclose(inp);
skip_con = 1;
}
}
DE_tot = E_tot - E_tot_0;
DE_tot_corr = E_tot_corr - E_tot_corr_0;
DE_tot_corr_sd = E_tot_corr_sd - E_tot_corr_sd_0;
printf("%.3E %.3E % .4E %.4E % .4E % .4E % .4E %.2E\n",
time_cur, Timesteps,
E_pot, E_kin, E_pot_ext, E_tot, DE_tot,
CPU_time_user-CPU_time_user0);
fflush(stdout);
out = fopen("contr.dat","a");
fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E %.8E %.8E \n",
time_cur, Timesteps, n_act_sum, g6_calls,
E_pot, E_kin, E_pot_ext,
E_tot, DE_tot,
rcm_mod, vcm_mod,
mom[0], mom[1], mom[2],
CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0);
fclose(out);
out = fopen("contr.con","w");
fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E %.8E %.8E \n",
time_cur, Timesteps, n_act_sum, g6_calls,
E_pot, E_kin, E_pot_ext,
E_tot, DE_tot,
rcm_mod, vcm_mod,
mom[0], mom[1], mom[2],
CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0);
fclose(out);
E_tot_0 = E_tot;
E_tot_corr_0 = E_tot;
E_tot_corr_sd_0 = E_tot;
// printf("1: %.6E %.6E \t % .8E % .8E \n", time_cur, Timesteps, E_tot, E_tot_0);
skip_con = 1;
}
else
{
/* read the E_tot_0 + etc... */
if ( skip_con == 0 )
{
inp = fopen("contr.con","r");
fscanf(inp,"%lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE %lE",
&tmp, &tmp, &tmp, &tmp,
&tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp,
&E_tot_0, &tmp,
&E_tot_corr_0, &tmp,
&E_tot_corr_sd_0, &tmp,
&tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp, &tmp);
fclose(inp);
skip_con = 1;
}
}
DE_tot = E_tot - E_tot_0;
// printf("2: %.6E %.6E \t % .8E % .8E \n", time_cur, Timesteps, E_tot, E_tot_0);
DE_tot_corr = E_tot_corr - E_tot_corr_0;
DE_tot_corr_sd = E_tot_corr_sd - E_tot_corr_sd_0;
printf("%.3E %.3E % .4E %.4E % .4E % .4E % .4E %.2E\n",
time_cur, Timesteps,
E_pot, E_kin, E_pot_ext, E_tot, DE_tot,
CPU_time_user-CPU_time_user0);
fflush(stdout);
out = fopen("contr.dat","a");
fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E %.8E %.8E \n",
time_cur, Timesteps, n_act_sum, g6_calls,
E_pot, E_kin, E_pot_ext,
E_tot, DE_tot,
rcm_mod, vcm_mod,
mom[0], mom[1], mom[2],
CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0);
fclose(out);
out = fopen("contr.con","w");
fprintf(out,"%.8E \t %.8E %.8E %.8E \t % .8E % .8E % .8E % .8E % .8E \t % .8E % .8E \t % .8E % .8E % .8E \t %.8E %.8E %.8E \n",
time_cur, Timesteps, n_act_sum, g6_calls,
E_pot, E_kin, E_pot_ext,
E_tot, DE_tot,
rcm_mod, vcm_mod,
mom[0], mom[1], mom[2],
CPU_time_real-CPU_time_real0, CPU_time_user-CPU_time_user0, CPU_time_syst-CPU_time_syst0);
fclose(out);
E_tot_0 = E_tot;
// printf("3: %.6E %.6E \t % .8E % .8E \n", time_cur, Timesteps, E_tot, E_tot_0);
E_tot_corr_0 = E_tot_corr;
E_tot_corr_sd_0 = E_tot_corr_sd;
}
void cm_corr()
{
mcm = 0.0;
for (k=0;k<3;k++)
{
xcm[k] = 0.0;
vcm[k] = 0.0;
} /* k */
for (i=0; i<N; i++)
{
mcm += m[i];
for (k=0;k<3;k++)
{
xcm[k] += m[i] * x[i][k];
vcm[k] += m[i] * v[i][k];
} /* k */
} /* i */
for (k=0;k<3;k++)
{
xcm[k] /= mcm;
vcm[k] /= mcm;
} /* k */
for (i=0; i<N; i++)
{
for (k=0;k<3;k++)
{
x[i][k] -= xcm[k];
v[i][k] -= vcm[k];
} /* k */
} /* i */
E_tot_corr_0 = E_tot_corr;
E_tot_corr_sd_0 = E_tot_corr_sd;
}
int main(int argc, char *argv[])
@ -2095,10 +2069,9 @@ int main(int argc, char *argv[])
/* load the nj particles to the G6 */
for (j=0; j<nj; j++) {
for (k=0;k<3;k++) {
a2by18[k] = 0.0; a1by6[k] = 0.0; aby2[k] = 0.0;
} /* k */
a2by18 = {0, 0, 0};
a1by6 = {0, 0, 0};
aby2 = {0, 0, 0};
g6_set_j_particle(clusterid, j, ind_loc[j], t_loc[j], dt_loc[j], m_loc[j], a2by18, a1by6, aby2, v_loc[j], x_loc[j]);
} /* j */
@ -2140,9 +2113,8 @@ int main(int argc, char *argv[])
iii = ind_act[i];
m_act[i] = m[iii];
for (k=0;k<3;k++) {
x_act[i][k] = x[iii][k]; v_act[i][k] = v[iii][k];
} /* k */
x_act[i] = x[iii];
v_act[i] = v[iii];
t_act[i] = t[iii];
dt_act[i] = dt[iii];
@ -2383,9 +2355,9 @@ int main(int argc, char *argv[])
/* load the nj particles to the G6 */
for (j=0; j<nj; j++) {
for (k=0;k<3;k++) {
a2by18[k] = 0.0; a1by6[k] = adot_loc[j][k]*(1./6.); aby2[k] = a_loc[j][k]*0.5;
} /* k */
a2by18 = {0, 0, 0};
a1by6 = adot_loc[j]*(1./6.);
aby2 = a_loc[j]*0.5;
g6_set_j_particle(clusterid, j, ind_loc[j], t_loc[j], dt_loc[j], m_loc[j], a2by18, a1by6, aby2, v_loc[j], x_loc[j]);
} /* j */
@ -2464,7 +2436,7 @@ int main(int argc, char *argv[])
for (j=0; j<n_loc; j++) {
jjj = j + myRank*n_loc;
tmp = t[jjj] + dt[jjj];
if ( tmp < min_t_loc ) min_t_loc = tmp;
if (tmp < min_t_loc) min_t_loc = tmp;
}
#endif
@ -2554,10 +2526,8 @@ int main(int argc, char *argv[])
m_act[i] = m[iii];
for (k=0;k<3;k++) {
x_act[i][k] = x[iii][k];
v_act[i][k] = v[iii][k];
} /* k */
x_act[i] = x[iii];
v_act[i] = v[iii];
t_act[i] = t[iii];
dt_act[i] = dt[iii];
@ -2568,10 +2538,8 @@ int main(int argc, char *argv[])
pot_act_ext[i] = pot_ext[iii];
#endif
for (k=0;k<3;k++) {
a_act[i][k] = a[iii][k];
adot_act[i][k] = adot[iii][k];
} /* k */
a_act[i] = a[iii];
adot_act[i] = adot[iii];
} /* i */
@ -2590,12 +2558,8 @@ int main(int argc, char *argv[])
dt_tmp = min_t - t_act[i];
dt2half = 0.5*dt_tmp*dt_tmp;
dt3over6 = (1./3.)*dt_tmp*dt2half;
for (k=0;k<3;k++) {
x_act_new[i][k] = x_act[i][k] + v_act[i][k]*dt_tmp + a_act[i][k]*dt2half + adot_act[i][k]*dt3over6;
v_act_new[i][k] = v_act[i][k] + a_act[i][k]*dt_tmp + adot_act[i][k]*dt2half;
} /* k */
x_act_new[i] = x_act[i] + v_act[i]*dt_tmp + a_act[i]*dt2half + adot_act[i]*dt3over6;
v_act_new[i] = v_act[i] + a_act[i]*dt_tmp + adot_act[i]*dt2half;
} /* i */
#ifdef TIMING
@ -2731,25 +2695,23 @@ int main(int argc, char *argv[])
dt2inv = dtinv*dtinv;
dt3inv = dt2inv*dtinv;
for (k=0; k<3; k++) {
a0mia1 = a_act[i][k]-a_act_new[i][k];
ad04plad12 = 4.0*adot_act[i][k] + 2.0*adot_act_new[i][k];
ad0plad1 = adot_act[i][k] + adot_act_new[i][k];
double3 a0mia1 = a_act[i]-a_act_new[i];
double3 ad04plad12 = 4.0*adot_act[i] + 2.0*adot_act_new[i];
double3 ad0plad1 = adot_act[i] + adot_act_new[i];
a2[k] = -6.0*a0mia1*dt2inv - ad04plad12*dtinv;
a3[k] = 12.0*a0mia1*dt3inv + 6.0*ad0plad1*dt2inv;
a2 = -6.0*a0mia1*dt2inv - ad04plad12*dtinv;
a3 = 12.0*a0mia1*dt3inv + 6.0*ad0plad1*dt2inv;
x_act_new[i][k] += dt4over24*a2[k] + dt5over120*a3[k];
v_act_new[i][k] += dt3over6*a2[k] + dt4over24*a3[k];
} /* k */
x_act_new[i] += dt4over24*a2 + dt5over120*a3;
v_act_new[i] += dt3over6*a2 + dt4over24*a3;
a1abs = sqrt(a_act_new[i][0]*a_act_new[i][0]+a_act_new[i][1]*a_act_new[i][1]+a_act_new[i][2]*a_act_new[i][2]);
adot1abs = sqrt(adot_act_new[i][0]*adot_act_new[i][0]+adot_act_new[i][1]*adot_act_new[i][1]+adot_act_new[i][2]*adot_act_new[i][2]);
for (k=0; k<3; k++) a2dot1[k] = a2[k] + dt_tmp*a3[k];
a2dot1 = a2 + dt_tmp*a3;
a2dot1abs = sqrt(a2dot1[0]*a2dot1[0]+a2dot1[1]*a2dot1[1]+a2dot1[2]*a2dot1[2]);
a3dot1abs = sqrt(a3[0]*a3[0]+a3[1]*a3[1]+a3[2]*a3[2]);
a2dot1abs = a2dot1.norm();
a3dot1abs = a3.norm();
#ifdef ADD_BH2
if ((ind_act[i] == 0) || (ind_act[i] == 1)) {
@ -2789,12 +2751,10 @@ int main(int argc, char *argv[])
pot_act[i] = pot_act_new[i];
for (k=0; k<3; k++) {
x_act[i][k] = x_act_new[i][k];
v_act[i][k] = v_act_new[i][k];
a_act[i][k] = a_act_new[i][k];
adot_act[i][k] = adot_act_new[i][k];
} /* k */
x_act[i] = x_act_new[i];
v_act[i] = v_act_new[i];
a_act[i] = a_act_new[i];
adot_act[i] = adot_act_new[i];
#ifdef DT_MIN_WARNING
if (myRank == 0) {
@ -2913,10 +2873,8 @@ int main(int argc, char *argv[])
m[iii] = m_act[i];
for (k=0;k<3;k++) {
x[iii][k] = x_act[i][k];
v[iii][k] = v_act[i][k];
} /* k */
x[iii] = x_act[i];
v[iii] = v_act[i];
t[iii] = t_act[i];
dt[iii] = dt_act[i];
@ -2927,10 +2885,8 @@ int main(int argc, char *argv[])
pot_ext[iii] = pot_act_ext[i];
#endif
for (k=0;k<3;k++) {
a[iii][k] = a_act[i][k];
adot[iii][k] = adot_act[i][k];
} /* k */
a[iii] = a_act[i];
adot[iii] = adot_act[i];
} /* i */
#ifdef TIMING
@ -2954,9 +2910,9 @@ int main(int argc, char *argv[])
jjj = ind_act[j] - myRank*n_loc;
for (k=0;k<3;k++) {
a2by18[k] = 0.0; a1by6[k] = adot_act[j][k]*(1./6.); aby2[k] = a_act[j][k]*0.5;
} /* k */
a2by18 = {0, 0, 0};
a1by6 = adot_act[j]*(1./6.);
aby2 = a_act[j]*0.5;
g6_set_j_particle(clusterid, jjj, ind_act[j], t_act[j], dt_act[j], m_act[j], a2by18, a1by6, aby2, v_act[j], x_act[j]);