nb_free_energy.cpp
1 
/*


2 
* This file is part of the GROMACS molecular simulation package.

3 
*

4 
* Copyright (c) 19912000, University of Groningen, The Netherlands.

5 
* Copyright (c) 20012004, The GROMACS development team.

6 
* Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by

7 
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,

8 
* and including many others, as listed in the AUTHORS file in the

9 
* toplevel source directory and at http://www.gromacs.org.

10 
*

11 
* GROMACS is free software; you can redistribute it and/or

12 
* modify it under the terms of the GNU Lesser General Public License

13 
* as published by the Free Software Foundation; either version 2.1

14 
* of the License, or (at your option) any later version.

15 
*

16 
* GROMACS is distributed in the hope that it will be useful,

17 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

18 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

19 
* Lesser General Public License for more details.

20 
*

21 
* You should have received a copy of the GNU Lesser General Public

22 
* License along with GROMACS; if not, see

23 
* http://www.gnu.org/licenses, or write to the Free Software Foundation,

24 
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 021101301 USA.

25 
*

26 
* If you want to redistribute modifications to GROMACS, please

27 
* consider that scientific software is very special. Version

28 
* control is crucial  bugs must be traceable. We will be happy to

29 
* consider code for inclusion in the official distribution, but

30 
* derived work must not be called official GROMACS. Details are found

31 
* in the README & COPYING files  if they are missing, get the

32 
* official version at http://www.gromacs.org.

33 
*

34 
* To help us fund GROMACS development, we humbly ask that you cite

35 
* the research papers on the package. Check out http://www.gromacs.org.

36 
*/

37 
#include "gmxpre.h" 
38  
39 
#include "nb_free_energy.h" 
40  
41 
#include <cmath> 
42  
43 
#include <algorithm> 
44  
45 
#include "gromacs/gmxlib/nrnb.h" 
46 
#include "gromacs/gmxlib/nonbonded/nb_kernel.h" 
47 
#include "gromacs/gmxlib/nonbonded/nonbonded.h" 
48 
#include "gromacs/math/functions.h" 
49 
#include "gromacs/math/vec.h" 
50 
#include "gromacs/mdtypes/forcerec.h" 
51 
#include "gromacs/mdtypes/md_enums.h" 
52 
#include "gromacs/utility/fatalerror.h" 
53  
54 
void

55 
gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict nlist,

56 
rvec * gmx_restrict xx, 
57 
rvec * gmx_restrict ff, 
58 
t_forcerec * gmx_restrict fr, 
59 
const t_mdatoms * gmx_restrict mdatoms,

60 
nb_kernel_data_t * gmx_restrict kernel_data, 
61 
t_nrnb * gmx_restrict nrnb) 
62 
{ 
63  
64 
#define STATE_A 0 
65 
#define STATE_B 1 
66 
#define NSTATES 2 
67 
int i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;

68 
real shX, shY, shZ; 
69 
real tx, ty, tz, Fscal; 
70 
double FscalC[NSTATES], FscalV[NSTATES]; /* Needs double for sc_power==48 */ 
71 
double Vcoul[NSTATES], Vvdw[NSTATES]; /* Needs double for sc_power==48 */ 
72 
real rinv6, r, rtC, rtV; 
73 
real iqA, iqB; 
74 
real qq[NSTATES], vctot; 
75 
int ntiA, ntiB, tj[NSTATES];

76 
real Vvdw6, Vvdw12, vvtot; 
77 
real ix, iy, iz, fix, fiy, fiz; 
78 
real dx, dy, dz, rsq, rinv; 
79 
real c6[NSTATES], c12[NSTATES], c6grid; 
80 
real LFC[NSTATES], LFV[NSTATES], DLF[NSTATES]; 
81 
double dvdl_coul, dvdl_vdw;

82 
real lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES]; 
83 
real sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min; 
84 
double rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */ 
85 
real sigma2[NSTATES], sigma_pow[NSTATES]; 
86 
int do_tab, tab_elemsize = 0; 
87 
int n0, n1C, n1V, nnn;

88 
real Y, F, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF; 
89 
int icoul, ivdw;

90 
int nri;

91 
const int * iinr; 
92 
const int * jindex; 
93 
const int * jjnr; 
94 
const int * shift; 
95 
const int * gid; 
96 
const int * typeA; 
97 
const int * typeB; 
98 
int ntype;

99 
const real * shiftvec;

100 
real * fshift; 
101 
real tabscale = 0;

102 
const real * VFtab = NULL; 
103 
const real * x;

104 
real * f; 
105 
real facel, krf, crf; 
106 
const real * chargeA;

107 
const real * chargeB;

108 
real sigma6_min, sigma6_def, lam_power, sc_r_power; 
109 
real alpha_coul, alpha_vdw, lambda_coul, lambda_vdw; 
110 
real ewcljrsq, ewclj, ewclj2, exponent, poly, vvdw_disp, vvdw_rep, sh_lj_ewald; 
111 
real ewclj6; 
112 
const real * nbfp, *nbfp_grid;

113 
real * dvdl; 
114 
real * Vv; 
115 
real * Vc; 
116 
gmx_bool bDoForces, bDoShiftForces, bDoPotential; 
117 
real rcoulomb, rvdw, sh_invrc6; 
118 
gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll; 
119 
gmx_bool bEwald, bEwaldLJ; 
120 
real rcutoff_max2; 
121 
const real * tab_ewald_F_lj = nullptr; 
122 
const real * tab_ewald_V_lj = nullptr; 
123 
real d, d2, sw, dsw, rinvcorr; 
124 
real elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4; 
125 
real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4; 
126 
gmx_bool bConvertEwaldToCoulomb, bConvertLJEwaldToLJ6; 
127 
gmx_bool bComputeVdwInteraction, bComputeElecInteraction; 
128 
const real * ewtab = nullptr; 
129 
int ewitab;

130 
real ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0; 
131  
132 
const real onetwelfth = 1.0/12.0; 
133 
const real onesixth = 1.0/6.0; 
134 
const real zero = 0.0; 
135 
const real half = 0.5; 
136 
const real one = 1.0; 
137 
const real two = 2.0; 
138 
const real six = 6.0; 
139 
const real fourtyeight = 48.0; 
140  
141 
x = xx[0];

142 
f = ff[0];

143  
144 
fshift = fr>fshift[0];

145  
146 
nri = nlist>nri; 
147 
iinr = nlist>iinr; 
148 
jindex = nlist>jindex; 
149 
jjnr = nlist>jjnr; 
150 
icoul = nlist>ielec; 
151 
ivdw = nlist>ivdw; 
152 
shift = nlist>shift; 
153 
gid = nlist>gid; 
154  
155 
shiftvec = fr>shift_vec[0];

156 
chargeA = mdatoms>chargeA; 
157 
chargeB = mdatoms>chargeB; 
158 
facel = fr>epsfac; 
159 
krf = fr>k_rf; 
160 
crf = fr>c_rf; 
161 
Vc = kernel_data>energygrp_elec; 
162 
typeA = mdatoms>typeA; 
163 
typeB = mdatoms>typeB; 
164 
ntype = fr>ntype; 
165 
nbfp = fr>nbfp; 
166 
nbfp_grid = fr>ljpme_c6grid; 
167 
Vv = kernel_data>energygrp_vdw; 
168 
lambda_coul = kernel_data>lambda[efptCOUL]; 
169 
lambda_vdw = kernel_data>lambda[efptVDW]; 
170 
dvdl = kernel_data>dvdl; 
171 
alpha_coul = fr>sc_alphacoul; 
172 
alpha_vdw = fr>sc_alphavdw; 
173 
lam_power = fr>sc_power; 
174 
sc_r_power = fr>sc_r_power; 
175 
sigma6_def = fr>sc_sigma6_def; 
176 
sigma6_min = fr>sc_sigma6_min; 
177 
bDoForces = kernel_data>flags & GMX_NONBONDED_DO_FORCE; 
178 
bDoShiftForces = kernel_data>flags & GMX_NONBONDED_DO_SHIFTFORCE; 
179 
bDoPotential = kernel_data>flags & GMX_NONBONDED_DO_POTENTIAL; 
180  
181 
rcoulomb = fr>rcoulomb; 
182 
rvdw = fr>rvdw; 
183 
sh_invrc6 = fr>ic>sh_invrc6; 
184 
sh_lj_ewald = fr>ic>sh_lj_ewald; 
185 
ewclj = fr>ewaldcoeff_lj; 
186 
ewclj2 = ewclj*ewclj; 
187 
ewclj6 = ewclj2*ewclj2*ewclj2; 
188  
189 
/* if (fr>coulomb_modifier == eintmodPOTSWITCH)

190 
{

191 
d = fr>rcoulombfr>rcoulomb_switch;

192 
elec_swV3 = 10.0/(d*d*d);

193 
elec_swV4 = 15.0/(d*d*d*d);

194 
elec_swV5 = 6.0/(d*d*d*d*d);

195 
elec_swF2 = 30.0/(d*d*d);

196 
elec_swF3 = 60.0/(d*d*d*d);

197 
elec_swF4 = 30.0/(d*d*d*d*d);

198 
}

199 
else

200 
{ */

201 
/* Avoid warnings from stupid compilers (looking at you, Clang!) */

202 
elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0; 
203 
// }

204  
205 
/* if (fr>vdw_modifier == eintmodPOTSWITCH)

206 
{

207 
d = fr>rvdwfr>rvdw_switch;

208 
vdw_swV3 = 10.0/(d*d*d);

209 
vdw_swV4 = 15.0/(d*d*d*d);

210 
vdw_swV5 = 6.0/(d*d*d*d*d);

211 
vdw_swF2 = 30.0/(d*d*d);

212 
vdw_swF3 = 60.0/(d*d*d*d);

213 
vdw_swF4 = 30.0/(d*d*d*d*d);

214 
}

215 
else

216 
{ */

217 
/* Avoid warnings from stupid compilers (looking at you, Clang!) */

218 
vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0; 
219 
// }

220  
221 
// if (fr>cutoff_scheme == ecutsVERLET)

222 
// {

223 
const interaction_const_t *ic;

224  
225 
ic = fr>ic; 
226 
/* if (EVDW_PME(ic>vdwtype))

227 
{

228 
ivdw = GMX_NBKERNEL_VDW_LJEWALD;

229 
} */

230 
// else

231 
// {

232 
ivdw = GMX_NBKERNEL_VDW_LENNARDJONES; 
233 
// }

234  
235 
/* if (ic>eeltype == eelCUT  EEL_RF(ic>eeltype))

236 
{

237 
icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;

238 
} */

239 
// else if (EEL_PME_EWALD(ic>eeltype))

240 
// {

241 
icoul = GMX_NBKERNEL_ELEC_EWALD; 
242 
// }

243 
/* else

244 
{

245 
gmx_incons("Unsupported eeltype with Verlet and freeenergy");

246 
} */

247  
248 
bExactElecCutoff = TRUE; 
249 
bExactVdwCutoff = TRUE; 
250 
// }

251 
/* else

252 
{

253 
bExactElecCutoff = (fr>coulomb_modifier != eintmodNONE)  fr>eeltype == eelRF_ZERO;

254 
bExactVdwCutoff = (fr>vdw_modifier != eintmodNONE);

255 
} */

256  
257 
bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff); 
258 
rcutoff_max2 = std::max(fr>rcoulomb, fr>rvdw); 
259 
rcutoff_max2 = rcutoff_max2*rcutoff_max2; 
260  
261 
bEwald = (icoul == GMX_NBKERNEL_ELEC_EWALD); 
262 
bEwaldLJ = (ivdw == GMX_NBKERNEL_VDW_LJEWALD); 
263  
264 
// if (bEwald  bEwaldLJ)

265 
// {

266 
sh_ewald = fr>ic>sh_ewald; 
267 
ewtab = fr>ic>tabq_coul_FDV0; 
268 
ewtabscale = fr>ic>tabq_scale; 
269 
ewtabhalfspace = half/ewtabscale; 
270 
tab_ewald_F_lj = fr>ic>tabq_vdw_F; 
271 
tab_ewald_V_lj = fr>ic>tabq_vdw_V; 
272 
// }

273  
274 
/* For Ewald/PME interactions we cannot easily apply the softcore component to

275 
* reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we

276 
* can apply the small trick of subtracting the _reciprocal_ space contribution

277 
* in this kernel, and instead apply the free energy interaction to the 1/r

278 
* (standard coulomb) interaction.

279 
*

280 
* However, we cannot use this approach for switchmodified since we would then

281 
* effectively end up evaluating a significantly different interaction here compared to the

282 
* normal (nonfreeenergy) kernels, either by applying a cutoff at a different

283 
* position than what the user requested, or by switching different

284 
* things (1/r rather than shortrange Ewald). For these settings, we just

285 
* use the traditional shortrange Ewald interaction in that case.

286 
*/

287 
bConvertEwaldToCoulomb = (bEwald && (fr>coulomb_modifier != eintmodPOTSWITCH)); 
288 
/* For now the below will always be true (since LJPME only works with Shift in Gromacs5.0),

289 
* but writing it this way means we stay in sync with coulomb, and it avoids future bugs.

290 
*/

291 
bConvertLJEwaldToLJ6 = (bEwaldLJ && (fr>vdw_modifier != eintmodPOTSWITCH)); 
292  
293 
/* We currently don't implement exclusion correction, needed with the Verlet cutoff scheme, without conversion */

294 
/* if (fr>cutoff_scheme == ecutsVERLET &&

295 
((bEwald && !bConvertEwaldToCoulomb) 

296 
(bEwaldLJ && !bConvertLJEwaldToLJ6)))

297 
{

298 
gmx_incons("Unimplemented nonbonded setup");

299 
} */

300  
301 
/* fix compiler warnings */

302 
n1C = n1V = 0;

303 
epsC = epsV = 0;

304 
eps2C = eps2V = 0;

305  
306 
dvdl_coul = 0;

307 
dvdl_vdw = 0;

308  
309 
/* Lambda factor for state A, 1lambda*/

310 
LFC[STATE_A] = one  lambda_coul; 
311 
LFV[STATE_A] = one  lambda_vdw; 
312  
313 
/* Lambda factor for state B, lambda*/

314 
LFC[STATE_B] = lambda_coul; 
315 
LFV[STATE_B] = lambda_vdw; 
316  
317 
/*derivative of the lambda factor for state A and B */

318 
DLF[STATE_A] = 1;

319 
DLF[STATE_B] = 1;

320  
321 
for (i = 0; i < NSTATES; i++) 
322 
{ 
323 
lfac_coul[i] = (lam_power == 2 ? (1LFC[i])*(1LFC[i]) : (1LFC[i])); 
324 
dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1LFC[i]) : 1); 
325 
lfac_vdw[i] = (lam_power == 2 ? (1LFV[i])*(1LFV[i]) : (1LFV[i])); 
326 
dlfac_vdw[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1LFV[i]) : 1); 
327 
} 
328 
/* precalculate */

329 
sigma2_def = std::cbrt(sigma6_def); 
330 
sigma2_min = std::cbrt(sigma6_min); 
331  
332 
/* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */

333  
334 
/* do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE 

335 
ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);

336 
if (do_tab)

337 
{

338 
tabscale = kernel_data>table_elec_vdw>scale;

339 
VFtab = kernel_data>table_elec_vdw>data; */

340 
/* we always use the combined table here */

341 
/* tab_elemsize = kernel_data>table_elec_vdw>stride;

342 
} */

343  
344 
for (n = 0; (n < nri); n++) 
345 
{ 
346 
int npair_within_cutoff;

347  
348 
npair_within_cutoff = 0;

349  
350 
is3 = 3*shift[n];

351 
shX = shiftvec[is3]; 
352 
shY = shiftvec[is3+1];

353 
shZ = shiftvec[is3+2];

354 
nj0 = jindex[n]; 
355 
nj1 = jindex[n+1];

356 
ii = iinr[n]; 
357 
ii3 = 3*ii;

358 
ix = shX + x[ii3+0];

359 
iy = shY + x[ii3+1];

360 
iz = shZ + x[ii3+2];

361 
iqA = facel*chargeA[ii]; 
362 
iqB = facel*chargeB[ii]; 
363 
ntiA = 2*ntype*typeA[ii];

364 
ntiB = 2*ntype*typeB[ii];

365 
vctot = 0;

366 
vvtot = 0;

367 
fix = 0;

368 
fiy = 0;

369 
fiz = 0;

370  
371 
for (k = nj0; (k < nj1); k++)

372 
{ 
373 
jnr = jjnr[k]; 
374 
j3 = 3*jnr;

375 
dx = ix  x[j3]; 
376 
dy = iy  x[j3+1];

377 
dz = iz  x[j3+2];

378 
rsq = dx*dx + dy*dy + dz*dz; 
379  
380 
if (bExactCutoffAll && rsq >= rcutoff_max2)

381 
{ 
382 
/* We save significant time by skipping all code below.

383 
* Note that with softcore interactions, the actual cutoff

384 
* check might be different. But since the softcore distance

385 
* is always larger than r, checking on r here is safe.

386 
*/

387 
continue;

388 
} 
389 
npair_within_cutoff++; 
390  
391 
if (rsq > 0) 
392 
{ 
393 
/* Note that unlike in the nbnxn kernels, we do not need

394 
* to clamp the value of rsq before taking the invsqrt

395 
* to avoid NaN in the LJ calculation, since here we do

396 
* not calculate LJ interactions when C6 and C12 are zero.

397 
*/

398  
399 
rinv = gmx::invsqrt(rsq); 
400 
r = rsq*rinv; 
401 
} 
402 
else

403 
{ 
404 
/* The force at r=0 is zero, because of symmetry.

405 
* But note that the potential is in general nonzero,

406 
* since the softcored r will be nonzero.

407 
*/

408 
rinv = 0;

409 
r = 0;

410 
} 
411  
412 
// if (sc_r_power == six)

413 
// {

414 
rpm2 = rsq*rsq; /* r4 */

415 
rp = rpm2*rsq; /* r6 */

416 
// }

417 
// else if (sc_r_power == fourtyeight)

418 
// {

419 
// rp = rsq*rsq*rsq; /* r6 */

420 
// rp = rp*rp; /* r12 */

421 
// rp = rp*rp; /* r24 */

422 
// rp = rp*rp; /* r48 */

423 
// rpm2 = rp/rsq; /* r46 */

424 
// }

425 
// else

426 
// {

427 
// rp = std::pow(r, sc_r_power); /* not currently supported as input, but can handle it */

428 
// rpm2 = rp/rsq;

429 
// }

430  
431 
Fscal = 0;

432  
433 
qq[STATE_A] = iqA*chargeA[jnr]; 
434 
qq[STATE_B] = iqB*chargeB[jnr]; 
435  
436 
tj[STATE_A] = ntiA+2*typeA[jnr];

437 
tj[STATE_B] = ntiB+2*typeB[jnr];

438  
439 
if (nlist>excl_fep == NULL  nlist>excl_fep[k]) 
440 
{ 
441 
c6[STATE_A] = nbfp[tj[STATE_A]]; 
442 
c6[STATE_B] = nbfp[tj[STATE_B]]; 
443  
444 
for (i = 0; i < NSTATES; i++) 
445 
{ 
446 
c12[i] = nbfp[tj[i]+1];

447 
if ((c6[i] > 0) && (c12[i] > 0)) 
448 
{ 
449 
/* c12 is stored scaled with 12.0 and c6 is scaled with 6.0  correct for this */

450 
sigma6[i] = half*c12[i]/c6[i]; 
451 
sigma2[i] = std::cbrt(sigma6[i]); 
452 
/* should be able to get rid of cbrt call eventually. Will require agreement on

453 
what data to store externally. Can't be fixed without larger scale changes, so not 4.6 */

454 
// if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */

455 
// {

456 
// sigma6[i] = sigma6_min;

457 
// sigma2[i] = sigma2_min;

458 
// }

459 
} 
460 
else

461 
{ 
462 
sigma6[i] = sigma6_def; 
463 
sigma2[i] = sigma2_def; 
464 
} 
465 
// if (sc_r_power == six)

466 
// {

467 
sigma_pow[i] = sigma6[i]; 
468 
// }

469 
// else if (sc_r_power == fourtyeight)

470 
// {

471 
// sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */

472 
// sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */

473 
// sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */

474 
// }

475 
// else

476 
// { /* not really supported as input, but in here for testing the general case*/

477 
// sigma_pow[i] = std::pow(sigma2[i], sc_r_power/2);

478 
// }

479 
} 
480  
481 
/* only use softcore if one of the states has a zero endstate  softcore is for avoiding infinities!*/

482 
// if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))

483 
// {

484 
// alpha_vdw_eff = 0;

485 
// alpha_coul_eff = 0;

486 
// }

487 
// else

488 
// {

489 
alpha_vdw_eff = alpha_vdw; 
490 
alpha_coul_eff = alpha_coul; 
491 
// }

492  
493 
for (i = 0; i < NSTATES; i++) 
494 
{ 
495 
FscalC[i] = 0;

496 
FscalV[i] = 0;

497 
Vcoul[i] = 0;

498 
Vvdw[i] = 0;

499  
500 
/* Only spend time on A or B state if it is nonzero */

501 
if ( (qq[i] != 0)  (c6[i] != 0)  (c12[i] != 0) ) 
502 
{ 
503 
/* this section has to be inside the loop because of the dependence on sigma_pow */

504 
rpinvC = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp); 
505 
rinvC = std::pow(rpinvC, one/sc_r_power); 
506 
rC = one/rinvC; 
507  
508 
rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp); 
509 
rinvV = std::pow(rpinvV, one/sc_r_power); 
510 
rV = one/rinvV; 
511  
512 
// if (do_tab)

513 
// {

514 
// rtC = rC*tabscale;

515 
// n0 = rtC;

516 
// epsC = rtCn0;

517 
// eps2C = epsC*epsC;

518 
// n1C = tab_elemsize*n0;

519 
//

520 
// rtV = rV*tabscale;

521 
// n0 = rtV;

522 
// epsV = rtVn0;

523 
// eps2V = epsV*epsV;

524 
// n1V = tab_elemsize*n0;

525 
// }

526  
527 
/* Only process the coulomb interactions if we have charges,

528 
* and if we either include all entries in the list (no cutoff

529 
* used in the kernel), or if we are within the cutoff.

530 
*/

531 
bComputeElecInteraction = !bExactElecCutoff  
532 
( bConvertEwaldToCoulomb && r < rcoulomb)  
533 
(!bConvertEwaldToCoulomb && rC < rcoulomb); 
534  
535 
if ( (qq[i] != 0) && bComputeElecInteraction) 
536 
{ 
537 
// switch (icoul)

538 
// {

539 
// case GMX_NBKERNEL_ELEC_COULOMB:

540 
// /* simple cutoff */

541 
// Vcoul[i] = qq[i]*rinvC;

542 
// FscalC[i] = Vcoul[i];

543 
// /* The shift for the Coulomb potential is stored in

544 
// * the RF parameter c_rf, which is 0 without shift.

545 
// */

546 
// Vcoul[i] = qq[i]*fr>ic>c_rf;

547 
// break;

548 
//

549 
// case GMX_NBKERNEL_ELEC_REACTIONFIELD:

550 
// /* reactionfield */

551 
// Vcoul[i] = qq[i]*(rinvC + krf*rC*rCcrf);

552 
// FscalC[i] = qq[i]*(rinvC  two*krf*rC*rC);

553 
// break;

554 
//

555 
// case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:

556 
// /* nonEwald tabulated coulomb */

557 
// nnn = n1C;

558 
// Y = VFtab[nnn];

559 
// F = VFtab[nnn+1];

560 
// Geps = epsC*VFtab[nnn+2];

561 
// Heps2 = eps2C*VFtab[nnn+3];

562 
// Fp = F+Geps+Heps2;

563 
// VV = Y+epsC*Fp;

564 
// FF = Fp+Geps+two*Heps2;

565 
// Vcoul[i] = qq[i]*VV;

566 
// FscalC[i] = qq[i]*tabscale*FF*rC;

567 
// break;

568 
//

569 
// case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:

570 
// gmx_fatal(FARGS, "Free energy and GB not implemented.\n");

571 
// break;

572 
//

573 
// case GMX_NBKERNEL_ELEC_EWALD:

574 
// if (bConvertEwaldToCoulomb)

575 
// {

576 
/* Ewald FEP is done only on the 1/r part */

577 
Vcoul[i] = qq[i]*(rinvCsh_ewald); 
578 
FscalC[i] = qq[i]*rinvC; 
579 
// }

580 
// else

581 
// {

582 
// ewrt = rC*ewtabscale;

583 
// ewitab = static_cast<int>(ewrt);

584 
// eweps = ewrtewitab;

585 
// ewitab = 4*ewitab;

586 
// FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1];

587 
// rinvcorr = rinvCsh_ewald;

588 
// Vcoul[i] = qq[i]*(rinvcorr(ewtab[ewitab+2]ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i])));

589 
// FscalC[i] = qq[i]*(rinvCrC*FscalC[i]);

590 
// }

591 
// break;

592 
//

593 
// case GMX_NBKERNEL_ELEC_NONE:

594 
// FscalC[i] = zero;

595 
// Vcoul[i] = zero;

596 
// break;

597 
//

598 
// default:

599 
// gmx_incons("Invalid icoul in free energy kernel");

600 
// break;

601 
// }

602  
603 
// if (fr>coulomb_modifier == eintmodPOTSWITCH)

604 
// {

605 
// d = rCfr>rcoulomb_switch;

606 
// d = (d > zero) ? d : zero;

607 
// d2 = d*d;

608 
// sw = one+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));

609 
// dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));

610 
//

611 
// FscalC[i] = FscalC[i]*sw  rC*Vcoul[i]*dsw;

612 
// Vcoul[i] *= sw;

613 
//

614 
// FscalC[i] = (rC < rcoulomb) ? FscalC[i] : zero;

615 
// Vcoul[i] = (rC < rcoulomb) ? Vcoul[i] : zero;

616 
// }

617 
} 
618  
619 
/* Only process the VDW interactions if we have

620 
* some nonzero parameters, and if we either

621 
* include all entries in the list (no cutoff used

622 
* in the kernel), or if we are within the cutoff.

623 
*/

624 
bComputeVdwInteraction = !bExactVdwCutoff  
625 
( bConvertLJEwaldToLJ6 && r < rvdw)  
626 
(!bConvertLJEwaldToLJ6 && rV < rvdw); 
627 
if ((c6[i] != 0  c12[i] != 0) && bComputeVdwInteraction) 
628 
{ 
629 
// switch (ivdw) // What about LJEWALD? (probably not)

630 
// {

631 
// case GMX_NBKERNEL_VDW_LENNARDJONES:

632 
// /* cutoff LJ */

633 
// if (sc_r_power == six)

634 
// {

635 
rinv6 = rpinvV; 
636 
// }

637 
// else

638 
// {

639 
// rinv6 = rinvV*rinvV;

640 
// rinv6 = rinv6*rinv6*rinv6;

641 
// }

642 
Vvdw6 = c6[i]*rinv6; 
643 
Vvdw12 = c12[i]*rinv6*rinv6; 
644  
645 
Vvdw[i] = ( (Vvdw12  c12[i]*sh_invrc6*sh_invrc6)*onetwelfth 
646 
 (Vvdw6  c6[i]*sh_invrc6)*onesixth); 
647 
FscalV[i] = Vvdw12  Vvdw6; 
648 
// break;

649 
//

650 
// case GMX_NBKERNEL_VDW_BUCKINGHAM:

651 
// gmx_fatal(FARGS, "Buckingham free energy not supported.");

652 
// break;

653 
//

654 
// case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:

655 
// /* Table LJ */

656 
// nnn = n1V+4;

657 
// /* dispersion */

658 
// Y = VFtab[nnn];

659 
// F = VFtab[nnn+1];

660 
// Geps = epsV*VFtab[nnn+2];

661 
// Heps2 = eps2V*VFtab[nnn+3];

662 
// Fp = F+Geps+Heps2;

663 
// VV = Y+epsV*Fp;

664 
// FF = Fp+Geps+two*Heps2;

665 
// Vvdw[i] += c6[i]*VV;

666 
// FscalV[i] = c6[i]*tabscale*FF*rV;

667 
//

668 
// /* repulsion */

669 
// Y = VFtab[nnn+4];

670 
// F = VFtab[nnn+5];

671 
// Geps = epsV*VFtab[nnn+6];

672 
// Heps2 = eps2V*VFtab[nnn+7];

673 
// Fp = F+Geps+Heps2;

674 
// VV = Y+epsV*Fp;

675 
// FF = Fp+Geps+two*Heps2;

676 
// Vvdw[i] += c12[i]*VV;

677 
// FscalV[i] = c12[i]*tabscale*FF*rV;

678 
// break;

679 
//

680 
// case GMX_NBKERNEL_VDW_LJEWALD:

681 
// if (sc_r_power == six)

682 
// {

683 
// rinv6 = rpinvV;

684 
// }

685 
// else

686 
// {

687 
// rinv6 = rinvV*rinvV;

688 
// rinv6 = rinv6*rinv6*rinv6;

689 
// }

690 
// c6grid = nbfp_grid[tj[i]];

691 
//

692 
// if (bConvertLJEwaldToLJ6)

693 
// {

694 
// /* cutoff LJ */

695 
// Vvdw6 = c6[i]*rinv6;

696 
// Vvdw12 = c12[i]*rinv6*rinv6;

697 
//

698 
// Vvdw[i] = ( (Vvdw12  c12[i]*sh_invrc6*sh_invrc6)*onetwelfth

699 
//  (Vvdw6  c6[i]*sh_invrc6  c6grid*sh_lj_ewald)*onesixth);

700 
// FscalV[i] = Vvdw12  Vvdw6;

701 
// }

702 
// else

703 
// {

704 
// /* Normal LJPME */

705 
// ewcljrsq = ewclj2*rV*rV;

706 
// exponent = std::exp(ewcljrsq);

707 
// poly = exponent*(one + ewcljrsq + ewcljrsq*ewcljrsq*half);

708 
// vvdw_disp = (c6[i]c6grid*(onepoly))*rinv6;

709 
// vvdw_rep = c12[i]*rinv6*rinv6;

710 
// FscalV[i] = vvdw_rep  vvdw_disp  c6grid*onesixth*exponent*ewclj6;

711 
// Vvdw[i] = (vvdw_rep  c12[i]*sh_invrc6*sh_invrc6)*onetwelfth  (vvdw_disp  c6[i]*sh_invrc6  c6grid*sh_lj_ewald)/six;

712 
// }

713 
// break;

714 
//

715 
// case GMX_NBKERNEL_VDW_NONE:

716 
// Vvdw[i] = zero;

717 
// FscalV[i] = zero;

718 
// break;

719 
//

720 
// default:

721 
// gmx_incons("Invalid ivdw in free energy kernel");

722 
// break;

723 
// }

724  
725 
// if (fr>vdw_modifier == eintmodPOTSWITCH)

726 
// {

727 
// d = rVfr>rvdw_switch;

728 
// d = (d > zero) ? d : zero;

729 
// d2 = d*d;

730 
// sw = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));

731 
// dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));

732 
//

733 
// FscalV[i] = FscalV[i]*sw  rV*Vvdw[i]*dsw;

734 
// Vvdw[i] *= sw;

735 
//

736 
// FscalV[i] = (rV < rvdw) ? FscalV[i] : zero;

737 
// Vvdw[i] = (rV < rvdw) ? Vvdw[i] : zero;

738 
// }

739 
} 
740  
741 
/* FscalC (and FscalV) now contain: dV/drC * rC

742 
* Now we multiply by rC^p, so it will be: dV/drC * rC^1p

743 
* Further down we first multiply by r^p2 and then by

744 
* the vector r, which in total gives: dV/drC * (r/rC)^1p

745 
*/

746 
FscalC[i] *= rpinvC; 
747 
FscalV[i] *= rpinvV; 
748 
} 
749 
} 
750  
751 
/* Assemble A and B states */

752 
for (i = 0; i < NSTATES; i++) 
753 
{ 
754 
vctot += LFC[i]*Vcoul[i]; 
755 
vvtot += LFV[i]*Vvdw[i]; 
756  
757 
Fscal += LFC[i]*FscalC[i]*rpm2; 
758 
Fscal += LFV[i]*FscalV[i]*rpm2; 
759  
760 
dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i]; 
761 
dvdl_vdw += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i]; 
762 
} 
763 
} 
764 
// else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)

765 
// {

766 
// /* For excluded pairs, which are only in this pair list when

767 
// * using the Verlet scheme, we don't use softcore.

768 
// * The group scheme also doesn't softcore for these.

769 
// * As there is no singularity, there is no need for softcore.

770 
// */

771 
// VV = krf*rsq  crf;

772 
// FF = two*krf;

773 
//

774 
// if (ii == jnr)

775 
// {

776 
// VV *= half;

777 
// }

778 
//

779 
// for (i = 0; i < NSTATES; i++)

780 
// {

781 
// vctot += LFC[i]*qq[i]*VV;

782 
// Fscal += LFC[i]*qq[i]*FF;

783 
// dvdl_coul += DLF[i]*qq[i]*VV;

784 
// }

785 
// }

786  
787 
if (bConvertEwaldToCoulomb && ( !bExactElecCutoff  r < rcoulomb ) ) // can't get rid of this: r is not known a priori 
788 
{ 
789 
/* See comment in the preamble. When using Ewald interactions

790 
* (unless we use a switch modifier) we subtract the reciprocalspace

791 
* Ewald component here which made it possible to apply the free

792 
* energy interaction to 1/r (vanilla coulomb shortrange part)

793 
* above. This gets us closer to the ideal case of applying

794 
* the softcore to the entire electrostatic interaction,

795 
* including the reciprocalspace component.

796 
*/

797 
real v_lr, f_lr; 
798  
799 
ewrt = r*ewtabscale; 
800 
ewitab = static_cast<int>(ewrt); 
801 
eweps = ewrtewitab; 
802 
ewitab = 4*ewitab;

803 
f_lr = ewtab[ewitab]+eweps*ewtab[ewitab+1];

804 
v_lr = (ewtab[ewitab+2]ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));

805 
f_lr *= rinv; 
806  
807 
/* Note that any possible Ewald shift has already been applied in

808 
* the normal interaction part above.

809 
*/

810  
811 
if (ii == jnr)

812 
{ 
813 
/* If we get here, the i particle (ii) has itself (jnr)

814 
* in its neighborlist. This can only happen with the Verlet

815 
* scheme, and corresponds to a selfinteraction that will

816 
* occur twice. Scale it down by 50% to only include it once.

817 
*/

818 
v_lr *= half; 
819 
} 
820  
821 
for (i = 0; i < NSTATES; i++) 
822 
{ 
823 
vctot = LFC[i]*qq[i]*v_lr; 
824 
Fscal = LFC[i]*qq[i]*f_lr; 
825 
dvdl_coul = (DLF[i]*qq[i])*v_lr; 
826 
} 
827 
} 
828  
829 
// if (bConvertLJEwaldToLJ6 && (!bExactVdwCutoff  r < rvdw))

830 
// {

831 
// /* See comment in the preamble. When using LJEwald interactions

832 
// * (unless we use a switch modifier) we subtract the reciprocalspace

833 
// * Ewald component here which made it possible to apply the free

834 
// * energy interaction to r^6 (vanilla LJ6 shortrange part)

835 
// * above. This gets us closer to the ideal case of applying

836 
// * the softcore to the entire VdW interaction,

837 
// * including the reciprocalspace component.

838 
// */

839 
// /* We could also use the analytical form here

840 
// * iso a table, but that can cause issues for

841 
// * r close to 0 for noninteracting pairs.

842 
// */

843 
// real rs, frac, f_lr;

844 
// int ri;

845 
//

846 
// rs = rsq*rinv*ewtabscale;

847 
// ri = static_cast<int>(rs);

848 
// frac = rs  ri;

849 
// f_lr = (1  frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];

850 
// /* TODO: Currently the Ewald LJ table does not contain

851 
// * the factor 1/6, we should add this.

852 
// */

853 
// FF = f_lr*rinv/six;

854 
// VV = (tab_ewald_V_lj[ri]  ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;

855 
//

856 
// if (ii == jnr)

857 
// {

858 
// /* If we get here, the i particle (ii) has itself (jnr)

859 
// * in its neighborlist. This can only happen with the Verlet

860 
// * scheme, and corresponds to a selfinteraction that will

861 
// * occur twice. Scale it down by 50% to only include it once.

862 
// */

863 
// VV *= half;

864 
// }

865 
//

866 
// for (i = 0; i < NSTATES; i++)

867 
// {

868 
// c6grid = nbfp_grid[tj[i]];

869 
// vvtot += LFV[i]*c6grid*VV;

870 
// Fscal += LFV[i]*c6grid*FF;

871 
// dvdl_vdw += (DLF[i]*c6grid)*VV;

872 
// }

873 
// }

874  
875 
if (bDoForces) // can't get rid of this: task is not known a priori 
876 
{ 
877 
tx = Fscal*dx; 
878 
ty = Fscal*dy; 
879 
tz = Fscal*dz; 
880 
fix = fix + tx; 
881 
fiy = fiy + ty; 
882 
fiz = fiz + tz; 
883 
/* OpenMP atomics are expensive, but this kernels is also

884 
* expensive, so we can take this hit, instead of using

885 
* threadlocal output buffers and extra reduction.

886 
*

887 
* All the OpenMP regions in this file are trivial and should

888 
* not throw, so no need for try/catch.

889 
*/

890 
#pragma omp atomic

891 
f[j3] = tx; 
892 
#pragma omp atomic

893 
f[j3+1] = ty;

894 
#pragma omp atomic

895 
f[j3+2] = tz;

896 
} 
897 
} 
898  
899 
/* The atomics below are expensive with many OpenMP threads.

900 
* Here unperturbed iparticles will usually only have a few

901 
* (perturbed) jparticles in the list. Thus with a buffered list

902 
* we can skip a significant number of ireductions with a check.

903 
*/

904 
if (npair_within_cutoff > 0) 
905 
{ 
906 
if (bDoForces) // can't get rid of this: task is not known a priori 
907 
{ 
908 
#pragma omp atomic

909 
f[ii3] += fix; 
910 
#pragma omp atomic

911 
f[ii3+1] += fiy;

912 
#pragma omp atomic

913 
f[ii3+2] += fiz;

914 
} 
915 
if (bDoShiftForces) // can't get rid of this: task is not known a priori 
916 
{ 
917 
#pragma omp atomic

918 
fshift[is3] += fix; 
919 
#pragma omp atomic

920 
fshift[is3+1] += fiy;

921 
#pragma omp atomic

922 
fshift[is3+2] += fiz;

923 
} 
924 
if (bDoPotential) // can't get rid of this: task is not known a priori 
925 
{ 
926 
ggid = gid[n]; 
927 
#pragma omp atomic

928 
Vc[ggid] += vctot; 
929 
#pragma omp atomic

930 
Vv[ggid] += vvtot; 
931 
} 
932 
} 
933 
} 
934  
935 
#pragma omp atomic

936 
dvdl[efptCOUL] += dvdl_coul; 
937 
#pragma omp atomic

938 
dvdl[efptVDW] += dvdl_vdw; 
939  
940 
/* Estimate flops, average for free energy stuff:

941 
* 12 flops per outer iteration

942 
* 150 flops per inner iteration

943 
*/

944 
#pragma omp atomic

945 
inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist>nri*12 + nlist>jindex[n]*150); 
946 
} 