nb_free_energy.cpp
1 |
/*
|
---|---|
2 |
* This file is part of the GROMACS molecular simulation package.
|
3 |
*
|
4 |
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
|
5 |
* Copyright (c) 2001-2004, The GROMACS development team.
|
6 |
* Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
|
7 |
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
|
8 |
* and including many others, as listed in the AUTHORS file in the
|
9 |
* top-level source directory and at http://www.gromacs.org.
|
10 |
*
|
11 |
* GROMACS is free software; you can redistribute it and/or
|
12 |
* modify it under the terms of the GNU Lesser General Public License
|
13 |
* as published by the Free Software Foundation; either version 2.1
|
14 |
* of the License, or (at your option) any later version.
|
15 |
*
|
16 |
* GROMACS is distributed in the hope that it will be useful,
|
17 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
19 |
* Lesser General Public License for more details.
|
20 |
*
|
21 |
* You should have received a copy of the GNU Lesser General Public
|
22 |
* License along with GROMACS; if not, see
|
23 |
* http://www.gnu.org/licenses, or write to the Free Software Foundation,
|
24 |
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
25 |
*
|
26 |
* If you want to redistribute modifications to GROMACS, please
|
27 |
* consider that scientific software is very special. Version
|
28 |
* control is crucial - bugs must be traceable. We will be happy to
|
29 |
* consider code for inclusion in the official distribution, but
|
30 |
* derived work must not be called official GROMACS. Details are found
|
31 |
* in the README & COPYING files - if they are missing, get the
|
32 |
* official version at http://www.gromacs.org.
|
33 |
*
|
34 |
* To help us fund GROMACS development, we humbly ask that you cite
|
35 |
* the research papers on the package. Check out http://www.gromacs.org.
|
36 |
*/
|
37 |
#include "gmxpre.h" |
38 |
|
39 |
#include "nb_free_energy.h" |
40 |
|
41 |
#include <cmath> |
42 |
|
43 |
#include <algorithm> |
44 |
|
45 |
#include "gromacs/gmxlib/nrnb.h" |
46 |
#include "gromacs/gmxlib/nonbonded/nb_kernel.h" |
47 |
#include "gromacs/gmxlib/nonbonded/nonbonded.h" |
48 |
#include "gromacs/math/functions.h" |
49 |
#include "gromacs/math/vec.h" |
50 |
#include "gromacs/mdtypes/forcerec.h" |
51 |
#include "gromacs/mdtypes/md_enums.h" |
52 |
#include "gromacs/utility/fatalerror.h" |
53 |
|
54 |
void
|
55 |
gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict nlist,
|
56 |
rvec * gmx_restrict xx, |
57 |
rvec * gmx_restrict ff, |
58 |
t_forcerec * gmx_restrict fr, |
59 |
const t_mdatoms * gmx_restrict mdatoms,
|
60 |
nb_kernel_data_t * gmx_restrict kernel_data, |
61 |
t_nrnb * gmx_restrict nrnb) |
62 |
{ |
63 |
|
64 |
#define STATE_A 0 |
65 |
#define STATE_B 1 |
66 |
#define NSTATES 2 |
67 |
int i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
|
68 |
real shX, shY, shZ; |
69 |
real tx, ty, tz, Fscal; |
70 |
double FscalC[NSTATES], FscalV[NSTATES]; /* Needs double for sc_power==48 */ |
71 |
double Vcoul[NSTATES], Vvdw[NSTATES]; /* Needs double for sc_power==48 */ |
72 |
real rinv6, r, rtC, rtV; |
73 |
real iqA, iqB; |
74 |
real qq[NSTATES], vctot; |
75 |
int ntiA, ntiB, tj[NSTATES];
|
76 |
real Vvdw6, Vvdw12, vvtot; |
77 |
real ix, iy, iz, fix, fiy, fiz; |
78 |
real dx, dy, dz, rsq, rinv; |
79 |
real c6[NSTATES], c12[NSTATES], c6grid; |
80 |
real LFC[NSTATES], LFV[NSTATES], DLF[NSTATES]; |
81 |
double dvdl_coul, dvdl_vdw;
|
82 |
real lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES]; |
83 |
real sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min; |
84 |
double rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */ |
85 |
real sigma2[NSTATES], sigma_pow[NSTATES]; |
86 |
int do_tab, tab_elemsize = 0; |
87 |
int n0, n1C, n1V, nnn;
|
88 |
real Y, F, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF; |
89 |
int icoul, ivdw;
|
90 |
int nri;
|
91 |
const int * iinr; |
92 |
const int * jindex; |
93 |
const int * jjnr; |
94 |
const int * shift; |
95 |
const int * gid; |
96 |
const int * typeA; |
97 |
const int * typeB; |
98 |
int ntype;
|
99 |
const real * shiftvec;
|
100 |
real * fshift; |
101 |
real tabscale = 0;
|
102 |
const real * VFtab = NULL; |
103 |
const real * x;
|
104 |
real * f; |
105 |
real facel, krf, crf; |
106 |
const real * chargeA;
|
107 |
const real * chargeB;
|
108 |
real sigma6_min, sigma6_def, lam_power, sc_r_power; |
109 |
real alpha_coul, alpha_vdw, lambda_coul, lambda_vdw; |
110 |
real ewcljrsq, ewclj, ewclj2, exponent, poly, vvdw_disp, vvdw_rep, sh_lj_ewald; |
111 |
real ewclj6; |
112 |
const real * nbfp, *nbfp_grid;
|
113 |
real * dvdl; |
114 |
real * Vv; |
115 |
real * Vc; |
116 |
gmx_bool bDoForces, bDoShiftForces, bDoPotential; |
117 |
real rcoulomb, rvdw, sh_invrc6; |
118 |
gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll; |
119 |
gmx_bool bEwald, bEwaldLJ; |
120 |
real rcutoff_max2; |
121 |
const real * tab_ewald_F_lj = nullptr; |
122 |
const real * tab_ewald_V_lj = nullptr; |
123 |
real d, d2, sw, dsw, rinvcorr; |
124 |
real elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4; |
125 |
real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4; |
126 |
gmx_bool bConvertEwaldToCoulomb, bConvertLJEwaldToLJ6; |
127 |
gmx_bool bComputeVdwInteraction, bComputeElecInteraction; |
128 |
const real * ewtab = nullptr; |
129 |
int ewitab;
|
130 |
real ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0; |
131 |
|
132 |
const real onetwelfth = 1.0/12.0; |
133 |
const real onesixth = 1.0/6.0; |
134 |
const real zero = 0.0; |
135 |
const real half = 0.5; |
136 |
const real one = 1.0; |
137 |
const real two = 2.0; |
138 |
const real six = 6.0; |
139 |
const real fourtyeight = 48.0; |
140 |
|
141 |
x = xx[0];
|
142 |
f = ff[0];
|
143 |
|
144 |
fshift = fr->fshift[0];
|
145 |
|
146 |
nri = nlist->nri; |
147 |
iinr = nlist->iinr; |
148 |
jindex = nlist->jindex; |
149 |
jjnr = nlist->jjnr; |
150 |
icoul = nlist->ielec; |
151 |
ivdw = nlist->ivdw; |
152 |
shift = nlist->shift; |
153 |
gid = nlist->gid; |
154 |
|
155 |
shiftvec = fr->shift_vec[0];
|
156 |
chargeA = mdatoms->chargeA; |
157 |
chargeB = mdatoms->chargeB; |
158 |
facel = fr->epsfac; |
159 |
krf = fr->k_rf; |
160 |
crf = fr->c_rf; |
161 |
Vc = kernel_data->energygrp_elec; |
162 |
typeA = mdatoms->typeA; |
163 |
typeB = mdatoms->typeB; |
164 |
ntype = fr->ntype; |
165 |
nbfp = fr->nbfp; |
166 |
nbfp_grid = fr->ljpme_c6grid; |
167 |
Vv = kernel_data->energygrp_vdw; |
168 |
lambda_coul = kernel_data->lambda[efptCOUL]; |
169 |
lambda_vdw = kernel_data->lambda[efptVDW]; |
170 |
dvdl = kernel_data->dvdl; |
171 |
alpha_coul = fr->sc_alphacoul; |
172 |
alpha_vdw = fr->sc_alphavdw; |
173 |
lam_power = fr->sc_power; |
174 |
sc_r_power = fr->sc_r_power; |
175 |
sigma6_def = fr->sc_sigma6_def; |
176 |
sigma6_min = fr->sc_sigma6_min; |
177 |
bDoForces = kernel_data->flags & GMX_NONBONDED_DO_FORCE; |
178 |
bDoShiftForces = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE; |
179 |
bDoPotential = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL; |
180 |
|
181 |
rcoulomb = fr->rcoulomb; |
182 |
rvdw = fr->rvdw; |
183 |
sh_invrc6 = fr->ic->sh_invrc6; |
184 |
sh_lj_ewald = fr->ic->sh_lj_ewald; |
185 |
ewclj = fr->ewaldcoeff_lj; |
186 |
ewclj2 = ewclj*ewclj; |
187 |
ewclj6 = ewclj2*ewclj2*ewclj2; |
188 |
|
189 |
/* if (fr->coulomb_modifier == eintmodPOTSWITCH)
|
190 |
{
|
191 |
d = fr->rcoulomb-fr->rcoulomb_switch;
|
192 |
elec_swV3 = -10.0/(d*d*d);
|
193 |
elec_swV4 = 15.0/(d*d*d*d);
|
194 |
elec_swV5 = -6.0/(d*d*d*d*d);
|
195 |
elec_swF2 = -30.0/(d*d*d);
|
196 |
elec_swF3 = 60.0/(d*d*d*d);
|
197 |
elec_swF4 = -30.0/(d*d*d*d*d);
|
198 |
}
|
199 |
else
|
200 |
{ */
|
201 |
/* Avoid warnings from stupid compilers (looking at you, Clang!) */
|
202 |
elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0; |
203 |
// }
|
204 |
|
205 |
/* if (fr->vdw_modifier == eintmodPOTSWITCH)
|
206 |
{
|
207 |
d = fr->rvdw-fr->rvdw_switch;
|
208 |
vdw_swV3 = -10.0/(d*d*d);
|
209 |
vdw_swV4 = 15.0/(d*d*d*d);
|
210 |
vdw_swV5 = -6.0/(d*d*d*d*d);
|
211 |
vdw_swF2 = -30.0/(d*d*d);
|
212 |
vdw_swF3 = 60.0/(d*d*d*d);
|
213 |
vdw_swF4 = -30.0/(d*d*d*d*d);
|
214 |
}
|
215 |
else
|
216 |
{ */
|
217 |
/* Avoid warnings from stupid compilers (looking at you, Clang!) */
|
218 |
vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0; |
219 |
// }
|
220 |
|
221 |
// if (fr->cutoff_scheme == ecutsVERLET)
|
222 |
// {
|
223 |
const interaction_const_t *ic;
|
224 |
|
225 |
ic = fr->ic; |
226 |
/* if (EVDW_PME(ic->vdwtype))
|
227 |
{
|
228 |
ivdw = GMX_NBKERNEL_VDW_LJEWALD;
|
229 |
} */
|
230 |
// else
|
231 |
// {
|
232 |
ivdw = GMX_NBKERNEL_VDW_LENNARDJONES; |
233 |
// }
|
234 |
|
235 |
/* if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
|
236 |
{
|
237 |
icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
|
238 |
} */
|
239 |
// else if (EEL_PME_EWALD(ic->eeltype))
|
240 |
// {
|
241 |
icoul = GMX_NBKERNEL_ELEC_EWALD; |
242 |
// }
|
243 |
/* else
|
244 |
{
|
245 |
gmx_incons("Unsupported eeltype with Verlet and free-energy");
|
246 |
} */
|
247 |
|
248 |
bExactElecCutoff = TRUE; |
249 |
bExactVdwCutoff = TRUE; |
250 |
// }
|
251 |
/* else
|
252 |
{
|
253 |
bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
|
254 |
bExactVdwCutoff = (fr->vdw_modifier != eintmodNONE);
|
255 |
} */
|
256 |
|
257 |
bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff); |
258 |
rcutoff_max2 = std::max(fr->rcoulomb, fr->rvdw); |
259 |
rcutoff_max2 = rcutoff_max2*rcutoff_max2; |
260 |
|
261 |
bEwald = (icoul == GMX_NBKERNEL_ELEC_EWALD); |
262 |
bEwaldLJ = (ivdw == GMX_NBKERNEL_VDW_LJEWALD); |
263 |
|
264 |
// if (bEwald || bEwaldLJ)
|
265 |
// {
|
266 |
sh_ewald = fr->ic->sh_ewald; |
267 |
ewtab = fr->ic->tabq_coul_FDV0; |
268 |
ewtabscale = fr->ic->tabq_scale; |
269 |
ewtabhalfspace = half/ewtabscale; |
270 |
tab_ewald_F_lj = fr->ic->tabq_vdw_F; |
271 |
tab_ewald_V_lj = fr->ic->tabq_vdw_V; |
272 |
// }
|
273 |
|
274 |
/* For Ewald/PME interactions we cannot easily apply the soft-core component to
|
275 |
* reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we
|
276 |
* can apply the small trick of subtracting the _reciprocal_ space contribution
|
277 |
* in this kernel, and instead apply the free energy interaction to the 1/r
|
278 |
* (standard coulomb) interaction.
|
279 |
*
|
280 |
* However, we cannot use this approach for switch-modified since we would then
|
281 |
* effectively end up evaluating a significantly different interaction here compared to the
|
282 |
* normal (non-free-energy) kernels, either by applying a cutoff at a different
|
283 |
* position than what the user requested, or by switching different
|
284 |
* things (1/r rather than short-range Ewald). For these settings, we just
|
285 |
* use the traditional short-range Ewald interaction in that case.
|
286 |
*/
|
287 |
bConvertEwaldToCoulomb = (bEwald && (fr->coulomb_modifier != eintmodPOTSWITCH)); |
288 |
/* For now the below will always be true (since LJ-PME only works with Shift in Gromacs-5.0),
|
289 |
* but writing it this way means we stay in sync with coulomb, and it avoids future bugs.
|
290 |
*/
|
291 |
bConvertLJEwaldToLJ6 = (bEwaldLJ && (fr->vdw_modifier != eintmodPOTSWITCH)); |
292 |
|
293 |
/* We currently don't implement exclusion correction, needed with the Verlet cut-off scheme, without conversion */
|
294 |
/* if (fr->cutoff_scheme == ecutsVERLET &&
|
295 |
((bEwald && !bConvertEwaldToCoulomb) ||
|
296 |
(bEwaldLJ && !bConvertLJEwaldToLJ6)))
|
297 |
{
|
298 |
gmx_incons("Unimplemented non-bonded setup");
|
299 |
} */
|
300 |
|
301 |
/* fix compiler warnings */
|
302 |
n1C = n1V = 0;
|
303 |
epsC = epsV = 0;
|
304 |
eps2C = eps2V = 0;
|
305 |
|
306 |
dvdl_coul = 0;
|
307 |
dvdl_vdw = 0;
|
308 |
|
309 |
/* Lambda factor for state A, 1-lambda*/
|
310 |
LFC[STATE_A] = one - lambda_coul; |
311 |
LFV[STATE_A] = one - lambda_vdw; |
312 |
|
313 |
/* Lambda factor for state B, lambda*/
|
314 |
LFC[STATE_B] = lambda_coul; |
315 |
LFV[STATE_B] = lambda_vdw; |
316 |
|
317 |
/*derivative of the lambda factor for state A and B */
|
318 |
DLF[STATE_A] = -1;
|
319 |
DLF[STATE_B] = 1;
|
320 |
|
321 |
for (i = 0; i < NSTATES; i++) |
322 |
{ |
323 |
lfac_coul[i] = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i])); |
324 |
dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1); |
325 |
lfac_vdw[i] = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i])); |
326 |
dlfac_vdw[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1); |
327 |
} |
328 |
/* precalculate */
|
329 |
sigma2_def = std::cbrt(sigma6_def); |
330 |
sigma2_min = std::cbrt(sigma6_min); |
331 |
|
332 |
/* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
|
333 |
|
334 |
/* do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
|
335 |
ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
|
336 |
if (do_tab)
|
337 |
{
|
338 |
tabscale = kernel_data->table_elec_vdw->scale;
|
339 |
VFtab = kernel_data->table_elec_vdw->data; */
|
340 |
/* we always use the combined table here */
|
341 |
/* tab_elemsize = kernel_data->table_elec_vdw->stride;
|
342 |
} */
|
343 |
|
344 |
for (n = 0; (n < nri); n++) |
345 |
{ |
346 |
int npair_within_cutoff;
|
347 |
|
348 |
npair_within_cutoff = 0;
|
349 |
|
350 |
is3 = 3*shift[n];
|
351 |
shX = shiftvec[is3]; |
352 |
shY = shiftvec[is3+1];
|
353 |
shZ = shiftvec[is3+2];
|
354 |
nj0 = jindex[n]; |
355 |
nj1 = jindex[n+1];
|
356 |
ii = iinr[n]; |
357 |
ii3 = 3*ii;
|
358 |
ix = shX + x[ii3+0];
|
359 |
iy = shY + x[ii3+1];
|
360 |
iz = shZ + x[ii3+2];
|
361 |
iqA = facel*chargeA[ii]; |
362 |
iqB = facel*chargeB[ii]; |
363 |
ntiA = 2*ntype*typeA[ii];
|
364 |
ntiB = 2*ntype*typeB[ii];
|
365 |
vctot = 0;
|
366 |
vvtot = 0;
|
367 |
fix = 0;
|
368 |
fiy = 0;
|
369 |
fiz = 0;
|
370 |
|
371 |
for (k = nj0; (k < nj1); k++)
|
372 |
{ |
373 |
jnr = jjnr[k]; |
374 |
j3 = 3*jnr;
|
375 |
dx = ix - x[j3]; |
376 |
dy = iy - x[j3+1];
|
377 |
dz = iz - x[j3+2];
|
378 |
rsq = dx*dx + dy*dy + dz*dz; |
379 |
|
380 |
if (bExactCutoffAll && rsq >= rcutoff_max2)
|
381 |
{ |
382 |
/* We save significant time by skipping all code below.
|
383 |
* Note that with soft-core interactions, the actual cut-off
|
384 |
* check might be different. But since the soft-core distance
|
385 |
* is always larger than r, checking on r here is safe.
|
386 |
*/
|
387 |
continue;
|
388 |
} |
389 |
npair_within_cutoff++; |
390 |
|
391 |
if (rsq > 0) |
392 |
{ |
393 |
/* Note that unlike in the nbnxn kernels, we do not need
|
394 |
* to clamp the value of rsq before taking the invsqrt
|
395 |
* to avoid NaN in the LJ calculation, since here we do
|
396 |
* not calculate LJ interactions when C6 and C12 are zero.
|
397 |
*/
|
398 |
|
399 |
rinv = gmx::invsqrt(rsq); |
400 |
r = rsq*rinv; |
401 |
} |
402 |
else
|
403 |
{ |
404 |
/* The force at r=0 is zero, because of symmetry.
|
405 |
* But note that the potential is in general non-zero,
|
406 |
* since the soft-cored r will be non-zero.
|
407 |
*/
|
408 |
rinv = 0;
|
409 |
r = 0;
|
410 |
} |
411 |
|
412 |
// if (sc_r_power == six)
|
413 |
// {
|
414 |
rpm2 = rsq*rsq; /* r4 */
|
415 |
rp = rpm2*rsq; /* r6 */
|
416 |
// }
|
417 |
// else if (sc_r_power == fourtyeight)
|
418 |
// {
|
419 |
// rp = rsq*rsq*rsq; /* r6 */
|
420 |
// rp = rp*rp; /* r12 */
|
421 |
// rp = rp*rp; /* r24 */
|
422 |
// rp = rp*rp; /* r48 */
|
423 |
// rpm2 = rp/rsq; /* r46 */
|
424 |
// }
|
425 |
// else
|
426 |
// {
|
427 |
// rp = std::pow(r, sc_r_power); /* not currently supported as input, but can handle it */
|
428 |
// rpm2 = rp/rsq;
|
429 |
// }
|
430 |
|
431 |
Fscal = 0;
|
432 |
|
433 |
qq[STATE_A] = iqA*chargeA[jnr]; |
434 |
qq[STATE_B] = iqB*chargeB[jnr]; |
435 |
|
436 |
tj[STATE_A] = ntiA+2*typeA[jnr];
|
437 |
tj[STATE_B] = ntiB+2*typeB[jnr];
|
438 |
|
439 |
if (nlist->excl_fep == NULL || nlist->excl_fep[k]) |
440 |
{ |
441 |
c6[STATE_A] = nbfp[tj[STATE_A]]; |
442 |
c6[STATE_B] = nbfp[tj[STATE_B]]; |
443 |
|
444 |
for (i = 0; i < NSTATES; i++) |
445 |
{ |
446 |
c12[i] = nbfp[tj[i]+1];
|
447 |
if ((c6[i] > 0) && (c12[i] > 0)) |
448 |
{ |
449 |
/* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
|
450 |
sigma6[i] = half*c12[i]/c6[i]; |
451 |
sigma2[i] = std::cbrt(sigma6[i]); |
452 |
/* should be able to get rid of cbrt call eventually. Will require agreement on
|
453 |
what data to store externally. Can't be fixed without larger scale changes, so not 4.6 */
|
454 |
// if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
|
455 |
// {
|
456 |
// sigma6[i] = sigma6_min;
|
457 |
// sigma2[i] = sigma2_min;
|
458 |
// }
|
459 |
} |
460 |
else
|
461 |
{ |
462 |
sigma6[i] = sigma6_def; |
463 |
sigma2[i] = sigma2_def; |
464 |
} |
465 |
// if (sc_r_power == six)
|
466 |
// {
|
467 |
sigma_pow[i] = sigma6[i]; |
468 |
// }
|
469 |
// else if (sc_r_power == fourtyeight)
|
470 |
// {
|
471 |
// sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
|
472 |
// sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
|
473 |
// sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
|
474 |
// }
|
475 |
// else
|
476 |
// { /* not really supported as input, but in here for testing the general case*/
|
477 |
// sigma_pow[i] = std::pow(sigma2[i], sc_r_power/2);
|
478 |
// }
|
479 |
} |
480 |
|
481 |
/* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
|
482 |
// if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
|
483 |
// {
|
484 |
// alpha_vdw_eff = 0;
|
485 |
// alpha_coul_eff = 0;
|
486 |
// }
|
487 |
// else
|
488 |
// {
|
489 |
alpha_vdw_eff = alpha_vdw; |
490 |
alpha_coul_eff = alpha_coul; |
491 |
// }
|
492 |
|
493 |
for (i = 0; i < NSTATES; i++) |
494 |
{ |
495 |
FscalC[i] = 0;
|
496 |
FscalV[i] = 0;
|
497 |
Vcoul[i] = 0;
|
498 |
Vvdw[i] = 0;
|
499 |
|
500 |
/* Only spend time on A or B state if it is non-zero */
|
501 |
if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) ) |
502 |
{ |
503 |
/* this section has to be inside the loop because of the dependence on sigma_pow */
|
504 |
rpinvC = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp); |
505 |
rinvC = std::pow(rpinvC, one/sc_r_power); |
506 |
rC = one/rinvC; |
507 |
|
508 |
rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp); |
509 |
rinvV = std::pow(rpinvV, one/sc_r_power); |
510 |
rV = one/rinvV; |
511 |
|
512 |
// if (do_tab)
|
513 |
// {
|
514 |
// rtC = rC*tabscale;
|
515 |
// n0 = rtC;
|
516 |
// epsC = rtC-n0;
|
517 |
// eps2C = epsC*epsC;
|
518 |
// n1C = tab_elemsize*n0;
|
519 |
//
|
520 |
// rtV = rV*tabscale;
|
521 |
// n0 = rtV;
|
522 |
// epsV = rtV-n0;
|
523 |
// eps2V = epsV*epsV;
|
524 |
// n1V = tab_elemsize*n0;
|
525 |
// }
|
526 |
|
527 |
/* Only process the coulomb interactions if we have charges,
|
528 |
* and if we either include all entries in the list (no cutoff
|
529 |
* used in the kernel), or if we are within the cutoff.
|
530 |
*/
|
531 |
bComputeElecInteraction = !bExactElecCutoff || |
532 |
( bConvertEwaldToCoulomb && r < rcoulomb) || |
533 |
(!bConvertEwaldToCoulomb && rC < rcoulomb); |
534 |
|
535 |
if ( (qq[i] != 0) && bComputeElecInteraction) |
536 |
{ |
537 |
// switch (icoul)
|
538 |
// {
|
539 |
// case GMX_NBKERNEL_ELEC_COULOMB:
|
540 |
// /* simple cutoff */
|
541 |
// Vcoul[i] = qq[i]*rinvC;
|
542 |
// FscalC[i] = Vcoul[i];
|
543 |
// /* The shift for the Coulomb potential is stored in
|
544 |
// * the RF parameter c_rf, which is 0 without shift.
|
545 |
// */
|
546 |
// Vcoul[i] -= qq[i]*fr->ic->c_rf;
|
547 |
// break;
|
548 |
//
|
549 |
// case GMX_NBKERNEL_ELEC_REACTIONFIELD:
|
550 |
// /* reaction-field */
|
551 |
// Vcoul[i] = qq[i]*(rinvC + krf*rC*rC-crf);
|
552 |
// FscalC[i] = qq[i]*(rinvC - two*krf*rC*rC);
|
553 |
// break;
|
554 |
//
|
555 |
// case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
|
556 |
// /* non-Ewald tabulated coulomb */
|
557 |
// nnn = n1C;
|
558 |
// Y = VFtab[nnn];
|
559 |
// F = VFtab[nnn+1];
|
560 |
// Geps = epsC*VFtab[nnn+2];
|
561 |
// Heps2 = eps2C*VFtab[nnn+3];
|
562 |
// Fp = F+Geps+Heps2;
|
563 |
// VV = Y+epsC*Fp;
|
564 |
// FF = Fp+Geps+two*Heps2;
|
565 |
// Vcoul[i] = qq[i]*VV;
|
566 |
// FscalC[i] = -qq[i]*tabscale*FF*rC;
|
567 |
// break;
|
568 |
//
|
569 |
// case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
|
570 |
// gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
|
571 |
// break;
|
572 |
//
|
573 |
// case GMX_NBKERNEL_ELEC_EWALD:
|
574 |
// if (bConvertEwaldToCoulomb)
|
575 |
// {
|
576 |
/* Ewald FEP is done only on the 1/r part */
|
577 |
Vcoul[i] = qq[i]*(rinvC-sh_ewald); |
578 |
FscalC[i] = qq[i]*rinvC; |
579 |
// }
|
580 |
// else
|
581 |
// {
|
582 |
// ewrt = rC*ewtabscale;
|
583 |
// ewitab = static_cast<int>(ewrt);
|
584 |
// eweps = ewrt-ewitab;
|
585 |
// ewitab = 4*ewitab;
|
586 |
// FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1];
|
587 |
// rinvcorr = rinvC-sh_ewald;
|
588 |
// Vcoul[i] = qq[i]*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i])));
|
589 |
// FscalC[i] = qq[i]*(rinvC-rC*FscalC[i]);
|
590 |
// }
|
591 |
// break;
|
592 |
//
|
593 |
// case GMX_NBKERNEL_ELEC_NONE:
|
594 |
// FscalC[i] = zero;
|
595 |
// Vcoul[i] = zero;
|
596 |
// break;
|
597 |
//
|
598 |
// default:
|
599 |
// gmx_incons("Invalid icoul in free energy kernel");
|
600 |
// break;
|
601 |
// }
|
602 |
|
603 |
// if (fr->coulomb_modifier == eintmodPOTSWITCH)
|
604 |
// {
|
605 |
// d = rC-fr->rcoulomb_switch;
|
606 |
// d = (d > zero) ? d : zero;
|
607 |
// d2 = d*d;
|
608 |
// sw = one+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
|
609 |
// dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
|
610 |
//
|
611 |
// FscalC[i] = FscalC[i]*sw - rC*Vcoul[i]*dsw;
|
612 |
// Vcoul[i] *= sw;
|
613 |
//
|
614 |
// FscalC[i] = (rC < rcoulomb) ? FscalC[i] : zero;
|
615 |
// Vcoul[i] = (rC < rcoulomb) ? Vcoul[i] : zero;
|
616 |
// }
|
617 |
} |
618 |
|
619 |
/* Only process the VDW interactions if we have
|
620 |
* some non-zero parameters, and if we either
|
621 |
* include all entries in the list (no cutoff used
|
622 |
* in the kernel), or if we are within the cutoff.
|
623 |
*/
|
624 |
bComputeVdwInteraction = !bExactVdwCutoff || |
625 |
( bConvertLJEwaldToLJ6 && r < rvdw) || |
626 |
(!bConvertLJEwaldToLJ6 && rV < rvdw); |
627 |
if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction) |
628 |
{ |
629 |
// switch (ivdw) // What about LJEWALD? (probably not)
|
630 |
// {
|
631 |
// case GMX_NBKERNEL_VDW_LENNARDJONES:
|
632 |
// /* cutoff LJ */
|
633 |
// if (sc_r_power == six)
|
634 |
// {
|
635 |
rinv6 = rpinvV; |
636 |
// }
|
637 |
// else
|
638 |
// {
|
639 |
// rinv6 = rinvV*rinvV;
|
640 |
// rinv6 = rinv6*rinv6*rinv6;
|
641 |
// }
|
642 |
Vvdw6 = c6[i]*rinv6; |
643 |
Vvdw12 = c12[i]*rinv6*rinv6; |
644 |
|
645 |
Vvdw[i] = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth |
646 |
- (Vvdw6 - c6[i]*sh_invrc6)*onesixth); |
647 |
FscalV[i] = Vvdw12 - Vvdw6; |
648 |
// break;
|
649 |
//
|
650 |
// case GMX_NBKERNEL_VDW_BUCKINGHAM:
|
651 |
// gmx_fatal(FARGS, "Buckingham free energy not supported.");
|
652 |
// break;
|
653 |
//
|
654 |
// case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
|
655 |
// /* Table LJ */
|
656 |
// nnn = n1V+4;
|
657 |
// /* dispersion */
|
658 |
// Y = VFtab[nnn];
|
659 |
// F = VFtab[nnn+1];
|
660 |
// Geps = epsV*VFtab[nnn+2];
|
661 |
// Heps2 = eps2V*VFtab[nnn+3];
|
662 |
// Fp = F+Geps+Heps2;
|
663 |
// VV = Y+epsV*Fp;
|
664 |
// FF = Fp+Geps+two*Heps2;
|
665 |
// Vvdw[i] += c6[i]*VV;
|
666 |
// FscalV[i] -= c6[i]*tabscale*FF*rV;
|
667 |
//
|
668 |
// /* repulsion */
|
669 |
// Y = VFtab[nnn+4];
|
670 |
// F = VFtab[nnn+5];
|
671 |
// Geps = epsV*VFtab[nnn+6];
|
672 |
// Heps2 = eps2V*VFtab[nnn+7];
|
673 |
// Fp = F+Geps+Heps2;
|
674 |
// VV = Y+epsV*Fp;
|
675 |
// FF = Fp+Geps+two*Heps2;
|
676 |
// Vvdw[i] += c12[i]*VV;
|
677 |
// FscalV[i] -= c12[i]*tabscale*FF*rV;
|
678 |
// break;
|
679 |
//
|
680 |
// case GMX_NBKERNEL_VDW_LJEWALD:
|
681 |
// if (sc_r_power == six)
|
682 |
// {
|
683 |
// rinv6 = rpinvV;
|
684 |
// }
|
685 |
// else
|
686 |
// {
|
687 |
// rinv6 = rinvV*rinvV;
|
688 |
// rinv6 = rinv6*rinv6*rinv6;
|
689 |
// }
|
690 |
// c6grid = nbfp_grid[tj[i]];
|
691 |
//
|
692 |
// if (bConvertLJEwaldToLJ6)
|
693 |
// {
|
694 |
// /* cutoff LJ */
|
695 |
// Vvdw6 = c6[i]*rinv6;
|
696 |
// Vvdw12 = c12[i]*rinv6*rinv6;
|
697 |
//
|
698 |
// Vvdw[i] = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
|
699 |
// - (Vvdw6 - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)*onesixth);
|
700 |
// FscalV[i] = Vvdw12 - Vvdw6;
|
701 |
// }
|
702 |
// else
|
703 |
// {
|
704 |
// /* Normal LJ-PME */
|
705 |
// ewcljrsq = ewclj2*rV*rV;
|
706 |
// exponent = std::exp(-ewcljrsq);
|
707 |
// poly = exponent*(one + ewcljrsq + ewcljrsq*ewcljrsq*half);
|
708 |
// vvdw_disp = (c6[i]-c6grid*(one-poly))*rinv6;
|
709 |
// vvdw_rep = c12[i]*rinv6*rinv6;
|
710 |
// FscalV[i] = vvdw_rep - vvdw_disp - c6grid*onesixth*exponent*ewclj6;
|
711 |
// Vvdw[i] = (vvdw_rep - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth - (vvdw_disp - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)/six;
|
712 |
// }
|
713 |
// break;
|
714 |
//
|
715 |
// case GMX_NBKERNEL_VDW_NONE:
|
716 |
// Vvdw[i] = zero;
|
717 |
// FscalV[i] = zero;
|
718 |
// break;
|
719 |
//
|
720 |
// default:
|
721 |
// gmx_incons("Invalid ivdw in free energy kernel");
|
722 |
// break;
|
723 |
// }
|
724 |
|
725 |
// if (fr->vdw_modifier == eintmodPOTSWITCH)
|
726 |
// {
|
727 |
// d = rV-fr->rvdw_switch;
|
728 |
// d = (d > zero) ? d : zero;
|
729 |
// d2 = d*d;
|
730 |
// sw = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
|
731 |
// dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
|
732 |
//
|
733 |
// FscalV[i] = FscalV[i]*sw - rV*Vvdw[i]*dsw;
|
734 |
// Vvdw[i] *= sw;
|
735 |
//
|
736 |
// FscalV[i] = (rV < rvdw) ? FscalV[i] : zero;
|
737 |
// Vvdw[i] = (rV < rvdw) ? Vvdw[i] : zero;
|
738 |
// }
|
739 |
} |
740 |
|
741 |
/* FscalC (and FscalV) now contain: dV/drC * rC
|
742 |
* Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
|
743 |
* Further down we first multiply by r^p-2 and then by
|
744 |
* the vector r, which in total gives: dV/drC * (r/rC)^1-p
|
745 |
*/
|
746 |
FscalC[i] *= rpinvC; |
747 |
FscalV[i] *= rpinvV; |
748 |
} |
749 |
} |
750 |
|
751 |
/* Assemble A and B states */
|
752 |
for (i = 0; i < NSTATES; i++) |
753 |
{ |
754 |
vctot += LFC[i]*Vcoul[i]; |
755 |
vvtot += LFV[i]*Vvdw[i]; |
756 |
|
757 |
Fscal += LFC[i]*FscalC[i]*rpm2; |
758 |
Fscal += LFV[i]*FscalV[i]*rpm2; |
759 |
|
760 |
dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i]; |
761 |
dvdl_vdw += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i]; |
762 |
} |
763 |
} |
764 |
// else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
|
765 |
// {
|
766 |
// /* For excluded pairs, which are only in this pair list when
|
767 |
// * using the Verlet scheme, we don't use soft-core.
|
768 |
// * The group scheme also doesn't soft-core for these.
|
769 |
// * As there is no singularity, there is no need for soft-core.
|
770 |
// */
|
771 |
// VV = krf*rsq - crf;
|
772 |
// FF = -two*krf;
|
773 |
//
|
774 |
// if (ii == jnr)
|
775 |
// {
|
776 |
// VV *= half;
|
777 |
// }
|
778 |
//
|
779 |
// for (i = 0; i < NSTATES; i++)
|
780 |
// {
|
781 |
// vctot += LFC[i]*qq[i]*VV;
|
782 |
// Fscal += LFC[i]*qq[i]*FF;
|
783 |
// dvdl_coul += DLF[i]*qq[i]*VV;
|
784 |
// }
|
785 |
// }
|
786 |
|
787 |
if (bConvertEwaldToCoulomb && ( !bExactElecCutoff || r < rcoulomb ) ) // can't get rid of this: r is not known a priori |
788 |
{ |
789 |
/* See comment in the preamble. When using Ewald interactions
|
790 |
* (unless we use a switch modifier) we subtract the reciprocal-space
|
791 |
* Ewald component here which made it possible to apply the free
|
792 |
* energy interaction to 1/r (vanilla coulomb short-range part)
|
793 |
* above. This gets us closer to the ideal case of applying
|
794 |
* the softcore to the entire electrostatic interaction,
|
795 |
* including the reciprocal-space component.
|
796 |
*/
|
797 |
real v_lr, f_lr; |
798 |
|
799 |
ewrt = r*ewtabscale; |
800 |
ewitab = static_cast<int>(ewrt); |
801 |
eweps = ewrt-ewitab; |
802 |
ewitab = 4*ewitab;
|
803 |
f_lr = ewtab[ewitab]+eweps*ewtab[ewitab+1];
|
804 |
v_lr = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
|
805 |
f_lr *= rinv; |
806 |
|
807 |
/* Note that any possible Ewald shift has already been applied in
|
808 |
* the normal interaction part above.
|
809 |
*/
|
810 |
|
811 |
if (ii == jnr)
|
812 |
{ |
813 |
/* If we get here, the i particle (ii) has itself (jnr)
|
814 |
* in its neighborlist. This can only happen with the Verlet
|
815 |
* scheme, and corresponds to a self-interaction that will
|
816 |
* occur twice. Scale it down by 50% to only include it once.
|
817 |
*/
|
818 |
v_lr *= half; |
819 |
} |
820 |
|
821 |
for (i = 0; i < NSTATES; i++) |
822 |
{ |
823 |
vctot -= LFC[i]*qq[i]*v_lr; |
824 |
Fscal -= LFC[i]*qq[i]*f_lr; |
825 |
dvdl_coul -= (DLF[i]*qq[i])*v_lr; |
826 |
} |
827 |
} |
828 |
|
829 |
// if (bConvertLJEwaldToLJ6 && (!bExactVdwCutoff || r < rvdw))
|
830 |
// {
|
831 |
// /* See comment in the preamble. When using LJ-Ewald interactions
|
832 |
// * (unless we use a switch modifier) we subtract the reciprocal-space
|
833 |
// * Ewald component here which made it possible to apply the free
|
834 |
// * energy interaction to r^-6 (vanilla LJ6 short-range part)
|
835 |
// * above. This gets us closer to the ideal case of applying
|
836 |
// * the softcore to the entire VdW interaction,
|
837 |
// * including the reciprocal-space component.
|
838 |
// */
|
839 |
// /* We could also use the analytical form here
|
840 |
// * iso a table, but that can cause issues for
|
841 |
// * r close to 0 for non-interacting pairs.
|
842 |
// */
|
843 |
// real rs, frac, f_lr;
|
844 |
// int ri;
|
845 |
//
|
846 |
// rs = rsq*rinv*ewtabscale;
|
847 |
// ri = static_cast<int>(rs);
|
848 |
// frac = rs - ri;
|
849 |
// f_lr = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
|
850 |
// /* TODO: Currently the Ewald LJ table does not contain
|
851 |
// * the factor 1/6, we should add this.
|
852 |
// */
|
853 |
// FF = f_lr*rinv/six;
|
854 |
// VV = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
|
855 |
//
|
856 |
// if (ii == jnr)
|
857 |
// {
|
858 |
// /* If we get here, the i particle (ii) has itself (jnr)
|
859 |
// * in its neighborlist. This can only happen with the Verlet
|
860 |
// * scheme, and corresponds to a self-interaction that will
|
861 |
// * occur twice. Scale it down by 50% to only include it once.
|
862 |
// */
|
863 |
// VV *= half;
|
864 |
// }
|
865 |
//
|
866 |
// for (i = 0; i < NSTATES; i++)
|
867 |
// {
|
868 |
// c6grid = nbfp_grid[tj[i]];
|
869 |
// vvtot += LFV[i]*c6grid*VV;
|
870 |
// Fscal += LFV[i]*c6grid*FF;
|
871 |
// dvdl_vdw += (DLF[i]*c6grid)*VV;
|
872 |
// }
|
873 |
// }
|
874 |
|
875 |
if (bDoForces) // can't get rid of this: task is not known a priori |
876 |
{ |
877 |
tx = Fscal*dx; |
878 |
ty = Fscal*dy; |
879 |
tz = Fscal*dz; |
880 |
fix = fix + tx; |
881 |
fiy = fiy + ty; |
882 |
fiz = fiz + tz; |
883 |
/* OpenMP atomics are expensive, but this kernels is also
|
884 |
* expensive, so we can take this hit, instead of using
|
885 |
* thread-local output buffers and extra reduction.
|
886 |
*
|
887 |
* All the OpenMP regions in this file are trivial and should
|
888 |
* not throw, so no need for try/catch.
|
889 |
*/
|
890 |
#pragma omp atomic
|
891 |
f[j3] -= tx; |
892 |
#pragma omp atomic
|
893 |
f[j3+1] -= ty;
|
894 |
#pragma omp atomic
|
895 |
f[j3+2] -= tz;
|
896 |
} |
897 |
} |
898 |
|
899 |
/* The atomics below are expensive with many OpenMP threads.
|
900 |
* Here unperturbed i-particles will usually only have a few
|
901 |
* (perturbed) j-particles in the list. Thus with a buffered list
|
902 |
* we can skip a significant number of i-reductions with a check.
|
903 |
*/
|
904 |
if (npair_within_cutoff > 0) |
905 |
{ |
906 |
if (bDoForces) // can't get rid of this: task is not known a priori |
907 |
{ |
908 |
#pragma omp atomic
|
909 |
f[ii3] += fix; |
910 |
#pragma omp atomic
|
911 |
f[ii3+1] += fiy;
|
912 |
#pragma omp atomic
|
913 |
f[ii3+2] += fiz;
|
914 |
} |
915 |
if (bDoShiftForces) // can't get rid of this: task is not known a priori |
916 |
{ |
917 |
#pragma omp atomic
|
918 |
fshift[is3] += fix; |
919 |
#pragma omp atomic
|
920 |
fshift[is3+1] += fiy;
|
921 |
#pragma omp atomic
|
922 |
fshift[is3+2] += fiz;
|
923 |
} |
924 |
if (bDoPotential) // can't get rid of this: task is not known a priori |
925 |
{ |
926 |
ggid = gid[n]; |
927 |
#pragma omp atomic
|
928 |
Vc[ggid] += vctot; |
929 |
#pragma omp atomic
|
930 |
Vv[ggid] += vvtot; |
931 |
} |
932 |
} |
933 |
} |
934 |
|
935 |
#pragma omp atomic
|
936 |
dvdl[efptCOUL] += dvdl_coul; |
937 |
#pragma omp atomic
|
938 |
dvdl[efptVDW] += dvdl_vdw; |
939 |
|
940 |
/* Estimate flops, average for free energy stuff:
|
941 |
* 12 flops per outer iteration
|
942 |
* 150 flops per inner iteration
|
943 |
*/
|
944 |
#pragma omp atomic
|
945 |
inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150); |
946 |
} |