Project

General

Profile

nb_free_energy.cpp

Sebastian Wingberm├╝hle, 12/07/2017 12:08 PM

 
1
/*
2
 * This file is part of the GROMACS molecular simulation package.
3
 *
4
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5
 * Copyright (c) 2001-2004, The GROMACS development team.
6
 * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
7
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8
 * and including many others, as listed in the AUTHORS file in the
9
 * top-level source directory and at http://www.gromacs.org.
10
 *
11
 * GROMACS is free software; you can redistribute it and/or
12
 * modify it under the terms of the GNU Lesser General Public License
13
 * as published by the Free Software Foundation; either version 2.1
14
 * of the License, or (at your option) any later version.
15
 *
16
 * GROMACS is distributed in the hope that it will be useful,
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
 * Lesser General Public License for more details.
20
 *
21
 * You should have received a copy of the GNU Lesser General Public
22
 * License along with GROMACS; if not, see
23
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25
 *
26
 * If you want to redistribute modifications to GROMACS, please
27
 * consider that scientific software is very special. Version
28
 * control is crucial - bugs must be traceable. We will be happy to
29
 * consider code for inclusion in the official distribution, but
30
 * derived work must not be called official GROMACS. Details are found
31
 * in the README & COPYING files - if they are missing, get the
32
 * official version at http://www.gromacs.org.
33
 *
34
 * To help us fund GROMACS development, we humbly ask that you cite
35
 * the research papers on the package. Check out http://www.gromacs.org.
36
 */
37
#include "gmxpre.h"
38

    
39
#include "nb_free_energy.h"
40

    
41
#include <cmath>
42

    
43
#include <algorithm>
44

    
45
#include "gromacs/gmxlib/nrnb.h"
46
#include "gromacs/gmxlib/nonbonded/nb_kernel.h"
47
#include "gromacs/gmxlib/nonbonded/nonbonded.h"
48
#include "gromacs/math/functions.h"
49
#include "gromacs/math/vec.h"
50
#include "gromacs/mdtypes/forcerec.h"
51
#include "gromacs/mdtypes/md_enums.h"
52
#include "gromacs/utility/fatalerror.h"
53

    
54
void
55
gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
56
                          rvec * gmx_restrict              xx,
57
                          rvec * gmx_restrict              ff,
58
                          t_forcerec * gmx_restrict        fr,
59
                          const t_mdatoms * gmx_restrict   mdatoms,
60
                          nb_kernel_data_t * gmx_restrict  kernel_data,
61
                          t_nrnb * gmx_restrict            nrnb)
62
{
63

    
64
#define  STATE_A  0
65
#define  STATE_B  1
66
#define  NSTATES  2
67
    int           i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
68
    real          shX, shY, shZ;
69
    real          tx, ty, tz, Fscal;
70
    double        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
71
    double        Vcoul[NSTATES], Vvdw[NSTATES];     /* Needs double for sc_power==48 */
72
    real          rinv6, r, rtC, rtV;
73
    real          iqA, iqB;
74
    real          qq[NSTATES], vctot;
75
    int           ntiA, ntiB, tj[NSTATES];
76
    real          Vvdw6, Vvdw12, vvtot;
77
    real          ix, iy, iz, fix, fiy, fiz;
78
    real          dx, dy, dz, rsq, rinv;
79
    real          c6[NSTATES], c12[NSTATES], c6grid;
80
    real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
81
    double        dvdl_coul, dvdl_vdw;
82
    real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
83
    real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
84
    double        rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */
85
    real          sigma2[NSTATES], sigma_pow[NSTATES];
86
    int           do_tab, tab_elemsize = 0;
87
    int           n0, n1C, n1V, nnn;
88
    real          Y, F, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
89
    int           icoul, ivdw;
90
    int           nri;
91
    const int *   iinr;
92
    const int *   jindex;
93
    const int *   jjnr;
94
    const int *   shift;
95
    const int *   gid;
96
    const int *   typeA;
97
    const int *   typeB;
98
    int           ntype;
99
    const real *  shiftvec;
100
    real *        fshift;
101
    real          tabscale = 0;
102
    const real *  VFtab    = NULL;
103
    const real *  x;
104
    real *        f;
105
    real          facel, krf, crf;
106
    const real *  chargeA;
107
    const real *  chargeB;
108
    real          sigma6_min, sigma6_def, lam_power, sc_r_power;
109
    real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
110
    real          ewcljrsq, ewclj, ewclj2, exponent, poly, vvdw_disp, vvdw_rep, sh_lj_ewald;
111
    real          ewclj6;
112
    const real *  nbfp, *nbfp_grid;
113
    real *        dvdl;
114
    real *        Vv;
115
    real *        Vc;
116
    gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
117
    real          rcoulomb, rvdw, sh_invrc6;
118
    gmx_bool      bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll;
119
    gmx_bool      bEwald, bEwaldLJ;
120
    real          rcutoff_max2;
121
    const real *  tab_ewald_F_lj = nullptr;
122
    const real *  tab_ewald_V_lj = nullptr;
123
    real          d, d2, sw, dsw, rinvcorr;
124
    real          elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4;
125
    real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
126
    gmx_bool      bConvertEwaldToCoulomb, bConvertLJEwaldToLJ6;
127
    gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
128
    const real *  ewtab = nullptr;
129
    int           ewitab;
130
    real          ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;
131

    
132
    const real    onetwelfth  = 1.0/12.0;
133
    const real    onesixth    = 1.0/6.0;
134
    const real    zero        = 0.0;
135
    const real    half        = 0.5;
136
    const real    one         = 1.0;
137
    const real    two         = 2.0;
138
    const real    six         = 6.0;
139
    const real    fourtyeight = 48.0;
140

    
141
    x                   = xx[0];
142
    f                   = ff[0];
143

    
144
    fshift              = fr->fshift[0];
145

    
146
    nri                 = nlist->nri;
147
    iinr                = nlist->iinr;
148
    jindex              = nlist->jindex;
149
    jjnr                = nlist->jjnr;
150
    icoul               = nlist->ielec;
151
    ivdw                = nlist->ivdw;
152
    shift               = nlist->shift;
153
    gid                 = nlist->gid;
154

    
155
    shiftvec            = fr->shift_vec[0];
156
    chargeA             = mdatoms->chargeA;
157
    chargeB             = mdatoms->chargeB;
158
    facel               = fr->epsfac;
159
    krf                 = fr->k_rf;
160
    crf                 = fr->c_rf;
161
    Vc                  = kernel_data->energygrp_elec;
162
    typeA               = mdatoms->typeA;
163
    typeB               = mdatoms->typeB;
164
    ntype               = fr->ntype;
165
    nbfp                = fr->nbfp;
166
    nbfp_grid           = fr->ljpme_c6grid;
167
    Vv                  = kernel_data->energygrp_vdw;
168
    lambda_coul         = kernel_data->lambda[efptCOUL];
169
    lambda_vdw          = kernel_data->lambda[efptVDW];
170
    dvdl                = kernel_data->dvdl;
171
    alpha_coul          = fr->sc_alphacoul;
172
    alpha_vdw           = fr->sc_alphavdw;
173
    lam_power           = fr->sc_power;
174
    sc_r_power          = fr->sc_r_power;
175
    sigma6_def          = fr->sc_sigma6_def;
176
    sigma6_min          = fr->sc_sigma6_min;
177
    bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
178
    bDoShiftForces      = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE;
179
    bDoPotential        = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
180

    
181
    rcoulomb            = fr->rcoulomb;
182
    rvdw                = fr->rvdw;
183
    sh_invrc6           = fr->ic->sh_invrc6;
184
    sh_lj_ewald         = fr->ic->sh_lj_ewald;
185
    ewclj               = fr->ewaldcoeff_lj;
186
    ewclj2              = ewclj*ewclj;
187
    ewclj6              = ewclj2*ewclj2*ewclj2;
188

    
189
/*    if (fr->coulomb_modifier == eintmodPOTSWITCH)
190
    {
191
        d               = fr->rcoulomb-fr->rcoulomb_switch;
192
        elec_swV3       = -10.0/(d*d*d);
193
        elec_swV4       =  15.0/(d*d*d*d);
194
        elec_swV5       =  -6.0/(d*d*d*d*d);
195
        elec_swF2       = -30.0/(d*d*d);
196
        elec_swF3       =  60.0/(d*d*d*d);
197
        elec_swF4       = -30.0/(d*d*d*d*d);
198
    }
199
    else
200
    { */
201
        /* Avoid warnings from stupid compilers (looking at you, Clang!) */
202
        elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0;
203
//    }
204

    
205
/*    if (fr->vdw_modifier == eintmodPOTSWITCH)
206
    {
207
        d               = fr->rvdw-fr->rvdw_switch;
208
        vdw_swV3        = -10.0/(d*d*d);
209
        vdw_swV4        =  15.0/(d*d*d*d);
210
        vdw_swV5        =  -6.0/(d*d*d*d*d);
211
        vdw_swF2        = -30.0/(d*d*d);
212
        vdw_swF3        =  60.0/(d*d*d*d);
213
        vdw_swF4        = -30.0/(d*d*d*d*d);
214
    }
215
    else
216
    { */
217
        /* Avoid warnings from stupid compilers (looking at you, Clang!) */
218
        vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
219
//    }
220

    
221
//    if (fr->cutoff_scheme == ecutsVERLET)
222
//    {
223
        const interaction_const_t *ic;
224

    
225
        ic = fr->ic;
226
/*        if (EVDW_PME(ic->vdwtype))
227
        {
228
            ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
229
        } */
230
//        else
231
//        {
232
            ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
233
//        }
234

    
235
/*        if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
236
        {
237
            icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
238
        } */
239
//        else if (EEL_PME_EWALD(ic->eeltype))
240
//        {
241
            icoul        = GMX_NBKERNEL_ELEC_EWALD;
242
//        }
243
/*        else
244
        {
245
            gmx_incons("Unsupported eeltype with Verlet and free-energy");
246
        } */
247

    
248
        bExactElecCutoff = TRUE;
249
        bExactVdwCutoff  = TRUE;
250
//    }
251
/*    else
252
    {
253
        bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
254
        bExactVdwCutoff  = (fr->vdw_modifier != eintmodNONE);
255
    } */
256

    
257
    bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
258
    rcutoff_max2    = std::max(fr->rcoulomb, fr->rvdw);
259
    rcutoff_max2    = rcutoff_max2*rcutoff_max2;
260

    
261
    bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
262
    bEwaldLJ        = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
263

    
264
//    if (bEwald || bEwaldLJ)
265
//    {
266
        sh_ewald       = fr->ic->sh_ewald;
267
        ewtab          = fr->ic->tabq_coul_FDV0;
268
        ewtabscale     = fr->ic->tabq_scale;
269
        ewtabhalfspace = half/ewtabscale;
270
        tab_ewald_F_lj = fr->ic->tabq_vdw_F;
271
        tab_ewald_V_lj = fr->ic->tabq_vdw_V;
272
//    }
273

    
274
    /* For Ewald/PME interactions we cannot easily apply the soft-core component to
275
     * reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we
276
     * can apply the small trick of subtracting the _reciprocal_ space contribution
277
     * in this kernel, and instead apply the free energy interaction to the 1/r
278
     * (standard coulomb) interaction.
279
     *
280
     * However, we cannot use this approach for switch-modified since we would then
281
     * effectively end up evaluating a significantly different interaction here compared to the
282
     * normal (non-free-energy) kernels, either by applying a cutoff at a different
283
     * position than what the user requested, or by switching different
284
     * things (1/r rather than short-range Ewald). For these settings, we just
285
     * use the traditional short-range Ewald interaction in that case.
286
     */
287
    bConvertEwaldToCoulomb = (bEwald && (fr->coulomb_modifier != eintmodPOTSWITCH));
288
    /* For now the below will always be true (since LJ-PME only works with Shift in Gromacs-5.0),
289
     * but writing it this way means we stay in sync with coulomb, and it avoids future bugs.
290
     */
291
    bConvertLJEwaldToLJ6   = (bEwaldLJ && (fr->vdw_modifier   != eintmodPOTSWITCH));
292

    
293
    /* We currently don't implement exclusion correction, needed with the Verlet cut-off scheme, without conversion */
294
/*    if (fr->cutoff_scheme == ecutsVERLET &&
295
        ((bEwald   && !bConvertEwaldToCoulomb) ||
296
         (bEwaldLJ && !bConvertLJEwaldToLJ6)))
297
    {
298
        gmx_incons("Unimplemented non-bonded setup");
299
    } */
300

    
301
    /* fix compiler warnings */
302
    n1C   = n1V   = 0;
303
    epsC  = epsV  = 0;
304
    eps2C = eps2V = 0;
305

    
306
    dvdl_coul  = 0;
307
    dvdl_vdw   = 0;
308

    
309
    /* Lambda factor for state A, 1-lambda*/
310
    LFC[STATE_A] = one - lambda_coul;
311
    LFV[STATE_A] = one - lambda_vdw;
312

    
313
    /* Lambda factor for state B, lambda*/
314
    LFC[STATE_B] = lambda_coul;
315
    LFV[STATE_B] = lambda_vdw;
316

    
317
    /*derivative of the lambda factor for state A and B */
318
    DLF[STATE_A] = -1;
319
    DLF[STATE_B] = 1;
320

    
321
    for (i = 0; i < NSTATES; i++)
322
    {
323
        lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
324
        dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
325
        lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
326
        dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
327
    }
328
    /* precalculate */
329
    sigma2_def = std::cbrt(sigma6_def);
330
    sigma2_min = std::cbrt(sigma6_min);
331

    
332
    /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
333

    
334
/*    do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
335
              ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
336
    if (do_tab)
337
    {
338
        tabscale         = kernel_data->table_elec_vdw->scale;
339
        VFtab            = kernel_data->table_elec_vdw->data; */
340
        /* we always use the combined table here */
341
/*        tab_elemsize     = kernel_data->table_elec_vdw->stride;
342
    } */
343

    
344
    for (n = 0; (n < nri); n++)
345
    {
346
        int npair_within_cutoff;
347

    
348
        npair_within_cutoff = 0;
349

    
350
        is3              = 3*shift[n];
351
        shX              = shiftvec[is3];
352
        shY              = shiftvec[is3+1];
353
        shZ              = shiftvec[is3+2];
354
        nj0              = jindex[n];
355
        nj1              = jindex[n+1];
356
        ii               = iinr[n];
357
        ii3              = 3*ii;
358
        ix               = shX + x[ii3+0];
359
        iy               = shY + x[ii3+1];
360
        iz               = shZ + x[ii3+2];
361
        iqA              = facel*chargeA[ii];
362
        iqB              = facel*chargeB[ii];
363
        ntiA             = 2*ntype*typeA[ii];
364
        ntiB             = 2*ntype*typeB[ii];
365
        vctot            = 0;
366
        vvtot            = 0;
367
        fix              = 0;
368
        fiy              = 0;
369
        fiz              = 0;
370

    
371
        for (k = nj0; (k < nj1); k++)
372
        {
373
            jnr              = jjnr[k];
374
            j3               = 3*jnr;
375
            dx               = ix - x[j3];
376
            dy               = iy - x[j3+1];
377
            dz               = iz - x[j3+2];
378
            rsq              = dx*dx + dy*dy + dz*dz;
379

    
380
            if (bExactCutoffAll && rsq >= rcutoff_max2)
381
            {
382
                /* We save significant time by skipping all code below.
383
                 * Note that with soft-core interactions, the actual cut-off
384
                 * check might be different. But since the soft-core distance
385
                 * is always larger than r, checking on r here is safe.
386
                 */
387
                continue;
388
            }
389
            npair_within_cutoff++;
390

    
391
            if (rsq > 0)
392
            {
393
                /* Note that unlike in the nbnxn kernels, we do not need
394
                 * to clamp the value of rsq before taking the invsqrt
395
                 * to avoid NaN in the LJ calculation, since here we do
396
                 * not calculate LJ interactions when C6 and C12 are zero.
397
                 */
398

    
399
                rinv         = gmx::invsqrt(rsq);
400
                r            = rsq*rinv;
401
            }
402
            else
403
            {
404
                /* The force at r=0 is zero, because of symmetry.
405
                 * But note that the potential is in general non-zero,
406
                 * since the soft-cored r will be non-zero.
407
                 */
408
                rinv         = 0;
409
                r            = 0;
410
            }
411

    
412
//            if (sc_r_power == six)
413
//            {
414
                rpm2             = rsq*rsq;  /* r4 */
415
                rp               = rpm2*rsq; /* r6 */
416
//            }
417
//            else if (sc_r_power == fourtyeight)
418
//            {
419
//                rp               = rsq*rsq*rsq; /* r6 */
420
//                rp               = rp*rp;       /* r12 */
421
//                rp               = rp*rp;       /* r24 */
422
//                rp               = rp*rp;       /* r48 */
423
//                rpm2             = rp/rsq;      /* r46 */
424
//            }
425
//            else
426
//            {
427
//                rp             = std::pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
428
//                rpm2           = rp/rsq;
429
//            }
430

    
431
            Fscal = 0;
432

    
433
            qq[STATE_A]      = iqA*chargeA[jnr];
434
            qq[STATE_B]      = iqB*chargeB[jnr];
435

    
436
            tj[STATE_A]      = ntiA+2*typeA[jnr];
437
            tj[STATE_B]      = ntiB+2*typeB[jnr];
438

    
439
            if (nlist->excl_fep == NULL || nlist->excl_fep[k])
440
            {
441
                c6[STATE_A]      = nbfp[tj[STATE_A]];
442
                c6[STATE_B]      = nbfp[tj[STATE_B]];
443

    
444
                for (i = 0; i < NSTATES; i++)
445
                {
446
                    c12[i]             = nbfp[tj[i]+1];
447
                    if ((c6[i] > 0) && (c12[i] > 0))
448
                    {
449
                        /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
450
                        sigma6[i]       = half*c12[i]/c6[i];
451
                        sigma2[i]       = std::cbrt(sigma6[i]);
452
                        /* should be able to get rid of cbrt call eventually.  Will require agreement on
453
                           what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
454
//                        if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
455
//                        {
456
//                            sigma6[i] = sigma6_min;
457
//                            sigma2[i] = sigma2_min;
458
//                        }
459
                    }
460
                    else
461
                    {
462
                        sigma6[i]       = sigma6_def;
463
                        sigma2[i]       = sigma2_def;
464
                    }
465
//                    if (sc_r_power == six)
466
//                    {
467
                        sigma_pow[i]    = sigma6[i];
468
//                    }
469
//                    else if (sc_r_power == fourtyeight)
470
//                    {
471
//                        sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
472
//                        sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
473
//                        sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
474
//                    }
475
//                    else
476
//                    {    /* not really supported as input, but in here for testing the general case*/
477
//                        sigma_pow[i]    = std::pow(sigma2[i], sc_r_power/2);
478
//                    }
479
                }
480

    
481
                /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
482
//                if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
483
//                {
484
//                    alpha_vdw_eff    = 0;
485
//                    alpha_coul_eff   = 0;
486
//                }
487
//                else
488
//                {
489
                    alpha_vdw_eff    = alpha_vdw;
490
                    alpha_coul_eff   = alpha_coul;
491
//                }
492

    
493
                for (i = 0; i < NSTATES; i++)
494
                {
495
                    FscalC[i]    = 0;
496
                    FscalV[i]    = 0;
497
                    Vcoul[i]     = 0;
498
                    Vvdw[i]      = 0;
499

    
500
                    /* Only spend time on A or B state if it is non-zero */
501
                    if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
502
                    {
503
                        /* this section has to be inside the loop because of the dependence on sigma_pow */
504
                        rpinvC         = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
505
                        rinvC          = std::pow(rpinvC, one/sc_r_power);
506
                        rC             = one/rinvC;
507

    
508
                        rpinvV         = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
509
                        rinvV          = std::pow(rpinvV, one/sc_r_power);
510
                        rV             = one/rinvV;
511

    
512
//                        if (do_tab)
513
//                        {
514
//                            rtC        = rC*tabscale;
515
//                            n0         = rtC;
516
//                            epsC       = rtC-n0;
517
//                            eps2C      = epsC*epsC;
518
//                            n1C        = tab_elemsize*n0;
519
//
520
//                            rtV        = rV*tabscale;
521
//                            n0         = rtV;
522
//                            epsV       = rtV-n0;
523
//                            eps2V      = epsV*epsV;
524
//                            n1V        = tab_elemsize*n0;
525
//                        }
526

    
527
                        /* Only process the coulomb interactions if we have charges,
528
                         * and if we either include all entries in the list (no cutoff
529
                         * used in the kernel), or if we are within the cutoff.
530
                         */
531
                        bComputeElecInteraction = !bExactElecCutoff ||
532
                            ( bConvertEwaldToCoulomb && r < rcoulomb) ||
533
                            (!bConvertEwaldToCoulomb && rC < rcoulomb);
534

    
535
                        if ( (qq[i] != 0) && bComputeElecInteraction)
536
                        {
537
//                            switch (icoul)
538
//                            {
539
//                                case GMX_NBKERNEL_ELEC_COULOMB:
540
//                                    /* simple cutoff */
541
//                                    Vcoul[i]   = qq[i]*rinvC;
542
//                                    FscalC[i]  = Vcoul[i];
543
//                                    /* The shift for the Coulomb potential is stored in
544
//                                     * the RF parameter c_rf, which is 0 without shift.
545
//                                     */
546
//                                    Vcoul[i]  -= qq[i]*fr->ic->c_rf;
547
//                                    break;
548
//
549
//                                case GMX_NBKERNEL_ELEC_REACTIONFIELD:
550
//                                    /* reaction-field */
551
//                                    Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
552
//                                    FscalC[i]  = qq[i]*(rinvC - two*krf*rC*rC);
553
//                                    break;
554
//
555
//                                case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
556
//                                    /* non-Ewald tabulated coulomb */
557
//                                    nnn        = n1C;
558
//                                    Y          = VFtab[nnn];
559
//                                    F          = VFtab[nnn+1];
560
//                                    Geps       = epsC*VFtab[nnn+2];
561
//                                    Heps2      = eps2C*VFtab[nnn+3];
562
//                                    Fp         = F+Geps+Heps2;
563
//                                    VV         = Y+epsC*Fp;
564
//                                    FF         = Fp+Geps+two*Heps2;
565
//                                    Vcoul[i]   = qq[i]*VV;
566
//                                    FscalC[i]  = -qq[i]*tabscale*FF*rC;
567
//                                    break;
568
//
569
//                                case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
570
//                                    gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
571
//                                    break;
572
//
573
//                                case GMX_NBKERNEL_ELEC_EWALD:
574
//                                    if (bConvertEwaldToCoulomb)
575
//                                    {
576
                                        /* Ewald FEP is done only on the 1/r part */
577
                                        Vcoul[i]   = qq[i]*(rinvC-sh_ewald);
578
                                        FscalC[i]  = qq[i]*rinvC;
579
//                                    }
580
//                                    else
581
//                                    {
582
//                                        ewrt      = rC*ewtabscale;
583
//                                        ewitab    = static_cast<int>(ewrt);
584
//                                        eweps     = ewrt-ewitab;
585
//                                        ewitab    = 4*ewitab;
586
//                                        FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1];
587
//                                        rinvcorr  = rinvC-sh_ewald;
588
//                                        Vcoul[i]  = qq[i]*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i])));
589
//                                        FscalC[i] = qq[i]*(rinvC-rC*FscalC[i]);
590
//                                    }
591
//                                    break;
592
//
593
//                                case GMX_NBKERNEL_ELEC_NONE:
594
//                                    FscalC[i]  = zero;
595
//                                    Vcoul[i]   = zero;
596
//                                    break;
597
//
598
//                                default:
599
//                                    gmx_incons("Invalid icoul in free energy kernel");
600
//                                    break;
601
//                            }
602

    
603
//                            if (fr->coulomb_modifier == eintmodPOTSWITCH)
604
//                            {
605
//                                d                = rC-fr->rcoulomb_switch;
606
//                                d                = (d > zero) ? d : zero;
607
//                                d2               = d*d;
608
//                                sw               = one+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
609
//                                dsw              = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
610
//
611
//                                FscalC[i]        = FscalC[i]*sw - rC*Vcoul[i]*dsw;
612
//                                Vcoul[i]        *= sw;
613
//
614
//                                FscalC[i]        = (rC < rcoulomb) ? FscalC[i] : zero;
615
//                                Vcoul[i]         = (rC < rcoulomb) ? Vcoul[i] : zero;
616
//                            }
617
                        }
618

    
619
                        /* Only process the VDW interactions if we have
620
                         * some non-zero parameters, and if we either
621
                         * include all entries in the list (no cutoff used
622
                         * in the kernel), or if we are within the cutoff.
623
                         */
624
                        bComputeVdwInteraction = !bExactVdwCutoff ||
625
                            ( bConvertLJEwaldToLJ6 && r < rvdw) ||
626
                            (!bConvertLJEwaldToLJ6 && rV < rvdw);
627
                        if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
628
                        {
629
//                            switch (ivdw) // What about LJEWALD? (probably not)
630
//                            {
631
//                                case GMX_NBKERNEL_VDW_LENNARDJONES:
632
//                                    /* cutoff LJ */
633
//                                    if (sc_r_power == six)
634
//                                    {
635
                                        rinv6            = rpinvV;
636
//                                    }
637
//                                    else
638
//                                    {
639
//                                        rinv6            = rinvV*rinvV;
640
//                                        rinv6            = rinv6*rinv6*rinv6;
641
//                                    }
642
                                    Vvdw6            = c6[i]*rinv6;
643
                                    Vvdw12           = c12[i]*rinv6*rinv6;
644

    
645
                                    Vvdw[i]          = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
646
                                                         - (Vvdw6 - c6[i]*sh_invrc6)*onesixth);
647
                                    FscalV[i]        = Vvdw12 - Vvdw6;
648
//                                    break;
649
//
650
//                                case GMX_NBKERNEL_VDW_BUCKINGHAM:
651
//                                    gmx_fatal(FARGS, "Buckingham free energy not supported.");
652
//                                    break;
653
//
654
//                                case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
655
//                                    /* Table LJ */
656
//                                    nnn = n1V+4;
657
//                                    /* dispersion */
658
//                                    Y          = VFtab[nnn];
659
//                                    F          = VFtab[nnn+1];
660
//                                    Geps       = epsV*VFtab[nnn+2];
661
//                                    Heps2      = eps2V*VFtab[nnn+3];
662
//                                    Fp         = F+Geps+Heps2;
663
//                                    VV         = Y+epsV*Fp;
664
//                                    FF         = Fp+Geps+two*Heps2;
665
//                                    Vvdw[i]   += c6[i]*VV;
666
//                                    FscalV[i] -= c6[i]*tabscale*FF*rV;
667
//
668
//                                    /* repulsion */
669
//                                    Y          = VFtab[nnn+4];
670
//                                    F          = VFtab[nnn+5];
671
//                                    Geps       = epsV*VFtab[nnn+6];
672
//                                    Heps2      = eps2V*VFtab[nnn+7];
673
//                                    Fp         = F+Geps+Heps2;
674
//                                    VV         = Y+epsV*Fp;
675
//                                    FF         = Fp+Geps+two*Heps2;
676
//                                    Vvdw[i]   += c12[i]*VV;
677
//                                    FscalV[i] -= c12[i]*tabscale*FF*rV;
678
//                                    break;
679
//
680
//                                case GMX_NBKERNEL_VDW_LJEWALD:
681
//                                    if (sc_r_power == six)
682
//                                    {
683
//                                        rinv6            = rpinvV;
684
//                                    }
685
//                                    else
686
//                                    {
687
//                                        rinv6            = rinvV*rinvV;
688
//                                        rinv6            = rinv6*rinv6*rinv6;
689
//                                    }
690
//                                    c6grid           = nbfp_grid[tj[i]];
691
//
692
//                                    if (bConvertLJEwaldToLJ6)
693
//                                    {
694
//                                        /* cutoff LJ */
695
//                                        Vvdw6            = c6[i]*rinv6;
696
//                                        Vvdw12           = c12[i]*rinv6*rinv6;
697
//
698
//                                        Vvdw[i]          = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
699
//                                                             - (Vvdw6 - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)*onesixth);
700
//                                        FscalV[i]        = Vvdw12 - Vvdw6;
701
//                                    }
702
//                                    else
703
//                                    {
704
//                                        /* Normal LJ-PME */
705
//                                        ewcljrsq         = ewclj2*rV*rV;
706
//                                        exponent         = std::exp(-ewcljrsq);
707
//                                        poly             = exponent*(one + ewcljrsq + ewcljrsq*ewcljrsq*half);
708
//                                        vvdw_disp        = (c6[i]-c6grid*(one-poly))*rinv6;
709
//                                        vvdw_rep         = c12[i]*rinv6*rinv6;
710
//                                        FscalV[i]        = vvdw_rep - vvdw_disp - c6grid*onesixth*exponent*ewclj6;
711
//                                        Vvdw[i]          = (vvdw_rep - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth - (vvdw_disp - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)/six;
712
//                                    }
713
//                                    break;
714
//
715
//                                case GMX_NBKERNEL_VDW_NONE:
716
//                                    Vvdw[i]    = zero;
717
//                                    FscalV[i]  = zero;
718
//                                    break;
719
//
720
//                                default:
721
//                                    gmx_incons("Invalid ivdw in free energy kernel");
722
//                                    break;
723
//                            }
724

    
725
//                            if (fr->vdw_modifier == eintmodPOTSWITCH)
726
//                            {
727
//                                d                = rV-fr->rvdw_switch;
728
//                                d                = (d > zero) ? d : zero;
729
//                                d2               = d*d;
730
//                                sw               = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
731
//                                dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
732
//
733
//                                FscalV[i]        = FscalV[i]*sw - rV*Vvdw[i]*dsw;
734
//                                Vvdw[i]         *= sw;
735
//
736
//                                FscalV[i]  = (rV < rvdw) ? FscalV[i] : zero;
737
//                                Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : zero;
738
//                            }
739
                        }
740

    
741
                        /* FscalC (and FscalV) now contain: dV/drC * rC
742
                         * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
743
                         * Further down we first multiply by r^p-2 and then by
744
                         * the vector r, which in total gives: dV/drC * (r/rC)^1-p
745
                         */
746
                        FscalC[i] *= rpinvC;
747
                        FscalV[i] *= rpinvV;
748
                    }
749
                }
750

    
751
                /* Assemble A and B states */
752
                for (i = 0; i < NSTATES; i++)
753
                {
754
                    vctot         += LFC[i]*Vcoul[i];
755
                    vvtot         += LFV[i]*Vvdw[i];
756

    
757
                    Fscal         += LFC[i]*FscalC[i]*rpm2;
758
                    Fscal         += LFV[i]*FscalV[i]*rpm2;
759

    
760
                    dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
761
                    dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
762
                }
763
            }
764
//            else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
765
//            {
766
//                /* For excluded pairs, which are only in this pair list when
767
//                 * using the Verlet scheme, we don't use soft-core.
768
//                 * The group scheme also doesn't soft-core for these.
769
//                 * As there is no singularity, there is no need for soft-core.
770
//                 */
771
//                VV = krf*rsq - crf;
772
//                FF = -two*krf;
773
//
774
//                if (ii == jnr)
775
//                {
776
//                    VV *= half;
777
//                }
778
//
779
//                for (i = 0; i < NSTATES; i++)
780
//                {
781
//                    vctot      += LFC[i]*qq[i]*VV;
782
//                    Fscal      += LFC[i]*qq[i]*FF;
783
//                    dvdl_coul  += DLF[i]*qq[i]*VV;
784
//                }
785
//            }
786

    
787
            if (bConvertEwaldToCoulomb && ( !bExactElecCutoff || r < rcoulomb ) )  // can't get rid of this: r is not known a priori
788
            {
789
                /* See comment in the preamble. When using Ewald interactions
790
                 * (unless we use a switch modifier) we subtract the reciprocal-space
791
                 * Ewald component here which made it possible to apply the free
792
                 * energy interaction to 1/r (vanilla coulomb short-range part)
793
                 * above. This gets us closer to the ideal case of applying
794
                 * the softcore to the entire electrostatic interaction,
795
                 * including the reciprocal-space component.
796
                 */
797
                real v_lr, f_lr;
798

    
799
                ewrt      = r*ewtabscale;
800
                ewitab    = static_cast<int>(ewrt);
801
                eweps     = ewrt-ewitab;
802
                ewitab    = 4*ewitab;
803
                f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
804
                v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
805
                f_lr     *= rinv;
806

    
807
                /* Note that any possible Ewald shift has already been applied in
808
                 * the normal interaction part above.
809
                 */
810

    
811
                if (ii == jnr)
812
                {
813
                    /* If we get here, the i particle (ii) has itself (jnr)
814
                     * in its neighborlist. This can only happen with the Verlet
815
                     * scheme, and corresponds to a self-interaction that will
816
                     * occur twice. Scale it down by 50% to only include it once.
817
                     */
818
                    v_lr *= half;
819
                }
820

    
821
                for (i = 0; i < NSTATES; i++)
822
                {
823
                    vctot      -= LFC[i]*qq[i]*v_lr;
824
                    Fscal      -= LFC[i]*qq[i]*f_lr;
825
                    dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
826
                }
827
            }
828

    
829
//            if (bConvertLJEwaldToLJ6 && (!bExactVdwCutoff || r < rvdw))
830
//            {
831
//                /* See comment in the preamble. When using LJ-Ewald interactions
832
//                 * (unless we use a switch modifier) we subtract the reciprocal-space
833
//                 * Ewald component here which made it possible to apply the free
834
//                 * energy interaction to r^-6 (vanilla LJ6 short-range part)
835
//                 * above. This gets us closer to the ideal case of applying
836
//                 * the softcore to the entire VdW interaction,
837
//                 * including the reciprocal-space component.
838
//                 */
839
//                /* We could also use the analytical form here
840
//                 * iso a table, but that can cause issues for
841
//                 * r close to 0 for non-interacting pairs.
842
//                 */
843
//                real rs, frac, f_lr;
844
//                int  ri;
845
//
846
//                rs     = rsq*rinv*ewtabscale;
847
//                ri     = static_cast<int>(rs);
848
//                frac   = rs - ri;
849
//                f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
850
//                /* TODO: Currently the Ewald LJ table does not contain
851
//                 * the factor 1/6, we should add this.
852
//                 */
853
//                FF     = f_lr*rinv/six;
854
//                VV     = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
855
//
856
//                if (ii == jnr)
857
//                {
858
//                    /* If we get here, the i particle (ii) has itself (jnr)
859
//                     * in its neighborlist. This can only happen with the Verlet
860
//                     * scheme, and corresponds to a self-interaction that will
861
//                     * occur twice. Scale it down by 50% to only include it once.
862
//                     */
863
//                    VV *= half;
864
//                }
865
//
866
//                for (i = 0; i < NSTATES; i++)
867
//                {
868
//                    c6grid      = nbfp_grid[tj[i]];
869
//                    vvtot      += LFV[i]*c6grid*VV;
870
//                    Fscal      += LFV[i]*c6grid*FF;
871
//                    dvdl_vdw   += (DLF[i]*c6grid)*VV;
872
//                }
873
//            }
874

    
875
            if (bDoForces) // can't get rid of this: task is not known a priori
876
            {
877
                tx         = Fscal*dx;
878
                ty         = Fscal*dy;
879
                tz         = Fscal*dz;
880
                fix        = fix + tx;
881
                fiy        = fiy + ty;
882
                fiz        = fiz + tz;
883
                /* OpenMP atomics are expensive, but this kernels is also
884
                 * expensive, so we can take this hit, instead of using
885
                 * thread-local output buffers and extra reduction.
886
                 *
887
                 * All the OpenMP regions in this file are trivial and should
888
                 * not throw, so no need for try/catch.
889
                 */
890
#pragma omp atomic
891
                f[j3]     -= tx;
892
#pragma omp atomic
893
                f[j3+1]   -= ty;
894
#pragma omp atomic
895
                f[j3+2]   -= tz;
896
            }
897
        }
898

    
899
        /* The atomics below are expensive with many OpenMP threads.
900
         * Here unperturbed i-particles will usually only have a few
901
         * (perturbed) j-particles in the list. Thus with a buffered list
902
         * we can skip a significant number of i-reductions with a check.
903
         */
904
        if (npair_within_cutoff > 0)
905
        {
906
            if (bDoForces) // can't get rid of this: task is not known a priori
907
            {
908
#pragma omp atomic
909
                f[ii3]        += fix;
910
#pragma omp atomic
911
                f[ii3+1]      += fiy;
912
#pragma omp atomic
913
                f[ii3+2]      += fiz;
914
            }
915
            if (bDoShiftForces) // can't get rid of this: task is not known a priori
916
            {
917
#pragma omp atomic
918
                fshift[is3]   += fix;
919
#pragma omp atomic
920
                fshift[is3+1] += fiy;
921
#pragma omp atomic
922
                fshift[is3+2] += fiz;
923
            }
924
            if (bDoPotential) // can't get rid of this: task is not known a priori
925
            {
926
                ggid               = gid[n];
927
#pragma omp atomic
928
                Vc[ggid]          += vctot;
929
#pragma omp atomic
930
                Vv[ggid]          += vvtot;
931
            }
932
        }
933
    }
934

    
935
#pragma omp atomic
936
    dvdl[efptCOUL]     += dvdl_coul;
937
 #pragma omp atomic
938
    dvdl[efptVDW]      += dvdl_vdw;
939

    
940
    /* Estimate flops, average for free energy stuff:
941
     * 12  flops per outer iteration
942
     * 150 flops per inner iteration
943
     */
944
#pragma omp atomic
945
    inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
946
}