Project

General

Profile

minimize_new.cpp

Dmitry Morozov, 03/29/2019 02:27 PM

 
1
/*
2
 * This file is part of the GROMACS molecular simulation package.
3
 *
4
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5
 * Copyright (c) 2001-2004, The GROMACS development team.
6
 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8
 * and including many others, as listed in the AUTHORS file in the
9
 * top-level source directory and at http://www.gromacs.org.
10
 *
11
 * GROMACS is free software; you can redistribute it and/or
12
 * modify it under the terms of the GNU Lesser General Public License
13
 * as published by the Free Software Foundation; either version 2.1
14
 * of the License, or (at your option) any later version.
15
 *
16
 * GROMACS is distributed in the hope that it will be useful,
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
 * Lesser General Public License for more details.
20
 *
21
 * You should have received a copy of the GNU Lesser General Public
22
 * License along with GROMACS; if not, see
23
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25
 *
26
 * If you want to redistribute modifications to GROMACS, please
27
 * consider that scientific software is very special. Version
28
 * control is crucial - bugs must be traceable. We will be happy to
29
 * consider code for inclusion in the official distribution, but
30
 * derived work must not be called official GROMACS. Details are found
31
 * in the README & COPYING files - if they are missing, get the
32
 * official version at http://www.gromacs.org.
33
 *
34
 * To help us fund GROMACS development, we humbly ask that you cite
35
 * the research papers on the package. Check out http://www.gromacs.org.
36
 */
37
/*! \internal \file
38
 *
39
 * \brief This file defines integrators for energy minimization
40
 *
41
 * \author Berk Hess <hess@kth.se>
42
 * \author Erik Lindahl <erik@kth.se>
43
 * \ingroup module_mdrun
44
 */
45
#include "gmxpre.h"
46

    
47
#include "config.h"
48

    
49
#include <cmath>
50
#include <cstring>
51
#include <ctime>
52

    
53
#include <algorithm>
54
#include <vector>
55

    
56
#include "gromacs/commandline/filenm.h"
57
#include "gromacs/domdec/collect.h"
58
#include "gromacs/domdec/domdec.h"
59
#include "gromacs/domdec/domdec_struct.h"
60
#include "gromacs/domdec/partition.h"
61
#include "gromacs/ewald/pme.h"
62
#include "gromacs/fileio/confio.h"
63
#include "gromacs/fileio/mtxio.h"
64
#include "gromacs/gmxlib/network.h"
65
#include "gromacs/gmxlib/nrnb.h"
66
#include "gromacs/imd/imd.h"
67
#include "gromacs/linearalgebra/sparsematrix.h"
68
#include "gromacs/listed-forces/manage-threading.h"
69
#include "gromacs/math/functions.h"
70
#include "gromacs/math/vec.h"
71
#include "gromacs/mdlib/constr.h"
72
#include "gromacs/mdlib/force.h"
73
#include "gromacs/mdlib/forcerec.h"
74
#include "gromacs/mdlib/gmx_omp_nthreads.h"
75
#include "gromacs/mdlib/md_support.h"
76
#include "gromacs/mdlib/mdatoms.h"
77
#include "gromacs/mdlib/mdebin.h"
78
#include "gromacs/mdlib/mdrun.h"
79
#include "gromacs/mdlib/mdsetup.h"
80
#include "gromacs/mdlib/ns.h"
81
#include "gromacs/mdlib/shellfc.h"
82
#include "gromacs/mdlib/sim_util.h"
83
#include "gromacs/mdlib/tgroup.h"
84
#include "gromacs/mdlib/trajectory_writing.h"
85
#include "gromacs/mdlib/update.h"
86
#include "gromacs/mdlib/vsite.h"
87
#include "gromacs/mdtypes/commrec.h"
88
#include "gromacs/mdtypes/inputrec.h"
89
#include "gromacs/mdtypes/md_enums.h"
90
#include "gromacs/mdtypes/state.h"
91
#include "gromacs/pbcutil/mshift.h"
92
#include "gromacs/pbcutil/pbc.h"
93
#include "gromacs/timing/wallcycle.h"
94
#include "gromacs/timing/walltime_accounting.h"
95
#include "gromacs/topology/mtop_util.h"
96
#include "gromacs/topology/topology.h"
97
#include "gromacs/utility/cstringutil.h"
98
#include "gromacs/utility/exceptions.h"
99
#include "gromacs/utility/fatalerror.h"
100
#include "gromacs/utility/logger.h"
101
#include "gromacs/utility/smalloc.h"
102

    
103
#include "integrator.h"
104

    
105
//! Utility structure for manipulating states during EM
106
typedef struct {
107
    //! Copy of the global state
108
    t_state                 s;
109
    //! Force array
110
    PaddedVector<gmx::RVec> f;
111
    //! Potential energy
112
    real                    epot;
113
    //! Norm of the force
114
    real                    fnorm;
115
    //! Maximum force
116
    real                    fmax;
117
    //! Direction
118
    int                     a_fmax;
119
} em_state_t;
120

    
121
//! Print the EM starting conditions
122
static void print_em_start(FILE                     *fplog,
123
                           const t_commrec          *cr,
124
                           gmx_walltime_accounting_t walltime_accounting,
125
                           gmx_wallcycle_t           wcycle,
126
                           const char               *name)
127
{
128
    walltime_accounting_start_time(walltime_accounting);
129
    wallcycle_start(wcycle, ewcRUN);
130
    print_start(fplog, cr, walltime_accounting, name);
131
}
132

    
133
//! Stop counting time for EM
134
static void em_time_end(gmx_walltime_accounting_t walltime_accounting,
135
                        gmx_wallcycle_t           wcycle)
136
{
137
    wallcycle_stop(wcycle, ewcRUN);
138

    
139
    walltime_accounting_end_time(walltime_accounting);
140
}
141

    
142
//! Printing a log file and console header
143
static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
144
{
145
    fprintf(out, "\n");
146
    fprintf(out, "%s:\n", minimizer);
147
    fprintf(out, "   Tolerance (Fmax)   = %12.5e\n", ftol);
148
    fprintf(out, "   Number of steps    = %12d\n", nsteps);
149
}
150

    
151
//! Print warning message
152
static void warn_step(FILE     *fp,
153
                      real      ftol,
154
                      real      fmax,
155
                      gmx_bool  bLastStep,
156
                      gmx_bool  bConstrain)
157
{
158
    constexpr bool realIsDouble = GMX_DOUBLE;
159
    char           buffer[2048];
160

    
161
    if (!std::isfinite(fmax))
162
    {
163
        sprintf(buffer,
164
                "\nEnergy minimization has stopped because the force "
165
                "on at least one atom is not finite. This usually means "
166
                "atoms are overlapping. Modify the input coordinates to "
167
                "remove atom overlap or use soft-core potentials with "
168
                "the free energy code to avoid infinite forces.\n%s",
169
                !realIsDouble ?
170
                "You could also be lucky that switching to double precision "
171
                "is sufficient to obtain finite forces.\n" :
172
                "");
173
    }
174
    else if (bLastStep)
175
    {
176
        sprintf(buffer,
177
                "\nEnergy minimization reached the maximum number "
178
                "of steps before the forces reached the requested "
179
                "precision Fmax < %g.\n", ftol);
180
    }
181
    else
182
    {
183
        sprintf(buffer,
184
                "\nEnergy minimization has stopped, but the forces have "
185
                "not converged to the requested precision Fmax < %g (which "
186
                "may not be possible for your system). It stopped "
187
                "because the algorithm tried to make a new step whose size "
188
                "was too small, or there was no change in the energy since "
189
                "last step. Either way, we regard the minimization as "
190
                "converged to within the available machine precision, "
191
                "given your starting configuration and EM parameters.\n%s%s",
192
                ftol,
193
                !realIsDouble ?
194
                "\nDouble precision normally gives you higher accuracy, but "
195
                "this is often not needed for preparing to run molecular "
196
                "dynamics.\n" :
197
                "",
198
                bConstrain ?
199
                "You might need to increase your constraint accuracy, or turn\n"
200
                "off constraints altogether (set constraints = none in mdp file)\n" :
201
                "");
202
    }
203

    
204
    fputs(wrap_lines(buffer, 78, 0, FALSE), stderr);
205
    fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
206
}
207

    
208
//! Print message about convergence of the EM
209
static void print_converged(FILE *fp, const char *alg, real ftol,
210
                            int64_t count, gmx_bool bDone, int64_t nsteps,
211
                            const em_state_t *ems, double sqrtNumAtoms)
212
{
213
    char buf[STEPSTRSIZE];
214

    
215
    if (bDone)
216
    {
217
        fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",
218
                alg, ftol, gmx_step_str(count, buf));
219
    }
220
    else if (count < nsteps)
221
    {
222
        fprintf(fp, "\n%s converged to machine precision in %s steps,\n"
223
                "but did not reach the requested Fmax < %g.\n",
224
                alg, gmx_step_str(count, buf), ftol);
225
    }
226
    else
227
    {
228
        fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",
229
                alg, ftol, gmx_step_str(count, buf));
230
    }
231

    
232
#if GMX_DOUBLE
233
    fprintf(fp, "Potential Energy  = %21.14e\n", ems->epot);
234
    fprintf(fp, "Maximum force     = %21.14e on atom %d\n", ems->fmax, ems->a_fmax + 1);
235
    fprintf(fp, "Norm of force     = %21.14e\n", ems->fnorm/sqrtNumAtoms);
236
#else
237
    fprintf(fp, "Potential Energy  = %14.7e\n", ems->epot);
238
    fprintf(fp, "Maximum force     = %14.7e on atom %d\n", ems->fmax, ems->a_fmax + 1);
239
    fprintf(fp, "Norm of force     = %14.7e\n", ems->fnorm/sqrtNumAtoms);
240
#endif
241
}
242

    
243
//! Compute the norm and max of the force array in parallel
244
static void get_f_norm_max(const t_commrec *cr,
245
                           t_grpopts *opts, t_mdatoms *mdatoms, const rvec *f,
246
                           real *fnorm, real *fmax, int *a_fmax)
247
{
248
    double fnorm2, *sum;
249
    real   fmax2, fam;
250
    int    la_max, a_max, start, end, i, m, gf;
251

    
252
    /* This routine finds the largest force and returns it.
253
     * On parallel machines the global max is taken.
254
     */
255
    fnorm2 = 0;
256
    fmax2  = 0;
257
    la_max = -1;
258
    start  = 0;
259
    end    = mdatoms->homenr;
260
    if (mdatoms->cFREEZE)
261
    {
262
        for (i = start; i < end; i++)
263
        {
264
            gf  = mdatoms->cFREEZE[i];
265
            fam = 0;
266
            for (m = 0; m < DIM; m++)
267
            {
268
                if (!opts->nFreeze[gf][m])
269
                {
270
                    fam += gmx::square(f[i][m]);
271
                }
272
            }
273
            fnorm2 += fam;
274
            if (fam > fmax2)
275
            {
276
                fmax2  = fam;
277
                la_max = i;
278
            }
279
        }
280
    }
281
    else
282
    {
283
        for (i = start; i < end; i++)
284
        {
285
            fam     = norm2(f[i]);
286
            fnorm2 += fam;
287
            if (fam > fmax2)
288
            {
289
                fmax2  = fam;
290
                la_max = i;
291
            }
292
        }
293
    }
294

    
295
    if (la_max >= 0 && DOMAINDECOMP(cr))
296
    {
297
        a_max = cr->dd->globalAtomIndices[la_max];
298
    }
299
    else
300
    {
301
        a_max = la_max;
302
    }
303
    if (PAR(cr))
304
    {
305
        snew(sum, 2*cr->nnodes+1);
306
        sum[2*cr->nodeid]   = fmax2;
307
        sum[2*cr->nodeid+1] = a_max;
308
        sum[2*cr->nnodes]   = fnorm2;
309
        gmx_sumd(2*cr->nnodes+1, sum, cr);
310
        fnorm2 = sum[2*cr->nnodes];
311
        /* Determine the global maximum */
312
        for (i = 0; i < cr->nnodes; i++)
313
        {
314
            if (sum[2*i] > fmax2)
315
            {
316
                fmax2 = sum[2*i];
317
                a_max = gmx::roundToInt(sum[2*i+1]);
318
            }
319
        }
320
        sfree(sum);
321
    }
322

    
323
    if (fnorm)
324
    {
325
        *fnorm = sqrt(fnorm2);
326
    }
327
    if (fmax)
328
    {
329
        *fmax  = sqrt(fmax2);
330
    }
331
    if (a_fmax)
332
    {
333
        *a_fmax = a_max;
334
    }
335
}
336

    
337
//! Compute the norm of the force
338
static void get_state_f_norm_max(const t_commrec *cr,
339
                                 t_grpopts *opts, t_mdatoms *mdatoms,
340
                                 em_state_t *ems)
341
{
342
    get_f_norm_max(cr, opts, mdatoms, ems->f.rvec_array(),
343
                   &ems->fnorm, &ems->fmax, &ems->a_fmax);
344
}
345

    
346
//! Initialize the energy minimization
347
static void init_em(FILE *fplog,
348
                    const gmx::MDLogger &mdlog,
349
                    const char *title,
350
                    const t_commrec *cr,
351
                    const gmx_multisim_t *ms,
352
                    gmx::IMDOutputProvider *outputProvider,
353
                    t_inputrec *ir,
354
                    const MdrunOptions &mdrunOptions,
355
                    t_state *state_global, gmx_mtop_t *top_global,
356
                    em_state_t *ems, gmx_localtop_t **top,
357
                    t_nrnb *nrnb, rvec mu_tot,
358
                    t_forcerec *fr, gmx_enerdata_t **enerd,
359
                    t_graph **graph, gmx::MDAtoms *mdAtoms, gmx_global_stat_t *gstat,
360
                    gmx_vsite_t *vsite, gmx::Constraints *constr, gmx_shellfc_t **shellfc,
361
                    int nfile, const t_filenm fnm[],
362
                    gmx_mdoutf_t *outf, t_mdebin **mdebin,
363
                    gmx_wallcycle_t wcycle)
364
{
365
    real dvdl_constr;
366

    
367
    if (fplog)
368
    {
369
        fprintf(fplog, "Initiating %s\n", title);
370
    }
371

    
372
    if (MASTER(cr))
373
    {
374
        state_global->ngtc = 0;
375

    
376
        /* Initialize lambda variables */
377
        initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, nullptr);
378
    }
379

    
380
    init_nrnb(nrnb);
381

    
382
    /* Interactive molecular dynamics */
383
    init_IMD(ir, cr, ms, top_global, fplog, 1,
384
             MASTER(cr) ? state_global->x.rvec_array() : nullptr,
385
             nfile, fnm, nullptr, mdrunOptions);
386

    
387
    if (ir->eI == eiNM)
388
    {
389
        GMX_ASSERT(shellfc != nullptr, "With NM we always support shells");
390

    
391
        *shellfc = init_shell_flexcon(stdout,
392
                                      top_global,
393
                                      constr ? constr->numFlexibleConstraints() : 0,
394
                                      ir->nstcalcenergy,
395
                                      DOMAINDECOMP(cr));
396
    }
397
    else
398
    {
399
        GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir->eI), "This else currently only handles energy minimizers, consider if your algorithm needs shell/flexible-constraint support");
400

    
401
        /* With energy minimization, shells and flexible constraints are
402
         * automatically minimized when treated like normal DOFS.
403
         */
404
        if (shellfc != nullptr)
405
        {
406
            *shellfc = nullptr;
407
        }
408
    }
409

    
410
    auto mdatoms = mdAtoms->mdatoms();
411
    if (DOMAINDECOMP(cr))
412
    {
413
        *top = dd_init_local_top(top_global);
414

    
415
        dd_init_local_state(cr->dd, state_global, &ems->s);
416

    
417
        /* Distribute the charge groups over the nodes from the master node */
418
        dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1,
419
                            state_global, top_global, ir,
420
                            &ems->s, &ems->f, mdAtoms, *top,
421
                            fr, vsite, constr,
422
                            nrnb, nullptr, FALSE);
423
        dd_store_state(cr->dd, &ems->s);
424

    
425
        *graph = nullptr;
426
    }
427
    else
428
    {
429
        state_change_natoms(state_global, state_global->natoms);
430
        /* Just copy the state */
431
        ems->s = *state_global;
432
        state_change_natoms(&ems->s, ems->s.natoms);
433
        ems->f.resizeWithPadding(ems->s.natoms);
434

    
435
        snew(*top, 1);
436
        mdAlgorithmsSetupAtomData(cr, ir, top_global, *top, fr,
437
                                  graph, mdAtoms,
438
                                  constr, vsite, shellfc ? *shellfc : nullptr);
439

    
440
        if (vsite)
441
        {
442
            set_vsite_top(vsite, *top, mdatoms);
443
        }
444
    }
445

    
446
    update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[efptMASS]);
447

    
448
    if (constr)
449
    {
450
        // TODO how should this cross-module support dependency be managed?
451
        if (ir->eConstrAlg == econtSHAKE &&
452
            gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
453
        {
454
            gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
455
                      econstr_names[econtSHAKE], econstr_names[econtLINCS]);
456
        }
457

    
458
        if (!ir->bContinuation)
459
        {
460
            /* Constrain the starting coordinates */
461
            dvdl_constr = 0;
462
            constr->apply(TRUE, TRUE,
463
                          -1, 0, 1.0,
464
                          ems->s.x.rvec_array(),
465
                          ems->s.x.rvec_array(),
466
                          nullptr,
467
                          ems->s.box,
468
                          ems->s.lambda[efptFEP], &dvdl_constr,
469
                          nullptr, nullptr, gmx::ConstraintVariable::Positions);
470
        }
471
    }
472

    
473
    if (PAR(cr))
474
    {
475
        *gstat = global_stat_init(ir);
476
    }
477
    else
478
    {
479
        *gstat = nullptr;
480
    }
481

    
482
    *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, ir, top_global, nullptr, wcycle);
483

    
484
    snew(*enerd, 1);
485
    init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
486
                  *enerd);
487

    
488
    if (mdebin != nullptr)
489
    {
490
        /* Init bin for energy stuff */
491
        *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, nullptr);
492
    }
493

    
494
    clear_rvec(mu_tot);
495
    calc_shifts(ems->s.box, fr->shift_vec);
496
}
497

    
498
//! Finalize the minimization
499
static void finish_em(const t_commrec *cr, gmx_mdoutf_t outf,
500
                      gmx_walltime_accounting_t walltime_accounting,
501
                      gmx_wallcycle_t wcycle)
502
{
503
    if (!thisRankHasDuty(cr, DUTY_PME))
504
    {
505
        /* Tell the PME only node to finish */
506
        gmx_pme_send_finish(cr);
507
    }
508

    
509
    done_mdoutf(outf);
510

    
511
    em_time_end(walltime_accounting, wcycle);
512
}
513

    
514
//! Swap two different EM states during minimization
515
static void swap_em_state(em_state_t **ems1, em_state_t **ems2)
516
{
517
    em_state_t *tmp;
518

    
519
    tmp   = *ems1;
520
    *ems1 = *ems2;
521
    *ems2 = tmp;
522
}
523

    
524
//! Save the EM trajectory
525
static void write_em_traj(FILE *fplog, const t_commrec *cr,
526
                          gmx_mdoutf_t outf,
527
                          gmx_bool bX, gmx_bool bF, const char *confout,
528
                          gmx_mtop_t *top_global,
529
                          t_inputrec *ir, int64_t step,
530
                          em_state_t *state,
531
                          t_state *state_global,
532
                          ObservablesHistory *observablesHistory)
533
{
534
    int mdof_flags = 0;
535

    
536
    if (bX)
537
    {
538
        mdof_flags |= MDOF_X;
539
    }
540
    if (bF)
541
    {
542
        mdof_flags |= MDOF_F;
543
    }
544

    
545
    /* If we want IMD output, set appropriate MDOF flag */
546
    if (ir->bIMD)
547
    {
548
        mdof_flags |= MDOF_IMD;
549
    }
550

    
551
    mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
552
                                     top_global, step, static_cast<double>(step),
553
                                     &state->s, state_global, observablesHistory,
554
                                     state->f);
555

    
556
    if (confout != nullptr)
557
    {
558
        if (DOMAINDECOMP(cr))
559
        {
560
            /* If bX=true, x was collected to state_global in the call above */
561
            if (!bX)
562
            {
563
                gmx::ArrayRef<gmx::RVec> globalXRef = MASTER(cr) ? makeArrayRef(state_global->x) : gmx::EmptyArrayRef();
564
                dd_collect_vec(cr->dd, &state->s, makeArrayRef(state->s.x), globalXRef);
565
            }
566
        }
567
        else
568
        {
569
            /* Copy the local state pointer */
570
            state_global = &state->s;
571
        }
572

    
573
        if (MASTER(cr))
574
        {
575
            if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
576
            {
577
                /* Make molecules whole only for confout writing */
578
                do_pbc_mtop(fplog, ir->ePBC, state->s.box, top_global,
579
                            state_global->x.rvec_array());
580
            }
581

    
582
            write_sto_conf_mtop(confout,
583
                                *top_global->name, top_global,
584
                                state_global->x.rvec_array(), nullptr, ir->ePBC, state->s.box);
585
        }
586
    }
587
}
588

    
589
//! \brief Do one minimization step
590
//
591
// \returns true when the step succeeded, false when a constraint error occurred
592
static bool do_em_step(const t_commrec *cr,
593
                       t_inputrec *ir, t_mdatoms *md,
594
                       em_state_t *ems1, real a, const PaddedVector<gmx::RVec> *force,
595
                       em_state_t *ems2,
596
                       gmx::Constraints *constr,
597
                       int64_t count)
598

    
599
{
600
    t_state *s1, *s2;
601
    int      start, end;
602
    real     dvdl_constr;
603
    int      nthreads gmx_unused;
604

    
605
    bool     validStep = true;
606

    
607
    s1 = &ems1->s;
608
    s2 = &ems2->s;
609

    
610
    if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
611
    {
612
        gmx_incons("state mismatch in do_em_step");
613
    }
614

    
615
    s2->flags = s1->flags;
616

    
617
    if (s2->natoms != s1->natoms)
618
    {
619
        state_change_natoms(s2, s1->natoms);
620
        ems2->f.resizeWithPadding(s2->natoms);
621
    }
622
    if (DOMAINDECOMP(cr) && s2->cg_gl.size() != s1->cg_gl.size())
623
    {
624
        s2->cg_gl.resize(s1->cg_gl.size());
625
    }
626

    
627
    copy_mat(s1->box, s2->box);
628
    /* Copy free energy state */
629
    s2->lambda = s1->lambda;
630
    copy_mat(s1->box, s2->box);
631

    
632
    start = 0;
633
    end   = md->homenr;
634

    
635
    nthreads = gmx_omp_nthreads_get(emntUpdate);
636
#pragma omp parallel num_threads(nthreads)
637
    {
638
        const rvec *x1 = s1->x.rvec_array();
639
        rvec       *x2 = s2->x.rvec_array();
640
        const rvec *f  = force->rvec_array();
641

    
642
        int         gf = 0;
643
#pragma omp for schedule(static) nowait
644
        for (int i = start; i < end; i++)
645
        {
646
            try
647
            {
648
                if (md->cFREEZE)
649
                {
650
                    gf = md->cFREEZE[i];
651
                }
652
                for (int m = 0; m < DIM; m++)
653
                {
654
                    if (ir->opts.nFreeze[gf][m])
655
                    {
656
                        x2[i][m] = x1[i][m];
657
                    }
658
                    else
659
                    {
660
                        x2[i][m] = x1[i][m] + a*f[i][m];
661
                    }
662
                }
663
            }
664
            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
665
        }
666

    
667
        if (s2->flags & (1<<estCGP))
668
        {
669
            /* Copy the CG p vector */
670
            const rvec *p1 = s1->cg_p.rvec_array();
671
            rvec       *p2 = s2->cg_p.rvec_array();
672
#pragma omp for schedule(static) nowait
673
            for (int i = start; i < end; i++)
674
            {
675
                // Trivial OpenMP block that does not throw
676
                copy_rvec(p1[i], p2[i]);
677
            }
678
        }
679

    
680
        if (DOMAINDECOMP(cr))
681
        {
682
            s2->ddp_count = s1->ddp_count;
683

    
684
            /* OpenMP does not supported unsigned loop variables */
685
#pragma omp for schedule(static) nowait
686
            for (int i = 0; i < static_cast<int>(s2->cg_gl.size()); i++)
687
            {
688
                s2->cg_gl[i] = s1->cg_gl[i];
689
            }
690
            s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
691
        }
692
    }
693

    
694
    if (constr)
695
    {
696
        dvdl_constr = 0;
697
        validStep   =
698
            constr->apply(TRUE, TRUE,
699
                          count, 0, 1.0,
700
                          s1->x.rvec_array(), s2->x.rvec_array(),
701
                          nullptr, s2->box,
702
                          s2->lambda[efptBONDED], &dvdl_constr,
703
                          nullptr, nullptr, gmx::ConstraintVariable::Positions);
704

    
705
        if (cr->nnodes > 1)
706
        {
707
            /* This global reduction will affect performance at high
708
             * parallelization, but we can not really avoid it.
709
             * But usually EM is not run at high parallelization.
710
             */
711
            int reductionBuffer = static_cast<int>(!validStep);
712
            gmx_sumi(1, &reductionBuffer, cr);
713
            validStep           = (reductionBuffer == 0);
714
        }
715

    
716
        // We should move this check to the different minimizers
717
        if (!validStep && ir->eI != eiSteep)
718
        {
719
            gmx_fatal(FARGS, "The coordinates could not be constrained. Minimizer '%s' can not handle constraint failures, use minimizer '%s' before using '%s'.",
720
                      EI(ir->eI), EI(eiSteep), EI(ir->eI));
721
        }
722
    }
723

    
724
    return validStep;
725
}
726

    
727
//! Prepare EM for using domain decomposition parallellization
728
static void em_dd_partition_system(FILE *fplog,
729
                                   const gmx::MDLogger &mdlog,
730
                                   int step, const t_commrec *cr,
731
                                   gmx_mtop_t *top_global, t_inputrec *ir,
732
                                   em_state_t *ems, gmx_localtop_t *top,
733
                                   gmx::MDAtoms *mdAtoms, t_forcerec *fr,
734
                                   gmx_vsite_t *vsite, gmx::Constraints *constr,
735
                                   t_nrnb *nrnb, gmx_wallcycle_t wcycle)
736
{
737
    /* Repartition the domain decomposition */
738
    dd_partition_system(fplog, mdlog, step, cr, FALSE, 1,
739
                        nullptr, top_global, ir,
740
                        &ems->s, &ems->f,
741
                        mdAtoms, top, fr, vsite, constr,
742
                        nrnb, wcycle, FALSE);
743
    dd_store_state(cr->dd, &ems->s);
744
}
745

    
746
namespace
747
{
748

    
749
/*! \brief Class to handle the work of setting and doing an energy evaluation.
750
 *
751
 * This class is a mere aggregate of parameters to pass to evaluate an
752
 * energy, so that future changes to names and types of them consume
753
 * less time when refactoring other code.
754
 *
755
 * Aggregate initialization is used, for which the chief risk is that
756
 * if a member is added at the end and not all initializer lists are
757
 * updated, then the member will be value initialized, which will
758
 * typically mean initialization to zero.
759
 *
760
 * We only want to construct one of these with an initializer list, so
761
 * we explicitly delete the default constructor. */
762
class EnergyEvaluator
763
{
764
    public:
765
        //! We only intend to construct such objects with an initializer list.
766
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)
767
        // Aspects of the C++11 spec changed after GCC 4.8.5, and
768
        // compilation of the initializer list construction in
769
        // runner.cpp fails in GCC 4.8.5.
770
        EnergyEvaluator() = delete;
771
#endif
772
        /*! \brief Evaluates an energy on the state in \c ems.
773
         *
774
         * \todo In practice, the same objects mu_tot, vir, and pres
775
         * are always passed to this function, so we would rather have
776
         * them as data members. However, their C-array types are
777
         * unsuited for aggregate initialization. When the types
778
         * improve, the call signature of this method can be reduced.
779
         */
780
        void run(em_state_t *ems, rvec mu_tot,
781
                 tensor vir, tensor pres,
782
                 int64_t count, gmx_bool bFirst);
783
        //! Handles logging (deprecated).
784
        FILE                 *fplog;
785
        //! Handles logging.
786
        const gmx::MDLogger  &mdlog;
787
        //! Handles communication.
788
        const t_commrec      *cr;
789
        //! Coordinates multi-simulations.
790
        const gmx_multisim_t *ms;
791
        //! Holds the simulation topology.
792
        gmx_mtop_t           *top_global;
793
        //! Holds the domain topology.
794
        gmx_localtop_t       *top;
795
        //! User input options.
796
        t_inputrec           *inputrec;
797
        //! Manages flop accounting.
798
        t_nrnb               *nrnb;
799
        //! Manages wall cycle accounting.
800
        gmx_wallcycle_t       wcycle;
801
        //! Coordinates global reduction.
802
        gmx_global_stat_t     gstat;
803
        //! Handles virtual sites.
804
        gmx_vsite_t          *vsite;
805
        //! Handles constraints.
806
        gmx::Constraints     *constr;
807
        //! Handles strange things.
808
        t_fcdata             *fcd;
809
        //! Molecular graph for SHAKE.
810
        t_graph              *graph;
811
        //! Per-atom data for this domain.
812
        gmx::MDAtoms         *mdAtoms;
813
        //! Handles how to calculate the forces.
814
        t_forcerec           *fr;
815
        //! Schedule of force-calculation work each step for this task.
816
        gmx::PpForceWorkload *ppForceWorkload;
817
        //! Stores the computed energies.
818
        gmx_enerdata_t       *enerd;
819
};
820

    
821
void
822
EnergyEvaluator::run(em_state_t *ems, rvec mu_tot,
823
                     tensor vir, tensor pres,
824
                     int64_t count, gmx_bool bFirst)
825
{
826
    real     t;
827
    gmx_bool bNS;
828
    tensor   force_vir, shake_vir, ekin;
829
    real     dvdl_constr, prescorr, enercorr, dvdlcorr;
830
    real     terminate = 0;
831

    
832
    /* Set the time to the initial time, the time does not change during EM */
833
    t = inputrec->init_t;
834

    
835
    if (bFirst ||
836
        (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
837
    {
838
        /* This is the first state or an old state used before the last ns */
839
        bNS = TRUE;
840
    }
841
    else
842
    {
843
        bNS = FALSE;
844
        if (inputrec->nstlist > 0)
845
        {
846
            bNS = TRUE;
847
        }
848
    }
849

    
850
    if (vsite)
851
    {
852
        construct_vsites(vsite, ems->s.x.rvec_array(), 1, nullptr,
853
                         top->idef.iparams, top->idef.il,
854
                         fr->ePBC, fr->bMolPBC, cr, ems->s.box);
855
    }
856

    
857
    if (DOMAINDECOMP(cr) && bNS)
858
    {
859
        /* Repartition the domain decomposition */
860
        em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec,
861
                               ems, top, mdAtoms, fr, vsite, constr,
862
                               nrnb, wcycle);
863
    }
864

    
865
    /* Calc force & energy on new trial position  */
866
    /* do_force always puts the charge groups in the box and shifts again
867
     * We do not unshift, so molecules are always whole in congrad.c
868
     */
869
    do_force(fplog, cr, ms, inputrec, nullptr, nullptr,
870
             count, nrnb, wcycle, top, &top_global->groups,
871
             ems->s.box, ems->s.x.arrayRefWithPadding(), &ems->s.hist,
872
             ems->f.arrayRefWithPadding(), force_vir, mdAtoms->mdatoms(), enerd, fcd,
873
             ems->s.lambda, graph, fr, ppForceWorkload, vsite, mu_tot, t, nullptr,
874
             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
875
             GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
876
             (bNS ? GMX_FORCE_NS : 0),
877
             DOMAINDECOMP(cr) ?
878
             DdOpenBalanceRegionBeforeForceComputation::yes :
879
             DdOpenBalanceRegionBeforeForceComputation::no,
880
             DOMAINDECOMP(cr) ?
881
             DdCloseBalanceRegionAfterForceComputation::yes :
882
             DdCloseBalanceRegionAfterForceComputation::no);
883

    
884
    /* Clear the unused shake virial and pressure */
885
    clear_mat(shake_vir);
886
    clear_mat(pres);
887

    
888
    /* Communicate stuff when parallel */
889
    if (PAR(cr) && inputrec->eI != eiNM)
890
    {
891
        wallcycle_start(wcycle, ewcMoveE);
892

    
893
        global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot,
894
                    inputrec, nullptr, nullptr, nullptr, 1, &terminate,
895
                    nullptr, FALSE,
896
                    CGLO_ENERGY |
897
                    CGLO_PRESSURE |
898
                    CGLO_CONSTRAINT);
899

    
900
        wallcycle_stop(wcycle, ewcMoveE);
901
    }
902

    
903
    /* Calculate long range corrections to pressure and energy */
904
    calc_dispcorr(inputrec, fr, ems->s.box, ems->s.lambda[efptVDW],
905
                  pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
906
    enerd->term[F_DISPCORR] = enercorr;
907
    enerd->term[F_EPOT]    += enercorr;
908
    enerd->term[F_PRES]    += prescorr;
909
    enerd->term[F_DVDL]    += dvdlcorr;
910

    
911
    ems->epot = enerd->term[F_EPOT];
912

    
913
    if (constr)
914
    {
915
        /* Project out the constraint components of the force */
916
        dvdl_constr = 0;
917
        rvec *f_rvec = ems->f.rvec_array();
918
        constr->apply(FALSE, FALSE,
919
                      count, 0, 1.0,
920
                      ems->s.x.rvec_array(), f_rvec, f_rvec,
921
                      ems->s.box,
922
                      ems->s.lambda[efptBONDED], &dvdl_constr,
923
                      nullptr, &shake_vir, gmx::ConstraintVariable::ForceDispl);
924
        enerd->term[F_DVDL_CONSTR] += dvdl_constr;
925
        m_add(force_vir, shake_vir, vir);
926
    }
927
    else
928
    {
929
        copy_mat(force_vir, vir);
930
    }
931

    
932
    clear_mat(ekin);
933
    enerd->term[F_PRES] =
934
        calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres);
935

    
936
    sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals);
937

    
938
    if (EI_ENERGY_MINIMIZATION(inputrec->eI))
939
    {
940
        get_state_f_norm_max(cr, &(inputrec->opts), mdAtoms->mdatoms(), ems);
941
    }
942
}
943

    
944
} // namespace
945

    
946
//! Parallel utility summing energies and forces
947
static double reorder_partsum(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
948
                              gmx_mtop_t *top_global,
949
                              em_state_t *s_min, em_state_t *s_b)
950
{
951
    t_block       *cgs_gl;
952
    int            ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m;
953
    double         partsum;
954
    unsigned char *grpnrFREEZE;
955

    
956
    if (debug)
957
    {
958
        fprintf(debug, "Doing reorder_partsum\n");
959
    }
960

    
961
    const rvec *fm = s_min->f.rvec_array();
962
    const rvec *fb = s_b->f.rvec_array();
963

    
964
    cgs_gl = dd_charge_groups_global(cr->dd);
965
    index  = cgs_gl->index;
966

    
967
    /* Collect fm in a global vector fmg.
968
     * This conflicts with the spirit of domain decomposition,
969
     * but to fully optimize this a much more complicated algorithm is required.
970
     */
971
    rvec *fmg;
972
    snew(fmg, top_global->natoms);
973

    
974
    ncg   = s_min->s.cg_gl.size();
975
    cg_gl = s_min->s.cg_gl.data();
976
    i     = 0;
977
    for (c = 0; c < ncg; c++)
978
    {
979
        cg = cg_gl[c];
980
        a0 = index[cg];
981
        a1 = index[cg+1];
982
        for (a = a0; a < a1; a++)
983
        {
984
            copy_rvec(fm[i], fmg[a]);
985
            i++;
986
        }
987
    }
988
    gmx_sum(top_global->natoms*3, fmg[0], cr);
989

    
990
    /* Now we will determine the part of the sum for the cgs in state s_b */
991
    ncg         = s_b->s.cg_gl.size();
992
    cg_gl       = s_b->s.cg_gl.data();
993
    partsum     = 0;
994
    i           = 0;
995
    gf          = 0;
996
    grpnrFREEZE = top_global->groups.grpnr[egcFREEZE];
997
    for (c = 0; c < ncg; c++)
998
    {
999
        cg = cg_gl[c];
1000
        a0 = index[cg];
1001
        a1 = index[cg+1];
1002
        for (a = a0; a < a1; a++)
1003
        {
1004
            if (mdatoms->cFREEZE && grpnrFREEZE)
1005
            {
1006
                gf = grpnrFREEZE[i];
1007
            }
1008
            for (m = 0; m < DIM; m++)
1009
            {
1010
                if (!opts->nFreeze[gf][m])
1011
                {
1012
                    partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
1013
                }
1014
            }
1015
            i++;
1016
        }
1017
    }
1018

    
1019
    sfree(fmg);
1020

    
1021
    return partsum;
1022
}
1023

    
1024
//! Print some stuff, like beta, whatever that means.
1025
static real pr_beta(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
1026
                    gmx_mtop_t *top_global,
1027
                    em_state_t *s_min, em_state_t *s_b)
1028
{
1029
    double sum;
1030

    
1031
    /* This is just the classical Polak-Ribiere calculation of beta;
1032
     * it looks a bit complicated since we take freeze groups into account,
1033
     * and might have to sum it in parallel runs.
1034
     */
1035

    
1036
    if (!DOMAINDECOMP(cr) ||
1037
        (s_min->s.ddp_count == cr->dd->ddp_count &&
1038
         s_b->s.ddp_count   == cr->dd->ddp_count))
1039
    {
1040
        const rvec *fm  = s_min->f.rvec_array();
1041
        const rvec *fb  = s_b->f.rvec_array();
1042
        sum             = 0;
1043
        int         gf  = 0;
1044
        /* This part of code can be incorrect with DD,
1045
         * since the atom ordering in s_b and s_min might differ.
1046
         */
1047
        for (int i = 0; i < mdatoms->homenr; i++)
1048
        {
1049
            if (mdatoms->cFREEZE)
1050
            {
1051
                gf = mdatoms->cFREEZE[i];
1052
            }
1053
            for (int m = 0; m < DIM; m++)
1054
            {
1055
                if (!opts->nFreeze[gf][m])
1056
                {
1057
                    sum += (fb[i][m] - fm[i][m])*fb[i][m];
1058
                }
1059
            }
1060
        }
1061
    }
1062
    else
1063
    {
1064
        /* We need to reorder cgs while summing */
1065
        sum = reorder_partsum(cr, opts, mdatoms, top_global, s_min, s_b);
1066
    }
1067
    if (PAR(cr))
1068
    {
1069
        gmx_sumd(1, &sum, cr);
1070
    }
1071

    
1072
    return sum/gmx::square(s_min->fnorm);
1073
}
1074

    
1075
namespace gmx
1076
{
1077

    
1078
void
1079
Integrator::do_cg()
1080
{
1081
    const char       *CG = "Polak-Ribiere Conjugate Gradients";
1082

    
1083
    gmx_localtop_t   *top;
1084
    gmx_enerdata_t   *enerd;
1085
    gmx_global_stat_t gstat;
1086
    t_graph          *graph;
1087
    double            tmp, minstep;
1088
    real              stepsize;
1089
    real              a, b, c, beta = 0.0;
1090
    real              epot_repl = 0;
1091
    real              pnorm;
1092
    t_mdebin         *mdebin;
1093
    gmx_bool          converged, foundlower;
1094
    rvec              mu_tot;
1095
    gmx_bool          do_log = FALSE, do_ene = FALSE, do_x, do_f;
1096
    tensor            vir, pres;
1097
    int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
1098
    gmx_mdoutf_t      outf;
1099
    int               m, step, nminstep;
1100
    auto              mdatoms = mdAtoms->mdatoms();
1101

    
1102
    GMX_LOG(mdlog.info).asParagraph().
1103
        appendText("Note that activating conjugate gradient energy minimization via the "
1104
                   "integrator .mdp option and the command gmx mdrun may "
1105
                   "be available in a different form in a future version of GROMACS, "
1106
                   "e.g. gmx minimize and an .mdp option.");
1107

    
1108
    step = 0;
1109

    
1110
    if (MASTER(cr))
1111
    {
1112
        // In CG, the state is extended with a search direction
1113
        state_global->flags |= (1<<estCGP);
1114

    
1115
        // Ensure the extra per-atom state array gets allocated
1116
        state_change_natoms(state_global, state_global->natoms);
1117

    
1118
        // Initialize the search direction to zero
1119
        for (RVec &cg_p : state_global->cg_p)
1120
        {
1121
            cg_p = { 0, 0, 0 };
1122
        }
1123
    }
1124

    
1125
    /* Create 4 states on the stack and extract pointers that we will swap */
1126
    em_state_t  s0 {}, s1 {}, s2 {}, s3 {};
1127
    em_state_t *s_min = &s0;
1128
    em_state_t *s_a   = &s1;
1129
    em_state_t *s_b   = &s2;
1130
    em_state_t *s_c   = &s3;
1131

    
1132
    /* Init em and store the local state in s_min */
1133
    init_em(fplog, mdlog, CG, cr, ms, outputProvider, inputrec, mdrunOptions,
1134
            state_global, top_global, s_min, &top,
1135
            nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat,
1136
            vsite, constr, nullptr,
1137
            nfile, fnm, &outf, &mdebin, wcycle);
1138

    
1139
    /* Print to log file */
1140
    print_em_start(fplog, cr, walltime_accounting, wcycle, CG);
1141

    
1142
    /* Max number of steps */
1143
    number_steps = inputrec->nsteps;
1144

    
1145
    if (MASTER(cr))
1146
    {
1147
        sp_header(stderr, CG, inputrec->em_tol, number_steps);
1148
    }
1149
    if (fplog)
1150
    {
1151
        sp_header(fplog, CG, inputrec->em_tol, number_steps);
1152
    }
1153

    
1154
    EnergyEvaluator energyEvaluator {
1155
        fplog, mdlog, cr, ms,
1156
        top_global, top,
1157
        inputrec, nrnb, wcycle, gstat,
1158
        vsite, constr, fcd, graph,
1159
        mdAtoms, fr, ppForceWorkload, enerd
1160
    };
1161
    /* Call the force routine and some auxiliary (neighboursearching etc.) */
1162
    /* do_force always puts the charge groups in the box and shifts again
1163
     * We do not unshift, so molecules are always whole in congrad.c
1164
     */
1165
    energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE);
1166

    
1167
    if (MASTER(cr))
1168
    {
1169
        /* Copy stuff to the energy bin for easy printing etc. */
1170
        matrix nullBox = {};
1171
        upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step),
1172
                   mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox,
1173
                   nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
1174

    
1175
        print_ebin_header(fplog, step, step);
1176
        print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
1177
                   mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr);
1178
    }
1179

    
1180
    /* Estimate/guess the initial stepsize */
1181
    stepsize = inputrec->em_stepsize/s_min->fnorm;
1182

    
1183
    if (MASTER(cr))
1184
    {
1185
        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1186
        fprintf(stderr, "   F-max             = %12.5e on atom %d\n",
1187
                s_min->fmax, s_min->a_fmax+1);
1188
        fprintf(stderr, "   F-Norm            = %12.5e\n",
1189
                s_min->fnorm/sqrtNumAtoms);
1190
        fprintf(stderr, "\n");
1191
        /* and copy to the log file too... */
1192
        fprintf(fplog, "   F-max             = %12.5e on atom %d\n",
1193
                s_min->fmax, s_min->a_fmax+1);
1194
        fprintf(fplog, "   F-Norm            = %12.5e\n",
1195
                s_min->fnorm/sqrtNumAtoms);
1196
        fprintf(fplog, "\n");
1197
    }
1198
    /* Start the loop over CG steps.
1199
     * Each successful step is counted, and we continue until
1200
     * we either converge or reach the max number of steps.
1201
     */
1202
    converged = FALSE;
1203
    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
1204
    {
1205

    
1206
        /* start taking steps in a new direction
1207
         * First time we enter the routine, beta=0, and the direction is
1208
         * simply the negative gradient.
1209
         */
1210

    
1211
        /* Calculate the new direction in p, and the gradient in this direction, gpa */
1212
        rvec       *pm  = s_min->s.cg_p.rvec_array();
1213
        const rvec *sfm = s_min->f.rvec_array();
1214
        double      gpa = 0;
1215
        int         gf  = 0;
1216
        for (int i = 0; i < mdatoms->homenr; i++)
1217
        {
1218
            if (mdatoms->cFREEZE)
1219
            {
1220
                gf = mdatoms->cFREEZE[i];
1221
            }
1222
            for (m = 0; m < DIM; m++)
1223
            {
1224
                if (!inputrec->opts.nFreeze[gf][m])
1225
                {
1226
                    pm[i][m] = sfm[i][m] + beta*pm[i][m];
1227
                    gpa     -= pm[i][m]*sfm[i][m];
1228
                    /* f is negative gradient, thus the sign */
1229
                }
1230
                else
1231
                {
1232
                    pm[i][m] = 0;
1233
                }
1234
            }
1235
        }
1236

    
1237
        /* Sum the gradient along the line across CPUs */
1238
        if (PAR(cr))
1239
        {
1240
            gmx_sumd(1, &gpa, cr);
1241
        }
1242

    
1243
        /* Calculate the norm of the search vector */
1244
        get_f_norm_max(cr, &(inputrec->opts), mdatoms, pm, &pnorm, nullptr, nullptr);
1245

    
1246
        /* Just in case stepsize reaches zero due to numerical precision... */
1247
        if (stepsize <= 0)
1248
        {
1249
            stepsize = inputrec->em_stepsize/pnorm;
1250
        }
1251

    
1252
        /*
1253
         * Double check the value of the derivative in the search direction.
1254
         * If it is positive it must be due to the old information in the
1255
         * CG formula, so just remove that and start over with beta=0.
1256
         * This corresponds to a steepest descent step.
1257
         */
1258
        if (gpa > 0)
1259
        {
1260
            beta = 0;
1261
            step--;   /* Don't count this step since we are restarting */
1262
            continue; /* Go back to the beginning of the big for-loop */
1263
        }
1264

    
1265
        /* Calculate minimum allowed stepsize, before the average (norm)
1266
         * relative change in coordinate is smaller than precision
1267
         */
1268
        minstep = 0;
1269
        auto s_min_x = makeArrayRef(s_min->s.x);
1270
        for (int i = 0; i < mdatoms->homenr; i++)
1271
        {
1272
            for (m = 0; m < DIM; m++)
1273
            {
1274
                tmp = fabs(s_min_x[i][m]);
1275
                if (tmp < 1.0)
1276
                {
1277
                    tmp = 1.0;
1278
                }
1279
                tmp      = pm[i][m]/tmp;
1280
                minstep += tmp*tmp;
1281
            }
1282
        }
1283
        /* Add up from all CPUs */
1284
        if (PAR(cr))
1285
        {
1286
            gmx_sumd(1, &minstep, cr);
1287
        }
1288

    
1289
        minstep = GMX_REAL_EPS/sqrt(minstep/(3*top_global->natoms));
1290

    
1291
        if (stepsize < minstep)
1292
        {
1293
            converged = TRUE;
1294
            break;
1295
        }
1296

    
1297
        /* Write coordinates if necessary */
1298
        do_x = do_per_step(step, inputrec->nstxout);
1299
        do_f = do_per_step(step, inputrec->nstfout);
1300

    
1301
        write_em_traj(fplog, cr, outf, do_x, do_f, nullptr,
1302
                      top_global, inputrec, step,
1303
                      s_min, state_global, observablesHistory);
1304

    
1305
        /* Take a step downhill.
1306
         * In theory, we should minimize the function along this direction.
1307
         * That is quite possible, but it turns out to take 5-10 function evaluations
1308
         * for each line. However, we dont really need to find the exact minimum -
1309
         * it is much better to start a new CG step in a modified direction as soon
1310
         * as we are close to it. This will save a lot of energy evaluations.
1311
         *
1312
         * In practice, we just try to take a single step.
1313
         * If it worked (i.e. lowered the energy), we increase the stepsize but
1314
         * the continue straight to the next CG step without trying to find any minimum.
1315
         * If it didn't work (higher energy), there must be a minimum somewhere between
1316
         * the old position and the new one.
1317
         *
1318
         * Due to the finite numerical accuracy, it turns out that it is a good idea
1319
         * to even accept a SMALL increase in energy, if the derivative is still downhill.
1320
         * This leads to lower final energies in the tests I've done. / Erik
1321
         */
1322
        s_a->epot = s_min->epot;
1323
        a         = 0.0;
1324
        c         = a + stepsize; /* reference position along line is zero */
1325

    
1326
        if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
1327
        {
1328
            em_dd_partition_system(fplog, mdlog, step, cr, top_global, inputrec,
1329
                                   s_min, top, mdAtoms, fr, vsite, constr,
1330
                                   nrnb, wcycle);
1331
        }
1332

    
1333
        /* Take a trial step (new coords in s_c) */
1334
        do_em_step(cr, inputrec, mdatoms, s_min, c, &s_min->s.cg_p, s_c,
1335
                   constr, -1);
1336

    
1337
        neval++;
1338
        /* Calculate energy for the trial step */
1339
        energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE);
1340

    
1341
        /* Calc derivative along line */
1342
        const rvec *pc  = s_c->s.cg_p.rvec_array();
1343
        const rvec *sfc = s_c->f.rvec_array();
1344
        double      gpc = 0;
1345
        for (int i = 0; i < mdatoms->homenr; i++)
1346
        {
1347
            for (m = 0; m < DIM; m++)
1348
            {
1349
                gpc -= pc[i][m]*sfc[i][m]; /* f is negative gradient, thus the sign */
1350
            }
1351
        }
1352
        /* Sum the gradient along the line across CPUs */
1353
        if (PAR(cr))
1354
        {
1355
            gmx_sumd(1, &gpc, cr);
1356
        }
1357

    
1358
        /* This is the max amount of increase in energy we tolerate */
1359
        tmp = std::sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1360

    
1361
        /* Accept the step if the energy is lower, or if it is not significantly higher
1362
         * and the line derivative is still negative.
1363
         */
1364
        if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
1365
        {
1366
            foundlower = TRUE;
1367
            /* Great, we found a better energy. Increase step for next iteration
1368
             * if we are still going down, decrease it otherwise
1369
             */
1370
            if (gpc < 0)
1371
            {
1372
                stepsize *= 1.618034; /* The golden section */
1373
            }
1374
            else
1375
            {
1376
                stepsize *= 0.618034; /* 1/golden section */
1377
            }
1378
        }
1379
        else
1380
        {
1381
            /* New energy is the same or higher. We will have to do some work
1382
             * to find a smaller value in the interval. Take smaller step next time!
1383
             */
1384
            foundlower = FALSE;
1385
            stepsize  *= 0.618034;
1386
        }
1387

    
1388

    
1389

    
1390

    
1391
        /* OK, if we didn't find a lower value we will have to locate one now - there must
1392
         * be one in the interval [a=0,c].
1393
         * The same thing is valid here, though: Don't spend dozens of iterations to find
1394
         * the line minimum. We try to interpolate based on the derivative at the endpoints,
1395
         * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1396
         *
1397
         * I also have a safeguard for potentially really pathological functions so we never
1398
         * take more than 20 steps before we give up ...
1399
         *
1400
         * If we already found a lower value we just skip this step and continue to the update.
1401
         */
1402
        double gpb;
1403
        if (!foundlower)
1404
        {
1405
            nminstep = 0;
1406

    
1407
            do
1408
            {
1409
                /* Select a new trial point.
1410
                 * If the derivatives at points a & c have different sign we interpolate to zero,
1411
                 * otherwise just do a bisection.
1412
                 */
1413
                if (gpa < 0 && gpc > 0)
1414
                {
1415
                    b = a + gpa*(a-c)/(gpc-gpa);
1416
                }
1417
                else
1418
                {
1419
                    b = 0.5*(a+c);
1420
                }
1421

    
1422
                /* safeguard if interpolation close to machine accuracy causes errors:
1423
                 * never go outside the interval
1424
                 */
1425
                if (b <= a || b >= c)
1426
                {
1427
                    b = 0.5*(a+c);
1428
                }
1429

    
1430
                if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
1431
                {
1432
                    /* Reload the old state */
1433
                    em_dd_partition_system(fplog, mdlog, -1, cr, top_global, inputrec,
1434
                                           s_min, top, mdAtoms, fr, vsite, constr,
1435
                                           nrnb, wcycle);
1436
                }
1437

    
1438
                /* Take a trial step to this new point - new coords in s_b */
1439
                do_em_step(cr, inputrec, mdatoms, s_min, b, &s_min->s.cg_p, s_b,
1440
                           constr, -1);
1441

    
1442
                neval++;
1443
                /* Calculate energy for the trial step */
1444
                energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE);
1445

    
1446
                /* p does not change within a step, but since the domain decomposition
1447
                 * might change, we have to use cg_p of s_b here.
1448
                 */
1449
                const rvec *pb  = s_b->s.cg_p.rvec_array();
1450
                const rvec *sfb = s_b->f.rvec_array();
1451
                gpb             = 0;
1452
                for (int i = 0; i < mdatoms->homenr; i++)
1453
                {
1454
                    for (m = 0; m < DIM; m++)
1455
                    {
1456
                        gpb -= pb[i][m]*sfb[i][m]; /* f is negative gradient, thus the sign */
1457
                    }
1458
                }
1459
                /* Sum the gradient along the line across CPUs */
1460
                if (PAR(cr))
1461
                {
1462
                    gmx_sumd(1, &gpb, cr);
1463
                }
1464

    
1465
                if (debug)
1466
                {
1467
                    fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1468
                            s_a->epot, s_b->epot, s_c->epot, gpb);
1469
                }
1470

    
1471
                epot_repl = s_b->epot;
1472

    
1473
                /* Keep one of the intervals based on the value of the derivative at the new point */
1474
                if (gpb > 0)
1475
                {
1476
                    /* Replace c endpoint with b */
1477
                    swap_em_state(&s_b, &s_c);
1478
                    c   = b;
1479
                    gpc = gpb;
1480
                }
1481
                else
1482
                {
1483
                    /* Replace a endpoint with b */
1484
                    swap_em_state(&s_b, &s_a);
1485
                    a   = b;
1486
                    gpa = gpb;
1487
                }
1488

    
1489
                /*
1490
                 * Stop search as soon as we find a value smaller than the endpoints.
1491
                 * Never run more than 20 steps, no matter what.
1492
                 */
1493
                nminstep++;
1494
            }
1495
            while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1496
                   (nminstep < 20));
1497

    
1498
            if (std::fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1499
                nminstep >= 20)
1500
            {
1501
                /* OK. We couldn't find a significantly lower energy.
1502
                 * If beta==0 this was steepest descent, and then we give up.
1503
                 * If not, set beta=0 and restart with steepest descent before quitting.
1504
                 */
1505
                if (beta == 0.0)
1506
                {
1507
                    /* Converged */
1508
                    converged = TRUE;
1509
                    break;
1510
                }
1511
                else
1512
                {
1513
                    /* Reset memory before giving up */
1514
                    beta = 0.0;
1515
                    continue;
1516
                }
1517
            }
1518

    
1519
            /* Select min energy state of A & C, put the best in B.
1520
             */
1521
            if (s_c->epot < s_a->epot)
1522
            {
1523
                if (debug)
1524
                {
1525
                    fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n",
1526
                            s_c->epot, s_a->epot);
1527
                }
1528
                swap_em_state(&s_b, &s_c);
1529
                gpb = gpc;
1530
            }
1531
            else
1532
            {
1533
                if (debug)
1534
                {
1535
                    fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n",
1536
                            s_a->epot, s_c->epot);
1537
                }
1538
                swap_em_state(&s_b, &s_a);
1539
                gpb = gpa;
1540
            }
1541

    
1542
        }
1543
        else
1544
        {
1545
            if (debug)
1546
            {
1547
                fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n",
1548
                        s_c->epot);
1549
            }
1550
            swap_em_state(&s_b, &s_c);
1551
            gpb = gpc;
1552
        }
1553

    
1554
        /* new search direction */
1555
        /* beta = 0 means forget all memory and restart with steepest descents. */
1556
        if (nstcg && ((step % nstcg) == 0))
1557
        {
1558
            beta = 0.0;
1559
        }
1560
        else
1561
        {
1562
            /* s_min->fnorm cannot be zero, because then we would have converged
1563
             * and broken out.
1564
             */
1565

    
1566
            /* Polak-Ribiere update.
1567
             * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1568
             */
1569
            beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
1570
        }
1571
        /* Limit beta to prevent oscillations */
1572
        if (fabs(beta) > 5.0)
1573
        {
1574
            beta = 0.0;
1575
        }
1576

    
1577

    
1578
        /* update positions */
1579
        swap_em_state(&s_min, &s_b);
1580
        gpa = gpb;
1581

    
1582
        /* Print it if necessary */
1583
        if (MASTER(cr))
1584
        {
1585
            if (mdrunOptions.verbose)
1586
            {
1587
                double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1588
                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1589
                        step, s_min->epot, s_min->fnorm/sqrtNumAtoms,
1590
                        s_min->fmax, s_min->a_fmax+1);
1591
                fflush(stderr);
1592
            }
1593
            /* Store the new (lower) energies */
1594
            matrix nullBox = {};
1595
            upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step),
1596
                       mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox,
1597
                       nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
1598

    
1599
            do_log = do_per_step(step, inputrec->nstlog);
1600
            do_ene = do_per_step(step, inputrec->nstenergy);
1601

    
1602
            /* Prepare IMD energy record, if bIMD is TRUE. */
1603
            IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE);
1604

    
1605
            if (do_log)
1606
            {
1607
                print_ebin_header(fplog, step, step);
1608
            }
1609
            print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
1610
                       do_log ? fplog : nullptr, step, step, eprNORMAL,
1611
                       mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr);
1612
        }
1613

    
1614
        /* Send energies and positions to the IMD client if bIMD is TRUE. */
1615
        if (MASTER(cr) && do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x.rvec_array(), inputrec, 0, wcycle))
1616
        {
1617
            IMD_send_positions(inputrec->imd);
1618
        }
1619

    
1620
        /* Stop when the maximum force lies below tolerance.
1621
         * If we have reached machine precision, converged is already set to true.
1622
         */
1623
        converged = converged || (s_min->fmax < inputrec->em_tol);
1624

    
1625
    }   /* End of the loop */
1626

    
1627
    /* IMD cleanup, if bIMD is TRUE. */
1628
    IMD_finalize(inputrec->bIMD, inputrec->imd);
1629

    
1630
    if (converged)
1631
    {
1632
        step--; /* we never took that last step in this case */
1633

    
1634
    }
1635
    if (s_min->fmax > inputrec->em_tol)
1636
    {
1637
        if (MASTER(cr))
1638
        {
1639
            warn_step(fplog, inputrec->em_tol, s_min->fmax,
1640
                      step-1 == number_steps, FALSE);
1641
        }
1642
        converged = FALSE;
1643
    }
1644

    
1645
    if (MASTER(cr))
1646
    {
1647
        /* If we printed energy and/or logfile last step (which was the last step)
1648
         * we don't have to do it again, but otherwise print the final values.
1649
         */
1650
        if (!do_log)
1651
        {
1652
            /* Write final value to log since we didn't do anything the last step */
1653
            print_ebin_header(fplog, step, step);
1654
        }
1655
        if (!do_ene || !do_log)
1656
        {
1657
            /* Write final energy file entries */
1658
            print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
1659
                       !do_log ? fplog : nullptr, step, step, eprNORMAL,
1660
                       mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr);
1661
        }
1662
    }
1663

    
1664
    /* Print some stuff... */
1665
    if (MASTER(cr))
1666
    {
1667
        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
1668
    }
1669

    
1670
    /* IMPORTANT!
1671
     * For accurate normal mode calculation it is imperative that we
1672
     * store the last conformation into the full precision binary trajectory.
1673
     *
1674
     * However, we should only do it if we did NOT already write this step
1675
     * above (which we did if do_x or do_f was true).
1676
     */
1677
    /* Note that with 0 < nstfout != nstxout we can end up with two frames
1678
     * in the trajectory with the same step number.
1679
     */
1680
    do_x = !do_per_step(step, inputrec->nstxout);
1681
    do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
1682

    
1683
    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
1684
                  top_global, inputrec, step,
1685
                  s_min, state_global, observablesHistory);
1686

    
1687

    
1688
    if (MASTER(cr))
1689
    {
1690
        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1691
        print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps,
1692
                        s_min, sqrtNumAtoms);
1693
        print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps,
1694
                        s_min, sqrtNumAtoms);
1695

    
1696
        fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
1697
    }
1698

    
1699
    finish_em(cr, outf, walltime_accounting, wcycle);
1700

    
1701
    /* To print the actual number of steps we needed somewhere */
1702
    walltime_accounting_set_nsteps_done(walltime_accounting, step);
1703
}
1704

    
1705

    
1706
void
1707
Integrator::do_lbfgs()
1708
{
1709
    static const char *LBFGS = "Low-Memory BFGS Minimizer";
1710
    em_state_t         ems;
1711
    gmx_localtop_t    *top;
1712
    gmx_enerdata_t    *enerd;
1713
    gmx_global_stat_t  gstat;
1714
    t_graph           *graph;
1715
    int                ncorr, nmaxcorr, point, cp, neval, nminstep;
1716
    double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
1717
    real              *rho, *alpha, *p, *s, **dx, **dg;
1718
    real               a, b, c, maxdelta, delta;
1719
    real               diag, Epot0;
1720
    real               dgdx, dgdg, sq, yr, beta;
1721
    t_mdebin          *mdebin;
1722
    gmx_bool           converged;
1723
    rvec               mu_tot;
1724
    gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
1725
    tensor             vir, pres;
1726
    int                start, end, number_steps;
1727
    gmx_mdoutf_t       outf;
1728
    int                i, k, m, n, gf, step;
1729
    int                mdof_flags;
1730
    auto               mdatoms = mdAtoms->mdatoms();
1731

    
1732
    GMX_LOG(mdlog.info).asParagraph().
1733
        appendText("Note that activating L-BFGS energy minimization via the "
1734
                   "integrator .mdp option and the command gmx mdrun may "
1735
                   "be available in a different form in a future version of GROMACS, "
1736
                   "e.g. gmx minimize and an .mdp option.");
1737

    
1738
    if (PAR(cr))
1739
    {
1740
        gmx_fatal(FARGS, "L-BFGS minimization only supports a single rank");
1741
    }
1742

    
1743
    if (nullptr != constr)
1744
    {
1745
        gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
1746
    }
1747

    
1748
    n        = 3*state_global->natoms;
1749
    nmaxcorr = inputrec->nbfgscorr;
1750

    
1751
    snew(frozen, n);
1752

    
1753
    snew(p, n);
1754
    snew(rho, nmaxcorr);
1755
    snew(alpha, nmaxcorr);
1756

    
1757
    snew(dx, nmaxcorr);
1758
    for (i = 0; i < nmaxcorr; i++)
1759
    {
1760
        snew(dx[i], n);
1761
    }
1762

    
1763
    snew(dg, nmaxcorr);
1764
    for (i = 0; i < nmaxcorr; i++)
1765
    {
1766
        snew(dg[i], n);
1767
    }
1768

    
1769
    step  = 0;
1770
    neval = 0;
1771

    
1772
    /* Init em */
1773
    init_em(fplog, mdlog, LBFGS, cr, ms, outputProvider, inputrec, mdrunOptions,
1774
            state_global, top_global, &ems, &top,
1775
            nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat,
1776
            vsite, constr, nullptr,
1777
            nfile, fnm, &outf, &mdebin, wcycle);
1778

    
1779
    start = 0;
1780
    end   = mdatoms->homenr;
1781

    
1782
    /* We need 4 working states */
1783
    em_state_t  s0 {}, s1 {}, s2 {}, s3 {};
1784
    em_state_t *sa   = &s0;
1785
    em_state_t *sb   = &s1;
1786
    em_state_t *sc   = &s2;
1787
    em_state_t *last = &s3;
1788
    /* Initialize by copying the state from ems (we could skip x and f here) */
1789
    *sa              = ems;
1790
    *sb              = ems;
1791
    *sc              = ems;
1792

    
1793
    /* Print to log file */
1794
    print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);
1795

    
1796
    do_log = do_ene = do_x = do_f = TRUE;
1797

    
1798
    /* Max number of steps */
1799
    number_steps = inputrec->nsteps;
1800

    
1801
    /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1802
    gf = 0;
1803
    for (i = start; i < end; i++)
1804
    {
1805
        if (mdatoms->cFREEZE)
1806
        {
1807
            gf = mdatoms->cFREEZE[i];
1808
        }
1809
        for (m = 0; m < DIM; m++)
1810
        {
1811
            frozen[3*i+m] = (inputrec->opts.nFreeze[gf][m] != 0);
1812
        }
1813
    }
1814
    if (MASTER(cr))
1815
    {
1816
        sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
1817
    }
1818
    if (fplog)
1819
    {
1820
        sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
1821
    }
1822

    
1823
    if (vsite)
1824
    {
1825
        construct_vsites(vsite, state_global->x.rvec_array(), 1, nullptr,
1826
                         top->idef.iparams, top->idef.il,
1827
                         fr->ePBC, fr->bMolPBC, cr, state_global->box);
1828
    }
1829

    
1830
    /* Call the force routine and some auxiliary (neighboursearching etc.) */
1831
    /* do_force always puts the charge groups in the box and shifts again
1832
     * We do not unshift, so molecules are always whole
1833
     */
1834
    neval++;
1835
    EnergyEvaluator energyEvaluator {
1836
        fplog, mdlog, cr, ms,
1837
        top_global, top,
1838
        inputrec, nrnb, wcycle, gstat,
1839
        vsite, constr, fcd, graph,
1840
        mdAtoms, fr, ppForceWorkload, enerd
1841
    };
1842
    energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE);
1843

    
1844
    if (MASTER(cr))
1845
    {
1846
        /* Copy stuff to the energy bin for easy printing etc. */
1847
        matrix nullBox = {};
1848
        upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step),
1849
                   mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox,
1850
                   nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
1851

    
1852
        print_ebin_header(fplog, step, step);
1853
        print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
1854
                   mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr);
1855
    }
1856

    
1857
    /* Set the initial step.
1858
     * since it will be multiplied by the non-normalized search direction
1859
     * vector (force vector the first time), we scale it by the
1860
     * norm of the force.
1861
     */
1862

    
1863
    if (MASTER(cr))
1864
    {
1865
        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1866
        fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1867
        fprintf(stderr, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
1868
        fprintf(stderr, "   F-Norm            = %12.5e\n", ems.fnorm/sqrtNumAtoms);
1869
        fprintf(stderr, "\n");
1870
        /* and copy to the log file too... */
1871
        fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1872
        fprintf(fplog, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
1873
        fprintf(fplog, "   F-Norm            = %12.5e\n", ems.fnorm/sqrtNumAtoms);
1874
        fprintf(fplog, "\n");
1875
    }
1876

    
1877
    // Point is an index to the memory of search directions, where 0 is the first one.
1878
    point = 0;
1879

    
1880
    // Set initial search direction to the force (-gradient), or 0 for frozen particles.
1881
    real *fInit = static_cast<real *>(ems.f.rvec_array()[0]);
1882
    for (i = 0; i < n; i++)
1883
    {
1884
        if (!frozen[i])
1885
        {
1886
            dx[point][i] = fInit[i]; /* Initial search direction */
1887
        }
1888
        else
1889
        {
1890
            dx[point][i] = 0;
1891
        }
1892
    }
1893

    
1894
    // Stepsize will be modified during the search, and actually it is not critical
1895
    // (the main efficiency in the algorithm comes from changing directions), but
1896
    // we still need an initial value, so estimate it as the inverse of the norm
1897
    // so we take small steps where the potential fluctuates a lot.
1898
    stepsize  = 1.0/ems.fnorm;
1899

    
1900
    /* Start the loop over BFGS steps.
1901
     * Each successful step is counted, and we continue until
1902
     * we either converge or reach the max number of steps.
1903
     */
1904

    
1905
    ncorr = 0;
1906

    
1907
    /* Set the gradient from the force */
1908
    converged = FALSE;
1909
    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
1910
    {
1911

    
1912
        /* Write coordinates if necessary */
1913
        do_x = do_per_step(step, inputrec->nstxout);
1914
        do_f = do_per_step(step, inputrec->nstfout);
1915

    
1916
        mdof_flags = 0;
1917
        if (do_x)
1918
        {
1919
            mdof_flags |= MDOF_X;
1920
        }
1921

    
1922
        if (do_f)
1923
        {
1924
            mdof_flags |= MDOF_F;
1925
        }
1926

    
1927
        if (inputrec->bIMD)
1928
        {
1929
            mdof_flags |= MDOF_IMD;
1930
        }
1931

    
1932
        mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
1933
                                         top_global, step, static_cast<real>(step), &ems.s, state_global, observablesHistory, ems.f);
1934

    
1935
        /* Do the linesearching in the direction dx[point][0..(n-1)] */
1936

    
1937
        /* make s a pointer to current search direction - point=0 first time we get here */
1938
        s = dx[point];
1939

    
1940
        real *xx = static_cast<real *>(ems.s.x.rvec_array()[0]);
1941
        real *ff = static_cast<real *>(ems.f.rvec_array()[0]);
1942

    
1943
        // calculate line gradient in position A
1944
        for (gpa = 0, i = 0; i < n; i++)
1945
        {
1946
            gpa -= s[i]*ff[i];
1947
        }
1948

    
1949
        /* Calculate minimum allowed stepsize along the line, before the average (norm)
1950
         * relative change in coordinate is smaller than precision
1951
         */
1952
        for (minstep = 0, i = 0; i < n; i++)
1953
        {
1954
            tmp = fabs(xx[i]);
1955
            if (tmp < 1.0)
1956
            {
1957
                tmp = 1.0;
1958
            }
1959
            tmp      = s[i]/tmp;
1960
            minstep += tmp*tmp;
1961
        }
1962
        minstep = GMX_REAL_EPS/sqrt(minstep/n);
1963

    
1964
        if (stepsize < minstep)
1965
        {
1966
            converged = TRUE;
1967
            break;
1968
        }
1969

    
1970
        // Before taking any steps along the line, store the old position
1971
        *last       = ems;
1972
        real *lastx = static_cast<real *>(last->s.x.data()[0]);
1973
        real *lastf = static_cast<real *>(last->f.data()[0]);
1974
        Epot0       = ems.epot;
1975

    
1976
        *sa         = ems;
1977

    
1978
        /* Take a step downhill.
1979
         * In theory, we should find the actual minimum of the function in this
1980
         * direction, somewhere along the line.
1981
         * That is quite possible, but it turns out to take 5-10 function evaluations
1982
         * for each line. However, we dont really need to find the exact minimum -
1983
         * it is much better to start a new BFGS step in a modified direction as soon
1984
         * as we are close to it. This will save a lot of energy evaluations.
1985
         *
1986
         * In practice, we just try to take a single step.
1987
         * If it worked (i.e. lowered the energy), we increase the stepsize but
1988
         * continue straight to the next BFGS step without trying to find any minimum,
1989
         * i.e. we change the search direction too. If the line was smooth, it is
1990
         * likely we are in a smooth region, and then it makes sense to take longer
1991
         * steps in the modified search direction too.
1992
         *
1993
         * If it didn't work (higher energy), there must be a minimum somewhere between
1994
         * the old position and the new one. Then we need to start by finding a lower
1995
         * value before we change search direction. Since the energy was apparently
1996
         * quite rough, we need to decrease the step size.
1997
         *
1998
         * Due to the finite numerical accuracy, it turns out that it is a good idea
1999
         * to accept a SMALL increase in energy, if the derivative is still downhill.
2000
         * This leads to lower final energies in the tests I've done. / Erik
2001
         */
2002

    
2003
        // State "A" is the first position along the line.
2004
        // reference position along line is initially zero
2005
        a          = 0.0;
2006

    
2007
        // Check stepsize first. We do not allow displacements
2008
        // larger than emstep.
2009
        //
2010
        do
2011
        {
2012
            // Pick a new position C by adding stepsize to A.
2013
            c        = a + stepsize;
2014

    
2015
            // Calculate what the largest change in any individual coordinate
2016
            // would be (translation along line * gradient along line)
2017
            maxdelta = 0;
2018
            for (i = 0; i < n; i++)
2019
            {
2020
                delta = c*s[i];
2021
                if (delta > maxdelta)
2022
                {
2023
                    maxdelta = delta;
2024
                }
2025
            }
2026
            // If any displacement is larger than the stepsize limit, reduce the step
2027
            if (maxdelta > inputrec->em_stepsize)
2028
            {
2029
                stepsize *= 0.1;
2030
            }
2031
        }
2032
        while (maxdelta > inputrec->em_stepsize);
2033

    
2034
        // Take a trial step and move the coordinate array xc[] to position C
2035
        real *xc = static_cast<real *>(sc->s.x.rvec_array()[0]);
2036
        for (i = 0; i < n; i++)
2037
        {
2038
            xc[i] = lastx[i] + c*s[i];
2039
        }
2040

    
2041
        neval++;
2042
        // Calculate energy for the trial step in position C
2043
        energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE);
2044

    
2045
        // Calc line gradient in position C
2046
        real *fc = static_cast<real *>(sc->f.rvec_array()[0]);
2047
        for (gpc = 0, i = 0; i < n; i++)
2048
        {
2049
            gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
2050
        }
2051
        /* Sum the gradient along the line across CPUs */
2052
        if (PAR(cr))
2053
        {
2054
            gmx_sumd(1, &gpc, cr);
2055
        }
2056

    
2057
        // This is the max amount of increase in energy we tolerate.
2058
        // By allowing VERY small changes (close to numerical precision) we
2059
        // frequently find even better (lower) final energies.
2060
        tmp = std::sqrt(GMX_REAL_EPS)*fabs(sa->epot);
2061

    
2062
        // Accept the step if the energy is lower in the new position C (compared to A),
2063
        // or if it is not significantly higher and the line derivative is still negative.
2064
        foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp));
2065
        // If true, great, we found a better energy. We no longer try to alter the
2066
        // stepsize, but simply accept this new better position. The we select a new
2067
        // search direction instead, which will be much more efficient than continuing
2068
        // to take smaller steps along a line. Set fnorm based on the new C position,
2069
        // which will be used to update the stepsize to 1/fnorm further down.
2070

    
2071
        // If false, the energy is NOT lower in point C, i.e. it will be the same
2072
        // or higher than in point A. In this case it is pointless to move to point C,
2073
        // so we will have to do more iterations along the same line to find a smaller
2074
        // value in the interval [A=0.0,C].
2075
        // Here, A is still 0.0, but that will change when we do a search in the interval
2076
        // [0.0,C] below. That search we will do by interpolation or bisection rather
2077
        // than with the stepsize, so no need to modify it. For the next search direction
2078
        // it will be reset to 1/fnorm anyway.
2079

    
2080
        if (!foundlower)
2081
        {
2082
            // OK, if we didn't find a lower value we will have to locate one now - there must
2083
            // be one in the interval [a,c].
2084
            // The same thing is valid here, though: Don't spend dozens of iterations to find
2085
            // the line minimum. We try to interpolate based on the derivative at the endpoints,
2086
            // and only continue until we find a lower value. In most cases this means 1-2 iterations.
2087
            // I also have a safeguard for potentially really pathological functions so we never
2088
            // take more than 20 steps before we give up.
2089
            // If we already found a lower value we just skip this step and continue to the update.
2090
            real fnorm = 0;
2091
            nminstep   = 0;
2092
            do
2093
            {
2094
                // Select a new trial point B in the interval [A,C].
2095
                // If the derivatives at points a & c have different sign we interpolate to zero,
2096
                // otherwise just do a bisection since there might be multiple minima/maxima
2097
                // inside the interval.
2098
                if (gpa < 0 && gpc > 0)
2099
                {
2100
                    b = a + gpa*(a-c)/(gpc-gpa);
2101
                }
2102
                else
2103
                {
2104
                    b = 0.5*(a+c);
2105
                }
2106

    
2107
                /* safeguard if interpolation close to machine accuracy causes errors:
2108
                 * never go outside the interval
2109
                 */
2110
                if (b <= a || b >= c)
2111
                {
2112
                    b = 0.5*(a+c);
2113
                }
2114

    
2115
                // Take a trial step to point B
2116
                real *xb = static_cast<real *>(sb->s.x.rvec_array()[0]);
2117
                for (i = 0; i < n; i++)
2118
                {
2119
                    xb[i] = lastx[i] + b*s[i];
2120
                }
2121

    
2122
                neval++;
2123
                // Calculate energy for the trial step in point B
2124
                energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE);
2125
                fnorm = sb->fnorm;
2126

    
2127
                // Calculate gradient in point B
2128
                real *fb = static_cast<real *>(sb->f.rvec_array()[0]);
2129
                for (gpb = 0, i = 0; i < n; i++)
2130
                {
2131
                    gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
2132

    
2133
                }
2134
                /* Sum the gradient along the line across CPUs */
2135
                if (PAR(cr))
2136
                {
2137
                    gmx_sumd(1, &gpb, cr);
2138
                }
2139

    
2140
                // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative
2141
                // at the new point B, and rename the endpoints of this new interval A and C.
2142
                if (gpb > 0)
2143
                {
2144
                    /* Replace c endpoint with b */
2145
                                        sc->epot = sb->epot;
2146
                                        c = b;
2147
                                        gpc = gpb;
2148
                    /* swap states b and c */
2149
                    swap_em_state(&sb, &sc);
2150
                }
2151
                else
2152
                {
2153
                    /* Replace a endpoint with b */
2154
                                        sa->epot = sb->epot;
2155
                                        a = b;
2156
                                        gpa = gpb;
2157
                    /* swap states a and b */
2158
                    swap_em_state(&sa, &sb);
2159
                }
2160

    
2161
                /*
2162
                 * Stop search as soon as we find a value smaller than the endpoints,
2163
                 * or if the tolerance is below machine precision.
2164
                 * Never run more than 20 steps, no matter what.
2165
                 */
2166
                nminstep++;
2167
            }
2168
            while ((sb->epot > sa->epot || sb->epot > sc->epot) && (nminstep < 20));
2169

    
2170
            if (std::fabs(sb->epot - Epot0) < GMX_REAL_EPS || nminstep >= 20)
2171
            {
2172
                /* OK. We couldn't find a significantly lower energy.
2173
                 * If ncorr==0 this was steepest descent, and then we give up.
2174
                 * If not, reset memory to restart as steepest descent before quitting.
2175
                 */
2176
                if (ncorr == 0)
2177
                {
2178
                    /* Converged */
2179
                    converged = TRUE;
2180
                    break;
2181
                }
2182
                else
2183
                {
2184
                    /* Reset memory */
2185
                    ncorr = 0;
2186
                    /* Search in gradient direction */
2187
                    for (i = 0; i < n; i++)
2188
                    {
2189
                        dx[point][i] = ff[i];
2190
                    }
2191
                    /* Reset stepsize */
2192
                    stepsize = 1.0/fnorm;
2193
                    continue;
2194
                }
2195
            }
2196

    
2197
            /* Select min energy state of A & C, put the best in xx/ff/Epot
2198
             */
2199
            if (sc->epot < sa->epot)
2200
            {
2201
                /* Use state C */
2202
                ems        = *sc;
2203
                step_taken = c;
2204
            }
2205
            else
2206
            {
2207
                /* Use state A */
2208
                ems        = *sa;
2209
                step_taken = a;
2210
            }
2211

    
2212
        }
2213
        else
2214
        {
2215
            /* found lower */
2216
            /* Use state C */
2217
            ems        = *sc;
2218
            step_taken = c;
2219
        }
2220

    
2221
        /* Update the memory information, and calculate a new
2222
         * approximation of the inverse hessian
2223
         */
2224

    
2225
        /* Have new data in Epot, xx, ff */
2226
        if (ncorr < nmaxcorr)
2227
        {
2228
            ncorr++;
2229
        }
2230

    
2231
        for (i = 0; i < n; i++)
2232
        {
2233
            dg[point][i]  = lastf[i]-ff[i];
2234
            dx[point][i] *= step_taken;
2235
        }
2236

    
2237
        dgdg = 0;
2238
        dgdx = 0;
2239
        for (i = 0; i < n; i++)
2240
        {
2241
            dgdg += dg[point][i]*dg[point][i];
2242
            dgdx += dg[point][i]*dx[point][i];
2243
        }
2244

    
2245
        diag = dgdx/dgdg;
2246

    
2247
        rho[point] = 1.0/dgdx;
2248
        point++;
2249

    
2250
        if (point >= nmaxcorr)
2251
        {
2252
            point = 0;
2253
        }
2254

    
2255
        /* Update */
2256
        for (i = 0; i < n; i++)
2257
        {
2258
            p[i] = ff[i];
2259
        }
2260

    
2261
        cp = point;
2262

    
2263
        /* Recursive update. First go back over the memory points */
2264
        for (k = 0; k < ncorr; k++)
2265
        {
2266
            cp--;
2267
            if (cp < 0)
2268
            {
2269
                cp = ncorr-1;
2270
            }
2271

    
2272
            sq = 0;
2273
            for (i = 0; i < n; i++)
2274
            {
2275
                sq += dx[cp][i]*p[i];
2276
            }
2277

    
2278
            alpha[cp] = rho[cp]*sq;
2279

    
2280
            for (i = 0; i < n; i++)
2281
            {
2282
                p[i] -= alpha[cp]*dg[cp][i];
2283
            }
2284
        }
2285

    
2286
        for (i = 0; i < n; i++)
2287
        {
2288
            p[i] *= diag;
2289
        }
2290

    
2291
        /* And then go forward again */
2292
        for (k = 0; k < ncorr; k++)
2293
        {
2294
            yr = 0;
2295
            for (i = 0; i < n; i++)
2296
            {
2297
                yr += p[i]*dg[cp][i];
2298
            }
2299

    
2300
            beta = rho[cp]*yr;
2301
            beta = alpha[cp]-beta;
2302

    
2303
            for (i = 0; i < n; i++)
2304
            {
2305
                p[i] += beta*dx[cp][i];
2306
            }
2307

    
2308
            cp++;
2309
            if (cp >= ncorr)
2310
            {
2311
                cp = 0;
2312
            }
2313
        }
2314

    
2315
        for (i = 0; i < n; i++)
2316
        {
2317
            if (!frozen[i])
2318
            {
2319
                dx[point][i] = p[i];
2320
            }
2321
            else
2322
            {
2323
                dx[point][i] = 0;
2324
            }
2325
        }
2326

    
2327
        /* Print it if necessary */
2328
        if (MASTER(cr))
2329
        {
2330
            if (mdrunOptions.verbose)
2331
            {
2332
                double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2333
                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
2334
                        step, ems.epot, ems.fnorm/sqrtNumAtoms, ems.fmax, ems.a_fmax + 1);
2335
                fflush(stderr);
2336
            }
2337
            /* Store the new (lower) energies */
2338
            matrix nullBox = {};
2339
            upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step),
2340
                       mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox,
2341
                       nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
2342
            do_log = do_per_step(step, inputrec->nstlog);
2343
            do_ene = do_per_step(step, inputrec->nstenergy);
2344
            if (do_log)
2345
            {
2346
                print_ebin_header(fplog, step, step);
2347
            }
2348
            print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
2349
                       do_log ? fplog : nullptr, step, step, eprNORMAL,
2350
                       mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr);
2351
        }
2352

    
2353
        /* Send x and E to IMD client, if bIMD is TRUE. */
2354
        if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x.rvec_array(), inputrec, 0, wcycle) && MASTER(cr))
2355
        {
2356
            IMD_send_positions(inputrec->imd);
2357
        }
2358

    
2359
        // Reset stepsize if we are doing more iterations
2360
        stepsize = 1.0;
2361

    
2362
        /* Stop when the maximum force lies below tolerance.
2363
         * If we have reached machine precision, converged is already set to true.
2364
         */
2365
        converged = converged || (ems.fmax < inputrec->em_tol);
2366

    
2367
    }   /* End of the loop */
2368

    
2369
    /* IMD cleanup, if bIMD is TRUE. */
2370
    IMD_finalize(inputrec->bIMD, inputrec->imd);
2371

    
2372
    if (converged)
2373
    {
2374
        step--; /* we never took that last step in this case */
2375

    
2376
    }
2377
    if (ems.fmax > inputrec->em_tol)
2378
    {
2379
        if (MASTER(cr))
2380
        {
2381
            warn_step(fplog, inputrec->em_tol, ems.fmax,
2382
                      step-1 == number_steps, FALSE);
2383
        }
2384
        converged = FALSE;
2385
    }
2386

    
2387
    /* If we printed energy and/or logfile last step (which was the last step)
2388
     * we don't have to do it again, but otherwise print the final values.
2389
     */
2390
    if (!do_log) /* Write final value to log since we didn't do anythin last step */
2391
    {
2392
        print_ebin_header(fplog, step, step);
2393
    }
2394
    if (!do_ene || !do_log) /* Write final energy file entries */
2395
    {
2396
        print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
2397
                   !do_log ? fplog : nullptr, step, step, eprNORMAL,
2398
                   mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr);
2399
    }
2400

    
2401
    /* Print some stuff... */
2402
    if (MASTER(cr))
2403
    {
2404
        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2405
    }
2406

    
2407
    /* IMPORTANT!
2408
     * For accurate normal mode calculation it is imperative that we
2409
     * store the last conformation into the full precision binary trajectory.
2410
     *
2411
     * However, we should only do it if we did NOT already write this step
2412
     * above (which we did if do_x or do_f was true).
2413
     */
2414
    do_x = !do_per_step(step, inputrec->nstxout);
2415
    do_f = !do_per_step(step, inputrec->nstfout);
2416
    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
2417
                  top_global, inputrec, step,
2418
                  &ems, state_global, observablesHistory);
2419

    
2420
    if (MASTER(cr))
2421
    {
2422
        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2423
        print_converged(stderr, LBFGS, inputrec->em_tol, step, converged,
2424
                        number_steps, &ems, sqrtNumAtoms);
2425
        print_converged(fplog, LBFGS, inputrec->em_tol, step, converged,
2426
                        number_steps, &ems, sqrtNumAtoms);
2427

    
2428
        fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
2429
    }
2430

    
2431
    finish_em(cr, outf, walltime_accounting, wcycle);
2432

    
2433
    /* To print the actual number of steps we needed somewhere */
2434
    walltime_accounting_set_nsteps_done(walltime_accounting, step);
2435
}
2436

    
2437
void
2438
Integrator::do_steep()
2439
{
2440
    const char       *SD = "Steepest Descents";
2441
    gmx_localtop_t   *top;
2442
    gmx_enerdata_t   *enerd;
2443
    gmx_global_stat_t gstat;
2444
    t_graph          *graph;
2445
    real              stepsize;
2446
    real              ustep;
2447
    gmx_mdoutf_t      outf;
2448
    t_mdebin         *mdebin;
2449
    gmx_bool          bDone, bAbort, do_x, do_f;
2450
    tensor            vir, pres;
2451
    rvec              mu_tot;
2452
    int               nsteps;
2453
    int               count          = 0;
2454
    int               steps_accepted = 0;
2455
    auto              mdatoms        = mdAtoms->mdatoms();
2456

    
2457
    GMX_LOG(mdlog.info).asParagraph().
2458
        appendText("Note that activating steepest-descent energy minimization via the "
2459
                   "integrator .mdp option and the command gmx mdrun may "
2460
                   "be available in a different form in a future version of GROMACS, "
2461
                   "e.g. gmx minimize and an .mdp option.");
2462

    
2463
    /* Create 2 states on the stack and extract pointers that we will swap */
2464
    em_state_t  s0 {}, s1 {};
2465
    em_state_t *s_min = &s0;
2466
    em_state_t *s_try = &s1;
2467

    
2468
    /* Init em and store the local state in s_try */
2469
    init_em(fplog, mdlog, SD, cr, ms, outputProvider, inputrec, mdrunOptions,
2470
            state_global, top_global, s_try, &top,
2471
            nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat,
2472
            vsite, constr, nullptr,
2473
            nfile, fnm, &outf, &mdebin, wcycle);
2474

    
2475
    /* Print to log file  */
2476
    print_em_start(fplog, cr, walltime_accounting, wcycle, SD);
2477

    
2478
    /* Set variables for stepsize (in nm). This is the largest
2479
     * step that we are going to make in any direction.
2480
     */
2481
    ustep    = inputrec->em_stepsize;
2482
    stepsize = 0;
2483

    
2484
    /* Max number of steps  */
2485
    nsteps = inputrec->nsteps;
2486

    
2487
    if (MASTER(cr))
2488
    {
2489
        /* Print to the screen  */
2490
        sp_header(stderr, SD, inputrec->em_tol, nsteps);
2491
    }
2492
    if (fplog)
2493
    {
2494
        sp_header(fplog, SD, inputrec->em_tol, nsteps);
2495
    }
2496
    EnergyEvaluator energyEvaluator {
2497
        fplog, mdlog, cr, ms,
2498
        top_global, top,
2499
        inputrec, nrnb, wcycle, gstat,
2500
        vsite, constr, fcd, graph,
2501
        mdAtoms, fr, ppForceWorkload, enerd
2502
    };
2503

    
2504
    /**** HERE STARTS THE LOOP ****
2505
     * count is the counter for the number of steps
2506
     * bDone will be TRUE when the minimization has converged
2507
     * bAbort will be TRUE when nsteps steps have been performed or when
2508
     * the stepsize becomes smaller than is reasonable for machine precision
2509
     */
2510
    count  = 0;
2511
    bDone  = FALSE;
2512
    bAbort = FALSE;
2513
    while (!bDone && !bAbort)
2514
    {
2515
        bAbort = (nsteps >= 0) && (count == nsteps);
2516

    
2517
        /* set new coordinates, except for first step */
2518
        bool validStep = true;
2519
        if (count > 0)
2520
        {
2521
            validStep =
2522
                do_em_step(cr, inputrec, mdatoms,
2523
                           s_min, stepsize, &s_min->f, s_try,
2524
                           constr, count);
2525
        }
2526

    
2527
        if (validStep)
2528
        {
2529
            energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0);
2530
        }
2531
        else
2532
        {
2533
            // Signal constraint error during stepping with energy=inf
2534
            s_try->epot = std::numeric_limits<real>::infinity();
2535
        }
2536

    
2537
        if (MASTER(cr))
2538
        {
2539
            print_ebin_header(fplog, count, count);
2540
        }
2541

    
2542
        if (count == 0)
2543
        {
2544
            s_min->epot = s_try->epot;
2545
        }
2546

    
2547
        /* Print it if necessary  */
2548
        if (MASTER(cr))
2549
        {
2550
            if (mdrunOptions.verbose)
2551
            {
2552
                fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2553
                        count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1,
2554
                        ( (count == 0) || (s_try->epot < s_min->epot) ) ? '\n' : '\r');
2555
                fflush(stderr);
2556
            }
2557

    
2558
            if ( (count == 0) || (s_try->epot < s_min->epot) )
2559
            {
2560
                /* Store the new (lower) energies  */
2561
                matrix nullBox = {};
2562
                upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(count),
2563
                           mdatoms->tmass, enerd, nullptr, nullptr, nullptr,
2564
                           nullBox, nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
2565

    
2566
                /* Prepare IMD energy record, if bIMD is TRUE. */
2567
                IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, count, TRUE);
2568

    
2569
                print_ebin(mdoutf_get_fp_ene(outf), TRUE,
2570
                           do_per_step(steps_accepted, inputrec->nstdisreout),
2571
                           do_per_step(steps_accepted, inputrec->nstorireout),
2572
                           fplog, count, count, eprNORMAL,
2573
                           mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr);
2574
                fflush(fplog);
2575
            }
2576
        }
2577

    
2578
        /* Now if the new energy is smaller than the previous...
2579
         * or if this is the first step!
2580
         * or if we did random steps!
2581
         */
2582

    
2583
        if ( (count == 0) || (s_try->epot < s_min->epot) )
2584
        {
2585
            steps_accepted++;
2586

    
2587
            /* Test whether the convergence criterion is met...  */
2588
            bDone = (s_try->fmax < inputrec->em_tol);
2589

    
2590
            /* Copy the arrays for force, positions and energy  */
2591
            /* The 'Min' array always holds the coords and forces of the minimal
2592
               sampled energy  */
2593
            swap_em_state(&s_min, &s_try);
2594
            if (count > 0)
2595
            {
2596
                ustep *= 1.2;
2597
            }
2598

    
2599
            /* Write to trn, if necessary */
2600
            do_x = do_per_step(steps_accepted, inputrec->nstxout);
2601
            do_f = do_per_step(steps_accepted, inputrec->nstfout);
2602
            write_em_traj(fplog, cr, outf, do_x, do_f, nullptr,
2603
                          top_global, inputrec, count,
2604
                          s_min, state_global, observablesHistory);
2605
        }
2606
        else
2607
        {
2608
            /* If energy is not smaller make the step smaller...  */
2609
            ustep *= 0.5;
2610

    
2611
            if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
2612
            {
2613
                /* Reload the old state */
2614
                em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec,
2615
                                       s_min, top, mdAtoms, fr, vsite, constr,
2616
                                       nrnb, wcycle);
2617
            }
2618
        }
2619

    
2620
        /* Determine new step  */
2621
        stepsize = ustep/s_min->fmax;
2622

    
2623
        /* Check if stepsize is too small, with 1 nm as a characteristic length */
2624
#if GMX_DOUBLE
2625
        if (count == nsteps || ustep < 1e-12)
2626
#else
2627
        if (count == nsteps || ustep < 1e-6)
2628
#endif
2629
        {
2630
            if (MASTER(cr))
2631
            {
2632
                warn_step(fplog, inputrec->em_tol, s_min->fmax,
2633
                          count == nsteps, constr != nullptr);
2634
            }
2635
            bAbort = TRUE;
2636
        }
2637

    
2638
        /* Send IMD energies and positions, if bIMD is TRUE. */
2639
        if (do_IMD(inputrec->bIMD, count, cr, TRUE, state_global->box,
2640
                   MASTER(cr) ? state_global->x.rvec_array() : nullptr,
2641
                   inputrec, 0, wcycle) &&
2642
            MASTER(cr))
2643
        {
2644
            IMD_send_positions(inputrec->imd);
2645
        }
2646

    
2647
        count++;
2648
    }   /* End of the loop  */
2649

    
2650
    /* IMD cleanup, if bIMD is TRUE. */
2651
    IMD_finalize(inputrec->bIMD, inputrec->imd);
2652

    
2653
    /* Print some data...  */
2654
    if (MASTER(cr))
2655
    {
2656
        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2657
    }
2658
    write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout != 0, ftp2fn(efSTO, nfile, fnm),
2659
                  top_global, inputrec, count,
2660
                  s_min, state_global, observablesHistory);
2661

    
2662
    if (MASTER(cr))
2663
    {
2664
        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2665

    
2666
        print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps,
2667
                        s_min, sqrtNumAtoms);
2668
        print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps,
2669
                        s_min, sqrtNumAtoms);
2670
    }
2671

    
2672
    finish_em(cr, outf, walltime_accounting, wcycle);
2673

    
2674
    /* To print the actual number of steps we needed somewhere */
2675
    inputrec->nsteps = count;
2676

    
2677
    walltime_accounting_set_nsteps_done(walltime_accounting, count);
2678
}
2679

    
2680
void
2681
Integrator::do_nm()
2682
{
2683
    const char          *NM = "Normal Mode Analysis";
2684
    gmx_mdoutf_t         outf;
2685
    int                  nnodes, node;
2686
    gmx_localtop_t      *top;
2687
    gmx_enerdata_t      *enerd;
2688
    gmx_global_stat_t    gstat;
2689
    t_graph             *graph;
2690
    tensor               vir, pres;
2691
    rvec                 mu_tot;
2692
    rvec                *dfdx;
2693
    gmx_bool             bSparse; /* use sparse matrix storage format */
2694
    size_t               sz;
2695
    gmx_sparsematrix_t * sparse_matrix           = nullptr;
2696
    real           *     full_matrix             = nullptr;
2697

    
2698
    /* added with respect to mdrun */
2699
    int                       row, col;
2700
    real                      der_range = 10.0*std::sqrt(GMX_REAL_EPS);
2701
    real                      x_min;
2702
    bool                      bIsMaster = MASTER(cr);
2703
    auto                      mdatoms   = mdAtoms->mdatoms();
2704

    
2705
    GMX_LOG(mdlog.info).asParagraph().
2706
        appendText("Note that activating normal-mode analysis via the integrator "
2707
                   ".mdp option and the command gmx mdrun may "
2708
                   "be available in a different form in a future version of GROMACS, "
2709
                   "e.g. gmx normal-modes.");
2710

    
2711
    if (constr != nullptr)
2712
    {
2713
        gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported");
2714
    }
2715

    
2716
    gmx_shellfc_t *shellfc;
2717

    
2718
    em_state_t     state_work {};
2719

    
2720
    /* Init em and store the local state in state_minimum */
2721
    init_em(fplog, mdlog, NM, cr, ms, outputProvider, inputrec, mdrunOptions,
2722
            state_global, top_global, &state_work, &top,
2723
            nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat,
2724
            vsite, constr, &shellfc,
2725
            nfile, fnm, &outf, nullptr, wcycle);
2726

    
2727
    std::vector<int>       atom_index = get_atom_index(top_global);
2728
    std::vector<gmx::RVec> fneg(atom_index.size(), {0, 0, 0});
2729
    snew(dfdx, atom_index.size());
2730

    
2731
#if !GMX_DOUBLE
2732
    if (bIsMaster)
2733
    {
2734
        fprintf(stderr,
2735
                "NOTE: This version of GROMACS has been compiled in single precision,\n"
2736
                "      which MIGHT not be accurate enough for normal mode analysis.\n"
2737
                "      GROMACS now uses sparse matrix storage, so the memory requirements\n"
2738
                "      are fairly modest even if you recompile in double precision.\n\n");
2739
    }
2740
#endif
2741

    
2742
    /* Check if we can/should use sparse storage format.
2743
     *
2744
     * Sparse format is only useful when the Hessian itself is sparse, which it
2745
     * will be when we use a cutoff.
2746
     * For small systems (n<1000) it is easier to always use full matrix format, though.
2747
     */
2748
    if (EEL_FULL(fr->ic->eeltype) || fr->rlist == 0.0)
2749
    {
2750
        GMX_LOG(mdlog.warning).appendText("Non-cutoff electrostatics used, forcing full Hessian format.");
2751
        bSparse = FALSE;
2752
    }
2753
    else if (atom_index.size() < 1000)
2754
    {
2755
        GMX_LOG(mdlog.warning).appendTextFormatted("Small system size (N=%zu), using full Hessian format.",
2756
                                                   atom_index.size());
2757
        bSparse = FALSE;
2758
    }
2759
    else
2760
    {
2761
        GMX_LOG(mdlog.warning).appendText("Using compressed symmetric sparse Hessian format.");
2762
        bSparse = TRUE;
2763
    }
2764

    
2765
    /* Number of dimensions, based on real atoms, that is not vsites or shell */
2766
    sz = DIM*atom_index.size();
2767

    
2768
    fprintf(stderr, "Allocating Hessian memory...\n\n");
2769

    
2770
    if (bSparse)
2771
    {
2772
        sparse_matrix = gmx_sparsematrix_init(sz);
2773
        sparse_matrix->compressed_symmetric = TRUE;
2774
    }
2775
    else
2776
    {
2777
        snew(full_matrix, sz*sz);
2778
    }
2779

    
2780
    init_nrnb(nrnb);
2781

    
2782

    
2783
    /* Write start time and temperature */
2784
    print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
2785

    
2786
    /* fudge nr of steps to nr of atoms */
2787
    inputrec->nsteps = atom_index.size()*2;
2788

    
2789
    if (bIsMaster)
2790
    {
2791
        fprintf(stderr, "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n",
2792
                *(top_global->name), inputrec->nsteps);
2793
    }
2794

    
2795
    nnodes = cr->nnodes;
2796

    
2797
    /* Make evaluate_energy do a single node force calculation */
2798
    cr->nnodes = 1;
2799
    EnergyEvaluator energyEvaluator {
2800
        fplog, mdlog, cr, ms,
2801
        top_global, top,
2802
        inputrec, nrnb, wcycle, gstat,
2803
        vsite, constr, fcd, graph,
2804
        mdAtoms, fr, ppForceWorkload, enerd
2805
    };
2806
    energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE);
2807
    cr->nnodes = nnodes;
2808

    
2809
    /* if forces are not small, warn user */
2810
    get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, &state_work);
2811

    
2812
    GMX_LOG(mdlog.warning).appendTextFormatted("Maximum force:%12.5e", state_work.fmax);
2813
    if (state_work.fmax > 1.0e-3)
2814
    {
2815
        GMX_LOG(mdlog.warning).appendText(
2816
                "The force is probably not small enough to "
2817
                "ensure that you are at a minimum.\n"
2818
                "Be aware that negative eigenvalues may occur\n"
2819
                "when the resulting matrix is diagonalized.");
2820
    }
2821

    
2822
    /***********************************************************
2823
     *
2824
     *      Loop over all pairs in matrix
2825
     *
2826
     *      do_force called twice. Once with positive and
2827
     *      once with negative displacement
2828
     *
2829
     ************************************************************/
2830

    
2831
    /* Steps are divided one by one over the nodes */
2832
    bool bNS          = true;
2833
    auto state_work_x = makeArrayRef(state_work.s.x);
2834
    auto state_work_f = makeArrayRef(state_work.f);
2835
    for (unsigned int aid = cr->nodeid; aid < atom_index.size(); aid += nnodes)
2836
    {
2837
        size_t atom = atom_index[aid];
2838
        for (size_t d = 0; d < DIM; d++)
2839
        {
2840
            int64_t     step        = 0;
2841
            int         force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES;
2842
            double      t           = 0;
2843

    
2844
            x_min = state_work_x[atom][d];
2845

    
2846
            for (unsigned int dx = 0; (dx < 2); dx++)
2847
            {
2848
                if (dx == 0)
2849
                {
2850
                    state_work_x[atom][d] = x_min - der_range;
2851
                }
2852
                else
2853
                {
2854
                    state_work_x[atom][d] = x_min + der_range;
2855
                }
2856

    
2857
                /* Make evaluate_energy do a single node force calculation */
2858
                cr->nnodes = 1;
2859
                if (shellfc)
2860
                {
2861
                    /* Now is the time to relax the shells */
2862
                    relax_shell_flexcon(fplog,
2863
                                        cr,
2864
                                        ms,
2865
                                        mdrunOptions.verbose,
2866
                                        nullptr,
2867
                                        step,
2868
                                        inputrec,
2869
                                        bNS,
2870
                                        force_flags,
2871
                                        top,
2872
                                        constr,
2873
                                        enerd,
2874
                                        fcd,
2875
                                        &state_work.s,
2876
                                        state_work.f.arrayRefWithPadding(),
2877
                                        vir,
2878
                                        mdatoms,
2879
                                        nrnb,
2880
                                        wcycle,
2881
                                        graph,
2882
                                        &top_global->groups,
2883
                                        shellfc,
2884
                                        fr,
2885
                                        ppForceWorkload,
2886
                                        t,
2887
                                        mu_tot,
2888
                                        vsite,
2889
                                        DdOpenBalanceRegionBeforeForceComputation::no,
2890
                                        DdCloseBalanceRegionAfterForceComputation::no);
2891
                    bNS = false;
2892
                    step++;
2893
                }
2894
                else
2895
                {
2896
                    energyEvaluator.run(&state_work, mu_tot, vir, pres, aid*2+dx, FALSE);
2897
                }
2898

    
2899
                cr->nnodes = nnodes;
2900

    
2901
                if (dx == 0)
2902
                {
2903
                    std::copy(state_work_f.begin(), state_work_f.begin()+atom_index.size(), fneg.begin());
2904
                }
2905
            }
2906

    
2907
            /* x is restored to original */
2908
            state_work_x[atom][d] = x_min;
2909

    
2910
            for (size_t j = 0; j < atom_index.size(); j++)
2911
            {
2912
                for (size_t k = 0; (k < DIM); k++)
2913
                {
2914
                    dfdx[j][k] =
2915
                        -(state_work_f[atom_index[j]][k] - fneg[j][k])/(2*der_range);
2916
                }
2917
            }
2918

    
2919
            if (!bIsMaster)
2920
            {
2921
#if GMX_MPI
2922
#define mpi_type GMX_MPI_REAL
2923
                MPI_Send(dfdx[0], atom_index.size()*DIM, mpi_type, MASTER(cr),
2924
                         cr->nodeid, cr->mpi_comm_mygroup);
2925
#endif
2926
            }
2927
            else
2928
            {
2929
                for (node = 0; (node < nnodes && aid+node < atom_index.size()); node++)
2930
                {
2931
                    if (node > 0)
2932
                    {
2933
#if GMX_MPI
2934
                        MPI_Status stat;
2935
                        MPI_Recv(dfdx[0], atom_index.size()*DIM, mpi_type, node, node,
2936
                                 cr->mpi_comm_mygroup, &stat);
2937
#undef mpi_type
2938
#endif
2939
                    }
2940

    
2941
                    row = (aid + node)*DIM + d;
2942

    
2943
                    for (size_t j = 0; j < atom_index.size(); j++)
2944
                    {
2945
                        for (size_t k = 0; k < DIM; k++)
2946
                        {
2947
                            col = j*DIM + k;
2948

    
2949
                            if (bSparse)
2950
                            {
2951
                                if (col >= row && dfdx[j][k] != 0.0)
2952
                                {
2953
                                    gmx_sparsematrix_increment_value(sparse_matrix,
2954
                                                                     row, col, dfdx[j][k]);
2955
                                }
2956
                            }
2957
                            else
2958
                            {
2959
                                full_matrix[row*sz+col] = dfdx[j][k];
2960
                            }
2961
                        }
2962
                    }
2963
                }
2964
            }
2965

    
2966
            if (mdrunOptions.verbose && fplog)
2967
            {
2968
                fflush(fplog);
2969
            }
2970
        }
2971
        /* write progress */
2972
        if (bIsMaster && mdrunOptions.verbose)
2973
        {
2974
            fprintf(stderr, "\rFinished step %d out of %d",
2975
                    static_cast<int>(std::min(atom+nnodes, atom_index.size())),
2976
                    static_cast<int>(atom_index.size()));
2977
            fflush(stderr);
2978
        }
2979
    }
2980

    
2981
    if (bIsMaster)
2982
    {
2983
        fprintf(stderr, "\n\nWriting Hessian...\n");
2984
        gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix);
2985
    }
2986

    
2987
    finish_em(cr, outf, walltime_accounting, wcycle);
2988

    
2989
    walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size()*2);
2990
}
2991

    
2992
} // namespace gmx