/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file defines integrators for energy minimization
 *
 * \author Berk Hess <hess@kth.se>
 * \author Erik Lindahl <erik@kth.se>
 * \ingroup module_mdrun
 */

45 
#include "gmxpre.h" 
46  
47 
#include "config.h" 
48  
49 
#include <cmath> 
50 
#include <cstring> 
51 
#include <ctime> 
52  
53 
#include <algorithm> 
54 
#include <vector> 
55  
56 
#include "gromacs/commandline/filenm.h" 
57 
#include "gromacs/domdec/collect.h" 
58 
#include "gromacs/domdec/dlbtiming.h" 
59 
#include "gromacs/domdec/domdec.h" 
60 
#include "gromacs/domdec/domdec_struct.h" 
61 
#include "gromacs/domdec/partition.h" 
62 
#include "gromacs/ewald/pme.h" 
63 
#include "gromacs/fileio/confio.h" 
64 
#include "gromacs/fileio/mtxio.h" 
65 
#include "gromacs/gmxlib/network.h" 
66 
#include "gromacs/gmxlib/nrnb.h" 
67 
#include "gromacs/imd/imd.h" 
68 
#include "gromacs/linearalgebra/sparsematrix.h" 
69 
#include "gromacs/listed_forces/manage_threading.h" 
70 
#include "gromacs/math/functions.h" 
71 
#include "gromacs/math/vec.h" 
72 
#include "gromacs/mdlib/constr.h" 
73 
#include "gromacs/mdlib/dispersioncorrection.h" 
74 
#include "gromacs/mdlib/ebin.h" 
75 
#include "gromacs/mdlib/energyoutput.h" 
76 
#include "gromacs/mdlib/force.h" 
77 
#include "gromacs/mdlib/forcerec.h" 
78 
#include "gromacs/mdlib/gmx_omp_nthreads.h" 
79 
#include "gromacs/mdlib/md_support.h" 
80 
#include "gromacs/mdlib/mdatoms.h" 
81 
#include "gromacs/mdlib/mdsetup.h" 
82 
#include "gromacs/mdlib/ns.h" 
83 
#include "gromacs/mdlib/shellfc.h" 
84 
#include "gromacs/mdlib/stat.h" 
85 
#include "gromacs/mdlib/tgroup.h" 
86 
#include "gromacs/mdlib/trajectory_writing.h" 
87 
#include "gromacs/mdlib/update.h" 
88 
#include "gromacs/mdlib/vsite.h" 
89 
#include "gromacs/mdrunutility/printtime.h" 
90 
#include "gromacs/mdtypes/commrec.h" 
91 
#include "gromacs/mdtypes/inputrec.h" 
92 
#include "gromacs/mdtypes/md_enums.h" 
93 
#include "gromacs/mdtypes/mdrunoptions.h" 
94 
#include "gromacs/mdtypes/state.h" 
95 
#include "gromacs/pbcutil/mshift.h" 
96 
#include "gromacs/pbcutil/pbc.h" 
97 
#include "gromacs/timing/wallcycle.h" 
98 
#include "gromacs/timing/walltime_accounting.h" 
99 
#include "gromacs/topology/mtop_util.h" 
100 
#include "gromacs/topology/topology.h" 
101 
#include "gromacs/utility/cstringutil.h" 
102 
#include "gromacs/utility/exceptions.h" 
103 
#include "gromacs/utility/fatalerror.h" 
104 
#include "gromacs/utility/logger.h" 
105 
#include "gromacs/utility/smalloc.h" 
106  
107 
#include "integrator.h" 
108  
109 
//! Utility structure for manipulating states during EM

110 
typedef struct { 
111 
//! Copy of the global state

112 
t_state s; 
113 
//! Force array

114 
PaddedVector<gmx::RVec> f; 
115 
//! Potential energy

116 
real epot; 
117 
//! Norm of the force

118 
real fnorm; 
119 
//! Maximum force

120 
real fmax; 
121 
//! Direction

122 
int a_fmax;

123 
} em_state_t; 
124  
125 
//! Print the EM starting conditions

126 
static void print_em_start(FILE *fplog, 
127 
const t_commrec *cr,

128 
gmx_walltime_accounting_t walltime_accounting, 
129 
gmx_wallcycle_t wcycle, 
130 
const char *name) 
131 
{ 
132 
walltime_accounting_start_time(walltime_accounting); 
133 
wallcycle_start(wcycle, ewcRUN); 
134 
print_start(fplog, cr, walltime_accounting, name); 
135 
} 
136  
137 
//! Stop counting time for EM

138 
static void em_time_end(gmx_walltime_accounting_t walltime_accounting, 
139 
gmx_wallcycle_t wcycle) 
140 
{ 
141 
wallcycle_stop(wcycle, ewcRUN); 
142  
143 
walltime_accounting_end_time(walltime_accounting); 
144 
} 
145  
146 
//! Printing a log file and console header

147 
static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps) 
148 
{ 
149 
fprintf(out, "\n");

150 
fprintf(out, "%s:\n", minimizer);

151 
fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol);

152 
fprintf(out, " Number of steps = %12d\n", nsteps);

153 
} 
154  
155 
//! Print warning message

156 
static void warn_step(FILE *fp, 
157 
real ftol, 
158 
real fmax, 
159 
gmx_bool bLastStep, 
160 
gmx_bool bConstrain) 
161 
{ 
162 
constexpr bool realIsDouble = GMX_DOUBLE; 
163 
char buffer[2048]; 
164  
165 
if (!std::isfinite(fmax))

166 
{ 
167 
sprintf(buffer, 
168 
"\nEnergy minimization has stopped because the force "

169 
"on at least one atom is not finite. This usually means "

170 
"atoms are overlapping. Modify the input coordinates to "

171 
"remove atom overlap or use softcore potentials with "

172 
"the free energy code to avoid infinite forces.\n%s",

173 
!realIsDouble ? 
174 
"You could also be lucky that switching to double precision "

175 
"is sufficient to obtain finite forces.\n" :

176 
"");

177 
} 
178 
else if (bLastStep) 
179 
{ 
180 
sprintf(buffer, 
181 
"\nEnergy minimization reached the maximum number "

182 
"of steps before the forces reached the requested "

183 
"precision Fmax < %g.\n", ftol);

184 
} 
185 
else

186 
{ 
187 
sprintf(buffer, 
188 
"\nEnergy minimization has stopped, but the forces have "

189 
"not converged to the requested precision Fmax < %g (which "

190 
"may not be possible for your system). It stopped "

191 
"because the algorithm tried to make a new step whose size "

192 
"was too small, or there was no change in the energy since "

193 
"last step. Either way, we regard the minimization as "

194 
"converged to within the available machine precision, "

195 
"given your starting configuration and EM parameters.\n%s%s",

196 
ftol, 
197 
!realIsDouble ? 
198 
"\nDouble precision normally gives you higher accuracy, but "

199 
"this is often not needed for preparing to run molecular "

200 
"dynamics.\n" :

201 
"",

202 
bConstrain ? 
203 
"You might need to increase your constraint accuracy, or turn\n"

204 
"off constraints altogether (set constraints = none in mdp file)\n" :

205 
"");

206 
} 
207  
208 
fputs(wrap_lines(buffer, 78, 0, FALSE), stderr); 
209 
fputs(wrap_lines(buffer, 78, 0, FALSE), fp); 
210 
} 
211  
212 
//! Print message about convergence of the EM

213 
static void print_converged(FILE *fp, const char *alg, real ftol, 
214 
int64_t count, gmx_bool bDone, int64_t nsteps, 
215 
const em_state_t *ems, double sqrtNumAtoms) 
216 
{ 
217 
char buf[STEPSTRSIZE];

218  
219 
if (bDone)

220 
{ 
221 
fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",

222 
alg, ftol, gmx_step_str(count, buf)); 
223 
} 
224 
else if (count < nsteps) 
225 
{ 
226 
fprintf(fp, "\n%s converged to machine precision in %s steps,\n"

227 
"but did not reach the requested Fmax < %g.\n",

228 
alg, gmx_step_str(count, buf), ftol); 
229 
} 
230 
else

231 
{ 
232 
fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",

233 
alg, ftol, gmx_step_str(count, buf)); 
234 
} 
235  
236 
#if GMX_DOUBLE

237 
fprintf(fp, "Potential Energy = %21.14e\n", ems>epot);

238 
fprintf(fp, "Maximum force = %21.14e on atom %d\n", ems>fmax, ems>a_fmax + 1); 
239 
fprintf(fp, "Norm of force = %21.14e\n", ems>fnorm/sqrtNumAtoms);

240 
#else

241 
fprintf(fp, "Potential Energy = %14.7e\n", ems>epot);

242 
fprintf(fp, "Maximum force = %14.7e on atom %d\n", ems>fmax, ems>a_fmax + 1); 
243 
fprintf(fp, "Norm of force = %14.7e\n", ems>fnorm/sqrtNumAtoms);

244 
#endif

245 
} 
246  
247 
//! Compute the norm and max of the force array in parallel

248 
static void get_f_norm_max(const t_commrec *cr, 
249 
t_grpopts *opts, t_mdatoms *mdatoms, const rvec *f,

250 
real *fnorm, real *fmax, int *a_fmax)

251 
{ 
252 
double fnorm2, *sum;

253 
real fmax2, fam; 
254 
int la_max, a_max, start, end, i, m, gf;

255  
256 
/* This routine finds the largest force and returns it.

257 
* On parallel machines the global max is taken.

258 
*/

259 
fnorm2 = 0;

260 
fmax2 = 0;

261 
la_max = 1;

262 
start = 0;

263 
end = mdatoms>homenr; 
264 
if (mdatoms>cFREEZE)

265 
{ 
266 
for (i = start; i < end; i++)

267 
{ 
268 
gf = mdatoms>cFREEZE[i]; 
269 
fam = 0;

270 
for (m = 0; m < DIM; m++) 
271 
{ 
272 
if (!opts>nFreeze[gf][m])

273 
{ 
274 
fam += gmx::square(f[i][m]); 
275 
} 
276 
} 
277 
fnorm2 += fam; 
278 
if (fam > fmax2)

279 
{ 
280 
fmax2 = fam; 
281 
la_max = i; 
282 
} 
283 
} 
284 
} 
285 
else

286 
{ 
287 
for (i = start; i < end; i++)

288 
{ 
289 
fam = norm2(f[i]); 
290 
fnorm2 += fam; 
291 
if (fam > fmax2)

292 
{ 
293 
fmax2 = fam; 
294 
la_max = i; 
295 
} 
296 
} 
297 
} 
298  
299 
if (la_max >= 0 && DOMAINDECOMP(cr)) 
300 
{ 
301 
a_max = cr>dd>globalAtomIndices[la_max]; 
302 
} 
303 
else

304 
{ 
305 
a_max = la_max; 
306 
} 
307 
if (PAR(cr))

308 
{ 
309 
snew(sum, 2*cr>nnodes+1); 
310 
sum[2*cr>nodeid] = fmax2;

311 
sum[2*cr>nodeid+1] = a_max; 
312 
sum[2*cr>nnodes] = fnorm2;

313 
gmx_sumd(2*cr>nnodes+1, sum, cr); 
314 
fnorm2 = sum[2*cr>nnodes];

315 
/* Determine the global maximum */

316 
for (i = 0; i < cr>nnodes; i++) 
317 
{ 
318 
if (sum[2*i] > fmax2) 
319 
{ 
320 
fmax2 = sum[2*i];

321 
a_max = gmx::roundToInt(sum[2*i+1]); 
322 
} 
323 
} 
324 
sfree(sum); 
325 
} 
326  
327 
if (fnorm)

328 
{ 
329 
*fnorm = sqrt(fnorm2); 
330 
} 
331 
if (fmax)

332 
{ 
333 
*fmax = sqrt(fmax2); 
334 
} 
335 
if (a_fmax)

336 
{ 
337 
*a_fmax = a_max; 
338 
} 
339 
} 
340  
341 
//! Compute the norm of the force

342 
static void get_state_f_norm_max(const t_commrec *cr, 
343 
t_grpopts *opts, t_mdatoms *mdatoms, 
344 
em_state_t *ems) 
345 
{ 
346 
get_f_norm_max(cr, opts, mdatoms, ems>f.rvec_array(), 
347 
&ems>fnorm, &ems>fmax, &ems>a_fmax); 
348 
} 
349  
350 
//! Initialize the energy minimization

351 
static void init_em(FILE *fplog, 
352 
const gmx::MDLogger &mdlog,

353 
const char *title, 
354 
const t_commrec *cr,

355 
const gmx_multisim_t *ms,

356 
t_inputrec *ir, 
357 
const gmx::MdrunOptions &mdrunOptions,

358 
t_state *state_global, gmx_mtop_t *top_global, 
359 
em_state_t *ems, gmx_localtop_t *top, 
360 
t_nrnb *nrnb, 
361 
t_forcerec *fr, 
362 
t_graph **graph, gmx::MDAtoms *mdAtoms, gmx_global_stat_t *gstat, 
363 
gmx_vsite_t *vsite, gmx::Constraints *constr, gmx_shellfc_t **shellfc, 
364 
int nfile, const t_filenm fnm[]) 
365 
{ 
366 
real dvdl_constr; 
367  
368 
if (fplog)

369 
{ 
370 
fprintf(fplog, "Initiating %s\n", title);

371 
} 
372  
373 
if (MASTER(cr))

374 
{ 
375 
state_global>ngtc = 0;

376 
} 
377 
initialize_lambdas(fplog, *ir, MASTER(cr), &(state_global>fep_state), state_global>lambda, nullptr);

378  
379 
init_nrnb(nrnb); 
380  
381 
/* Interactive molecular dynamics */

382 
init_IMD(ir, cr, ms, top_global, fplog, 1,

383 
MASTER(cr) ? state_global>x.rvec_array() : nullptr,

384 
nfile, fnm, nullptr, mdrunOptions);

385  
386 
if (ir>eI == eiNM)

387 
{ 
388 
GMX_ASSERT(shellfc != nullptr, "With NM we always support shells"); 
389  
390 
*shellfc = init_shell_flexcon(stdout, 
391 
top_global, 
392 
constr ? constr>numFlexibleConstraints() : 0,

393 
ir>nstcalcenergy, 
394 
DOMAINDECOMP(cr)); 
395 
} 
396 
else

397 
{ 
398 
GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir>eI), "This else currently only handles energy minimizers, consider if your algorithm needs shell/flexibleconstraint support");

399  
400 
/* With energy minimization, shells and flexible constraints are

401 
* automatically minimized when treated like normal DOFS.

402 
*/

403 
if (shellfc != nullptr) 
404 
{ 
405 
*shellfc = nullptr;

406 
} 
407 
} 
408  
409 
auto mdatoms = mdAtoms>mdatoms();

410 
if (DOMAINDECOMP(cr))

411 
{ 
412 
top>useInDomainDecomp_ = true;

413 
dd_init_local_top(*top_global, top); 
414  
415 
dd_init_local_state(cr>dd, state_global, &ems>s); 
416  
417 
/* Distribute the charge groups over the nodes from the master node */

418 
dd_partition_system(fplog, mdlog, ir>init_step, cr, TRUE, 1,

419 
state_global, *top_global, ir, 
420 
&ems>s, &ems>f, mdAtoms, top, 
421 
fr, vsite, constr, 
422 
nrnb, nullptr, FALSE);

423 
dd_store_state(cr>dd, &ems>s); 
424  
425 
*graph = nullptr;

426 
} 
427 
else

428 
{ 
429 
state_change_natoms(state_global, state_global>natoms); 
430 
/* Just copy the state */

431 
ems>s = *state_global; 
432 
state_change_natoms(&ems>s, ems>s.natoms); 
433 
ems>f.resizeWithPadding(ems>s.natoms); 
434  
435 
mdAlgorithmsSetupAtomData(cr, ir, *top_global, top, fr, 
436 
graph, mdAtoms, 
437 
constr, vsite, shellfc ? *shellfc : nullptr);

438  
439 
if (vsite)

440 
{ 
441 
set_vsite_top(vsite, top, mdatoms); 
442 
} 
443 
} 
444  
445 
update_mdatoms(mdAtoms>mdatoms(), ems>s.lambda[efptMASS]); 
446  
447 
if (constr)

448 
{ 
449 
// TODO how should this crossmodule support dependency be managed?

450 
if (ir>eConstrAlg == econtSHAKE &&

451 
gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)

452 
{ 
453 
gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",

454 
econstr_names[econtSHAKE], econstr_names[econtLINCS]); 
455 
} 
456  
457 
if (!ir>bContinuation)

458 
{ 
459 
/* Constrain the starting coordinates */

460 
dvdl_constr = 0;

461 
constr>apply(TRUE, TRUE, 
462 
1, 0, 1.0, 
463 
ems>s.x.rvec_array(), 
464 
ems>s.x.rvec_array(), 
465 
nullptr,

466 
ems>s.box, 
467 
ems>s.lambda[efptFEP], &dvdl_constr, 
468 
nullptr, nullptr, gmx::ConstraintVariable::Positions); 
469 
} 
470 
} 
471  
472 
if (PAR(cr))

473 
{ 
474 
*gstat = global_stat_init(ir); 
475 
} 
476 
else

477 
{ 
478 
*gstat = nullptr;

479 
} 
480  
481 
calc_shifts(ems>s.box, fr>shift_vec); 
482 
} 
483  
484 
//! Finalize the minimization

485 
static void finish_em(const t_commrec *cr, gmx_mdoutf_t outf, 
486 
gmx_walltime_accounting_t walltime_accounting, 
487 
gmx_wallcycle_t wcycle) 
488 
{ 
489 
if (!thisRankHasDuty(cr, DUTY_PME))

490 
{ 
491 
/* Tell the PME only node to finish */

492 
gmx_pme_send_finish(cr); 
493 
} 
494  
495 
done_mdoutf(outf); 
496  
497 
em_time_end(walltime_accounting, wcycle); 
498 
} 
499  
500 
//! Swap two different EM states during minimization

501 
static void swap_em_state(em_state_t **ems1, em_state_t **ems2) 
502 
{ 
503 
em_state_t *tmp; 
504  
505 
tmp = *ems1; 
506 
*ems1 = *ems2; 
507 
*ems2 = tmp; 
508 
} 
509  
510 
//! Save the EM trajectory

511 
static void write_em_traj(FILE *fplog, const t_commrec *cr, 
512 
gmx_mdoutf_t outf, 
513 
gmx_bool bX, gmx_bool bF, const char *confout, 
514 
gmx_mtop_t *top_global, 
515 
t_inputrec *ir, int64_t step, 
516 
em_state_t *state, 
517 
t_state *state_global, 
518 
ObservablesHistory *observablesHistory) 
519 
{ 
520 
int mdof_flags = 0; 
521  
522 
if (bX)

523 
{ 
524 
mdof_flags = MDOF_X; 
525 
} 
526 
if (bF)

527 
{ 
528 
mdof_flags = MDOF_F; 
529 
} 
530  
531 
/* If we want IMD output, set appropriate MDOF flag */

532 
if (ir>bIMD)

533 
{ 
534 
mdof_flags = MDOF_IMD; 
535 
} 
536  
537 
mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, 
538 
top_global, step, static_cast<double>(step), 
539 
&state>s, state_global, observablesHistory, 
540 
state>f); 
541  
542 
if (confout != nullptr) 
543 
{ 
544 
if (DOMAINDECOMP(cr))

545 
{ 
546 
/* If bX=true, x was collected to state_global in the call above */

547 
if (!bX)

548 
{ 
549 
auto globalXRef = MASTER(cr) ? state_global>x : gmx::ArrayRef<gmx::RVec>();

550 
dd_collect_vec(cr>dd, &state>s, state>s.x, globalXRef); 
551 
} 
552 
} 
553 
else

554 
{ 
555 
/* Copy the local state pointer */

556 
state_global = &state>s; 
557 
} 
558  
559 
if (MASTER(cr))

560 
{ 
561 
if (ir>ePBC != epbcNONE && !ir>bPeriodicMols && DOMAINDECOMP(cr))

562 
{ 
563 
/* Make molecules whole only for confout writing */

564 
do_pbc_mtop(ir>ePBC, state>s.box, top_global, 
565 
state_global>x.rvec_array()); 
566 
} 
567  
568 
write_sto_conf_mtop(confout, 
569 
*top_global>name, top_global, 
570 
state_global>x.rvec_array(), nullptr, ir>ePBC, state>s.box);

571 
} 
572 
} 
573 
} 
574  
575 
//! \brief Do one minimization step

576 
//

577 
// \returns true when the step succeeded, false when a constraint error occurred

578 
static bool do_em_step(const t_commrec *cr, 
579 
t_inputrec *ir, t_mdatoms *md, 
580 
em_state_t *ems1, real a, const PaddedVector<gmx::RVec> *force,

581 
em_state_t *ems2, 
582 
gmx::Constraints *constr, 
583 
int64_t count) 
584  
585 
{ 
586 
t_state *s1, *s2; 
587 
int start, end;

588 
real dvdl_constr; 
589 
int nthreads gmx_unused;

590  
591 
bool validStep = true; 
592  
593 
s1 = &ems1>s; 
594 
s2 = &ems2>s; 
595  
596 
if (DOMAINDECOMP(cr) && s1>ddp_count != cr>dd>ddp_count)

597 
{ 
598 
gmx_incons("state mismatch in do_em_step");

599 
} 
600  
601 
s2>flags = s1>flags; 
602  
603 
if (s2>natoms != s1>natoms)

604 
{ 
605 
state_change_natoms(s2, s1>natoms); 
606 
ems2>f.resizeWithPadding(s2>natoms); 
607 
} 
608 
if (DOMAINDECOMP(cr) && s2>cg_gl.size() != s1>cg_gl.size())

609 
{ 
610 
s2>cg_gl.resize(s1>cg_gl.size()); 
611 
} 
612  
613 
copy_mat(s1>box, s2>box); 
614 
/* Copy free energy state */

615 
s2>lambda = s1>lambda; 
616 
copy_mat(s1>box, s2>box); 
617  
618 
start = 0;

619 
end = md>homenr; 
620  
621 
nthreads = gmx_omp_nthreads_get(emntUpdate); 
622 
#pragma omp parallel num_threads(nthreads)

623 
{ 
624 
const rvec *x1 = s1>x.rvec_array();

625 
rvec *x2 = s2>x.rvec_array(); 
626 
const rvec *f = force>rvec_array();

627  
628 
int gf = 0; 
629 
#pragma omp for schedule(static) nowait 
630 
for (int i = start; i < end; i++) 
631 
{ 
632 
try

633 
{ 
634 
if (md>cFREEZE)

635 
{ 
636 
gf = md>cFREEZE[i]; 
637 
} 
638 
for (int m = 0; m < DIM; m++) 
639 
{ 
640 
if (ir>opts.nFreeze[gf][m])

641 
{ 
642 
x2[i][m] = x1[i][m]; 
643 
} 
644 
else

645 
{ 
646 
x2[i][m] = x1[i][m] + a*f[i][m]; 
647 
} 
648 
} 
649 
} 
650 
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; 
651 
} 
652  
653 
if (s2>flags & (1<<estCGP)) 
654 
{ 
655 
/* Copy the CG p vector */

656 
const rvec *p1 = s1>cg_p.rvec_array();

657 
rvec *p2 = s2>cg_p.rvec_array(); 
658 
#pragma omp for schedule(static) nowait 
659 
for (int i = start; i < end; i++) 
660 
{ 
661 
// Trivial OpenMP block that does not throw

662 
copy_rvec(p1[i], p2[i]); 
663 
} 
664 
} 
665  
666 
if (DOMAINDECOMP(cr))

667 
{ 
668 
s2>ddp_count = s1>ddp_count; 
669  
670 
/* OpenMP does not supported unsigned loop variables */

671 
#pragma omp for schedule(static) nowait 
672 
for (int i = 0; i < gmx::ssize(s2>cg_gl); i++) 
673 
{ 
674 
s2>cg_gl[i] = s1>cg_gl[i]; 
675 
} 
676 
s2>ddp_count_cg_gl = s1>ddp_count_cg_gl; 
677 
} 
678 
} 
679  
680 
if (constr)

681 
{ 
682 
dvdl_constr = 0;

683 
validStep = 
684 
constr>apply(TRUE, TRUE, 
685 
count, 0, 1.0, 
686 
s1>x.rvec_array(), s2>x.rvec_array(), 
687 
nullptr, s2>box,

688 
s2>lambda[efptBONDED], &dvdl_constr, 
689 
nullptr, nullptr, gmx::ConstraintVariable::Positions); 
690  
691 
if (cr>nnodes > 1) 
692 
{ 
693 
/* This global reduction will affect performance at high

694 
* parallelization, but we can not really avoid it.

695 
* But usually EM is not run at high parallelization.

696 
*/

697 
int reductionBuffer = static_cast<int>(!validStep); 
698 
gmx_sumi(1, &reductionBuffer, cr);

699 
validStep = (reductionBuffer == 0);

700 
} 
701  
702 
// We should move this check to the different minimizers

703 
if (!validStep && ir>eI != eiSteep)

704 
{ 
705 
gmx_fatal(FARGS, "The coordinates could not be constrained. Minimizer '%s' can not handle constraint failures, use minimizer '%s' before using '%s'.",

706 
EI(ir>eI), EI(eiSteep), EI(ir>eI)); 
707 
} 
708 
} 
709  
710 
return validStep;

711 
} 
712  
713 
//! Prepare EM for using domain decomposition parallellization

714 
static void em_dd_partition_system(FILE *fplog, 
715 
const gmx::MDLogger &mdlog,

716 
int step, const t_commrec *cr, 
717 
gmx_mtop_t *top_global, t_inputrec *ir, 
718 
em_state_t *ems, gmx_localtop_t *top, 
719 
gmx::MDAtoms *mdAtoms, t_forcerec *fr, 
720 
gmx_vsite_t *vsite, gmx::Constraints *constr, 
721 
t_nrnb *nrnb, gmx_wallcycle_t wcycle) 
722 
{ 
723 
/* Repartition the domain decomposition */

724 
dd_partition_system(fplog, mdlog, step, cr, FALSE, 1,

725 
nullptr, *top_global, ir,

726 
&ems>s, &ems>f, 
727 
mdAtoms, top, fr, vsite, constr, 
728 
nrnb, wcycle, FALSE); 
729 
dd_store_state(cr>dd, &ems>s); 
730 
} 
731  
732 
namespace

733 
{ 
734  
735 
/*! \brief Class to handle the work of setting and doing an energy evaluation.

736 
*

737 
* This class is a mere aggregate of parameters to pass to evaluate an

738 
* energy, so that future changes to names and types of them consume

739 
* less time when refactoring other code.

740 
*

741 
* Aggregate initialization is used, for which the chief risk is that

742 
* if a member is added at the end and not all initializer lists are

743 
* updated, then the member will be value initialized, which will

744 
* typically mean initialization to zero.

745 
*

746 
* We only want to construct one of these with an initializer list, so

747 
* we explicitly delete the default constructor. */

748 
class EnergyEvaluator 
749 
{ 
750 
public:

751 
//! We only intend to construct such objects with an initializer list.

752 
EnergyEvaluator() = delete;

753 
/*! \brief Evaluates an energy on the state in \c ems.

754 
*

755 
* \todo In practice, the same objects mu_tot, vir, and pres

756 
* are always passed to this function, so we would rather have

757 
* them as data members. However, their Carray types are

758 
* unsuited for aggregate initialization. When the types

759 
* improve, the call signature of this method can be reduced.

760 
*/

761 
void run(em_state_t *ems, rvec mu_tot,

762 
tensor vir, tensor pres, 
763 
int64_t count, gmx_bool bFirst); 
764 
//! Handles logging (deprecated).

765 
FILE *fplog; 
766 
//! Handles logging.

767 
const gmx::MDLogger &mdlog;

768 
//! Handles communication.

769 
const t_commrec *cr;

770 
//! Coordinates multisimulations.

771 
const gmx_multisim_t *ms;

772 
//! Holds the simulation topology.

773 
gmx_mtop_t *top_global; 
774 
//! Holds the domain topology.

775 
gmx_localtop_t *top; 
776 
//! User input options.

777 
t_inputrec *inputrec; 
778 
//! Manages flop accounting.

779 
t_nrnb *nrnb; 
780 
//! Manages wall cycle accounting.

781 
gmx_wallcycle_t wcycle; 
782 
//! Coordinates global reduction.

783 
gmx_global_stat_t gstat; 
784 
//! Handles virtual sites.

785 
gmx_vsite_t *vsite; 
786 
//! Handles constraints.

787 
gmx::Constraints *constr; 
788 
//! Handles strange things.

789 
t_fcdata *fcd; 
790 
//! Molecular graph for SHAKE.

791 
t_graph *graph; 
792 
//! Peratom data for this domain.

793 
gmx::MDAtoms *mdAtoms; 
794 
//! Handles how to calculate the forces.

795 
t_forcerec *fr; 
796 
//! Schedule of forcecalculation work each step for this task.

797 
gmx::PpForceWorkload *ppForceWorkload; 
798 
//! Stores the computed energies.

799 
gmx_enerdata_t *enerd; 
800 
}; 
801  
802 
void

803 
EnergyEvaluator::run(em_state_t *ems, rvec mu_tot, 
804 
tensor vir, tensor pres, 
805 
int64_t count, gmx_bool bFirst) 
806 
{ 
807 
real t; 
808 
gmx_bool bNS; 
809 
tensor force_vir, shake_vir, ekin; 
810 
real dvdl_constr, prescorr, enercorr, dvdlcorr; 
811 
real terminate = 0;

812  
813 
/* Set the time to the initial time, the time does not change during EM */

814 
t = inputrec>init_t; 
815  
816 
if (bFirst 

817 
(DOMAINDECOMP(cr) && ems>s.ddp_count < cr>dd>ddp_count)) 
818 
{ 
819 
/* This is the first state or an old state used before the last ns */

820 
bNS = TRUE; 
821 
} 
822 
else

823 
{ 
824 
bNS = FALSE; 
825 
if (inputrec>nstlist > 0) 
826 
{ 
827 
bNS = TRUE; 
828 
} 
829 
} 
830  
831 
if (vsite)

832 
{ 
833 
construct_vsites(vsite, ems>s.x.rvec_array(), 1, nullptr, 
834 
top>idef.iparams, top>idef.il, 
835 
fr>ePBC, fr>bMolPBC, cr, ems>s.box); 
836 
} 
837  
838 
if (DOMAINDECOMP(cr) && bNS)

839 
{ 
840 
/* Repartition the domain decomposition */

841 
em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, 
842 
ems, top, mdAtoms, fr, vsite, constr, 
843 
nrnb, wcycle); 
844 
} 
845  
846 
/* Calc force & energy on new trial position */

847 
/* do_force always puts the charge groups in the box and shifts again

848 
* We do not unshift, so molecules are always whole in congrad.c

849 
*/

850 
do_force(fplog, cr, ms, inputrec, nullptr, nullptr, 
851 
count, nrnb, wcycle, top, &top_global>groups, 
852 
ems>s.box, ems>s.x.arrayRefWithPadding(), &ems>s.hist, 
853 
ems>f.arrayRefWithPadding(), force_vir, mdAtoms>mdatoms(), enerd, fcd, 
854 
ems>s.lambda, graph, fr, ppForceWorkload, vsite, mu_tot, t, nullptr,

855 
GMX_FORCE_STATECHANGED  GMX_FORCE_ALLFORCES  
856 
GMX_FORCE_VIRIAL  GMX_FORCE_ENERGY  
857 
(bNS ? GMX_FORCE_NS : 0),

858 
DDBalanceRegionHandler(cr)); 
859  
860 
/* Clear the unused shake virial and pressure */

861 
clear_mat(shake_vir); 
862 
clear_mat(pres); 
863  
864 
/* Communicate stuff when parallel */

865 
if (PAR(cr) && inputrec>eI != eiNM)

866 
{ 
867 
wallcycle_start(wcycle, ewcMoveE); 
868  
869 
global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot, 
870 
inputrec, nullptr, nullptr, nullptr, 1, &terminate, 
871 
nullptr, FALSE,

872 
CGLO_ENERGY  
873 
CGLO_PRESSURE  
874 
CGLO_CONSTRAINT); 
875  
876 
wallcycle_stop(wcycle, ewcMoveE); 
877 
} 
878  
879 
/* Calculate long range corrections to pressure and energy */

880 
calc_dispcorr(inputrec, fr, ems>s.box, ems>s.lambda[efptVDW], 
881 
pres, force_vir, &prescorr, &enercorr, &dvdlcorr); 
882 
enerd>term[F_DISPCORR] = enercorr; 
883 
enerd>term[F_EPOT] += enercorr; 
884 
enerd>term[F_PRES] += prescorr; 
885 
enerd>term[F_DVDL] += dvdlcorr; 
886  
887 
ems>epot = enerd>term[F_EPOT]; 
888  
889 
if (constr)

890 
{ 
891 
/* Project out the constraint components of the force */

892 
dvdl_constr = 0;

893 
rvec *f_rvec = ems>f.rvec_array(); 
894 
constr>apply(FALSE, FALSE, 
895 
count, 0, 1.0, 
896 
ems>s.x.rvec_array(), f_rvec, f_rvec, 
897 
ems>s.box, 
898 
ems>s.lambda[efptBONDED], &dvdl_constr, 
899 
nullptr, &shake_vir, gmx::ConstraintVariable::ForceDispl);

900 
enerd>term[F_DVDL_CONSTR] += dvdl_constr; 
901 
m_add(force_vir, shake_vir, vir); 
902 
} 
903 
else

904 
{ 
905 
copy_mat(force_vir, vir); 
906 
} 
907  
908 
clear_mat(ekin); 
909 
enerd>term[F_PRES] = 
910 
calc_pres(fr>ePBC, inputrec>nwall, ems>s.box, ekin, vir, pres); 
911  
912 
sum_dhdl(enerd, ems>s.lambda, inputrec>fepvals); 
913  
914 
if (EI_ENERGY_MINIMIZATION(inputrec>eI))

915 
{ 
916 
get_state_f_norm_max(cr, &(inputrec>opts), mdAtoms>mdatoms(), ems); 
917 
} 
918 
} 
919  
920 
} // namespace

921  
922 
//! Parallel utility summing energies and forces

923 
static double reorder_partsum(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, 
924 
gmx_mtop_t *top_global, 
925 
em_state_t *s_min, em_state_t *s_b) 
926 
{ 
927 
t_block *cgs_gl; 
928 
int ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m;

929 
double partsum;

930 
unsigned char *grpnrFREEZE; 
931  
932 
if (debug)

933 
{ 
934 
fprintf(debug, "Doing reorder_partsum\n");

935 
} 
936  
937 
const rvec *fm = s_min>f.rvec_array();

938 
const rvec *fb = s_b>f.rvec_array();

939  
940 
cgs_gl = dd_charge_groups_global(cr>dd); 
941 
index = cgs_gl>index; 
942  
943 
/* Collect fm in a global vector fmg.

944 
* This conflicts with the spirit of domain decomposition,

945 
* but to fully optimize this a much more complicated algorithm is required.

946 
*/

947 
rvec *fmg; 
948 
snew(fmg, top_global>natoms); 
949  
950 
ncg = s_min>s.cg_gl.size(); 
951 
cg_gl = s_min>s.cg_gl.data(); 
952 
i = 0;

953 
for (c = 0; c < ncg; c++) 
954 
{ 
955 
cg = cg_gl[c]; 
956 
a0 = index[cg]; 
957 
a1 = index[cg+1];

958 
for (a = a0; a < a1; a++)

959 
{ 
960 
copy_rvec(fm[i], fmg[a]); 
961 
i++; 
962 
} 
963 
} 
964 
gmx_sum(top_global>natoms*3, fmg[0], cr); 
965  
966 
/* Now we will determine the part of the sum for the cgs in state s_b */

967 
ncg = s_b>s.cg_gl.size(); 
968 
cg_gl = s_b>s.cg_gl.data(); 
969 
partsum = 0;

970 
i = 0;

971 
gf = 0;

972 
grpnrFREEZE = top_global>groups.grpnr[egcFREEZE]; 
973 
for (c = 0; c < ncg; c++) 
974 
{ 
975 
cg = cg_gl[c]; 
976 
a0 = index[cg]; 
977 
a1 = index[cg+1];

978 
for (a = a0; a < a1; a++)

979 
{ 
980 
if (mdatoms>cFREEZE && grpnrFREEZE)

981 
{ 
982 
gf = grpnrFREEZE[i]; 
983 
} 
984 
for (m = 0; m < DIM; m++) 
985 
{ 
986 
if (!opts>nFreeze[gf][m])

987 
{ 
988 
partsum += (fb[i][m]  fmg[a][m])*fb[i][m]; 
989 
} 
990 
} 
991 
i++; 
992 
} 
993 
} 
994  
995 
sfree(fmg); 
996  
997 
return partsum;

998 
} 
999  
1000 
//! Print some stuff, like beta, whatever that means.

1001 
static real pr_beta(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, 
1002 
gmx_mtop_t *top_global, 
1003 
em_state_t *s_min, em_state_t *s_b) 
1004 
{ 
1005 
double sum;

1006  
1007 
/* This is just the classical PolakRibiere calculation of beta;

1008 
* it looks a bit complicated since we take freeze groups into account,

1009 
* and might have to sum it in parallel runs.

1010 
*/

1011  
1012 
if (!DOMAINDECOMP(cr) 

1013 
(s_min>s.ddp_count == cr>dd>ddp_count && 
1014 
s_b>s.ddp_count == cr>dd>ddp_count)) 
1015 
{ 
1016 
const rvec *fm = s_min>f.rvec_array();

1017 
const rvec *fb = s_b>f.rvec_array();

1018 
sum = 0;

1019 
int gf = 0; 
1020 
/* This part of code can be incorrect with DD,

1021 
* since the atom ordering in s_b and s_min might differ.

1022 
*/

1023 
for (int i = 0; i < mdatoms>homenr; i++) 
1024 
{ 
1025 
if (mdatoms>cFREEZE)

1026 
{ 
1027 
gf = mdatoms>cFREEZE[i]; 
1028 
} 
1029 
for (int m = 0; m < DIM; m++) 
1030 
{ 
1031 
if (!opts>nFreeze[gf][m])

1032 
{ 
1033 
sum += (fb[i][m]  fm[i][m])*fb[i][m]; 
1034 
} 
1035 
} 
1036 
} 
1037 
} 
1038 
else

1039 
{ 
1040 
/* We need to reorder cgs while summing */

1041 
sum = reorder_partsum(cr, opts, mdatoms, top_global, s_min, s_b); 
1042 
} 
1043 
if (PAR(cr))

1044 
{ 
1045 
gmx_sumd(1, &sum, cr);

1046 
} 
1047  
1048 
return sum/gmx::square(s_min>fnorm);

1049 
} 
1050  
1051 
namespace gmx

1052 
{ 
1053  
1054 
void

1055 
Integrator::do_cg() 
1056 
{ 
1057 
const char *CG = "PolakRibiere Conjugate Gradients"; 
1058  
1059 
gmx_localtop_t top; 
1060 
gmx_enerdata_t *enerd; 
1061 
gmx_global_stat_t gstat; 
1062 
t_graph *graph; 
1063 
double tmp, minstep;

1064 
real stepsize; 
1065 
real a, b, c, beta = 0.0; 
1066 
real epot_repl = 0;

1067 
real pnorm; 
1068 
gmx_bool converged, foundlower; 
1069 
rvec mu_tot = {0};

1070 
gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f; 
1071 
tensor vir, pres; 
1072 
int number_steps, neval = 0, nstcg = inputrec>nstcgsteep; 
1073 
int m, step, nminstep;

1074 
auto mdatoms = mdAtoms>mdatoms();

1075  
1076 
GMX_LOG(mdlog.info).asParagraph(). 
1077 
appendText("Note that activating conjugate gradient energy minimization via the "

1078 
"integrator .mdp option and the command gmx mdrun may "

1079 
"be available in a different form in a future version of GROMACS, "

1080 
"e.g. gmx minimize and an .mdp option.");

1081  
1082 
step = 0;

1083  
1084 
if (MASTER(cr))

1085 
{ 
1086 
// In CG, the state is extended with a search direction

1087 
state_global>flags = (1<<estCGP);

1088  
1089 
// Ensure the extra peratom state array gets allocated

1090 
state_change_natoms(state_global, state_global>natoms); 
1091  
1092 
// Initialize the search direction to zero

1093 
for (RVec &cg_p : state_global>cg_p)

1094 
{ 
1095 
cg_p = { 0, 0, 0 }; 
1096 
} 
1097 
} 
1098  
1099 
/* Create 4 states on the stack and extract pointers that we will swap */

1100 
em_state_t s0 {}, s1 {}, s2 {}, s3 {}; 
1101 
em_state_t *s_min = &s0; 
1102 
em_state_t *s_a = &s1; 
1103 
em_state_t *s_b = &s2; 
1104 
em_state_t *s_c = &s3; 
1105  
1106 
/* Init em and store the local state in s_min */

1107 
init_em(fplog, mdlog, CG, cr, ms, inputrec, mdrunOptions, 
1108 
state_global, top_global, s_min, &top, 
1109 
nrnb, fr, &graph, mdAtoms, &gstat, 
1110 
vsite, constr, nullptr,

1111 
nfile, fnm); 
1112 
gmx_mdoutf *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, inputrec, top_global, nullptr, wcycle);

1113 
snew(enerd, 1);

1114 
init_enerdata(top_global>groups.grps[egcENER].nr, inputrec>fepvals>n_lambda, enerd); 
1115 
gmx::EnergyOutput energyOutput; 
1116 
energyOutput.prepare(mdoutf_get_fp_ene(outf), top_global, inputrec, nullptr);

1117  
1118 
/* Print to log file */

1119 
print_em_start(fplog, cr, walltime_accounting, wcycle, CG); 
1120  
1121 
/* Max number of steps */

1122 
number_steps = inputrec>nsteps; 
1123  
1124 
if (MASTER(cr))

1125 
{ 
1126 
sp_header(stderr, CG, inputrec>em_tol, number_steps); 
1127 
} 
1128 
if (fplog)

1129 
{ 
1130 
sp_header(fplog, CG, inputrec>em_tol, number_steps); 
1131 
} 
1132  
1133 
EnergyEvaluator energyEvaluator { 
1134 
fplog, mdlog, cr, ms, 
1135 
top_global, &top, 
1136 
inputrec, nrnb, wcycle, gstat, 
1137 
vsite, constr, fcd, graph, 
1138 
mdAtoms, fr, ppForceWorkload, enerd 
1139 
}; 
1140 
/* Call the force routine and some auxiliary (neighboursearching etc.) */

1141 
/* do_force always puts the charge groups in the box and shifts again

1142 
* We do not unshift, so molecules are always whole in congrad.c

1143 
*/

1144 
energyEvaluator.run(s_min, mu_tot, vir, pres, 1, TRUE);

1145  
1146 
if (MASTER(cr))

1147 
{ 
1148 
/* Copy stuff to the energy bin for easy printing etc. */

1149 
matrix nullBox = {}; 
1150 
energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), 
1151 
mdatoms>tmass, enerd, nullptr, nullptr, nullptr, nullBox, 
1152 
nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); 
1153  
1154 
print_ebin_header(fplog, step, step); 
1155 
energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, 
1156 
fplog, step, step, eprNORMAL, 
1157 
fcd, &(top_global>groups), &(inputrec>opts), nullptr);

1158 
} 
1159  
1160 
/* Estimate/guess the initial stepsize */

1161 
stepsize = inputrec>em_stepsize/s_min>fnorm; 
1162  
1163 
if (MASTER(cr))

1164 
{ 
1165 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
1166 
fprintf(stderr, " Fmax = %12.5e on atom %d\n",

1167 
s_min>fmax, s_min>a_fmax+1);

1168 
fprintf(stderr, " FNorm = %12.5e\n",

1169 
s_min>fnorm/sqrtNumAtoms); 
1170 
fprintf(stderr, "\n");

1171 
/* and copy to the log file too... */

1172 
fprintf(fplog, " Fmax = %12.5e on atom %d\n",

1173 
s_min>fmax, s_min>a_fmax+1);

1174 
fprintf(fplog, " FNorm = %12.5e\n",

1175 
s_min>fnorm/sqrtNumAtoms); 
1176 
fprintf(fplog, "\n");

1177 
} 
1178 
/* Start the loop over CG steps.

1179 
* Each successful step is counted, and we continue until

1180 
* we either converge or reach the max number of steps.

1181 
*/

1182 
converged = FALSE; 
1183 
for (step = 0; (number_steps < 0  step <= number_steps) && !converged; step++) 
1184 
{ 
1185  
1186 
/* start taking steps in a new direction

1187 
* First time we enter the routine, beta=0, and the direction is

1188 
* simply the negative gradient.

1189 
*/

1190  
1191 
/* Calculate the new direction in p, and the gradient in this direction, gpa */

1192 
rvec *pm = s_min>s.cg_p.rvec_array(); 
1193 
const rvec *sfm = s_min>f.rvec_array();

1194 
double gpa = 0; 
1195 
int gf = 0; 
1196 
for (int i = 0; i < mdatoms>homenr; i++) 
1197 
{ 
1198 
if (mdatoms>cFREEZE)

1199 
{ 
1200 
gf = mdatoms>cFREEZE[i]; 
1201 
} 
1202 
for (m = 0; m < DIM; m++) 
1203 
{ 
1204 
if (!inputrec>opts.nFreeze[gf][m])

1205 
{ 
1206 
pm[i][m] = sfm[i][m] + beta*pm[i][m]; 
1207 
gpa = pm[i][m]*sfm[i][m]; 
1208 
/* f is negative gradient, thus the sign */

1209 
} 
1210 
else

1211 
{ 
1212 
pm[i][m] = 0;

1213 
} 
1214 
} 
1215 
} 
1216  
1217 
/* Sum the gradient along the line across CPUs */

1218 
if (PAR(cr))

1219 
{ 
1220 
gmx_sumd(1, &gpa, cr);

1221 
} 
1222  
1223 
/* Calculate the norm of the search vector */

1224 
get_f_norm_max(cr, &(inputrec>opts), mdatoms, pm, &pnorm, nullptr, nullptr); 
1225  
1226 
/* Just in case stepsize reaches zero due to numerical precision... */

1227 
if (stepsize <= 0) 
1228 
{ 
1229 
stepsize = inputrec>em_stepsize/pnorm; 
1230 
} 
1231  
1232 
/*

1233 
* Double check the value of the derivative in the search direction.

1234 
* If it is positive it must be due to the old information in the

1235 
* CG formula, so just remove that and start over with beta=0.

1236 
* This corresponds to a steepest descent step.

1237 
*/

1238 
if (gpa > 0) 
1239 
{ 
1240 
beta = 0;

1241 
step; /* Don't count this step since we are restarting */

1242 
continue; /* Go back to the beginning of the big forloop */ 
1243 
} 
1244  
1245 
/* Calculate minimum allowed stepsize, before the average (norm)

1246 
* relative change in coordinate is smaller than precision

1247 
*/

1248 
minstep = 0;

1249 
auto s_min_x = makeArrayRef(s_min>s.x);

1250 
for (int i = 0; i < mdatoms>homenr; i++) 
1251 
{ 
1252 
for (m = 0; m < DIM; m++) 
1253 
{ 
1254 
tmp = fabs(s_min_x[i][m]); 
1255 
if (tmp < 1.0) 
1256 
{ 
1257 
tmp = 1.0; 
1258 
} 
1259 
tmp = pm[i][m]/tmp; 
1260 
minstep += tmp*tmp; 
1261 
} 
1262 
} 
1263 
/* Add up from all CPUs */

1264 
if (PAR(cr))

1265 
{ 
1266 
gmx_sumd(1, &minstep, cr);

1267 
} 
1268  
1269 
minstep = GMX_REAL_EPS/sqrt(minstep/(3*top_global>natoms));

1270  
1271 
if (stepsize < minstep)

1272 
{ 
1273 
converged = TRUE; 
1274 
break;

1275 
} 
1276  
1277 
/* Write coordinates if necessary */

1278 
do_x = do_per_step(step, inputrec>nstxout); 
1279 
do_f = do_per_step(step, inputrec>nstfout); 
1280  
1281 
write_em_traj(fplog, cr, outf, do_x, do_f, nullptr,

1282 
top_global, inputrec, step, 
1283 
s_min, state_global, observablesHistory); 
1284  
1285 
/* Take a step downhill.

1286 
* In theory, we should minimize the function along this direction.

1287 
* That is quite possible, but it turns out to take 510 function evaluations

1288 
* for each line. However, we dont really need to find the exact minimum 

1289 
* it is much better to start a new CG step in a modified direction as soon

1290 
* as we are close to it. This will save a lot of energy evaluations.

1291 
*

1292 
* In practice, we just try to take a single step.

1293 
* If it worked (i.e. lowered the energy), we increase the stepsize but

1294 
* the continue straight to the next CG step without trying to find any minimum.

1295 
* If it didn't work (higher energy), there must be a minimum somewhere between

1296 
* the old position and the new one.

1297 
*

1298 
* Due to the finite numerical accuracy, it turns out that it is a good idea

1299 
* to even accept a SMALL increase in energy, if the derivative is still downhill.

1300 
* This leads to lower final energies in the tests I've done. / Erik

1301 
*/

1302 
s_a>epot = s_min>epot; 
1303 
a = 0.0; 
1304 
c = a + stepsize; /* reference position along line is zero */

1305  
1306 
if (DOMAINDECOMP(cr) && s_min>s.ddp_count < cr>dd>ddp_count)

1307 
{ 
1308 
em_dd_partition_system(fplog, mdlog, step, cr, top_global, inputrec, 
1309 
s_min, &top, mdAtoms, fr, vsite, constr, 
1310 
nrnb, wcycle); 
1311 
} 
1312  
1313 
/* Take a trial step (new coords in s_c) */

1314 
do_em_step(cr, inputrec, mdatoms, s_min, c, &s_min>s.cg_p, s_c, 
1315 
constr, 1);

1316  
1317 
neval++; 
1318 
/* Calculate energy for the trial step */

1319 
energyEvaluator.run(s_c, mu_tot, vir, pres, 1, FALSE);

1320  
1321 
/* Calc derivative along line */

1322 
const rvec *pc = s_c>s.cg_p.rvec_array();

1323 
const rvec *sfc = s_c>f.rvec_array();

1324 
double gpc = 0; 
1325 
for (int i = 0; i < mdatoms>homenr; i++) 
1326 
{ 
1327 
for (m = 0; m < DIM; m++) 
1328 
{ 
1329 
gpc = pc[i][m]*sfc[i][m]; /* f is negative gradient, thus the sign */

1330 
} 
1331 
} 
1332 
/* Sum the gradient along the line across CPUs */

1333 
if (PAR(cr))

1334 
{ 
1335 
gmx_sumd(1, &gpc, cr);

1336 
} 
1337  
1338 
/* This is the max amount of increase in energy we tolerate */

1339 
tmp = std::sqrt(GMX_REAL_EPS)*fabs(s_a>epot); 
1340  
1341 
/* Accept the step if the energy is lower, or if it is not significantly higher

1342 
* and the line derivative is still negative.

1343 
*/

1344 
if (s_c>epot < s_a>epot  (gpc < 0 && s_c>epot < (s_a>epot + tmp))) 
1345 
{ 
1346 
foundlower = TRUE; 
1347 
/* Great, we found a better energy. Increase step for next iteration

1348 
* if we are still going down, decrease it otherwise

1349 
*/

1350 
if (gpc < 0) 
1351 
{ 
1352 
stepsize *= 1.618034; /* The golden section */ 
1353 
} 
1354 
else

1355 
{ 
1356 
stepsize *= 0.618034; /* 1/golden section */ 
1357 
} 
1358 
} 
1359 
else

1360 
{ 
1361 
/* New energy is the same or higher. We will have to do some work

1362 
* to find a smaller value in the interval. Take smaller step next time!

1363 
*/

1364 
foundlower = FALSE; 
1365 
stepsize *= 0.618034; 
1366 
} 
1367  
1368  
1369  
1370  
1371 
/* OK, if we didn't find a lower value we will have to locate one now  there must

1372 
* be one in the interval [a=0,c].

1373 
* The same thing is valid here, though: Don't spend dozens of iterations to find

1374 
* the line minimum. We try to interpolate based on the derivative at the endpoints,

1375 
* and only continue until we find a lower value. In most cases this means 12 iterations.

1376 
*

1377 
* I also have a safeguard for potentially really pathological functions so we never

1378 
* take more than 20 steps before we give up ...

1379 
*

1380 
* If we already found a lower value we just skip this step and continue to the update.

1381 
*/

1382 
double gpb;

1383 
if (!foundlower)

1384 
{ 
1385 
nminstep = 0;

1386  
1387 
do

1388 
{ 
1389 
/* Select a new trial point.

1390 
* If the derivatives at points a & c have different sign we interpolate to zero,

1391 
* otherwise just do a bisection.

1392 
*/

1393 
if (gpa < 0 && gpc > 0) 
1394 
{ 
1395 
b = a + gpa*(ac)/(gpcgpa); 
1396 
} 
1397 
else

1398 
{ 
1399 
b = 0.5*(a+c); 
1400 
} 
1401  
1402 
/* safeguard if interpolation close to machine accuracy causes errors:

1403 
* never go outside the interval

1404 
*/

1405 
if (b <= a  b >= c)

1406 
{ 
1407 
b = 0.5*(a+c); 
1408 
} 
1409  
1410 
if (DOMAINDECOMP(cr) && s_min>s.ddp_count != cr>dd>ddp_count)

1411 
{ 
1412 
/* Reload the old state */

1413 
em_dd_partition_system(fplog, mdlog, 1, cr, top_global, inputrec,

1414 
s_min, &top, mdAtoms, fr, vsite, constr, 
1415 
nrnb, wcycle); 
1416 
} 
1417  
1418 
/* Take a trial step to this new point  new coords in s_b */

1419 
do_em_step(cr, inputrec, mdatoms, s_min, b, &s_min>s.cg_p, s_b, 
1420 
constr, 1);

1421  
1422 
neval++; 
1423 
/* Calculate energy for the trial step */

1424 
energyEvaluator.run(s_b, mu_tot, vir, pres, 1, FALSE);

1425  
1426 
/* p does not change within a step, but since the domain decomposition

1427 
* might change, we have to use cg_p of s_b here.

1428 
*/

1429 
const rvec *pb = s_b>s.cg_p.rvec_array();

1430 
const rvec *sfb = s_b>f.rvec_array();

1431 
gpb = 0;

1432 
for (int i = 0; i < mdatoms>homenr; i++) 
1433 
{ 
1434 
for (m = 0; m < DIM; m++) 
1435 
{ 
1436 
gpb = pb[i][m]*sfb[i][m]; /* f is negative gradient, thus the sign */

1437 
} 
1438 
} 
1439 
/* Sum the gradient along the line across CPUs */

1440 
if (PAR(cr))

1441 
{ 
1442 
gmx_sumd(1, &gpb, cr);

1443 
} 
1444  
1445 
if (debug)

1446 
{ 
1447 
fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",

1448 
s_a>epot, s_b>epot, s_c>epot, gpb); 
1449 
} 
1450  
1451 
epot_repl = s_b>epot; 
1452  
1453 
/* Keep one of the intervals based on the value of the derivative at the new point */

1454 
if (gpb > 0) 
1455 
{ 
1456 
/* Replace c endpoint with b */

1457 
swap_em_state(&s_b, &s_c); 
1458 
c = b; 
1459 
gpc = gpb; 
1460 
} 
1461 
else

1462 
{ 
1463 
/* Replace a endpoint with b */

1464 
swap_em_state(&s_b, &s_a); 
1465 
a = b; 
1466 
gpa = gpb; 
1467 
} 
1468  
1469 
/*

1470 
* Stop search as soon as we find a value smaller than the endpoints.

1471 
* Never run more than 20 steps, no matter what.

1472 
*/

1473 
nminstep++; 
1474 
} 
1475 
while ((epot_repl > s_a>epot  epot_repl > s_c>epot) &&

1476 
(nminstep < 20));

1477  
1478 
if (std::fabs(epot_repl  s_min>epot) < fabs(s_min>epot)*GMX_REAL_EPS 

1479 
nminstep >= 20)

1480 
{ 
1481 
/* OK. We couldn't find a significantly lower energy.

1482 
* If beta==0 this was steepest descent, and then we give up.

1483 
* If not, set beta=0 and restart with steepest descent before quitting.

1484 
*/

1485 
if (beta == 0.0) 
1486 
{ 
1487 
/* Converged */

1488 
converged = TRUE; 
1489 
break;

1490 
} 
1491 
else

1492 
{ 
1493 
/* Reset memory before giving up */

1494 
beta = 0.0; 
1495 
continue;

1496 
} 
1497 
} 
1498  
1499 
/* Select min energy state of A & C, put the best in B.

1500 
*/

1501 
if (s_c>epot < s_a>epot)

1502 
{ 
1503 
if (debug)

1504 
{ 
1505 
fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n",

1506 
s_c>epot, s_a>epot); 
1507 
} 
1508 
swap_em_state(&s_b, &s_c); 
1509 
gpb = gpc; 
1510 
} 
1511 
else

1512 
{ 
1513 
if (debug)

1514 
{ 
1515 
fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n",

1516 
s_a>epot, s_c>epot); 
1517 
} 
1518 
swap_em_state(&s_b, &s_a); 
1519 
gpb = gpa; 
1520 
} 
1521  
1522 
} 
1523 
else

1524 
{ 
1525 
if (debug)

1526 
{ 
1527 
fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n",

1528 
s_c>epot); 
1529 
} 
1530 
swap_em_state(&s_b, &s_c); 
1531 
gpb = gpc; 
1532 
} 
1533  
1534 
/* new search direction */

1535 
/* beta = 0 means forget all memory and restart with steepest descents. */

1536 
if (nstcg && ((step % nstcg) == 0)) 
1537 
{ 
1538 
beta = 0.0; 
1539 
} 
1540 
else

1541 
{ 
1542 
/* s_min>fnorm cannot be zero, because then we would have converged

1543 
* and broken out.

1544 
*/

1545  
1546 
/* PolakRibiere update.

1547 
* Change to fnorm2/fnorm2_old for FletcherReeves

1548 
*/

1549 
beta = pr_beta(cr, &inputrec>opts, mdatoms, top_global, s_min, s_b); 
1550 
} 
1551 
/* Limit beta to prevent oscillations */

1552 
if (fabs(beta) > 5.0) 
1553 
{ 
1554 
beta = 0.0; 
1555 
} 
1556  
1557  
1558 
/* update positions */

1559 
swap_em_state(&s_min, &s_b); 
1560 
gpa = gpb; 
1561  
1562 
/* Print it if necessary */

1563 
if (MASTER(cr))

1564 
{ 
1565 
if (mdrunOptions.verbose)

1566 
{ 
1567 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
1568 
fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",

1569 
step, s_min>epot, s_min>fnorm/sqrtNumAtoms, 
1570 
s_min>fmax, s_min>a_fmax+1);

1571 
fflush(stderr); 
1572 
} 
1573 
/* Store the new (lower) energies */

1574 
matrix nullBox = {}; 
1575 
energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), 
1576 
mdatoms>tmass, enerd, nullptr, nullptr, nullptr, nullBox, 
1577 
nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); 
1578  
1579 
do_log = do_per_step(step, inputrec>nstlog); 
1580 
do_ene = do_per_step(step, inputrec>nstenergy); 
1581  
1582 
/* Prepare IMD energy record, if bIMD is TRUE. */

1583 
IMD_fill_energy_record(inputrec>bIMD, inputrec>imd, enerd, step, TRUE); 
1584  
1585 
if (do_log)

1586 
{ 
1587 
print_ebin_header(fplog, step, step); 
1588 
} 
1589 
energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, 
1590 
do_log ? fplog : nullptr, step, step, eprNORMAL,

1591 
fcd, &(top_global>groups), &(inputrec>opts), nullptr);

1592 
} 
1593  
1594 
/* Send energies and positions to the IMD client if bIMD is TRUE. */

1595 
if (MASTER(cr) && do_IMD(inputrec>bIMD, step, cr, TRUE, state_global>box, state_global>x.rvec_array(), inputrec, 0, wcycle)) 
1596 
{ 
1597 
IMD_send_positions(inputrec>imd); 
1598 
} 
1599  
1600 
/* Stop when the maximum force lies below tolerance.

1601 
* If we have reached machine precision, converged is already set to true.

1602 
*/

1603 
converged = converged  (s_min>fmax < inputrec>em_tol); 
1604  
1605 
} /* End of the loop */

1606  
1607 
/* IMD cleanup, if bIMD is TRUE. */

1608 
IMD_finalize(inputrec>bIMD, inputrec>imd); 
1609  
1610 
if (converged)

1611 
{ 
1612 
step; /* we never took that last step in this case */

1613  
1614 
} 
1615 
if (s_min>fmax > inputrec>em_tol)

1616 
{ 
1617 
if (MASTER(cr))

1618 
{ 
1619 
warn_step(fplog, inputrec>em_tol, s_min>fmax, 
1620 
step1 == number_steps, FALSE);

1621 
} 
1622 
converged = FALSE; 
1623 
} 
1624  
1625 
if (MASTER(cr))

1626 
{ 
1627 
/* If we printed energy and/or logfile last step (which was the last step)

1628 
* we don't have to do it again, but otherwise print the final values.

1629 
*/

1630 
if (!do_log)

1631 
{ 
1632 
/* Write final value to log since we didn't do anything the last step */

1633 
print_ebin_header(fplog, step, step); 
1634 
} 
1635 
if (!do_ene  !do_log)

1636 
{ 
1637 
/* Write final energy file entries */

1638 
energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, 
1639 
!do_log ? fplog : nullptr, step, step, eprNORMAL,

1640 
fcd, &(top_global>groups), &(inputrec>opts), nullptr);

1641 
} 
1642 
} 
1643  
1644 
/* Print some stuff... */

1645 
if (MASTER(cr))

1646 
{ 
1647 
fprintf(stderr, "\nwriting lowest energy coordinates.\n");

1648 
} 
1649  
1650 
/* IMPORTANT!

1651 
* For accurate normal mode calculation it is imperative that we

1652 
* store the last conformation into the full precision binary trajectory.

1653 
*

1654 
* However, we should only do it if we did NOT already write this step

1655 
* above (which we did if do_x or do_f was true).

1656 
*/

1657 
/* Note that with 0 < nstfout != nstxout we can end up with two frames

1658 
* in the trajectory with the same step number.

1659 
*/

1660 
do_x = !do_per_step(step, inputrec>nstxout); 
1661 
do_f = (inputrec>nstfout > 0 && !do_per_step(step, inputrec>nstfout));

1662  
1663 
write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), 
1664 
top_global, inputrec, step, 
1665 
s_min, state_global, observablesHistory); 
1666  
1667  
1668 
if (MASTER(cr))

1669 
{ 
1670 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
1671 
print_converged(stderr, CG, inputrec>em_tol, step, converged, number_steps, 
1672 
s_min, sqrtNumAtoms); 
1673 
print_converged(fplog, CG, inputrec>em_tol, step, converged, number_steps, 
1674 
s_min, sqrtNumAtoms); 
1675  
1676 
fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);

1677 
} 
1678  
1679 
finish_em(cr, outf, walltime_accounting, wcycle); 
1680  
1681 
/* To print the actual number of steps we needed somewhere */

1682 
walltime_accounting_set_nsteps_done(walltime_accounting, step); 
1683 
} 
1684  
1685  
1686 
void

1687 
Integrator::do_lbfgs() 
1688 
{ 
1689 
static const char *LBFGS = "LowMemory BFGS Minimizer"; 
1690 
em_state_t ems; 
1691 
gmx_localtop_t top; 
1692 
gmx_enerdata_t *enerd; 
1693 
gmx_global_stat_t gstat; 
1694 
t_graph *graph; 
1695 
int ncorr, nmaxcorr, point, cp, neval, nminstep;

1696 
double stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;

1697 
real *rho, *alpha, *p, *s, **dx, **dg; 
1698 
real a, b, c, maxdelta, delta; 
1699 
real diag, Epot0; 
1700 
real dgdx, dgdg, sq, yr, beta; 
1701 
gmx_bool converged; 
1702 
rvec mu_tot = {0};

1703 
gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen; 
1704 
tensor vir, pres; 
1705 
int start, end, number_steps;

1706 
int i, k, m, n, gf, step;

1707 
int mdof_flags;

1708 
auto mdatoms = mdAtoms>mdatoms();

1709  
1710 
GMX_LOG(mdlog.info).asParagraph(). 
1711 
appendText("Note that activating LBFGS energy minimization via the "

1712 
"integrator .mdp option and the command gmx mdrun may "

1713 
"be available in a different form in a future version of GROMACS, "

1714 
"e.g. gmx minimize and an .mdp option.");

1715  
1716 
if (PAR(cr))

1717 
{ 
1718 
gmx_fatal(FARGS, "LBFGS minimization only supports a single rank");

1719 
} 
1720  
1721 
if (nullptr != constr) 
1722 
{ 
1723 
gmx_fatal(FARGS, "The combination of constraints and LBFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");

1724 
} 
1725  
1726 
n = 3*state_global>natoms;

1727 
nmaxcorr = inputrec>nbfgscorr; 
1728  
1729 
snew(frozen, n); 
1730  
1731 
snew(p, n); 
1732 
snew(rho, nmaxcorr); 
1733 
snew(alpha, nmaxcorr); 
1734  
1735 
snew(dx, nmaxcorr); 
1736 
for (i = 0; i < nmaxcorr; i++) 
1737 
{ 
1738 
snew(dx[i], n); 
1739 
} 
1740  
1741 
snew(dg, nmaxcorr); 
1742 
for (i = 0; i < nmaxcorr; i++) 
1743 
{ 
1744 
snew(dg[i], n); 
1745 
} 
1746  
1747 
step = 0;

1748 
neval = 0;

1749  
1750 
/* Init em */

1751 
init_em(fplog, mdlog, LBFGS, cr, ms, inputrec, mdrunOptions, 
1752 
state_global, top_global, &ems, &top, 
1753 
nrnb, fr, &graph, mdAtoms, &gstat, 
1754 
vsite, constr, nullptr,

1755 
nfile, fnm); 
1756 
gmx_mdoutf *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, inputrec, top_global, nullptr, wcycle);

1757 
snew(enerd, 1);

1758 
init_enerdata(top_global>groups.grps[egcENER].nr, inputrec>fepvals>n_lambda, enerd); 
1759 
gmx::EnergyOutput energyOutput; 
1760 
energyOutput.prepare(mdoutf_get_fp_ene(outf), top_global, inputrec, nullptr);

1761  
1762 
start = 0;

1763 
end = mdatoms>homenr; 
1764  
1765 
/* We need 4 working states */

1766 
em_state_t s0 {}, s1 {}, s2 {}, s3 {}; 
1767 
em_state_t *sa = &s0; 
1768 
em_state_t *sb = &s1; 
1769 
em_state_t *sc = &s2; 
1770 
em_state_t *last = &s3; 
1771 
/* Initialize by copying the state from ems (we could skip x and f here) */

1772 
*sa = ems; 
1773 
*sb = ems; 
1774 
*sc = ems; 
1775  
1776 
/* Print to log file */

1777 
print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS); 
1778  
1779 
do_log = do_ene = do_x = do_f = TRUE; 
1780  
1781 
/* Max number of steps */

1782 
number_steps = inputrec>nsteps; 
1783  
1784 
/* Create a 3*natoms index to tell whether each degree of freedom is frozen */

1785 
gf = 0;

1786 
for (i = start; i < end; i++)

1787 
{ 
1788 
if (mdatoms>cFREEZE)

1789 
{ 
1790 
gf = mdatoms>cFREEZE[i]; 
1791 
} 
1792 
for (m = 0; m < DIM; m++) 
1793 
{ 
1794 
frozen[3*i+m] = (inputrec>opts.nFreeze[gf][m] != 0); 
1795 
} 
1796 
} 
1797 
if (MASTER(cr))

1798 
{ 
1799 
sp_header(stderr, LBFGS, inputrec>em_tol, number_steps); 
1800 
} 
1801 
if (fplog)

1802 
{ 
1803 
sp_header(fplog, LBFGS, inputrec>em_tol, number_steps); 
1804 
} 
1805  
1806 
if (vsite)

1807 
{ 
1808 
construct_vsites(vsite, state_global>x.rvec_array(), 1, nullptr, 
1809 
top.idef.iparams, top.idef.il, 
1810 
fr>ePBC, fr>bMolPBC, cr, state_global>box); 
1811 
} 
1812  
1813 
/* Call the force routine and some auxiliary (neighboursearching etc.) */

1814 
/* do_force always puts the charge groups in the box and shifts again

1815 
* We do not unshift, so molecules are always whole

1816 
*/

1817 
neval++; 
1818 
EnergyEvaluator energyEvaluator { 
1819 
fplog, mdlog, cr, ms, 
1820 
top_global, &top, 
1821 
inputrec, nrnb, wcycle, gstat, 
1822 
vsite, constr, fcd, graph, 
1823 
mdAtoms, fr, ppForceWorkload, enerd 
1824 
}; 
1825 
energyEvaluator.run(&ems, mu_tot, vir, pres, 1, TRUE);

1826  
1827 
if (MASTER(cr))

1828 
{ 
1829 
/* Copy stuff to the energy bin for easy printing etc. */

1830 
matrix nullBox = {}; 
1831 
energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), 
1832 
mdatoms>tmass, enerd, nullptr, nullptr, nullptr, nullBox, 
1833 
nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); 
1834  
1835 
print_ebin_header(fplog, step, step); 
1836 
energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, 
1837 
fplog, step, step, eprNORMAL, 
1838 
fcd, &(top_global>groups), &(inputrec>opts), nullptr);

1839 
} 
1840  
1841 
/* Set the initial step.

1842 
* since it will be multiplied by the nonnormalized search direction

1843 
* vector (force vector the first time), we scale it by the

1844 
* norm of the force.

1845 
*/

1846  
1847 
if (MASTER(cr))

1848 
{ 
1849 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
1850 
fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);

1851 
fprintf(stderr, " Fmax = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1); 
1852 
fprintf(stderr, " FNorm = %12.5e\n", ems.fnorm/sqrtNumAtoms);

1853 
fprintf(stderr, "\n");

1854 
/* and copy to the log file too... */

1855 
fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);

1856 
fprintf(fplog, " Fmax = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1); 
1857 
fprintf(fplog, " FNorm = %12.5e\n", ems.fnorm/sqrtNumAtoms);

1858 
fprintf(fplog, "\n");

1859 
} 
1860  
1861 
// Point is an index to the memory of search directions, where 0 is the first one.

1862 
point = 0;

1863  
1864 
// Set initial search direction to the force (gradient), or 0 for frozen particles.

1865 
real *fInit = static_cast<real *>(ems.f.rvec_array()[0]); 
1866 
for (i = 0; i < n; i++) 
1867 
{ 
1868 
if (!frozen[i])

1869 
{ 
1870 
dx[point][i] = fInit[i]; /* Initial search direction */

1871 
} 
1872 
else

1873 
{ 
1874 
dx[point][i] = 0;

1875 
} 
1876 
} 
1877  
1878 
// Stepsize will be modified during the search, and actually it is not critical

1879 
// (the main efficiency in the algorithm comes from changing directions), but

1880 
// we still need an initial value, so estimate it as the inverse of the norm

1881 
// so we take small steps where the potential fluctuates a lot.

1882 
stepsize = 1.0/ems.fnorm; 
1883  
1884 
/* Start the loop over BFGS steps.

1885 
* Each successful step is counted, and we continue until

1886 
* we either converge or reach the max number of steps.

1887 
*/

1888  
1889 
ncorr = 0;

1890  
1891 
/* Set the gradient from the force */

1892 
converged = FALSE; 
1893 
for (step = 0; (number_steps < 0  step <= number_steps) && !converged; step++) 
1894 
{ 
1895  
1896 
/* Write coordinates if necessary */

1897 
do_x = do_per_step(step, inputrec>nstxout); 
1898 
do_f = do_per_step(step, inputrec>nstfout); 
1899  
1900 
mdof_flags = 0;

1901 
if (do_x)

1902 
{ 
1903 
mdof_flags = MDOF_X; 
1904 
} 
1905  
1906 
if (do_f)

1907 
{ 
1908 
mdof_flags = MDOF_F; 
1909 
} 
1910  
1911 
if (inputrec>bIMD)

1912 
{ 
1913 
mdof_flags = MDOF_IMD; 
1914 
} 
1915  
1916 
mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, 
1917 
top_global, step, static_cast<real>(step), &ems.s, state_global, observablesHistory, ems.f);

1918  
1919 
/* Do the linesearching in the direction dx[point][0..(n1)] */

1920  
1921 
/* make s a pointer to current search direction  point=0 first time we get here */

1922 
s = dx[point]; 
1923  
1924 
real *xx = static_cast<real *>(ems.s.x.rvec_array()[0]); 
1925 
real *ff = static_cast<real *>(ems.f.rvec_array()[0]); 
1926  
1927 
// calculate line gradient in position A

1928 
for (gpa = 0, i = 0; i < n; i++) 
1929 
{ 
1930 
gpa = s[i]*ff[i]; 
1931 
} 
1932  
1933 
/* Calculate minimum allowed stepsize along the line, before the average (norm)

1934 
* relative change in coordinate is smaller than precision

1935 
*/

1936 
for (minstep = 0, i = 0; i < n; i++) 
1937 
{ 
1938 
tmp = fabs(xx[i]); 
1939 
if (tmp < 1.0) 
1940 
{ 
1941 
tmp = 1.0; 
1942 
} 
1943 
tmp = s[i]/tmp; 
1944 
minstep += tmp*tmp; 
1945 
} 
1946 
minstep = GMX_REAL_EPS/sqrt(minstep/n); 
1947  
1948 
if (stepsize < minstep)

1949 
{ 
1950 
converged = TRUE; 
1951 
break;

1952 
} 
1953  
1954 
// Before taking any steps along the line, store the old position

1955 
*last = ems; 
1956 
real *lastx = static_cast<real *>(last>s.x.data()[0]); 
1957 
real *lastf = static_cast<real *>(last>f.data()[0]); 
1958 
Epot0 = ems.epot; 
1959  
1960 
*sa = ems; 
1961  
1962 
/* Take a step downhill.

1963 
* In theory, we should find the actual minimum of the function in this

1964 
* direction, somewhere along the line.

1965 
* That is quite possible, but it turns out to take 510 function evaluations

1966 
* for each line. However, we dont really need to find the exact minimum 

1967 
* it is much better to start a new BFGS step in a modified direction as soon

1968 
* as we are close to it. This will save a lot of energy evaluations.

1969 
*

1970 
* In practice, we just try to take a single step.

1971 
* If it worked (i.e. lowered the energy), we increase the stepsize but

1972 
* continue straight to the next BFGS step without trying to find any minimum,

1973 
* i.e. we change the search direction too. If the line was smooth, it is

1974 
* likely we are in a smooth region, and then it makes sense to take longer

1975 
* steps in the modified search direction too.

1976 
*

1977 
* If it didn't work (higher energy), there must be a minimum somewhere between

1978 
* the old position and the new one. Then we need to start by finding a lower

1979 
* value before we change search direction. Since the energy was apparently

1980 
* quite rough, we need to decrease the step size.

1981 
*

1982 
* Due to the finite numerical accuracy, it turns out that it is a good idea

1983 
* to accept a SMALL increase in energy, if the derivative is still downhill.

1984 
* This leads to lower final energies in the tests I've done. / Erik

1985 
*/

1986  
1987 
// State "A" is the first position along the line.

1988 
// reference position along line is initially zero

1989 
a = 0.0; 
1990  
1991 
// Check stepsize first. We do not allow displacements

1992 
// larger than emstep.

1993 
//

1994 
do

1995 
{ 
1996 
// Pick a new position C by adding stepsize to A.

1997 
c = a + stepsize; 
1998  
1999 
// Calculate what the largest change in any individual coordinate

2000 
// would be (translation along line * gradient along line)

2001 
maxdelta = 0;

2002 
for (i = 0; i < n; i++) 
2003 
{ 
2004 
delta = c*s[i]; 
2005 
if (delta > maxdelta)

2006 
{ 
2007 
maxdelta = delta; 
2008 
} 
2009 
} 
2010 
// If any displacement is larger than the stepsize limit, reduce the step

2011 
if (maxdelta > inputrec>em_stepsize)

2012 
{ 
2013 
stepsize *= 0.1; 
2014 
} 
2015 
} 
2016 
while (maxdelta > inputrec>em_stepsize);

2017  
2018 
// Take a trial step and move the coordinate array xc[] to position C

2019 
real *xc = static_cast<real *>(sc>s.x.rvec_array()[0]); 
2020 
for (i = 0; i < n; i++) 
2021 
{ 
2022 
xc[i] = lastx[i] + c*s[i]; 
2023 
} 
2024  
2025 
neval++; 
2026 
// Calculate energy for the trial step in position C

2027 
energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE); 
2028  
2029 
// Calc line gradient in position C

2030 
real *fc = static_cast<real *>(sc>f.rvec_array()[0]); 
2031 
for (gpc = 0, i = 0; i < n; i++) 
2032 
{ 
2033 
gpc = s[i]*fc[i]; /* f is negative gradient, thus the sign */

2034 
} 
2035 
/* Sum the gradient along the line across CPUs */

2036 
if (PAR(cr))

2037 
{ 
2038 
gmx_sumd(1, &gpc, cr);

2039 
} 
2040  
2041 
// This is the max amount of increase in energy we tolerate.

2042 
// By allowing VERY small changes (close to numerical precision) we

2043 
// frequently find even better (lower) final energies.

2044 
tmp = std::sqrt(GMX_REAL_EPS)*fabs(sa>epot); 
2045  
2046 
// Accept the step if the energy is lower in the new position C (compared to A),

2047 
// or if it is not significantly higher and the line derivative is still negative.

2048 
foundlower = sc>epot < sa>epot  (gpc < 0 && sc>epot < (sa>epot + tmp));

2049 
// If true, great, we found a better energy. We no longer try to alter the

2050 
// stepsize, but simply accept this new better position. The we select a new

2051 
// search direction instead, which will be much more efficient than continuing

2052 
// to take smaller steps along a line. Set fnorm based on the new C position,

2053 
// which will be used to update the stepsize to 1/fnorm further down.

2054  
2055 
// If false, the energy is NOT lower in point C, i.e. it will be the same

2056 
// or higher than in point A. In this case it is pointless to move to point C,

2057 
// so we will have to do more iterations along the same line to find a smaller

2058 
// value in the interval [A=0.0,C].

2059 
// Here, A is still 0.0, but that will change when we do a search in the interval

2060 
// [0.0,C] below. That search we will do by interpolation or bisection rather

2061 
// than with the stepsize, so no need to modify it. For the next search direction

2062 
// it will be reset to 1/fnorm anyway.

2063  
2064 
if (!foundlower)

2065 
{ 
2066 
// OK, if we didn't find a lower value we will have to locate one now  there must

2067 
// be one in the interval [a,c].

2068 
// The same thing is valid here, though: Don't spend dozens of iterations to find

2069 
// the line minimum. We try to interpolate based on the derivative at the endpoints,

2070 
// and only continue until we find a lower value. In most cases this means 12 iterations.

2071 
// I also have a safeguard for potentially really pathological functions so we never

2072 
// take more than 20 steps before we give up.

2073 
// If we already found a lower value we just skip this step and continue to the update.

2074 
real fnorm = 0;

2075 
nminstep = 0;

2076 
do

2077 
{ 
2078 
// Select a new trial point B in the interval [A,C].

2079 
// If the derivatives at points a & c have different sign we interpolate to zero,

2080 
// otherwise just do a bisection since there might be multiple minima/maxima

2081 
// inside the interval.

2082 
if (gpa < 0 && gpc > 0) 
2083 
{ 
2084 
b = a + gpa*(ac)/(gpcgpa); 
2085 
} 
2086 
else

2087 
{ 
2088 
b = 0.5*(a+c); 
2089 
} 
2090  
2091 
/* safeguard if interpolation close to machine accuracy causes errors:

2092 
* never go outside the interval

2093 
*/

2094 
if (b <= a  b >= c)

2095 
{ 
2096 
b = 0.5*(a+c); 
2097 
} 
2098  
2099 
// Take a trial step to point B

2100 
real *xb = static_cast<real *>(sb>s.x.rvec_array()[0]); 
2101 
for (i = 0; i < n; i++) 
2102 
{ 
2103 
xb[i] = lastx[i] + b*s[i]; 
2104 
} 
2105  
2106 
neval++; 
2107 
// Calculate energy for the trial step in point B

2108 
energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE); 
2109 
fnorm = sb>fnorm; 
2110  
2111 
// Calculate gradient in point B

2112 
real *fb = static_cast<real *>(sb>f.rvec_array()[0]); 
2113 
for (gpb = 0, i = 0; i < n; i++) 
2114 
{ 
2115 
gpb = s[i]*fb[i]; /* f is negative gradient, thus the sign */

2116  
2117 
} 
2118 
/* Sum the gradient along the line across CPUs */

2119 
if (PAR(cr))

2120 
{ 
2121 
gmx_sumd(1, &gpb, cr);

2122 
} 
2123  
2124 
// Keep one of the intervals [A,B] or [B,C] based on the value of the derivative

2125 
// at the new point B, and rename the endpoints of this new interval A and C.

2126 
if (gpb > 0) 
2127 
{ 
2128 
/* Replace c endpoint with b */

2129 
sc>epot = sb>epot; 
2130 
c = b; 
2131 
gpc = gpb; 
2132 
/* swap states b and c */

2133 
swap_em_state(&sb, &sc); 
2134 
} 
2135 
else

2136 
{ 
2137 
/* Replace a endpoint with b */

2138 
sa>epot = sb>epot; 
2139 
a = b; 
2140 
gpa = gpb; 
2141 
/* swap states a and b */

2142 
swap_em_state(&sa, &sb); 
2143 
} 
2144  
2145 
/*

2146 
* Stop search as soon as we find a value smaller than the endpoints,

2147 
* or if the tolerance is below machine precision.

2148 
* Never run more than 20 steps, no matter what.

2149 
*/

2150 
nminstep++; 
2151 
} 
2152 
while ((sb>epot > sa>epot  sb>epot > sc>epot) && (nminstep < 20)); 
2153  
2154 
if (std::fabs(sb>epot  Epot0) < GMX_REAL_EPS  nminstep >= 20) 
2155 
{ 
2156 
/* OK. We couldn't find a significantly lower energy.

2157 
* If ncorr==0 this was steepest descent, and then we give up.

2158 
* If not, reset memory to restart as steepest descent before quitting.

2159 
*/

2160 
if (ncorr == 0) 
2161 
{ 
2162 
/* Converged */

2163 
converged = TRUE; 
2164 
break;

2165 
} 
2166 
else

2167 
{ 
2168 
/* Reset memory */

2169 
ncorr = 0;

2170 
/* Search in gradient direction */

2171 
for (i = 0; i < n; i++) 
2172 
{ 
2173 
dx[point][i] = ff[i]; 
2174 
} 
2175 
/* Reset stepsize */

2176 
stepsize = 1.0/fnorm; 
2177 
continue;

2178 
} 
2179 
} 
2180  
2181 
/* Select min energy state of A & C, put the best in xx/ff/Epot

2182 
*/

2183 
if (sc>epot < sa>epot)

2184 
{ 
2185 
/* Use state C */

2186 
ems = *sc; 
2187 
step_taken = c; 
2188 
} 
2189 
else

2190 
{ 
2191 
/* Use state A */

2192 
ems = *sa; 
2193 
step_taken = a; 
2194 
} 
2195  
2196 
} 
2197 
else

2198 
{ 
2199 
/* found lower */

2200 
/* Use state C */

2201 
ems = *sc; 
2202 
step_taken = c; 
2203 
} 
2204  
2205 
/* Update the memory information, and calculate a new

2206 
* approximation of the inverse hessian

2207 
*/

2208  
2209 
/* Have new data in Epot, xx, ff */

2210 
if (ncorr < nmaxcorr)

2211 
{ 
2212 
ncorr++; 
2213 
} 
2214  
2215 
for (i = 0; i < n; i++) 
2216 
{ 
2217 
dg[point][i] = lastf[i]ff[i]; 
2218 
dx[point][i] *= step_taken; 
2219 
} 
2220  
2221 
dgdg = 0;

2222 
dgdx = 0;

2223 
for (i = 0; i < n; i++) 
2224 
{ 
2225 
dgdg += dg[point][i]*dg[point][i]; 
2226 
dgdx += dg[point][i]*dx[point][i]; 
2227 
} 
2228  
2229 
diag = dgdx/dgdg; 
2230  
2231 
rho[point] = 1.0/dgdx; 
2232 
point++; 
2233  
2234 
if (point >= nmaxcorr)

2235 
{ 
2236 
point = 0;

2237 
} 
2238  
2239 
/* Update */

2240 
for (i = 0; i < n; i++) 
2241 
{ 
2242 
p[i] = ff[i]; 
2243 
} 
2244  
2245 
cp = point; 
2246  
2247 
/* Recursive update. First go back over the memory points */

2248 
for (k = 0; k < ncorr; k++) 
2249 
{ 
2250 
cp; 
2251 
if (cp < 0) 
2252 
{ 
2253 
cp = ncorr1;

2254 
} 
2255  
2256 
sq = 0;

2257 
for (i = 0; i < n; i++) 
2258 
{ 
2259 
sq += dx[cp][i]*p[i]; 
2260 
} 
2261  
2262 
alpha[cp] = rho[cp]*sq; 
2263  
2264 
for (i = 0; i < n; i++) 
2265 
{ 
2266 
p[i] = alpha[cp]*dg[cp][i]; 
2267 
} 
2268 
} 
2269  
2270 
for (i = 0; i < n; i++) 
2271 
{ 
2272 
p[i] *= diag; 
2273 
} 
2274  
2275 
/* And then go forward again */

2276 
for (k = 0; k < ncorr; k++) 
2277 
{ 
2278 
yr = 0;

2279 
for (i = 0; i < n; i++) 
2280 
{ 
2281 
yr += p[i]*dg[cp][i]; 
2282 
} 
2283  
2284 
beta = rho[cp]*yr; 
2285 
beta = alpha[cp]beta; 
2286  
2287 
for (i = 0; i < n; i++) 
2288 
{ 
2289 
p[i] += beta*dx[cp][i]; 
2290 
} 
2291  
2292 
cp++; 
2293 
if (cp >= ncorr)

2294 
{ 
2295 
cp = 0;

2296 
} 
2297 
} 
2298  
2299 
for (i = 0; i < n; i++) 
2300 
{ 
2301 
if (!frozen[i])

2302 
{ 
2303 
dx[point][i] = p[i]; 
2304 
} 
2305 
else

2306 
{ 
2307 
dx[point][i] = 0;

2308 
} 
2309 
} 
2310  
2311 
/* Print it if necessary */

2312 
if (MASTER(cr))

2313 
{ 
2314 
if (mdrunOptions.verbose)

2315 
{ 
2316 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
2317 
fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",

2318 
step, ems.epot, ems.fnorm/sqrtNumAtoms, ems.fmax, ems.a_fmax + 1);

2319 
fflush(stderr); 
2320 
} 
2321 
/* Store the new (lower) energies */

2322 
matrix nullBox = {}; 
2323 
energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), 
2324 
mdatoms>tmass, enerd, nullptr, nullptr, nullptr, nullBox, 
2325 
nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); 
2326  
2327 
do_log = do_per_step(step, inputrec>nstlog); 
2328 
do_ene = do_per_step(step, inputrec>nstenergy); 
2329  
2330 
/* Prepare IMD energy record, if bIMD is TRUE. */

2331 
IMD_fill_energy_record(inputrec>bIMD, inputrec>imd, enerd, step, TRUE); 
2332  
2333 
if (do_log)

2334 
{ 
2335 
print_ebin_header(fplog, step, step); 
2336 
} 
2337 
energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, 
2338 
do_log ? fplog : nullptr, step, step, eprNORMAL,

2339 
fcd, &(top_global>groups), &(inputrec>opts), nullptr);

2340 
} 
2341  
2342 
/* Send x and E to IMD client, if bIMD is TRUE. */

2343 
if (do_IMD(inputrec>bIMD, step, cr, TRUE, state_global>box, state_global>x.rvec_array(), inputrec, 0, wcycle) && MASTER(cr)) 
2344 
{ 
2345 
IMD_send_positions(inputrec>imd); 
2346 
} 
2347  
2348 
// Reset stepsize in we are doing more iterations

2349 
stepsize = 1.0; 
2350  
2351 
/* Stop when the maximum force lies below tolerance.

2352 
* If we have reached machine precision, converged is already set to true.

2353 
*/

2354 
converged = converged  (ems.fmax < inputrec>em_tol); 
2355  
2356 
} /* End of the loop */

2357  
2358 
/* IMD cleanup, if bIMD is TRUE. */

2359 
IMD_finalize(inputrec>bIMD, inputrec>imd); 
2360  
2361 
if (converged)

2362 
{ 
2363 
step; /* we never took that last step in this case */

2364  
2365 
} 
2366 
if (ems.fmax > inputrec>em_tol)

2367 
{ 
2368 
if (MASTER(cr))

2369 
{ 
2370 
warn_step(fplog, inputrec>em_tol, ems.fmax, 
2371 
step1 == number_steps, FALSE);

2372 
} 
2373 
converged = FALSE; 
2374 
} 
2375  
2376 
/* If we printed energy and/or logfile last step (which was the last step)

2377 
* we don't have to do it again, but otherwise print the final values.

2378 
*/

2379 
if (!do_log) /* Write final value to log since we didn't do anythin last step */ 
2380 
{ 
2381 
print_ebin_header(fplog, step, step); 
2382 
} 
2383 
if (!do_ene  !do_log) /* Write final energy file entries */ 
2384 
{ 
2385 
energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, 
2386 
!do_log ? fplog : nullptr, step, step, eprNORMAL,

2387 
fcd, &(top_global>groups), &(inputrec>opts), nullptr);

2388 
} 
2389  
2390 
/* Print some stuff... */

2391 
if (MASTER(cr))

2392 
{ 
2393 
fprintf(stderr, "\nwriting lowest energy coordinates.\n");

2394 
} 
2395  
2396 
/* IMPORTANT!

2397 
* For accurate normal mode calculation it is imperative that we

2398 
* store the last conformation into the full precision binary trajectory.

2399 
*

2400 
* However, we should only do it if we did NOT already write this step

2401 
* above (which we did if do_x or do_f was true).

2402 
*/

2403 
do_x = !do_per_step(step, inputrec>nstxout); 
2404 
do_f = !do_per_step(step, inputrec>nstfout); 
2405 
write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), 
2406 
top_global, inputrec, step, 
2407 
&ems, state_global, observablesHistory); 
2408  
2409 
if (MASTER(cr))

2410 
{ 
2411 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
2412 
print_converged(stderr, LBFGS, inputrec>em_tol, step, converged, 
2413 
number_steps, &ems, sqrtNumAtoms); 
2414 
print_converged(fplog, LBFGS, inputrec>em_tol, step, converged, 
2415 
number_steps, &ems, sqrtNumAtoms); 
2416  
2417 
fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);

2418 
} 
2419  
2420 
finish_em(cr, outf, walltime_accounting, wcycle); 
2421  
2422 
/* To print the actual number of steps we needed somewhere */

2423 
walltime_accounting_set_nsteps_done(walltime_accounting, step); 
2424 
} 
2425  
2426 
void

2427 
Integrator::do_steep() 
2428 
{ 
2429 
const char *SD = "Steepest Descents"; 
2430 
gmx_localtop_t top; 
2431 
gmx_enerdata_t *enerd; 
2432 
gmx_global_stat_t gstat; 
2433 
t_graph *graph; 
2434 
real stepsize; 
2435 
real ustep; 
2436 
gmx_bool bDone, bAbort, do_x, do_f; 
2437 
tensor vir, pres; 
2438 
rvec mu_tot = {0};

2439 
int nsteps;

2440 
int count = 0; 
2441 
int steps_accepted = 0; 
2442 
auto mdatoms = mdAtoms>mdatoms();

2443  
2444 
GMX_LOG(mdlog.info).asParagraph(). 
2445 
appendText("Note that activating steepestdescent energy minimization via the "

2446 
"integrator .mdp option and the command gmx mdrun may "

2447 
"be available in a different form in a future version of GROMACS, "

2448 
"e.g. gmx minimize and an .mdp option.");

2449  
2450 
/* Create 2 states on the stack and extract pointers that we will swap */

2451 
em_state_t s0 {}, s1 {}; 
2452 
em_state_t *s_min = &s0; 
2453 
em_state_t *s_try = &s1; 
2454  
2455 
/* Init em and store the local state in s_try */

2456 
init_em(fplog, mdlog, SD, cr, ms, inputrec, mdrunOptions, 
2457 
state_global, top_global, s_try, &top, 
2458 
nrnb, fr, &graph, mdAtoms, &gstat, 
2459 
vsite, constr, nullptr,

2460 
nfile, fnm); 
2461 
gmx_mdoutf *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, inputrec, top_global, nullptr, wcycle);

2462 
snew(enerd, 1);

2463 
init_enerdata(top_global>groups.grps[egcENER].nr, inputrec>fepvals>n_lambda, enerd); 
2464 
gmx::EnergyOutput energyOutput; 
2465 
energyOutput.prepare(mdoutf_get_fp_ene(outf), top_global, inputrec, nullptr);

2466  
2467 
/* Print to log file */

2468 
print_em_start(fplog, cr, walltime_accounting, wcycle, SD); 
2469  
2470 
/* Set variables for stepsize (in nm). This is the largest

2471 
* step that we are going to make in any direction.

2472 
*/

2473 
ustep = inputrec>em_stepsize; 
2474 
stepsize = 0;

2475  
2476 
/* Max number of steps */

2477 
nsteps = inputrec>nsteps; 
2478  
2479 
if (MASTER(cr))

2480 
{ 
2481 
/* Print to the screen */

2482 
sp_header(stderr, SD, inputrec>em_tol, nsteps); 
2483 
} 
2484 
if (fplog)

2485 
{ 
2486 
sp_header(fplog, SD, inputrec>em_tol, nsteps); 
2487 
} 
2488 
EnergyEvaluator energyEvaluator { 
2489 
fplog, mdlog, cr, ms, 
2490 
top_global, &top, 
2491 
inputrec, nrnb, wcycle, gstat, 
2492 
vsite, constr, fcd, graph, 
2493 
mdAtoms, fr, ppForceWorkload, enerd 
2494 
}; 
2495  
2496 
/**** HERE STARTS THE LOOP ****

2497 
* count is the counter for the number of steps

2498 
* bDone will be TRUE when the minimization has converged

2499 
* bAbort will be TRUE when nsteps steps have been performed or when

2500 
* the stepsize becomes smaller than is reasonable for machine precision

2501 
*/

2502 
count = 0;

2503 
bDone = FALSE; 
2504 
bAbort = FALSE; 
2505 
while (!bDone && !bAbort)

2506 
{ 
2507 
bAbort = (nsteps >= 0) && (count == nsteps);

2508  
2509 
/* set new coordinates, except for first step */

2510 
bool validStep = true; 
2511 
if (count > 0) 
2512 
{ 
2513 
validStep = 
2514 
do_em_step(cr, inputrec, mdatoms, 
2515 
s_min, stepsize, &s_min>f, s_try, 
2516 
constr, count); 
2517 
} 
2518  
2519 
if (validStep)

2520 
{ 
2521 
energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0);

2522 
} 
2523 
else

2524 
{ 
2525 
// Signal constraint error during stepping with energy=inf

2526 
s_try>epot = std::numeric_limits<real>::infinity(); 
2527 
} 
2528  
2529 
if (MASTER(cr))

2530 
{ 
2531 
print_ebin_header(fplog, count, count); 
2532 
} 
2533  
2534 
if (count == 0) 
2535 
{ 
2536 
s_min>epot = s_try>epot; 
2537 
} 
2538  
2539 
/* Print it if necessary */

2540 
if (MASTER(cr))

2541 
{ 
2542 
if (mdrunOptions.verbose)

2543 
{ 
2544 
fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",

2545 
count, ustep, s_try>epot, s_try>fmax, s_try>a_fmax+1,

2546 
( (count == 0)  (s_try>epot < s_min>epot) ) ? '\n' : '\r'); 
2547 
fflush(stderr); 
2548 
} 
2549  
2550 
if ( (count == 0)  (s_try>epot < s_min>epot) ) 
2551 
{ 
2552 
/* Store the new (lower) energies */

2553 
matrix nullBox = {}; 
2554 
energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(count), 
2555 
mdatoms>tmass, enerd, nullptr, nullptr, nullptr, nullBox, 
2556 
nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); 
2557  
2558 
/* Prepare IMD energy record, if bIMD is TRUE. */

2559 
IMD_fill_energy_record(inputrec>bIMD, inputrec>imd, enerd, count, TRUE); 
2560  
2561 
const bool do_dr = do_per_step(steps_accepted, inputrec>nstdisreout); 
2562 
const bool do_or = do_per_step(steps_accepted, inputrec>nstorireout); 
2563 
energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, 
2564 
do_dr, do_or, 
2565 
fplog, count, count, eprNORMAL, 
2566 
fcd, &(top_global>groups), 
2567 
&(inputrec>opts), nullptr);

2568 
fflush(fplog); 
2569 
} 
2570 
} 
2571  
2572 
/* Now if the new energy is smaller than the previous...

2573 
* or if this is the first step!

2574 
* or if we did random steps!

2575 
*/

2576  
2577 
if ( (count == 0)  (s_try>epot < s_min>epot) ) 
2578 
{ 
2579 
steps_accepted++; 
2580  
2581 
/* Test whether the convergence criterion is met... */

2582 
bDone = (s_try>fmax < inputrec>em_tol); 
2583  
2584 
/* Copy the arrays for force, positions and energy */

2585 
/* The 'Min' array always holds the coords and forces of the minimal

2586 
sampled energy */

2587 
swap_em_state(&s_min, &s_try); 
2588 
if (count > 0) 
2589 
{ 
2590 
ustep *= 1.2; 
2591 
} 
2592  
2593 
/* Write to trn, if necessary */

2594 
do_x = do_per_step(steps_accepted, inputrec>nstxout); 
2595 
do_f = do_per_step(steps_accepted, inputrec>nstfout); 
2596 
write_em_traj(fplog, cr, outf, do_x, do_f, nullptr,

2597 
top_global, inputrec, count, 
2598 
s_min, state_global, observablesHistory); 
2599 
} 
2600 
else

2601 
{ 
2602 
/* If energy is not smaller make the step smaller... */

2603 
ustep *= 0.5; 
2604  
2605 
if (DOMAINDECOMP(cr) && s_min>s.ddp_count != cr>dd>ddp_count)

2606 
{ 
2607 
/* Reload the old state */

2608 
em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, 
2609 
s_min, &top, mdAtoms, fr, vsite, constr, 
2610 
nrnb, wcycle); 
2611 
} 
2612 
} 
2613  
2614 
/* Determine new step */

2615 
stepsize = ustep/s_min>fmax; 
2616  
2617 
/* Check if stepsize is too small, with 1 nm as a characteristic length */

2618 
#if GMX_DOUBLE

2619 
if (count == nsteps  ustep < 1e12) 
2620 
#else

2621 
if (count == nsteps  ustep < 1e6) 
2622 
#endif

2623 
{ 
2624 
if (MASTER(cr))

2625 
{ 
2626 
warn_step(fplog, inputrec>em_tol, s_min>fmax, 
2627 
count == nsteps, constr != nullptr);

2628 
} 
2629 
bAbort = TRUE; 
2630 
} 
2631  
2632 
/* Send IMD energies and positions, if bIMD is TRUE. */

2633 
if (do_IMD(inputrec>bIMD, count, cr, TRUE, state_global>box,

2634 
MASTER(cr) ? state_global>x.rvec_array() : nullptr,

2635 
inputrec, 0, wcycle) &&

2636 
MASTER(cr)) 
2637 
{ 
2638 
IMD_send_positions(inputrec>imd); 
2639 
} 
2640  
2641 
count++; 
2642 
} /* End of the loop */

2643  
2644 
/* IMD cleanup, if bIMD is TRUE. */

2645 
IMD_finalize(inputrec>bIMD, inputrec>imd); 
2646  
2647 
/* Print some data... */

2648 
if (MASTER(cr))

2649 
{ 
2650 
fprintf(stderr, "\nwriting lowest energy coordinates.\n");

2651 
} 
2652 
write_em_traj(fplog, cr, outf, TRUE, inputrec>nstfout != 0, ftp2fn(efSTO, nfile, fnm),

2653 
top_global, inputrec, count, 
2654 
s_min, state_global, observablesHistory); 
2655  
2656 
if (MASTER(cr))

2657 
{ 
2658 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
2659  
2660 
print_converged(stderr, SD, inputrec>em_tol, count, bDone, nsteps, 
2661 
s_min, sqrtNumAtoms); 
2662 
print_converged(fplog, SD, inputrec>em_tol, count, bDone, nsteps, 
2663 
s_min, sqrtNumAtoms); 
2664 
} 
2665  
2666 
finish_em(cr, outf, walltime_accounting, wcycle); 
2667  
2668 
/* To print the actual number of steps we needed somewhere */

2669 
inputrec>nsteps = count; 
2670  
2671 
walltime_accounting_set_nsteps_done(walltime_accounting, count); 
2672 
} 
2673  
2674 
void

2675 
Integrator::do_nm() 
2676 
{ 
2677 
const char *NM = "Normal Mode Analysis"; 
2678 
int nnodes, node;

2679 
gmx_localtop_t top; 
2680 
gmx_enerdata_t *enerd; 
2681 
gmx_global_stat_t gstat; 
2682 
t_graph *graph; 
2683 
tensor vir, pres; 
2684 
rvec mu_tot = {0};

2685 
rvec *dfdx; 
2686 
gmx_bool bSparse; /* use sparse matrix storage format */

2687 
size_t sz; 
2688 
gmx_sparsematrix_t * sparse_matrix = nullptr;

2689 
real * full_matrix = nullptr;

2690  
2691 
/* added with respect to mdrun */

2692 
int row, col;

2693 
real der_range = 10.0*std::sqrt(GMX_REAL_EPS); 
2694 
real x_min; 
2695 
bool bIsMaster = MASTER(cr);

2696 
auto mdatoms = mdAtoms>mdatoms();

2697  
2698 
GMX_LOG(mdlog.info).asParagraph(). 
2699 
appendText("Note that activating normalmode analysis via the integrator "

2700 
".mdp option and the command gmx mdrun may "

2701 
"be available in a different form in a future version of GROMACS, "

2702 
"e.g. gmx normalmodes.");

2703  
2704 
if (constr != nullptr) 
2705 
{ 
2706 
gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported");

2707 
} 
2708  
2709 
gmx_shellfc_t *shellfc; 
2710  
2711 
em_state_t state_work {}; 
2712  
2713 
/* Init em and store the local state in state_minimum */

2714 
init_em(fplog, mdlog, NM, cr, ms, inputrec, mdrunOptions, 
2715 
state_global, top_global, &state_work, &top, 
2716 
nrnb, fr, &graph, mdAtoms, &gstat, 
2717 
vsite, constr, &shellfc, 
2718 
nfile, fnm); 
2719 
gmx_mdoutf *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, inputrec, top_global, nullptr, wcycle);

2720 
snew(enerd, 1);

2721 
init_enerdata(top_global>groups.grps[egcENER].nr, inputrec>fepvals>n_lambda, enerd); 
2722  
2723 
std::vector<int> atom_index = get_atom_index(top_global);

2724 
std::vector<gmx::RVec> fneg(atom_index.size(), {0, 0, 0}); 
2725 
snew(dfdx, atom_index.size()); 
2726  
2727 
#if !GMX_DOUBLE

2728 
if (bIsMaster)

2729 
{ 
2730 
fprintf(stderr, 
2731 
"NOTE: This version of GROMACS has been compiled in single precision,\n"

2732 
" which MIGHT not be accurate enough for normal mode analysis.\n"

2733 
" GROMACS now uses sparse matrix storage, so the memory requirements\n"

2734 
" are fairly modest even if you recompile in double precision.\n\n");

2735 
} 
2736 
#endif

2737  
2738 
/* Check if we can/should use sparse storage format.

2739 
*

2740 
* Sparse format is only useful when the Hessian itself is sparse, which it

2741 
* will be when we use a cutoff.

2742 
* For small systems (n<1000) it is easier to always use full matrix format, though.

2743 
*/

2744 
if (EEL_FULL(fr>ic>eeltype)  fr>rlist == 0.0) 
2745 
{ 
2746 
GMX_LOG(mdlog.warning).appendText("Noncutoff electrostatics used, forcing full Hessian format.");

2747 
bSparse = FALSE; 
2748 
} 
2749 
else if (atom_index.size() < 1000) 
2750 
{ 
2751 
GMX_LOG(mdlog.warning).appendTextFormatted("Small system size (N=%zu), using full Hessian format.",

2752 
atom_index.size()); 
2753 
bSparse = FALSE; 
2754 
} 
2755 
else

2756 
{ 
2757 
GMX_LOG(mdlog.warning).appendText("Using compressed symmetric sparse Hessian format.");

2758 
bSparse = TRUE; 
2759 
} 
2760  
2761 
/* Number of dimensions, based on real atoms, that is not vsites or shell */

2762 
sz = DIM*atom_index.size(); 
2763  
2764 
fprintf(stderr, "Allocating Hessian memory...\n\n");

2765  
2766 
if (bSparse)

2767 
{ 
2768 
sparse_matrix = gmx_sparsematrix_init(sz); 
2769 
sparse_matrix>compressed_symmetric = TRUE; 
2770 
} 
2771 
else

2772 
{ 
2773 
snew(full_matrix, sz*sz); 
2774 
} 
2775  
2776 
init_nrnb(nrnb); 
2777  
2778  
2779 
/* Write start time and temperature */

2780 
print_em_start(fplog, cr, walltime_accounting, wcycle, NM); 
2781  
2782 
/* fudge nr of steps to nr of atoms */

2783 
inputrec>nsteps = atom_index.size()*2;

2784  
2785 
if (bIsMaster)

2786 
{ 
2787 
fprintf(stderr, "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n", 
2788 
*(top_global>name), inputrec>nsteps); 
2789 
} 
2790  
2791 
nnodes = cr>nnodes; 
2792  
2793 
/* Make evaluate_energy do a single node force calculation */

2794 
cr>nnodes = 1;

2795 
EnergyEvaluator energyEvaluator { 
2796 
fplog, mdlog, cr, ms, 
2797 
top_global, &top, 
2798 
inputrec, nrnb, wcycle, gstat, 
2799 
vsite, constr, fcd, graph, 
2800 
mdAtoms, fr, ppForceWorkload, enerd 
2801 
}; 
2802 
energyEvaluator.run(&state_work, mu_tot, vir, pres, 1, TRUE);

2803 
cr>nnodes = nnodes; 
2804  
2805 
/* if forces are not small, warn user */

2806 
get_state_f_norm_max(cr, &(inputrec>opts), mdatoms, &state_work); 
2807  
2808 
GMX_LOG(mdlog.warning).appendTextFormatted("Maximum force:%12.5e", state_work.fmax);

2809 
if (state_work.fmax > 1.0e3) 
2810 
{ 
2811 
GMX_LOG(mdlog.warning).appendText( 
2812 
"The force is probably not small enough to "

2813 
"ensure that you are at a minimum.\n"

2814 
"Be aware that negative eigenvalues may occur\n"

2815 
"when the resulting matrix is diagonalized.");

2816 
} 
2817  
2818 
/***********************************************************

2819 
*

2820 
* Loop over all pairs in matrix

2821 
*

2822 
* do_force called twice. Once with positive and

2823 
* once with negative displacement

2824 
*

2825 
************************************************************/

2826  
2827 
/* Steps are divided one by one over the nodes */

2828 
bool bNS = true; 
2829 
auto state_work_x = makeArrayRef(state_work.s.x);

2830 
auto state_work_f = makeArrayRef(state_work.f);

2831 
for (unsigned int aid = cr>nodeid; aid < atom_index.size(); aid += nnodes) 
2832 
{ 
2833 
size_t atom = atom_index[aid]; 
2834 
for (size_t d = 0; d < DIM; d++) 
2835 
{ 
2836 
int64_t step = 0;

2837 
int force_flags = GMX_FORCE_STATECHANGED  GMX_FORCE_ALLFORCES;

2838 
double t = 0; 
2839  
2840 
x_min = state_work_x[atom][d]; 
2841  
2842 
for (unsigned int dx = 0; (dx < 2); dx++) 
2843 
{ 
2844 
if (dx == 0) 
2845 
{ 
2846 
state_work_x[atom][d] = x_min  der_range; 
2847 
} 
2848 
else

2849 
{ 
2850 
state_work_x[atom][d] = x_min + der_range; 
2851 
} 
2852  
2853 
/* Make evaluate_energy do a single node force calculation */

2854 
cr>nnodes = 1;

2855 
if (shellfc)

2856 
{ 
2857 
/* Now is the time to relax the shells */

2858 
relax_shell_flexcon(fplog, 
2859 
cr, 
2860 
ms, 
2861 
mdrunOptions.verbose, 
2862 
nullptr,

2863 
step, 
2864 
inputrec, 
2865 
bNS, 
2866 
force_flags, 
2867 
&top, 
2868 
constr, 
2869 
enerd, 
2870 
fcd, 
2871 
&state_work.s, 
2872 
state_work.f.arrayRefWithPadding(), 
2873 
vir, 
2874 
mdatoms, 
2875 
nrnb, 
2876 
wcycle, 
2877 
graph, 
2878 
&top_global>groups, 
2879 
shellfc, 
2880 
fr, 
2881 
ppForceWorkload, 
2882 
t, 
2883 
mu_tot, 
2884 
vsite, 
2885 
DDBalanceRegionHandler(nullptr));

2886 
bNS = false;

2887 
step++; 
2888 
} 
2889 
else

2890 
{ 
2891 
energyEvaluator.run(&state_work, mu_tot, vir, pres, aid*2+dx, FALSE);

2892 
} 
2893  
2894 
cr>nnodes = nnodes; 
2895  
2896 
if (dx == 0) 
2897 
{ 
2898 
std::copy(state_work_f.begin(), state_work_f.begin()+atom_index.size(), fneg.begin()); 
2899 
} 
2900 
} 
2901  
2902 
/* x is restored to original */

2903 
state_work_x[atom][d] = x_min; 
2904  
2905 
for (size_t j = 0; j < atom_index.size(); j++) 
2906 
{ 
2907 
for (size_t k = 0; (k < DIM); k++) 
2908 
{ 
2909 
dfdx[j][k] = 
2910 
(state_work_f[atom_index[j]][k]  fneg[j][k])/(2*der_range);

2911 
} 
2912 
} 
2913  
2914 
if (!bIsMaster)

2915 
{ 
2916 
#if GMX_MPI

2917 
#define mpi_type GMX_MPI_REAL

2918 
MPI_Send(dfdx[0], atom_index.size()*DIM, mpi_type, MASTER(cr),

2919 
cr>nodeid, cr>mpi_comm_mygroup); 
2920 
#endif

2921 
} 
2922 
else

2923 
{ 
2924 
for (node = 0; (node < nnodes && aid+node < atom_index.size()); node++) 
2925 
{ 
2926 
if (node > 0) 
2927 
{ 
2928 
#if GMX_MPI

2929 
MPI_Status stat; 
2930 
MPI_Recv(dfdx[0], atom_index.size()*DIM, mpi_type, node, node,

2931 
cr>mpi_comm_mygroup, &stat); 
2932 
#undef mpi_type

2933 
#endif

2934 
} 
2935  
2936 
row = (aid + node)*DIM + d; 
2937  
2938 
for (size_t j = 0; j < atom_index.size(); j++) 
2939 
{ 
2940 
for (size_t k = 0; k < DIM; k++) 
2941 
{ 
2942 
col = j*DIM + k; 
2943  
2944 
if (bSparse)

2945 
{ 
2946 
if (col >= row && dfdx[j][k] != 0.0) 
2947 
{ 
2948 
gmx_sparsematrix_increment_value(sparse_matrix, 
2949 
row, col, dfdx[j][k]); 
2950 
} 
2951 
} 
2952 
else

2953 
{ 
2954 
full_matrix[row*sz+col] = dfdx[j][k]; 
2955 
} 
2956 
} 
2957 
} 
2958 
} 
2959 
} 
2960  
2961 
if (mdrunOptions.verbose && fplog)

2962 
{ 
2963 
fflush(fplog); 
2964 
} 
2965 
} 
2966 
/* write progress */

2967 
if (bIsMaster && mdrunOptions.verbose)

2968 
{ 
2969 
fprintf(stderr, "\rFinished step %d out of %td",

2970 
std::min<int>(atom+nnodes, atom_index.size()),

2971 
ssize(atom_index)); 
2972 
fflush(stderr); 
2973 
} 
2974 
} 
2975  
2976 
if (bIsMaster)

2977 
{ 
2978 
fprintf(stderr, "\n\nWriting Hessian...\n");

2979 
gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix); 
2980 
} 
2981  
2982 
finish_em(cr, outf, walltime_accounting, wcycle); 
2983  
2984 
walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size()*2);

2985 
} 
2986  
2987 
} // namespace gmx
