/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file defines integrators for energy minimization
 *
 * \author Berk Hess <hess@kth.se>
 * \author Erik Lindahl <erik@kth.se>
 * \ingroup module_mdrun
 */

45 
#include "gmxpre.h" 
46  
47 
#include "config.h" 
48  
49 
#include <cmath> 
50 
#include <cstring> 
51 
#include <ctime> 
52  
53 
#include <algorithm> 
54 
#include <vector> 
55  
56 
#include "gromacs/commandline/filenm.h" 
57 
#include "gromacs/domdec/collect.h" 
58 
#include "gromacs/domdec/domdec.h" 
59 
#include "gromacs/domdec/domdec_struct.h" 
60 
#include "gromacs/domdec/partition.h" 
61 
#include "gromacs/ewald/pme.h" 
62 
#include "gromacs/fileio/confio.h" 
63 
#include "gromacs/fileio/mtxio.h" 
64 
#include "gromacs/gmxlib/network.h" 
65 
#include "gromacs/gmxlib/nrnb.h" 
66 
#include "gromacs/imd/imd.h" 
67 
#include "gromacs/linearalgebra/sparsematrix.h" 
68 
#include "gromacs/listedforces/managethreading.h" 
69 
#include "gromacs/math/functions.h" 
70 
#include "gromacs/math/vec.h" 
71 
#include "gromacs/mdlib/constr.h" 
72 
#include "gromacs/mdlib/force.h" 
73 
#include "gromacs/mdlib/forcerec.h" 
74 
#include "gromacs/mdlib/gmx_omp_nthreads.h" 
75 
#include "gromacs/mdlib/md_support.h" 
76 
#include "gromacs/mdlib/mdatoms.h" 
77 
#include "gromacs/mdlib/mdebin.h" 
78 
#include "gromacs/mdlib/mdrun.h" 
79 
#include "gromacs/mdlib/mdsetup.h" 
80 
#include "gromacs/mdlib/ns.h" 
81 
#include "gromacs/mdlib/shellfc.h" 
82 
#include "gromacs/mdlib/sim_util.h" 
83 
#include "gromacs/mdlib/tgroup.h" 
84 
#include "gromacs/mdlib/trajectory_writing.h" 
85 
#include "gromacs/mdlib/update.h" 
86 
#include "gromacs/mdlib/vsite.h" 
87 
#include "gromacs/mdtypes/commrec.h" 
88 
#include "gromacs/mdtypes/inputrec.h" 
89 
#include "gromacs/mdtypes/md_enums.h" 
90 
#include "gromacs/mdtypes/state.h" 
91 
#include "gromacs/pbcutil/mshift.h" 
92 
#include "gromacs/pbcutil/pbc.h" 
93 
#include "gromacs/timing/wallcycle.h" 
94 
#include "gromacs/timing/walltime_accounting.h" 
95 
#include "gromacs/topology/mtop_util.h" 
96 
#include "gromacs/topology/topology.h" 
97 
#include "gromacs/utility/cstringutil.h" 
98 
#include "gromacs/utility/exceptions.h" 
99 
#include "gromacs/utility/fatalerror.h" 
100 
#include "gromacs/utility/logger.h" 
101 
#include "gromacs/utility/smalloc.h" 
102  
103 
#include "integrator.h" 
104  
105 
//! Utility structure for manipulating states during EM

106 
typedef struct { 
107 
//! Copy of the global state

108 
t_state s; 
109 
//! Force array

110 
PaddedVector<gmx::RVec> f; 
111 
//! Potential energy

112 
real epot; 
113 
//! Norm of the force

114 
real fnorm; 
115 
//! Maximum force

116 
real fmax; 
117 
//! Direction

118 
int a_fmax;

119 
} em_state_t; 
120  
121 
//! Print the EM starting conditions

122 
static void print_em_start(FILE *fplog, 
123 
const t_commrec *cr,

124 
gmx_walltime_accounting_t walltime_accounting, 
125 
gmx_wallcycle_t wcycle, 
126 
const char *name) 
127 
{ 
128 
walltime_accounting_start_time(walltime_accounting); 
129 
wallcycle_start(wcycle, ewcRUN); 
130 
print_start(fplog, cr, walltime_accounting, name); 
131 
} 
132  
133 
//! Stop counting time for EM

134 
static void em_time_end(gmx_walltime_accounting_t walltime_accounting, 
135 
gmx_wallcycle_t wcycle) 
136 
{ 
137 
wallcycle_stop(wcycle, ewcRUN); 
138  
139 
walltime_accounting_end_time(walltime_accounting); 
140 
} 
141  
142 
//! Printing a log file and console header

143 
static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps) 
144 
{ 
145 
fprintf(out, "\n");

146 
fprintf(out, "%s:\n", minimizer);

147 
fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol);

148 
fprintf(out, " Number of steps = %12d\n", nsteps);

149 
} 
150  
151 
//! Print warning message

152 
static void warn_step(FILE *fp, 
153 
real ftol, 
154 
real fmax, 
155 
gmx_bool bLastStep, 
156 
gmx_bool bConstrain) 
157 
{ 
158 
constexpr bool realIsDouble = GMX_DOUBLE; 
159 
char buffer[2048]; 
160  
161 
if (!std::isfinite(fmax))

162 
{ 
163 
sprintf(buffer, 
164 
"\nEnergy minimization has stopped because the force "

165 
"on at least one atom is not finite. This usually means "

166 
"atoms are overlapping. Modify the input coordinates to "

167 
"remove atom overlap or use softcore potentials with "

168 
"the free energy code to avoid infinite forces.\n%s",

169 
!realIsDouble ? 
170 
"You could also be lucky that switching to double precision "

171 
"is sufficient to obtain finite forces.\n" :

172 
"");

173 
} 
174 
else if (bLastStep) 
175 
{ 
176 
sprintf(buffer, 
177 
"\nEnergy minimization reached the maximum number "

178 
"of steps before the forces reached the requested "

179 
"precision Fmax < %g.\n", ftol);

180 
} 
181 
else

182 
{ 
183 
sprintf(buffer, 
184 
"\nEnergy minimization has stopped, but the forces have "

185 
"not converged to the requested precision Fmax < %g (which "

186 
"may not be possible for your system). It stopped "

187 
"because the algorithm tried to make a new step whose size "

188 
"was too small, or there was no change in the energy since "

189 
"last step. Either way, we regard the minimization as "

190 
"converged to within the available machine precision, "

191 
"given your starting configuration and EM parameters.\n%s%s",

192 
ftol, 
193 
!realIsDouble ? 
194 
"\nDouble precision normally gives you higher accuracy, but "

195 
"this is often not needed for preparing to run molecular "

196 
"dynamics.\n" :

197 
"",

198 
bConstrain ? 
199 
"You might need to increase your constraint accuracy, or turn\n"

200 
"off constraints altogether (set constraints = none in mdp file)\n" :

201 
"");

202 
} 
203  
204 
fputs(wrap_lines(buffer, 78, 0, FALSE), stderr); 
205 
fputs(wrap_lines(buffer, 78, 0, FALSE), fp); 
206 
} 
207  
208 
//! Print message about convergence of the EM

209 
static void print_converged(FILE *fp, const char *alg, real ftol, 
210 
int64_t count, gmx_bool bDone, int64_t nsteps, 
211 
const em_state_t *ems, double sqrtNumAtoms) 
212 
{ 
213 
char buf[STEPSTRSIZE];

214  
215 
if (bDone)

216 
{ 
217 
fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",

218 
alg, ftol, gmx_step_str(count, buf)); 
219 
} 
220 
else if (count < nsteps) 
221 
{ 
222 
fprintf(fp, "\n%s converged to machine precision in %s steps,\n"

223 
"but did not reach the requested Fmax < %g.\n",

224 
alg, gmx_step_str(count, buf), ftol); 
225 
} 
226 
else

227 
{ 
228 
fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",

229 
alg, ftol, gmx_step_str(count, buf)); 
230 
} 
231  
232 
#if GMX_DOUBLE

233 
fprintf(fp, "Potential Energy = %21.14e\n", ems>epot);

234 
fprintf(fp, "Maximum force = %21.14e on atom %d\n", ems>fmax, ems>a_fmax + 1); 
235 
fprintf(fp, "Norm of force = %21.14e\n", ems>fnorm/sqrtNumAtoms);

236 
#else

237 
fprintf(fp, "Potential Energy = %14.7e\n", ems>epot);

238 
fprintf(fp, "Maximum force = %14.7e on atom %d\n", ems>fmax, ems>a_fmax + 1); 
239 
fprintf(fp, "Norm of force = %14.7e\n", ems>fnorm/sqrtNumAtoms);

240 
#endif

241 
} 
242  
243 
//! Compute the norm and max of the force array in parallel

244 
static void get_f_norm_max(const t_commrec *cr, 
245 
t_grpopts *opts, t_mdatoms *mdatoms, const rvec *f,

246 
real *fnorm, real *fmax, int *a_fmax)

247 
{ 
248 
double fnorm2, *sum;

249 
real fmax2, fam; 
250 
int la_max, a_max, start, end, i, m, gf;

251  
252 
/* This routine finds the largest force and returns it.

253 
* On parallel machines the global max is taken.

254 
*/

255 
fnorm2 = 0;

256 
fmax2 = 0;

257 
la_max = 1;

258 
start = 0;

259 
end = mdatoms>homenr; 
260 
if (mdatoms>cFREEZE)

261 
{ 
262 
for (i = start; i < end; i++)

263 
{ 
264 
gf = mdatoms>cFREEZE[i]; 
265 
fam = 0;

266 
for (m = 0; m < DIM; m++) 
267 
{ 
268 
if (!opts>nFreeze[gf][m])

269 
{ 
270 
fam += gmx::square(f[i][m]); 
271 
} 
272 
} 
273 
fnorm2 += fam; 
274 
if (fam > fmax2)

275 
{ 
276 
fmax2 = fam; 
277 
la_max = i; 
278 
} 
279 
} 
280 
} 
281 
else

282 
{ 
283 
for (i = start; i < end; i++)

284 
{ 
285 
fam = norm2(f[i]); 
286 
fnorm2 += fam; 
287 
if (fam > fmax2)

288 
{ 
289 
fmax2 = fam; 
290 
la_max = i; 
291 
} 
292 
} 
293 
} 
294  
295 
if (la_max >= 0 && DOMAINDECOMP(cr)) 
296 
{ 
297 
a_max = cr>dd>globalAtomIndices[la_max]; 
298 
} 
299 
else

300 
{ 
301 
a_max = la_max; 
302 
} 
303 
if (PAR(cr))

304 
{ 
305 
snew(sum, 2*cr>nnodes+1); 
306 
sum[2*cr>nodeid] = fmax2;

307 
sum[2*cr>nodeid+1] = a_max; 
308 
sum[2*cr>nnodes] = fnorm2;

309 
gmx_sumd(2*cr>nnodes+1, sum, cr); 
310 
fnorm2 = sum[2*cr>nnodes];

311 
/* Determine the global maximum */

312 
for (i = 0; i < cr>nnodes; i++) 
313 
{ 
314 
if (sum[2*i] > fmax2) 
315 
{ 
316 
fmax2 = sum[2*i];

317 
a_max = gmx::roundToInt(sum[2*i+1]); 
318 
} 
319 
} 
320 
sfree(sum); 
321 
} 
322  
323 
if (fnorm)

324 
{ 
325 
*fnorm = sqrt(fnorm2); 
326 
} 
327 
if (fmax)

328 
{ 
329 
*fmax = sqrt(fmax2); 
330 
} 
331 
if (a_fmax)

332 
{ 
333 
*a_fmax = a_max; 
334 
} 
335 
} 
336  
337 
//! Compute the norm of the force

338 
static void get_state_f_norm_max(const t_commrec *cr, 
339 
t_grpopts *opts, t_mdatoms *mdatoms, 
340 
em_state_t *ems) 
341 
{ 
342 
get_f_norm_max(cr, opts, mdatoms, ems>f.rvec_array(), 
343 
&ems>fnorm, &ems>fmax, &ems>a_fmax); 
344 
} 
345  
346 
//! Initialize the energy minimization

347 
static void init_em(FILE *fplog, 
348 
const gmx::MDLogger &mdlog,

349 
const char *title, 
350 
const t_commrec *cr,

351 
const gmx_multisim_t *ms,

352 
gmx::IMDOutputProvider *outputProvider, 
353 
t_inputrec *ir, 
354 
const MdrunOptions &mdrunOptions,

355 
t_state *state_global, gmx_mtop_t *top_global, 
356 
em_state_t *ems, gmx_localtop_t **top, 
357 
t_nrnb *nrnb, rvec mu_tot, 
358 
t_forcerec *fr, gmx_enerdata_t **enerd, 
359 
t_graph **graph, gmx::MDAtoms *mdAtoms, gmx_global_stat_t *gstat, 
360 
gmx_vsite_t *vsite, gmx::Constraints *constr, gmx_shellfc_t **shellfc, 
361 
int nfile, const t_filenm fnm[], 
362 
gmx_mdoutf_t *outf, t_mdebin **mdebin, 
363 
gmx_wallcycle_t wcycle) 
364 
{ 
365 
real dvdl_constr; 
366  
367 
if (fplog)

368 
{ 
369 
fprintf(fplog, "Initiating %s\n", title);

370 
} 
371  
372 
if (MASTER(cr))

373 
{ 
374 
state_global>ngtc = 0;

375  
376 
/* Initialize lambda variables */

377 
initialize_lambdas(fplog, ir, &(state_global>fep_state), state_global>lambda, nullptr);

378 
} 
379  
380 
init_nrnb(nrnb); 
381  
382 
/* Interactive molecular dynamics */

383 
init_IMD(ir, cr, ms, top_global, fplog, 1,

384 
MASTER(cr) ? state_global>x.rvec_array() : nullptr,

385 
nfile, fnm, nullptr, mdrunOptions);

386  
387 
if (ir>eI == eiNM)

388 
{ 
389 
GMX_ASSERT(shellfc != nullptr, "With NM we always support shells"); 
390  
391 
*shellfc = init_shell_flexcon(stdout, 
392 
top_global, 
393 
constr ? constr>numFlexibleConstraints() : 0,

394 
ir>nstcalcenergy, 
395 
DOMAINDECOMP(cr)); 
396 
} 
397 
else

398 
{ 
399 
GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir>eI), "This else currently only handles energy minimizers, consider if your algorithm needs shell/flexibleconstraint support");

400  
401 
/* With energy minimization, shells and flexible constraints are

402 
* automatically minimized when treated like normal DOFS.

403 
*/

404 
if (shellfc != nullptr) 
405 
{ 
406 
*shellfc = nullptr;

407 
} 
408 
} 
409  
410 
auto mdatoms = mdAtoms>mdatoms();

411 
if (DOMAINDECOMP(cr))

412 
{ 
413 
*top = dd_init_local_top(top_global); 
414  
415 
dd_init_local_state(cr>dd, state_global, &ems>s); 
416  
417 
/* Distribute the charge groups over the nodes from the master node */

418 
dd_partition_system(fplog, mdlog, ir>init_step, cr, TRUE, 1,

419 
state_global, top_global, ir, 
420 
&ems>s, &ems>f, mdAtoms, *top, 
421 
fr, vsite, constr, 
422 
nrnb, nullptr, FALSE);

423 
dd_store_state(cr>dd, &ems>s); 
424  
425 
*graph = nullptr;

426 
} 
427 
else

428 
{ 
429 
state_change_natoms(state_global, state_global>natoms); 
430 
/* Just copy the state */

431 
ems>s = *state_global; 
432 
state_change_natoms(&ems>s, ems>s.natoms); 
433 
ems>f.resizeWithPadding(ems>s.natoms); 
434  
435 
snew(*top, 1);

436 
mdAlgorithmsSetupAtomData(cr, ir, top_global, *top, fr, 
437 
graph, mdAtoms, 
438 
constr, vsite, shellfc ? *shellfc : nullptr);

439  
440 
if (vsite)

441 
{ 
442 
set_vsite_top(vsite, *top, mdatoms); 
443 
} 
444 
} 
445  
446 
update_mdatoms(mdAtoms>mdatoms(), ems>s.lambda[efptMASS]); 
447  
448 
if (constr)

449 
{ 
450 
// TODO how should this crossmodule support dependency be managed?

451 
if (ir>eConstrAlg == econtSHAKE &&

452 
gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)

453 
{ 
454 
gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",

455 
econstr_names[econtSHAKE], econstr_names[econtLINCS]); 
456 
} 
457  
458 
if (!ir>bContinuation)

459 
{ 
460 
/* Constrain the starting coordinates */

461 
dvdl_constr = 0;

462 
constr>apply(TRUE, TRUE, 
463 
1, 0, 1.0, 
464 
ems>s.x.rvec_array(), 
465 
ems>s.x.rvec_array(), 
466 
nullptr,

467 
ems>s.box, 
468 
ems>s.lambda[efptFEP], &dvdl_constr, 
469 
nullptr, nullptr, gmx::ConstraintVariable::Positions); 
470 
} 
471 
} 
472  
473 
if (PAR(cr))

474 
{ 
475 
*gstat = global_stat_init(ir); 
476 
} 
477 
else

478 
{ 
479 
*gstat = nullptr;

480 
} 
481  
482 
*outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, ir, top_global, nullptr, wcycle);

483  
484 
snew(*enerd, 1);

485 
init_enerdata(top_global>groups.grps[egcENER].nr, ir>fepvals>n_lambda, 
486 
*enerd); 
487  
488 
if (mdebin != nullptr) 
489 
{ 
490 
/* Init bin for energy stuff */

491 
*mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, nullptr);

492 
} 
493  
494 
clear_rvec(mu_tot); 
495 
calc_shifts(ems>s.box, fr>shift_vec); 
496 
} 
497  
498 
//! Finalize the minimization

499 
static void finish_em(const t_commrec *cr, gmx_mdoutf_t outf, 
500 
gmx_walltime_accounting_t walltime_accounting, 
501 
gmx_wallcycle_t wcycle) 
502 
{ 
503 
if (!thisRankHasDuty(cr, DUTY_PME))

504 
{ 
505 
/* Tell the PME only node to finish */

506 
gmx_pme_send_finish(cr); 
507 
} 
508  
509 
done_mdoutf(outf); 
510  
511 
em_time_end(walltime_accounting, wcycle); 
512 
} 
513  
514 
//! Swap two different EM states during minimization

515 
static void swap_em_state(em_state_t **ems1, em_state_t **ems2) 
516 
{ 
517 
em_state_t *tmp; 
518  
519 
tmp = *ems1; 
520 
*ems1 = *ems2; 
521 
*ems2 = tmp; 
522 
} 
523  
524 
//! Save the EM trajectory

525 
static void write_em_traj(FILE *fplog, const t_commrec *cr, 
526 
gmx_mdoutf_t outf, 
527 
gmx_bool bX, gmx_bool bF, const char *confout, 
528 
gmx_mtop_t *top_global, 
529 
t_inputrec *ir, int64_t step, 
530 
em_state_t *state, 
531 
t_state *state_global, 
532 
ObservablesHistory *observablesHistory) 
533 
{ 
534 
int mdof_flags = 0; 
535  
536 
if (bX)

537 
{ 
538 
mdof_flags = MDOF_X; 
539 
} 
540 
if (bF)

541 
{ 
542 
mdof_flags = MDOF_F; 
543 
} 
544  
545 
/* If we want IMD output, set appropriate MDOF flag */

546 
if (ir>bIMD)

547 
{ 
548 
mdof_flags = MDOF_IMD; 
549 
} 
550  
551 
mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, 
552 
top_global, step, static_cast<double>(step), 
553 
&state>s, state_global, observablesHistory, 
554 
state>f); 
555  
556 
if (confout != nullptr) 
557 
{ 
558 
if (DOMAINDECOMP(cr))

559 
{ 
560 
/* If bX=true, x was collected to state_global in the call above */

561 
if (!bX)

562 
{ 
563 
gmx::ArrayRef<gmx::RVec> globalXRef = MASTER(cr) ? makeArrayRef(state_global>x) : gmx::EmptyArrayRef(); 
564 
dd_collect_vec(cr>dd, &state>s, makeArrayRef(state>s.x), globalXRef); 
565 
} 
566 
} 
567 
else

568 
{ 
569 
/* Copy the local state pointer */

570 
state_global = &state>s; 
571 
} 
572  
573 
if (MASTER(cr))

574 
{ 
575 
if (ir>ePBC != epbcNONE && !ir>bPeriodicMols && DOMAINDECOMP(cr))

576 
{ 
577 
/* Make molecules whole only for confout writing */

578 
do_pbc_mtop(fplog, ir>ePBC, state>s.box, top_global, 
579 
state_global>x.rvec_array()); 
580 
} 
581  
582 
write_sto_conf_mtop(confout, 
583 
*top_global>name, top_global, 
584 
state_global>x.rvec_array(), nullptr, ir>ePBC, state>s.box);

585 
} 
586 
} 
587 
} 
588  
589 
//! \brief Do one minimization step

590 
//

591 
// \returns true when the step succeeded, false when a constraint error occurred

592 
static bool do_em_step(const t_commrec *cr, 
593 
t_inputrec *ir, t_mdatoms *md, 
594 
em_state_t *ems1, real a, const PaddedVector<gmx::RVec> *force,

595 
em_state_t *ems2, 
596 
gmx::Constraints *constr, 
597 
int64_t count) 
598  
599 
{ 
600 
t_state *s1, *s2; 
601 
int start, end;

602 
real dvdl_constr; 
603 
int nthreads gmx_unused;

604  
605 
bool validStep = true; 
606  
607 
s1 = &ems1>s; 
608 
s2 = &ems2>s; 
609  
610 
if (DOMAINDECOMP(cr) && s1>ddp_count != cr>dd>ddp_count)

611 
{ 
612 
gmx_incons("state mismatch in do_em_step");

613 
} 
614  
615 
s2>flags = s1>flags; 
616  
617 
if (s2>natoms != s1>natoms)

618 
{ 
619 
state_change_natoms(s2, s1>natoms); 
620 
ems2>f.resizeWithPadding(s2>natoms); 
621 
} 
622 
if (DOMAINDECOMP(cr) && s2>cg_gl.size() != s1>cg_gl.size())

623 
{ 
624 
s2>cg_gl.resize(s1>cg_gl.size()); 
625 
} 
626  
627 
copy_mat(s1>box, s2>box); 
628 
/* Copy free energy state */

629 
s2>lambda = s1>lambda; 
630 
copy_mat(s1>box, s2>box); 
631  
632 
start = 0;

633 
end = md>homenr; 
634  
635 
nthreads = gmx_omp_nthreads_get(emntUpdate); 
636 
#pragma omp parallel num_threads(nthreads)

637 
{ 
638 
const rvec *x1 = s1>x.rvec_array();

639 
rvec *x2 = s2>x.rvec_array(); 
640 
const rvec *f = force>rvec_array();

641  
642 
int gf = 0; 
643 
#pragma omp for schedule(static) nowait 
644 
for (int i = start; i < end; i++) 
645 
{ 
646 
try

647 
{ 
648 
if (md>cFREEZE)

649 
{ 
650 
gf = md>cFREEZE[i]; 
651 
} 
652 
for (int m = 0; m < DIM; m++) 
653 
{ 
654 
if (ir>opts.nFreeze[gf][m])

655 
{ 
656 
x2[i][m] = x1[i][m]; 
657 
} 
658 
else

659 
{ 
660 
x2[i][m] = x1[i][m] + a*f[i][m]; 
661 
} 
662 
} 
663 
} 
664 
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; 
665 
} 
666  
667 
if (s2>flags & (1<<estCGP)) 
668 
{ 
669 
/* Copy the CG p vector */

670 
const rvec *p1 = s1>cg_p.rvec_array();

671 
rvec *p2 = s2>cg_p.rvec_array(); 
672 
#pragma omp for schedule(static) nowait 
673 
for (int i = start; i < end; i++) 
674 
{ 
675 
// Trivial OpenMP block that does not throw

676 
copy_rvec(p1[i], p2[i]); 
677 
} 
678 
} 
679  
680 
if (DOMAINDECOMP(cr))

681 
{ 
682 
s2>ddp_count = s1>ddp_count; 
683  
684 
/* OpenMP does not supported unsigned loop variables */

685 
#pragma omp for schedule(static) nowait 
686 
for (int i = 0; i < static_cast<int>(s2>cg_gl.size()); i++) 
687 
{ 
688 
s2>cg_gl[i] = s1>cg_gl[i]; 
689 
} 
690 
s2>ddp_count_cg_gl = s1>ddp_count_cg_gl; 
691 
} 
692 
} 
693  
694 
if (constr)

695 
{ 
696 
dvdl_constr = 0;

697 
validStep = 
698 
constr>apply(TRUE, TRUE, 
699 
count, 0, 1.0, 
700 
s1>x.rvec_array(), s2>x.rvec_array(), 
701 
nullptr, s2>box,

702 
s2>lambda[efptBONDED], &dvdl_constr, 
703 
nullptr, nullptr, gmx::ConstraintVariable::Positions); 
704  
705 
if (cr>nnodes > 1) 
706 
{ 
707 
/* This global reduction will affect performance at high

708 
* parallelization, but we can not really avoid it.

709 
* But usually EM is not run at high parallelization.

710 
*/

711 
int reductionBuffer = static_cast<int>(!validStep); 
712 
gmx_sumi(1, &reductionBuffer, cr);

713 
validStep = (reductionBuffer == 0);

714 
} 
715  
716 
// We should move this check to the different minimizers

717 
if (!validStep && ir>eI != eiSteep)

718 
{ 
719 
gmx_fatal(FARGS, "The coordinates could not be constrained. Minimizer '%s' can not handle constraint failures, use minimizer '%s' before using '%s'.",

720 
EI(ir>eI), EI(eiSteep), EI(ir>eI)); 
721 
} 
722 
} 
723  
724 
return validStep;

725 
} 
726  
727 
//! Prepare EM for using domain decomposition parallellization

728 
static void em_dd_partition_system(FILE *fplog, 
729 
const gmx::MDLogger &mdlog,

730 
int step, const t_commrec *cr, 
731 
gmx_mtop_t *top_global, t_inputrec *ir, 
732 
em_state_t *ems, gmx_localtop_t *top, 
733 
gmx::MDAtoms *mdAtoms, t_forcerec *fr, 
734 
gmx_vsite_t *vsite, gmx::Constraints *constr, 
735 
t_nrnb *nrnb, gmx_wallcycle_t wcycle) 
736 
{ 
737 
/* Repartition the domain decomposition */

738 
dd_partition_system(fplog, mdlog, step, cr, FALSE, 1,

739 
nullptr, top_global, ir,

740 
&ems>s, &ems>f, 
741 
mdAtoms, top, fr, vsite, constr, 
742 
nrnb, wcycle, FALSE); 
743 
dd_store_state(cr>dd, &ems>s); 
744 
} 
745  
746 
namespace

747 
{ 
748  
749 
/*! \brief Class to handle the work of setting and doing an energy evaluation.

750 
*

751 
* This class is a mere aggregate of parameters to pass to evaluate an

752 
* energy, so that future changes to names and types of them consume

753 
* less time when refactoring other code.

754 
*

755 
* Aggregate initialization is used, for which the chief risk is that

756 
* if a member is added at the end and not all initializer lists are

757 
* updated, then the member will be value initialized, which will

758 
* typically mean initialization to zero.

759 
*

760 
* We only want to construct one of these with an initializer list, so

761 
* we explicitly delete the default constructor. */

762 
class EnergyEvaluator 
763 
{ 
764 
public:

765 
//! We only intend to construct such objects with an initializer list.

766 
#if __GNUC__ > 4  (__GNUC__ == 4 && __GNUC_MINOR__ >= 9) 
767 
// Aspects of the C++11 spec changed after GCC 4.8.5, and

768 
// compilation of the initializer list construction in

769 
// runner.cpp fails in GCC 4.8.5.

770 
EnergyEvaluator() = delete;

771 
#endif

772 
/*! \brief Evaluates an energy on the state in \c ems.

773 
*

774 
* \todo In practice, the same objects mu_tot, vir, and pres

775 
* are always passed to this function, so we would rather have

776 
* them as data members. However, their Carray types are

777 
* unsuited for aggregate initialization. When the types

778 
* improve, the call signature of this method can be reduced.

779 
*/

780 
void run(em_state_t *ems, rvec mu_tot,

781 
tensor vir, tensor pres, 
782 
int64_t count, gmx_bool bFirst); 
783 
//! Handles logging (deprecated).

784 
FILE *fplog; 
785 
//! Handles logging.

786 
const gmx::MDLogger &mdlog;

787 
//! Handles communication.

788 
const t_commrec *cr;

789 
//! Coordinates multisimulations.

790 
const gmx_multisim_t *ms;

791 
//! Holds the simulation topology.

792 
gmx_mtop_t *top_global; 
793 
//! Holds the domain topology.

794 
gmx_localtop_t *top; 
795 
//! User input options.

796 
t_inputrec *inputrec; 
797 
//! Manages flop accounting.

798 
t_nrnb *nrnb; 
799 
//! Manages wall cycle accounting.

800 
gmx_wallcycle_t wcycle; 
801 
//! Coordinates global reduction.

802 
gmx_global_stat_t gstat; 
803 
//! Handles virtual sites.

804 
gmx_vsite_t *vsite; 
805 
//! Handles constraints.

806 
gmx::Constraints *constr; 
807 
//! Handles strange things.

808 
t_fcdata *fcd; 
809 
//! Molecular graph for SHAKE.

810 
t_graph *graph; 
811 
//! Peratom data for this domain.

812 
gmx::MDAtoms *mdAtoms; 
813 
//! Handles how to calculate the forces.

814 
t_forcerec *fr; 
815 
//! Schedule of forcecalculation work each step for this task.

816 
gmx::PpForceWorkload *ppForceWorkload; 
817 
//! Stores the computed energies.

818 
gmx_enerdata_t *enerd; 
819 
}; 
820  
821 
void

822 
EnergyEvaluator::run(em_state_t *ems, rvec mu_tot, 
823 
tensor vir, tensor pres, 
824 
int64_t count, gmx_bool bFirst) 
825 
{ 
826 
real t; 
827 
gmx_bool bNS; 
828 
tensor force_vir, shake_vir, ekin; 
829 
real dvdl_constr, prescorr, enercorr, dvdlcorr; 
830 
real terminate = 0;

831  
832 
/* Set the time to the initial time, the time does not change during EM */

833 
t = inputrec>init_t; 
834  
835 
if (bFirst 

836 
(DOMAINDECOMP(cr) && ems>s.ddp_count < cr>dd>ddp_count)) 
837 
{ 
838 
/* This is the first state or an old state used before the last ns */

839 
bNS = TRUE; 
840 
} 
841 
else

842 
{ 
843 
bNS = FALSE; 
844 
if (inputrec>nstlist > 0) 
845 
{ 
846 
bNS = TRUE; 
847 
} 
848 
} 
849  
850 
if (vsite)

851 
{ 
852 
construct_vsites(vsite, ems>s.x.rvec_array(), 1, nullptr, 
853 
top>idef.iparams, top>idef.il, 
854 
fr>ePBC, fr>bMolPBC, cr, ems>s.box); 
855 
} 
856  
857 
if (DOMAINDECOMP(cr) && bNS)

858 
{ 
859 
/* Repartition the domain decomposition */

860 
em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, 
861 
ems, top, mdAtoms, fr, vsite, constr, 
862 
nrnb, wcycle); 
863 
} 
864  
865 
/* Calc force & energy on new trial position */

866 
/* do_force always puts the charge groups in the box and shifts again

867 
* We do not unshift, so molecules are always whole in congrad.c

868 
*/

869 
do_force(fplog, cr, ms, inputrec, nullptr, nullptr, 
870 
count, nrnb, wcycle, top, &top_global>groups, 
871 
ems>s.box, ems>s.x.arrayRefWithPadding(), &ems>s.hist, 
872 
ems>f.arrayRefWithPadding(), force_vir, mdAtoms>mdatoms(), enerd, fcd, 
873 
ems>s.lambda, graph, fr, ppForceWorkload, vsite, mu_tot, t, nullptr,

874 
GMX_FORCE_STATECHANGED  GMX_FORCE_ALLFORCES  
875 
GMX_FORCE_VIRIAL  GMX_FORCE_ENERGY  
876 
(bNS ? GMX_FORCE_NS : 0),

877 
DOMAINDECOMP(cr) ? 
878 
DdOpenBalanceRegionBeforeForceComputation::yes : 
879 
DdOpenBalanceRegionBeforeForceComputation::no, 
880 
DOMAINDECOMP(cr) ? 
881 
DdCloseBalanceRegionAfterForceComputation::yes : 
882 
DdCloseBalanceRegionAfterForceComputation::no); 
883  
884 
/* Clear the unused shake virial and pressure */

885 
clear_mat(shake_vir); 
886 
clear_mat(pres); 
887  
888 
/* Communicate stuff when parallel */

889 
if (PAR(cr) && inputrec>eI != eiNM)

890 
{ 
891 
wallcycle_start(wcycle, ewcMoveE); 
892  
893 
global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot, 
894 
inputrec, nullptr, nullptr, nullptr, 1, &terminate, 
895 
nullptr, FALSE,

896 
CGLO_ENERGY  
897 
CGLO_PRESSURE  
898 
CGLO_CONSTRAINT); 
899  
900 
wallcycle_stop(wcycle, ewcMoveE); 
901 
} 
902  
903 
/* Calculate long range corrections to pressure and energy */

904 
calc_dispcorr(inputrec, fr, ems>s.box, ems>s.lambda[efptVDW], 
905 
pres, force_vir, &prescorr, &enercorr, &dvdlcorr); 
906 
enerd>term[F_DISPCORR] = enercorr; 
907 
enerd>term[F_EPOT] += enercorr; 
908 
enerd>term[F_PRES] += prescorr; 
909 
enerd>term[F_DVDL] += dvdlcorr; 
910  
911 
ems>epot = enerd>term[F_EPOT]; 
912  
913 
if (constr)

914 
{ 
915 
/* Project out the constraint components of the force */

916 
dvdl_constr = 0;

917 
rvec *f_rvec = ems>f.rvec_array(); 
918 
constr>apply(FALSE, FALSE, 
919 
count, 0, 1.0, 
920 
ems>s.x.rvec_array(), f_rvec, f_rvec, 
921 
ems>s.box, 
922 
ems>s.lambda[efptBONDED], &dvdl_constr, 
923 
nullptr, &shake_vir, gmx::ConstraintVariable::ForceDispl);

924 
enerd>term[F_DVDL_CONSTR] += dvdl_constr; 
925 
m_add(force_vir, shake_vir, vir); 
926 
} 
927 
else

928 
{ 
929 
copy_mat(force_vir, vir); 
930 
} 
931  
932 
clear_mat(ekin); 
933 
enerd>term[F_PRES] = 
934 
calc_pres(fr>ePBC, inputrec>nwall, ems>s.box, ekin, vir, pres); 
935  
936 
sum_dhdl(enerd, ems>s.lambda, inputrec>fepvals); 
937  
938 
if (EI_ENERGY_MINIMIZATION(inputrec>eI))

939 
{ 
940 
get_state_f_norm_max(cr, &(inputrec>opts), mdAtoms>mdatoms(), ems); 
941 
} 
942 
} 
943  
944 
} // namespace

945  
946 
//! Parallel utility summing energies and forces

947 
static double reorder_partsum(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, 
948 
gmx_mtop_t *top_global, 
949 
em_state_t *s_min, em_state_t *s_b) 
950 
{ 
951 
t_block *cgs_gl; 
952 
int ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m;

953 
double partsum;

954 
unsigned char *grpnrFREEZE; 
955  
956 
if (debug)

957 
{ 
958 
fprintf(debug, "Doing reorder_partsum\n");

959 
} 
960  
961 
const rvec *fm = s_min>f.rvec_array();

962 
const rvec *fb = s_b>f.rvec_array();

963  
964 
cgs_gl = dd_charge_groups_global(cr>dd); 
965 
index = cgs_gl>index; 
966  
967 
/* Collect fm in a global vector fmg.

968 
* This conflicts with the spirit of domain decomposition,

969 
* but to fully optimize this a much more complicated algorithm is required.

970 
*/

971 
rvec *fmg; 
972 
snew(fmg, top_global>natoms); 
973  
974 
ncg = s_min>s.cg_gl.size(); 
975 
cg_gl = s_min>s.cg_gl.data(); 
976 
i = 0;

977 
for (c = 0; c < ncg; c++) 
978 
{ 
979 
cg = cg_gl[c]; 
980 
a0 = index[cg]; 
981 
a1 = index[cg+1];

982 
for (a = a0; a < a1; a++)

983 
{ 
984 
copy_rvec(fm[i], fmg[a]); 
985 
i++; 
986 
} 
987 
} 
988 
gmx_sum(top_global>natoms*3, fmg[0], cr); 
989  
990 
/* Now we will determine the part of the sum for the cgs in state s_b */

991 
ncg = s_b>s.cg_gl.size(); 
992 
cg_gl = s_b>s.cg_gl.data(); 
993 
partsum = 0;

994 
i = 0;

995 
gf = 0;

996 
grpnrFREEZE = top_global>groups.grpnr[egcFREEZE]; 
997 
for (c = 0; c < ncg; c++) 
998 
{ 
999 
cg = cg_gl[c]; 
1000 
a0 = index[cg]; 
1001 
a1 = index[cg+1];

1002 
for (a = a0; a < a1; a++)

1003 
{ 
1004 
if (mdatoms>cFREEZE && grpnrFREEZE)

1005 
{ 
1006 
gf = grpnrFREEZE[i]; 
1007 
} 
1008 
for (m = 0; m < DIM; m++) 
1009 
{ 
1010 
if (!opts>nFreeze[gf][m])

1011 
{ 
1012 
partsum += (fb[i][m]  fmg[a][m])*fb[i][m]; 
1013 
} 
1014 
} 
1015 
i++; 
1016 
} 
1017 
} 
1018  
1019 
sfree(fmg); 
1020  
1021 
return partsum;

1022 
} 
1023  
1024 
//! Print some stuff, like beta, whatever that means.

1025 
static real pr_beta(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, 
1026 
gmx_mtop_t *top_global, 
1027 
em_state_t *s_min, em_state_t *s_b) 
1028 
{ 
1029 
double sum;

1030  
1031 
/* This is just the classical PolakRibiere calculation of beta;

1032 
* it looks a bit complicated since we take freeze groups into account,

1033 
* and might have to sum it in parallel runs.

1034 
*/

1035  
1036 
if (!DOMAINDECOMP(cr) 

1037 
(s_min>s.ddp_count == cr>dd>ddp_count && 
1038 
s_b>s.ddp_count == cr>dd>ddp_count)) 
1039 
{ 
1040 
const rvec *fm = s_min>f.rvec_array();

1041 
const rvec *fb = s_b>f.rvec_array();

1042 
sum = 0;

1043 
int gf = 0; 
1044 
/* This part of code can be incorrect with DD,

1045 
* since the atom ordering in s_b and s_min might differ.

1046 
*/

1047 
for (int i = 0; i < mdatoms>homenr; i++) 
1048 
{ 
1049 
if (mdatoms>cFREEZE)

1050 
{ 
1051 
gf = mdatoms>cFREEZE[i]; 
1052 
} 
1053 
for (int m = 0; m < DIM; m++) 
1054 
{ 
1055 
if (!opts>nFreeze[gf][m])

1056 
{ 
1057 
sum += (fb[i][m]  fm[i][m])*fb[i][m]; 
1058 
} 
1059 
} 
1060 
} 
1061 
} 
1062 
else

1063 
{ 
1064 
/* We need to reorder cgs while summing */

1065 
sum = reorder_partsum(cr, opts, mdatoms, top_global, s_min, s_b); 
1066 
} 
1067 
if (PAR(cr))

1068 
{ 
1069 
gmx_sumd(1, &sum, cr);

1070 
} 
1071  
1072 
return sum/gmx::square(s_min>fnorm);

1073 
} 
1074  
1075 
namespace gmx

1076 
{ 
1077  
1078 
void

1079 
Integrator::do_cg() 
1080 
{ 
1081 
const char *CG = "PolakRibiere Conjugate Gradients"; 
1082  
1083 
gmx_localtop_t *top; 
1084 
gmx_enerdata_t *enerd; 
1085 
gmx_global_stat_t gstat; 
1086 
t_graph *graph; 
1087 
double tmp, minstep;

1088 
real stepsize; 
1089 
real a, b, c, beta = 0.0; 
1090 
real epot_repl = 0;

1091 
real pnorm; 
1092 
t_mdebin *mdebin; 
1093 
gmx_bool converged, foundlower; 
1094 
rvec mu_tot; 
1095 
gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f; 
1096 
tensor vir, pres; 
1097 
int number_steps, neval = 0, nstcg = inputrec>nstcgsteep; 
1098 
gmx_mdoutf_t outf; 
1099 
int m, step, nminstep;

1100 
auto mdatoms = mdAtoms>mdatoms();

1101  
1102 
GMX_LOG(mdlog.info).asParagraph(). 
1103 
appendText("Note that activating conjugate gradient energy minimization via the "

1104 
"integrator .mdp option and the command gmx mdrun may "

1105 
"be available in a different form in a future version of GROMACS, "

1106 
"e.g. gmx minimize and an .mdp option.");

1107  
1108 
step = 0;

1109  
1110 
if (MASTER(cr))

1111 
{ 
1112 
// In CG, the state is extended with a search direction

1113 
state_global>flags = (1<<estCGP);

1114  
1115 
// Ensure the extra peratom state array gets allocated

1116 
state_change_natoms(state_global, state_global>natoms); 
1117  
1118 
// Initialize the search direction to zero

1119 
for (RVec &cg_p : state_global>cg_p)

1120 
{ 
1121 
cg_p = { 0, 0, 0 }; 
1122 
} 
1123 
} 
1124  
1125 
/* Create 4 states on the stack and extract pointers that we will swap */

1126 
em_state_t s0 {}, s1 {}, s2 {}, s3 {}; 
1127 
em_state_t *s_min = &s0; 
1128 
em_state_t *s_a = &s1; 
1129 
em_state_t *s_b = &s2; 
1130 
em_state_t *s_c = &s3; 
1131  
1132 
/* Init em and store the local state in s_min */

1133 
init_em(fplog, mdlog, CG, cr, ms, outputProvider, inputrec, mdrunOptions, 
1134 
state_global, top_global, s_min, &top, 
1135 
nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat, 
1136 
vsite, constr, nullptr,

1137 
nfile, fnm, &outf, &mdebin, wcycle); 
1138  
1139 
/* Print to log file */

1140 
print_em_start(fplog, cr, walltime_accounting, wcycle, CG); 
1141  
1142 
/* Max number of steps */

1143 
number_steps = inputrec>nsteps; 
1144  
1145 
if (MASTER(cr))

1146 
{ 
1147 
sp_header(stderr, CG, inputrec>em_tol, number_steps); 
1148 
} 
1149 
if (fplog)

1150 
{ 
1151 
sp_header(fplog, CG, inputrec>em_tol, number_steps); 
1152 
} 
1153  
1154 
EnergyEvaluator energyEvaluator { 
1155 
fplog, mdlog, cr, ms, 
1156 
top_global, top, 
1157 
inputrec, nrnb, wcycle, gstat, 
1158 
vsite, constr, fcd, graph, 
1159 
mdAtoms, fr, ppForceWorkload, enerd 
1160 
}; 
1161 
/* Call the force routine and some auxiliary (neighboursearching etc.) */

1162 
/* do_force always puts the charge groups in the box and shifts again

1163 
* We do not unshift, so molecules are always whole in congrad.c

1164 
*/

1165 
energyEvaluator.run(s_min, mu_tot, vir, pres, 1, TRUE);

1166  
1167 
if (MASTER(cr))

1168 
{ 
1169 
/* Copy stuff to the energy bin for easy printing etc. */

1170 
matrix nullBox = {}; 
1171 
upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step), 
1172 
mdatoms>tmass, enerd, nullptr, nullptr, nullptr, nullBox, 
1173 
nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); 
1174  
1175 
print_ebin_header(fplog, step, step); 
1176 
print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, 
1177 
mdebin, fcd, &(top_global>groups), &(inputrec>opts), nullptr);

1178 
} 
1179  
1180 
/* Estimate/guess the initial stepsize */

1181 
stepsize = inputrec>em_stepsize/s_min>fnorm; 
1182  
1183 
if (MASTER(cr))

1184 
{ 
1185 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
1186 
fprintf(stderr, " Fmax = %12.5e on atom %d\n",

1187 
s_min>fmax, s_min>a_fmax+1);

1188 
fprintf(stderr, " FNorm = %12.5e\n",

1189 
s_min>fnorm/sqrtNumAtoms); 
1190 
fprintf(stderr, "\n");

1191 
/* and copy to the log file too... */

1192 
fprintf(fplog, " Fmax = %12.5e on atom %d\n",

1193 
s_min>fmax, s_min>a_fmax+1);

1194 
fprintf(fplog, " FNorm = %12.5e\n",

1195 
s_min>fnorm/sqrtNumAtoms); 
1196 
fprintf(fplog, "\n");

1197 
} 
1198 
/* Start the loop over CG steps.

1199 
* Each successful step is counted, and we continue until

1200 
* we either converge or reach the max number of steps.

1201 
*/

1202 
converged = FALSE; 
1203 
for (step = 0; (number_steps < 0  step <= number_steps) && !converged; step++) 
1204 
{ 
1205  
1206 
/* start taking steps in a new direction

1207 
* First time we enter the routine, beta=0, and the direction is

1208 
* simply the negative gradient.

1209 
*/

1210  
1211 
/* Calculate the new direction in p, and the gradient in this direction, gpa */

1212 
rvec *pm = s_min>s.cg_p.rvec_array(); 
1213 
const rvec *sfm = s_min>f.rvec_array();

1214 
double gpa = 0; 
1215 
int gf = 0; 
1216 
for (int i = 0; i < mdatoms>homenr; i++) 
1217 
{ 
1218 
if (mdatoms>cFREEZE)

1219 
{ 
1220 
gf = mdatoms>cFREEZE[i]; 
1221 
} 
1222 
for (m = 0; m < DIM; m++) 
1223 
{ 
1224 
if (!inputrec>opts.nFreeze[gf][m])

1225 
{ 
1226 
pm[i][m] = sfm[i][m] + beta*pm[i][m]; 
1227 
gpa = pm[i][m]*sfm[i][m]; 
1228 
/* f is negative gradient, thus the sign */

1229 
} 
1230 
else

1231 
{ 
1232 
pm[i][m] = 0;

1233 
} 
1234 
} 
1235 
} 
1236  
1237 
/* Sum the gradient along the line across CPUs */

1238 
if (PAR(cr))

1239 
{ 
1240 
gmx_sumd(1, &gpa, cr);

1241 
} 
1242  
1243 
/* Calculate the norm of the search vector */

1244 
get_f_norm_max(cr, &(inputrec>opts), mdatoms, pm, &pnorm, nullptr, nullptr); 
1245  
1246 
/* Just in case stepsize reaches zero due to numerical precision... */

1247 
if (stepsize <= 0) 
1248 
{ 
1249 
stepsize = inputrec>em_stepsize/pnorm; 
1250 
} 
1251  
1252 
/*

1253 
* Double check the value of the derivative in the search direction.

1254 
* If it is positive it must be due to the old information in the

1255 
* CG formula, so just remove that and start over with beta=0.

1256 
* This corresponds to a steepest descent step.

1257 
*/

1258 
if (gpa > 0) 
1259 
{ 
1260 
beta = 0;

1261 
step; /* Don't count this step since we are restarting */

1262 
continue; /* Go back to the beginning of the big forloop */ 
1263 
} 
1264  
1265 
/* Calculate minimum allowed stepsize, before the average (norm)

1266 
* relative change in coordinate is smaller than precision

1267 
*/

1268 
minstep = 0;

1269 
auto s_min_x = makeArrayRef(s_min>s.x);

1270 
for (int i = 0; i < mdatoms>homenr; i++) 
1271 
{ 
1272 
for (m = 0; m < DIM; m++) 
1273 
{ 
1274 
tmp = fabs(s_min_x[i][m]); 
1275 
if (tmp < 1.0) 
1276 
{ 
1277 
tmp = 1.0; 
1278 
} 
1279 
tmp = pm[i][m]/tmp; 
1280 
minstep += tmp*tmp; 
1281 
} 
1282 
} 
1283 
/* Add up from all CPUs */

1284 
if (PAR(cr))

1285 
{ 
1286 
gmx_sumd(1, &minstep, cr);

1287 
} 
1288  
1289 
minstep = GMX_REAL_EPS/sqrt(minstep/(3*top_global>natoms));

1290  
1291 
if (stepsize < minstep)

1292 
{ 
1293 
converged = TRUE; 
1294 
break;

1295 
} 
1296  
1297 
/* Write coordinates if necessary */

1298 
do_x = do_per_step(step, inputrec>nstxout); 
1299 
do_f = do_per_step(step, inputrec>nstfout); 
1300  
1301 
write_em_traj(fplog, cr, outf, do_x, do_f, nullptr,

1302 
top_global, inputrec, step, 
1303 
s_min, state_global, observablesHistory); 
1304  
1305 
/* Take a step downhill.

1306 
* In theory, we should minimize the function along this direction.

1307 
* That is quite possible, but it turns out to take 510 function evaluations

1308 
* for each line. However, we dont really need to find the exact minimum 

1309 
* it is much better to start a new CG step in a modified direction as soon

1310 
* as we are close to it. This will save a lot of energy evaluations.

1311 
*

1312 
* In practice, we just try to take a single step.

1313 
* If it worked (i.e. lowered the energy), we increase the stepsize but

1314 
* the continue straight to the next CG step without trying to find any minimum.

1315 
* If it didn't work (higher energy), there must be a minimum somewhere between

1316 
* the old position and the new one.

1317 
*

1318 
* Due to the finite numerical accuracy, it turns out that it is a good idea

1319 
* to even accept a SMALL increase in energy, if the derivative is still downhill.

1320 
* This leads to lower final energies in the tests I've done. / Erik

1321 
*/

1322 
s_a>epot = s_min>epot; 
1323 
a = 0.0; 
1324 
c = a + stepsize; /* reference position along line is zero */

1325  
1326 
if (DOMAINDECOMP(cr) && s_min>s.ddp_count < cr>dd>ddp_count)

1327 
{ 
1328 
em_dd_partition_system(fplog, mdlog, step, cr, top_global, inputrec, 
1329 
s_min, top, mdAtoms, fr, vsite, constr, 
1330 
nrnb, wcycle); 
1331 
} 
1332  
1333 
/* Take a trial step (new coords in s_c) */

1334 
do_em_step(cr, inputrec, mdatoms, s_min, c, &s_min>s.cg_p, s_c, 
1335 
constr, 1);

1336  
1337 
neval++; 
1338 
/* Calculate energy for the trial step */

1339 
energyEvaluator.run(s_c, mu_tot, vir, pres, 1, FALSE);

1340  
1341 
/* Calc derivative along line */

1342 
const rvec *pc = s_c>s.cg_p.rvec_array();

1343 
const rvec *sfc = s_c>f.rvec_array();

1344 
double gpc = 0; 
1345 
for (int i = 0; i < mdatoms>homenr; i++) 
1346 
{ 
1347 
for (m = 0; m < DIM; m++) 
1348 
{ 
1349 
gpc = pc[i][m]*sfc[i][m]; /* f is negative gradient, thus the sign */

1350 
} 
1351 
} 
1352 
/* Sum the gradient along the line across CPUs */

1353 
if (PAR(cr))

1354 
{ 
1355 
gmx_sumd(1, &gpc, cr);

1356 
} 
1357  
1358 
/* This is the max amount of increase in energy we tolerate */

1359 
tmp = std::sqrt(GMX_REAL_EPS)*fabs(s_a>epot); 
1360  
1361 
/* Accept the step if the energy is lower, or if it is not significantly higher

1362 
* and the line derivative is still negative.

1363 
*/

1364 
if (s_c>epot < s_a>epot  (gpc < 0 && s_c>epot < (s_a>epot + tmp))) 
1365 
{ 
1366 
foundlower = TRUE; 
1367 
/* Great, we found a better energy. Increase step for next iteration

1368 
* if we are still going down, decrease it otherwise

1369 
*/

1370 
if (gpc < 0) 
1371 
{ 
1372 
stepsize *= 1.618034; /* The golden section */ 
1373 
} 
1374 
else

1375 
{ 
1376 
stepsize *= 0.618034; /* 1/golden section */ 
1377 
} 
1378 
} 
1379 
else

1380 
{ 
1381 
/* New energy is the same or higher. We will have to do some work

1382 
* to find a smaller value in the interval. Take smaller step next time!

1383 
*/

1384 
foundlower = FALSE; 
1385 
stepsize *= 0.618034; 
1386 
} 
1387  
1388  
1389  
1390  
1391 
/* OK, if we didn't find a lower value we will have to locate one now  there must

1392 
* be one in the interval [a=0,c].

1393 
* The same thing is valid here, though: Don't spend dozens of iterations to find

1394 
* the line minimum. We try to interpolate based on the derivative at the endpoints,

1395 
* and only continue until we find a lower value. In most cases this means 12 iterations.

1396 
*

1397 
* I also have a safeguard for potentially really pathological functions so we never

1398 
* take more than 20 steps before we give up ...

1399 
*

1400 
* If we already found a lower value we just skip this step and continue to the update.

1401 
*/

1402 
double gpb;

1403 
if (!foundlower)

1404 
{ 
1405 
nminstep = 0;

1406  
1407 
do

1408 
{ 
1409 
/* Select a new trial point.

1410 
* If the derivatives at points a & c have different sign we interpolate to zero,

1411 
* otherwise just do a bisection.

1412 
*/

1413 
if (gpa < 0 && gpc > 0) 
1414 
{ 
1415 
b = a + gpa*(ac)/(gpcgpa); 
1416 
} 
1417 
else

1418 
{ 
1419 
b = 0.5*(a+c); 
1420 
} 
1421  
1422 
/* safeguard if interpolation close to machine accuracy causes errors:

1423 
* never go outside the interval

1424 
*/

1425 
if (b <= a  b >= c)

1426 
{ 
1427 
b = 0.5*(a+c); 
1428 
} 
1429  
1430 
if (DOMAINDECOMP(cr) && s_min>s.ddp_count != cr>dd>ddp_count)

1431 
{ 
1432 
/* Reload the old state */

1433 
em_dd_partition_system(fplog, mdlog, 1, cr, top_global, inputrec,

1434 
s_min, top, mdAtoms, fr, vsite, constr, 
1435 
nrnb, wcycle); 
1436 
} 
1437  
1438 
/* Take a trial step to this new point  new coords in s_b */

1439 
do_em_step(cr, inputrec, mdatoms, s_min, b, &s_min>s.cg_p, s_b, 
1440 
constr, 1);

1441  
1442 
neval++; 
1443 
/* Calculate energy for the trial step */

1444 
energyEvaluator.run(s_b, mu_tot, vir, pres, 1, FALSE);

1445  
1446 
/* p does not change within a step, but since the domain decomposition

1447 
* might change, we have to use cg_p of s_b here.

1448 
*/

1449 
const rvec *pb = s_b>s.cg_p.rvec_array();

1450 
const rvec *sfb = s_b>f.rvec_array();

1451 
gpb = 0;

1452 
for (int i = 0; i < mdatoms>homenr; i++) 
1453 
{ 
1454 
for (m = 0; m < DIM; m++) 
1455 
{ 
1456 
gpb = pb[i][m]*sfb[i][m]; /* f is negative gradient, thus the sign */

1457 
} 
1458 
} 
1459 
/* Sum the gradient along the line across CPUs */

1460 
if (PAR(cr))

1461 
{ 
1462 
gmx_sumd(1, &gpb, cr);

1463 
} 
1464  
1465 
if (debug)

1466 
{ 
1467 
fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",

1468 
s_a>epot, s_b>epot, s_c>epot, gpb); 
1469 
} 
1470  
1471 
epot_repl = s_b>epot; 
1472  
1473 
/* Keep one of the intervals based on the value of the derivative at the new point */

1474 
if (gpb > 0) 
1475 
{ 
1476 
/* Replace c endpoint with b */

1477 
swap_em_state(&s_b, &s_c); 
1478 
c = b; 
1479 
gpc = gpb; 
1480 
} 
1481 
else

1482 
{ 
1483 
/* Replace a endpoint with b */

1484 
swap_em_state(&s_b, &s_a); 
1485 
a = b; 
1486 
gpa = gpb; 
1487 
} 
1488  
1489 
/*

1490 
* Stop search as soon as we find a value smaller than the endpoints.

1491 
* Never run more than 20 steps, no matter what.

1492 
*/

1493 
nminstep++; 
1494 
} 
1495 
while ((epot_repl > s_a>epot  epot_repl > s_c>epot) &&

1496 
(nminstep < 20));

1497  
1498 
if (std::fabs(epot_repl  s_min>epot) < fabs(s_min>epot)*GMX_REAL_EPS 

1499 
nminstep >= 20)

1500 
{ 
1501 
/* OK. We couldn't find a significantly lower energy.

1502 
* If beta==0 this was steepest descent, and then we give up.

1503 
* If not, set beta=0 and restart with steepest descent before quitting.

1504 
*/

1505 
if (beta == 0.0) 
1506 
{ 
1507 
/* Converged */

1508 
converged = TRUE; 
1509 
break;

1510 
} 
1511 
else

1512 
{ 
1513 
/* Reset memory before giving up */

1514 
beta = 0.0; 
1515 
continue;

1516 
} 
1517 
} 
1518  
1519 
/* Select min energy state of A & C, put the best in B.

1520 
*/

1521 
if (s_c>epot < s_a>epot)

1522 
{ 
1523 
if (debug)

1524 
{ 
1525 
fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n",

1526 
s_c>epot, s_a>epot); 
1527 
} 
1528 
swap_em_state(&s_b, &s_c); 
1529 
gpb = gpc; 
1530 
} 
1531 
else

1532 
{ 
1533 
if (debug)

1534 
{ 
1535 
fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n",

1536 
s_a>epot, s_c>epot); 
1537 
} 
1538 
swap_em_state(&s_b, &s_a); 
1539 
gpb = gpa; 
1540 
} 
1541  
1542 
} 
1543 
else

1544 
{ 
1545 
if (debug)

1546 
{ 
1547 
fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n",

1548 
s_c>epot); 
1549 
} 
1550 
swap_em_state(&s_b, &s_c); 
1551 
gpb = gpc; 
1552 
} 
1553  
1554 
/* new search direction */

1555 
/* beta = 0 means forget all memory and restart with steepest descents. */

1556 
if (nstcg && ((step % nstcg) == 0)) 
1557 
{ 
1558 
beta = 0.0; 
1559 
} 
1560 
else

1561 
{ 
1562 
/* s_min>fnorm cannot be zero, because then we would have converged

1563 
* and broken out.

1564 
*/

1565  
1566 
/* PolakRibiere update.

1567 
* Change to fnorm2/fnorm2_old for FletcherReeves

1568 
*/

1569 
beta = pr_beta(cr, &inputrec>opts, mdatoms, top_global, s_min, s_b); 
1570 
} 
1571 
/* Limit beta to prevent oscillations */

1572 
if (fabs(beta) > 5.0) 
1573 
{ 
1574 
beta = 0.0; 
1575 
} 
1576  
1577  
1578 
/* update positions */

1579 
swap_em_state(&s_min, &s_b); 
1580 
gpa = gpb; 
1581  
1582 
/* Print it if necessary */

1583 
if (MASTER(cr))

1584 
{ 
1585 
if (mdrunOptions.verbose)

1586 
{ 
1587 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
1588 
fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",

1589 
step, s_min>epot, s_min>fnorm/sqrtNumAtoms, 
1590 
s_min>fmax, s_min>a_fmax+1);

1591 
fflush(stderr); 
1592 
} 
1593 
/* Store the new (lower) energies */

1594 
matrix nullBox = {}; 
1595 
upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step), 
1596 
mdatoms>tmass, enerd, nullptr, nullptr, nullptr, nullBox, 
1597 
nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); 
1598  
1599 
do_log = do_per_step(step, inputrec>nstlog); 
1600 
do_ene = do_per_step(step, inputrec>nstenergy); 
1601  
1602 
/* Prepare IMD energy record, if bIMD is TRUE. */

1603 
IMD_fill_energy_record(inputrec>bIMD, inputrec>imd, enerd, step, TRUE); 
1604  
1605 
if (do_log)

1606 
{ 
1607 
print_ebin_header(fplog, step, step); 
1608 
} 
1609 
print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, 
1610 
do_log ? fplog : nullptr, step, step, eprNORMAL,

1611 
mdebin, fcd, &(top_global>groups), &(inputrec>opts), nullptr);

1612 
} 
1613  
1614 
/* Send energies and positions to the IMD client if bIMD is TRUE. */

1615 
if (MASTER(cr) && do_IMD(inputrec>bIMD, step, cr, TRUE, state_global>box, state_global>x.rvec_array(), inputrec, 0, wcycle)) 
1616 
{ 
1617 
IMD_send_positions(inputrec>imd); 
1618 
} 
1619  
1620 
/* Stop when the maximum force lies below tolerance.

1621 
* If we have reached machine precision, converged is already set to true.

1622 
*/

1623 
converged = converged  (s_min>fmax < inputrec>em_tol); 
1624  
1625 
} /* End of the loop */

1626  
1627 
/* IMD cleanup, if bIMD is TRUE. */

1628 
IMD_finalize(inputrec>bIMD, inputrec>imd); 
1629  
1630 
if (converged)

1631 
{ 
1632 
step; /* we never took that last step in this case */

1633  
1634 
} 
1635 
if (s_min>fmax > inputrec>em_tol)

1636 
{ 
1637 
if (MASTER(cr))

1638 
{ 
1639 
warn_step(fplog, inputrec>em_tol, s_min>fmax, 
1640 
step1 == number_steps, FALSE);

1641 
} 
1642 
converged = FALSE; 
1643 
} 
1644  
1645 
if (MASTER(cr))

1646 
{ 
1647 
/* If we printed energy and/or logfile last step (which was the last step)

1648 
* we don't have to do it again, but otherwise print the final values.

1649 
*/

1650 
if (!do_log)

1651 
{ 
1652 
/* Write final value to log since we didn't do anything the last step */

1653 
print_ebin_header(fplog, step, step); 
1654 
} 
1655 
if (!do_ene  !do_log)

1656 
{ 
1657 
/* Write final energy file entries */

1658 
print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, 
1659 
!do_log ? fplog : nullptr, step, step, eprNORMAL,

1660 
mdebin, fcd, &(top_global>groups), &(inputrec>opts), nullptr);

1661 
} 
1662 
} 
1663  
1664 
/* Print some stuff... */

1665 
if (MASTER(cr))

1666 
{ 
1667 
fprintf(stderr, "\nwriting lowest energy coordinates.\n");

1668 
} 
1669  
1670 
/* IMPORTANT!

1671 
* For accurate normal mode calculation it is imperative that we

1672 
* store the last conformation into the full precision binary trajectory.

1673 
*

1674 
* However, we should only do it if we did NOT already write this step

1675 
* above (which we did if do_x or do_f was true).

1676 
*/

1677 
/* Note that with 0 < nstfout != nstxout we can end up with two frames

1678 
* in the trajectory with the same step number.

1679 
*/

1680 
do_x = !do_per_step(step, inputrec>nstxout); 
1681 
do_f = (inputrec>nstfout > 0 && !do_per_step(step, inputrec>nstfout));

1682  
1683 
write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), 
1684 
top_global, inputrec, step, 
1685 
s_min, state_global, observablesHistory); 
1686  
1687  
1688 
if (MASTER(cr))

1689 
{ 
1690 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
1691 
print_converged(stderr, CG, inputrec>em_tol, step, converged, number_steps, 
1692 
s_min, sqrtNumAtoms); 
1693 
print_converged(fplog, CG, inputrec>em_tol, step, converged, number_steps, 
1694 
s_min, sqrtNumAtoms); 
1695  
1696 
fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);

1697 
} 
1698  
1699 
finish_em(cr, outf, walltime_accounting, wcycle); 
1700  
1701 
/* To print the actual number of steps we needed somewhere */

1702 
walltime_accounting_set_nsteps_done(walltime_accounting, step); 
1703 
} 
1704  
1705  
1706 
void

1707 
Integrator::do_lbfgs() 
1708 
{ 
1709 
static const char *LBFGS = "LowMemory BFGS Minimizer"; 
1710 
em_state_t ems; 
1711 
gmx_localtop_t *top; 
1712 
gmx_enerdata_t *enerd; 
1713 
gmx_global_stat_t gstat; 
1714 
t_graph *graph; 
1715 
int ncorr, nmaxcorr, point, cp, neval, nminstep;

1716 
double stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;

1717 
real *rho, *alpha, *p, *s, **dx, **dg; 
1718 
real a, b, c, maxdelta, delta; 
1719 
real diag, Epot0; 
1720 
real dgdx, dgdg, sq, yr, beta; 
1721 
t_mdebin *mdebin; 
1722 
gmx_bool converged; 
1723 
rvec mu_tot; 
1724 
gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen; 
1725 
tensor vir, pres; 
1726 
int start, end, number_steps;

1727 
gmx_mdoutf_t outf; 
1728 
int i, k, m, n, gf, step;

1729 
int mdof_flags;

1730 
auto mdatoms = mdAtoms>mdatoms();

1731  
1732 
GMX_LOG(mdlog.info).asParagraph(). 
1733 
appendText("Note that activating LBFGS energy minimization via the "

1734 
"integrator .mdp option and the command gmx mdrun may "

1735 
"be available in a different form in a future version of GROMACS, "

1736 
"e.g. gmx minimize and an .mdp option.");

1737  
1738 
if (PAR(cr))

1739 
{ 
1740 
gmx_fatal(FARGS, "LBFGS minimization only supports a single rank");

1741 
} 
1742  
1743 
if (nullptr != constr) 
1744 
{ 
1745 
gmx_fatal(FARGS, "The combination of constraints and LBFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");

1746 
} 
1747  
1748 
n = 3*state_global>natoms;

1749 
nmaxcorr = inputrec>nbfgscorr; 
1750  
1751 
snew(frozen, n); 
1752  
1753 
snew(p, n); 
1754 
snew(rho, nmaxcorr); 
1755 
snew(alpha, nmaxcorr); 
1756  
1757 
snew(dx, nmaxcorr); 
1758 
for (i = 0; i < nmaxcorr; i++) 
1759 
{ 
1760 
snew(dx[i], n); 
1761 
} 
1762  
1763 
snew(dg, nmaxcorr); 
1764 
for (i = 0; i < nmaxcorr; i++) 
1765 
{ 
1766 
snew(dg[i], n); 
1767 
} 
1768  
1769 
step = 0;

1770 
neval = 0;

1771  
1772 
/* Init em */

1773 
init_em(fplog, mdlog, LBFGS, cr, ms, outputProvider, inputrec, mdrunOptions, 
1774 
state_global, top_global, &ems, &top, 
1775 
nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat, 
1776 
vsite, constr, nullptr,

1777 
nfile, fnm, &outf, &mdebin, wcycle); 
1778  
1779 
start = 0;

1780 
end = mdatoms>homenr; 
1781  
1782 
/* We need 4 working states */

1783 
em_state_t s0 {}, s1 {}, s2 {}, s3 {}; 
1784 
em_state_t *sa = &s0; 
1785 
em_state_t *sb = &s1; 
1786 
em_state_t *sc = &s2; 
1787 
em_state_t *last = &s3; 
1788 
/* Initialize by copying the state from ems (we could skip x and f here) */

1789 
*sa = ems; 
1790 
*sb = ems; 
1791 
*sc = ems; 
1792  
1793 
/* Print to log file */

1794 
print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS); 
1795  
1796 
do_log = do_ene = do_x = do_f = TRUE; 
1797  
1798 
/* Max number of steps */

1799 
number_steps = inputrec>nsteps; 
1800  
1801 
/* Create a 3*natoms index to tell whether each degree of freedom is frozen */

1802 
gf = 0;

1803 
for (i = start; i < end; i++)

1804 
{ 
1805 
if (mdatoms>cFREEZE)

1806 
{ 
1807 
gf = mdatoms>cFREEZE[i]; 
1808 
} 
1809 
for (m = 0; m < DIM; m++) 
1810 
{ 
1811 
frozen[3*i+m] = (inputrec>opts.nFreeze[gf][m] != 0); 
1812 
} 
1813 
} 
1814 
if (MASTER(cr))

1815 
{ 
1816 
sp_header(stderr, LBFGS, inputrec>em_tol, number_steps); 
1817 
} 
1818 
if (fplog)

1819 
{ 
1820 
sp_header(fplog, LBFGS, inputrec>em_tol, number_steps); 
1821 
} 
1822  
1823 
if (vsite)

1824 
{ 
1825 
construct_vsites(vsite, state_global>x.rvec_array(), 1, nullptr, 
1826 
top>idef.iparams, top>idef.il, 
1827 
fr>ePBC, fr>bMolPBC, cr, state_global>box); 
1828 
} 
1829  
1830 
/* Call the force routine and some auxiliary (neighboursearching etc.) */

1831 
/* do_force always puts the charge groups in the box and shifts again

1832 
* We do not unshift, so molecules are always whole

1833 
*/

1834 
neval++; 
1835 
EnergyEvaluator energyEvaluator { 
1836 
fplog, mdlog, cr, ms, 
1837 
top_global, top, 
1838 
inputrec, nrnb, wcycle, gstat, 
1839 
vsite, constr, fcd, graph, 
1840 
mdAtoms, fr, ppForceWorkload, enerd 
1841 
}; 
1842 
energyEvaluator.run(&ems, mu_tot, vir, pres, 1, TRUE);

1843  
1844 
if (MASTER(cr))

1845 
{ 
1846 
/* Copy stuff to the energy bin for easy printing etc. */

1847 
matrix nullBox = {}; 
1848 
upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step), 
1849 
mdatoms>tmass, enerd, nullptr, nullptr, nullptr, nullBox, 
1850 
nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); 
1851  
1852 
print_ebin_header(fplog, step, step); 
1853 
print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, 
1854 
mdebin, fcd, &(top_global>groups), &(inputrec>opts), nullptr);

1855 
} 
1856  
1857 
/* Set the initial step.

1858 
* since it will be multiplied by the nonnormalized search direction

1859 
* vector (force vector the first time), we scale it by the

1860 
* norm of the force.

1861 
*/

1862  
1863 
if (MASTER(cr))

1864 
{ 
1865 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
1866 
fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);

1867 
fprintf(stderr, " Fmax = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1); 
1868 
fprintf(stderr, " FNorm = %12.5e\n", ems.fnorm/sqrtNumAtoms);

1869 
fprintf(stderr, "\n");

1870 
/* and copy to the log file too... */

1871 
fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);

1872 
fprintf(fplog, " Fmax = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1); 
1873 
fprintf(fplog, " FNorm = %12.5e\n", ems.fnorm/sqrtNumAtoms);

1874 
fprintf(fplog, "\n");

1875 
} 
1876  
1877 
// Point is an index to the memory of search directions, where 0 is the first one.

1878 
point = 0;

1879  
1880 
// Set initial search direction to the force (gradient), or 0 for frozen particles.

1881 
real *fInit = static_cast<real *>(ems.f.rvec_array()[0]); 
1882 
for (i = 0; i < n; i++) 
1883 
{ 
1884 
if (!frozen[i])

1885 
{ 
1886 
dx[point][i] = fInit[i]; /* Initial search direction */

1887 
} 
1888 
else

1889 
{ 
1890 
dx[point][i] = 0;

1891 
} 
1892 
} 
1893  
1894 
// Stepsize will be modified during the search, and actually it is not critical

1895 
// (the main efficiency in the algorithm comes from changing directions), but

1896 
// we still need an initial value, so estimate it as the inverse of the norm

1897 
// so we take small steps where the potential fluctuates a lot.

1898 
stepsize = 1.0/ems.fnorm; 
1899  
1900 
/* Start the loop over BFGS steps.

1901 
* Each successful step is counted, and we continue until

1902 
* we either converge or reach the max number of steps.

1903 
*/

1904  
1905 
ncorr = 0;

1906  
1907 
/* Set the gradient from the force */

1908 
converged = FALSE; 
1909 
for (step = 0; (number_steps < 0  step <= number_steps) && !converged; step++) 
1910 
{ 
1911  
1912 
/* Write coordinates if necessary */

1913 
do_x = do_per_step(step, inputrec>nstxout); 
1914 
do_f = do_per_step(step, inputrec>nstfout); 
1915  
1916 
mdof_flags = 0;

1917 
if (do_x)

1918 
{ 
1919 
mdof_flags = MDOF_X; 
1920 
} 
1921  
1922 
if (do_f)

1923 
{ 
1924 
mdof_flags = MDOF_F; 
1925 
} 
1926  
1927 
if (inputrec>bIMD)

1928 
{ 
1929 
mdof_flags = MDOF_IMD; 
1930 
} 
1931  
1932 
mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, 
1933 
top_global, step, static_cast<real>(step), &ems.s, state_global, observablesHistory, ems.f);

1934  
1935 
/* Do the linesearching in the direction dx[point][0..(n1)] */

1936  
1937 
/* make s a pointer to current search direction  point=0 first time we get here */

1938 
s = dx[point]; 
1939  
1940 
real *xx = static_cast<real *>(ems.s.x.rvec_array()[0]); 
1941 
real *ff = static_cast<real *>(ems.f.rvec_array()[0]); 
1942  
1943 
// calculate line gradient in position A

1944 
for (gpa = 0, i = 0; i < n; i++) 
1945 
{ 
1946 
gpa = s[i]*ff[i]; 
1947 
} 
1948  
1949 
/* Calculate minimum allowed stepsize along the line, before the average (norm)

1950 
* relative change in coordinate is smaller than precision

1951 
*/

1952 
for (minstep = 0, i = 0; i < n; i++) 
1953 
{ 
1954 
tmp = fabs(xx[i]); 
1955 
if (tmp < 1.0) 
1956 
{ 
1957 
tmp = 1.0; 
1958 
} 
1959 
tmp = s[i]/tmp; 
1960 
minstep += tmp*tmp; 
1961 
} 
1962 
minstep = GMX_REAL_EPS/sqrt(minstep/n); 
1963  
1964 
if (stepsize < minstep)

1965 
{ 
1966 
converged = TRUE; 
1967 
break;

1968 
} 
1969  
1970 
// Before taking any steps along the line, store the old position

1971 
*last = ems; 
1972 
real *lastx = static_cast<real *>(last>s.x.data()[0]); 
1973 
real *lastf = static_cast<real *>(last>f.data()[0]); 
1974 
Epot0 = ems.epot; 
1975  
1976 
*sa = ems; 
1977  
1978 
/* Take a step downhill.

1979 
* In theory, we should find the actual minimum of the function in this

1980 
* direction, somewhere along the line.

1981 
* That is quite possible, but it turns out to take 510 function evaluations

1982 
* for each line. However, we dont really need to find the exact minimum 

1983 
* it is much better to start a new BFGS step in a modified direction as soon

1984 
* as we are close to it. This will save a lot of energy evaluations.

1985 
*

1986 
* In practice, we just try to take a single step.

1987 
* If it worked (i.e. lowered the energy), we increase the stepsize but

1988 
* continue straight to the next BFGS step without trying to find any minimum,

1989 
* i.e. we change the search direction too. If the line was smooth, it is

1990 
* likely we are in a smooth region, and then it makes sense to take longer

1991 
* steps in the modified search direction too.

1992 
*

1993 
* If it didn't work (higher energy), there must be a minimum somewhere between

1994 
* the old position and the new one. Then we need to start by finding a lower

1995 
* value before we change search direction. Since the energy was apparently

1996 
* quite rough, we need to decrease the step size.

1997 
*

1998 
* Due to the finite numerical accuracy, it turns out that it is a good idea

1999 
* to accept a SMALL increase in energy, if the derivative is still downhill.

2000 
* This leads to lower final energies in the tests I've done. / Erik

2001 
*/

2002  
2003 
// State "A" is the first position along the line.

2004 
// reference position along line is initially zero

2005 
a = 0.0; 
2006  
2007 
// Check stepsize first. We do not allow displacements

2008 
// larger than emstep.

2009 
//

2010 
do

2011 
{ 
2012 
// Pick a new position C by adding stepsize to A.

2013 
c = a + stepsize; 
2014  
2015 
// Calculate what the largest change in any individual coordinate

2016 
// would be (translation along line * gradient along line)

2017 
maxdelta = 0;

2018 
for (i = 0; i < n; i++) 
2019 
{ 
2020 
delta = c*s[i]; 
2021 
if (delta > maxdelta)

2022 
{ 
2023 
maxdelta = delta; 
2024 
} 
2025 
} 
2026 
// If any displacement is larger than the stepsize limit, reduce the step

2027 
if (maxdelta > inputrec>em_stepsize)

2028 
{ 
2029 
stepsize *= 0.1; 
2030 
} 
2031 
} 
2032 
while (maxdelta > inputrec>em_stepsize);

2033  
2034 
// Take a trial step and move the coordinate array xc[] to position C

2035 
real *xc = static_cast<real *>(sc>s.x.rvec_array()[0]); 
2036 
for (i = 0; i < n; i++) 
2037 
{ 
2038 
xc[i] = lastx[i] + c*s[i]; 
2039 
} 
2040  
2041 
neval++; 
2042 
// Calculate energy for the trial step in position C

2043 
energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE); 
2044  
2045 
// Calc line gradient in position C

2046 
real *fc = static_cast<real *>(sc>f.rvec_array()[0]); 
2047 
for (gpc = 0, i = 0; i < n; i++) 
2048 
{ 
2049 
gpc = s[i]*fc[i]; /* f is negative gradient, thus the sign */

2050 
} 
2051 
/* Sum the gradient along the line across CPUs */

2052 
if (PAR(cr))

2053 
{ 
2054 
gmx_sumd(1, &gpc, cr);

2055 
} 
2056  
2057 
// This is the max amount of increase in energy we tolerate.

2058 
// By allowing VERY small changes (close to numerical precision) we

2059 
// frequently find even better (lower) final energies.

2060 
tmp = std::sqrt(GMX_REAL_EPS)*fabs(sa>epot); 
2061  
2062 
// Accept the step if the energy is lower in the new position C (compared to A),

2063 
// or if it is not significantly higher and the line derivative is still negative.

2064 
foundlower = sc>epot < sa>epot  (gpc < 0 && sc>epot < (sa>epot + tmp));

2065 
// If true, great, we found a better energy. We no longer try to alter the

2066 
// stepsize, but simply accept this new better position. The we select a new

2067 
// search direction instead, which will be much more efficient than continuing

2068 
// to take smaller steps along a line. Set fnorm based on the new C position,

2069 
// which will be used to update the stepsize to 1/fnorm further down.

2070  
2071 
// If false, the energy is NOT lower in point C, i.e. it will be the same

2072 
// or higher than in point A. In this case it is pointless to move to point C,

2073 
// so we will have to do more iterations along the same line to find a smaller

2074 
// value in the interval [A=0.0,C].

2075 
// Here, A is still 0.0, but that will change when we do a search in the interval

2076 
// [0.0,C] below. That search we will do by interpolation or bisection rather

2077 
// than with the stepsize, so no need to modify it. For the next search direction

2078 
// it will be reset to 1/fnorm anyway.

2079  
2080 
if (!foundlower)

2081 
{ 
2082 
// OK, if we didn't find a lower value we will have to locate one now  there must

2083 
// be one in the interval [a,c].

2084 
// The same thing is valid here, though: Don't spend dozens of iterations to find

2085 
// the line minimum. We try to interpolate based on the derivative at the endpoints,

2086 
// and only continue until we find a lower value. In most cases this means 12 iterations.

2087 
// I also have a safeguard for potentially really pathological functions so we never

2088 
// take more than 20 steps before we give up.

2089 
// If we already found a lower value we just skip this step and continue to the update.

2090 
real fnorm = 0;

2091 
nminstep = 0;

2092 
do

2093 
{ 
2094 
// Select a new trial point B in the interval [A,C].

2095 
// If the derivatives at points a & c have different sign we interpolate to zero,

2096 
// otherwise just do a bisection since there might be multiple minima/maxima

2097 
// inside the interval.

2098 
if (gpa < 0 && gpc > 0) 
2099 
{ 
2100 
b = a + gpa*(ac)/(gpcgpa); 
2101 
} 
2102 
else

2103 
{ 
2104 
b = 0.5*(a+c); 
2105 
} 
2106  
2107 
/* safeguard if interpolation close to machine accuracy causes errors:

2108 
* never go outside the interval

2109 
*/

2110 
if (b <= a  b >= c)

2111 
{ 
2112 
b = 0.5*(a+c); 
2113 
} 
2114  
2115 
// Take a trial step to point B

2116 
real *xb = static_cast<real *>(sb>s.x.rvec_array()[0]); 
2117 
for (i = 0; i < n; i++) 
2118 
{ 
2119 
xb[i] = lastx[i] + b*s[i]; 
2120 
} 
2121  
2122 
neval++; 
2123 
// Calculate energy for the trial step in point B

2124 
energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE); 
2125 
fnorm = sb>fnorm; 
2126  
2127 
// Calculate gradient in point B

2128 
real *fb = static_cast<real *>(sb>f.rvec_array()[0]); 
2129 
for (gpb = 0, i = 0; i < n; i++) 
2130 
{ 
2131 
gpb = s[i]*fb[i]; /* f is negative gradient, thus the sign */

2132  
2133 
} 
2134 
/* Sum the gradient along the line across CPUs */

2135 
if (PAR(cr))

2136 
{ 
2137 
gmx_sumd(1, &gpb, cr);

2138 
} 
2139  
2140 
// Keep one of the intervals [A,B] or [B,C] based on the value of the derivative

2141 
// at the new point B, and rename the endpoints of this new interval A and C.

2142 
if (gpb > 0) 
2143 
{ 
2144 
/* Replace c endpoint with b */

2145 
sc>epot = sb>epot; 
2146 
c = b; 
2147 
gpc = gpb; 
2148 
/* swap states b and c */

2149 
swap_em_state(&sb, &sc); 
2150 
} 
2151 
else

2152 
{ 
2153 
/* Replace a endpoint with b */

2154 
sa>epot = sb>epot; 
2155 
a = b; 
2156 
gpa = gpb; 
2157 
/* swap states a and b */

2158 
swap_em_state(&sa, &sb); 
2159 
} 
2160  
2161 
/*

2162 
* Stop search as soon as we find a value smaller than the endpoints,

2163 
* or if the tolerance is below machine precision.

2164 
* Never run more than 20 steps, no matter what.

2165 
*/

2166 
nminstep++; 
2167 
} 
2168 
while ((sb>epot > sa>epot  sb>epot > sc>epot) && (nminstep < 20)); 
2169  
2170 
if (std::fabs(sb>epot  Epot0) < GMX_REAL_EPS  nminstep >= 20) 
2171 
{ 
2172 
/* OK. We couldn't find a significantly lower energy.

2173 
* If ncorr==0 this was steepest descent, and then we give up.

2174 
* If not, reset memory to restart as steepest descent before quitting.

2175 
*/

2176 
if (ncorr == 0) 
2177 
{ 
2178 
/* Converged */

2179 
converged = TRUE; 
2180 
break;

2181 
} 
2182 
else

2183 
{ 
2184 
/* Reset memory */

2185 
ncorr = 0;

2186 
/* Search in gradient direction */

2187 
for (i = 0; i < n; i++) 
2188 
{ 
2189 
dx[point][i] = ff[i]; 
2190 
} 
2191 
/* Reset stepsize */

2192 
stepsize = 1.0/fnorm; 
2193 
continue;

2194 
} 
2195 
} 
2196  
2197 
/* Select min energy state of A & C, put the best in xx/ff/Epot

2198 
*/

2199 
if (sc>epot < sa>epot)

2200 
{ 
2201 
/* Use state C */

2202 
ems = *sc; 
2203 
step_taken = c; 
2204 
} 
2205 
else

2206 
{ 
2207 
/* Use state A */

2208 
ems = *sa; 
2209 
step_taken = a; 
2210 
} 
2211  
2212 
} 
2213 
else

2214 
{ 
2215 
/* found lower */

2216 
/* Use state C */

2217 
ems = *sc; 
2218 
step_taken = c; 
2219 
} 
2220  
2221 
/* Update the memory information, and calculate a new

2222 
* approximation of the inverse hessian

2223 
*/

2224  
2225 
/* Have new data in Epot, xx, ff */

2226 
if (ncorr < nmaxcorr)

2227 
{ 
2228 
ncorr++; 
2229 
} 
2230  
2231 
for (i = 0; i < n; i++) 
2232 
{ 
2233 
dg[point][i] = lastf[i]ff[i]; 
2234 
dx[point][i] *= step_taken; 
2235 
} 
2236  
2237 
dgdg = 0;

2238 
dgdx = 0;

2239 
for (i = 0; i < n; i++) 
2240 
{ 
2241 
dgdg += dg[point][i]*dg[point][i]; 
2242 
dgdx += dg[point][i]*dx[point][i]; 
2243 
} 
2244  
2245 
diag = dgdx/dgdg; 
2246  
2247 
rho[point] = 1.0/dgdx; 
2248 
point++; 
2249  
2250 
if (point >= nmaxcorr)

2251 
{ 
2252 
point = 0;

2253 
} 
2254  
2255 
/* Update */

2256 
for (i = 0; i < n; i++) 
2257 
{ 
2258 
p[i] = ff[i]; 
2259 
} 
2260  
2261 
cp = point; 
2262  
2263 
/* Recursive update. First go back over the memory points */

2264 
for (k = 0; k < ncorr; k++) 
2265 
{ 
2266 
cp; 
2267 
if (cp < 0) 
2268 
{ 
2269 
cp = ncorr1;

2270 
} 
2271  
2272 
sq = 0;

2273 
for (i = 0; i < n; i++) 
2274 
{ 
2275 
sq += dx[cp][i]*p[i]; 
2276 
} 
2277  
2278 
alpha[cp] = rho[cp]*sq; 
2279  
2280 
for (i = 0; i < n; i++) 
2281 
{ 
2282 
p[i] = alpha[cp]*dg[cp][i]; 
2283 
} 
2284 
} 
2285  
2286 
for (i = 0; i < n; i++) 
2287 
{ 
2288 
p[i] *= diag; 
2289 
} 
2290  
2291 
/* And then go forward again */

2292 
for (k = 0; k < ncorr; k++) 
2293 
{ 
2294 
yr = 0;

2295 
for (i = 0; i < n; i++) 
2296 
{ 
2297 
yr += p[i]*dg[cp][i]; 
2298 
} 
2299  
2300 
beta = rho[cp]*yr; 
2301 
beta = alpha[cp]beta; 
2302  
2303 
for (i = 0; i < n; i++) 
2304 
{ 
2305 
p[i] += beta*dx[cp][i]; 
2306 
} 
2307  
2308 
cp++; 
2309 
if (cp >= ncorr)

2310 
{ 
2311 
cp = 0;

2312 
} 
2313 
} 
2314  
2315 
for (i = 0; i < n; i++) 
2316 
{ 
2317 
if (!frozen[i])

2318 
{ 
2319 
dx[point][i] = p[i]; 
2320 
} 
2321 
else

2322 
{ 
2323 
dx[point][i] = 0;

2324 
} 
2325 
} 
2326  
2327 
/* Print it if necessary */

2328 
if (MASTER(cr))

2329 
{ 
2330 
if (mdrunOptions.verbose)

2331 
{ 
2332 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
2333 
fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",

2334 
step, ems.epot, ems.fnorm/sqrtNumAtoms, ems.fmax, ems.a_fmax + 1);

2335 
fflush(stderr); 
2336 
} 
2337 
/* Store the new (lower) energies */

2338 
matrix nullBox = {}; 
2339 
upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step), 
2340 
mdatoms>tmass, enerd, nullptr, nullptr, nullptr, nullBox, 
2341 
nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); 
2342 
do_log = do_per_step(step, inputrec>nstlog); 
2343 
do_ene = do_per_step(step, inputrec>nstenergy); 
2344 
if (do_log)

2345 
{ 
2346 
print_ebin_header(fplog, step, step); 
2347 
} 
2348 
print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, 
2349 
do_log ? fplog : nullptr, step, step, eprNORMAL,

2350 
mdebin, fcd, &(top_global>groups), &(inputrec>opts), nullptr);

2351 
} 
2352  
2353 
/* Send x and E to IMD client, if bIMD is TRUE. */

2354 
if (do_IMD(inputrec>bIMD, step, cr, TRUE, state_global>box, state_global>x.rvec_array(), inputrec, 0, wcycle) && MASTER(cr)) 
2355 
{ 
2356 
IMD_send_positions(inputrec>imd); 
2357 
} 
2358  
2359 
// Reset stepsize if we are doing more iterations

2360 
stepsize = 1.0; 
2361  
2362 
/* Stop when the maximum force lies below tolerance.

2363 
* If we have reached machine precision, converged is already set to true.

2364 
*/

2365 
converged = converged  (ems.fmax < inputrec>em_tol); 
2366  
2367 
} /* End of the loop */

2368  
2369 
/* IMD cleanup, if bIMD is TRUE. */

2370 
IMD_finalize(inputrec>bIMD, inputrec>imd); 
2371  
2372 
if (converged)

2373 
{ 
2374 
step; /* we never took that last step in this case */

2375  
2376 
} 
2377 
if (ems.fmax > inputrec>em_tol)

2378 
{ 
2379 
if (MASTER(cr))

2380 
{ 
2381 
warn_step(fplog, inputrec>em_tol, ems.fmax, 
2382 
step1 == number_steps, FALSE);

2383 
} 
2384 
converged = FALSE; 
2385 
} 
2386  
2387 
/* If we printed energy and/or logfile last step (which was the last step)

2388 
* we don't have to do it again, but otherwise print the final values.

2389 
*/

2390 
if (!do_log) /* Write final value to log since we didn't do anythin last step */ 
2391 
{ 
2392 
print_ebin_header(fplog, step, step); 
2393 
} 
2394 
if (!do_ene  !do_log) /* Write final energy file entries */ 
2395 
{ 
2396 
print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, 
2397 
!do_log ? fplog : nullptr, step, step, eprNORMAL,

2398 
mdebin, fcd, &(top_global>groups), &(inputrec>opts), nullptr);

2399 
} 
2400  
2401 
/* Print some stuff... */

2402 
if (MASTER(cr))

2403 
{ 
2404 
fprintf(stderr, "\nwriting lowest energy coordinates.\n");

2405 
} 
2406  
2407 
/* IMPORTANT!

2408 
* For accurate normal mode calculation it is imperative that we

2409 
* store the last conformation into the full precision binary trajectory.

2410 
*

2411 
* However, we should only do it if we did NOT already write this step

2412 
* above (which we did if do_x or do_f was true).

2413 
*/

2414 
do_x = !do_per_step(step, inputrec>nstxout); 
2415 
do_f = !do_per_step(step, inputrec>nstfout); 
2416 
write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), 
2417 
top_global, inputrec, step, 
2418 
&ems, state_global, observablesHistory); 
2419  
2420 
if (MASTER(cr))

2421 
{ 
2422 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
2423 
print_converged(stderr, LBFGS, inputrec>em_tol, step, converged, 
2424 
number_steps, &ems, sqrtNumAtoms); 
2425 
print_converged(fplog, LBFGS, inputrec>em_tol, step, converged, 
2426 
number_steps, &ems, sqrtNumAtoms); 
2427  
2428 
fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);

2429 
} 
2430  
2431 
finish_em(cr, outf, walltime_accounting, wcycle); 
2432  
2433 
/* To print the actual number of steps we needed somewhere */

2434 
walltime_accounting_set_nsteps_done(walltime_accounting, step); 
2435 
} 
2436  
2437 
void

2438 
Integrator::do_steep() 
2439 
{ 
2440 
const char *SD = "Steepest Descents"; 
2441 
gmx_localtop_t *top; 
2442 
gmx_enerdata_t *enerd; 
2443 
gmx_global_stat_t gstat; 
2444 
t_graph *graph; 
2445 
real stepsize; 
2446 
real ustep; 
2447 
gmx_mdoutf_t outf; 
2448 
t_mdebin *mdebin; 
2449 
gmx_bool bDone, bAbort, do_x, do_f; 
2450 
tensor vir, pres; 
2451 
rvec mu_tot; 
2452 
int nsteps;

2453 
int count = 0; 
2454 
int steps_accepted = 0; 
2455 
auto mdatoms = mdAtoms>mdatoms();

2456  
2457 
GMX_LOG(mdlog.info).asParagraph(). 
2458 
appendText("Note that activating steepestdescent energy minimization via the "

2459 
"integrator .mdp option and the command gmx mdrun may "

2460 
"be available in a different form in a future version of GROMACS, "

2461 
"e.g. gmx minimize and an .mdp option.");

2462  
2463 
/* Create 2 states on the stack and extract pointers that we will swap */

2464 
em_state_t s0 {}, s1 {}; 
2465 
em_state_t *s_min = &s0; 
2466 
em_state_t *s_try = &s1; 
2467  
2468 
/* Init em and store the local state in s_try */

2469 
init_em(fplog, mdlog, SD, cr, ms, outputProvider, inputrec, mdrunOptions, 
2470 
state_global, top_global, s_try, &top, 
2471 
nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat, 
2472 
vsite, constr, nullptr,

2473 
nfile, fnm, &outf, &mdebin, wcycle); 
2474  
2475 
/* Print to log file */

2476 
print_em_start(fplog, cr, walltime_accounting, wcycle, SD); 
2477  
2478 
/* Set variables for stepsize (in nm). This is the largest

2479 
* step that we are going to make in any direction.

2480 
*/

2481 
ustep = inputrec>em_stepsize; 
2482 
stepsize = 0;

2483  
2484 
/* Max number of steps */

2485 
nsteps = inputrec>nsteps; 
2486  
2487 
if (MASTER(cr))

2488 
{ 
2489 
/* Print to the screen */

2490 
sp_header(stderr, SD, inputrec>em_tol, nsteps); 
2491 
} 
2492 
if (fplog)

2493 
{ 
2494 
sp_header(fplog, SD, inputrec>em_tol, nsteps); 
2495 
} 
2496 
EnergyEvaluator energyEvaluator { 
2497 
fplog, mdlog, cr, ms, 
2498 
top_global, top, 
2499 
inputrec, nrnb, wcycle, gstat, 
2500 
vsite, constr, fcd, graph, 
2501 
mdAtoms, fr, ppForceWorkload, enerd 
2502 
}; 
2503  
2504 
/**** HERE STARTS THE LOOP ****

2505 
* count is the counter for the number of steps

2506 
* bDone will be TRUE when the minimization has converged

2507 
* bAbort will be TRUE when nsteps steps have been performed or when

2508 
* the stepsize becomes smaller than is reasonable for machine precision

2509 
*/

2510 
count = 0;

2511 
bDone = FALSE; 
2512 
bAbort = FALSE; 
2513 
while (!bDone && !bAbort)

2514 
{ 
2515 
bAbort = (nsteps >= 0) && (count == nsteps);

2516  
2517 
/* set new coordinates, except for first step */

2518 
bool validStep = true; 
2519 
if (count > 0) 
2520 
{ 
2521 
validStep = 
2522 
do_em_step(cr, inputrec, mdatoms, 
2523 
s_min, stepsize, &s_min>f, s_try, 
2524 
constr, count); 
2525 
} 
2526  
2527 
if (validStep)

2528 
{ 
2529 
energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0);

2530 
} 
2531 
else

2532 
{ 
2533 
// Signal constraint error during stepping with energy=inf

2534 
s_try>epot = std::numeric_limits<real>::infinity(); 
2535 
} 
2536  
2537 
if (MASTER(cr))

2538 
{ 
2539 
print_ebin_header(fplog, count, count); 
2540 
} 
2541  
2542 
if (count == 0) 
2543 
{ 
2544 
s_min>epot = s_try>epot; 
2545 
} 
2546  
2547 
/* Print it if necessary */

2548 
if (MASTER(cr))

2549 
{ 
2550 
if (mdrunOptions.verbose)

2551 
{ 
2552 
fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",

2553 
count, ustep, s_try>epot, s_try>fmax, s_try>a_fmax+1,

2554 
( (count == 0)  (s_try>epot < s_min>epot) ) ? '\n' : '\r'); 
2555 
fflush(stderr); 
2556 
} 
2557  
2558 
if ( (count == 0)  (s_try>epot < s_min>epot) ) 
2559 
{ 
2560 
/* Store the new (lower) energies */

2561 
matrix nullBox = {}; 
2562 
upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(count), 
2563 
mdatoms>tmass, enerd, nullptr, nullptr, nullptr, 
2564 
nullBox, nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); 
2565  
2566 
/* Prepare IMD energy record, if bIMD is TRUE. */

2567 
IMD_fill_energy_record(inputrec>bIMD, inputrec>imd, enerd, count, TRUE); 
2568  
2569 
print_ebin(mdoutf_get_fp_ene(outf), TRUE, 
2570 
do_per_step(steps_accepted, inputrec>nstdisreout), 
2571 
do_per_step(steps_accepted, inputrec>nstorireout), 
2572 
fplog, count, count, eprNORMAL, 
2573 
mdebin, fcd, &(top_global>groups), &(inputrec>opts), nullptr);

2574 
fflush(fplog); 
2575 
} 
2576 
} 
2577  
2578 
/* Now if the new energy is smaller than the previous...

2579 
* or if this is the first step!

2580 
* or if we did random steps!

2581 
*/

2582  
2583 
if ( (count == 0)  (s_try>epot < s_min>epot) ) 
2584 
{ 
2585 
steps_accepted++; 
2586  
2587 
/* Test whether the convergence criterion is met... */

2588 
bDone = (s_try>fmax < inputrec>em_tol); 
2589  
2590 
/* Copy the arrays for force, positions and energy */

2591 
/* The 'Min' array always holds the coords and forces of the minimal

2592 
sampled energy */

2593 
swap_em_state(&s_min, &s_try); 
2594 
if (count > 0) 
2595 
{ 
2596 
ustep *= 1.2; 
2597 
} 
2598  
2599 
/* Write to trn, if necessary */

2600 
do_x = do_per_step(steps_accepted, inputrec>nstxout); 
2601 
do_f = do_per_step(steps_accepted, inputrec>nstfout); 
2602 
write_em_traj(fplog, cr, outf, do_x, do_f, nullptr,

2603 
top_global, inputrec, count, 
2604 
s_min, state_global, observablesHistory); 
2605 
} 
2606 
else

2607 
{ 
2608 
/* If energy is not smaller make the step smaller... */

2609 
ustep *= 0.5; 
2610  
2611 
if (DOMAINDECOMP(cr) && s_min>s.ddp_count != cr>dd>ddp_count)

2612 
{ 
2613 
/* Reload the old state */

2614 
em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, 
2615 
s_min, top, mdAtoms, fr, vsite, constr, 
2616 
nrnb, wcycle); 
2617 
} 
2618 
} 
2619  
2620 
/* Determine new step */

2621 
stepsize = ustep/s_min>fmax; 
2622  
2623 
/* Check if stepsize is too small, with 1 nm as a characteristic length */

2624 
#if GMX_DOUBLE

2625 
if (count == nsteps  ustep < 1e12) 
2626 
#else

2627 
if (count == nsteps  ustep < 1e6) 
2628 
#endif

2629 
{ 
2630 
if (MASTER(cr))

2631 
{ 
2632 
warn_step(fplog, inputrec>em_tol, s_min>fmax, 
2633 
count == nsteps, constr != nullptr);

2634 
} 
2635 
bAbort = TRUE; 
2636 
} 
2637  
2638 
/* Send IMD energies and positions, if bIMD is TRUE. */

2639 
if (do_IMD(inputrec>bIMD, count, cr, TRUE, state_global>box,

2640 
MASTER(cr) ? state_global>x.rvec_array() : nullptr,

2641 
inputrec, 0, wcycle) &&

2642 
MASTER(cr)) 
2643 
{ 
2644 
IMD_send_positions(inputrec>imd); 
2645 
} 
2646  
2647 
count++; 
2648 
} /* End of the loop */

2649  
2650 
/* IMD cleanup, if bIMD is TRUE. */

2651 
IMD_finalize(inputrec>bIMD, inputrec>imd); 
2652  
2653 
/* Print some data... */

2654 
if (MASTER(cr))

2655 
{ 
2656 
fprintf(stderr, "\nwriting lowest energy coordinates.\n");

2657 
} 
2658 
write_em_traj(fplog, cr, outf, TRUE, inputrec>nstfout != 0, ftp2fn(efSTO, nfile, fnm),

2659 
top_global, inputrec, count, 
2660 
s_min, state_global, observablesHistory); 
2661  
2662 
if (MASTER(cr))

2663 
{ 
2664 
double sqrtNumAtoms = sqrt(static_cast<double>(state_global>natoms)); 
2665  
2666 
print_converged(stderr, SD, inputrec>em_tol, count, bDone, nsteps, 
2667 
s_min, sqrtNumAtoms); 
2668 
print_converged(fplog, SD, inputrec>em_tol, count, bDone, nsteps, 
2669 
s_min, sqrtNumAtoms); 
2670 
} 
2671  
2672 
finish_em(cr, outf, walltime_accounting, wcycle); 
2673  
2674 
/* To print the actual number of steps we needed somewhere */

2675 
inputrec>nsteps = count; 
2676  
2677 
walltime_accounting_set_nsteps_done(walltime_accounting, count); 
2678 
} 
2679  
2680 
void

2681 
Integrator::do_nm() 
2682 
{ 
2683 
const char *NM = "Normal Mode Analysis"; 
2684 
gmx_mdoutf_t outf; 
2685 
int nnodes, node;

2686 
gmx_localtop_t *top; 
2687 
gmx_enerdata_t *enerd; 
2688 
gmx_global_stat_t gstat; 
2689 
t_graph *graph; 
2690 
tensor vir, pres; 
2691 
rvec mu_tot; 
2692 
rvec *dfdx; 
2693 
gmx_bool bSparse; /* use sparse matrix storage format */

2694 
size_t sz; 
2695 
gmx_sparsematrix_t * sparse_matrix = nullptr;

2696 
real * full_matrix = nullptr;

2697  
2698 
/* added with respect to mdrun */

2699 
int row, col;

2700 
real der_range = 10.0*std::sqrt(GMX_REAL_EPS); 
2701 
real x_min; 
2702 
bool bIsMaster = MASTER(cr);

2703 
auto mdatoms = mdAtoms>mdatoms();

2704  
2705 
GMX_LOG(mdlog.info).asParagraph(). 
2706 
appendText("Note that activating normalmode analysis via the integrator "

2707 
".mdp option and the command gmx mdrun may "

2708 
"be available in a different form in a future version of GROMACS, "

2709 
"e.g. gmx normalmodes.");

2710  
2711 
if (constr != nullptr) 
2712 
{ 
2713 
gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported");

2714 
} 
2715  
2716 
gmx_shellfc_t *shellfc; 
2717  
2718 
em_state_t state_work {}; 
2719  
2720 
/* Init em and store the local state in state_minimum */

2721 
init_em(fplog, mdlog, NM, cr, ms, outputProvider, inputrec, mdrunOptions, 
2722 
state_global, top_global, &state_work, &top, 
2723 
nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat, 
2724 
vsite, constr, &shellfc, 
2725 
nfile, fnm, &outf, nullptr, wcycle);

2726  
2727 
std::vector<int> atom_index = get_atom_index(top_global);

2728 
std::vector<gmx::RVec> fneg(atom_index.size(), {0, 0, 0}); 
2729 
snew(dfdx, atom_index.size()); 
2730  
2731 
#if !GMX_DOUBLE

2732 
if (bIsMaster)

2733 
{ 
2734 
fprintf(stderr, 
2735 
"NOTE: This version of GROMACS has been compiled in single precision,\n"

2736 
" which MIGHT not be accurate enough for normal mode analysis.\n"

2737 
" GROMACS now uses sparse matrix storage, so the memory requirements\n"

2738 
" are fairly modest even if you recompile in double precision.\n\n");

2739 
} 
2740 
#endif

2741  
2742 
/* Check if we can/should use sparse storage format.

2743 
*

2744 
* Sparse format is only useful when the Hessian itself is sparse, which it

2745 
* will be when we use a cutoff.

2746 
* For small systems (n<1000) it is easier to always use full matrix format, though.

2747 
*/

2748 
if (EEL_FULL(fr>ic>eeltype)  fr>rlist == 0.0) 
2749 
{ 
2750 
GMX_LOG(mdlog.warning).appendText("Noncutoff electrostatics used, forcing full Hessian format.");

2751 
bSparse = FALSE; 
2752 
} 
2753 
else if (atom_index.size() < 1000) 
2754 
{ 
2755 
GMX_LOG(mdlog.warning).appendTextFormatted("Small system size (N=%zu), using full Hessian format.",

2756 
atom_index.size()); 
2757 
bSparse = FALSE; 
2758 
} 
2759 
else

2760 
{ 
2761 
GMX_LOG(mdlog.warning).appendText("Using compressed symmetric sparse Hessian format.");

2762 
bSparse = TRUE; 
2763 
} 
2764  
2765 
/* Number of dimensions, based on real atoms, that is not vsites or shell */

2766 
sz = DIM*atom_index.size(); 
2767  
2768 
fprintf(stderr, "Allocating Hessian memory...\n\n");

2769  
2770 
if (bSparse)

2771 
{ 
2772 
sparse_matrix = gmx_sparsematrix_init(sz); 
2773 
sparse_matrix>compressed_symmetric = TRUE; 
2774 
} 
2775 
else

2776 
{ 
2777 
snew(full_matrix, sz*sz); 
2778 
} 
2779  
2780 
init_nrnb(nrnb); 
2781  
2782  
2783 
/* Write start time and temperature */

2784 
print_em_start(fplog, cr, walltime_accounting, wcycle, NM); 
2785  
2786 
/* fudge nr of steps to nr of atoms */

2787 
inputrec>nsteps = atom_index.size()*2;

2788  
2789 
if (bIsMaster)

2790 
{ 
2791 
fprintf(stderr, "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n", 
2792 
*(top_global>name), inputrec>nsteps); 
2793 
} 
2794  
2795 
nnodes = cr>nnodes; 
2796  
2797 
/* Make evaluate_energy do a single node force calculation */

2798 
cr>nnodes = 1;

2799 
EnergyEvaluator energyEvaluator { 
2800 
fplog, mdlog, cr, ms, 
2801 
top_global, top, 
2802 
inputrec, nrnb, wcycle, gstat, 
2803 
vsite, constr, fcd, graph, 
2804 
mdAtoms, fr, ppForceWorkload, enerd 
2805 
}; 
2806 
energyEvaluator.run(&state_work, mu_tot, vir, pres, 1, TRUE);

2807 
cr>nnodes = nnodes; 
2808  
2809 
/* if forces are not small, warn user */

2810 
get_state_f_norm_max(cr, &(inputrec>opts), mdatoms, &state_work); 
2811  
2812 
GMX_LOG(mdlog.warning).appendTextFormatted("Maximum force:%12.5e", state_work.fmax);

2813 
if (state_work.fmax > 1.0e3) 
2814 
{ 
2815 
GMX_LOG(mdlog.warning).appendText( 
2816 
"The force is probably not small enough to "

2817 
"ensure that you are at a minimum.\n"

2818 
"Be aware that negative eigenvalues may occur\n"

2819 
"when the resulting matrix is diagonalized.");

2820 
} 
2821  
2822 
/***********************************************************

2823 
*

2824 
* Loop over all pairs in matrix

2825 
*

2826 
* do_force called twice. Once with positive and

2827 
* once with negative displacement

2828 
*

2829 
************************************************************/

2830  
2831 
/* Steps are divided one by one over the nodes */

2832 
bool bNS = true; 
2833 
auto state_work_x = makeArrayRef(state_work.s.x);

2834 
auto state_work_f = makeArrayRef(state_work.f);

2835 
for (unsigned int aid = cr>nodeid; aid < atom_index.size(); aid += nnodes) 
2836 
{ 
2837 
size_t atom = atom_index[aid]; 
2838 
for (size_t d = 0; d < DIM; d++) 
2839 
{ 
2840 
int64_t step = 0;

2841 
int force_flags = GMX_FORCE_STATECHANGED  GMX_FORCE_ALLFORCES;

2842 
double t = 0; 
2843  
2844 
x_min = state_work_x[atom][d]; 
2845  
2846 
for (unsigned int dx = 0; (dx < 2); dx++) 
2847 
{ 
2848 
if (dx == 0) 
2849 
{ 
2850 
state_work_x[atom][d] = x_min  der_range; 
2851 
} 
2852 
else

2853 
{ 
2854 
state_work_x[atom][d] = x_min + der_range; 
2855 
} 
2856  
2857 
/* Make evaluate_energy do a single node force calculation */

2858 
cr>nnodes = 1;

2859 
if (shellfc)

2860 
{ 
2861 
/* Now is the time to relax the shells */

2862 
relax_shell_flexcon(fplog, 
2863 
cr, 
2864 
ms, 
2865 
mdrunOptions.verbose, 
2866 
nullptr,

2867 
step, 
2868 
inputrec, 
2869 
bNS, 
2870 
force_flags, 
2871 
top, 
2872 
constr, 
2873 
enerd, 
2874 
fcd, 
2875 
&state_work.s, 
2876 
state_work.f.arrayRefWithPadding(), 
2877 
vir, 
2878 
mdatoms, 
2879 
nrnb, 
2880 
wcycle, 
2881 
graph, 
2882 
&top_global>groups, 
2883 
shellfc, 
2884 
fr, 
2885 
ppForceWorkload, 
2886 
t, 
2887 
mu_tot, 
2888 
vsite, 
2889 
DdOpenBalanceRegionBeforeForceComputation::no, 
2890 
DdCloseBalanceRegionAfterForceComputation::no); 
2891 
bNS = false;

2892 
step++; 
2893 
} 
2894 
else

2895 
{ 
2896 
energyEvaluator.run(&state_work, mu_tot, vir, pres, aid*2+dx, FALSE);

2897 
} 
2898  
2899 
cr>nnodes = nnodes; 
2900  
2901 
if (dx == 0) 
2902 
{ 
2903 
std::copy(state_work_f.begin(), state_work_f.begin()+atom_index.size(), fneg.begin()); 
2904 
} 
2905 
} 
2906  
2907 
/* x is restored to original */

2908 
state_work_x[atom][d] = x_min; 
2909  
2910 
for (size_t j = 0; j < atom_index.size(); j++) 
2911 
{ 
2912 
for (size_t k = 0; (k < DIM); k++) 
2913 
{ 
2914 
dfdx[j][k] = 
2915 
(state_work_f[atom_index[j]][k]  fneg[j][k])/(2*der_range);

2916 
} 
2917 
} 
2918  
2919 
if (!bIsMaster)

2920 
{ 
2921 
#if GMX_MPI

2922 
#define mpi_type GMX_MPI_REAL

2923 
MPI_Send(dfdx[0], atom_index.size()*DIM, mpi_type, MASTER(cr),

2924 
cr>nodeid, cr>mpi_comm_mygroup); 
2925 
#endif

2926 
} 
2927 
else

2928 
{ 
2929 
for (node = 0; (node < nnodes && aid+node < atom_index.size()); node++) 
2930 
{ 
2931 
if (node > 0) 
2932 
{ 
2933 
#if GMX_MPI

2934 
MPI_Status stat; 
2935 
MPI_Recv(dfdx[0], atom_index.size()*DIM, mpi_type, node, node,

2936 
cr>mpi_comm_mygroup, &stat); 
2937 
#undef mpi_type

2938 
#endif

2939 
} 
2940  
2941 
row = (aid + node)*DIM + d; 
2942  
2943 
for (size_t j = 0; j < atom_index.size(); j++) 
2944 
{ 
2945 
for (size_t k = 0; k < DIM; k++) 
2946 
{ 
2947 
col = j*DIM + k; 
2948  
2949 
if (bSparse)

2950 
{ 
2951 
if (col >= row && dfdx[j][k] != 0.0) 
2952 
{ 
2953 
gmx_sparsematrix_increment_value(sparse_matrix, 
2954 
row, col, dfdx[j][k]); 
2955 
} 
2956 
} 
2957 
else

2958 
{ 
2959 
full_matrix[row*sz+col] = dfdx[j][k]; 
2960 
} 
2961 
} 
2962 
} 
2963 
} 
2964 
} 
2965  
2966 
if (mdrunOptions.verbose && fplog)

2967 
{ 
2968 
fflush(fplog); 
2969 
} 
2970 
} 
2971 
/* write progress */

2972 
if (bIsMaster && mdrunOptions.verbose)

2973 
{ 
2974 
fprintf(stderr, "\rFinished step %d out of %d",

2975 
static_cast<int>(std::min(atom+nnodes, atom_index.size())), 
2976 
static_cast<int>(atom_index.size())); 
2977 
fflush(stderr); 
2978 
} 
2979 
} 
2980  
2981 
if (bIsMaster)

2982 
{ 
2983 
fprintf(stderr, "\n\nWriting Hessian...\n");

2984 
gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix); 
2985 
} 
2986  
2987 
finish_em(cr, outf, walltime_accounting, wcycle); 
2988  
2989 
walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size()*2);

2990 
} 
2991  
2992 
} // namespace gmx
