minimize.c
1 
/*


2 
* $Id: minimize.c,v 1.131.2.6 2009/05/29 12:36:20 hess Exp $

3 
*

4 
* This source code is part of

5 
*

6 
* G R O M A C S

7 
*

8 
* GROningen MAchine for Chemical Simulations

9 
*

10 
* VERSION 3.2.0

11 
* Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.

12 
* Copyright (c) 19912000, University of Groningen, The Netherlands.

13 
* Copyright (c) 20012004, The GROMACS development team,

14 
* check out http://www.gromacs.org for more information.

15 

16 
* This program is free software; you can redistribute it and/or

17 
* modify it under the terms of the GNU General Public License

18 
* as published by the Free Software Foundation; either version 2

19 
* of the License, or (at your option) any later version.

20 
*

21 
* If you want to redistribute modifications, please consider that

22 
* scientific software is very special. Version control is crucial 

23 
* bugs must be traceable. We will be happy to consider code for

24 
* inclusion in the official distribution, but derived work must not

25 
* be called official GROMACS. Details are found in the README & COPYING

26 
* files  if they are missing, get the official version at www.gromacs.org.

27 
*

28 
* To help us fund GROMACS development, we humbly ask that you cite

29 
* the papers on the package  you can find them in the top README file.

30 
*

31 
* For more info, check our website at http://www.gromacs.org

32 
*

33 
* And Hey:

34 
* GROwing Monsters And Cloning Shrimps

35 
*/

36 
#ifdef HAVE_CONFIG_H

37 
#include <config.h> 
38 
#endif

39  
40 
#include <string.h> 
41 
#include <time.h> 
42 
#include <math.h> 
43 
#include "sysstuff.h" 
44 
#include "string2.h" 
45 
#include "network.h" 
46 
#include "confio.h" 
47 
#include "copyrite.h" 
48 
#include "smalloc.h" 
49 
#include "nrnb.h" 
50 
#include "main.h" 
51 
#include "pbc.h" 
52 
#include "force.h" 
53 
#include "macros.h" 
54 
#include "random.h" 
55 
#include "names.h" 
56 
#include "gmx_fatal.h" 
57 
#include "txtdump.h" 
58 
#include "typedefs.h" 
59 
#include "update.h" 
60 
#include "random.h" 
61 
#include "constr.h" 
62 
#include "vec.h" 
63 
#include "statutil.h" 
64 
#include "tgroup.h" 
65 
#include "mdebin.h" 
66 
#include "vsite.h" 
67 
#include "force.h" 
68 
#include "mdrun.h" 
69 
#include "domdec.h" 
70 
#include "partdec.h" 
71 
#include "trnio.h" 
72 
#include "sparsematrix.h" 
73 
#include "mtxio.h" 
74 
#include "gmx_random.h" 
75 
#include "physics.h" 
76 
#include "xvgr.h" 
77 
#include "mdatoms.h" 
78 
#include "gbutil.h" 
79 
#include "ns.h" 
80 
#include "gmx_wallcycle.h" 
81 
#include "mtop_util.h" 
82 
#include "gmxfio.h" 
83 
#include "pme.h" 
84  
85 
typedef struct { 
86 
t_state s; 
87 
rvec *f; 
88 
real epot; 
89 
real fnorm; 
90 
real fmax; 
91 
int a_fmax;

92 
} em_state_t; 
93  
94 
static em_state_t *init_em_state()

95 
{ 
96 
em_state_t *ems; 
97 

98 
snew(ems,1);

99  
100 
return ems;

101 
} 
102  
103 
static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps) 
104 
{ 
105 
fprintf(out,"%s:\n",minimizer);

106 
fprintf(out," Tolerance (Fmax) = %12.5e\n",ftol);

107 
fprintf(out," Number of steps = %12d\n",nsteps);

108 
} 
109  
110 
static void warn_step(FILE *fp,real ftol,bool bConstrain) 
111 
{ 
112 
fprintf(fp,"\nStepsize too small, or no change in energy.\n"

113 
"Converged to machine precision,\n"

114 
"but not to the requested precision Fmax < %g\n",

115 
ftol); 
116 
if (sizeof(real)<sizeof(double)) 
117 
fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");

118 

119 
if (bConstrain)

120 
fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"

121 
"off constraints alltogether (set constraints = none in mdp file)\n");

122 
} 
123  
124  
125  
126 
static void print_converged(FILE *fp,const char *alg,real ftol,int count,bool bDone, 
127 
int nsteps,real epot,real fmax, int nfmax, real fnorm) 
128 
{ 
129 
if (bDone)

130 
fprintf(fp,"\n%s converged to Fmax < %g in %d steps\n",alg,ftol,count);

131 
else if(count<nsteps) 
132 
fprintf(fp,"\n%s converged to machine precision in %d steps,\n"

133 
"but did not reach the requested Fmax < %g.\n",alg,count,ftol);

134 
else

135 
fprintf(fp,"\n%s did not converge to Fmax < %g in %d steps.\n",alg,ftol,count);

136  
137 
#ifdef GMX_DOUBLE

138 
fprintf(fp,"Potential Energy = %21.14e\n",epot);

139 
fprintf(fp,"Maximum force = %21.14e on atom %d\n",fmax,nfmax+1); 
140 
fprintf(fp,"Norm of force = %21.14e\n",fnorm);

141 
#else

142 
fprintf(fp,"Potential Energy = %14.7e\n",epot);

143 
fprintf(fp,"Maximum force = %14.7e on atom %d\n",fmax,nfmax+1); 
144 
fprintf(fp,"Norm of force = %14.7e\n",fnorm);

145 
#endif

146 
} 
147  
148 
static void get_f_norm_max(t_commrec *cr, 
149 
t_grpopts *opts,t_mdatoms *mdatoms,rvec *f, 
150 
real *fnorm,real *fmax,int *a_fmax)

151 
{ 
152 
double fnorm2,*sum;

153 
real fmax2,fmax2_0,fam; 
154 
int la_max,a_max,start,end,i,m,gf;

155  
156 
/* This routine finds the largest force and returns it.

157 
* On parallel machines the global max is taken.

158 
*/

159 
fnorm2 = 0;

160 
fmax2 = 0;

161 
la_max = 1;

162 
gf = 0;

163 
start = mdatoms>start; 
164 
end = mdatoms>homenr + start; 
165 
if (mdatoms>cFREEZE) {

166 
for(i=start; i<end; i++) {

167 
gf = mdatoms>cFREEZE[i]; 
168 
fam = 0;

169 
for(m=0; m<DIM; m++) 
170 
if (!opts>nFreeze[gf][m])

171 
fam += sqr(f[i][m]); 
172 
fnorm2 += fam; 
173 
if (fam > fmax2) {

174 
fmax2 = fam; 
175 
la_max = i; 
176 
} 
177 
} 
178 
} else {

179 
for(i=start; i<end; i++) {

180 
fam = norm2(f[i]); 
181 
fnorm2 += fam; 
182 
if (fam > fmax2) {

183 
fmax2 = fam; 
184 
la_max = i; 
185 
} 
186 
} 
187 
} 
188  
189 
if (la_max >= 0 && DOMAINDECOMP(cr)) { 
190 
a_max = cr>dd>gatindex[la_max]; 
191 
} else {

192 
a_max = la_max; 
193 
} 
194 
if (PAR(cr)) {

195 
snew(sum,2*cr>nnodes+1); 
196 
sum[2*cr>nodeid] = fmax2;

197 
sum[2*cr>nodeid+1] = a_max; 
198 
sum[2*cr>nnodes] = fnorm2;

199 
gmx_sumd(2*cr>nnodes+1,sum,cr); 
200 
fnorm2 = sum[2*cr>nnodes];

201 
/* Determine the global maximum */

202 
for(i=0; i<cr>nnodes; i++) { 
203 
if (sum[2*i] > fmax2) { 
204 
fmax2 = sum[2*i];

205 
a_max = (int)(sum[2*i+1] + 0.5); 
206 
} 
207 
} 
208 
sfree(sum); 
209 
} 
210  
211 
if (fnorm)

212 
*fnorm = sqrt(fnorm2); 
213 
if (fmax)

214 
*fmax = sqrt(fmax2); 
215 
if (a_fmax)

216 
*a_fmax = a_max; 
217 
} 
218  
219 
static void get_state_f_norm_max(t_commrec *cr, 
220 
t_grpopts *opts,t_mdatoms *mdatoms, 
221 
em_state_t *ems) 
222 
{ 
223 
get_f_norm_max(cr,opts,mdatoms,ems>f,&ems>fnorm,&ems>fmax,&ems>a_fmax); 
224 
} 
225  
226 
void init_em(FILE *fplog,const char *title, 
227 
t_commrec *cr,t_inputrec *ir, 
228 
t_state *state_global,gmx_mtop_t *top_global, 
229 
em_state_t *ems,gmx_localtop_t **top, 
230 
rvec *f,rvec **buf,rvec **f_global, 
231 
t_nrnb *nrnb,rvec mu_tot, 
232 
t_forcerec *fr,gmx_enerdata_t **enerd, 
233 
t_graph **graph,t_mdatoms *mdatoms, 
234 
gmx_vsite_t *vsite,gmx_constr_t constr, 
235 
int nfile,t_filenm fnm[],int *fp_trn,int *fp_ene, 
236 
t_mdebin **mdebin) 
237 
{ 
238 
int start,homenr,i;

239 
real dvdlambda; 
240  
241 
if (fplog)

242 
fprintf(fplog,"Initiating %s\n",title);

243  
244 
state_global>ngtc = 0;

245 
if (ir>eI == eiCG) {

246 
state_global>flags = (1<<estCGP);

247 
snew(state_global>cg_p,state_global>nalloc); 
248 
} 
249  
250 
/* Initiate some variables */

251 
if (ir>efep != efepNO)

252 
state_global>lambda = ir>init_lambda; 
253 
else

254 
state_global>lambda = 0.0; 
255  
256 
init_nrnb(nrnb); 
257  
258 
if (DOMAINDECOMP(cr)) {

259 
*top = dd_init_local_top(top_global); 
260  
261 
dd_init_local_state(cr>dd,state_global,&ems>s); 
262  
263 
/* Distribute the charge groups over the nodes from the master node */

264 
dd_partition_system(fplog,ir>init_step,cr,TRUE, 
265 
state_global,top_global,ir, 
266 
&ems>s,&ems>f,buf,mdatoms,*top, 
267 
fr,vsite,NULL,constr,

268 
nrnb,NULL,FALSE);

269 
dd_store_state(cr>dd,&ems>s); 
270 

271 
if (ir>nstfout) {

272 
snew(*f_global,top_global>natoms); 
273 
} else {

274 
*f_global = NULL;

275 
} 
276 
*graph = NULL;

277 
} else {

278 
/* Just copy the state */

279 
ems>s = *state_global; 
280 
snew(ems>s.x,ems>s.nalloc); 
281 
snew(ems>f,ems>s.nalloc); 
282 
for(i=0; i<state_global>natoms; i++) 
283 
copy_rvec(state_global>x[i],ems>s.x[i]); 
284 
copy_mat(state_global>box,ems>s.box); 
285  
286 
if (PAR(cr)) {

287 
/* Initialize the particle decomposition and split the topology */

288 
*top = split_system(fplog,top_global,ir,cr); 
289 

290 
pd_cg_range(cr,&fr>cg0,&fr>hcg); 
291 
} else {

292 
*top = gmx_mtop_generate_local_top(top_global,ir); 
293 
} 
294 
*f_global = f; 
295  
296 
if (ir>ePBC != epbcNONE && !ir>bPeriodicMols) {

297 
*graph = mk_graph(fplog,&((*top)>idef),0,top_global>natoms,FALSE,FALSE);

298 
} else {

299 
*graph = NULL;

300 
} 
301 
} 
302  
303 
clear_rvec(mu_tot); 
304 
calc_shifts(ems>s.box,fr>shift_vec); 
305  
306 
if (PARTDECOMP(cr)) {

307 
pd_at_range(cr,&start,&homenr); 
308 
homenr = start; 
309 
} else {

310 
start = 0;

311 
homenr = top_global>natoms; 
312 
} 
313 
atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms); 
314 
update_mdatoms(mdatoms,state_global>lambda); 
315  
316 
if (vsite && !DOMAINDECOMP(cr)) {

317 
set_vsite_top(vsite,*top,mdatoms,cr); 
318 
} 
319  
320 
if (constr) {

321 
if (ir>eConstrAlg == econtSHAKE &&

322 
gmx_mtop_ftype_count(top_global,F_CONSTR) > 0) {

323 
gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",

324 
econstr_names[econtSHAKE],econstr_names[econtLINCS]); 
325 
} 
326  
327 
if (!DOMAINDECOMP(cr))

328 
set_constraints(constr,*top,ir,mdatoms,NULL);

329  
330 
/* Constrain the starting coordinates */

331 
dvdlambda=0;

332 
constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)>idef,

333 
ir,cr,1,0,mdatoms, 
334 
ems>s.x,ems>s.x,NULL,ems>s.box,ems>s.lambda,&dvdlambda,

335 
NULL,NULL,nrnb,econqCoord); 
336 
} 
337  
338 
if (MASTER(cr)) {

339 
if (fp_trn)

340 
*fp_trn = open_trn(ftp2fn(efTRN,nfile,fnm),"w");

341 
if (fp_ene)

342 
*fp_ene = open_enx(ftp2fn(efENX,nfile,fnm),"w");

343 
} else {

344 
if (fp_trn)

345 
*fp_trn = 1;

346 
if (fp_ene)

347 
*fp_ene = 1;

348 
} 
349  
350 
snew(*enerd,1);

351 
init_enerdata(fplog,top_global>groups.grps[egcENER].nr,*enerd); 
352  
353 
/* Init bin for energy stuff */

354 
*mdebin = init_mdebin(*fp_ene,top_global,ir); 
355 
} 
356  
357 
static void finish_em(FILE *fplog,t_commrec *cr, 
358 
int fp_traj,int fp_ene) 
359 
{ 
360 
if (!(cr>duty & DUTY_PME)) {

361 
/* Tell the PME only node to finish */

362 
gmx_pme_finish(cr); 
363 
} 
364  
365 
if (MASTER(cr)) {

366 
close_trn(fp_traj); 
367 
close_enx(fp_ene); 
368 
} 
369 
} 
370  
371 
static void swap_em_state(em_state_t *ems1,em_state_t *ems2) 
372 
{ 
373 
em_state_t tmp; 
374  
375 
tmp = *ems1; 
376 
*ems1 = *ems2; 
377 
*ems2 = tmp; 
378 
} 
379  
380 
static void copy_em_coords_back(em_state_t *ems,t_state *state,rvec *f) 
381 
{ 
382 
int i;

383  
384 
for(i=0; (i<state>natoms); i++) 
385 
copy_rvec(ems>s.x[i],state>x[i]); 
386 
if (f != NULL) 
387 
copy_rvec(ems>f[i],f[i]); 
388 
} 
389  
390 
static void write_em_traj(FILE *fplog,t_commrec *cr, 
391 
int fp_trn,bool bX,bool bF,char *confout, 
392 
gmx_mtop_t *top_global, 
393 
t_inputrec *ir,int step,

394 
em_state_t *state, 
395 
t_state *state_global,rvec *f_global) 
396 
{ 
397 
if ((bX  bF  confout != NULL) && !DOMAINDECOMP(cr)) { 
398 
f_global = state>f; 
399 
copy_em_coords_back(state,state_global,bF ? f_global : NULL);

400 
} 
401 

402 
write_traj(fplog,cr,fp_trn,bX,FALSE,bF,0,FALSE,0,NULL,FALSE, 
403 
top_global,ir>eI,ir>simulation_part,step,(double)step,

404 
&state>s,state_global,state>f,f_global); 
405 

406 
if (confout != NULL && MASTER(cr)) { 
407 
write_sto_conf_mtop(confout, 
408 
*top_global>name,top_global, 
409 
state_global>x,NULL,ir>ePBC,state_global>box);

410 
} 
411 
} 
412 

413 
static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md, 
414 
em_state_t *ems1,real a,rvec *f,em_state_t *ems2, 
415 
gmx_constr_t constr,gmx_localtop_t *top, 
416 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
417 
int count)

418  
419 
{ 
420 
t_state *s1,*s2; 
421 
int start,end,gf,i,m;

422 
rvec *x1,*x2; 
423 
real dvdlambda; 
424  
425 
s1 = &ems1>s; 
426 
s2 = &ems2>s; 
427  
428 
if (DOMAINDECOMP(cr) && s1>ddp_count != cr>dd>ddp_count)

429 
gmx_incons("state mismatch in do_em_step");

430  
431 
s2>flags = s1>flags; 
432  
433 
if (s2>nalloc != s1>nalloc) {

434 
s2>nalloc = s1>nalloc; 
435 
srenew(s2>x,s1>nalloc); 
436 
srenew(ems2>f, s1>nalloc); 
437 
if (s2>flags & (1<<estCGP)) 
438 
srenew(s2>cg_p, s1>nalloc); 
439 
} 
440 

441 
s2>natoms = s1>natoms; 
442 
s2>lambda = s1>lambda; 
443 
copy_mat(s1>box,s2>box); 
444  
445 
start = md>start; 
446 
end = md>start + md>homenr; 
447  
448 
x1 = s1>x; 
449 
x2 = s2>x; 
450 
gf = 0;

451 
for(i=start; i<end; i++) {

452 
if (md>cFREEZE)

453 
gf = md>cFREEZE[i]; 
454 
for(m=0; m<DIM; m++) { 
455 
if (ir>opts.nFreeze[gf][m])

456 
x2[i][m] = x1[i][m]; 
457 
else

458 
x2[i][m] = x1[i][m] + a*f[i][m]; 
459 
} 
460 
} 
461  
462 
if (s2>flags & (1<<estCGP)) { 
463 
/* Copy the CG p vector */

464 
x1 = s1>cg_p; 
465 
x2 = s2>cg_p; 
466 
for(i=start; i<end; i++)

467 
copy_rvec(x1[i],x2[i]); 
468 
} 
469  
470 
if (DOMAINDECOMP(cr)) {

471 
s2>ddp_count = s1>ddp_count; 
472 
if (s2>cg_gl_nalloc < s1>cg_gl_nalloc) {

473 
s2>cg_gl_nalloc = s1>cg_gl_nalloc; 
474 
srenew(s2>cg_gl,s2>cg_gl_nalloc); 
475 
} 
476 
s2>ncg_gl = s1>ncg_gl; 
477 
for(i=0; i<s2>ncg_gl; i++) 
478 
s2>cg_gl[i] = s1>cg_gl[i]; 
479 
s2>ddp_count_cg_gl = s1>ddp_count_cg_gl; 
480 
} 
481  
482 
if (constr) {

483 
wallcycle_start(wcycle,ewcCONSTR); 
484 
dvdlambda = 0;

485 
constrain(NULL,TRUE,TRUE,constr,&top>idef,

486 
ir,cr,count,0,md,

487 
s1>x,s2>x,NULL,s2>box,s2>lambda,

488 
&dvdlambda,NULL,NULL,nrnb,econqCoord); 
489 
wallcycle_stop(wcycle,ewcCONSTR); 
490 
} 
491 
} 
492  
493 
static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2) 
494  
495 
{ 
496 
int start,end,i,m;

497  
498 
if (DOMAINDECOMP(cr)) {

499 
start = 0;

500 
end = cr>dd>nat_home; 
501 
} else if (PARTDECOMP(cr)) { 
502 
pd_at_range(cr,&start,&end); 
503 
} else {

504 
start = 0;

505 
end = n; 
506 
} 
507  
508 
for(i=start; i<end; i++) {

509 
for(m=0; m<DIM; m++) { 
510 
x2[i][m] = x1[i][m] + a*f[i][m]; 
511 
} 
512 
} 
513 
} 
514  
515 
static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f) 
516  
517 
{ 
518 
int start,end,i,m;

519  
520 
if (DOMAINDECOMP(cr)) {

521 
start = 0;

522 
end = cr>dd>nat_home; 
523 
} else if (PARTDECOMP(cr)) { 
524 
pd_at_range(cr,&start,&end); 
525 
} else {

526 
start = 0;

527 
end = n; 
528 
} 
529  
530 
for(i=start; i<end; i++) {

531 
for(m=0; m<DIM; m++) { 
532 
f[i][m] = (x1[i][m]  x2[i][m])*a; 
533 
} 
534 
} 
535 
} 
536  
537 
static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr, 
538 
gmx_mtop_t *top_global,t_inputrec *ir, 
539 
em_state_t *ems,rvec **buf,gmx_localtop_t *top, 
540 
t_mdatoms *mdatoms,t_forcerec *fr, 
541 
gmx_vsite_t *vsite,gmx_constr_t constr, 
542 
t_nrnb *nrnb,gmx_wallcycle_t wcycle) 
543 
{ 
544 
/* Repartition the domain decomposition */

545 
wallcycle_start(wcycle,ewcDOMDEC); 
546 
dd_partition_system(fplog,step,cr,FALSE, 
547 
NULL,top_global,ir,

548 
&ems>s,&ems>f,buf, 
549 
mdatoms,top,fr,vsite,NULL,constr,

550 
nrnb,wcycle,FALSE); 
551 
dd_store_state(cr>dd,&ems>s); 
552 
wallcycle_stop(wcycle,ewcDOMDEC); 
553 
} 
554 

555 
static void evaluate_energy(FILE *fplog,bool bVerbose,t_commrec *cr, 
556 
t_state *state_global,gmx_mtop_t *top_global, 
557 
em_state_t *ems,rvec **buf,gmx_localtop_t *top, 
558 
t_inputrec *inputrec, 
559 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
560 
gmx_vsite_t *vsite,gmx_constr_t constr, 
561 
t_fcdata *fcd, 
562 
t_graph *graph,t_mdatoms *mdatoms, 
563 
t_forcerec *fr, rvec mu_tot, 
564 
gmx_enerdata_t *enerd,tensor vir,tensor pres, 
565 
int count,bool bFirst) 
566 
{ 
567 
real t; 
568 
bool bNS;

569 
int nabnsb;

570 
tensor force_vir,shake_vir,ekin; 
571 
real dvdl; 
572 
real terminate=0;

573 
t_state *state; 
574 

575 
/* Set the time to the initial time, the time does not change during EM */

576 
t = inputrec>init_t; 
577  
578 
if (bFirst 

579 
(DOMAINDECOMP(cr) && ems>s.ddp_count < cr>dd>ddp_count)) { 
580 
/* This the first state or an old state used before the last ns */

581 
bNS = TRUE; 
582 
} else {

583 
bNS = FALSE; 
584 
if (inputrec>nstlist > 0) { 
585 
bNS = TRUE; 
586 
} else if (inputrec>nstlist == 1) { 
587 
nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top>cgs,NULL,ems>s.x);

588 
if (PAR(cr))

589 
gmx_sumi(1,&nabnsb,cr);

590 
bNS = (nabnsb > 0);

591 
} 
592 
} 
593  
594 
if (vsite)

595 
construct_vsites(fplog,vsite,ems>s.x,nrnb,1,NULL, 
596 
top>idef.iparams,top>idef.il, 
597 
fr>ePBC,fr>bMolPBC,graph,cr,ems>s.box); 
598  
599 
if (DOMAINDECOMP(cr)) {

600 
if (bNS) {

601 
/* Repartition the domain decomposition */

602 
em_dd_partition_system(fplog,count,cr,top_global,inputrec, 
603 
ems,buf,top,mdatoms,fr,vsite,constr, 
604 
nrnb,wcycle); 
605 
} 
606 
} 
607 

608 
/* Calc force & energy on new trial position */

609 
/* do_force always puts the charge groups in the box and shifts again

610 
* We do not unshift, so molecules are always whole in congrad.c

611 
*/

612 
do_force(fplog,cr,inputrec, 
613 
count,nrnb,wcycle,top,&top_global>groups, 
614 
ems>s.box,ems>s.x,&ems>s.hist, 
615 
ems>f,*buf,force_vir,mdatoms,enerd,fcd, 
616 
ems>s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL, 
617 
GMX_FORCE_STATECHANGED  GMX_FORCE_ALLFORCES  
618 
(bNS ? GMX_FORCE_NS : 0));

619  
620 
/* Clear the unused shake virial and pressure */

621 
clear_mat(shake_vir); 
622 
clear_mat(pres); 
623  
624 
/* Calculate long range corrections to pressure and energy */

625 
calc_dispcorr(fplog,inputrec,fr,count,mdatoms>nr,ems>s.box,ems>s.lambda, 
626 
pres,force_vir,enerd>term); 
627  
628 
/* Communicate stuff when parallel */

629 
if (PAR(cr)) {

630 
wallcycle_start(wcycle,ewcMoveE); 
631  
632 
global_stat(fplog,cr,enerd,force_vir,shake_vir,mu_tot, 
633 
inputrec,NULL,FALSE,NULL,NULL,NULL,NULL,&terminate); 
634  
635 
wallcycle_stop(wcycle,ewcMoveE); 
636 
} 
637  
638 
ems>epot = enerd>term[F_EPOT]; 
639 

640 
if (constr) {

641 
/* Project out the constraint components of the force */

642 
wallcycle_start(wcycle,ewcCONSTR); 
643 
dvdl = 0;

644 
constrain(NULL,FALSE,FALSE,constr,&top>idef,

645 
inputrec,cr,count,0,mdatoms,

646 
ems>s.x,ems>f,ems>f,ems>s.box,ems>s.lambda,&dvdl, 
647 
NULL,&shake_vir,nrnb,econqForce);

648 
if (fr>bSepDVDL && fplog)

649 
fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);

650 
enerd>term[F_DVDL] += dvdl; 
651 
m_add(force_vir,shake_vir,vir); 
652 
wallcycle_stop(wcycle,ewcCONSTR); 
653 
} else {

654 
copy_mat(force_vir,vir); 
655 
} 
656  
657 
clear_mat(ekin); 
658 
enerd>term[F_PRES] = 
659 
calc_pres(fr>ePBC,inputrec>nwall,ems>s.box,ekin,vir,pres, 
660 
(fr>eeltype==eelPPPM)?enerd>term[F_COUL_RECIP]:0.0); 
661  
662 
get_state_f_norm_max(cr,&(inputrec>opts),mdatoms,ems); 
663 
} 
664  
665 
static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms, 
666 
gmx_mtop_t *mtop, 
667 
em_state_t *s_min,em_state_t *s_b) 
668 
{ 
669 
rvec *fm,*fb,*fmg; 
670 
t_block *cgs_gl; 
671 
int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;

672 
double partsum;

673 
unsigned char *grpnrFREEZE; 
674  
675 
if (debug)

676 
fprintf(debug,"Doing reorder_partsum\n");

677  
678 
fm = s_min>f; 
679 
fb = s_b>f; 
680  
681 
cgs_gl = dd_charge_groups_global(cr>dd); 
682 
index = cgs_gl>index; 
683  
684 
/* Collect fm in a global vector fmg.

685 
* This conflics with the spirit of domain decomposition,

686 
* but to fully optimize this a much more complicated algorithm is required.

687 
*/

688 
snew(fmg,mtop>natoms); 
689 

690 
ncg = s_min>s.ncg_gl; 
691 
cg_gl = s_min>s.cg_gl; 
692 
i = 0;

693 
for(c=0; c<ncg; c++) { 
694 
cg = cg_gl[c]; 
695 
a0 = index[cg]; 
696 
a1 = index[cg+1];

697 
for(a=a0; a<a1; a++) {

698 
copy_rvec(fm[i],fmg[a]); 
699 
i++; 
700 
} 
701 
} 
702 
gmx_sum(mtop>natoms*3,fmg[0],cr); 
703  
704 
/* Now we will determine the part of the sum for the cgs in state s_b */

705 
ncg = s_b>s.ncg_gl; 
706 
cg_gl = s_b>s.cg_gl; 
707 
partsum = 0;

708 
i = 0;

709 
gf = 0;

710 
grpnrFREEZE = mtop>groups.grpnr[egcFREEZE]; 
711 
for(c=0; c<ncg; c++) { 
712 
cg = cg_gl[c]; 
713 
a0 = index[cg]; 
714 
a1 = index[cg+1];

715 
for(a=a0; a<a1; a++) {

716 
if (mdatoms>cFREEZE && grpnrFREEZE) {

717 
gf = grpnrFREEZE[i]; 
718 
} 
719 
for(m=0; m<DIM; m++) { 
720 
if (!opts>nFreeze[gf][m]) {

721 
partsum += (fb[i][m]  fmg[a][m])*fb[i][m]; 
722 
} 
723 
} 
724 
i++; 
725 
} 
726 
} 
727 

728 
sfree(fmg); 
729  
730 
return partsum;

731 
} 
732  
733 
static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,

734 
gmx_mtop_t *mtop, 
735 
em_state_t *s_min,em_state_t *s_b) 
736 
{ 
737 
rvec *fm,*fb; 
738 
double sum;

739 
int gf,i,m;

740  
741 
/* This is just the classical PolakRibiere calculation of beta;

742 
* it looks a bit complicated since we take freeze groups into account,

743 
* and might have to sum it in parallel runs.

744 
*/

745 

746 
if (!DOMAINDECOMP(cr) 

747 
(s_min>s.ddp_count == cr>dd>ddp_count && 
748 
s_b>s.ddp_count == cr>dd>ddp_count)) { 
749 
fm = s_min>f; 
750 
fb = s_b>f; 
751 
sum = 0;

752 
gf = 0;

753 
/* This part of code can be incorrect with DD,

754 
* since the atom ordering in s_b and s_min might differ.

755 
*/

756 
for(i=mdatoms>start; i<mdatoms>start+mdatoms>homenr; i++) {

757 
if (mdatoms>cFREEZE)

758 
gf = mdatoms>cFREEZE[i]; 
759 
for(m=0; m<DIM; m++) 
760 
if (!opts>nFreeze[gf][m]) {

761 
sum += (fb[i][m]  fm[i][m])*fb[i][m]; 
762 
} 
763 
} 
764 
} else {

765 
/* We need to reorder cgs while summing */

766 
sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b); 
767 
} 
768 
if (PAR(cr))

769 
gmx_sumd(1,&sum,cr);

770  
771 
return sum/sqr(s_min>fnorm);

772 
} 
773  
774 
time_t do_cg(FILE *fplog,t_commrec *cr, 
775 
int nfile,t_filenm fnm[],

776 
bool bVerbose,bool bCompact, 
777 
gmx_vsite_t *vsite,gmx_constr_t constr, 
778 
int stepout,

779 
t_inputrec *inputrec, 
780 
gmx_mtop_t *top_global,t_fcdata *fcd, 
781 
t_state *state_global,rvec f[], 
782 
rvec buf[],t_mdatoms *mdatoms, 
783 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
784 
gmx_edsam_t ed, 
785 
t_forcerec *fr, 
786 
int repl_ex_nst,int repl_ex_seed, 
787 
real cpt_period,real max_hours, 
788 
unsigned long Flags, 
789 
int *nsteps_done)

790 
{ 
791 
const char *CG="PolakRibiere Conjugate Gradients"; 
792  
793 
em_state_t *s_min,*s_a,*s_b,*s_c; 
794 
gmx_localtop_t *top; 
795 
gmx_enerdata_t *enerd; 
796 
t_graph *graph; 
797 
rvec *f_global,*p,*sf,*sfm; 
798 
double gpa,gpb,gpc,tmp,sum[2],minstep; 
799 
real fnormn; 
800 
real stepsize; 
801 
real a,b,c,beta=0.0; 
802 
real epot_repl=0;

803 
real pnorm; 
804 
t_mdebin *mdebin; 
805 
bool converged,foundlower;

806 
rvec mu_tot; 
807 
time_t start_t; 
808 
bool do_log=FALSE,do_ene=FALSE,do_x,do_f;

809 
tensor vir,pres; 
810 
int number_steps,neval=0,nstcg=inputrec>nstcgsteep; 
811 
int fp_trn,fp_ene;

812 
int i,m,gf,step,nminstep;

813 
real terminate=0;

814  
815 
step=0;

816  
817 
s_min = init_em_state(); 
818 
s_a = init_em_state(); 
819 
s_b = init_em_state(); 
820 
s_c = init_em_state(); 
821  
822 
/* Init em and store the local state in s_min */

823 
init_em(fplog,CG,cr,inputrec, 
824 
state_global,top_global,s_min,&top,f,&buf,&f_global, 
825 
nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr, 
826 
nfile,fnm,&fp_trn,&fp_ene,&mdebin); 
827  
828 
/* Print to log file */

829 
start_t=print_date_and_time(fplog,cr>nodeid, 
830 
"Started PolakRibiere Conjugate Gradients");

831 
wallcycle_start(wcycle,ewcRUN); 
832 

833 
/* Max number of steps */

834 
number_steps=inputrec>nsteps; 
835  
836 
if (MASTER(cr))

837 
sp_header(stderr,CG,inputrec>em_tol,number_steps); 
838 
if (fplog)

839 
sp_header(fplog,CG,inputrec>em_tol,number_steps); 
840  
841 
/* Call the force routine and some auxiliary (neighboursearching etc.) */

842 
/* do_force always puts the charge groups in the box and shifts again

843 
* We do not unshift, so molecules are always whole in congrad.c

844 
*/

845 
evaluate_energy(fplog,bVerbose,cr, 
846 
state_global,top_global,s_min,&buf,top, 
847 
inputrec,nrnb,wcycle, 
848 
vsite,constr,fcd,graph,mdatoms,fr, 
849 
mu_tot,enerd,vir,pres,1,TRUE);

850 
where(); 
851  
852 
if (MASTER(cr)) {

853 
/* Copy stuff to the energy bin for easy printing etc. */

854 
upd_mdebin(mdebin,NULL,TRUE,mdatoms>tmass,step,(real)step,

855 
enerd,&s_min>s,s_min>s.box, 
856 
NULL,NULL,vir,pres,NULL,mu_tot,constr); 
857 

858 
print_ebin_header(fplog,step,step,s_min>s.lambda); 
859 
print_ebin(fp_ene,TRUE,FALSE,FALSE,fplog,step,step,step,eprNORMAL, 
860 
TRUE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
861 
} 
862 
where(); 
863  
864 
/* Estimate/guess the initial stepsize */

865 
stepsize = inputrec>em_stepsize/s_min>fnorm; 
866 

867 
if (MASTER(cr)) {

868 
fprintf(stderr," Fmax = %12.5e on atom %d\n",

869 
s_min>fmax,s_min>a_fmax+1);

870 
fprintf(stderr," FNorm = %12.5e\n",

871 
s_min>fnorm/sqrt(state_global>natoms)); 
872 
fprintf(stderr,"\n");

873 
/* and copy to the log file too... */

874 
fprintf(fplog," Fmax = %12.5e on atom %d\n",

875 
s_min>fmax,s_min>a_fmax+1);

876 
fprintf(fplog," FNorm = %12.5e\n",

877 
s_min>fnorm/sqrt(state_global>natoms)); 
878 
fprintf(fplog,"\n");

879 
} 
880 
/* Start the loop over CG steps.

881 
* Each successful step is counted, and we continue until

882 
* we either converge or reach the max number of steps.

883 
*/

884 
for(step=0,converged=FALSE;( step<=number_steps  number_steps==0) && !converged;step++) { 
885 

886 
/* start taking steps in a new direction

887 
* First time we enter the routine, beta=0, and the direction is

888 
* simply the negative gradient.

889 
*/

890  
891 
/* Calculate the new direction in p, and the gradient in this direction, gpa */

892 
p = s_min>s.cg_p; 
893 
sf = s_min>f; 
894 
gpa = 0;

895 
gf = 0;

896 
for(i=mdatoms>start; i<mdatoms>start+mdatoms>homenr; i++) {

897 
if (mdatoms>cFREEZE)

898 
gf = mdatoms>cFREEZE[i]; 
899 
for(m=0; m<DIM; m++) { 
900 
if (!inputrec>opts.nFreeze[gf][m]) {

901 
p[i][m] = sf[i][m] + beta*p[i][m]; 
902 
gpa = p[i][m]*sf[i][m]; 
903 
/* f is negative gradient, thus the sign */

904 
} else {

905 
p[i][m] = 0;

906 
} 
907 
} 
908 
} 
909 

910 
/* Sum the gradient along the line across CPUs */

911 
if (PAR(cr))

912 
gmx_sumd(1,&gpa,cr);

913  
914 
/* Calculate the norm of the search vector */

915 
get_f_norm_max(cr,&(inputrec>opts),mdatoms,p,&pnorm,NULL,NULL); 
916 

917 
/* Just in case stepsize reaches zero due to numerical precision... */

918 
if(stepsize<=0) 
919 
stepsize = inputrec>em_stepsize/pnorm; 
920 

921 
/*

922 
* Double check the value of the derivative in the search direction.

923 
* If it is positive it must be due to the old information in the

924 
* CG formula, so just remove that and start over with beta=0.

925 
* This corresponds to a steepest descent step.

926 
*/

927 
if(gpa>0) { 
928 
beta = 0;

929 
step; /* Don't count this step since we are restarting */

930 
continue; /* Go back to the beginning of the big forloop */ 
931 
} 
932  
933 
/* Calculate minimum allowed stepsize, before the average (norm)

934 
* relative change in coordinate is smaller than precision

935 
*/

936 
minstep=0;

937 
for (i=mdatoms>start; i<mdatoms>start+mdatoms>homenr; i++) {

938 
for(m=0; m<DIM; m++) { 
939 
tmp = fabs(s_min>s.x[i][m]); 
940 
if(tmp < 1.0) 
941 
tmp = 1.0; 
942 
tmp = p[i][m]/tmp; 
943 
minstep += tmp*tmp; 
944 
} 
945 
} 
946 
/* Add up from all CPUs */

947 
if(PAR(cr))

948 
gmx_sumd(1,&minstep,cr);

949  
950 
minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global>natoms));

951  
952 
if(stepsize<minstep) {

953 
converged=TRUE; 
954 
break;

955 
} 
956 

957 
/* Write coordinates if necessary */

958 
do_x = do_per_step(step,inputrec>nstxout); 
959 
do_f = do_per_step(step,inputrec>nstfout); 
960 

961 
write_em_traj(fplog,cr,fp_trn,do_x,do_f,NULL,

962 
top_global,inputrec,step, 
963 
s_min,state_global,f_global); 
964 

965 
/* Take a step downhill.

966 
* In theory, we should minimize the function along this direction.

967 
* That is quite possible, but it turns out to take 510 function evaluations

968 
* for each line. However, we dont really need to find the exact minimum 

969 
* it is much better to start a new CG step in a modified direction as soon

970 
* as we are close to it. This will save a lot of energy evaluations.

971 
*

972 
* In practice, we just try to take a single step.

973 
* If it worked (i.e. lowered the energy), we increase the stepsize but

974 
* the continue straight to the next CG step without trying to find any minimum.

975 
* If it didn't work (higher energy), there must be a minimum somewhere between

976 
* the old position and the new one.

977 
*

978 
* Due to the finite numerical accuracy, it turns out that it is a good idea

979 
* to even accept a SMALL increase in energy, if the derivative is still downhill.

980 
* This leads to lower final energies in the tests I've done. / Erik

981 
*/

982 
s_a>epot = s_min>epot; 
983 
a = 0.0; 
984 
c = a + stepsize; /* reference position along line is zero */

985 

986 
if (DOMAINDECOMP(cr) && s_min>s.ddp_count < cr>dd>ddp_count) {

987 
em_dd_partition_system(fplog,step,cr,top_global,inputrec, 
988 
s_min,&buf,top,mdatoms,fr,vsite,constr, 
989 
nrnb,wcycle); 
990 
} 
991  
992 
/* Take a trial step (new coords in s_c) */

993 
do_em_step(cr,inputrec,mdatoms,s_min,c,s_min>s.cg_p,s_c, 
994 
constr,top,nrnb,wcycle,1);

995 

996 
neval++; 
997 
/* Calculate energy for the trial step */

998 
evaluate_energy(fplog,bVerbose,cr, 
999 
state_global,top_global,s_c,&buf,top, 
1000 
inputrec,nrnb,wcycle, 
1001 
vsite,constr,fcd,graph,mdatoms,fr, 
1002 
mu_tot,enerd,vir,pres,1,FALSE);

1003 

1004 
/* Calc derivative along line */

1005 
p = s_c>s.cg_p; 
1006 
sf = s_c>f; 
1007 
gpc=0;

1008 
for(i=mdatoms>start; i<mdatoms>start+mdatoms>homenr; i++) {

1009 
for(m=0; m<DIM; m++) 
1010 
gpc = p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */

1011 
} 
1012 
/* Sum the gradient along the line across CPUs */

1013 
if (PAR(cr))

1014 
gmx_sumd(1,&gpc,cr);

1015  
1016 
/* This is the max amount of increase in energy we tolerate */

1017 
tmp=sqrt(GMX_REAL_EPS)*fabs(s_a>epot); 
1018  
1019 
/* Accept the step if the energy is lower, or if it is not significantly higher

1020 
* and the line derivative is still negative.

1021 
*/

1022 
if (s_c>epot < s_a>epot  (gpc < 0 && s_c>epot < (s_a>epot + tmp))) { 
1023 
foundlower = TRUE; 
1024 
/* Great, we found a better energy. Increase step for next iteration

1025 
* if we are still going down, decrease it otherwise

1026 
*/

1027 
if(gpc<0) 
1028 
stepsize *= 1.618034; /* The golden section */ 
1029 
else

1030 
stepsize *= 0.618034; /* 1/golden section */ 
1031 
} else {

1032 
/* New energy is the same or higher. We will have to do some work

1033 
* to find a smaller value in the interval. Take smaller step next time!

1034 
*/

1035 
foundlower = FALSE; 
1036 
stepsize *= 0.618034; 
1037 
} 
1038  
1039  
1040  
1041 

1042 
/* OK, if we didn't find a lower value we will have to locate one now  there must

1043 
* be one in the interval [a=0,c].

1044 
* The same thing is valid here, though: Don't spend dozens of iterations to find

1045 
* the line minimum. We try to interpolate based on the derivative at the endpoints,

1046 
* and only continue until we find a lower value. In most cases this means 12 iterations.

1047 
*

1048 
* I also have a safeguard for potentially really patological functions so we never

1049 
* take more than 20 steps before we give up ...

1050 
*

1051 
* If we already found a lower value we just skip this step and continue to the update.

1052 
*/

1053 
if (!foundlower) {

1054 
nminstep=0;

1055  
1056 
do {

1057 
/* Select a new trial point.

1058 
* If the derivatives at points a & c have different sign we interpolate to zero,

1059 
* otherwise just do a bisection.

1060 
*/

1061 
if(gpa<0 && gpc>0) 
1062 
b = a + gpa*(ac)/(gpcgpa); 
1063 
else

1064 
b = 0.5*(a+c); 
1065 

1066 
/* safeguard if interpolation close to machine accuracy causes errors:

1067 
* never go outside the interval

1068 
*/

1069 
if(b<=a  b>=c)

1070 
b = 0.5*(a+c); 
1071 

1072 
if (DOMAINDECOMP(cr) && s_min>s.ddp_count != cr>dd>ddp_count) {

1073 
/* Reload the old state */

1074 
em_dd_partition_system(fplog,1,cr,top_global,inputrec,

1075 
s_min,&buf,top,mdatoms,fr,vsite,constr, 
1076 
nrnb,wcycle); 
1077 
} 
1078  
1079 
/* Take a trial step to this new point  new coords in s_b */

1080 
do_em_step(cr,inputrec,mdatoms,s_min,b,s_min>s.cg_p,s_b, 
1081 
constr,top,nrnb,wcycle,1);

1082 

1083 
neval++; 
1084 
/* Calculate energy for the trial step */

1085 
evaluate_energy(fplog,bVerbose,cr, 
1086 
state_global,top_global,s_b,&buf,top, 
1087 
inputrec,nrnb,wcycle, 
1088 
vsite,constr,fcd,graph,mdatoms,fr, 
1089 
mu_tot,enerd,vir,pres,1,FALSE);

1090 

1091 
/* p does not change within a step, but since the domain decomposition

1092 
* might change, we have to use cg_p of s_b here.

1093 
*/

1094 
p = s_b>s.cg_p; 
1095 
sf = s_b>f; 
1096 
gpb=0;

1097 
for(i=mdatoms>start; i<mdatoms>start+mdatoms>homenr; i++) {

1098 
for(m=0; m<DIM; m++) 
1099 
gpb = p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */

1100 
} 
1101 
/* Sum the gradient along the line across CPUs */

1102 
if (PAR(cr))

1103 
gmx_sumd(1,&gpb,cr);

1104 

1105 
if (debug)

1106 
fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",

1107 
s_a>epot,s_b>epot,s_c>epot,gpb); 
1108  
1109 
epot_repl = s_b>epot; 
1110 

1111 
/* Keep one of the intervals based on the value of the derivative at the new point */

1112 
if (gpb > 0) { 
1113 
/* Replace c endpoint with b */

1114 
swap_em_state(s_b,s_c); 
1115 
c = b; 
1116 
gpc = gpb; 
1117 
} else {

1118 
/* Replace a endpoint with b */

1119 
swap_em_state(s_b,s_a); 
1120 
a = b; 
1121 
gpa = gpb; 
1122 
} 
1123 

1124 
/*

1125 
* Stop search as soon as we find a value smaller than the endpoints.

1126 
* Never run more than 20 steps, no matter what.

1127 
*/

1128 
nminstep++; 
1129 
} while ((epot_repl > s_a>epot  epot_repl > s_c>epot) &&

1130 
(nminstep < 20));

1131 

1132 
if (fabs(epot_repl  s_min>epot) < fabs(s_min>epot)*GMX_REAL_EPS 

1133 
nminstep >= 20) {

1134 
/* OK. We couldn't find a significantly lower energy.

1135 
* If beta==0 this was steepest descent, and then we give up.

1136 
* If not, set beta=0 and restart with steepest descent before quitting.

1137 
*/

1138 
if (beta == 0.0) { 
1139 
/* Converged */

1140 
converged = TRUE; 
1141 
break;

1142 
} else {

1143 
/* Reset memory before giving up */

1144 
beta = 0.0; 
1145 
continue;

1146 
} 
1147 
} 
1148 

1149 
/* Select min energy state of A & C, put the best in B.

1150 
*/

1151 
if (s_c>epot < s_a>epot) {

1152 
if (debug)

1153 
fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",

1154 
s_c>epot,s_a>epot); 
1155 
swap_em_state(s_b,s_c); 
1156 
gpb = gpc; 
1157 
b = c; 
1158 
} else {

1159 
if (debug)

1160 
fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",

1161 
s_a>epot,s_c>epot); 
1162 
swap_em_state(s_b,s_a); 
1163 
gpb = gpa; 
1164 
b = a; 
1165 
} 
1166 

1167 
} else {

1168 
if (debug)

1169 
fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",

1170 
s_c>epot); 
1171 
swap_em_state(s_b,s_c); 
1172 
gpb = gpc; 
1173 
b = c; 
1174 
} 
1175 

1176 
/* new search direction */

1177 
/* beta = 0 means forget all memory and restart with steepest descents. */

1178 
if (nstcg && ((step % nstcg)==0)) 
1179 
beta = 0.0; 
1180 
else {

1181 
/* s_min>fnorm cannot be zero, because then we would have converged

1182 
* and broken out.

1183 
*/

1184  
1185 
/* PolakRibiere update.

1186 
* Change to fnorm2/fnorm2_old for FletcherReeves

1187 
*/

1188 
beta = pr_beta(cr,&inputrec>opts,mdatoms,top_global,s_min,s_b); 
1189 
} 
1190 
/* Limit beta to prevent oscillations */

1191 
if (fabs(beta) > 5.0) 
1192 
beta = 0.0; 
1193 

1194 

1195 
/* update positions */

1196 
swap_em_state(s_min,s_b); 
1197 
gpa = gpb; 
1198 

1199 
/* Print it if necessary */

1200 
if (MASTER(cr)) {

1201 
if(bVerbose)

1202 
fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",

1203 
step,s_min>epot,s_min>fnorm/sqrt(state_global>natoms), 
1204 
s_min>fmax,s_min>a_fmax+1);

1205 
/* Store the new (lower) energies */

1206 
upd_mdebin(mdebin,NULL,TRUE,mdatoms>tmass,step,(real)step,

1207 
enerd,&s_min>s,s_min>s.box, 
1208 
NULL,NULL,vir,pres,NULL,mu_tot,constr); 
1209 
do_log = do_per_step(step,inputrec>nstlog); 
1210 
do_ene = do_per_step(step,inputrec>nstenergy); 
1211 
if(do_log)

1212 
print_ebin_header(fplog,step,step,s_min>s.lambda); 
1213 
print_ebin(fp_ene,do_ene,FALSE,FALSE, 
1214 
do_log ? fplog : NULL,step,step,step,eprNORMAL,

1215 
TRUE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
1216 
} 
1217 

1218 
/* Stop when the maximum force lies below tolerance.

1219 
* If we have reached machine precision, converged is already set to true.

1220 
*/

1221 
converged = converged  (s_min>fmax < inputrec>em_tol); 
1222 

1223 
} /* End of the loop */

1224 

1225 
if (converged)

1226 
step; /* we never took that last step in this case */

1227 

1228 
if (s_min>fmax > inputrec>em_tol) {

1229 
if (MASTER(cr)) {

1230 
warn_step(stderr,inputrec>em_tol,FALSE); 
1231 
warn_step(fplog,inputrec>em_tol,FALSE); 
1232 
} 
1233 
converged = FALSE; 
1234 
} 
1235 

1236 
if (MASTER(cr)) {

1237 
/* If we printed energy and/or logfile last step (which was the last step)

1238 
* we don't have to do it again, but otherwise print the final values.

1239 
*/

1240 
if(!do_log) {

1241 
/* Write final value to log since we didn't do anything the last step */

1242 
print_ebin_header(fplog,step,step,s_min>s.lambda); 
1243 
} 
1244 
if (!do_ene  !do_log) {

1245 
/* Write final energy file entries */

1246 
print_ebin(fp_ene,!do_ene,FALSE,FALSE, 
1247 
!do_log ? fplog : NULL,step,step,step,eprNORMAL,

1248 
TRUE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
1249 
} 
1250 
} 
1251  
1252 
/* Print some stuff... */

1253 
if (MASTER(cr))

1254 
fprintf(stderr,"\nwriting lowest energy coordinates.\n");

1255 

1256 
/* IMPORTANT!

1257 
* For accurate normal mode calculation it is imperative that we

1258 
* store the last conformation into the full precision binary trajectory.

1259 
*

1260 
* However, we should only do it if we did NOT already write this step

1261 
* above (which we did if do_x or do_f was true).

1262 
*/

1263 
do_x = !do_per_step(step,inputrec>nstxout); 
1264 
do_f = (inputrec>nstfout > 0 && !do_per_step(step,inputrec>nstfout));

1265 

1266 
write_em_traj(fplog,cr,fp_trn,do_x,do_f,ftp2fn(efSTO,nfile,fnm), 
1267 
top_global,inputrec,step, 
1268 
s_min,state_global,f_global); 
1269 

1270 
fnormn = s_min>fnorm/sqrt(state_global>natoms); 
1271 

1272 
if (MASTER(cr)) {

1273 
print_converged(stderr,CG,inputrec>em_tol,step,converged,number_steps, 
1274 
s_min>epot,s_min>fmax,s_min>a_fmax,fnormn); 
1275 
print_converged(fplog,CG,inputrec>em_tol,step,converged,number_steps, 
1276 
s_min>epot,s_min>fmax,s_min>a_fmax,fnormn); 
1277 

1278 
fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);

1279 
} 
1280 

1281 
finish_em(fplog,cr,fp_trn,fp_ene); 
1282 

1283 
/* To print the actual number of steps we needed somewhere */

1284 
*nsteps_done = step; 
1285  
1286 
return start_t;

1287 
} /* That's all folks */

1288  
1289  
1290 
time_t do_lbfgs(FILE *fplog,t_commrec *cr, 
1291 
int nfile,t_filenm fnm[],

1292 
bool bVerbose,bool bCompact, 
1293 
gmx_vsite_t *vsite,gmx_constr_t constr, 
1294 
int stepout,

1295 
t_inputrec *inputrec, 
1296 
gmx_mtop_t *top_global,t_fcdata *fcd, 
1297 
t_state *state,rvec f[], 
1298 
rvec buf[],t_mdatoms *mdatoms, 
1299 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
1300 
gmx_edsam_t ed, 
1301 
t_forcerec *fr, 
1302 
int repl_ex_nst,int repl_ex_seed, 
1303 
real cpt_period,real max_hours, 
1304 
unsigned long Flags, 
1305 
int *nsteps_done)

1306 
{ 
1307 
static char *LBFGS="LowMemory BFGS Minimizer"; 
1308 
em_state_t ems; 
1309 
gmx_localtop_t *top; 
1310 
gmx_enerdata_t *enerd; 
1311 
t_graph *graph; 
1312 
int ncorr,nmaxcorr,point,cp,neval,nminstep;

1313 
double stepsize,gpa,gpb,gpc,tmp,minstep;

1314 
real *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg; 
1315 
real *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp; 
1316 
real a,b,c,maxdelta,delta; 
1317 
real diag,Epot0,Epot,EpotA,EpotB,EpotC; 
1318 
real dgdx,dgdg,sq,yr,beta; 
1319 
t_mdebin *mdebin; 
1320 
bool converged,first;

1321 
rvec mu_tot; 
1322 
real fnorm,fmax; 
1323 
time_t start_t; 
1324 
bool do_log,do_ene,do_x,do_f,foundlower,*frozen;

1325 
tensor vir,pres; 
1326 
int fp_trn,fp_ene,start,end,number_steps;

1327 
int i,k,m,n,nfmax,gf,step;

1328 
/* not used */

1329 
real terminate; 
1330  
1331 
if (PAR(cr))

1332 
gmx_fatal(FARGS,"Cannot do parallel LBFGS Minimization  yet.\n");

1333 

1334 
n = 3*state>natoms;

1335 
nmaxcorr = inputrec>nbfgscorr; 
1336 

1337 
/* Allocate memory */

1338 
snew(f,state>natoms); 
1339 
/* Use pointers to real so we dont have to loop over both atoms and

1340 
* dimensions all the time...

1341 
* x/f are allocated as rvec *, so make new x0/f0 pointerstoreal

1342 
* that point to the same memory.

1343 
*/

1344 
snew(xa,n); 
1345 
snew(xb,n); 
1346 
snew(xc,n); 
1347 
snew(fa,n); 
1348 
snew(fb,n); 
1349 
snew(fc,n); 
1350 
snew(frozen,n); 
1351 

1352 
xx = (real *)state>x; 
1353 
ff = (real *)f; 
1354  
1355 
printf("x0: %20.12g %20.12g %20.12g\n",xx[0],xx[1],xx[2]); 
1356  
1357 
snew(p,n); 
1358 
snew(lastx,n); 
1359 
snew(lastf,n); 
1360 
snew(rho,nmaxcorr); 
1361 
snew(alpha,nmaxcorr); 
1362 

1363 
snew(dx,nmaxcorr); 
1364 
for(i=0;i<nmaxcorr;i++) 
1365 
snew(dx[i],n); 
1366 

1367 
snew(dg,nmaxcorr); 
1368 
for(i=0;i<nmaxcorr;i++) 
1369 
snew(dg[i],n); 
1370  
1371 
step = 0;

1372 
neval = 0;

1373  
1374 
/* Init em */

1375 
init_em(fplog,LBFGS,cr,inputrec, 
1376 
state,top_global,&ems,&top,f,&buf,&f, 
1377 
nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr, 
1378 
nfile,fnm,&fp_trn,&fp_ene,&mdebin); 
1379 
/* Do_lbfgs is not completely updated like do_steep and do_cg,

1380 
* so we free some memory again.

1381 
*/

1382 
sfree(ems.s.x); 
1383 
sfree(ems.f); 
1384  
1385 
start = mdatoms>start; 
1386 
end = mdatoms>homenr + start; 
1387 

1388 
/* Print to log file */

1389 
start_t=print_date_and_time(fplog,cr>nodeid, 
1390 
"Started LowMemory BFGS Minimization");

1391 
wallcycle_start(wcycle,ewcRUN); 
1392 

1393 
do_log = do_ene = do_x = do_f = TRUE; 
1394 

1395 
/* Max number of steps */

1396 
number_steps=inputrec>nsteps; 
1397  
1398 
/* Create a 3*natoms index to tell whether each degree of freedom is frozen */

1399 
gf = 0;

1400 
for(i=start; i<end; i++) {

1401 
if (mdatoms>cFREEZE)

1402 
gf = mdatoms>cFREEZE[i]; 
1403 
for(m=0; m<DIM; m++) 
1404 
frozen[3*i+m]=inputrec>opts.nFreeze[gf][m];

1405 
} 
1406 
if (MASTER(cr))

1407 
sp_header(stderr,LBFGS,inputrec>em_tol,number_steps); 
1408 
if (fplog)

1409 
sp_header(fplog,LBFGS,inputrec>em_tol,number_steps); 
1410 

1411 
if (vsite)

1412 
construct_vsites(fplog,vsite,state>x,nrnb,1,NULL, 
1413 
top>idef.iparams,top>idef.il, 
1414 
fr>ePBC,fr>bMolPBC,graph,cr,state>box); 
1415 

1416 
/* Call the force routine and some auxiliary (neighboursearching etc.) */

1417 
/* do_force always puts the charge groups in the box and shifts again

1418 
* We do not unshift, so molecules are always whole

1419 
*/

1420 
neval++; 
1421 
ems.s.x = state>x; 
1422 
ems.f = f; 
1423 
evaluate_energy(fplog,bVerbose,cr, 
1424 
state,top_global,&ems,&buf,top, 
1425 
inputrec,nrnb,wcycle, 
1426 
vsite,constr,fcd,graph,mdatoms,fr, 
1427 
mu_tot,enerd,vir,pres,1,TRUE);

1428 
where(); 
1429 

1430 
if (MASTER(cr)) {

1431 
/* Copy stuff to the energy bin for easy printing etc. */

1432 
upd_mdebin(mdebin,NULL,TRUE,mdatoms>tmass,step,(real)step,

1433 
enerd,state,state>box, 
1434 
NULL,NULL,vir,pres,NULL,mu_tot,constr); 
1435 

1436 
print_ebin_header(fplog,step,step,state>lambda); 
1437 
print_ebin(fp_ene,TRUE,FALSE,FALSE,fplog,step,step,step,eprNORMAL, 
1438 
TRUE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
1439 
} 
1440 
where(); 
1441 

1442 
/* This is the starting energy */

1443 
Epot = enerd>term[F_EPOT]; 
1444 

1445 
fnorm = ems.fnorm; 
1446 
fmax = ems.fmax; 
1447 
nfmax = ems.a_fmax; 
1448 

1449 
/* Set the initial step.

1450 
* since it will be multiplied by the nonnormalized search direction

1451 
* vector (force vector the first time), we scale it by the

1452 
* norm of the force.

1453 
*/

1454 

1455 
if (MASTER(cr)) {

1456 
fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);

1457 
fprintf(stderr," Fmax = %12.5e on atom %d\n",fmax,nfmax+1); 
1458 
fprintf(stderr," FNorm = %12.5e\n",fnorm/sqrt(state>natoms));

1459 
fprintf(stderr,"\n");

1460 
/* and copy to the log file too... */

1461 
fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);

1462 
fprintf(fplog," Fmax = %12.5e on atom %d\n",fmax,nfmax+1); 
1463 
fprintf(fplog," FNorm = %12.5e\n",fnorm/sqrt(state>natoms));

1464 
fprintf(fplog,"\n");

1465 
} 
1466 

1467 
point=0;

1468 
for(i=0;i<n;i++) 
1469 
if(!frozen[i])

1470 
dx[point][i] = ff[i]; /* Initial search direction */

1471 
else

1472 
dx[point][i] = 0;

1473  
1474 
stepsize = 1.0/fnorm; 
1475 
converged = FALSE; 
1476 

1477 
/* Start the loop over BFGS steps.

1478 
* Each successful step is counted, and we continue until

1479 
* we either converge or reach the max number of steps.

1480 
*/

1481 

1482 
ncorr=0;

1483  
1484 
/* Set the gradient from the force */

1485 
for(step=0,converged=FALSE;( step<=number_steps  number_steps==0) && !converged;step++) { 
1486 

1487 
/* Write coordinates if necessary */

1488 
do_x = do_per_step(step,inputrec>nstxout); 
1489 
do_f = do_per_step(step,inputrec>nstfout); 
1490 

1491 
write_em_traj(fplog,cr,fp_trn,do_x,do_f,NULL,

1492 
top_global,inputrec,step, 
1493 
&ems,state,f); 
1494  
1495 
/* Do the linesearching in the direction dx[point][0..(n1)] */

1496 

1497 
/* pointer to current direction  point=0 first time here */

1498 
s=dx[point]; 
1499 

1500 
/* calculate line gradient */

1501 
for(gpa=0,i=0;i<n;i++) 
1502 
gpa=s[i]*ff[i]; 
1503  
1504 
/* Calculate minimum allowed stepsize, before the average (norm)

1505 
* relative change in coordinate is smaller than precision

1506 
*/

1507 
for(minstep=0,i=0;i<n;i++) { 
1508 
tmp=fabs(xx[i]); 
1509 
if(tmp<1.0) 
1510 
tmp=1.0; 
1511 
tmp = s[i]/tmp; 
1512 
minstep += tmp*tmp; 
1513 
} 
1514 
minstep = GMX_REAL_EPS/sqrt(minstep/n); 
1515 

1516 
if(stepsize<minstep) {

1517 
converged=TRUE; 
1518 
break;

1519 
} 
1520 

1521 
/* Store old forces and coordinates */

1522 
for(i=0;i<n;i++) { 
1523 
lastx[i]=xx[i]; 
1524 
lastf[i]=ff[i]; 
1525 
} 
1526 
Epot0=Epot; 
1527 

1528 
first=TRUE; 
1529 

1530 
for(i=0;i<n;i++) 
1531 
xa[i]=xx[i]; 
1532 

1533 
/* Take a step downhill.

1534 
* In theory, we should minimize the function along this direction.

1535 
* That is quite possible, but it turns out to take 510 function evaluations

1536 
* for each line. However, we dont really need to find the exact minimum 

1537 
* it is much better to start a new BFGS step in a modified direction as soon

1538 
* as we are close to it. This will save a lot of energy evaluations.

1539 
*

1540 
* In practice, we just try to take a single step.

1541 
* If it worked (i.e. lowered the energy), we increase the stepsize but

1542 
* the continue straight to the next BFGS step without trying to find any minimum.

1543 
* If it didn't work (higher energy), there must be a minimum somewhere between

1544 
* the old position and the new one.

1545 
*

1546 
* Due to the finite numerical accuracy, it turns out that it is a good idea

1547 
* to even accept a SMALL increase in energy, if the derivative is still downhill.

1548 
* This leads to lower final energies in the tests I've done. / Erik

1549 
*/

1550 
foundlower=FALSE; 
1551 
EpotA = Epot0; 
1552 
a = 0.0; 
1553 
c = a + stepsize; /* reference position along line is zero */

1554  
1555 
/* Check stepsize first. We do not allow displacements

1556 
* larger than emstep.

1557 
*/

1558 
do {

1559 
c = a + stepsize; 
1560 
maxdelta=0;

1561 
for(i=0;i<n;i++) { 
1562 
delta=c*s[i]; 
1563 
if(delta>maxdelta)

1564 
maxdelta=delta; 
1565 
} 
1566 
if(maxdelta>inputrec>em_stepsize)

1567 
stepsize*=0.1; 
1568 
} while(maxdelta>inputrec>em_stepsize);

1569  
1570 
/* Take a trial step */

1571 
for (i=0; i<n; i++) 
1572 
xc[i] = lastx[i] + c*s[i]; 
1573 

1574 
neval++; 
1575 
/* Calculate energy for the trial step */

1576 
ems.s.x = (rvec *)xc; 
1577 
ems.f = (rvec *)fc; 
1578 
evaluate_energy(fplog,bVerbose,cr, 
1579 
state,top_global,&ems,&buf,top, 
1580 
inputrec,nrnb,wcycle, 
1581 
vsite,constr,fcd,graph,mdatoms,fr, 
1582 
mu_tot,enerd,vir,pres,step,FALSE); 
1583 
EpotC = ems.epot; 
1584 

1585 
/* Calc derivative along line */

1586 
for(gpc=0,i=0; i<n; i++) { 
1587 
gpc = s[i]*fc[i]; /* f is negative gradient, thus the sign */

1588 
} 
1589 
/* Sum the gradient along the line across CPUs */

1590 
if (PAR(cr))

1591 
gmx_sumd(1,&gpc,cr);

1592 

1593 
/* This is the max amount of increase in energy we tolerate */

1594 
tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA); 
1595 

1596 
/* Accept the step if the energy is lower, or if it is not significantly higher

1597 
* and the line derivative is still negative.

1598 
*/

1599 
if(EpotC<EpotA  (gpc<0 && EpotC<(EpotA+tmp))) { 
1600 
foundlower = TRUE; 
1601 
/* Great, we found a better energy. Increase step for next iteration

1602 
* if we are still going down, decrease it otherwise

1603 
*/

1604 
if(gpc<0) 
1605 
stepsize *= 1.618034; /* The golden section */ 
1606 
else

1607 
stepsize *= 0.618034; /* 1/golden section */ 
1608 
} else {

1609 
/* New energy is the same or higher. We will have to do some work

1610 
* to find a smaller value in the interval. Take smaller step next time!

1611 
*/

1612 
foundlower = FALSE; 
1613 
stepsize *= 0.618034; 
1614 
} 
1615 

1616 
/* OK, if we didn't find a lower value we will have to locate one now  there must

1617 
* be one in the interval [a=0,c].

1618 
* The same thing is valid here, though: Don't spend dozens of iterations to find

1619 
* the line minimum. We try to interpolate based on the derivative at the endpoints,

1620 
* and only continue until we find a lower value. In most cases this means 12 iterations.

1621 
*

1622 
* I also have a safeguard for potentially really patological functions so we never

1623 
* take more than 20 steps before we give up ...

1624 
*

1625 
* If we already found a lower value we just skip this step and continue to the update.

1626 
*/

1627  
1628 
if(!foundlower) {

1629 

1630 
nminstep=0;

1631 
do {

1632 
/* Select a new trial point.

1633 
* If the derivatives at points a & c have different sign we interpolate to zero,

1634 
* otherwise just do a bisection.

1635 
*/

1636 

1637 
if(gpa<0 && gpc>0) 
1638 
b = a + gpa*(ac)/(gpcgpa); 
1639 
else

1640 
b = 0.5*(a+c); 
1641 

1642 
/* safeguard if interpolation close to machine accuracy causes errors:

1643 
* never go outside the interval

1644 
*/

1645 
if(b<=a  b>=c)

1646 
b = 0.5*(a+c); 
1647 

1648 
/* Take a trial step */

1649 
for (i=0; i<n; i++) 
1650 
xb[i] = lastx[i] + b*s[i]; 
1651 

1652 
neval++; 
1653 
/* Calculate energy for the trial step */

1654 
ems.s.x = (rvec *)xb; 
1655 
ems.f = (rvec *)fb; 
1656 
evaluate_energy(fplog,bVerbose,cr, 
1657 
state,top_global,&ems,&buf,top, 
1658 
inputrec,nrnb,wcycle, 
1659 
vsite,constr,fcd,graph,mdatoms,fr, 
1660 
mu_tot,enerd,vir,pres,step,FALSE); 
1661 
EpotB = ems.epot; 
1662 

1663 
fnorm = ems.fnorm; 
1664 

1665 
for(gpb=0,i=0; i<n; i++) 
1666 
gpb = s[i]*fb[i]; /* f is negative gradient, thus the sign */

1667 

1668 
/* Sum the gradient along the line across CPUs */

1669 
if (PAR(cr))

1670 
gmx_sumd(1,&gpb,cr);

1671 

1672 
/* Keep one of the intervals based on the value of the derivative at the new point */

1673 
if(gpb>0) { 
1674 
/* Replace c endpoint with b */

1675 
EpotC = EpotB; 
1676 
c = b; 
1677 
gpc = gpb; 
1678 
/* swap coord pointers b/c */

1679 
xtmp = xb; 
1680 
ftmp = fb; 
1681 
xb = xc; 
1682 
fb = fc; 
1683 
xc = xtmp; 
1684 
fc = ftmp; 
1685 
} else {

1686 
/* Replace a endpoint with b */

1687 
EpotA = EpotB; 
1688 
a = b; 
1689 
gpa = gpb; 
1690 
/* swap coord pointers a/b */

1691 
xtmp = xb; 
1692 
ftmp = fb; 
1693 
xb = xa; 
1694 
fb = fa; 
1695 
xa = xtmp; 
1696 
fa = ftmp; 
1697 
} 
1698 

1699 
/*

1700 
* Stop search as soon as we find a value smaller than the endpoints,

1701 
* or if the tolerance is below machine precision.

1702 
* Never run more than 20 steps, no matter what.

1703 
*/

1704 
nminstep++; 
1705 
} while((EpotB>EpotA  EpotB>EpotC) && (nminstep<20)); 
1706  
1707 
if(fabs(EpotBEpot0)<GMX_REAL_EPS  nminstep>=20) { 
1708 
/* OK. We couldn't find a significantly lower energy.

1709 
* If ncorr==0 this was steepest descent, and then we give up.

1710 
* If not, reset memory to restart as steepest descent before quitting.

1711 
*/

1712 
if(ncorr==0) { 
1713 
/* Converged */

1714 
converged=TRUE; 
1715 
break;

1716 
} else {

1717 
/* Reset memory */

1718 
ncorr=0;

1719 
/* Search in gradient direction */

1720 
for(i=0;i<n;i++) 
1721 
dx[point][i]=ff[i]; 
1722 
/* Reset stepsize */

1723 
stepsize = 1.0/fnorm; 
1724 
continue;

1725 
} 
1726 
} 
1727 

1728 
/* Select min energy state of A & C, put the best in xx/ff/Epot

1729 
*/

1730 
if(EpotC<EpotA) {

1731 
Epot = EpotC; 
1732 
/* Use state C */

1733 
for(i=0;i<n;i++) { 
1734 
xx[i]=xc[i]; 
1735 
ff[i]=fc[i]; 
1736 
} 
1737 
stepsize=c; 
1738 
} else {

1739 
Epot = EpotA; 
1740 
/* Use state A */

1741 
for(i=0;i<n;i++) { 
1742 
xx[i]=xa[i]; 
1743 
ff[i]=fa[i]; 
1744 
} 
1745 
stepsize=a; 
1746 
} 
1747 

1748 
} else {

1749 
/* found lower */

1750 
Epot = EpotC; 
1751 
/* Use state C */

1752 
for(i=0;i<n;i++) { 
1753 
xx[i]=xc[i]; 
1754 
ff[i]=fc[i]; 
1755 
} 
1756 
stepsize=c; 
1757 
} 
1758  
1759 
/* Update the memory information, and calculate a new

1760 
* approximation of the inverse hessian

1761 
*/

1762 

1763 
/* Have new data in Epot, xx, ff */

1764 
if(ncorr<nmaxcorr)

1765 
ncorr++; 
1766  
1767 
for(i=0;i<n;i++) { 
1768 
dg[point][i]=lastf[i]ff[i]; 
1769 
dx[point][i]*=stepsize; 
1770 
} 
1771 

1772 
dgdg=0;

1773 
dgdx=0;

1774 
for(i=0;i<n;i++) { 
1775 
dgdg+=dg[point][i]*dg[point][i]; 
1776 
dgdx+=dg[point][i]*dx[point][i]; 
1777 
} 
1778 

1779 
diag=dgdx/dgdg; 
1780 

1781 
rho[point]=1.0/dgdx; 
1782 
point++; 
1783 

1784 
if(point>=nmaxcorr)

1785 
point=0;

1786 

1787 
/* Update */

1788 
for(i=0;i<n;i++) 
1789 
p[i]=ff[i]; 
1790 

1791 
cp=point; 
1792 

1793 
/* Recursive update. First go back over the memory points */

1794 
for(k=0;k<ncorr;k++) { 
1795 
cp; 
1796 
if(cp<0) 
1797 
cp=ncorr1;

1798 

1799 
sq=0;

1800 
for(i=0;i<n;i++) 
1801 
sq+=dx[cp][i]*p[i]; 
1802 

1803 
alpha[cp]=rho[cp]*sq; 
1804 

1805 
for(i=0;i<n;i++) 
1806 
p[i] = alpha[cp]*dg[cp][i]; 
1807 
} 
1808 

1809 
for(i=0;i<n;i++) 
1810 
p[i] *= diag; 
1811 

1812 
/* And then go forward again */

1813 
for(k=0;k<ncorr;k++) { 
1814 
yr = 0;

1815 
for(i=0;i<n;i++) 
1816 
yr += p[i]*dg[cp][i]; 
1817 

1818 
beta = rho[cp]*yr; 
1819 
beta = alpha[cp]beta; 
1820 

1821 
for(i=0;i<n;i++) 
1822 
p[i] += beta*dx[cp][i]; 
1823 

1824 
cp++; 
1825 
if(cp>=ncorr)

1826 
cp=0;

1827 
} 
1828 

1829 
for(i=0;i<n;i++) 
1830 
if(!frozen[i])

1831 
dx[point][i] = p[i]; 
1832 
else

1833 
dx[point][i] = 0;

1834  
1835 
stepsize=1.0; 
1836 

1837 
/* Test whether the convergence criterion is met */

1838 
get_f_norm_max(cr,&(inputrec>opts),mdatoms,f,&fnorm,&fmax,&nfmax); 
1839 

1840 
/* Print it if necessary */

1841 
if (MASTER(cr)) {

1842 
if(bVerbose)

1843 
fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",

1844 
step,Epot,fnorm/sqrt(state>natoms),fmax,nfmax+1);

1845 
/* Store the new (lower) energies */

1846 
upd_mdebin(mdebin,NULL,TRUE,mdatoms>tmass,step,(real)step,

1847 
enerd,state,state>box, 
1848 
NULL,NULL,vir,pres,NULL,mu_tot,constr); 
1849 
do_log = do_per_step(step,inputrec>nstlog); 
1850 
do_ene = do_per_step(step,inputrec>nstenergy); 
1851 
if(do_log)

1852 
print_ebin_header(fplog,step,step,state>lambda); 
1853 
print_ebin(fp_ene,do_ene,FALSE,FALSE, 
1854 
do_log ? fplog : NULL,step,step,step,eprNORMAL,

1855 
TRUE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
1856 
} 
1857 

1858 
/* Stop when the maximum force lies below tolerance.

1859 
* If we have reached machine precision, converged is already set to true.

1860 
*/

1861 

1862 
converged = converged  (fmax < inputrec>em_tol); 
1863 

1864 
} /* End of the loop */

1865 

1866 
if(converged)

1867 
step; /* we never took that last step in this case */

1868 

1869 
if(fmax>inputrec>em_tol) {

1870 
if (MASTER(cr)) {

1871 
warn_step(stderr,inputrec>em_tol,FALSE); 
1872 
warn_step(fplog,inputrec>em_tol,FALSE); 
1873 
} 
1874 
converged = FALSE; 
1875 
} 
1876 

1877 
/* If we printed energy and/or logfile last step (which was the last step)

1878 
* we don't have to do it again, but otherwise print the final values.

1879 
*/

1880 
if(!do_log) /* Write final value to log since we didn't do anythin last step */ 
1881 
print_ebin_header(fplog,step,step,state>lambda); 
1882 
if(!do_ene  !do_log) /* Write final energy file entries */ 
1883 
print_ebin(fp_ene,!do_ene,FALSE,FALSE, 
1884 
!do_log ? fplog : NULL,step,step,step,eprNORMAL,

1885 
TRUE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
1886 

1887 
/* Print some stuff... */

1888 
if (MASTER(cr))

1889 
fprintf(stderr,"\nwriting lowest energy coordinates.\n");

1890 

1891 
/* IMPORTANT!

1892 
* For accurate normal mode calculation it is imperative that we

1893 
* store the last conformation into the full precision binary trajectory.

1894 
*

1895 
* However, we should only do it if we did NOT already write this step

1896 
* above (which we did if do_x or do_f was true).

1897 
*/

1898 
do_x = !do_per_step(step,inputrec>nstxout); 
1899 
do_f = !do_per_step(step,inputrec>nstfout); 
1900 
write_em_traj(fplog,cr,fp_trn,do_x,do_f,ftp2fn(efSTO,nfile,fnm), 
1901 
top_global,inputrec,step, 
1902 
&ems,state,f); 
1903 

1904 
if (MASTER(cr)) {

1905 
print_converged(stderr,LBFGS,inputrec>em_tol,step,converged, 
1906 
number_steps,Epot,fmax,nfmax,fnorm/sqrt(state>natoms)); 
1907 
print_converged(fplog,LBFGS,inputrec>em_tol,step,converged, 
1908 
number_steps,Epot,fmax,nfmax,fnorm/sqrt(state>natoms)); 
1909 

1910 
fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);

1911 
} 
1912 

1913 
finish_em(fplog,cr,fp_trn,fp_ene); 
1914  
1915 
/* To print the actual number of steps we needed somewhere */

1916 
*nsteps_done = step; 
1917  
1918 
return start_t;

1919 
} /* That's all folks */

1920  
1921  
1922 
time_t do_steep(FILE *fplog,t_commrec *cr, 
1923 
int nfile,t_filenm fnm[],

1924 
bool bVerbose,bool bCompact, 
1925 
gmx_vsite_t *vsite,gmx_constr_t constr, 
1926 
int stepout,

1927 
t_inputrec *inputrec, 
1928 
gmx_mtop_t *top_global,t_fcdata *fcd, 
1929 
t_state *state_global,rvec f[], 
1930 
rvec buf[],t_mdatoms *mdatoms, 
1931 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
1932 
gmx_edsam_t ed, 
1933 
t_forcerec *fr, 
1934 
int repl_ex_nst,int repl_ex_seed, 
1935 
real cpt_period,real max_hours, 
1936 
unsigned long Flags, 
1937 
int *nsteps_done)

1938 
{ 
1939 
const char *SD="Steepest Descents"; 
1940 
em_state_t *s_min,*s_try; 
1941 
rvec *f_global; 
1942 
gmx_localtop_t *top; 
1943 
gmx_enerdata_t *enerd; 
1944 
t_graph *graph; 
1945 
real stepsize,constepsize; 
1946 
real ustep,dvdlambda,fnormn; 
1947 
int fp_trn,fp_ene;

1948 
t_mdebin *mdebin; 
1949 
bool bDone,bAbort,do_x,do_f;

1950 
time_t start_t; 
1951 
tensor vir,pres; 
1952 
rvec mu_tot; 
1953 
int nsteps;

1954 
int count=0; 
1955 
int steps_accepted=0; 
1956 
/* not used */

1957 
real terminate=0;

1958  
1959 
s_min = init_em_state(); 
1960 
s_try = init_em_state(); 
1961  
1962 
/* Init em and store the local state in s_try */

1963 
init_em(fplog,SD,cr,inputrec, 
1964 
state_global,top_global,s_try,&top,f,&buf,&f_global, 
1965 
nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr, 
1966 
nfile,fnm,&fp_trn,&fp_ene,&mdebin); 
1967  
1968 
/* Print to log file */

1969 
start_t=print_date_and_time(fplog,cr>nodeid,"Started Steepest Descents");

1970 
wallcycle_start(wcycle,ewcRUN); 
1971 

1972 
/* Set variables for stepsize (in nm). This is the largest

1973 
* step that we are going to make in any direction.

1974 
*/

1975 
ustep = inputrec>em_stepsize; 
1976 
stepsize = 0;

1977 

1978 
/* Max number of steps */

1979 
nsteps = inputrec>nsteps; 
1980 

1981 
if (MASTER(cr))

1982 
/* Print to the screen */

1983 
sp_header(stderr,SD,inputrec>em_tol,nsteps); 
1984 
if (fplog)

1985 
sp_header(fplog,SD,inputrec>em_tol,nsteps); 
1986 

1987 
/**** HERE STARTS THE LOOP ****

1988 
* count is the counter for the number of steps

1989 
* bDone will be TRUE when the minimization has converged

1990 
* bAbort will be TRUE when nsteps steps have been performed or when

1991 
* the stepsize becomes smaller than is reasonable for machine precision

1992 
*/

1993 
count = 0;

1994 
bDone = FALSE; 
1995 
bAbort = FALSE; 
1996 
while( !bDone && !bAbort ) {

1997 
bAbort = (nsteps > 0) && (count==nsteps);

1998 

1999 
/* set new coordinates, except for first step */

2000 
if (count > 0) { 
2001 
do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min>f,s_try, 
2002 
constr,top,nrnb,wcycle,count); 
2003 
} 
2004 

2005 
evaluate_energy(fplog,bVerbose,cr, 
2006 
state_global,top_global,s_try,&buf,top, 
2007 
inputrec,nrnb,wcycle, 
2008 
vsite,constr,fcd,graph,mdatoms,fr, 
2009 
mu_tot,enerd,vir,pres,count,count==0);

2010 

2011 
if (MASTER(cr))

2012 
print_ebin_header(fplog,count,count,s_try>s.lambda); 
2013  
2014 
if (count == 0) 
2015 
s_min>epot = s_try>epot + 1;

2016 

2017 
/* Print it if necessary */

2018 
if (MASTER(cr)) {

2019 
if (bVerbose) {

2020 
fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",

2021 
count,ustep,s_try>epot,s_try>fmax,s_try>a_fmax+1,

2022 
(s_try>epot < s_min>epot) ? '\n' : '\r'); 
2023 
} 
2024 

2025 
if (s_try>epot < s_min>epot) {

2026 
/* Store the new (lower) energies */

2027 
upd_mdebin(mdebin,NULL,TRUE,mdatoms>tmass,count,(real)count,

2028 
enerd,&s_try>s,s_try>s.box, 
2029 
NULL,NULL,vir,pres,NULL,mu_tot,constr); 
2030 
print_ebin(fp_ene,TRUE, 
2031 
do_per_step(steps_accepted,inputrec>nstdisreout), 
2032 
do_per_step(steps_accepted,inputrec>nstorireout), 
2033 
fplog,count,count,count,eprNORMAL,TRUE, 
2034 
mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
2035 
fflush(fplog); 
2036 
} 
2037 
} 
2038 

2039 
/* Now if the new energy is smaller than the previous...

2040 
* or if this is the first step!

2041 
* or if we did random steps!

2042 
*/

2043 

2044 
if ( (count==0)  (s_try>epot < s_min>epot) ) { 
2045 
steps_accepted++; 
2046  
2047 
/* Test whether the convergence criterion is met... */

2048 
bDone = (s_try>fmax < inputrec>em_tol); 
2049 

2050 
/* Copy the arrays for force, positions and energy */

2051 
/* The 'Min' array always holds the coords and forces of the minimal

2052 
sampled energy */

2053 
swap_em_state(s_min,s_try); 
2054 
if (count > 0) 
2055 
ustep *= 1.2; 
2056  
2057 
/* Write to trn, if necessary */

2058 
do_x = do_per_step(steps_accepted,inputrec>nstxout); 
2059 
do_f = do_per_step(steps_accepted,inputrec>nstfout); 
2060 
write_em_traj(fplog,cr,fp_trn,do_x,do_f,NULL,

2061 
top_global,inputrec,count, 
2062 
s_min,state_global,f_global); 
2063 
} 
2064 
else {

2065 
/* If energy is not smaller make the step smaller... */

2066 
ustep *= 0.5; 
2067  
2068 
if (DOMAINDECOMP(cr) && s_min>s.ddp_count != cr>dd>ddp_count) {

2069 
/* Reload the old state */

2070 
em_dd_partition_system(fplog,count,cr,top_global,inputrec, 
2071 
s_min,&buf,top,mdatoms,fr,vsite,constr, 
2072 
nrnb,wcycle); 
2073 
} 
2074 
} 
2075 

2076 
/* Determine new step */

2077 
stepsize = ustep/s_min>fmax; 
2078 

2079 
/* Check if stepsize is too small, with 1 nm as a characteristic length */

2080 
#ifdef GMX_DOUBLE

2081 
if (ustep < 1e12) 
2082 
#else

2083 
if (ustep < 1e6) 
2084 
#endif

2085 
{ 
2086 
if (MASTER(cr)) {

2087 
warn_step(stderr,inputrec>em_tol,constr!=NULL);

2088 
warn_step(fplog,inputrec>em_tol,constr!=NULL);

2089 
} 
2090 
bAbort=TRUE; 
2091 
} 
2092 

2093 
count++; 
2094 
} /* End of the loop */

2095 

2096 
/* Print some shit... */

2097 
if (MASTER(cr))

2098 
fprintf(stderr,"\nwriting lowest energy coordinates.\n");

2099 
write_em_traj(fplog,cr,fp_trn,TRUE,inputrec>nstfout,ftp2fn(efSTO,nfile,fnm), 
2100 
top_global,inputrec,count, 
2101 
s_min,state_global,f_global); 
2102  
2103 
fnormn = s_min>fnorm/sqrt(state_global>natoms); 
2104  
2105 
if (MASTER(cr)) {

2106 
print_converged(stderr,SD,inputrec>em_tol,count,bDone,nsteps, 
2107 
s_min>epot,s_min>fmax,s_min>a_fmax,fnormn); 
2108 
print_converged(fplog,SD,inputrec>em_tol,count,bDone,nsteps, 
2109 
s_min>epot,s_min>fmax,s_min>a_fmax,fnormn); 
2110 
} 
2111  
2112 
finish_em(fplog,cr,fp_trn,fp_ene); 
2113 

2114 
/* To print the actual number of steps we needed somewhere */

2115 
inputrec>nsteps=count; 
2116  
2117 
*nsteps_done = count; 
2118 

2119 
return start_t;

2120 
} /* That's all folks */

2121  
2122  
2123 
time_t do_nm(FILE *fplog,t_commrec *cr, 
2124 
int nfile,t_filenm fnm[],

2125 
bool bVerbose,bool bCompact, 
2126 
gmx_vsite_t *vsite,gmx_constr_t constr, 
2127 
int stepout,

2128 
t_inputrec *inputrec, 
2129 
gmx_mtop_t *top_global,t_fcdata *fcd, 
2130 
t_state *state_global,rvec f[], 
2131 
rvec buf[],t_mdatoms *mdatoms, 
2132 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
2133 
gmx_edsam_t ed, 
2134 
t_forcerec *fr, 
2135 
int repl_ex_nst,int repl_ex_seed, 
2136 
real cpt_period,real max_hours, 
2137 
unsigned long Flags, 
2138 
int *nsteps_done)

2139 
{ 
2140 
t_mdebin *mdebin; 
2141 
const char *NM = "Normal Mode Analysis"; 
2142 
int fp_ene,step,i;

2143 
time_t start_t; 
2144 
rvec *f_global; 
2145 
gmx_localtop_t *top; 
2146 
gmx_enerdata_t *enerd; 
2147 
t_graph *graph; 
2148 
real t,lambda,t0,lam0; 
2149 
bool bNS;

2150 
tensor vir,pres; 
2151 
int nfmax,count;

2152 
rvec mu_tot; 
2153 
rvec *fneg,*fpos; 
2154 
bool bSparse; /* use sparse matrix storage format */ 
2155 
size_t sz; 
2156 
gmx_sparsematrix_t * sparse_matrix = NULL;

2157 
real * full_matrix = NULL;

2158 
em_state_t * state_work; 
2159 

2160 
/* added with respect to mdrun */

2161 
int idum,jdum,kdum,row,col;

2162 
real der_range=10.0*sqrt(GMX_REAL_EPS); 
2163 
real fnorm,fmax; 
2164 
real dfdx; 
2165 

2166 
state_work = init_em_state(); 
2167 

2168 
/* Init em and store the local state in state_minimum */

2169 
init_em(fplog,NM,cr,inputrec, 
2170 
state_global,top_global,state_work,&top, 
2171 
f,&buf,&f_global, 
2172 
nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr, 
2173 
nfile,fnm,NULL,&fp_ene,&mdebin);

2174 

2175 
snew(fneg,top_global>natoms); 
2176 
snew(fpos,top_global>natoms); 
2177 

2178 
#ifndef GMX_DOUBLE

2179 
fprintf(stderr, 
2180 
"NOTE: This version of Gromacs has been compiled in single precision,\n"

2181 
" which MIGHT not be accurate enough for normal mode analysis.\n"

2182 
" Gromacs now uses sparse matrix storage, so the memory requirements\n"

2183 
" are fairly modest even if you recompile in double precision.\n\n");

2184 
#endif

2185 

2186 
/* Check if we can/should use sparse storage format.

2187 
*

2188 
* Sparse format is only useful when the Hessian itself is sparse, which it

2189 
* will be when we use a cutoff.

2190 
* For small systems (n<1000) it is easier to always use full matrix format, though.

2191 
*/

2192 
if(EEL_FULL(fr>eeltype)  fr>rlist==0.0) 
2193 
{ 
2194 
fprintf(stderr,"Noncutoff electrostatics used, forcing full Hessian format.\n");

2195 
bSparse = FALSE; 
2196 
} 
2197 
else if(top_global>natoms < 1000) 
2198 
{ 
2199 
fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global>natoms);

2200 
bSparse = FALSE; 
2201 
} 
2202 
else

2203 
{ 
2204 
fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");

2205 
bSparse = TRUE; 
2206 
} 
2207 

2208 
sz = DIM*top_global>natoms; 
2209 

2210 
fprintf(stderr,"Allocating Hessian memory...\n\n");

2211  
2212 
if(bSparse)

2213 
{ 
2214 
sparse_matrix=gmx_sparsematrix_init(sz); 
2215 
sparse_matrix>compressed_symmetric = TRUE; 
2216 
} 
2217 
else

2218 
{ 
2219 
snew(full_matrix,sz*sz); 
2220 
} 
2221 

2222 
/* Initial values */

2223 
t0 = inputrec>init_t; 
2224 
lam0 = inputrec>init_lambda; 
2225 
t = t0; 
2226 
lambda = lam0; 
2227 

2228 
init_nrnb(nrnb); 
2229 

2230 
where(); 
2231 

2232 
/* Write start time and temperature */

2233 
start_t=print_date_and_time(fplog,cr>nodeid,"Started nmrun");

2234 
wallcycle_start(wcycle,ewcRUN); 
2235 
if (MASTER(cr))

2236 
{ 
2237 
fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",*(top_global>name),

2238 
top_global>natoms); 
2239 
} 
2240 

2241 
count = 0;

2242 
evaluate_energy(fplog,bVerbose,cr, 
2243 
state_global,top_global,state_work,&buf,top, 
2244 
inputrec,nrnb,wcycle, 
2245 
vsite,constr,fcd,graph,mdatoms,fr, 
2246 
mu_tot,enerd,vir,pres,count,count==0);

2247 
count++; 
2248  
2249 
/* if forces are not small, warn user */

2250 
get_state_f_norm_max(cr,&(inputrec>opts),mdatoms,state_work); 
2251  
2252 
fprintf(stderr,"Maximum force:%12.5e\n",state_work>fmax);

2253 
if (state_work>fmax > 1.0e3) 
2254 
{ 
2255 
fprintf(stderr,"Maximum force probably not small enough to");

2256 
fprintf(stderr," ensure that you are in an \nenergy well. ");

2257 
fprintf(stderr,"Be aware that negative eigenvalues may occur");

2258 
fprintf(stderr," when the\nresulting matrix is diagonalized.\n");

2259 
} 
2260 

2261 
/***********************************************************

2262 
*

2263 
* Loop over all pairs in matrix

2264 
*

2265 
* do_force called twice. Once with positive and

2266 
* once with negative displacement

2267 
*

2268 
************************************************************/

2269  
2270 
/* fudge nr of steps to nr of atoms */

2271 

2272 
inputrec>nsteps = top_global>natoms; 
2273  
2274 
for (step=0; (step<inputrec>nsteps); step++) 
2275 
{ 
2276 

2277 
for (idum=0; (idum<DIM); idum++) 
2278 
{ 
2279 
row = DIM*step+idum; 
2280 

2281 
state_work>s.x[step][idum] = der_range; 
2282 

2283 
evaluate_energy(fplog,bVerbose,cr, 
2284 
state_global,top_global,state_work,&buf,top, 
2285 
inputrec,nrnb,wcycle, 
2286 
vsite,constr,fcd,graph,mdatoms,fr, 
2287 
mu_tot,enerd,vir,pres,count,count==0);

2288 
count++; 
2289 

2290 
for ( i=0 ; i < top_global>natoms ; i++ ) 
2291 
{ 
2292 
copy_rvec ( state_work>f[i] , fneg[i] ); 
2293 
} 
2294 

2295 
state_work>s.x[step][idum] += 2*der_range;

2296 

2297 
evaluate_energy(fplog,bVerbose,cr, 
2298 
state_global,top_global,state_work,&buf,top, 
2299 
inputrec,nrnb,wcycle, 
2300 
vsite,constr,fcd,graph,mdatoms,fr, 
2301 
mu_tot,enerd,vir,pres,count,count==0);

2302 
count++; 
2303 

2304 
for ( i=0 ; i < top_global>natoms ; i++ ) 
2305 
{ 
2306 
copy_rvec ( state_work>f[i] , fpos[i] ); 
2307 
} 
2308 

2309 
for (jdum=0; (jdum<top_global>natoms); jdum++) 
2310 
{ 
2311 
for (kdum=0; (kdum<DIM); kdum++) 
2312 
{ 
2313 
dfdx=(fpos[jdum][kdum]fneg[jdum][kdum])/(2*der_range);

2314 
col = DIM*jdum+kdum; 
2315 

2316 
if(bSparse)

2317 
{ 
2318 
if(col>=row && dfdx!=0.0) 
2319 
gmx_sparsematrix_increment_value(sparse_matrix,row,col,dfdx); 
2320 
} 
2321 
else

2322 
{ 
2323 
full_matrix[row*sz+col] = dfdx; 
2324 
} 
2325 
} 
2326 
} 
2327 

2328 
/* x is restored to original */

2329 
state_work>s.x[step][idum] = der_range; 
2330 

2331 
if (bVerbose && fplog)

2332 
fflush(fplog); 
2333 
} 
2334 
/* write progress */

2335 
if (MASTER(cr) && bVerbose)

2336 
{ 
2337 
fprintf(stderr,"\rFinished step %d out of %d",

2338 
step+1,top_global>natoms);

2339 
fflush(stderr); 
2340 
} 
2341 
} 
2342 
t=t0+step*inputrec>delta_t; 
2343 
lambda=lam0+step*inputrec>delta_lambda; 
2344 

2345 
if (MASTER(cr))

2346 
{ 
2347 
print_ebin(1,FALSE,FALSE,FALSE,fplog,step,step,t,eprAVER,

2348 
FALSE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
2349 
print_ebin(1,FALSE,FALSE,FALSE,fplog,step,step,t,eprRMS,

2350 
FALSE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
2351 
} 
2352 

2353 
fprintf(stderr,"\n\nWriting Hessian...\n");

2354 
gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix); 
2355  
2356 
*nsteps_done = step; 
2357 

2358 
return start_t;

2359 
} 
2360  
2361  
2362 
static void global_max(t_commrec *cr,int *n) 
2363 
{ 
2364 
int *sum,i;

2365  
2366 
snew(sum,cr>nnodes); 
2367 
sum[cr>nodeid] = *n; 
2368 
gmx_sumi(cr>nnodes,sum,cr); 
2369 
for(i=0; i<cr>nnodes; i++) 
2370 
*n = max(*n,sum[i]); 
2371 

2372 
sfree(sum); 
2373 
} 
2374  
2375 
static void realloc_bins(double **bin,int *nbin,int nbin_new) 
2376 
{ 
2377 
int i;

2378  
2379 
if (nbin_new != *nbin) {

2380 
srenew(*bin,nbin_new); 
2381 
for(i=*nbin; i<nbin_new; i++)

2382 
(*bin)[i] = 0;

2383 
*nbin = nbin_new; 
2384 
} 
2385 
} 
2386  
2387 
time_t do_tpi(FILE *fplog,t_commrec *cr, 
2388 
int nfile,t_filenm fnm[],

2389 
bool bVerbose,bool bCompact, 
2390 
gmx_vsite_t *vsite,gmx_constr_t constr, 
2391 
int stepout,

2392 
t_inputrec *inputrec, 
2393 
gmx_mtop_t *top_global,t_fcdata *fcd, 
2394 
t_state *state,rvec f[], 
2395 
rvec buf[],t_mdatoms *mdatoms, 
2396 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
2397 
gmx_edsam_t ed, 
2398 
t_forcerec *fr, 
2399 
int repl_ex_nst,int repl_ex_seed, 
2400 
real cpt_period,real max_hours, 
2401 
unsigned long Flags, 
2402 
int *nsteps_done)

2403 
{ 
2404 
const char *TPI="Test Particle Insertion"; 
2405 
gmx_localtop_t *top; 
2406 
gmx_groups_t *groups; 
2407 
gmx_enerdata_t *enerd; 
2408 
real lambda,t,temp,beta,drmax,epot; 
2409 
double embU,sum_embU,*sum_UgembU,V,V_all,VembU_all;

2410 
int status;

2411 
t_trxframe rerun_fr; 
2412 
bool bDispCorr,bCharge,bRFExcl,bNotLastFrame,bStateChanged,bNS,bOurStep;

2413 
time_t start_t; 
2414 
tensor force_vir,shake_vir,vir,pres; 
2415 
int cg_tp,a_tp0,a_tp1,ngid,gid_tp,nener,e;

2416 
rvec *x_mol; 
2417 
rvec mu_tot,x_init,dx,x_tp; 
2418 
int nnodes,frame,nsteps,step;

2419 
int i,start,end;

2420 
static gmx_rng_t tpi_rand;

2421 
FILE *fp_tpi=NULL;

2422 
char *ptr,*dump_pdb,**leg,str[STRLEN],str2[STRLEN];

2423 
double dbl,dump_ener;

2424 
bool bCavity;

2425 
int nat_cavity=0,d; 
2426 
real *mass_cavity=NULL,mass_tot;

2427 
int nbin;

2428 
double invbinw,*bin,refvolshift,logV,bUlogV;

2429 
char *tpid_leg[2]={"direct","reweighted"}; 
2430  
2431 
/* Since numerical problems can lead to extreme negative energies

2432 
* when atoms overlap, we need to set a lower limit for beta*U.

2433 
*/

2434 
real bU_neg_limit = 50;

2435  
2436 
/* Since there is no upper limit to the insertion energies,

2437 
* we need to set an upper limit for the distribution output.

2438 
*/

2439 
real bU_bin_limit = 50;

2440 
real bU_logV_bin_limit = bU_bin_limit + 10;

2441  
2442 
nnodes = cr>nnodes; 
2443  
2444 
top = gmx_mtop_generate_local_top(top_global,inputrec); 
2445  
2446 
groups = &top_global>groups; 
2447  
2448 
bCavity = (inputrec>eI == eiTPIC); 
2449 
if (bCavity) {

2450 
ptr = getenv("GMX_TPIC_MASSES");

2451 
if (ptr == NULL) { 
2452 
nat_cavity = 1;

2453 
} else {

2454 
/* Read (multiple) masses from env var GMX_TPIC_MASSES,

2455 
* The center of mass of the last atoms is then used for TPIC.

2456 
*/

2457 
nat_cavity = 0;

2458 
while (sscanf(ptr,"%lf%n",&dbl,&i) > 0) { 
2459 
srenew(mass_cavity,nat_cavity+1);

2460 
mass_cavity[nat_cavity] = dbl; 
2461 
fprintf(fplog,"mass[%d] = %f\n",

2462 
nat_cavity+1,mass_cavity[nat_cavity]);

2463 
nat_cavity++; 
2464 
ptr += i; 
2465 
} 
2466 
if (nat_cavity == 0) 
2467 
gmx_fatal(FARGS,"Found %d masses in GMX_TPIC_MASSES",nat_cavity);

2468 
} 
2469 
} 
2470  
2471 
/*

2472 
init_em(fplog,TPI,inputrec,&lambda,nrnb,mu_tot,

2473 
state>box,fr,mdatoms,top,cr,nfile,fnm,NULL,NULL);*/

2474 
/* We never need full pbc for TPI */

2475 
fr>ePBC = epbcXYZ; 
2476 
/* Determine the temperature for the Boltzmann weighting */

2477 
temp = inputrec>opts.ref_t[0];

2478 
if (fplog) {

2479 
for(i=1; (i<inputrec>opts.ngtc); i++) { 
2480 
if (inputrec>opts.ref_t[i] != temp) {

2481 
fprintf(fplog,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");

2482 
fprintf(stderr,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");

2483 
} 
2484 
} 
2485 
fprintf(fplog, 
2486 
"\n The temperature for test particle insertion is %.3f K\n\n",

2487 
temp); 
2488 
} 
2489 
beta = 1.0/(BOLTZ*temp); 
2490  
2491 
/* Number of insertions per frame */

2492 
nsteps = inputrec>nsteps; 
2493  
2494 
/* Use the same neighborlist with more insertions points

2495 
* in a sphere of radius drmax around the initial point

2496 
*/

2497 
/* This should be a proper mdp parameter */

2498 
drmax = inputrec>rtpi; 
2499  
2500 
/* An environment variable can be set to dump all configurations

2501 
* to pdb with an insertion energy <= this value.

2502 
*/

2503 
dump_pdb = getenv("GMX_TPI_DUMP");

2504 
dump_ener = 0;

2505 
if (dump_pdb)

2506 
sscanf(dump_pdb,"%lf",&dump_ener);

2507  
2508 
atoms2md(top_global,inputrec,0,NULL,0,top_global>natoms,mdatoms); 
2509 
update_mdatoms(mdatoms,inputrec>init_lambda); 
2510  
2511 
snew(enerd,1);

2512 
init_enerdata(fplog,groups>grps[egcENER].nr,enerd); 
2513  
2514 
/* Print to log file */

2515 
start_t=print_date_and_time(fplog,cr>nodeid, 
2516 
"Started Test Particle Insertion");

2517 
wallcycle_start(wcycle,ewcRUN); 
2518  
2519 
/* The last charge group is the group to be inserted */

2520 
cg_tp = top>cgs.nr  1;

2521 
a_tp0 = top>cgs.index[cg_tp]; 
2522 
a_tp1 = top>cgs.index[cg_tp+1];

2523 
if (debug)

2524 
fprintf(debug,"TPI cg %d, atoms %d%d\n",cg_tp,a_tp0,a_tp1);

2525 
if (a_tp1  a_tp0 > 1 && 
2526 
(inputrec>rlist < inputrec>rcoulomb  
2527 
inputrec>rlist < inputrec>rvdw)) 
2528 
gmx_fatal(FARGS,"Can not do TPI for multiatom molecule with a twinrange cutoff");

2529 
snew(x_mol,a_tp1a_tp0); 
2530  
2531 
bDispCorr = (inputrec>eDispCorr != edispcNO); 
2532 
bCharge = FALSE; 
2533 
for(i=a_tp0; i<a_tp1; i++) {

2534 
/* Copy the coordinates of the molecule to be insterted */

2535 
copy_rvec(state>x[i],x_mol[ia_tp0]); 
2536 
/* Check if we need to print electrostatic energies */

2537 
bCharge = (mdatoms>chargeA[i] != 0 

2538 
(mdatoms>chargeB && mdatoms>chargeB[i] != 0));

2539 
} 
2540 
bRFExcl = (bCharge && EEL_RF(fr>eeltype) && fr>eeltype!=eelRF_NEC); 
2541  
2542 
calc_cgcm(fplog,cg_tp,cg_tp+1,&(top>cgs),state>x,fr>cg_cm);

2543 
if (bCavity) {

2544 
if (norm(fr>cg_cm[cg_tp]) > 0.5*inputrec>rlist && fplog) { 
2545 
fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n");

2546 
fprintf(stderr,"WARNING: Your TPI molecule is not centered at 0,0,0\n");

2547 
} 
2548 
} else {

2549 
/* Center the molecule to be inserted at zero */

2550 
for(i=0; i<a_tp1a_tp0; i++) 
2551 
rvec_dec(x_mol[i],fr>cg_cm[cg_tp]); 
2552 
} 
2553  
2554 
if (fplog) {

2555 
fprintf(fplog,"\nWill insert %d atoms %s partial charges\n",

2556 
a_tp1a_tp0,bCharge ? "with" : "without"); 
2557 

2558 
fprintf(fplog,"\nWill insert %d times in each frame of %s\n",

2559 
nsteps,opt2fn("rerun",nfile,fnm));

2560 
} 
2561 

2562 
if (!bCavity) {

2563 
if (inputrec>nstlist > 1) { 
2564 
if (drmax==0 && a_tp1a_tp0==1) { 
2565 
gmx_fatal(FARGS,"Reusing the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense",inputrec>nstlist,drmax);

2566 
} 
2567 
if (fplog)

2568 
fprintf(fplog,"Will use the same neighborlist for %d insertions in a sphere of radius %f\n",inputrec>nstlist,drmax);

2569 
} 
2570 
} else {

2571 
if (fplog)

2572 
fprintf(fplog,"Will insert randomly in a sphere of radius %f around the center of the cavity\n",drmax);

2573 
} 
2574  
2575 
ngid = groups>grps[egcENER].nr; 
2576 
gid_tp = GET_CGINFO_GID(fr>cginfo[cg_tp]); 
2577 
nener = 1 + ngid;

2578 
if (bDispCorr)

2579 
nener += 1;

2580 
if (bCharge) {

2581 
nener += ngid; 
2582 
if (bRFExcl)

2583 
nener += 1;

2584 
} 
2585 
snew(sum_UgembU,nener); 
2586  
2587 
/* Initialize random generator */

2588 
tpi_rand = gmx_rng_init(inputrec>ld_seed); 
2589  
2590 
if (MASTER(cr)) {

2591 
fp_tpi = xvgropen(opt2fn("tpi",nfile,fnm),

2592 
"TPI energies","Time (ps)", 
2593 
"(kJ mol\\S1\\N) / (nm\\S3\\N)");

2594 
xvgr_subtitle(fp_tpi,"f. are averages over one frame");

2595 
snew(leg,4+nener);

2596 
e = 0;

2597 
sprintf(str,"kT log(<Ve\\S\\8b\\4U\\N>/<V>)");

2598 
leg[e++] = strdup(str); 
2599 
sprintf(str,"f. kT log<e\\S\\8b\\4U\\N>");

2600 
leg[e++] = strdup(str); 
2601 
sprintf(str,"f. <e\\S\\8b\\4U\\N>");

2602 
leg[e++] = strdup(str); 
2603 
sprintf(str,"f. V");

2604 
leg[e++] = strdup(str); 
2605 
sprintf(str,"f. <Ue\\S\\8b\\4U\\N>");

2606 
leg[e++] = strdup(str); 
2607 
for(i=0; i<ngid; i++) { 
2608 
sprintf(str,"f. <U\\sVdW %s\\Ne\\S\\8b\\4U\\N>",

2609 
*(groups>grpname[groups>grps[egcENER].nm_ind[i]])); 
2610 
leg[e++] = strdup(str); 
2611 
} 
2612 
if (bDispCorr) {

2613 
sprintf(str,"f. <U\\sdisp c\\Ne\\S\\8b\\4U\\N>");

2614 
leg[e++] = strdup(str); 
2615 
} 
2616 
if (bCharge) {

2617 
for(i=0; i<ngid; i++) { 
2618 
sprintf(str,"f. <U\\sCoul %s\\Ne\\S\\8b\\4U\\N>",

2619 
*(groups>grpname[groups>grps[egcENER].nm_ind[i]])); 
2620 
leg[e++] = strdup(str); 
2621 
} 
2622 
if (bRFExcl) {

2623 
sprintf(str,"f. <U\\sRF excl\\Ne\\S\\8b\\4U\\N>");

2624 
leg[e++] = strdup(str); 
2625 
} 
2626 
} 
2627 
xvgr_legend(fp_tpi,4+nener,leg);

2628 
for(i=0; i<4+nener; i++) 
2629 
sfree(leg[i]); 
2630 
sfree(leg); 
2631 
} 
2632 
clear_rvec(x_init); 
2633 
V_all = 0;

2634 
VembU_all = 0;

2635  
2636 
invbinw = 10;

2637 
nbin = 10;

2638 
snew(bin,nbin); 
2639  
2640 
start_time(); 
2641  
2642 
bNotLastFrame = read_first_frame(&status,opt2fn("rerun",nfile,fnm),

2643 
&rerun_fr,TRX_NEED_X); 
2644 
frame = 0;

2645  
2646 
if (rerun_fr.natoms  (bCavity ? nat_cavity : 0) != 
2647 
mdatoms>nr  (a_tp1  a_tp0)) 
2648 
gmx_fatal(FARGS,"Number of atoms in trajectory (%d)%s "

2649 
"is not equal the number in the run input file (%d) "

2650 
"minus the number of atoms to insert (%d)\n",

2651 
rerun_fr.natoms,bCavity ? " minus one" : "", 
2652 
mdatoms>nr,a_tp1a_tp0); 
2653  
2654 
refvolshift = log(det(rerun_fr.box)); 
2655 

2656 
while (bNotLastFrame) {

2657 
lambda = rerun_fr.lambda; 
2658 
t = rerun_fr.time; 
2659 

2660 
sum_embU = 0;

2661 
for(e=0; e<nener; e++) 
2662 
sum_UgembU[e] = 0;

2663  
2664 
/* Copy the coordinates from the input trajectory */

2665 
for(i=0; i<rerun_fr.natoms; i++) 
2666 
copy_rvec(rerun_fr.x[i],state>x[i]); 
2667 

2668 
V = det(rerun_fr.box); 
2669 
logV = log(V); 
2670  
2671 
bStateChanged = TRUE; 
2672 
bNS = TRUE; 
2673 
for(step=0; step<nsteps; step++) { 
2674 
/* In parallel all nodes generate all random configurations.

2675 
* In that way the result is identical to a single cpu tpi run.

2676 
*/

2677 
if (!bCavity) {

2678 
/* Random insertion in the whole volume */

2679 
bNS = (step % inputrec>nstlist == 0);

2680 
if (bNS) {

2681 
/* Generate a random position in the box */

2682 
x_init[XX] = gmx_rng_uniform_real(tpi_rand)*state>box[XX][XX]; 
2683 
x_init[YY] = gmx_rng_uniform_real(tpi_rand)*state>box[YY][YY]; 
2684 
x_init[ZZ] = gmx_rng_uniform_real(tpi_rand)*state>box[ZZ][ZZ]; 
2685 
} 
2686 
if (inputrec>nstlist == 1) { 
2687 
copy_rvec(x_init,x_tp); 
2688 
} else {

2689 
/* Generate coordinates within dx=drmax of x_init */

2690 
do {

2691 
dx[XX] = (2*gmx_rng_uniform_real(tpi_rand)  1)*drmax; 
2692 
dx[YY] = (2*gmx_rng_uniform_real(tpi_rand)  1)*drmax; 
2693 
dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand)  1)*drmax; 
2694 
} while (norm2(dx) > drmax*drmax);

2695 
rvec_add(x_init,dx,x_tp); 
2696 
} 
2697 
} else {

2698 
/* Random insertion around a cavity location

2699 
* given by the last coordinate of the trajectory.

2700 
*/

2701 
if (step == 0) { 
2702 
if (nat_cavity == 1) { 
2703 
/* Copy the location of the cavity */

2704 
copy_rvec(rerun_fr.x[rerun_fr.natoms1],x_init);

2705 
} else {

2706 
/* Determine the center of mass of the last molecule */

2707 
clear_rvec(x_init); 
2708 
mass_tot = 0;

2709 
for(i=0; i<nat_cavity; i++) { 
2710 
for(d=0; d<DIM; d++) 
2711 
x_init[d] += 
2712 
mass_cavity[i]*rerun_fr.x[rerun_fr.natomsnat_cavity+i][d]; 
2713 
mass_tot += mass_cavity[i]; 
2714 
} 
2715 
for(d=0; d<DIM; d++) 
2716 
x_init[d] /= mass_tot; 
2717 
} 
2718 
} 
2719 
/* Generate coordinates within dx=drmax of x_init */

2720 
do {

2721 
dx[XX] = (2*gmx_rng_uniform_real(tpi_rand)  1)*drmax; 
2722 
dx[YY] = (2*gmx_rng_uniform_real(tpi_rand)  1)*drmax; 
2723 
dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand)  1)*drmax; 
2724 
} while (norm2(dx) > drmax*drmax);

2725 
rvec_add(x_init,dx,x_tp); 
2726 
} 
2727  
2728 
if (a_tp1  a_tp0 == 1) { 
2729 
/* Insert a single atom, just copy the insertion location */

2730 
copy_rvec(x_tp,state>x[a_tp0]); 
2731 
} else {

2732 
/* Copy the coordinates from the top file */

2733 
for(i=a_tp0; i<a_tp1; i++)

2734 
copy_rvec(x_mol[ia_tp0],state>x[i]); 
2735 
/* Rotate the molecule randomly */

2736 
rotate_conf(a_tp1a_tp0,state>x+a_tp0,NULL,

2737 
2*M_PI*gmx_rng_uniform_real(tpi_rand),

2738 
2*M_PI*gmx_rng_uniform_real(tpi_rand),

2739 
2*M_PI*gmx_rng_uniform_real(tpi_rand));

2740 
/* Shift to the insertion location */

2741 
for(i=a_tp0; i<a_tp1; i++)

2742 
rvec_inc(state>x[i],x_tp); 
2743 
} 
2744  
2745 
/* Check if this insertion belongs to this node */

2746 
bOurStep = TRUE; 
2747 
if (PAR(cr)) {

2748 
switch (inputrec>eI) {

2749 
case eiTPI:

2750 
bOurStep = ((step / inputrec>nstlist) % nnodes == cr>nodeid); 
2751 
break;

2752 
case eiTPIC:

2753 
bOurStep = (step % nnodes == cr>nodeid); 
2754 
break;

2755 
default:

2756 
gmx_fatal(FARGS,"Unknown integrator %s",ei_names[inputrec>eI]);

2757 
} 
2758 
} 
2759 
if (bOurStep) {

2760 
/* Clear some matrix variables */

2761 
clear_mat(force_vir); 
2762 
clear_mat(shake_vir); 
2763 
clear_mat(vir); 
2764 
clear_mat(pres); 
2765 

2766 
/* Set the charge group center of mass of the test particle */

2767 
copy_rvec(x_init,fr>cg_cm[top>cgs.nr1]);

2768  
2769 
/* Calc energy (no forces) on new positions.

2770 
* Since we only need the intermolecular energy

2771 
* and the RF exclusion terms of the inserted molecule occur

2772 
* within a single charge group we can pass NULL for the graph.

2773 
* This also avoids shifts that would move charge groups

2774 
* out of the box.

2775 
*/

2776 
/* Make do_force do a single node fore calculation */

2777 
cr>nnodes = 1;

2778 
do_force(fplog,cr,inputrec, 
2779 
step,nrnb,wcycle,top,&top_global>groups, 
2780 
rerun_fr.box,state>x,&state>hist, 
2781 
f,buf,force_vir,mdatoms,enerd,fcd, 
2782 
lambda,NULL,fr,NULL,mu_tot,t,NULL,NULL, 
2783 
GMX_FORCE_NONBONDED  
2784 
(bNS ? GMX_FORCE_NS : 0) 

2785 
(bStateChanged ? GMX_FORCE_STATECHANGED : 0));

2786 
cr>nnodes = nnodes; 
2787 
bStateChanged = FALSE; 
2788 
bNS = FALSE; 
2789  
2790 
/* Calculate long range corrections to pressure and energy */

2791 
calc_dispcorr(fplog,inputrec,fr,step,mdatoms>nr,rerun_fr.box,lambda, 
2792 
pres,vir,enerd>term); 
2793 

2794 
/* If the compiler doesn't optimize this check away

2795 
* we catch the NAN energies.

2796 
* With tables extreme negative energies might occur close to r=0.

2797 
*/

2798 
epot = enerd>term[F_EPOT]; 
2799 
if (epot != epot  epot*beta < bU_neg_limit) {

2800 
if (debug)

2801 
fprintf(debug,"\n time %.3f, step %d: nonfinite energy %f, using exp(bU)=0\n",t,step,epot);

2802 
embU = 0;

2803 
} else {

2804 
embU = exp(beta*epot); 
2805 
sum_embU += embU; 
2806 
/* Determine the weighted energy contributions of each energy group */

2807 
e = 0;

2808 
sum_UgembU[e++] += epot*embU; 
2809 
if (fr>bBHAM) {

2810 
for(i=0; i<ngid; i++) 
2811 
sum_UgembU[e++] += 
2812 
(enerd>grpp.ener[egBHAMSR][GID(i,gid_tp,ngid)] + 
2813 
enerd>grpp.ener[egBHAMLR][GID(i,gid_tp,ngid)])*embU; 
2814 
} else {

2815 
for(i=0; i<ngid; i++) 
2816 
sum_UgembU[e++] += 
2817 
(enerd>grpp.ener[egLJSR][GID(i,gid_tp,ngid)] + 
2818 
enerd>grpp.ener[egLJLR][GID(i,gid_tp,ngid)])*embU; 
2819 
} 
2820 
if (bDispCorr)

2821 
sum_UgembU[e++] += enerd>term[F_DISPCORR]*embU; 
2822 
if (bCharge) {

2823 
for(i=0; i<ngid; i++) 
2824 
sum_UgembU[e++] += 
2825 
(enerd>grpp.ener[egCOULSR][GID(i,gid_tp,ngid)] + 
2826 
enerd>grpp.ener[egCOULLR][GID(i,gid_tp,ngid)])*embU; 
2827 
if (bRFExcl)

2828 
sum_UgembU[e++] += enerd>term[F_RF_EXCL]*embU; 
2829 
} 
2830 
} 
2831 

2832 
if (embU == 0  beta*epot > bU_bin_limit) { 
2833 
bin[0]++;

2834 
} else {

2835 
i = (int)((bU_logV_bin_limit

2836 
 (beta*epot  logV + refvolshift))*invbinw 
2837 
+ 0.5); 
2838 
if (i < 0) 
2839 
i = 0;

2840 
if (i >= nbin)

2841 
realloc_bins(&bin,&nbin,i+10);

2842 
bin[i]++; 
2843 
} 
2844  
2845 
if (debug)

2846 
fprintf(debug,"TPI %7d %12.5e %12.5f %12.5f %12.5f\n",

2847 
step,epot,x_tp[XX],x_tp[YY],x_tp[ZZ]); 
2848  
2849 
if (dump_pdb && epot <= dump_ener) {

2850 
sprintf(str,"t%g_step%d.pdb",t,step);

2851 
sprintf(str2,"t: %f step %d ener: %f",t,step,epot);

2852 
write_sto_conf_mtop(str,str2,top_global,state>x,state>v, 
2853 
inputrec>ePBC,state>box); 
2854 
} 
2855 
} 
2856 
} 
2857  
2858 
if (PAR(cr)) {

2859 
/* When running in parallel sum the energies over the processes */

2860 
gmx_sumd(1, &sum_embU, cr);

2861 
gmx_sumd(nener,sum_UgembU,cr); 
2862 
} 
2863  
2864 
frame++; 
2865 
V_all += V; 
2866 
VembU_all += V*sum_embU/nsteps; 
2867  
2868 
if (fp_tpi) {

2869 
if (bVerbose  frame%10==0  frame<10) 
2870 
fprintf(stderr,"mu %10.3e <mu> %10.3e\n",

2871 
log(sum_embU/nsteps)/beta,log(VembU_all/V_all)/beta); 
2872 

2873 
fprintf(fp_tpi,"%10.3f %12.5e %12.5e %12.5e %12.5e",

2874 
t, 
2875 
VembU_all==0 ? 20/beta : log(VembU_all/V_all)/beta, 
2876 
sum_embU==0 ? 20/beta : log(sum_embU/nsteps)/beta, 
2877 
sum_embU/nsteps,V); 
2878 
for(e=0; e<nener; e++) 
2879 
fprintf(fp_tpi," %12.5e",sum_UgembU[e]/nsteps);

2880 
fprintf(fp_tpi,"\n");

2881 
fflush(fp_tpi); 
2882 
} 
2883  
2884 
bNotLastFrame = read_next_frame(status,&rerun_fr); 
2885 
} /* End of the loop */

2886  
2887 
update_time(); 
2888  
2889 
close_trj(status); 
2890  
2891 
if (fp_tpi)

2892 
gmx_fio_fclose(fp_tpi); 
2893  
2894 
if (fplog) {

2895 
fprintf(fplog,"\n");

2896 
fprintf(fplog," <V> = %12.5e nm^3\n",V_all/frame);

2897 
fprintf(fplog," <mu> = %12.5e kJ/mol\n",log(VembU_all/V_all)/beta);

2898 
} 
2899 

2900 
/* Write the Boltzmann factor histogram */

2901 
if (PAR(cr)) {

2902 
/* When running in parallel sum the bins over the processes */

2903 
i = nbin; 
2904 
global_max(cr,&i); 
2905 
realloc_bins(&bin,&nbin,i); 
2906 
gmx_sumd(nbin,bin,cr); 
2907 
} 
2908 
fp_tpi = xvgropen(opt2fn("tpid",nfile,fnm),

2909 
"TPI energy distribution",

2910 
"\\8b\\4U  log(V/<V>)","count"); 
2911 
sprintf(str,"number \\8b\\4U > %g: %9.3e",bU_bin_limit,bin[0]); 
2912 
xvgr_subtitle(fp_tpi,str); 
2913 
xvgr_legend(fp_tpi,2,tpid_leg);

2914 
for(i=nbin1; i>0; i) { 
2915 
bUlogV = i/invbinw + bU_logV_bin_limit  refvolshift + log(V_all/frame); 
2916 
fprintf(fp_tpi,"%6.2f %10d %12.5e\n",

2917 
bUlogV, 
2918 
(int)(bin[i]+0.5), 
2919 
bin[i]*exp(bUlogV)*V_all/VembU_all); 
2920 
} 
2921 
gmx_fio_fclose(fp_tpi); 
2922 
sfree(bin); 
2923  
2924 
sfree(sum_UgembU); 
2925  
2926 
*nsteps_done = frame*inputrec>nsteps; 
2927 

2928 
return start_t;

2929 
} 