minimize_r1.131.2.3.c
1 
/*


2 
* $Id: minimize.c,v 1.131.2.3 2008/11/21 20:23:53 hess Exp $

3 
*

4 
* This source code is part of

5 
*

6 
* G R O M A C S

7 
*

8 
* GROningen MAchine for Chemical Simulations

9 
*

10 
* VERSION 3.2.0

11 
* Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.

12 
* Copyright (c) 19912000, University of Groningen, The Netherlands.

13 
* Copyright (c) 20012004, The GROMACS development team,

14 
* check out http://www.gromacs.org for more information.

15 

16 
* This program is free software; you can redistribute it and/or

17 
* modify it under the terms of the GNU General Public License

18 
* as published by the Free Software Foundation; either version 2

19 
* of the License, or (at your option) any later version.

20 
*

21 
* If you want to redistribute modifications, please consider that

22 
* scientific software is very special. Version control is crucial 

23 
* bugs must be traceable. We will be happy to consider code for

24 
* inclusion in the official distribution, but derived work must not

25 
* be called official GROMACS. Details are found in the README & COPYING

26 
* files  if they are missing, get the official version at www.gromacs.org.

27 
*

28 
* To help us fund GROMACS development, we humbly ask that you cite

29 
* the papers on the package  you can find them in the top README file.

30 
*

31 
* For more info, check our website at http://www.gromacs.org

32 
*

33 
* And Hey:

34 
* GROwing Monsters And Cloning Shrimps

35 
*/

36 
#ifdef HAVE_CONFIG_H

37 
#include <config.h> 
38 
#endif

39  
40 
#include <string.h> 
41 
#include <time.h> 
42 
#include <math.h> 
43 
#include "sysstuff.h" 
44 
#include "string2.h" 
45 
#include "network.h" 
46 
#include "confio.h" 
47 
#include "copyrite.h" 
48 
#include "smalloc.h" 
49 
#include "nrnb.h" 
50 
#include "main.h" 
51 
#include "pbc.h" 
52 
#include "force.h" 
53 
#include "macros.h" 
54 
#include "random.h" 
55 
#include "names.h" 
56 
#include "gmx_fatal.h" 
57 
#include "txtdump.h" 
58 
#include "typedefs.h" 
59 
#include "update.h" 
60 
#include "random.h" 
61 
#include "constr.h" 
62 
#include "vec.h" 
63 
#include "statutil.h" 
64 
#include "tgroup.h" 
65 
#include "mdebin.h" 
66 
#include "vsite.h" 
67 
#include "force.h" 
68 
#include "mdrun.h" 
69 
#include "domdec.h" 
70 
#include "partdec.h" 
71 
#include "trnio.h" 
72 
#include "sparsematrix.h" 
73 
#include "mtxio.h" 
74 
#include "gmx_random.h" 
75 
#include "physics.h" 
76 
#include "xvgr.h" 
77 
#include "mdatoms.h" 
78 
#include "gbutil.h" 
79 
#include "ns.h" 
80 
#include "gmx_wallcycle.h" 
81 
#include "mtop_util.h" 
82 
#include "gmxfio.h" 
83 
#include "pme.h" 
84  
85 
typedef struct { 
86 
t_state s; 
87 
rvec *f; 
88 
real epot; 
89 
real fnorm; 
90 
real fmax; 
91 
int a_fmax;

92 
} em_state_t; 
93  
94 
static em_state_t *init_em_state()

95 
{ 
96 
em_state_t *ems; 
97 

98 
snew(ems,1);

99  
100 
return ems;

101 
} 
102  
103 
static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps) 
104 
{ 
105 
fprintf(out,"%s:\n",minimizer);

106 
fprintf(out," Tolerance (Fmax) = %12.5e\n",ftol);

107 
fprintf(out," Number of steps = %12d\n",nsteps);

108 
} 
109  
110 
static void warn_step(FILE *fp,real ftol,bool bConstrain) 
111 
{ 
112 
fprintf(fp,"\nStepsize too small, or no change in energy.\n"

113 
"Converged to machine precision,\n"

114 
"but not to the requested precision Fmax < %g\n",

115 
ftol); 
116 
if (sizeof(real)<sizeof(double)) 
117 
fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");

118 

119 
if (bConstrain)

120 
fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"

121 
"off constraints alltogether (set constraints = none in mdp file)\n");

122 
} 
123  
124  
125  
126 
static void print_converged(FILE *fp,const char *alg,real ftol,int count,bool bDone, 
127 
int nsteps,real epot,real fmax, int nfmax, real fnorm) 
128 
{ 
129 
if (bDone)

130 
fprintf(fp,"\n%s converged to Fmax < %g in %d steps\n",alg,ftol,count);

131 
else if(count<nsteps) 
132 
fprintf(fp,"\n%s converged to machine precision in %d steps,\n"

133 
"but did not reach the requested Fmax < %g.\n",alg,count,ftol);

134 
else

135 
fprintf(fp,"\n%s did not converge to Fmax < %g in %d steps.\n",alg,ftol,count);

136  
137 
#ifdef GMX_DOUBLE

138 
fprintf(fp,"Potential Energy = %21.14e\n",epot);

139 
fprintf(fp,"Maximum force = %21.14e on atom %d\n",fmax,nfmax+1); 
140 
fprintf(fp,"Norm of force = %21.14e\n",fnorm);

141 
#else

142 
fprintf(fp,"Potential Energy = %14.7e\n",epot);

143 
fprintf(fp,"Maximum force = %14.7e on atom %d\n",fmax,nfmax+1); 
144 
fprintf(fp,"Norm of force = %14.7e\n",fnorm);

145 
#endif

146 
} 
147  
148 
static void get_f_norm_max(t_commrec *cr, 
149 
t_grpopts *opts,t_mdatoms *mdatoms,rvec *f, 
150 
real *fnorm,real *fmax,int *a_fmax)

151 
{ 
152 
double fnorm2,*sum;

153 
real fmax2,fmax2_0,fam; 
154 
int la_max,a_max,start,end,i,m,gf;

155  
156 
/* This routine finds the largest force and returns it.

157 
* On parallel machines the global max is taken.

158 
*/

159 
fnorm2 = 0;

160 
fmax2 = 0;

161 
la_max = 1;

162 
gf = 0;

163 
start = mdatoms>start; 
164 
end = mdatoms>homenr + start; 
165 
if (mdatoms>cFREEZE) {

166 
for(i=start; i<end; i++) {

167 
gf = mdatoms>cFREEZE[i]; 
168 
fam = 0;

169 
for(m=0; m<DIM; m++) 
170 
if (!opts>nFreeze[gf][m])

171 
fam += sqr(f[i][m]); 
172 
fnorm2 += fam; 
173 
if (fam > fmax2) {

174 
fmax2 = fam; 
175 
la_max = i; 
176 
} 
177 
} 
178 
} else {

179 
for(i=start; i<end; i++) {

180 
fam = norm2(f[i]); 
181 
fnorm2 += fam; 
182 
if (fam > fmax2) {

183 
fmax2 = fam; 
184 
la_max = i; 
185 
} 
186 
} 
187 
} 
188  
189 
if (la_max >= 0 && DOMAINDECOMP(cr)) { 
190 
a_max = cr>dd>gatindex[la_max]; 
191 
} else {

192 
a_max = la_max; 
193 
} 
194 
if (PAR(cr)) {

195 
snew(sum,2*cr>nnodes+1); 
196 
sum[2*cr>nodeid] = fmax2;

197 
sum[2*cr>nodeid+1] = a_max; 
198 
sum[2*cr>nnodes] = fnorm2;

199 
gmx_sumd(2*cr>nnodes+1,sum,cr); 
200 
fnorm2 = sum[2*cr>nnodes];

201 
/* Determine the global maximum */

202 
for(i=0; i<cr>nnodes; i++) { 
203 
if (sum[2*i] > fmax2) { 
204 
fmax2 = sum[2*i];

205 
a_max = (int)(sum[2*i+1] + 0.5); 
206 
} 
207 
} 
208 
sfree(sum); 
209 
} 
210  
211 
if (fnorm)

212 
*fnorm = sqrt(fnorm2); 
213 
if (fmax)

214 
*fmax = sqrt(fmax2); 
215 
if (a_fmax)

216 
*a_fmax = a_max; 
217 
} 
218  
219 
static void get_state_f_norm_max(t_commrec *cr, 
220 
t_grpopts *opts,t_mdatoms *mdatoms, 
221 
em_state_t *ems) 
222 
{ 
223 
get_f_norm_max(cr,opts,mdatoms,ems>f,&ems>fnorm,&ems>fmax,&ems>a_fmax); 
224 
} 
225  
226 
void init_em(FILE *fplog,const char *title, 
227 
t_commrec *cr,t_inputrec *ir, 
228 
t_state *state_global,gmx_mtop_t *top_global, 
229 
em_state_t *ems,gmx_localtop_t **top, 
230 
rvec *f,rvec **buf,rvec **f_global, 
231 
t_nrnb *nrnb,rvec mu_tot, 
232 
t_forcerec *fr,gmx_enerdata_t **enerd, 
233 
t_graph **graph,t_mdatoms *mdatoms, 
234 
gmx_vsite_t *vsite,gmx_constr_t constr, 
235 
int nfile,t_filenm fnm[],int *fp_trn,int *fp_ene, 
236 
t_mdebin **mdebin) 
237 
{ 
238 
int start,homenr,i;

239 
real dvdlambda; 
240  
241 
if (fplog)

242 
fprintf(fplog,"Initiating %s\n",title);

243  
244 
state_global>ngtc = 0;

245 
if (ir>eI == eiCG) {

246 
state_global>flags = (1<<estCGP);

247 
snew(state_global>cg_p,state_global>nalloc); 
248 
} 
249  
250 
/* Initiate some variables */

251 
if (ir>efep != efepNO)

252 
state_global>lambda = ir>init_lambda; 
253 
else

254 
state_global>lambda = 0.0; 
255  
256 
init_nrnb(nrnb); 
257  
258 
if (DOMAINDECOMP(cr)) {

259 
*top = dd_init_local_top(top_global); 
260  
261 
dd_init_local_state(cr>dd,state_global,&ems>s); 
262  
263 
/* Distribute the charge groups over the nodes from the master node */

264 
dd_partition_system(fplog,ir>init_step,cr,TRUE, 
265 
state_global,top_global,ir, 
266 
&ems>s,&ems>f,buf,mdatoms,*top, 
267 
fr,vsite,NULL,constr,

268 
nrnb,NULL,FALSE);

269 
dd_store_state(cr>dd,&ems>s); 
270 

271 
if (ir>nstfout) {

272 
snew(*f_global,top_global>natoms); 
273 
} else {

274 
*f_global = NULL;

275 
} 
276 
*graph = NULL;

277 
} else {

278 
/* Just copy the state */

279 
ems>s = *state_global; 
280 
snew(ems>s.x,ems>s.nalloc); 
281 
snew(ems>f,ems>s.nalloc); 
282 
for(i=0; i<state_global>natoms; i++) 
283 
copy_rvec(state_global>x[i],ems>s.x[i]); 
284 
copy_mat(state_global>box,ems>s.box); 
285  
286 
if (PAR(cr)) {

287 
/* Initialize the particle decomposition and split the topology */

288 
*top = split_system(fplog,top_global,ir,cr); 
289 

290 
pd_cg_range(cr,&fr>cg0,&fr>hcg); 
291 
} else {

292 
*top = gmx_mtop_generate_local_top(top_global,ir); 
293 
} 
294 
*f_global = f; 
295  
296 
if (ir>ePBC != epbcNONE && !ir>bPeriodicMols) {

297 
*graph = mk_graph(fplog,&((*top)>idef),0,top_global>natoms,FALSE,FALSE);

298 
} else {

299 
*graph = NULL;

300 
} 
301 
} 
302  
303 
clear_rvec(mu_tot); 
304 
calc_shifts(ems>s.box,fr>shift_vec); 
305  
306 
if (PARTDECOMP(cr)) {

307 
pd_at_range(cr,&start,&homenr); 
308 
homenr = start; 
309 
} else {

310 
start = 0;

311 
homenr = top_global>natoms; 
312 
} 
313 
atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms); 
314 
update_mdatoms(mdatoms,state_global>lambda); 
315  
316 
if (vsite && !DOMAINDECOMP(cr)) {

317 
set_vsite_top(vsite,*top,mdatoms,cr); 
318 
} 
319  
320 
if (constr) {

321 
if (ir>eConstrAlg == econtSHAKE &&

322 
gmx_mtop_ftype_count(top_global,F_CONSTR) > 0) {

323 
gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",

324 
econstr_names[econtSHAKE],econstr_names[econtLINCS]); 
325 
} 
326  
327 
if (!DOMAINDECOMP(cr))

328 
set_constraints(constr,*top,ir,mdatoms,NULL);

329  
330 
/* Constrain the starting coordinates */

331 
dvdlambda=0;

332 
constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)>idef,

333 
ir,cr,1,0,mdatoms, 
334 
ems>s.x,ems>s.x,NULL,ems>s.box,ems>s.lambda,&dvdlambda,

335 
NULL,NULL,nrnb,econqCoord); 
336 
} 
337  
338 
if (MASTER(cr)) {

339 
if (fp_trn)

340 
*fp_trn = open_trn(ftp2fn(efTRN,nfile,fnm),"w");

341 
if (fp_ene)

342 
*fp_ene = open_enx(ftp2fn(efENX,nfile,fnm),"w");

343 
} else {

344 
if (fp_trn)

345 
*fp_trn = 1;

346 
if (fp_ene)

347 
*fp_ene = 1;

348 
} 
349  
350 
snew(*enerd,1);

351 
init_enerdata(fplog,top_global>groups.grps[egcENER].nr,*enerd); 
352  
353 
/* Init bin for energy stuff */

354 
*mdebin = init_mdebin(*fp_ene,top_global,ir); 
355 
} 
356  
357 
static void finish_em(FILE *fplog,t_commrec *cr, 
358 
int fp_traj,int fp_ene) 
359 
{ 
360 
if (!(cr>duty & DUTY_PME)) {

361 
/* Tell the PME only node to finish */

362 
gmx_pme_finish(cr); 
363 
} 
364  
365 
if (MASTER(cr)) {

366 
close_trn(fp_traj); 
367 
close_enx(fp_ene); 
368 
} 
369 
} 
370  
371 
static void swap_em_state(em_state_t *ems1,em_state_t *ems2) 
372 
{ 
373 
em_state_t tmp; 
374  
375 
tmp = *ems1; 
376 
*ems1 = *ems2; 
377 
*ems2 = tmp; 
378 
} 
379  
380 
static void copy_em_coords_back(em_state_t *ems,t_state *state) 
381 
{ 
382 
int i;

383  
384 
for(i=0; (i<state>natoms); i++) 
385 
copy_rvec(ems>s.x[i],state>x[i]); 
386 
} 
387  
388 
static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md, 
389 
em_state_t *ems1,real a,rvec *f,em_state_t *ems2, 
390 
gmx_constr_t constr,gmx_localtop_t *top, 
391 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
392 
int count)

393  
394 
{ 
395 
t_state *s1,*s2; 
396 
int start,end,gf,i,m;

397 
rvec *x1,*x2; 
398 
real dvdlambda; 
399  
400 
s1 = &ems1>s; 
401 
s2 = &ems2>s; 
402  
403 
if (DOMAINDECOMP(cr) && s1>ddp_count != cr>dd>ddp_count)

404 
gmx_incons("state mismatch in do_em_step");

405  
406 
s2>flags = s1>flags; 
407  
408 
if (s2>nalloc != s1>nalloc) {

409 
s2>nalloc = s1>nalloc; 
410 
srenew(s2>x,s1>nalloc); 
411 
srenew(ems2>f, s1>nalloc); 
412 
if (s2>flags & (1<<estCGP)) 
413 
srenew(s2>cg_p, s1>nalloc); 
414 
} 
415 

416 
s2>natoms = s1>natoms; 
417 
s2>lambda = s1>lambda; 
418 
copy_mat(s1>box,s2>box); 
419  
420 
start = md>start; 
421 
end = md>start + md>homenr; 
422  
423 
x1 = s1>x; 
424 
x2 = s2>x; 
425 
gf = 0;

426 
for(i=start; i<end; i++) {

427 
if (md>cFREEZE)

428 
gf = md>cFREEZE[i]; 
429 
for(m=0; m<DIM; m++) { 
430 
if (ir>opts.nFreeze[gf][m])

431 
x2[i][m] = x1[i][m]; 
432 
else

433 
x2[i][m] = x1[i][m] + a*f[i][m]; 
434 
} 
435 
} 
436  
437 
if (s2>flags & (1<<estCGP)) { 
438 
/* Copy the CG p vector */

439 
x1 = s1>cg_p; 
440 
x2 = s2>cg_p; 
441 
for(i=start; i<end; i++)

442 
copy_rvec(x1[i],x2[i]); 
443 
} 
444  
445 
if (DOMAINDECOMP(cr)) {

446 
s2>ddp_count = s1>ddp_count; 
447 
if (s2>cg_gl_nalloc < s1>cg_gl_nalloc) {

448 
s2>cg_gl_nalloc = s1>cg_gl_nalloc; 
449 
srenew(s2>cg_gl,s2>cg_gl_nalloc); 
450 
} 
451 
s2>ncg_gl = s1>ncg_gl; 
452 
for(i=0; i<s2>ncg_gl; i++) 
453 
s2>cg_gl[i] = s1>cg_gl[i]; 
454 
s2>ddp_count_cg_gl = s1>ddp_count_cg_gl; 
455 
} 
456  
457 
if (constr) {

458 
wallcycle_start(wcycle,ewcCONSTR); 
459 
dvdlambda = 0;

460 
constrain(NULL,TRUE,TRUE,constr,&top>idef,

461 
ir,cr,count,0,md,

462 
s1>x,s2>x,NULL,s2>box,s2>lambda,

463 
&dvdlambda,NULL,NULL,nrnb,econqCoord); 
464 
wallcycle_stop(wcycle,ewcCONSTR); 
465 
} 
466 
} 
467  
468 
static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2) 
469  
470 
{ 
471 
int start,end,i,m;

472  
473 
if (DOMAINDECOMP(cr)) {

474 
start = 0;

475 
end = cr>dd>nat_home; 
476 
} else if (PARTDECOMP(cr)) { 
477 
pd_at_range(cr,&start,&end); 
478 
} else {

479 
start = 0;

480 
end = n; 
481 
} 
482  
483 
for(i=start; i<end; i++) {

484 
for(m=0; m<DIM; m++) { 
485 
x2[i][m] = x1[i][m] + a*f[i][m]; 
486 
} 
487 
} 
488 
} 
489  
490 
static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f) 
491  
492 
{ 
493 
int start,end,i,m;

494  
495 
if (DOMAINDECOMP(cr)) {

496 
start = 0;

497 
end = cr>dd>nat_home; 
498 
} else if (PARTDECOMP(cr)) { 
499 
pd_at_range(cr,&start,&end); 
500 
} else {

501 
start = 0;

502 
end = n; 
503 
} 
504  
505 
for(i=start; i<end; i++) {

506 
for(m=0; m<DIM; m++) { 
507 
f[i][m] = (x1[i][m]  x2[i][m])*a; 
508 
} 
509 
} 
510 
} 
511  
512 
static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr, 
513 
gmx_mtop_t *top_global,t_inputrec *ir, 
514 
em_state_t *ems,rvec **buf,gmx_localtop_t *top, 
515 
t_mdatoms *mdatoms,t_forcerec *fr, 
516 
gmx_vsite_t *vsite,gmx_constr_t constr, 
517 
t_nrnb *nrnb,gmx_wallcycle_t wcycle) 
518 
{ 
519 
/* Repartition the domain decomposition */

520 
wallcycle_start(wcycle,ewcDOMDEC); 
521 
dd_partition_system(fplog,step,cr,FALSE, 
522 
NULL,top_global,ir,

523 
&ems>s,&ems>f,buf, 
524 
mdatoms,top,fr,vsite,NULL,constr,

525 
nrnb,wcycle,FALSE); 
526 
dd_store_state(cr>dd,&ems>s); 
527 
wallcycle_stop(wcycle,ewcDOMDEC); 
528 
} 
529 

530 
static void evaluate_energy(FILE *fplog,bool bVerbose,t_commrec *cr, 
531 
t_state *state_global,gmx_mtop_t *top_global, 
532 
em_state_t *ems,rvec **buf,gmx_localtop_t *top, 
533 
t_inputrec *inputrec, 
534 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
535 
gmx_vsite_t *vsite,gmx_constr_t constr, 
536 
t_fcdata *fcd, 
537 
t_graph *graph,t_mdatoms *mdatoms, 
538 
t_forcerec *fr, rvec mu_tot, 
539 
gmx_enerdata_t *enerd,tensor vir,tensor pres, 
540 
int count,bool bFirst) 
541 
{ 
542 
real t; 
543 
bool bNS;

544 
int nabnsb;

545 
tensor force_vir,shake_vir,ekin; 
546 
real dvdl; 
547 
real terminate=0;

548 
t_state *state; 
549 

550 
/* Set the time to the initial time, the time does not change during EM */

551 
t = inputrec>init_t; 
552  
553 
if (bFirst 

554 
(DOMAINDECOMP(cr) && ems>s.ddp_count < cr>dd>ddp_count)) { 
555 
/* This the first state or an old state used before the last ns */

556 
bNS = TRUE; 
557 
} else {

558 
bNS = FALSE; 
559 
if (inputrec>nstlist > 0) { 
560 
bNS = TRUE; 
561 
} else if (inputrec>nstlist == 1) { 
562 
nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top>cgs,NULL,ems>s.x);

563 
if (PAR(cr))

564 
gmx_sumi(1,&nabnsb,cr);

565 
bNS = (nabnsb > 0);

566 
} 
567 
} 
568  
569 
if (vsite)

570 
construct_vsites(fplog,vsite,ems>s.x,nrnb,1,NULL, 
571 
top>idef.iparams,top>idef.il, 
572 
fr>ePBC,fr>bMolPBC,graph,cr,ems>s.box); 
573  
574 
if (DOMAINDECOMP(cr)) {

575 
if (bNS) {

576 
/* Repartition the domain decomposition */

577 
em_dd_partition_system(fplog,count,cr,top_global,inputrec, 
578 
ems,buf,top,mdatoms,fr,vsite,constr, 
579 
nrnb,wcycle); 
580 
} 
581 
} 
582 

583 
/* Calc force & energy on new trial position */

584 
/* do_force always puts the charge groups in the box and shifts again

585 
* We do not unshift, so molecules are always whole in congrad.c

586 
*/

587 
do_force(fplog,cr,inputrec, 
588 
count,nrnb,wcycle,top,&top_global>groups, 
589 
ems>s.box,ems>s.x,&ems>s.hist, 
590 
ems>f,*buf,force_vir,mdatoms,enerd,fcd, 
591 
ems>s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL, 
592 
GMX_FORCE_STATECHANGED  GMX_FORCE_ALLFORCES  
593 
(bNS ? GMX_FORCE_NS : 0));

594  
595 
/* Clear the unused shake virial and pressure */

596 
clear_mat(shake_vir); 
597 
clear_mat(pres); 
598  
599 
/* Calculate long range corrections to pressure and energy */

600 
calc_dispcorr(fplog,inputrec,fr,count,mdatoms>nr,ems>s.box,ems>s.lambda, 
601 
pres,force_vir,enerd>term); 
602  
603 
/* Communicate stuff when parallel */

604 
if (PAR(cr)) {

605 
wallcycle_start(wcycle,ewcMoveE); 
606  
607 
global_stat(fplog,cr,enerd,force_vir,shake_vir,mu_tot, 
608 
inputrec,NULL,FALSE,NULL,NULL,NULL,NULL,&terminate); 
609  
610 
wallcycle_stop(wcycle,ewcMoveE); 
611 
} 
612  
613 
ems>epot = enerd>term[F_EPOT]; 
614 

615 
if (constr) {

616 
/* Project out the constraint components of the force */

617 
wallcycle_start(wcycle,ewcCONSTR); 
618 
dvdl = 0;

619 
constrain(NULL,FALSE,FALSE,constr,&top>idef,

620 
inputrec,cr,count,0,mdatoms,

621 
ems>s.x,ems>f,ems>f,ems>s.box,ems>s.lambda,&dvdl, 
622 
NULL,&shake_vir,nrnb,econqForce);

623 
if (fr>bSepDVDL && fplog)

624 
fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);

625 
enerd>term[F_DVDL] += dvdl; 
626 
m_add(force_vir,shake_vir,vir); 
627 
wallcycle_stop(wcycle,ewcCONSTR); 
628 
} else {

629 
copy_mat(force_vir,vir); 
630 
} 
631  
632 
clear_mat(ekin); 
633 
enerd>term[F_PRES] = 
634 
calc_pres(fr>ePBC,inputrec>nwall,ems>s.box,ekin,vir,pres, 
635 
(fr>eeltype==eelPPPM)?enerd>term[F_COUL_RECIP]:0.0); 
636  
637 
get_state_f_norm_max(cr,&(inputrec>opts),mdatoms,ems); 
638 
} 
639  
640 
static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms, 
641 
gmx_mtop_t *mtop, 
642 
em_state_t *s_min,em_state_t *s_b) 
643 
{ 
644 
rvec *fm,*fb,*fmg; 
645 
t_block *cgs_gl; 
646 
int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;

647 
double partsum;

648 
unsigned char *grpnrFREEZE; 
649  
650 
if (debug)

651 
fprintf(debug,"Doing reorder_partsum\n");

652  
653 
fm = s_min>f; 
654 
fb = s_b>f; 
655  
656 
cgs_gl = dd_charge_groups_global(cr>dd); 
657 
index = cgs_gl>index; 
658  
659 
/* Collect fm in a global vector fmg.

660 
* This conflics with the spirit of domain decomposition,

661 
* but to fully optimize this a much more complicated algorithm is required.

662 
*/

663 
snew(fmg,mtop>natoms); 
664 

665 
ncg = s_min>s.ncg_gl; 
666 
cg_gl = s_min>s.cg_gl; 
667 
i = 0;

668 
for(c=0; c<ncg; c++) { 
669 
cg = cg_gl[c]; 
670 
a0 = index[cg]; 
671 
a1 = index[cg+1];

672 
for(a=a0; a<a1; a++) {

673 
copy_rvec(fm[i],fmg[a]); 
674 
i++; 
675 
} 
676 
} 
677 
gmx_sum(mtop>natoms*3,fmg[0],cr); 
678  
679 
/* Now we will determine the part of the sum for the cgs in state s_b */

680 
ncg = s_b>s.ncg_gl; 
681 
cg_gl = s_b>s.cg_gl; 
682 
partsum = 0;

683 
i = 0;

684 
gf = 0;

685 
grpnrFREEZE = mtop>groups.grpnr[egcFREEZE]; 
686 
for(c=0; c<ncg; c++) { 
687 
cg = cg_gl[c]; 
688 
a0 = index[cg]; 
689 
a1 = index[cg+1];

690 
for(a=a0; a<a1; a++) {

691 
if (mdatoms>cFREEZE && grpnrFREEZE) {

692 
gf = grpnrFREEZE[i]; 
693 
} 
694 
for(m=0; m<DIM; m++) { 
695 
if (!opts>nFreeze[gf][m]) {

696 
partsum += (fb[i][m]  fmg[a][m])*fb[i][m]; 
697 
} 
698 
} 
699 
i++; 
700 
} 
701 
} 
702 

703 
sfree(fmg); 
704  
705 
return partsum;

706 
} 
707  
708 
static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,

709 
gmx_mtop_t *mtop, 
710 
em_state_t *s_min,em_state_t *s_b) 
711 
{ 
712 
rvec *fm,*fb; 
713 
double sum;

714 
int gf,i,m;

715  
716 
/* This is just the classical PolakRibiere calculation of beta;

717 
* it looks a bit complicated since we take freeze groups into account,

718 
* and might have to sum it in parallel runs.

719 
*/

720 

721 
if (!DOMAINDECOMP(cr) 

722 
(s_min>s.ddp_count == cr>dd>ddp_count && 
723 
s_b>s.ddp_count == cr>dd>ddp_count)) { 
724 
fm = s_min>f; 
725 
fb = s_b>f; 
726 
sum = 0;

727 
gf = 0;

728 
/* This part of code can be incorrect with DD,

729 
* since the atom ordering in s_b and s_min might differ.

730 
*/

731 
for(i=mdatoms>start; i<mdatoms>start+mdatoms>homenr; i++) {

732 
if (mdatoms>cFREEZE)

733 
gf = mdatoms>cFREEZE[i]; 
734 
for(m=0; m<DIM; m++) 
735 
if (!opts>nFreeze[gf][m]) {

736 
sum += (fb[i][m]  fm[i][m])*fb[i][m]; 
737 
} 
738 
} 
739 
} else {

740 
/* We need to reorder cgs while summing */

741 
sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b); 
742 
} 
743 
if (PAR(cr))

744 
gmx_sumd(1,&sum,cr);

745  
746 
return sum/sqr(s_min>fnorm);

747 
} 
748  
749 
time_t do_cg(FILE *fplog,t_commrec *cr, 
750 
int nfile,t_filenm fnm[],

751 
bool bVerbose,bool bCompact, 
752 
gmx_vsite_t *vsite,gmx_constr_t constr, 
753 
int stepout,

754 
t_inputrec *inputrec, 
755 
gmx_mtop_t *top_global,t_fcdata *fcd, 
756 
t_state *state_global,rvec f[], 
757 
rvec buf[],t_mdatoms *mdatoms, 
758 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
759 
gmx_edsam_t ed, 
760 
t_forcerec *fr, 
761 
int repl_ex_nst,int repl_ex_seed, 
762 
real cpt_period,real max_hours, 
763 
unsigned long Flags) 
764 
{ 
765 
const char *CG="PolakRibiere Conjugate Gradients"; 
766  
767 
em_state_t *s_min,*s_a,*s_b,*s_c; 
768 
gmx_localtop_t *top; 
769 
gmx_enerdata_t *enerd; 
770 
t_graph *graph; 
771 
rvec *f_global,*p,*sf,*sfm; 
772 
double gpa,gpb,gpc,tmp,sum[2],minstep; 
773 
real fnormn; 
774 
real stepsize; 
775 
real a,b,c,beta=0.0; 
776 
real epot_repl=0;

777 
real pnorm; 
778 
t_mdebin *mdebin; 
779 
bool converged,foundlower;

780 
rvec mu_tot; 
781 
time_t start_t; 
782 
bool do_log=FALSE,do_ene=FALSE,do_x,do_f;

783 
tensor vir,pres; 
784 
int number_steps,neval=0,nstcg=inputrec>nstcgsteep; 
785 
int fp_trn,fp_ene;

786 
int i,m,gf,step,nminstep;

787 
real terminate=0;

788  
789 
step=0;

790  
791 
s_min = init_em_state(); 
792 
s_a = init_em_state(); 
793 
s_b = init_em_state(); 
794 
s_c = init_em_state(); 
795  
796 
/* Init em and store the local state in s_min */

797 
init_em(fplog,CG,cr,inputrec, 
798 
state_global,top_global,s_min,&top,f,&buf,&f_global, 
799 
nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr, 
800 
nfile,fnm,&fp_trn,&fp_ene,&mdebin); 
801  
802 
/* Print to log file */

803 
start_t=print_date_and_time(fplog,cr>nodeid, 
804 
"Started PolakRibiere Conjugate Gradients");

805 
wallcycle_start(wcycle,ewcRUN); 
806 

807 
/* Max number of steps */

808 
number_steps=inputrec>nsteps; 
809  
810 
if (MASTER(cr))

811 
sp_header(stderr,CG,inputrec>em_tol,number_steps); 
812 
if (fplog)

813 
sp_header(fplog,CG,inputrec>em_tol,number_steps); 
814  
815 
/* Call the force routine and some auxiliary (neighboursearching etc.) */

816 
/* do_force always puts the charge groups in the box and shifts again

817 
* We do not unshift, so molecules are always whole in congrad.c

818 
*/

819 
evaluate_energy(fplog,bVerbose,cr, 
820 
state_global,top_global,s_min,&buf,top, 
821 
inputrec,nrnb,wcycle, 
822 
vsite,constr,fcd,graph,mdatoms,fr, 
823 
mu_tot,enerd,vir,pres,1,TRUE);

824 
where(); 
825  
826 
if (MASTER(cr)) {

827 
/* Copy stuff to the energy bin for easy printing etc. */

828 
upd_mdebin(mdebin,NULL,TRUE,mdatoms>tmass,step,(real)step,

829 
enerd,&s_min>s,s_min>s.box, 
830 
NULL,NULL,vir,pres,NULL,mu_tot,constr); 
831 

832 
print_ebin_header(fplog,step,step,s_min>s.lambda); 
833 
print_ebin(fp_ene,TRUE,FALSE,FALSE,fplog,step,step,step,eprNORMAL, 
834 
TRUE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
835 
} 
836 
where(); 
837  
838 
/* Estimate/guess the initial stepsize */

839 
stepsize = inputrec>em_stepsize/s_min>fnorm; 
840 

841 
if (MASTER(cr)) {

842 
fprintf(stderr," Fmax = %12.5e on atom %d\n",

843 
s_min>fmax,s_min>a_fmax+1);

844 
fprintf(stderr," FNorm = %12.5e\n",

845 
s_min>fnorm/sqrt(state_global>natoms)); 
846 
fprintf(stderr,"\n");

847 
/* and copy to the log file too... */

848 
fprintf(fplog," Fmax = %12.5e on atom %d\n",

849 
s_min>fmax,s_min>a_fmax+1);

850 
fprintf(fplog," FNorm = %12.5e\n",

851 
s_min>fnorm/sqrt(state_global>natoms)); 
852 
fprintf(fplog,"\n");

853 
} 
854 
/* Start the loop over CG steps.

855 
* Each successful step is counted, and we continue until

856 
* we either converge or reach the max number of steps.

857 
*/

858 
for(step=0,converged=FALSE;( step<=number_steps  number_steps==0) && !converged;step++) { 
859 

860 
/* start taking steps in a new direction

861 
* First time we enter the routine, beta=0, and the direction is

862 
* simply the negative gradient.

863 
*/

864  
865 
/* Calculate the new direction in p, and the gradient in this direction, gpa */

866 
p = s_min>s.cg_p; 
867 
sf = s_min>f; 
868 
gpa = 0;

869 
gf = 0;

870 
for(i=mdatoms>start; i<mdatoms>start+mdatoms>homenr; i++) {

871 
if (mdatoms>cFREEZE)

872 
gf = mdatoms>cFREEZE[i]; 
873 
for(m=0; m<DIM; m++) { 
874 
if (!inputrec>opts.nFreeze[gf][m]) {

875 
p[i][m] = sf[i][m] + beta*p[i][m]; 
876 
gpa = p[i][m]*sf[i][m]; 
877 
/* f is negative gradient, thus the sign */

878 
} else {

879 
p[i][m] = 0;

880 
} 
881 
} 
882 
} 
883 

884 
/* Sum the gradient along the line across CPUs */

885 
if (PAR(cr))

886 
gmx_sumd(1,&gpa,cr);

887  
888 
/* Calculate the norm of the search vector */

889 
get_f_norm_max(cr,&(inputrec>opts),mdatoms,p,&pnorm,NULL,NULL); 
890 

891 
/* Just in case stepsize reaches zero due to numerical precision... */

892 
if(stepsize<=0) 
893 
stepsize = inputrec>em_stepsize/pnorm; 
894 

895 
/*

896 
* Double check the value of the derivative in the search direction.

897 
* If it is positive it must be due to the old information in the

898 
* CG formula, so just remove that and start over with beta=0.

899 
* This corresponds to a steepest descent step.

900 
*/

901 
if(gpa>0) { 
902 
beta = 0;

903 
step; /* Don't count this step since we are restarting */

904 
continue; /* Go back to the beginning of the big forloop */ 
905 
} 
906  
907 
/* Calculate minimum allowed stepsize, before the average (norm)

908 
* relative change in coordinate is smaller than precision

909 
*/

910 
minstep=0;

911 
for (i=mdatoms>start; i<mdatoms>start+mdatoms>homenr; i++) {

912 
for(m=0; m<DIM; m++) { 
913 
tmp = fabs(s_min>s.x[i][m]); 
914 
if(tmp < 1.0) 
915 
tmp = 1.0; 
916 
tmp = p[i][m]/tmp; 
917 
minstep += tmp*tmp; 
918 
} 
919 
} 
920 
/* Add up from all CPUs */

921 
if(PAR(cr))

922 
gmx_sumd(1,&minstep,cr);

923  
924 
minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global>natoms));

925  
926 
if(stepsize<minstep) {

927 
converged=TRUE; 
928 
break;

929 
} 
930 

931 
/* Write coordinates if necessary */

932 
do_x = do_per_step(step,inputrec>nstxout); 
933 
do_f = do_per_step(step,inputrec>nstfout); 
934 

935 
write_traj(fplog,cr,fp_trn,do_x,FALSE,do_f,0,FALSE,0,NULL,FALSE, 
936 
top_global,inputrec>eI,inputrec>simulation_part,step,(double)step,

937 
&s_min>s,state_global,s_min>f,f_global); 
938 

939 
/* Take a step downhill.

940 
* In theory, we should minimize the function along this direction.

941 
* That is quite possible, but it turns out to take 510 function evaluations

942 
* for each line. However, we dont really need to find the exact minimum 

943 
* it is much better to start a new CG step in a modified direction as soon

944 
* as we are close to it. This will save a lot of energy evaluations.

945 
*

946 
* In practice, we just try to take a single step.

947 
* If it worked (i.e. lowered the energy), we increase the stepsize but

948 
* the continue straight to the next CG step without trying to find any minimum.

949 
* If it didn't work (higher energy), there must be a minimum somewhere between

950 
* the old position and the new one.

951 
*

952 
* Due to the finite numerical accuracy, it turns out that it is a good idea

953 
* to even accept a SMALL increase in energy, if the derivative is still downhill.

954 
* This leads to lower final energies in the tests I've done. / Erik

955 
*/

956 
s_a>epot = s_min>epot; 
957 
a = 0.0; 
958 
c = a + stepsize; /* reference position along line is zero */

959 

960 
if (DOMAINDECOMP(cr) && s_min>s.ddp_count < cr>dd>ddp_count) {

961 
em_dd_partition_system(fplog,step,cr,top_global,inputrec, 
962 
s_min,&buf,top,mdatoms,fr,vsite,constr, 
963 
nrnb,wcycle); 
964 
} 
965  
966 
/* Take a trial step (new coords in s_c) */

967 
do_em_step(cr,inputrec,mdatoms,s_min,c,s_min>s.cg_p,s_c, 
968 
constr,top,nrnb,wcycle,1);

969 

970 
neval++; 
971 
/* Calculate energy for the trial step */

972 
evaluate_energy(fplog,bVerbose,cr, 
973 
state_global,top_global,s_c,&buf,top, 
974 
inputrec,nrnb,wcycle, 
975 
vsite,constr,fcd,graph,mdatoms,fr, 
976 
mu_tot,enerd,vir,pres,1,FALSE);

977 

978 
/* Calc derivative along line */

979 
p = s_c>s.cg_p; 
980 
sf = s_c>f; 
981 
gpc=0;

982 
for(i=mdatoms>start; i<mdatoms>start+mdatoms>homenr; i++) {

983 
for(m=0; m<DIM; m++) 
984 
gpc = p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */

985 
} 
986 
/* Sum the gradient along the line across CPUs */

987 
if (PAR(cr))

988 
gmx_sumd(1,&gpc,cr);

989  
990 
/* This is the max amount of increase in energy we tolerate */

991 
tmp=sqrt(GMX_REAL_EPS)*fabs(s_a>epot); 
992  
993 
/* Accept the step if the energy is lower, or if it is not significantly higher

994 
* and the line derivative is still negative.

995 
*/

996 
if (s_c>epot < s_a>epot  (gpc < 0 && s_c>epot < (s_a>epot + tmp))) { 
997 
foundlower = TRUE; 
998 
/* Great, we found a better energy. Increase step for next iteration

999 
* if we are still going down, decrease it otherwise

1000 
*/

1001 
if(gpc<0) 
1002 
stepsize *= 1.618034; /* The golden section */ 
1003 
else

1004 
stepsize *= 0.618034; /* 1/golden section */ 
1005 
} else {

1006 
/* New energy is the same or higher. We will have to do some work

1007 
* to find a smaller value in the interval. Take smaller step next time!

1008 
*/

1009 
foundlower = FALSE; 
1010 
stepsize *= 0.618034; 
1011 
} 
1012  
1013  
1014  
1015 

1016 
/* OK, if we didn't find a lower value we will have to locate one now  there must

1017 
* be one in the interval [a=0,c].

1018 
* The same thing is valid here, though: Don't spend dozens of iterations to find

1019 
* the line minimum. We try to interpolate based on the derivative at the endpoints,

1020 
* and only continue until we find a lower value. In most cases this means 12 iterations.

1021 
*

1022 
* I also have a safeguard for potentially really patological functions so we never

1023 
* take more than 20 steps before we give up ...

1024 
*

1025 
* If we already found a lower value we just skip this step and continue to the update.

1026 
*/

1027 
if (!foundlower) {

1028 
nminstep=0;

1029  
1030 
do {

1031 
/* Select a new trial point.

1032 
* If the derivatives at points a & c have different sign we interpolate to zero,

1033 
* otherwise just do a bisection.

1034 
*/

1035 
if(gpa<0 && gpc>0) 
1036 
b = a + gpa*(ac)/(gpcgpa); 
1037 
else

1038 
b = 0.5*(a+c); 
1039 

1040 
/* safeguard if interpolation close to machine accuracy causes errors:

1041 
* never go outside the interval

1042 
*/

1043 
if(b<=a  b>=c)

1044 
b = 0.5*(a+c); 
1045 

1046 
if (DOMAINDECOMP(cr) && s_min>s.ddp_count != cr>dd>ddp_count) {

1047 
/* Reload the old state */

1048 
em_dd_partition_system(fplog,1,cr,top_global,inputrec,

1049 
s_min,&buf,top,mdatoms,fr,vsite,constr, 
1050 
nrnb,wcycle); 
1051 
} 
1052  
1053 
/* Take a trial step to this new point  new coords in s_b */

1054 
do_em_step(cr,inputrec,mdatoms,s_min,b,s_min>s.cg_p,s_b, 
1055 
constr,top,nrnb,wcycle,1);

1056 

1057 
neval++; 
1058 
/* Calculate energy for the trial step */

1059 
evaluate_energy(fplog,bVerbose,cr, 
1060 
state_global,top_global,s_b,&buf,top, 
1061 
inputrec,nrnb,wcycle, 
1062 
vsite,constr,fcd,graph,mdatoms,fr, 
1063 
mu_tot,enerd,vir,pres,1,FALSE);

1064 

1065 
/* p does not change within a step, but since the domain decomposition

1066 
* might change, we have to use cg_p of s_b here.

1067 
*/

1068 
p = s_b>s.cg_p; 
1069 
sf = s_b>f; 
1070 
gpb=0;

1071 
for(i=mdatoms>start; i<mdatoms>start+mdatoms>homenr; i++) {

1072 
for(m=0; m<DIM; m++) 
1073 
gpb = p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */

1074 
} 
1075 
/* Sum the gradient along the line across CPUs */

1076 
if (PAR(cr))

1077 
gmx_sumd(1,&gpb,cr);

1078 

1079 
if (debug)

1080 
fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",

1081 
s_a>epot,s_b>epot,s_c>epot,gpb); 
1082  
1083 
epot_repl = s_b>epot; 
1084 

1085 
/* Keep one of the intervals based on the value of the derivative at the new point */

1086 
if (gpb > 0) { 
1087 
/* Replace c endpoint with b */

1088 
swap_em_state(s_b,s_c); 
1089 
c = b; 
1090 
gpc = gpb; 
1091 
} else {

1092 
/* Replace a endpoint with b */

1093 
swap_em_state(s_b,s_a); 
1094 
a = b; 
1095 
gpa = gpb; 
1096 
} 
1097 

1098 
/*

1099 
* Stop search as soon as we find a value smaller than the endpoints.

1100 
* Never run more than 20 steps, no matter what.

1101 
*/

1102 
nminstep++; 
1103 
} while ((epot_repl > s_a>epot  epot_repl > s_c>epot) &&

1104 
(nminstep < 20));

1105 

1106 
if (fabs(epot_repl  s_min>epot) < fabs(s_min>epot)*GMX_REAL_EPS 

1107 
nminstep >= 20) {

1108 
/* OK. We couldn't find a significantly lower energy.

1109 
* If beta==0 this was steepest descent, and then we give up.

1110 
* If not, set beta=0 and restart with steepest descent before quitting.

1111 
*/

1112 
if (beta == 0.0) { 
1113 
/* Converged */

1114 
converged = TRUE; 
1115 
break;

1116 
} else {

1117 
/* Reset memory before giving up */

1118 
beta = 0.0; 
1119 
continue;

1120 
} 
1121 
} 
1122 

1123 
/* Select min energy state of A & C, put the best in B.

1124 
*/

1125 
if (s_c>epot < s_a>epot) {

1126 
if (debug)

1127 
fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",

1128 
s_c>epot,s_a>epot); 
1129 
swap_em_state(s_b,s_c); 
1130 
gpb = gpc; 
1131 
b = c; 
1132 
} else {

1133 
if (debug)

1134 
fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",

1135 
s_a>epot,s_c>epot); 
1136 
swap_em_state(s_b,s_a); 
1137 
gpb = gpa; 
1138 
b = a; 
1139 
} 
1140 

1141 
} else {

1142 
if (debug)

1143 
fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",

1144 
s_c>epot); 
1145 
swap_em_state(s_b,s_c); 
1146 
gpb = gpc; 
1147 
b = c; 
1148 
} 
1149 

1150 
/* new search direction */

1151 
/* beta = 0 means forget all memory and restart with steepest descents. */

1152 
if (nstcg && ((step % nstcg)==0)) 
1153 
beta = 0.0; 
1154 
else {

1155 
/* s_min>fnorm cannot be zero, because then we would have converged

1156 
* and broken out.

1157 
*/

1158  
1159 
/* PolakRibiere update.

1160 
* Change to fnorm2/fnorm2_old for FletcherReeves

1161 
*/

1162 
beta = pr_beta(cr,&inputrec>opts,mdatoms,top_global,s_min,s_b); 
1163 
} 
1164 
/* Limit beta to prevent oscillations */

1165 
if (fabs(beta) > 5.0) 
1166 
beta = 0.0; 
1167 

1168 

1169 
/* update positions */

1170 
swap_em_state(s_min,s_b); 
1171 
gpa = gpb; 
1172 

1173 
/* Print it if necessary */

1174 
if (MASTER(cr)) {

1175 
if(bVerbose)

1176 
fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",

1177 
step,s_min>epot,s_min>fnorm/sqrt(state_global>natoms), 
1178 
s_min>fmax,s_min>a_fmax+1);

1179 
/* Store the new (lower) energies */

1180 
upd_mdebin(mdebin,NULL,TRUE,mdatoms>tmass,step,(real)step,

1181 
enerd,&s_min>s,s_min>s.box, 
1182 
NULL,NULL,vir,pres,NULL,mu_tot,constr); 
1183 
do_log = do_per_step(step,inputrec>nstlog); 
1184 
do_ene = do_per_step(step,inputrec>nstenergy); 
1185 
if(do_log)

1186 
print_ebin_header(fplog,step,step,s_min>s.lambda); 
1187 
print_ebin(fp_ene,do_ene,FALSE,FALSE, 
1188 
do_log ? fplog : NULL,step,step,step,eprNORMAL,

1189 
TRUE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
1190 
} 
1191 

1192 
/* Stop when the maximum force lies below tolerance.

1193 
* If we have reached machine precision, converged is already set to true.

1194 
*/

1195 
converged = converged  (s_min>fmax < inputrec>em_tol); 
1196 

1197 
} /* End of the loop */

1198 

1199 
if (converged)

1200 
step; /* we never took that last step in this case */

1201 

1202 
if (s_min>fmax > inputrec>em_tol) {

1203 
if (MASTER(cr)) {

1204 
warn_step(stderr,inputrec>em_tol,FALSE); 
1205 
warn_step(fplog,inputrec>em_tol,FALSE); 
1206 
} 
1207 
converged = FALSE; 
1208 
} 
1209 

1210 
if (MASTER(cr)) {

1211 
/* If we printed energy and/or logfile last step (which was the last step)

1212 
* we don't have to do it again, but otherwise print the final values.

1213 
*/

1214 
if(!do_log) {

1215 
/* Write final value to log since we didn't do anything the last step */

1216 
print_ebin_header(fplog,step,step,s_min>s.lambda); 
1217 
} 
1218 
if (!do_ene  !do_log) {

1219 
/* Write final energy file entries */

1220 
print_ebin(fp_ene,!do_ene,FALSE,FALSE, 
1221 
!do_log ? fplog : NULL,step,step,step,eprNORMAL,

1222 
TRUE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
1223 
} 
1224 
} 
1225 

1226 
if (!PAR(cr)) {

1227 
copy_em_coords_back(s_min,state_global); 
1228 
} 
1229  
1230 
/* Print some stuff... */

1231 
if (MASTER(cr))

1232 
fprintf(stderr,"\nwriting lowest energy coordinates.\n");

1233 

1234 
/* IMPORTANT!

1235 
* For accurate normal mode calculation it is imperative that we

1236 
* store the last conformation into the full precision binary trajectory.

1237 
*

1238 
* However, we should only do it if we did NOT already write this step

1239 
* above (which we did if do_x or do_f was true).

1240 
*/

1241 
do_x = !do_per_step(step,inputrec>nstxout); 
1242 
do_f = (inputrec>nstfout > 0 && !do_per_step(step,inputrec>nstfout));

1243 

1244 
write_traj(fplog,cr,fp_trn,do_x,FALSE,do_f, 
1245 
0,FALSE,0,NULL,FALSE, 
1246 
top_global,inputrec>eI,inputrec>simulation_part,step,(real)step, 
1247 
&s_min>s,state_global,s_min>f,f_global); 
1248 
if (MASTER(cr)) {

1249 
write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm), 
1250 
*top_global>name,top_global, 
1251 
state_global>x,NULL,inputrec>ePBC,state_global>box);

1252 
} 
1253 

1254 
fnormn = s_min>fnorm/sqrt(state_global>natoms); 
1255 

1256 
if (MASTER(cr)) {

1257 
print_converged(stderr,CG,inputrec>em_tol,step,converged,number_steps, 
1258 
s_min>epot,s_min>fmax,s_min>a_fmax,fnormn); 
1259 
print_converged(fplog,CG,inputrec>em_tol,step,converged,number_steps, 
1260 
s_min>epot,s_min>fmax,s_min>a_fmax,fnormn); 
1261 

1262 
fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);

1263 
} 
1264 

1265 
finish_em(fplog,cr,fp_trn,fp_ene); 
1266 

1267 
/* To print the actual number of steps we needed somewhere */

1268 
inputrec>nsteps=step; 
1269  
1270 
return start_t;

1271 
} /* That's all folks */

1272  
1273  
1274 
time_t do_lbfgs(FILE *fplog,t_commrec *cr, 
1275 
int nfile,t_filenm fnm[],

1276 
bool bVerbose,bool bCompact, 
1277 
gmx_vsite_t *vsite,gmx_constr_t constr, 
1278 
int stepout,

1279 
t_inputrec *inputrec, 
1280 
gmx_mtop_t *top_global,t_fcdata *fcd, 
1281 
t_state *state,rvec f[], 
1282 
rvec buf[],t_mdatoms *mdatoms, 
1283 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
1284 
gmx_edsam_t ed, 
1285 
t_forcerec *fr, 
1286 
int repl_ex_nst,int repl_ex_seed, 
1287 
real cpt_period,real max_hours, 
1288 
unsigned long Flags) 
1289 
{ 
1290 
static char *LBFGS="LowMemory BFGS Minimizer"; 
1291 
em_state_t ems; 
1292 
gmx_localtop_t *top; 
1293 
gmx_enerdata_t *enerd; 
1294 
t_graph *graph; 
1295 
int ncorr,nmaxcorr,point,cp,neval,nminstep;

1296 
double stepsize,gpa,gpb,gpc,tmp,minstep;

1297 
real *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg; 
1298 
real *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp; 
1299 
real a,b,c,maxdelta,delta; 
1300 
real diag,Epot0,Epot,EpotA,EpotB,EpotC; 
1301 
real dgdx,dgdg,sq,yr,beta; 
1302 
t_mdebin *mdebin; 
1303 
bool converged,first;

1304 
rvec mu_tot; 
1305 
real fnorm,fmax; 
1306 
time_t start_t; 
1307 
bool do_log,do_ene,do_x,do_f,foundlower,*frozen;

1308 
tensor vir,pres; 
1309 
int fp_trn,fp_ene,start,end,number_steps;

1310 
int i,k,m,n,nfmax,gf,step;

1311 
/* not used */

1312 
real terminate; 
1313  
1314 
if (PAR(cr))

1315 
gmx_fatal(FARGS,"Cannot do parallel LBFGS Minimization  yet.\n");

1316 

1317 
n = 3*state>natoms;

1318 
nmaxcorr = inputrec>nbfgscorr; 
1319 

1320 
/* Allocate memory */

1321 
snew(f,state>natoms); 
1322 
/* Use pointers to real so we dont have to loop over both atoms and

1323 
* dimensions all the time...

1324 
* x/f are allocated as rvec *, so make new x0/f0 pointerstoreal

1325 
* that point to the same memory.

1326 
*/

1327 
snew(xa,n); 
1328 
snew(xb,n); 
1329 
snew(xc,n); 
1330 
snew(fa,n); 
1331 
snew(fb,n); 
1332 
snew(fc,n); 
1333 
snew(frozen,n); 
1334 

1335 
xx = (real *)state>x; 
1336 
ff = (real *)f; 
1337  
1338 
printf("x0: %20.12g %20.12g %20.12g\n",xx[0],xx[1],xx[2]); 
1339  
1340 
snew(p,n); 
1341 
snew(lastx,n); 
1342 
snew(lastf,n); 
1343 
snew(rho,nmaxcorr); 
1344 
snew(alpha,nmaxcorr); 
1345 

1346 
snew(dx,nmaxcorr); 
1347 
for(i=0;i<nmaxcorr;i++) 
1348 
snew(dx[i],n); 
1349 

1350 
snew(dg,nmaxcorr); 
1351 
for(i=0;i<nmaxcorr;i++) 
1352 
snew(dg[i],n); 
1353  
1354 
step = 0;

1355 
neval = 0;

1356  
1357 
/* Init em */

1358 
init_em(fplog,LBFGS,cr,inputrec, 
1359 
state,top_global,&ems,&top,f,&buf,&f, 
1360 
nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr, 
1361 
nfile,fnm,&fp_trn,&fp_ene,&mdebin); 
1362 
/* Do_lbfgs is not completely updated like do_steep and do_cg,

1363 
* so we free some memory again.

1364 
*/

1365 
sfree(ems.s.x); 
1366 
sfree(ems.f); 
1367  
1368 
start = mdatoms>start; 
1369 
end = mdatoms>homenr + start; 
1370 

1371 
/* Print to log file */

1372 
start_t=print_date_and_time(fplog,cr>nodeid, 
1373 
"Started LowMemory BFGS Minimization");

1374 
wallcycle_start(wcycle,ewcRUN); 
1375 

1376 
do_log = do_ene = do_x = do_f = TRUE; 
1377 

1378 
/* Max number of steps */

1379 
number_steps=inputrec>nsteps; 
1380  
1381 
/* Create a 3*natoms index to tell whether each degree of freedom is frozen */

1382 
gf = 0;

1383 
for(i=start; i<end; i++) {

1384 
if (mdatoms>cFREEZE)

1385 
gf = mdatoms>cFREEZE[i]; 
1386 
for(m=0; m<DIM; m++) 
1387 
frozen[3*i+m]=inputrec>opts.nFreeze[gf][m];

1388 
} 
1389 
if (MASTER(cr))

1390 
sp_header(stderr,LBFGS,inputrec>em_tol,number_steps); 
1391 
if (fplog)

1392 
sp_header(fplog,LBFGS,inputrec>em_tol,number_steps); 
1393 

1394 
if (vsite)

1395 
construct_vsites(fplog,vsite,state>x,nrnb,1,NULL, 
1396 
top>idef.iparams,top>idef.il, 
1397 
fr>ePBC,fr>bMolPBC,graph,cr,state>box); 
1398 

1399 
/* Call the force routine and some auxiliary (neighboursearching etc.) */

1400 
/* do_force always puts the charge groups in the box and shifts again

1401 
* We do not unshift, so molecules are always whole

1402 
*/

1403 
neval++; 
1404 
ems.s.x = state>x; 
1405 
ems.f = f; 
1406 
evaluate_energy(fplog,bVerbose,cr, 
1407 
state,top_global,&ems,&buf,top, 
1408 
inputrec,nrnb,wcycle, 
1409 
vsite,constr,fcd,graph,mdatoms,fr, 
1410 
mu_tot,enerd,vir,pres,1,TRUE);

1411 
where(); 
1412 

1413 
if (MASTER(cr)) {

1414 
/* Copy stuff to the energy bin for easy printing etc. */

1415 
upd_mdebin(mdebin,NULL,TRUE,mdatoms>tmass,step,(real)step,

1416 
enerd,state,state>box, 
1417 
NULL,NULL,vir,pres,NULL,mu_tot,constr); 
1418 

1419 
print_ebin_header(fplog,step,step,state>lambda); 
1420 
print_ebin(fp_ene,TRUE,FALSE,FALSE,fplog,step,step,step,eprNORMAL, 
1421 
TRUE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
1422 
} 
1423 
where(); 
1424 

1425 
/* This is the starting energy */

1426 
Epot = enerd>term[F_EPOT]; 
1427 

1428 
fnorm = ems.fnorm; 
1429 
fmax = ems.fmax; 
1430 
nfmax = ems.a_fmax; 
1431 

1432 
/* Set the initial step.

1433 
* since it will be multiplied by the nonnormalized search direction

1434 
* vector (force vector the first time), we scale it by the

1435 
* norm of the force.

1436 
*/

1437 

1438 
if (MASTER(cr)) {

1439 
fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);

1440 
fprintf(stderr," Fmax = %12.5e on atom %d\n",fmax,nfmax+1); 
1441 
fprintf(stderr," FNorm = %12.5e\n",fnorm/sqrt(state>natoms));

1442 
fprintf(stderr,"\n");

1443 
/* and copy to the log file too... */

1444 
fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);

1445 
fprintf(fplog," Fmax = %12.5e on atom %d\n",fmax,nfmax+1); 
1446 
fprintf(fplog," FNorm = %12.5e\n",fnorm/sqrt(state>natoms));

1447 
fprintf(fplog,"\n");

1448 
} 
1449 

1450 
point=0;

1451 
for(i=0;i<n;i++) 
1452 
if(!frozen[i])

1453 
dx[point][i] = ff[i]; /* Initial search direction */

1454 
else

1455 
dx[point][i] = 0;

1456  
1457 
stepsize = 1.0/fnorm; 
1458 
converged = FALSE; 
1459 

1460 
/* Start the loop over BFGS steps.

1461 
* Each successful step is counted, and we continue until

1462 
* we either converge or reach the max number of steps.

1463 
*/

1464 

1465 
ncorr=0;

1466  
1467 
/* Set the gradient from the force */

1468 
for(step=0,converged=FALSE;( step<=number_steps  number_steps==0) && !converged;step++) { 
1469 

1470 
/* Write coordinates if necessary */

1471 
do_x = do_per_step(step,inputrec>nstxout); 
1472 
do_f = do_per_step(step,inputrec>nstfout); 
1473 

1474 
write_traj(fplog,cr,fp_trn,do_x,FALSE,do_f,0,FALSE,0,NULL,FALSE, 
1475 
top_global,inputrec>eI,inputrec>simulation_part,step,(real)step,state,state,f,f); 
1476  
1477 
/* Do the linesearching in the direction dx[point][0..(n1)] */

1478 

1479 
/* pointer to current direction  point=0 first time here */

1480 
s=dx[point]; 
1481 

1482 
/* calculate line gradient */

1483 
for(gpa=0,i=0;i<n;i++) 
1484 
gpa=s[i]*ff[i]; 
1485  
1486 
/* Calculate minimum allowed stepsize, before the average (norm)

1487 
* relative change in coordinate is smaller than precision

1488 
*/

1489 
for(minstep=0,i=0;i<n;i++) { 
1490 
tmp=fabs(xx[i]); 
1491 
if(tmp<1.0) 
1492 
tmp=1.0; 
1493 
tmp = s[i]/tmp; 
1494 
minstep += tmp*tmp; 
1495 
} 
1496 
minstep = GMX_REAL_EPS/sqrt(minstep/n); 
1497 

1498 
if(stepsize<minstep) {

1499 
converged=TRUE; 
1500 
break;

1501 
} 
1502 

1503 
/* Store old forces and coordinates */

1504 
for(i=0;i<n;i++) { 
1505 
lastx[i]=xx[i]; 
1506 
lastf[i]=ff[i]; 
1507 
} 
1508 
Epot0=Epot; 
1509 

1510 
first=TRUE; 
1511 

1512 
for(i=0;i<n;i++) 
1513 
xa[i]=xx[i]; 
1514 

1515 
/* Take a step downhill.

1516 
* In theory, we should minimize the function along this direction.

1517 
* That is quite possible, but it turns out to take 510 function evaluations

1518 
* for each line. However, we dont really need to find the exact minimum 

1519 
* it is much better to start a new BFGS step in a modified direction as soon

1520 
* as we are close to it. This will save a lot of energy evaluations.

1521 
*

1522 
* In practice, we just try to take a single step.

1523 
* If it worked (i.e. lowered the energy), we increase the stepsize but

1524 
* the continue straight to the next BFGS step without trying to find any minimum.

1525 
* If it didn't work (higher energy), there must be a minimum somewhere between

1526 
* the old position and the new one.

1527 
*

1528 
* Due to the finite numerical accuracy, it turns out that it is a good idea

1529 
* to even accept a SMALL increase in energy, if the derivative is still downhill.

1530 
* This leads to lower final energies in the tests I've done. / Erik

1531 
*/

1532 
foundlower=FALSE; 
1533 
EpotA = Epot0; 
1534 
a = 0.0; 
1535 
c = a + stepsize; /* reference position along line is zero */

1536  
1537 
/* Check stepsize first. We do not allow displacements

1538 
* larger than emstep.

1539 
*/

1540 
do {

1541 
c = a + stepsize; 
1542 
maxdelta=0;

1543 
for(i=0;i<n;i++) { 
1544 
delta=c*s[i]; 
1545 
if(delta>maxdelta)

1546 
maxdelta=delta; 
1547 
} 
1548 
if(maxdelta>inputrec>em_stepsize)

1549 
stepsize*=0.1; 
1550 
} while(maxdelta>inputrec>em_stepsize);

1551  
1552 
/* Take a trial step */

1553 
for (i=0; i<n; i++) 
1554 
xc[i] = lastx[i] + c*s[i]; 
1555 

1556 
neval++; 
1557 
/* Calculate energy for the trial step */

1558 
ems.s.x = (rvec *)xc; 
1559 
ems.f = (rvec *)fc; 
1560 
evaluate_energy(fplog,bVerbose,cr, 
1561 
state,top_global,&ems,&buf,top, 
1562 
inputrec,nrnb,wcycle, 
1563 
vsite,constr,fcd,graph,mdatoms,fr, 
1564 
mu_tot,enerd,vir,pres,step,FALSE); 
1565 
EpotC = ems.epot; 
1566 

1567 
/* Calc derivative along line */

1568 
for(gpc=0,i=0; i<n; i++) { 
1569 
gpc = s[i]*fc[i]; /* f is negative gradient, thus the sign */

1570 
} 
1571 
/* Sum the gradient along the line across CPUs */

1572 
if (PAR(cr))

1573 
gmx_sumd(1,&gpc,cr);

1574 

1575 
/* This is the max amount of increase in energy we tolerate */

1576 
tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA); 
1577 

1578 
/* Accept the step if the energy is lower, or if it is not significantly higher

1579 
* and the line derivative is still negative.

1580 
*/

1581 
if(EpotC<EpotA  (gpc<0 && EpotC<(EpotA+tmp))) { 
1582 
foundlower = TRUE; 
1583 
/* Great, we found a better energy. Increase step for next iteration

1584 
* if we are still going down, decrease it otherwise

1585 
*/

1586 
if(gpc<0) 
1587 
stepsize *= 1.618034; /* The golden section */ 
1588 
else

1589 
stepsize *= 0.618034; /* 1/golden section */ 
1590 
} else {

1591 
/* New energy is the same or higher. We will have to do some work

1592 
* to find a smaller value in the interval. Take smaller step next time!

1593 
*/

1594 
foundlower = FALSE; 
1595 
stepsize *= 0.618034; 
1596 
} 
1597 

1598 
/* OK, if we didn't find a lower value we will have to locate one now  there must

1599 
* be one in the interval [a=0,c].

1600 
* The same thing is valid here, though: Don't spend dozens of iterations to find

1601 
* the line minimum. We try to interpolate based on the derivative at the endpoints,

1602 
* and only continue until we find a lower value. In most cases this means 12 iterations.

1603 
*

1604 
* I also have a safeguard for potentially really patological functions so we never

1605 
* take more than 20 steps before we give up ...

1606 
*

1607 
* If we already found a lower value we just skip this step and continue to the update.

1608 
*/

1609  
1610 
if(!foundlower) {

1611 

1612 
nminstep=0;

1613 
do {

1614 
/* Select a new trial point.

1615 
* If the derivatives at points a & c have different sign we interpolate to zero,

1616 
* otherwise just do a bisection.

1617 
*/

1618 

1619 
if(gpa<0 && gpc>0) 
1620 
b = a + gpa*(ac)/(gpcgpa); 
1621 
else

1622 
b = 0.5*(a+c); 
1623 

1624 
/* safeguard if interpolation close to machine accuracy causes errors:

1625 
* never go outside the interval

1626 
*/

1627 
if(b<=a  b>=c)

1628 
b = 0.5*(a+c); 
1629 

1630 
/* Take a trial step */

1631 
for (i=0; i<n; i++) 
1632 
xb[i] = lastx[i] + b*s[i]; 
1633 

1634 
neval++; 
1635 
/* Calculate energy for the trial step */

1636 
ems.s.x = (rvec *)xb; 
1637 
ems.f = (rvec *)fb; 
1638 
evaluate_energy(fplog,bVerbose,cr, 
1639 
state,top_global,&ems,&buf,top, 
1640 
inputrec,nrnb,wcycle, 
1641 
vsite,constr,fcd,graph,mdatoms,fr, 
1642 
mu_tot,enerd,vir,pres,step,FALSE); 
1643 
EpotB = ems.epot; 
1644 

1645 
fnorm = ems.fnorm; 
1646 

1647 
for(gpb=0,i=0; i<n; i++) 
1648 
gpb = s[i]*fb[i]; /* f is negative gradient, thus the sign */

1649 

1650 
/* Sum the gradient along the line across CPUs */

1651 
if (PAR(cr))

1652 
gmx_sumd(1,&gpb,cr);

1653 

1654 
/* Keep one of the intervals based on the value of the derivative at the new point */

1655 
if(gpb>0) { 
1656 
/* Replace c endpoint with b */

1657 
EpotC = EpotB; 
1658 
c = b; 
1659 
gpc = gpb; 
1660 
/* swap coord pointers b/c */

1661 
xtmp = xb; 
1662 
ftmp = fb; 
1663 
xb = xc; 
1664 
fb = fc; 
1665 
xc = xtmp; 
1666 
fc = ftmp; 
1667 
} else {

1668 
/* Replace a endpoint with b */

1669 
EpotA = EpotB; 
1670 
a = b; 
1671 
gpa = gpb; 
1672 
/* swap coord pointers a/b */

1673 
xtmp = xb; 
1674 
ftmp = fb; 
1675 
xb = xa; 
1676 
fb = fa; 
1677 
xa = xtmp; 
1678 
fa = ftmp; 
1679 
} 
1680 

1681 
/*

1682 
* Stop search as soon as we find a value smaller than the endpoints,

1683 
* or if the tolerance is below machine precision.

1684 
* Never run more than 20 steps, no matter what.

1685 
*/

1686 
nminstep++; 
1687 
} while((EpotB>EpotA  EpotB>EpotC) && (nminstep<20)); 
1688  
1689 
if(fabs(EpotBEpot0)<GMX_REAL_EPS  nminstep>=20) { 
1690 
/* OK. We couldn't find a significantly lower energy.

1691 
* If ncorr==0 this was steepest descent, and then we give up.

1692 
* If not, reset memory to restart as steepest descent before quitting.

1693 
*/

1694 
if(ncorr==0) { 
1695 
/* Converged */

1696 
converged=TRUE; 
1697 
break;

1698 
} else {

1699 
/* Reset memory */

1700 
ncorr=0;

1701 
/* Search in gradient direction */

1702 
for(i=0;i<n;i++) 
1703 
dx[point][i]=ff[i]; 
1704 
/* Reset stepsize */

1705 
stepsize = 1.0/fnorm; 
1706 
continue;

1707 
} 
1708 
} 
1709 

1710 
/* Select min energy state of A & C, put the best in xx/ff/Epot

1711 
*/

1712 
if(EpotC<EpotA) {

1713 
Epot = EpotC; 
1714 
/* Use state C */

1715 
for(i=0;i<n;i++) { 
1716 
xx[i]=xc[i]; 
1717 
ff[i]=fc[i]; 
1718 
} 
1719 
stepsize=c; 
1720 
} else {

1721 
Epot = EpotA; 
1722 
/* Use state A */

1723 
for(i=0;i<n;i++) { 
1724 
xx[i]=xa[i]; 
1725 
ff[i]=fa[i]; 
1726 
} 
1727 
stepsize=a; 
1728 
} 
1729 

1730 
} else {

1731 
/* found lower */

1732 
Epot = EpotC; 
1733 
/* Use state C */

1734 
for(i=0;i<n;i++) { 
1735 
xx[i]=xc[i]; 
1736 
ff[i]=fc[i]; 
1737 
} 
1738 
stepsize=c; 
1739 
} 
1740  
1741 
/* Update the memory information, and calculate a new

1742 
* approximation of the inverse hessian

1743 
*/

1744 

1745 
/* Have new data in Epot, xx, ff */

1746 
if(ncorr<nmaxcorr)

1747 
ncorr++; 
1748  
1749 
for(i=0;i<n;i++) { 
1750 
dg[point][i]=lastf[i]ff[i]; 
1751 
dx[point][i]*=stepsize; 
1752 
} 
1753 

1754 
dgdg=0;

1755 
dgdx=0;

1756 
for(i=0;i<n;i++) { 
1757 
dgdg+=dg[point][i]*dg[point][i]; 
1758 
dgdx+=dg[point][i]*dx[point][i]; 
1759 
} 
1760 

1761 
diag=dgdx/dgdg; 
1762 

1763 
rho[point]=1.0/dgdx; 
1764 
point++; 
1765 

1766 
if(point>=nmaxcorr)

1767 
point=0;

1768 

1769 
/* Update */

1770 
for(i=0;i<n;i++) 
1771 
p[i]=ff[i]; 
1772 

1773 
cp=point; 
1774 

1775 
/* Recursive update. First go back over the memory points */

1776 
for(k=0;k<ncorr;k++) { 
1777 
cp; 
1778 
if(cp<0) 
1779 
cp=ncorr1;

1780 

1781 
sq=0;

1782 
for(i=0;i<n;i++) 
1783 
sq+=dx[cp][i]*p[i]; 
1784 

1785 
alpha[cp]=rho[cp]*sq; 
1786 

1787 
for(i=0;i<n;i++) 
1788 
p[i] = alpha[cp]*dg[cp][i]; 
1789 
} 
1790 

1791 
for(i=0;i<n;i++) 
1792 
p[i] *= diag; 
1793 

1794 
/* And then go forward again */

1795 
for(k=0;k<ncorr;k++) { 
1796 
yr = 0;

1797 
for(i=0;i<n;i++) 
1798 
yr += p[i]*dg[cp][i]; 
1799 

1800 
beta = rho[cp]*yr; 
1801 
beta = alpha[cp]beta; 
1802 

1803 
for(i=0;i<n;i++) 
1804 
p[i] += beta*dx[cp][i]; 
1805 

1806 
cp++; 
1807 
if(cp>=ncorr)

1808 
cp=0;

1809 
} 
1810 

1811 
for(i=0;i<n;i++) 
1812 
if(!frozen[i])

1813 
dx[point][i] = p[i]; 
1814 
else

1815 
dx[point][i] = 0;

1816  
1817 
stepsize=1.0; 
1818 

1819 
/* Test whether the convergence criterion is met */

1820 
get_f_norm_max(cr,&(inputrec>opts),mdatoms,f,&fnorm,&fmax,&nfmax); 
1821 

1822 
/* Print it if necessary */

1823 
if (MASTER(cr)) {

1824 
if(bVerbose)

1825 
fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",

1826 
step,Epot,fnorm/sqrt(state>natoms),fmax,nfmax+1);

1827 
/* Store the new (lower) energies */

1828 
upd_mdebin(mdebin,NULL,TRUE,mdatoms>tmass,step,(real)step,

1829 
enerd,state,state>box, 
1830 
NULL,NULL,vir,pres,NULL,mu_tot,constr); 
1831 
do_log = do_per_step(step,inputrec>nstlog); 
1832 
do_ene = do_per_step(step,inputrec>nstenergy); 
1833 
if(do_log)

1834 
print_ebin_header(fplog,step,step,state>lambda); 
1835 
print_ebin(fp_ene,do_ene,FALSE,FALSE, 
1836 
do_log ? fplog : NULL,step,step,step,eprNORMAL,

1837 
TRUE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
1838 
} 
1839 

1840 
/* Stop when the maximum force lies below tolerance.

1841 
* If we have reached machine precision, converged is already set to true.

1842 
*/

1843 

1844 
converged = converged  (fmax < inputrec>em_tol); 
1845 

1846 
} /* End of the loop */

1847 

1848 
if(converged)

1849 
step; /* we never took that last step in this case */

1850 

1851 
if(fmax>inputrec>em_tol) {

1852 
if (MASTER(cr)) {

1853 
warn_step(stderr,inputrec>em_tol,FALSE); 
1854 
warn_step(fplog,inputrec>em_tol,FALSE); 
1855 
} 
1856 
converged = FALSE; 
1857 
} 
1858 

1859 
/* If we printed energy and/or logfile last step (which was the last step)

1860 
* we don't have to do it again, but otherwise print the final values.

1861 
*/

1862 
if(!do_log) /* Write final value to log since we didn't do anythin last step */ 
1863 
print_ebin_header(fplog,step,step,state>lambda); 
1864 
if(!do_ene  !do_log) /* Write final energy file entries */ 
1865 
print_ebin(fp_ene,!do_ene,FALSE,FALSE, 
1866 
!do_log ? fplog : NULL,step,step,step,eprNORMAL,

1867 
TRUE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
1868 

1869 
/* Print some stuff... */

1870 
if (MASTER(cr))

1871 
fprintf(stderr,"\nwriting lowest energy coordinates.\n");

1872 

1873 
/* IMPORTANT!

1874 
* For accurate normal mode calculation it is imperative that we

1875 
* store the last conformation into the full precision binary trajectory.

1876 
*

1877 
* However, we should only do it if we did NOT already write this step

1878 
* above (which we did if do_x or do_f was true).

1879 
*/

1880 
do_x = !do_per_step(step,inputrec>nstxout); 
1881 
do_f = !do_per_step(step,inputrec>nstfout); 
1882 
write_traj(fplog,cr,fp_trn,do_x,FALSE,do_f,0,FALSE,0,NULL,FALSE, 
1883 
top_global,inputrec>eI,inputrec>simulation_part,step,(real)step,state,state,f,f); 
1884 

1885 
if (MASTER(cr)) {

1886 
write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm), 
1887 
*top_global>name,top_global, 
1888 
state>x,NULL,inputrec>ePBC,state>box);

1889 
} 
1890 

1891 
if (MASTER(cr)) {

1892 
print_converged(stderr,LBFGS,inputrec>em_tol,step,converged, 
1893 
number_steps,Epot,fmax,nfmax,fnorm/sqrt(state>natoms)); 
1894 
print_converged(fplog,LBFGS,inputrec>em_tol,step,converged, 
1895 
number_steps,Epot,fmax,nfmax,fnorm/sqrt(state>natoms)); 
1896 

1897 
fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);

1898 
} 
1899 

1900 
finish_em(fplog,cr,fp_trn,fp_ene); 
1901  
1902 
/* To print the actual number of steps we needed somewhere */

1903 
inputrec>nsteps=step; 
1904  
1905 
return start_t;

1906 
} /* That's all folks */

1907  
1908  
1909 
time_t do_steep(FILE *fplog,t_commrec *cr, 
1910 
int nfile,t_filenm fnm[],

1911 
bool bVerbose,bool bCompact, 
1912 
gmx_vsite_t *vsite,gmx_constr_t constr, 
1913 
int stepout,

1914 
t_inputrec *inputrec, 
1915 
gmx_mtop_t *top_global,t_fcdata *fcd, 
1916 
t_state *state_global,rvec f[], 
1917 
rvec buf[],t_mdatoms *mdatoms, 
1918 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
1919 
gmx_edsam_t ed, 
1920 
t_forcerec *fr, 
1921 
int repl_ex_nst,int repl_ex_seed, 
1922 
real cpt_period,real max_hours, 
1923 
unsigned long Flags) 
1924 
{ 
1925 
const char *SD="Steepest Descents"; 
1926 
em_state_t *s_min,*s_try; 
1927 
rvec *f_global; 
1928 
gmx_localtop_t *top; 
1929 
gmx_enerdata_t *enerd; 
1930 
t_graph *graph; 
1931 
real stepsize,constepsize; 
1932 
real ustep,dvdlambda,fnormn; 
1933 
int fp_trn,fp_ene;

1934 
t_mdebin *mdebin; 
1935 
bool bDone,bAbort,do_x,do_f;

1936 
time_t start_t; 
1937 
tensor vir,pres; 
1938 
rvec mu_tot; 
1939 
int nsteps;

1940 
int count=0; 
1941 
int steps_accepted=0; 
1942 
/* not used */

1943 
real terminate=0;

1944  
1945 
s_min = init_em_state(); 
1946 
s_try = init_em_state(); 
1947  
1948 
/* Init em and store the local state in s_try */

1949 
init_em(fplog,SD,cr,inputrec, 
1950 
state_global,top_global,s_try,&top,f,&buf,&f_global, 
1951 
nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr, 
1952 
nfile,fnm,&fp_trn,&fp_ene,&mdebin); 
1953  
1954 
/* Print to log file */

1955 
start_t=print_date_and_time(fplog,cr>nodeid,"Started Steepest Descents");

1956 
wallcycle_start(wcycle,ewcRUN); 
1957 

1958 
/* Set variables for stepsize (in nm). This is the largest

1959 
* step that we are going to make in any direction.

1960 
*/

1961 
ustep = inputrec>em_stepsize; 
1962 
stepsize = 0;

1963 

1964 
/* Max number of steps */

1965 
nsteps = inputrec>nsteps; 
1966 

1967 
if (MASTER(cr))

1968 
/* Print to the screen */

1969 
sp_header(stderr,SD,inputrec>em_tol,nsteps); 
1970 
if (fplog)

1971 
sp_header(fplog,SD,inputrec>em_tol,nsteps); 
1972 

1973 
/**** HERE STARTS THE LOOP ****

1974 
* count is the counter for the number of steps

1975 
* bDone will be TRUE when the minimization has converged

1976 
* bAbort will be TRUE when nsteps steps have been performed or when

1977 
* the stepsize becomes smaller than is reasonable for machine precision

1978 
*/

1979 
count = 0;

1980 
bDone = FALSE; 
1981 
bAbort = FALSE; 
1982 
while( !bDone && !bAbort ) {

1983 
bAbort = (nsteps > 0) && (count==nsteps);

1984 

1985 
/* set new coordinates, except for first step */

1986 
if (count > 0) { 
1987 
do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min>f,s_try, 
1988 
constr,top,nrnb,wcycle,count); 
1989 
} 
1990 

1991 
evaluate_energy(fplog,bVerbose,cr, 
1992 
state_global,top_global,s_try,&buf,top, 
1993 
inputrec,nrnb,wcycle, 
1994 
vsite,constr,fcd,graph,mdatoms,fr, 
1995 
mu_tot,enerd,vir,pres,count,count==0);

1996 

1997 
if (MASTER(cr))

1998 
print_ebin_header(fplog,count,count,s_try>s.lambda); 
1999  
2000 
if (count == 0) 
2001 
s_min>epot = s_try>epot + 1;

2002 

2003 
/* Print it if necessary */

2004 
if (MASTER(cr)) {

2005 
if (bVerbose) {

2006 
fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",

2007 
count,ustep,s_try>epot,s_try>fmax,s_try>a_fmax+1,

2008 
(s_try>epot < s_min>epot) ? '\n' : '\r'); 
2009 
} 
2010 

2011 
if (s_try>epot < s_min>epot) {

2012 
/* Store the new (lower) energies */

2013 
upd_mdebin(mdebin,NULL,TRUE,mdatoms>tmass,count,(real)count,

2014 
enerd,&s_try>s,s_try>s.box, 
2015 
NULL,NULL,vir,pres,NULL,mu_tot,constr); 
2016 
print_ebin(fp_ene,TRUE, 
2017 
do_per_step(steps_accepted,inputrec>nstdisreout), 
2018 
do_per_step(steps_accepted,inputrec>nstorireout), 
2019 
fplog,count,count,count,eprNORMAL,TRUE, 
2020 
mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
2021 
fflush(fplog); 
2022 
} 
2023 
} 
2024 

2025 
/* Now if the new energy is smaller than the previous...

2026 
* or if this is the first step!

2027 
* or if we did random steps!

2028 
*/

2029 

2030 
if ( (count==0)  (s_try>epot < s_min>epot) ) { 
2031 
steps_accepted++; 
2032  
2033 
/* Test whether the convergence criterion is met... */

2034 
bDone = (s_try>fmax < inputrec>em_tol); 
2035 

2036 
/* Copy the arrays for force, positions and energy */

2037 
/* The 'Min' array always holds the coords and forces of the minimal

2038 
sampled energy */

2039 
swap_em_state(s_min,s_try); 
2040 
if (count > 0) 
2041 
ustep *= 1.2; 
2042  
2043 
/* Write to trn, if necessary */

2044 
do_x = do_per_step(steps_accepted,inputrec>nstxout); 
2045 
do_f = do_per_step(steps_accepted,inputrec>nstfout); 
2046 
write_traj(fplog,cr,fp_trn,do_x,FALSE,do_f,0,FALSE,0,NULL,FALSE, 
2047 
top_global,inputrec>eI,inputrec>simulation_part,count,(real)count, 
2048 
&s_min>s,state_global,s_min>f,f_global); 
2049 
} 
2050 
else {

2051 
/* If energy is not smaller make the step smaller... */

2052 
ustep *= 0.5; 
2053  
2054 
if (DOMAINDECOMP(cr) && s_min>s.ddp_count != cr>dd>ddp_count) {

2055 
/* Reload the old state */

2056 
em_dd_partition_system(fplog,count,cr,top_global,inputrec, 
2057 
s_min,&buf,top,mdatoms,fr,vsite,constr, 
2058 
nrnb,wcycle); 
2059 
} 
2060 
} 
2061 

2062 
/* Determine new step */

2063 
stepsize = ustep/s_min>fmax; 
2064 

2065 
/* Check if stepsize is too small, with 1 nm as a characteristic length */

2066 
#ifdef GMX_DOUBLE

2067 
if (ustep < 1e12) 
2068 
#else

2069 
if (ustep < 1e6) 
2070 
#endif

2071 
{ 
2072 
if (MASTER(cr)) {

2073 
warn_step(stderr,inputrec>em_tol,constr!=NULL);

2074 
warn_step(fplog,inputrec>em_tol,constr!=NULL);

2075 
} 
2076 
bAbort=TRUE; 
2077 
} 
2078 

2079 
count++; 
2080 
} /* End of the loop */

2081 

2082 
/* In parallel write_traj copies back the coordinates,

2083 
* otherwise we have to do it explicitly.

2084 
*/

2085 
if (!PAR(cr)) {

2086 
copy_em_coords_back(s_min,state_global); 
2087 
} 
2088  
2089 
/* Print some shit... */

2090 
if (MASTER(cr))

2091 
fprintf(stderr,"\nwriting lowest energy coordinates.\n");

2092 
write_traj(fplog,cr,fp_trn,TRUE,FALSE,inputrec>nstfout,0,FALSE,0,NULL,FALSE, 
2093 
top_global,inputrec>eI,inputrec>simulation_part,count,(real)count, 
2094 
&s_min>s,state_global,s_min>f,f_global); 
2095  
2096 
fnormn = s_min>fnorm/sqrt(state_global>natoms); 
2097  
2098 
if (MASTER(cr)) {

2099 
write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm), 
2100 
*top_global>name,top_global, 
2101 
state_global>x,NULL,inputrec>ePBC,state_global>box);

2102 

2103 
print_converged(stderr,SD,inputrec>em_tol,count,bDone,nsteps, 
2104 
s_min>epot,s_min>fmax,s_min>a_fmax,fnormn); 
2105 
print_converged(fplog,SD,inputrec>em_tol,count,bDone,nsteps, 
2106 
s_min>epot,s_min>fmax,s_min>a_fmax,fnormn); 
2107 
} 
2108  
2109 
finish_em(fplog,cr,fp_trn,fp_ene); 
2110 

2111 
/* To print the actual number of steps we needed somewhere */

2112 
inputrec>nsteps=count; 
2113 

2114 
return start_t;

2115 
} /* That's all folks */

2116  
2117  
2118 
time_t do_nm(FILE *fplog,t_commrec *cr, 
2119 
int nfile,t_filenm fnm[],

2120 
bool bVerbose,bool bCompact, 
2121 
gmx_vsite_t *vsite,gmx_constr_t constr, 
2122 
int stepout,

2123 
t_inputrec *inputrec, 
2124 
gmx_mtop_t *top_global,t_fcdata *fcd, 
2125 
t_state *state_global,rvec f[], 
2126 
rvec buf[],t_mdatoms *mdatoms, 
2127 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
2128 
gmx_edsam_t ed, 
2129 
t_forcerec *fr, 
2130 
int repl_ex_nst,int repl_ex_seed, 
2131 
real cpt_period,real max_hours, 
2132 
unsigned long Flags) 
2133 
{ 
2134 
t_mdebin *mdebin; 
2135 
const char *NM = "Normal Mode Analysis"; 
2136 
int fp_ene,step,i;

2137 
time_t start_t; 
2138 
rvec *f_global; 
2139 
gmx_localtop_t *top; 
2140 
gmx_enerdata_t *enerd; 
2141 
t_graph *graph; 
2142 
real t,lambda,t0,lam0; 
2143 
bool bNS;

2144 
tensor vir,pres; 
2145 
int nfmax,count;

2146 
rvec mu_tot; 
2147 
rvec *fneg,*fpos; 
2148 
bool bSparse; /* use sparse matrix storage format */ 
2149 
size_t sz; 
2150 
gmx_sparsematrix_t * sparse_matrix = NULL;

2151 
real * full_matrix = NULL;

2152 
em_state_t * state_work; 
2153 

2154 
/* added with respect to mdrun */

2155 
int idum,jdum,kdum,row,col;

2156 
real der_range=10.0*sqrt(GMX_REAL_EPS); 
2157 
real fnorm,fmax; 
2158 
real dfdx; 
2159 

2160 
state_work = init_em_state(); 
2161 

2162 
/* Init em and store the local state in state_minimum */

2163 
init_em(fplog,NM,cr,inputrec, 
2164 
state_global,top_global,state_work,&top, 
2165 
f,&buf,&f_global, 
2166 
nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr, 
2167 
nfile,fnm,NULL,&fp_ene,&mdebin);

2168 

2169 
snew(fneg,top_global>natoms); 
2170 
snew(fpos,top_global>natoms); 
2171 

2172 
#ifndef GMX_DOUBLE

2173 
fprintf(stderr, 
2174 
"NOTE: This version of Gromacs has been compiled in single precision,\n"

2175 
" which MIGHT not be accurate enough for normal mode analysis.\n"

2176 
" Gromacs now uses sparse matrix storage, so the memory requirements\n"

2177 
" are fairly modest even if you recompile in double precision.\n\n");

2178 
#endif

2179 

2180 
/* Check if we can/should use sparse storage format.

2181 
*

2182 
* Sparse format is only useful when the Hessian itself is sparse, which it

2183 
* will be when we use a cutoff.

2184 
* For small systems (n<1000) it is easier to always use full matrix format, though.

2185 
*/

2186 
if(EEL_FULL(fr>eeltype)  fr>rlist==0.0) 
2187 
{ 
2188 
fprintf(stderr,"Noncutoff electrostatics used, forcing full Hessian format.\n");

2189 
bSparse = FALSE; 
2190 
} 
2191 
else if(top_global>natoms < 1000) 
2192 
{ 
2193 
fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global>natoms);

2194 
bSparse = FALSE; 
2195 
} 
2196 
else

2197 
{ 
2198 
fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");

2199 
bSparse = TRUE; 
2200 
} 
2201 

2202 
sz = DIM*top_global>natoms; 
2203 

2204 
fprintf(stderr,"Allocating Hessian memory...\n\n");

2205  
2206 
if(bSparse)

2207 
{ 
2208 
sparse_matrix=gmx_sparsematrix_init(sz); 
2209 
sparse_matrix>compressed_symmetric = TRUE; 
2210 
} 
2211 
else

2212 
{ 
2213 
snew(full_matrix,sz*sz); 
2214 
} 
2215 

2216 
/* Initial values */

2217 
t0 = inputrec>init_t; 
2218 
lam0 = inputrec>init_lambda; 
2219 
t = t0; 
2220 
lambda = lam0; 
2221 

2222 
init_nrnb(nrnb); 
2223 

2224 
where(); 
2225 

2226 
/* Write start time and temperature */

2227 
start_t=print_date_and_time(fplog,cr>nodeid,"Started nmrun");

2228 
wallcycle_start(wcycle,ewcRUN); 
2229 
if (MASTER(cr))

2230 
{ 
2231 
fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",*(top_global>name),

2232 
top_global>natoms); 
2233 
} 
2234 

2235 
count = 0;

2236 
evaluate_energy(fplog,bVerbose,cr, 
2237 
state_global,top_global,state_work,&buf,top, 
2238 
inputrec,nrnb,wcycle, 
2239 
vsite,constr,fcd,graph,mdatoms,fr, 
2240 
mu_tot,enerd,vir,pres,count,count==0);

2241 
count++; 
2242  
2243 
/* if forces are not small, warn user */

2244 
get_state_f_norm_max(cr,&(inputrec>opts),mdatoms,state_work); 
2245  
2246 
fprintf(stderr,"Maximum force:%12.5e\n",state_work>fmax);

2247 
if (state_work>fmax > 1.0e3) 
2248 
{ 
2249 
fprintf(stderr,"Maximum force probably not small enough to");

2250 
fprintf(stderr," ensure that you are in an \nenergy well. ");

2251 
fprintf(stderr,"Be aware that negative eigenvalues may occur");

2252 
fprintf(stderr," when the\nresulting matrix is diagonalized.\n");

2253 
} 
2254 

2255 
/***********************************************************

2256 
*

2257 
* Loop over all pairs in matrix

2258 
*

2259 
* do_force called twice. Once with positive and

2260 
* once with negative displacement

2261 
*

2262 
************************************************************/

2263  
2264 
/* fudge nr of steps to nr of atoms */

2265 

2266 
inputrec>nsteps = top_global>natoms; 
2267  
2268 
for (step=0; (step<inputrec>nsteps); step++) 
2269 
{ 
2270 

2271 
for (idum=0; (idum<DIM); idum++) 
2272 
{ 
2273 
row = DIM*step+idum; 
2274 

2275 
state_work>s.x[step][idum] = der_range; 
2276 

2277 
evaluate_energy(fplog,bVerbose,cr, 
2278 
state_global,top_global,state_work,&buf,top, 
2279 
inputrec,nrnb,wcycle, 
2280 
vsite,constr,fcd,graph,mdatoms,fr, 
2281 
mu_tot,enerd,vir,pres,count,count==0);

2282 
count++; 
2283 

2284 
for ( i=0 ; i < top_global>natoms ; i++ ) 
2285 
{ 
2286 
copy_rvec ( state_work>f[i] , fneg[i] ); 
2287 
} 
2288 

2289 
state_work>s.x[step][idum] += 2*der_range;

2290 

2291 
evaluate_energy(fplog,bVerbose,cr, 
2292 
state_global,top_global,state_work,&buf,top, 
2293 
inputrec,nrnb,wcycle, 
2294 
vsite,constr,fcd,graph,mdatoms,fr, 
2295 
mu_tot,enerd,vir,pres,count,count==0);

2296 
count++; 
2297 

2298 
for ( i=0 ; i < top_global>natoms ; i++ ) 
2299 
{ 
2300 
copy_rvec ( state_work>f[i] , fpos[i] ); 
2301 
} 
2302 

2303 
for (jdum=0; (jdum<top_global>natoms); jdum++) 
2304 
{ 
2305 
for (kdum=0; (kdum<DIM); kdum++) 
2306 
{ 
2307 
dfdx=(fpos[jdum][kdum]fneg[jdum][kdum])/(2*der_range);

2308 
col = DIM*jdum+kdum; 
2309 

2310 
if(bSparse)

2311 
{ 
2312 
if(col>=row && dfdx!=0.0) 
2313 
gmx_sparsematrix_increment_value(sparse_matrix,row,col,dfdx); 
2314 
} 
2315 
else

2316 
{ 
2317 
full_matrix[row*sz+col] = dfdx; 
2318 
} 
2319 
} 
2320 
} 
2321 

2322 
/* x is restored to original */

2323 
state_work>s.x[step][idum] = der_range; 
2324 

2325 
if (bVerbose && fplog)

2326 
fflush(fplog); 
2327 
} 
2328 
/* write progress */

2329 
if (MASTER(cr) && bVerbose)

2330 
{ 
2331 
fprintf(stderr,"\rFinished step %d out of %d",

2332 
step+1,top_global>natoms);

2333 
fflush(stderr); 
2334 
} 
2335 
} 
2336 
t=t0+step*inputrec>delta_t; 
2337 
lambda=lam0+step*inputrec>delta_lambda; 
2338 

2339 
if (MASTER(cr))

2340 
{ 
2341 
print_ebin(1,FALSE,FALSE,FALSE,fplog,step,step,t,eprAVER,

2342 
FALSE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
2343 
print_ebin(1,FALSE,FALSE,FALSE,fplog,step,step,t,eprRMS,

2344 
FALSE,mdebin,fcd,&(top_global>groups),&(inputrec>opts)); 
2345 
} 
2346 

2347 
fprintf(stderr,"\n\nWriting Hessian...\n");

2348 
gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix); 
2349 

2350 

2351 
return start_t;

2352 
} 
2353  
2354  
2355 
static void global_max(t_commrec *cr,int *n) 
2356 
{ 
2357 
int *sum,i;

2358  
2359 
snew(sum,cr>nnodes); 
2360 
sum[cr>nodeid] = *n; 
2361 
gmx_sumi(cr>nnodes,sum,cr); 
2362 
for(i=0; i<cr>nnodes; i++) 
2363 
*n = max(*n,sum[i]); 
2364 

2365 
sfree(sum); 
2366 
} 
2367  
2368 
static void realloc_bins(double **bin,int *nbin,int nbin_new) 
2369 
{ 
2370 
int i;

2371  
2372 
if (nbin_new != *nbin) {

2373 
srenew(*bin,nbin_new); 
2374 
for(i=*nbin; i<nbin_new; i++)

2375 
(*bin)[i] = 0;

2376 
*nbin = nbin_new; 
2377 
} 
2378 
} 
2379  
2380 
time_t do_tpi(FILE *fplog,t_commrec *cr, 
2381 
int nfile,t_filenm fnm[],

2382 
bool bVerbose,bool bCompact, 
2383 
gmx_vsite_t *vsite,gmx_constr_t constr, 
2384 
int stepout,

2385 
t_inputrec *inputrec, 
2386 
gmx_mtop_t *top_global,t_fcdata *fcd, 
2387 
t_state *state,rvec f[], 
2388 
rvec buf[],t_mdatoms *mdatoms, 
2389 
t_nrnb *nrnb,gmx_wallcycle_t wcycle, 
2390 
gmx_edsam_t ed, 
2391 
t_forcerec *fr, 
2392 
int repl_ex_nst,int repl_ex_seed, 
2393 
real cpt_period,real max_hours, 
2394 
unsigned long Flags) 
2395 
{ 
2396 
const char *TPI="Test Particle Insertion"; 
2397 
gmx_localtop_t *top; 
2398 
gmx_groups_t *groups; 
2399 
gmx_enerdata_t *enerd; 
2400 
real lambda,t,temp,beta,drmax,epot; 
2401 
double embU,sum_embU,*sum_UgembU,V,V_all,VembU_all;

2402 
int status;

2403 
t_trxframe rerun_fr; 
2404 
bool bDispCorr,bCharge,bRFExcl,bNotLastFrame,bStateChanged,bNS,bOurStep;

2405 
time_t start_t; 
2406 
tensor force_vir,shake_vir,vir,pres; 
2407 
int cg_tp,a_tp0,a_tp1,ngid,gid_tp,nener,e;

2408 
rvec *x_mol; 
2409 
rvec mu_tot,x_init,dx,x_tp; 
2410 
int nnodes,frame,nsteps,step;

2411 
int i,start,end;

2412 
static gmx_rng_t tpi_rand;

2413 
FILE *fp_tpi=NULL;

2414 
char *ptr,*dump_pdb,**leg,str[STRLEN],str2[STRLEN];

2415 
double dbl,dump_ener;

2416 
bool bCavity;

2417 
int nat_cavity=0,d; 
2418 
real *mass_cavity=NULL,mass_tot;

2419 
int nbin;

2420 
double invbinw,*bin,refvolshift,logV,bUlogV;

2421 
char *tpid_leg[2]={"direct","reweighted"}; 
2422  
2423 
/* Since numerical problems can lead to extreme negative energies

2424 
* when atoms overlap, we need to set a lower limit for beta*U.

2425 
*/

2426 
real bU_neg_limit = 50;

2427  
2428 
/* Since there is no upper limit to the insertion energies,

2429 
* we need to set an upper limit for the distribution output.

2430 
*/

2431 
real bU_bin_limit = 50;

2432 
real bU_logV_bin_limit = bU_bin_limit + 10;

2433  
2434 
nnodes = cr>nnodes; 
2435  
2436 
top = gmx_mtop_generate_local_top(top_global,inputrec); 
2437  
2438 
groups = &top_global>groups; 
2439  
2440 
bCavity = (inputrec>eI == eiTPIC); 
2441 
if (bCavity) {

2442 
ptr = getenv("GMX_TPIC_MASSES");

2443 
if (ptr == NULL) { 
2444 
nat_cavity = 1;

2445 
} else {

2446 
/* Read (multiple) masses from env var GMX_TPIC_MASSES,

2447 
* The center of mass of the last atoms is then used for TPIC.

2448 
*/

2449 
nat_cavity = 0;

2450 
while (sscanf(ptr,"%lf%n",&dbl,&i) > 0) { 
2451 
srenew(mass_cavity,nat_cavity+1);

2452 
mass_cavity[nat_cavity] = dbl; 
2453 
fprintf(fplog,"mass[%d] = %f\n",

2454 
nat_cavity+1,mass_cavity[nat_cavity]);

2455 
nat_cavity++; 
2456 
ptr += i; 
2457 
} 
2458 
if (nat_cavity == 0) 
2459 
gmx_fatal(FARGS,"Found %d masses in GMX_TPIC_MASSES",nat_cavity);

2460 
} 
2461 
} 
2462  
2463 
/*

2464 
init_em(fplog,TPI,inputrec,&lambda,nrnb,mu_tot,

2465 
state>box,fr,mdatoms,top,cr,nfile,fnm,NULL,NULL);*/

2466 
/* We never need full pbc for TPI */

2467 
fr>ePBC = epbcXYZ; 
2468 
/* Determine the temperature for the Boltzmann weighting */

2469 
temp = inputrec>opts.ref_t[0];

2470 
if (fplog) {

2471 
for(i=1; (i<inputrec>opts.ngtc); i++) { 
2472 
if (inputrec>opts.ref_t[i] != temp) {

2473 
fprintf(fplog,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");

2474 
fprintf(stderr,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");

2475 
} 
2476 
} 
2477 
fprintf(fplog, 
2478 
"\n The temperature for test particle insertion is %.3f K\n\n",

2479 
temp); 
2480 
} 
2481 
beta = 1.0/(BOLTZ*temp); 
2482  
2483 
/* Number of insertions per frame */

2484 
nsteps = inputrec>nsteps; 
2485  
2486 
/* Use the same neighborlist with more insertions points

2487 
* in a sphere of radius drmax around the initial point

2488 
*/

2489 
/* This should be a proper mdp parameter */

2490 
drmax = inputrec>rtpi; 
2491  
2492 
/* An environment variable can be set to dump all configurations

2493 
* to pdb with an insertion energy <= this value.

2494 
*/

2495 
dump_pdb = getenv("GMX_TPI_DUMP");

2496 
dump_ener = 0;

2497 
if (dump_pdb)

2498 
sscanf(dump_pdb,"%lf",&dump_ener);

2499  
2500 
atoms2md(top_global,inputrec,0,NULL,0,top_global>natoms,mdatoms); 
2501 
update_mdatoms(mdatoms,inputrec>init_lambda); 
2502  
2503 
snew(enerd,1);

2504 
init_enerdata(fplog,groups>grps[egcENER].nr,enerd); 
2505  
2506 
/* Print to log file */

2507 
start_t=print_date_and_time(fplog,cr>nodeid, 
2508 
"Started Test Particle Insertion");

2509 
wallcycle_start(wcycle,ewcRUN); 
2510  
2511 
/* The last charge group is the group to be inserted */

2512 
cg_tp = top>cgs.nr  1;

2513 
a_tp0 = top>cgs.index[cg_tp]; 
2514 
a_tp1 = top>cgs.index[cg_tp+1];

2515 
if (debug)

2516 
fprintf(debug,"TPI cg %d, atoms %d%d\n",cg_tp,a_tp0,a_tp1);

2517 
if (a_tp1  a_tp0 > 1 && 
2518 
(inputrec>rlist < inputrec>rcoulomb  
2519 
inputrec>rlist < inputrec>rvdw)) 
2520 
gmx_fatal(FARGS,"Can not do TPI for multiatom molecule with a twinrange cutoff");

2521 
snew(x_mol,a_tp1a_tp0); 
2522  
2523 
bDispCorr = (inputrec>eDispCorr != edispcNO); 
2524 
bCharge = FALSE; 
2525 
for(i=a_tp0; i<a_tp1; i++) {

2526 
/* Copy the coordinates of the molecule to be insterted */

2527 
copy_rvec(state>x[i],x_mol[ia_tp0]); 
2528 
/* Check if we need to print electrostatic energies */

2529 
bCharge = (mdatoms>chargeA[i] != 0 

2530 
(mdatoms>chargeB && mdatoms>chargeB[i] != 0));

2531 
} 
2532 
bRFExcl = (bCharge && EEL_RF(fr>eeltype) && fr>eeltype!=eelRF_NEC); 
2533  
2534 
calc_cgcm(fplog,cg_tp,cg_tp+1,&(top>cgs),state>x,fr>cg_cm);

2535 
if (bCavity) {

2536 
if (norm(fr>cg_cm[cg_tp]) > 0.5*inputrec>rlist && fplog) { 
2537 
fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n");

2538 
fprintf(stderr,"WARNING: Your TPI molecule is not centered at 0,0,0\n");

2539 
} 
2540 
} else {

2541 
/* Center the molecule to be inserted at zero */

2542 
for(i=0; i<a_tp1a_tp0; i++) 
2543 
rvec_dec(x_mol[i],fr>cg_cm[cg_tp]); 
2544 
} 
2545  
2546 
if (fplog) {

2547 
fprintf(fplog,"\nWill insert %d atoms %s partial charges\n",

2548 
a_tp1a_tp0,bCharge ? "with" : "without"); 
2549 

2550 
fprintf(fplog,"\nWill insert %d times in each frame of %s\n",

2551 
nsteps,opt2fn("rerun",nfile,fnm));

2552 
} 
2553 

2554 
if (!bCavity) {

2555 
if (inputrec>nstlist > 1) { 
2556 
if (drmax==0 && a_tp1a_tp0==1) { 
2557 
gmx_fatal(FARGS,"Reusing the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense",inputrec>nstlist,drmax);

2558 
} 
2559 
if (fplog)

2560 
fprintf(fplog,"Will use the same neighborlist for %d insertions in a sphere of radius %f\n",inputrec>nstlist,drmax);

2561 
} 
2562 
} else {

2563 
if (fplog)

2564 
fprintf(fplog,"Will insert randomly in a sphere of radius %f around the center of the cavity\n",drmax);

2565 
} 
2566  
2567 
ngid = groups>grps[egcENER].nr; 
2568 
gid_tp = GET_CGINFO_GID(fr>cginfo[cg_tp]); 
2569 
nener = 1 + ngid;

2570 
if (bDispCorr)

2571 
nener += 1;

2572 
if (bCharge) {

2573 
nener += ngid; 
2574 
if (bRFExcl)

2575 
nener += 1;

2576 
} 
2577 
snew(sum_UgembU,nener); 
2578  
2579 
/* Initialize random generator */

2580 
tpi_rand = gmx_rng_init(inputrec>ld_seed); 
2581  
2582 
if (MASTER(cr)) {

2583 
fp_tpi = xvgropen(opt2fn("tpi",nfile,fnm),

2584 
"TPI energies","Time (ps)", 
2585 
"(kJ mol\\S1\\N) / (nm\\S3\\N)");

2586 
xvgr_subtitle(fp_tpi,"f. are averages over one frame");

2587 
snew(leg,4+nener);

2588 
e = 0;

2589 
sprintf(str,"kT log(<Ve\\S\\8b\\4U\\N>/<V>)");

2590 
leg[e++] = strdup(str); 
2591 
sprintf(str,"f. kT log<e\\S\\8b\\4U\\N>");

2592 
leg[e++] = strdup(str); 
2593 
sprintf(str,"f. <e\\S\\8b\\4U\\N>");

2594 
leg[e++] = strdup(str); 
2595 
sprintf(str,"f. V");

2596 
leg[e++] = strdup(str); 
2597 
sprintf(str,"f. <Ue\\S\\8b\\4U\\N>");

2598 
leg[e++] = strdup(str); 
2599 
for(i=0; i<ngid; i++) { 
2600 
sprintf(str,"f. <U\\sVdW %s\\Ne\\S\\8b\\4U\\N>",

2601 
*(groups>grpname[groups>grps[egcENER].nm_ind[i]])); 
2602 
leg[e++] = strdup(str); 
2603 
} 
2604 
if (bDispCorr) {

2605 
sprintf(str,"f. <U\\sdisp c\\Ne\\S\\8b\\4U\\N>");

2606 
leg[e++] = strdup(str); 
2607 
} 
2608 
if (bCharge) {

2609 
for(i=0; i<ngid; i++) { 
2610 
sprintf(str,"f. <U\\sCoul %s\\Ne\\S\\8b\\4U\\N>",

2611 
*(groups>grpname[groups>grps[egcENER].nm_ind[i]])); 
2612 
leg[e++] = strdup(str); 
2613 
} 
2614 
if (bRFExcl) {

2615 
sprintf(str,"f. <U\\sRF excl\\Ne\\S\\8b\\4U\\N>");

2616 
leg[e++] = strdup(str); 
2617 
} 
2618 
} 
2619 
xvgr_legend(fp_tpi,4+nener,leg);

2620 
for(i=0; i<4+nener; i++) 
2621 
sfree(leg[i]); 
2622 
sfree(leg); 
2623 
} 
2624 
clear_rvec(x_init); 
2625 
V_all = 0;

2626 
VembU_all = 0;

2627  
2628 
invbinw = 10;

2629 
nbin = 10;

2630 
snew(bin,nbin); 
2631  
2632 
start_time(); 
2633  
2634 
bNotLastFrame = read_first_frame(&status,opt2fn("rerun",nfile,fnm),

2635 
&rerun_fr,TRX_NEED_X); 
2636 
frame = 0;

2637  
2638 
if (rerun_fr.natoms  (bCavity ? nat_cavity : 0) != 
2639 
mdatoms>nr  (a_tp1  a_tp0)) 
2640 
gmx_fatal(FARGS,"Number of atoms in trajectory (%d)%s "

2641 
"is not equal the number in the run input file (%d) "

2642 
"minus the number of atoms to insert (%d)\n",

2643 
rerun_fr.natoms,bCavity ? " minus one" : "", 
2644 
mdatoms>nr,a_tp1a_tp0); 
2645  
2646 
refvolshift = log(det(rerun_fr.box)); 
2647 

2648 
while (bNotLastFrame) {

2649 
lambda = rerun_fr.lambda; 
2650 
t = rerun_fr.time; 
2651 

2652 
sum_embU = 0;

2653 
for(e=0; e<nener; e++) 
2654 
sum_UgembU[e] = 0;

2655  
2656 
/* Copy the coordinates from the input trajectory */

2657 
for(i=0; i<rerun_fr.natoms; i++) 
2658 
copy_rvec(rerun_fr.x[i],state>x[i]); 
2659 

2660 
V = det(rerun_fr.box); 
2661 
logV = log(V); 
2662  
2663 
bStateChanged = TRUE; 
2664 
bNS = TRUE; 
2665 
for(step=0; step<nsteps; step++) { 
2666 
/* In parallel all nodes generate all random configurations.

2667 
* In that way the result is identical to a single cpu tpi run.

2668 
*/

2669 
if (!bCavity) {

2670 
/* Random insertion in the whole volume */

2671 
bNS = (step % inputrec>nstlist == 0);

2672 
if (bNS) {

2673 
/* Generate a random position in the box */

2674 
x_init[XX] = gmx_rng_uniform_real(tpi_rand)*state>box[XX][XX]; 
2675 
x_init[YY] = gmx_rng_uniform_real(tpi_rand)*state>box[YY][YY]; 
2676 
x_init[ZZ] = gmx_rng_uniform_real(tpi_rand)*state>box[ZZ][ZZ]; 
2677 
} 
2678 
if (inputrec>nstlist == 1) { 
2679 
copy_rvec(x_init,x_tp); 
2680 
} else {

2681 
/* Generate coordinates within dx=drmax of x_init */

2682 
do {

2683 
dx[XX] = (2*gmx_rng_uniform_real(tpi_rand)  1)*drmax; 
2684 
dx[YY] = (2*gmx_rng_uniform_real(tpi_rand)  1)*drmax; 
2685 
dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand)  1)*drmax; 
2686 
} while (norm2(dx) > drmax*drmax);

2687 
rvec_add(x_init,dx,x_tp); 
2688 
} 
2689 
} else {

2690 
/* Random insertion around a cavity location

2691 
* given by the last coordinate of the trajectory.

2692 
*/

2693 
if (step == 0) { 
2694 
if (nat_cavity == 1) { 
2695 
/* Copy the location of the cavity */

2696 
copy_rvec(rerun_fr.x[rerun_fr.natoms1],x_init);

2697 
} else {

2698 
/* Determine the center of mass of the last molecule */

2699 
clear_rvec(x_init); 
2700 
mass_tot = 0;

2701 
for(i=0; i<nat_cavity; i++) { 
2702 
for(d=0; d<DIM; d++) 
2703 
x_init[d] += 
2704 
mass_cavity[i]*rerun_fr.x[rerun_fr.natomsnat_cavity+i][d]; 
2705 
mass_tot += mass_cavity[i]; 
2706 
} 
2707 
for(d=0; d<DIM; d++) 
2708 
x_init[d] /= mass_tot; 
2709 
} 
2710 
} 
2711 
/* Generate coordinates within dx=drmax of x_init */

2712 
do {

2713 
dx[XX] = (2*gmx_rng_uniform_real(tpi_rand)  1)*drmax; 
2714 
dx[YY] = (2*gmx_rng_uniform_real(tpi_rand)  1)*drmax; 
2715 
dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand)  1)*drmax; 
2716 
} while (norm2(dx) > drmax*drmax);

2717 
rvec_add(x_init,dx,x_tp); 
2718 
} 
2719  
2720 
if (a_tp1  a_tp0 == 1) { 
2721 
/* Insert a single atom, just copy the insertion location */

2722 
copy_rvec(x_tp,state>x[a_tp0]); 
2723 
} else {

2724 
/* Copy the coordinates from the top file */

2725 
for(i=a_tp0; i<a_tp1; i++)

2726 
copy_rvec(x_mol[ia_tp0],state>x[i]); 
2727 
/* Rotate the molecule randomly */

2728 
rotate_conf(a_tp1a_tp0,state>x+a_tp0,NULL,

2729 
2*M_PI*gmx_rng_uniform_real(tpi_rand),

2730 
2*M_PI*gmx_rng_uniform_real(tpi_rand),

2731 
2*M_PI*gmx_rng_uniform_real(tpi_rand));

2732 
/* Shift to the insertion location */

2733 
for(i=a_tp0; i<a_tp1; i++)

2734 
rvec_inc(state>x[i],x_tp); 
2735 
} 
2736  
2737 
/* Check if this insertion belongs to this node */

2738 
bOurStep = TRUE; 
2739 
if (PAR(cr)) {

2740 
switch (inputrec>eI) {

2741 
case eiTPI:

2742 
bOurStep = ((step / inputrec>nstlist) % nnodes == cr>nodeid); 
2743 
break;

2744 
case eiTPIC:

2745 
bOurStep = (step % nnodes == cr>nodeid); 
2746 
break;

2747 
default:

2748 
gmx_fatal(FARGS,"Unknown integrator %s",ei_names[inputrec>eI]);

2749 
} 
2750 
} 
2751 
if (bOurStep) {

2752 
/* Clear some matrix variables */

2753 
clear_mat(force_vir); 
2754 
clear_mat(shake_vir); 
2755 
clear_mat(vir); 
2756 
clear_mat(pres); 
2757 

2758 
/* Set the charge group center of mass of the test particle */

2759 
copy_rvec(x_init,fr>cg_cm[top>cgs.nr1]);

2760  
2761 
/* Calc energy (no forces) on new positions.

2762 
* Since we only need the intermolecular energy

2763 
* and the RF exclusion terms of the inserted molecule occur

2764 
* within a single charge group we can pass NULL for the graph.

2765 
* This also avoids shifts that would move charge groups

2766 
* out of the box.

2767 
*/

2768 
/* Make do_force do a single node fore calculation */

2769 
cr>nnodes = 1;

2770 
do_force(fplog,cr,inputrec, 
2771 
step,nrnb,wcycle,top,&top_global>groups, 
2772 
rerun_fr.box,state>x,&state>hist, 
2773 
f,buf,force_vir,mdatoms,enerd,fcd, 
2774 
lambda,NULL,fr,NULL,mu_tot,t,NULL,NULL, 
2775 
GMX_FORCE_NONBONDED  
2776 
(bNS ? GMX_FORCE_NS : 0) 

2777 
(bStateChanged ? GMX_FORCE_STATECHANGED : 0));

2778 
cr>nnodes = nnodes; 
2779 
bStateChanged = FALSE; 
2780 
bNS = FALSE; 
2781  
2782 
/* Calculate long range corrections to pressure and energy */

2783 
calc_dispcorr(fplog,inputrec,fr,step,mdatoms>nr,rerun_fr.box,lambda, 
2784 
pres,vir,enerd>term); 
2785 

2786 
/* If the compiler doesn't optimize this check away

2787 
* we catch the NAN energies.

2788 
* With tables extreme negative energies might occur close to r=0.

2789 
*/

2790 
epot = enerd>term[F_EPOT]; 
2791 
if (epot != epot  epot*beta < bU_neg_limit) {

2792 
if (debug)

2793 
fprintf(debug,"\n time %.3f, step %d: nonfinite energy %f, using exp(bU)=0\n",t,step,epot);

2794 
embU = 0;

2795 
} else {

2796 
embU = exp(beta*epot); 
2797 
sum_embU += embU; 
2798 
/* Determine the weighted energy contributions of each energy group */

2799 
e = 0;

2800 
sum_UgembU[e++] += epot*embU; 
2801 
if (fr>bBHAM) {

2802 
for(i=0; i<ngid; i++) 
2803 
sum_UgembU[e++] += 
2804 
(enerd>grpp.ener[egBHAMSR][GID(i,gid_tp,ngid)] + 
2805 
enerd>grpp.ener[egBHAMLR][GID(i,gid_tp,ngid)])*embU; 
2806 
} else {

2807 
for(i=0; i<ngid; i++) 
2808 
sum_UgembU[e++] += 
2809 
(enerd>grpp.ener[egLJSR][GID(i,gid_tp,ngid)] + 
2810 
enerd>grpp.ener[egLJLR][GID(i,gid_tp,ngid)])*embU; 
2811 
} 
2812 
if (bDispCorr)

2813 
sum_UgembU[e++] += enerd>term[F_DISPCORR]*embU; 
2814 
if (bCharge) {

2815 
for(i=0; i<ngid; i++) 
2816 
sum_UgembU[e++] += 
2817 
(enerd>grpp.ener[egCOULSR][GID(i,gid_tp,ngid)] + 
2818 
enerd>grpp.ener[egCOULLR][GID(i,gid_tp,ngid)])*embU; 
2819 
if (bRFExcl)

2820 
sum_UgembU[e++] += enerd>term[F_RF_EXCL]*embU; 
2821 
} 
2822 
} 
2823 

2824 
if (embU == 0  beta*epot > bU_bin_limit) { 
2825 
bin[0]++;

2826 
} else {

2827 
i = (int)((bU_logV_bin_limit

2828 
 (beta*epot  logV + refvolshift))*invbinw 
2829 
+ 0.5); 
2830 
if (i < 0) 
2831 
i = 0;

2832 
if (i >= nbin)

2833 
realloc_bins(&bin,&nbin,i+10);

2834 
bin[i]++; 
2835 
} 
2836  
2837 
if (debug)

2838 
fprintf(debug,"TPI %7d %12.5e %12.5f %12.5f %12.5f\n",

2839 
step,epot,x_tp[XX],x_tp[YY],x_tp[ZZ]); 
2840  
2841 
if (dump_pdb && epot <= dump_ener) {

2842 
sprintf(str,"t%g_step%d.pdb",t,step);

2843 
sprintf(str2,"t: %f step %d ener: %f",t,step,epot);

2844 
write_sto_conf_mtop(str,str2,top_global,state>x,state>v, 
2845 
inputrec>ePBC,state>box); 
2846 
} 
2847 
} 
2848 
} 
2849  
2850 
if (PAR(cr)) {

2851 
/* When running in parallel sum the energies over the processes */

2852 
gmx_sumd(1, &sum_embU, cr);

2853 
gmx_sumd(nener,sum_UgembU,cr); 
2854 
} 
2855  
2856 
frame++; 
2857 
V_all += V; 
2858 
VembU_all += V*sum_embU/nsteps; 
2859  
2860 
if (fp_tpi) {

2861 
if (bVerbose  frame%10==0  frame<10) 
2862 
fprintf(stderr,"mu %10.3e <mu> %10.3e\n",

2863 
log(sum_embU/nsteps)/beta,log(VembU_all/V_all)/beta); 
2864 

2865 
fprintf(fp_tpi,"%10.3f %12.5e %12.5e %12.5e %12.5e",

2866 
t, 
2867 
VembU_all==0 ? 20/beta : log(VembU_all/V_all)/beta, 
2868 
sum_embU==0 ? 20/beta : log(sum_embU/nsteps)/beta, 
2869 
sum_embU/nsteps,V); 
2870 
for(e=0; e<nener; e++) 
2871 
fprintf(fp_tpi," %12.5e",sum_UgembU[e]/nsteps);

2872 
fprintf(fp_tpi,"\n");

2873 
fflush(fp_tpi); 
2874 
} 
2875  
2876 
bNotLastFrame = read_next_frame(status,&rerun_fr); 
2877 
} /* End of the loop */

2878  
2879 
update_time(); 
2880  
2881 
close_trj(status); 
2882  
2883 
if (fp_tpi)

2884 
gmx_fio_fclose(fp_tpi); 
2885  
2886 
if (fplog) {

2887 
fprintf(fplog,"\n");

2888 
fprintf(fplog," <V> = %12.5e nm^3\n",V_all/frame);

2889 
fprintf(fplog," <mu> = %12.5e kJ/mol\n",log(VembU_all/V_all)/beta);

2890 
} 
2891 

2892 
/* Write the Boltzmann factor histogram */

2893 
if (PAR(cr)) {

2894 
/* When running in parallel sum the bins over the processes */

2895 
i = nbin; 
2896 
global_max(cr,&i); 
2897 
realloc_bins(&bin,&nbin,i); 
2898 
gmx_sumd(nbin,bin,cr); 
2899 
} 
2900 
fp_tpi = xvgropen(opt2fn("tpid",nfile,fnm),

2901 
"TPI energy distribution",

2902 
"\\8b\\4U  log(V/<V>)","count"); 
2903 
sprintf(str,"number \\8b\\4U > %g: %9.3e",bU_bin_limit,bin[0]); 
2904 
xvgr_subtitle(fp_tpi,str); 
2905 
xvgr_legend(fp_tpi,2,tpid_leg);

2906 
for(i=nbin1; i>0; i) { 
2907 
bUlogV = i/invbinw + bU_logV_bin_limit  refvolshift + log(V_all/frame); 
2908 
fprintf(fp_tpi,"%6.2f %10d %12.5e\n",

2909 
bUlogV, 
2910 
(int)(bin[i]+0.5), 
2911 
bin[i]*exp(bUlogV)*V_all/VembU_all); 
2912 
} 
2913 
gmx_fio_fclose(fp_tpi); 
2914 
sfree(bin); 
2915  
2916 
sfree(sum_UgembU); 
2917  
2918 
return start_t;

2919 
} 