Project

General

Profile

minimize.c

fixed src/mdlib/minimize.c - Berk Hess, 05/29/2009 02:40 PM

 
1
/*
2
 * $Id: minimize.c,v 1.131.2.6 2009/05/29 12:36:20 hess Exp $
3
 * 
4
 *                This source code is part of
5
 * 
6
 *                 G   R   O   M   A   C   S
7
 * 
8
 *          GROningen MAchine for Chemical Simulations
9
 * 
10
 *                        VERSION 3.2.0
11
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13
 * Copyright (c) 2001-2004, The GROMACS development team,
14
 * check out http://www.gromacs.org for more information.
15

16
 * This program is free software; you can redistribute it and/or
17
 * modify it under the terms of the GNU General Public License
18
 * as published by the Free Software Foundation; either version 2
19
 * of the License, or (at your option) any later version.
20
 * 
21
 * If you want to redistribute modifications, please consider that
22
 * scientific software is very special. Version control is crucial -
23
 * bugs must be traceable. We will be happy to consider code for
24
 * inclusion in the official distribution, but derived work must not
25
 * be called official GROMACS. Details are found in the README & COPYING
26
 * files - if they are missing, get the official version at www.gromacs.org.
27
 * 
28
 * To help us fund GROMACS development, we humbly ask that you cite
29
 * the papers on the package - you can find them in the top README file.
30
 * 
31
 * For more info, check our website at http://www.gromacs.org
32
 * 
33
 * And Hey:
34
 * GROwing Monsters And Cloning Shrimps
35
 */
36
#ifdef HAVE_CONFIG_H
37
#include <config.h>
38
#endif
39

    
40
#include <string.h>
41
#include <time.h>
42
#include <math.h>
43
#include "sysstuff.h"
44
#include "string2.h"
45
#include "network.h"
46
#include "confio.h"
47
#include "copyrite.h"
48
#include "smalloc.h"
49
#include "nrnb.h"
50
#include "main.h"
51
#include "pbc.h"
52
#include "force.h"
53
#include "macros.h"
54
#include "random.h"
55
#include "names.h"
56
#include "gmx_fatal.h"
57
#include "txtdump.h"
58
#include "typedefs.h"
59
#include "update.h"
60
#include "random.h"
61
#include "constr.h"
62
#include "vec.h"
63
#include "statutil.h"
64
#include "tgroup.h"
65
#include "mdebin.h"
66
#include "vsite.h"
67
#include "force.h"
68
#include "mdrun.h"
69
#include "domdec.h"
70
#include "partdec.h"
71
#include "trnio.h"
72
#include "sparsematrix.h"
73
#include "mtxio.h"
74
#include "gmx_random.h"
75
#include "physics.h"
76
#include "xvgr.h"
77
#include "mdatoms.h"
78
#include "gbutil.h"
79
#include "ns.h"
80
#include "gmx_wallcycle.h"
81
#include "mtop_util.h"
82
#include "gmxfio.h"
83
#include "pme.h"
84

    
85
typedef struct {
86
  t_state s;
87
  rvec    *f;
88
  real    epot;
89
  real    fnorm;
90
  real    fmax;
91
  int     a_fmax;
92
} em_state_t;
93

    
94
static em_state_t *init_em_state()
95
{
96
  em_state_t *ems;
97
  
98
  snew(ems,1);
99

    
100
  return ems;
101
}
102

    
103
static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
104
{
105
  fprintf(out,"%s:\n",minimizer);
106
  fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
107
  fprintf(out,"   Number of steps    = %12d\n",nsteps);
108
}
109

    
110
static void warn_step(FILE *fp,real ftol,bool bConstrain)
111
{
112
  fprintf(fp,"\nStepsize too small, or no change in energy.\n"
113
          "Converged to machine precision,\n"
114
          "but not to the requested precision Fmax < %g\n",
115
          ftol);
116
  if (sizeof(real)<sizeof(double))
117
      fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
118
        
119
  if (bConstrain)
120
    fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
121
            "off constraints alltogether (set constraints = none in mdp file)\n");
122
}
123

    
124

    
125

    
126
static void print_converged(FILE *fp,const char *alg,real ftol,int count,bool bDone,
127
                            int nsteps,real epot,real fmax, int nfmax, real fnorm)
128
{
129
  if (bDone)
130
    fprintf(fp,"\n%s converged to Fmax < %g in %d steps\n",alg,ftol,count); 
131
  else if(count<nsteps)
132
    fprintf(fp,"\n%s converged to machine precision in %d steps,\n"
133
               "but did not reach the requested Fmax < %g.\n",alg,count,ftol);
134
  else 
135
    fprintf(fp,"\n%s did not converge to Fmax < %g in %d steps.\n",alg,ftol,count);
136

    
137
#ifdef GMX_DOUBLE
138
  fprintf(fp,"Potential Energy  = %21.14e\n",epot); 
139
  fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1); 
140
  fprintf(fp,"Norm of force     = %21.14e\n",fnorm); 
141
#else
142
  fprintf(fp,"Potential Energy  = %14.7e\n",epot); 
143
  fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1); 
144
  fprintf(fp,"Norm of force     = %14.7e\n",fnorm); 
145
#endif
146
}
147

    
148
static void get_f_norm_max(t_commrec *cr,
149
                           t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
150
                           real *fnorm,real *fmax,int *a_fmax)
151
{
152
  double fnorm2,*sum;
153
  real fmax2,fmax2_0,fam;
154
  int  la_max,a_max,start,end,i,m,gf;
155

    
156
  /* This routine finds the largest force and returns it.
157
   * On parallel machines the global max is taken.
158
   */
159
  fnorm2 = 0;
160
  fmax2 = 0;
161
  la_max = -1;
162
  gf = 0;
163
  start = mdatoms->start;
164
  end   = mdatoms->homenr + start;
165
  if (mdatoms->cFREEZE) {
166
    for(i=start; i<end; i++) {
167
      gf = mdatoms->cFREEZE[i];
168
      fam = 0;
169
      for(m=0; m<DIM; m++)
170
        if (!opts->nFreeze[gf][m])
171
          fam += sqr(f[i][m]);
172
      fnorm2 += fam;
173
      if (fam > fmax2) {
174
        fmax2  = fam;
175
        la_max = i;
176
      }
177
    }
178
  } else {
179
    for(i=start; i<end; i++) {
180
      fam = norm2(f[i]);
181
      fnorm2 += fam;
182
      if (fam > fmax2) {
183
        fmax2  = fam;
184
        la_max = i;
185
      }
186
    }
187
  }
188

    
189
  if (la_max >= 0 && DOMAINDECOMP(cr)) {
190
    a_max = cr->dd->gatindex[la_max];
191
  } else {
192
    a_max = la_max;
193
  }
194
  if (PAR(cr)) {
195
    snew(sum,2*cr->nnodes+1);
196
    sum[2*cr->nodeid]   = fmax2;
197
    sum[2*cr->nodeid+1] = a_max;
198
    sum[2*cr->nnodes]   = fnorm2;
199
    gmx_sumd(2*cr->nnodes+1,sum,cr);
200
    fnorm2 = sum[2*cr->nnodes];
201
    /* Determine the global maximum */
202
    for(i=0; i<cr->nnodes; i++) {
203
      if (sum[2*i] > fmax2) {
204
        fmax2 = sum[2*i];
205
        a_max = (int)(sum[2*i+1] + 0.5);
206
      }
207
    }
208
    sfree(sum);
209
  }
210

    
211
  if (fnorm)
212
    *fnorm = sqrt(fnorm2);
213
  if (fmax)
214
    *fmax  = sqrt(fmax2);
215
  if (a_fmax)
216
    *a_fmax = a_max;
217
}
218

    
219
static void get_state_f_norm_max(t_commrec *cr,
220
                           t_grpopts *opts,t_mdatoms *mdatoms,
221
                           em_state_t *ems)
222
{
223
  get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
224
}
225

    
226
void init_em(FILE *fplog,const char *title,
227
             t_commrec *cr,t_inputrec *ir,
228
             t_state *state_global,gmx_mtop_t *top_global,
229
             em_state_t *ems,gmx_localtop_t **top,
230
             rvec *f,rvec **buf,rvec **f_global,
231
             t_nrnb *nrnb,rvec mu_tot,
232
             t_forcerec *fr,gmx_enerdata_t **enerd,
233
             t_graph **graph,t_mdatoms *mdatoms,
234
             gmx_vsite_t *vsite,gmx_constr_t constr,
235
             int nfile,t_filenm fnm[],int *fp_trn,int *fp_ene,
236
             t_mdebin **mdebin)
237
{
238
  int  start,homenr,i;
239
  real dvdlambda;
240

    
241
  if (fplog)
242
    fprintf(fplog,"Initiating %s\n",title);
243

    
244
  state_global->ngtc = 0;
245
  if (ir->eI == eiCG) {
246
    state_global->flags |= (1<<estCGP);
247
    snew(state_global->cg_p,state_global->nalloc);
248
  }
249

    
250
  /* Initiate some variables */
251
  if (ir->efep != efepNO)
252
    state_global->lambda = ir->init_lambda;
253
  else 
254
    state_global->lambda = 0.0;
255

    
256
  init_nrnb(nrnb);
257

    
258
  if (DOMAINDECOMP(cr)) {
259
    *top = dd_init_local_top(top_global);
260

    
261
    dd_init_local_state(cr->dd,state_global,&ems->s);
262

    
263
    /* Distribute the charge groups over the nodes from the master node */
264
    dd_partition_system(fplog,ir->init_step,cr,TRUE,
265
                        state_global,top_global,ir,
266
                        &ems->s,&ems->f,buf,mdatoms,*top,
267
                        fr,vsite,NULL,constr,
268
                        nrnb,NULL,FALSE);
269
    dd_store_state(cr->dd,&ems->s);
270
    
271
    if (ir->nstfout) {
272
      snew(*f_global,top_global->natoms);
273
    } else {
274
      *f_global = NULL;
275
    }
276
    *graph = NULL;
277
  } else {
278
    /* Just copy the state */
279
    ems->s = *state_global;
280
    snew(ems->s.x,ems->s.nalloc);
281
    snew(ems->f,ems->s.nalloc);
282
    for(i=0; i<state_global->natoms; i++)
283
      copy_rvec(state_global->x[i],ems->s.x[i]);
284
    copy_mat(state_global->box,ems->s.box);
285

    
286
    if (PAR(cr)) {
287
      /* Initialize the particle decomposition and split the topology */
288
      *top = split_system(fplog,top_global,ir,cr);
289
      
290
      pd_cg_range(cr,&fr->cg0,&fr->hcg);
291
    } else {
292
      *top = gmx_mtop_generate_local_top(top_global,ir);
293
    }
294
    *f_global = f;
295

    
296
    if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) {
297
      *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
298
    } else {
299
      *graph = NULL;
300
    }
301
  }
302

    
303
  clear_rvec(mu_tot);
304
  calc_shifts(ems->s.box,fr->shift_vec);
305

    
306
  if (PARTDECOMP(cr)) {
307
    pd_at_range(cr,&start,&homenr);
308
    homenr -= start;
309
  } else {
310
    start  = 0;
311
    homenr = top_global->natoms;
312
  }
313
  atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
314
  update_mdatoms(mdatoms,state_global->lambda);
315

    
316
  if (vsite && !DOMAINDECOMP(cr)) {
317
    set_vsite_top(vsite,*top,mdatoms,cr);
318
  }
319

    
320
  if (constr) {
321
    if (ir->eConstrAlg == econtSHAKE &&
322
        gmx_mtop_ftype_count(top_global,F_CONSTR) > 0) {
323
      gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
324
                econstr_names[econtSHAKE],econstr_names[econtLINCS]);
325
    }
326

    
327
    if (!DOMAINDECOMP(cr))
328
      set_constraints(constr,*top,ir,mdatoms,NULL);
329

    
330
    /* Constrain the starting coordinates */
331
    dvdlambda=0;
332
    constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
333
              ir,cr,-1,0,mdatoms,
334
              ems->s.x,ems->s.x,NULL,ems->s.box,ems->s.lambda,&dvdlambda,
335
              NULL,NULL,nrnb,econqCoord);
336
  }
337

    
338
  if (MASTER(cr)) {
339
    if (fp_trn)
340
      *fp_trn = open_trn(ftp2fn(efTRN,nfile,fnm),"w");
341
    if (fp_ene)
342
      *fp_ene = open_enx(ftp2fn(efENX,nfile,fnm),"w");
343
  } else {
344
    if (fp_trn)
345
      *fp_trn = -1;
346
    if (fp_ene)
347
      *fp_ene = -1;
348
  }
349

    
350
  snew(*enerd,1);
351
  init_enerdata(fplog,top_global->groups.grps[egcENER].nr,*enerd);
352

    
353
  /* Init bin for energy stuff */
354
  *mdebin = init_mdebin(*fp_ene,top_global,ir); 
355
}
356

    
357
static void finish_em(FILE *fplog,t_commrec *cr,
358
                      int fp_traj,int fp_ene)
359
{
360
  if (!(cr->duty & DUTY_PME)) {
361
    /* Tell the PME only node to finish */
362
    gmx_pme_finish(cr);
363
  }
364

    
365
  if (MASTER(cr)) {
366
    close_trn(fp_traj);
367
    close_enx(fp_ene);
368
  }
369
}
370

    
371
static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
372
{
373
  em_state_t tmp;
374

    
375
  tmp   = *ems1;
376
  *ems1 = *ems2;
377
  *ems2 = tmp;
378
}
379

    
380
static void copy_em_coords_back(em_state_t *ems,t_state *state,rvec *f)
381
{
382
  int i;
383

    
384
  for(i=0; (i<state->natoms); i++)
385
    copy_rvec(ems->s.x[i],state->x[i]);
386
  if (f != NULL)
387
    copy_rvec(ems->f[i],f[i]);
388
}
389

    
390
static void write_em_traj(FILE *fplog,t_commrec *cr,
391
                          int fp_trn,bool bX,bool bF,char *confout,
392
                          gmx_mtop_t *top_global,
393
                          t_inputrec *ir,int step,
394
                          em_state_t *state,
395
                          t_state *state_global,rvec *f_global)
396
{
397
  if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr)) {
398
    f_global = state->f;
399
    copy_em_coords_back(state,state_global,bF ? f_global : NULL);
400
  }
401
  
402
  write_traj(fplog,cr,fp_trn,bX,FALSE,bF,0,FALSE,0,NULL,FALSE,
403
             top_global,ir->eI,ir->simulation_part,step,(double)step,
404
             &state->s,state_global,state->f,f_global);
405
  
406
  if (confout != NULL && MASTER(cr)) {
407
    write_sto_conf_mtop(confout,
408
                        *top_global->name,top_global,
409
                        state_global->x,NULL,ir->ePBC,state_global->box);
410
  }
411
}
412
  
413
static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
414
                       em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
415
                       gmx_constr_t constr,gmx_localtop_t *top,
416
                       t_nrnb *nrnb,gmx_wallcycle_t wcycle,
417
                       int count)
418

    
419
{
420
  t_state *s1,*s2;
421
  int  start,end,gf,i,m;
422
  rvec *x1,*x2;
423
  real dvdlambda;
424

    
425
  s1 = &ems1->s;
426
  s2 = &ems2->s;
427

    
428
  if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
429
    gmx_incons("state mismatch in do_em_step");
430

    
431
  s2->flags = s1->flags;
432

    
433
  if (s2->nalloc != s1->nalloc) {
434
    s2->nalloc = s1->nalloc;
435
    srenew(s2->x,s1->nalloc);
436
    srenew(ems2->f,  s1->nalloc);
437
    if (s2->flags & (1<<estCGP))
438
      srenew(s2->cg_p,  s1->nalloc);
439
  }
440
  
441
  s2->natoms = s1->natoms;
442
  s2->lambda = s1->lambda;
443
  copy_mat(s1->box,s2->box);
444

    
445
  start = md->start;
446
  end   = md->start + md->homenr;
447

    
448
  x1 = s1->x;
449
  x2 = s2->x;
450
  gf = 0;
451
  for(i=start; i<end; i++) {
452
    if (md->cFREEZE)
453
      gf = md->cFREEZE[i];
454
    for(m=0; m<DIM; m++) {
455
      if (ir->opts.nFreeze[gf][m])
456
        x2[i][m] = x1[i][m];
457
      else
458
        x2[i][m] = x1[i][m] + a*f[i][m];
459
    }
460
  }
461

    
462
  if (s2->flags & (1<<estCGP)) {
463
    /* Copy the CG p vector */
464
    x1 = s1->cg_p;
465
    x2 = s2->cg_p;
466
    for(i=start; i<end; i++)
467
      copy_rvec(x1[i],x2[i]);
468
  }
469

    
470
  if (DOMAINDECOMP(cr)) {
471
    s2->ddp_count = s1->ddp_count;
472
    if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
473
      s2->cg_gl_nalloc = s1->cg_gl_nalloc;
474
      srenew(s2->cg_gl,s2->cg_gl_nalloc);
475
    }
476
    s2->ncg_gl = s1->ncg_gl;
477
    for(i=0; i<s2->ncg_gl; i++)
478
      s2->cg_gl[i] = s1->cg_gl[i];
479
    s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
480
  }
481

    
482
  if (constr) {
483
    wallcycle_start(wcycle,ewcCONSTR);
484
    dvdlambda = 0;
485
    constrain(NULL,TRUE,TRUE,constr,&top->idef,        
486
              ir,cr,count,0,md,
487
              s1->x,s2->x,NULL,s2->box,s2->lambda,
488
              &dvdlambda,NULL,NULL,nrnb,econqCoord);
489
    wallcycle_stop(wcycle,ewcCONSTR);
490
  }
491
}
492

    
493
static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2)
494

    
495
{
496
  int  start,end,i,m;
497

    
498
  if (DOMAINDECOMP(cr)) {
499
    start = 0;
500
    end   = cr->dd->nat_home;
501
  } else if (PARTDECOMP(cr)) {
502
    pd_at_range(cr,&start,&end);
503
  } else {
504
    start = 0;
505
    end   = n;
506
  }
507

    
508
  for(i=start; i<end; i++) {
509
    for(m=0; m<DIM; m++) {
510
      x2[i][m] = x1[i][m] + a*f[i][m];
511
    }
512
  }
513
}
514

    
515
static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f)
516

    
517
{
518
  int  start,end,i,m;
519

    
520
  if (DOMAINDECOMP(cr)) {
521
    start = 0;
522
    end   = cr->dd->nat_home;
523
  } else if (PARTDECOMP(cr)) {
524
    pd_at_range(cr,&start,&end);
525
  } else {
526
    start = 0;
527
    end   = n;
528
  }
529

    
530
  for(i=start; i<end; i++) {
531
    for(m=0; m<DIM; m++) {
532
      f[i][m] = (x1[i][m] - x2[i][m])*a;
533
    }
534
  }
535
}
536

    
537
static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
538
                                   gmx_mtop_t *top_global,t_inputrec *ir,
539
                                   em_state_t *ems,rvec **buf,gmx_localtop_t *top,
540
                                   t_mdatoms *mdatoms,t_forcerec *fr,
541
                                   gmx_vsite_t *vsite,gmx_constr_t constr,
542
                                   t_nrnb *nrnb,gmx_wallcycle_t wcycle)
543
{
544
  /* Repartition the domain decomposition */
545
  wallcycle_start(wcycle,ewcDOMDEC);
546
  dd_partition_system(fplog,step,cr,FALSE,
547
                      NULL,top_global,ir,
548
                      &ems->s,&ems->f,buf,
549
                      mdatoms,top,fr,vsite,NULL,constr,
550
                      nrnb,wcycle,FALSE);
551
  dd_store_state(cr->dd,&ems->s);
552
  wallcycle_stop(wcycle,ewcDOMDEC);
553
}
554
    
555
static void evaluate_energy(FILE *fplog,bool bVerbose,t_commrec *cr,
556
                            t_state *state_global,gmx_mtop_t *top_global,
557
                            em_state_t *ems,rvec **buf,gmx_localtop_t *top,
558
                            t_inputrec *inputrec,
559
                            t_nrnb *nrnb,gmx_wallcycle_t wcycle,
560
                            gmx_vsite_t *vsite,gmx_constr_t constr,
561
                            t_fcdata *fcd,
562
                            t_graph *graph,t_mdatoms *mdatoms,
563
                            t_forcerec *fr, rvec mu_tot,
564
                            gmx_enerdata_t *enerd,tensor vir,tensor pres,
565
                            int count,bool bFirst)
566
{
567
  real t;
568
  bool bNS;
569
  int  nabnsb;
570
  tensor force_vir,shake_vir,ekin;
571
  real dvdl;
572
  real terminate=0;
573
  t_state *state;
574
  
575
  /* Set the time to the initial time, the time does not change during EM */
576
  t = inputrec->init_t;
577

    
578
  if (bFirst ||
579
      (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
580
    /* This the first state or an old state used before the last ns */
581
    bNS = TRUE;
582
  } else {
583
    bNS = FALSE;
584
    if (inputrec->nstlist > 0) {
585
      bNS = TRUE;
586
    } else if (inputrec->nstlist == -1) {
587
      nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
588
      if (PAR(cr))
589
        gmx_sumi(1,&nabnsb,cr);
590
      bNS = (nabnsb > 0);
591
    }
592
  }
593

    
594
  if (vsite)
595
    construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
596
                     top->idef.iparams,top->idef.il,
597
                     fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
598

    
599
  if (DOMAINDECOMP(cr)) {
600
    if (bNS) {
601
      /* Repartition the domain decomposition */
602
      em_dd_partition_system(fplog,count,cr,top_global,inputrec,
603
                             ems,buf,top,mdatoms,fr,vsite,constr,
604
                             nrnb,wcycle);
605
    }
606
  }
607
      
608
  /* Calc force & energy on new trial position  */
609
  /* do_force always puts the charge groups in the box and shifts again
610
   * We do not unshift, so molecules are always whole in congrad.c
611
   */
612
  do_force(fplog,cr,inputrec,
613
           count,nrnb,wcycle,top,&top_global->groups,
614
           ems->s.box,ems->s.x,&ems->s.hist,
615
           ems->f,*buf,force_vir,mdatoms,enerd,fcd,
616
           ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,
617
           GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
618
           (bNS ? GMX_FORCE_NS : 0));
619

    
620
  /* Clear the unused shake virial and pressure */
621
  clear_mat(shake_vir);
622
  clear_mat(pres);
623

    
624
  /* Calculate long range corrections to pressure and energy */
625
  calc_dispcorr(fplog,inputrec,fr,count,mdatoms->nr,ems->s.box,ems->s.lambda,
626
                pres,force_vir,enerd->term);
627

    
628
  /* Communicate stuff when parallel */
629
  if (PAR(cr)) {
630
    wallcycle_start(wcycle,ewcMoveE);
631

    
632
    global_stat(fplog,cr,enerd,force_vir,shake_vir,mu_tot,
633
                inputrec,NULL,FALSE,NULL,NULL,NULL,NULL,&terminate);
634

    
635
    wallcycle_stop(wcycle,ewcMoveE);
636
  }
637

    
638
  ems->epot = enerd->term[F_EPOT];
639
  
640
  if (constr) {
641
    /* Project out the constraint components of the force */
642
    wallcycle_start(wcycle,ewcCONSTR);
643
    dvdl = 0;
644
    constrain(NULL,FALSE,FALSE,constr,&top->idef,
645
              inputrec,cr,count,0,mdatoms,
646
              ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
647
              NULL,&shake_vir,nrnb,econqForce);
648
    if (fr->bSepDVDL && fplog)
649
      fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
650
    enerd->term[F_DVDL] += dvdl;
651
    m_add(force_vir,shake_vir,vir);
652
    wallcycle_stop(wcycle,ewcCONSTR);
653
  } else {
654
    copy_mat(force_vir,vir);
655
  }
656

    
657
  clear_mat(ekin);
658
  enerd->term[F_PRES] =
659
    calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres,
660
              (fr->eeltype==eelPPPM)?enerd->term[F_COUL_RECIP]:0.0);
661

    
662
  get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
663
}
664

    
665
static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
666
                              gmx_mtop_t *mtop,
667
                              em_state_t *s_min,em_state_t *s_b)
668
{
669
  rvec *fm,*fb,*fmg;
670
  t_block *cgs_gl;
671
  int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
672
  double partsum;
673
  unsigned char *grpnrFREEZE;
674

    
675
  if (debug)
676
    fprintf(debug,"Doing reorder_partsum\n");
677

    
678
  fm = s_min->f;
679
  fb = s_b->f;
680

    
681
  cgs_gl = dd_charge_groups_global(cr->dd);
682
  index = cgs_gl->index;
683

    
684
  /* Collect fm in a global vector fmg.
685
   * This conflics with the spirit of domain decomposition,
686
   * but to fully optimize this a much more complicated algorithm is required.
687
   */
688
  snew(fmg,mtop->natoms);
689
  
690
  ncg   = s_min->s.ncg_gl;
691
  cg_gl = s_min->s.cg_gl;
692
  i = 0;
693
  for(c=0; c<ncg; c++) {
694
    cg = cg_gl[c];
695
    a0 = index[cg];
696
    a1 = index[cg+1];
697
    for(a=a0; a<a1; a++) {
698
      copy_rvec(fm[i],fmg[a]);
699
      i++;
700
    }
701
  }
702
  gmx_sum(mtop->natoms*3,fmg[0],cr);
703

    
704
  /* Now we will determine the part of the sum for the cgs in state s_b */
705
  ncg   = s_b->s.ncg_gl;
706
  cg_gl = s_b->s.cg_gl;
707
  partsum = 0;
708
  i = 0;
709
  gf = 0;
710
  grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
711
  for(c=0; c<ncg; c++) {
712
    cg = cg_gl[c];
713
    a0 = index[cg];
714
    a1 = index[cg+1];
715
    for(a=a0; a<a1; a++) {
716
      if (mdatoms->cFREEZE && grpnrFREEZE) {
717
        gf = grpnrFREEZE[i];
718
      }
719
      for(m=0; m<DIM; m++) {
720
        if (!opts->nFreeze[gf][m]) {
721
          partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
722
        }
723
      }
724
      i++;
725
    }
726
  }
727
  
728
  sfree(fmg);
729

    
730
  return partsum;
731
}
732

    
733
static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
734
                    gmx_mtop_t *mtop,
735
                    em_state_t *s_min,em_state_t *s_b)
736
{
737
  rvec *fm,*fb;
738
  double sum;
739
  int  gf,i,m;
740

    
741
  /* This is just the classical Polak-Ribiere calculation of beta;
742
   * it looks a bit complicated since we take freeze groups into account,
743
   * and might have to sum it in parallel runs.
744
   */
745
  
746
  if (!DOMAINDECOMP(cr) ||
747
      (s_min->s.ddp_count == cr->dd->ddp_count &&
748
       s_b->s.ddp_count   == cr->dd->ddp_count)) {
749
    fm = s_min->f;
750
    fb = s_b->f;
751
    sum = 0;
752
    gf = 0;
753
    /* This part of code can be incorrect with DD,
754
     * since the atom ordering in s_b and s_min might differ.
755
     */
756
    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
757
      if (mdatoms->cFREEZE)
758
        gf = mdatoms->cFREEZE[i];
759
      for(m=0; m<DIM; m++)
760
        if (!opts->nFreeze[gf][m]) {
761
          sum += (fb[i][m] - fm[i][m])*fb[i][m];
762
        } 
763
    }
764
  } else {
765
    /* We need to reorder cgs while summing */
766
    sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
767
  }
768
  if (PAR(cr))
769
    gmx_sumd(1,&sum,cr);
770

    
771
  return sum/sqr(s_min->fnorm);
772
}
773

    
774
time_t do_cg(FILE *fplog,t_commrec *cr,
775
             int nfile,t_filenm fnm[],
776
             bool bVerbose,bool bCompact,
777
             gmx_vsite_t *vsite,gmx_constr_t constr,
778
             int stepout,
779
             t_inputrec *inputrec,
780
             gmx_mtop_t *top_global,t_fcdata *fcd,
781
             t_state *state_global,rvec f[],
782
             rvec buf[],t_mdatoms *mdatoms,
783
             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
784
             gmx_edsam_t ed,
785
             t_forcerec *fr,
786
             int repl_ex_nst,int repl_ex_seed,
787
             real cpt_period,real max_hours,
788
             unsigned long Flags,
789
             int *nsteps_done)
790
{
791
  const char *CG="Polak-Ribiere Conjugate Gradients";
792

    
793
  em_state_t *s_min,*s_a,*s_b,*s_c;
794
  gmx_localtop_t *top;
795
  gmx_enerdata_t *enerd;
796
  t_graph    *graph;
797
  rvec   *f_global,*p,*sf,*sfm;
798
  double gpa,gpb,gpc,tmp,sum[2],minstep;
799
  real   fnormn;
800
  real   stepsize;        
801
  real   a,b,c,beta=0.0;
802
  real   epot_repl=0;
803
  real   pnorm;
804
  t_mdebin   *mdebin;
805
  bool   converged,foundlower;
806
  rvec   mu_tot;
807
  time_t start_t;
808
  bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
809
  tensor vir,pres;
810
  int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
811
  int    fp_trn,fp_ene;
812
  int    i,m,gf,step,nminstep;
813
  real   terminate=0;  
814

    
815
  step=0;
816

    
817
  s_min = init_em_state();
818
  s_a   = init_em_state();
819
  s_b   = init_em_state();
820
  s_c   = init_em_state();
821

    
822
  /* Init em and store the local state in s_min */
823
  init_em(fplog,CG,cr,inputrec,
824
          state_global,top_global,s_min,&top,f,&buf,&f_global,
825
          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr,
826
          nfile,fnm,&fp_trn,&fp_ene,&mdebin);
827

    
828
  /* Print to log file */
829
  start_t=print_date_and_time(fplog,cr->nodeid,
830
                              "Started Polak-Ribiere Conjugate Gradients");
831
  wallcycle_start(wcycle,ewcRUN);
832
  
833
  /* Max number of steps */
834
  number_steps=inputrec->nsteps;
835

    
836
  if (MASTER(cr))
837
    sp_header(stderr,CG,inputrec->em_tol,number_steps);
838
  if (fplog)
839
    sp_header(fplog,CG,inputrec->em_tol,number_steps);
840

    
841
  /* Call the force routine and some auxiliary (neighboursearching etc.) */
842
  /* do_force always puts the charge groups in the box and shifts again
843
   * We do not unshift, so molecules are always whole in congrad.c
844
   */
845
  evaluate_energy(fplog,bVerbose,cr,
846
                  state_global,top_global,s_min,&buf,top,
847
                  inputrec,nrnb,wcycle,
848
                  vsite,constr,fcd,graph,mdatoms,fr,
849
                  mu_tot,enerd,vir,pres,-1,TRUE);
850
  where();
851

    
852
  if (MASTER(cr)) {
853
    /* Copy stuff to the energy bin for easy printing etc. */
854
    upd_mdebin(mdebin,NULL,TRUE,mdatoms->tmass,step,(real)step,
855
               enerd,&s_min->s,s_min->s.box,
856
               NULL,NULL,vir,pres,NULL,mu_tot,constr);
857
    
858
    print_ebin_header(fplog,step,step,s_min->s.lambda);
859
    print_ebin(fp_ene,TRUE,FALSE,FALSE,fplog,step,step,step,eprNORMAL,
860
               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
861
  }
862
  where();
863

    
864
  /* Estimate/guess the initial stepsize */
865
  stepsize = inputrec->em_stepsize/s_min->fnorm;
866
 
867
  if (MASTER(cr)) {
868
    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
869
            s_min->fmax,s_min->a_fmax+1);
870
    fprintf(stderr,"   F-Norm            = %12.5e\n",
871
            s_min->fnorm/sqrt(state_global->natoms));
872
    fprintf(stderr,"\n");
873
    /* and copy to the log file too... */
874
    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
875
            s_min->fmax,s_min->a_fmax+1);
876
    fprintf(fplog,"   F-Norm            = %12.5e\n",
877
            s_min->fnorm/sqrt(state_global->natoms));
878
    fprintf(fplog,"\n");
879
  }  
880
  /* Start the loop over CG steps.                
881
   * Each successful step is counted, and we continue until
882
   * we either converge or reach the max number of steps.
883
   */
884
  for(step=0,converged=FALSE;( step<=number_steps || number_steps==0) && !converged;step++) {
885
    
886
    /* start taking steps in a new direction 
887
     * First time we enter the routine, beta=0, and the direction is 
888
     * simply the negative gradient.
889
     */
890

    
891
    /* Calculate the new direction in p, and the gradient in this direction, gpa */
892
    p  = s_min->s.cg_p;
893
    sf = s_min->f;
894
    gpa = 0;
895
    gf = 0;
896
    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
897
      if (mdatoms->cFREEZE) 
898
        gf = mdatoms->cFREEZE[i];
899
      for(m=0; m<DIM; m++) {
900
        if (!inputrec->opts.nFreeze[gf][m]) {
901
          p[i][m] = sf[i][m] + beta*p[i][m];
902
          gpa -= p[i][m]*sf[i][m];
903
          /* f is negative gradient, thus the sign */
904
        } else {
905
          p[i][m] = 0;
906
        }
907
      }
908
    }
909
    
910
    /* Sum the gradient along the line across CPUs */
911
    if (PAR(cr))
912
      gmx_sumd(1,&gpa,cr);
913

    
914
    /* Calculate the norm of the search vector */
915
    get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
916
    
917
    /* Just in case stepsize reaches zero due to numerical precision... */
918
    if(stepsize<=0)          
919
      stepsize = inputrec->em_stepsize/pnorm;
920
    
921
    /* 
922
     * Double check the value of the derivative in the search direction.
923
     * If it is positive it must be due to the old information in the
924
     * CG formula, so just remove that and start over with beta=0.
925
     * This corresponds to a steepest descent step.
926
     */
927
    if(gpa>0) {
928
      beta = 0;
929
      step--; /* Don't count this step since we are restarting */
930
      continue; /* Go back to the beginning of the big for-loop */
931
    }
932

    
933
    /* Calculate minimum allowed stepsize, before the average (norm)
934
     * relative change in coordinate is smaller than precision
935
     */
936
    minstep=0;
937
    for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
938
      for(m=0; m<DIM; m++) {
939
        tmp = fabs(s_min->s.x[i][m]);
940
        if(tmp < 1.0)
941
          tmp = 1.0;
942
        tmp = p[i][m]/tmp;
943
        minstep += tmp*tmp;
944
      }
945
    }
946
    /* Add up from all CPUs */
947
    if(PAR(cr))
948
      gmx_sumd(1,&minstep,cr);
949

    
950
    minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
951

    
952
    if(stepsize<minstep) {
953
      converged=TRUE;
954
      break;
955
    }
956
    
957
    /* Write coordinates if necessary */
958
    do_x = do_per_step(step,inputrec->nstxout);
959
    do_f = do_per_step(step,inputrec->nstfout);
960
    
961
    write_em_traj(fplog,cr,fp_trn,do_x,do_f,NULL,
962
                  top_global,inputrec,step,
963
                  s_min,state_global,f_global);
964
    
965
    /* Take a step downhill.
966
     * In theory, we should minimize the function along this direction.
967
     * That is quite possible, but it turns out to take 5-10 function evaluations
968
     * for each line. However, we dont really need to find the exact minimum -
969
     * it is much better to start a new CG step in a modified direction as soon
970
     * as we are close to it. This will save a lot of energy evaluations.
971
     *
972
     * In practice, we just try to take a single step.
973
     * If it worked (i.e. lowered the energy), we increase the stepsize but
974
     * the continue straight to the next CG step without trying to find any minimum.
975
     * If it didn't work (higher energy), there must be a minimum somewhere between
976
     * the old position and the new one.
977
     * 
978
     * Due to the finite numerical accuracy, it turns out that it is a good idea
979
     * to even accept a SMALL increase in energy, if the derivative is still downhill.
980
     * This leads to lower final energies in the tests I've done. / Erik 
981
     */
982
    s_a->epot = s_min->epot;
983
    a = 0.0;
984
    c = a + stepsize; /* reference position along line is zero */
985
    
986
    if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
987
      em_dd_partition_system(fplog,step,cr,top_global,inputrec,
988
                             s_min,&buf,top,mdatoms,fr,vsite,constr,
989
                             nrnb,wcycle);
990
    }
991

    
992
    /* Take a trial step (new coords in s_c) */
993
    do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
994
               constr,top,nrnb,wcycle,-1);
995
    
996
    neval++;
997
    /* Calculate energy for the trial step */
998
    evaluate_energy(fplog,bVerbose,cr,
999
                    state_global,top_global,s_c,&buf,top,
1000
                    inputrec,nrnb,wcycle,
1001
                    vsite,constr,fcd,graph,mdatoms,fr,
1002
                    mu_tot,enerd,vir,pres,-1,FALSE);
1003
    
1004
    /* Calc derivative along line */
1005
    p  = s_c->s.cg_p;
1006
    sf = s_c->f;
1007
    gpc=0;
1008
    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1009
      for(m=0; m<DIM; m++) 
1010
          gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
1011
    }
1012
    /* Sum the gradient along the line across CPUs */
1013
    if (PAR(cr))
1014
      gmx_sumd(1,&gpc,cr);
1015

    
1016
    /* This is the max amount of increase in energy we tolerate */
1017
    tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1018

    
1019
    /* Accept the step if the energy is lower, or if it is not significantly higher
1020
     * and the line derivative is still negative.
1021
     */
1022
    if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1023
      foundlower = TRUE;
1024
      /* Great, we found a better energy. Increase step for next iteration
1025
       * if we are still going down, decrease it otherwise
1026
       */
1027
      if(gpc<0)
1028
        stepsize *= 1.618034;  /* The golden section */
1029
      else
1030
        stepsize *= 0.618034;  /* 1/golden section */
1031
    } else {
1032
      /* New energy is the same or higher. We will have to do some work
1033
       * to find a smaller value in the interval. Take smaller step next time!
1034
       */
1035
      foundlower = FALSE;
1036
      stepsize *= 0.618034;
1037
    }    
1038

    
1039

    
1040

    
1041
    
1042
    /* OK, if we didn't find a lower value we will have to locate one now - there must
1043
     * be one in the interval [a=0,c].
1044
     * The same thing is valid here, though: Don't spend dozens of iterations to find
1045
     * the line minimum. We try to interpolate based on the derivative at the endpoints,
1046
     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1047
     *
1048
     * I also have a safeguard for potentially really patological functions so we never
1049
     * take more than 20 steps before we give up ...
1050
     *
1051
     * If we already found a lower value we just skip this step and continue to the update.
1052
     */
1053
    if (!foundlower) {
1054
      nminstep=0;
1055

    
1056
      do {
1057
        /* Select a new trial point.
1058
         * If the derivatives at points a & c have different sign we interpolate to zero,
1059
         * otherwise just do a bisection.
1060
         */
1061
        if(gpa<0 && gpc>0)
1062
          b = a + gpa*(a-c)/(gpc-gpa);
1063
        else
1064
          b = 0.5*(a+c);                
1065
        
1066
        /* safeguard if interpolation close to machine accuracy causes errors:
1067
         * never go outside the interval
1068
         */
1069
        if(b<=a || b>=c)
1070
          b = 0.5*(a+c);
1071
        
1072
        if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1073
          /* Reload the old state */
1074
          em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1075
                                 s_min,&buf,top,mdatoms,fr,vsite,constr,
1076
                                 nrnb,wcycle);
1077
        }
1078

    
1079
        /* Take a trial step to this new point - new coords in s_b */
1080
        do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
1081
                   constr,top,nrnb,wcycle,-1);
1082
        
1083
        neval++;
1084
        /* Calculate energy for the trial step */
1085
        evaluate_energy(fplog,bVerbose,cr,
1086
                        state_global,top_global,s_b,&buf,top,
1087
                        inputrec,nrnb,wcycle,
1088
                        vsite,constr,fcd,graph,mdatoms,fr,
1089
                        mu_tot,enerd,vir,pres,-1,FALSE);
1090
        
1091
        /* p does not change within a step, but since the domain decomposition
1092
         * might change, we have to use cg_p of s_b here.
1093
         */
1094
        p  = s_b->s.cg_p;
1095
        sf = s_b->f;
1096
        gpb=0;
1097
        for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1098
          for(m=0; m<DIM; m++)
1099
              gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
1100
        }
1101
        /* Sum the gradient along the line across CPUs */
1102
        if (PAR(cr))
1103
          gmx_sumd(1,&gpb,cr);
1104
        
1105
        if (debug)
1106
          fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1107
                  s_a->epot,s_b->epot,s_c->epot,gpb);
1108

    
1109
        epot_repl = s_b->epot;
1110
        
1111
        /* Keep one of the intervals based on the value of the derivative at the new point */
1112
        if (gpb > 0) {
1113
          /* Replace c endpoint with b */
1114
          swap_em_state(s_b,s_c);
1115
          c = b;
1116
          gpc = gpb;
1117
        } else {
1118
          /* Replace a endpoint with b */
1119
          swap_em_state(s_b,s_a);
1120
          a = b;
1121
          gpa = gpb;
1122
        }
1123
        
1124
        /* 
1125
         * Stop search as soon as we find a value smaller than the endpoints.
1126
         * Never run more than 20 steps, no matter what.
1127
         */
1128
        nminstep++;
1129
      } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1130
               (nminstep < 20));     
1131
      
1132
      if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1133
          nminstep >= 20) {
1134
        /* OK. We couldn't find a significantly lower energy.
1135
         * If beta==0 this was steepest descent, and then we give up.
1136
         * If not, set beta=0 and restart with steepest descent before quitting.
1137
         */
1138
        if (beta == 0.0) {
1139
          /* Converged */
1140
          converged = TRUE;
1141
          break;
1142
        } else {
1143
          /* Reset memory before giving up */
1144
          beta = 0.0;
1145
          continue;
1146
        }
1147
      }
1148
      
1149
      /* Select min energy state of A & C, put the best in B.
1150
       */
1151
      if (s_c->epot < s_a->epot) {
1152
        if (debug)
1153
          fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1154
                  s_c->epot,s_a->epot);
1155
        swap_em_state(s_b,s_c);
1156
        gpb = gpc;
1157
        b = c;
1158
      } else {
1159
        if (debug)
1160
          fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1161
                  s_a->epot,s_c->epot);
1162
        swap_em_state(s_b,s_a);
1163
        gpb = gpa;
1164
        b = a;
1165
      }
1166
      
1167
    } else {
1168
      if (debug)
1169
        fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1170
                s_c->epot);
1171
      swap_em_state(s_b,s_c);
1172
      gpb = gpc;
1173
      b = c;
1174
    }
1175
    
1176
    /* new search direction */
1177
    /* beta = 0 means forget all memory and restart with steepest descents. */
1178
    if (nstcg && ((step % nstcg)==0)) 
1179
      beta = 0.0;
1180
    else {
1181
      /* s_min->fnorm cannot be zero, because then we would have converged
1182
       * and broken out.
1183
       */
1184

    
1185
      /* Polak-Ribiere update.
1186
       * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1187
       */
1188
      beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1189
    }
1190
    /* Limit beta to prevent oscillations */
1191
    if (fabs(beta) > 5.0)
1192
      beta = 0.0;
1193
    
1194
    
1195
    /* update positions */
1196
    swap_em_state(s_min,s_b);
1197
    gpa = gpb;
1198
    
1199
    /* Print it if necessary */
1200
    if (MASTER(cr)) {
1201
      if(bVerbose)
1202
        fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1203
                step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1204
                s_min->fmax,s_min->a_fmax+1);
1205
      /* Store the new (lower) energies */
1206
      upd_mdebin(mdebin,NULL,TRUE,mdatoms->tmass,step,(real)step,
1207
                 enerd,&s_min->s,s_min->s.box,
1208
                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1209
      do_log = do_per_step(step,inputrec->nstlog);
1210
      do_ene = do_per_step(step,inputrec->nstenergy);
1211
      if(do_log)
1212
        print_ebin_header(fplog,step,step,s_min->s.lambda);
1213
      print_ebin(fp_ene,do_ene,FALSE,FALSE,
1214
                 do_log ? fplog : NULL,step,step,step,eprNORMAL,
1215
                 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1216
    }
1217
    
1218
    /* Stop when the maximum force lies below tolerance.
1219
     * If we have reached machine precision, converged is already set to true.
1220
     */        
1221
    converged = converged || (s_min->fmax < inputrec->em_tol);
1222
    
1223
  } /* End of the loop */
1224
  
1225
  if (converged)        
1226
    step--; /* we never took that last step in this case */
1227
  
1228
  if (s_min->fmax > inputrec->em_tol) {
1229
    if (MASTER(cr)) {
1230
      warn_step(stderr,inputrec->em_tol,FALSE);
1231
      warn_step(fplog,inputrec->em_tol,FALSE);
1232
    }
1233
    converged = FALSE; 
1234
  }
1235
  
1236
  if (MASTER(cr)) {
1237
    /* If we printed energy and/or logfile last step (which was the last step)
1238
     * we don't have to do it again, but otherwise print the final values.
1239
     */
1240
    if(!do_log) {
1241
      /* Write final value to log since we didn't do anything the last step */
1242
      print_ebin_header(fplog,step,step,s_min->s.lambda);
1243
    }
1244
    if (!do_ene || !do_log) {
1245
      /* Write final energy file entries */
1246
      print_ebin(fp_ene,!do_ene,FALSE,FALSE,
1247
                 !do_log ? fplog : NULL,step,step,step,eprNORMAL,
1248
                 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1249
    }
1250
  }
1251

    
1252
  /* Print some stuff... */
1253
  if (MASTER(cr))
1254
    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1255
  
1256
  /* IMPORTANT!
1257
   * For accurate normal mode calculation it is imperative that we
1258
   * store the last conformation into the full precision binary trajectory.
1259
   *
1260
   * However, we should only do it if we did NOT already write this step
1261
   * above (which we did if do_x or do_f was true).
1262
   */  
1263
  do_x = !do_per_step(step,inputrec->nstxout);
1264
  do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1265
  
1266
  write_em_traj(fplog,cr,fp_trn,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1267
                top_global,inputrec,step,
1268
                s_min,state_global,f_global);
1269
  
1270
  fnormn = s_min->fnorm/sqrt(state_global->natoms);
1271
  
1272
  if (MASTER(cr)) {
1273
    print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1274
                    s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1275
    print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1276
                    s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1277
    
1278
    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1279
  }
1280
  
1281
  finish_em(fplog,cr,fp_trn,fp_ene);
1282
  
1283
  /* To print the actual number of steps we needed somewhere */
1284
  *nsteps_done = step;
1285

    
1286
  return start_t;
1287
} /* That's all folks */
1288

    
1289

    
1290
time_t do_lbfgs(FILE *fplog,t_commrec *cr,
1291
                int nfile,t_filenm fnm[],
1292
                bool bVerbose,bool bCompact,
1293
                gmx_vsite_t *vsite,gmx_constr_t constr,
1294
                int stepout,
1295
                t_inputrec *inputrec,
1296
                gmx_mtop_t *top_global,t_fcdata *fcd,
1297
                t_state *state,rvec f[],
1298
                rvec buf[],t_mdatoms *mdatoms,
1299
                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1300
                gmx_edsam_t ed,
1301
                t_forcerec *fr,
1302
                int repl_ex_nst,int repl_ex_seed,
1303
                real cpt_period,real max_hours,
1304
                unsigned long Flags,
1305
                int *nsteps_done)
1306
{
1307
  static char *LBFGS="Low-Memory BFGS Minimizer";
1308
  em_state_t ems;
1309
  gmx_localtop_t *top;
1310
  gmx_enerdata_t *enerd;
1311
  t_graph    *graph;
1312
  int    ncorr,nmaxcorr,point,cp,neval,nminstep;
1313
  double stepsize,gpa,gpb,gpc,tmp,minstep;
1314
  real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;        
1315
  real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1316
  real   a,b,c,maxdelta,delta;
1317
  real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
1318
  real   dgdx,dgdg,sq,yr,beta;
1319
  t_mdebin   *mdebin;
1320
  bool   converged,first;
1321
  rvec   mu_tot;
1322
  real   fnorm,fmax;
1323
  time_t start_t;
1324
  bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
1325
  tensor vir,pres;
1326
  int    fp_trn,fp_ene,start,end,number_steps;
1327
  int    i,k,m,n,nfmax,gf,step;
1328
  /* not used */
1329
  real   terminate;
1330

    
1331
  if (PAR(cr))
1332
    gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1333
  
1334
  n = 3*state->natoms;
1335
  nmaxcorr = inputrec->nbfgscorr;
1336
  
1337
  /* Allocate memory */
1338
  snew(f,state->natoms);
1339
  /* Use pointers to real so we dont have to loop over both atoms and
1340
   * dimensions all the time...
1341
   * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1342
   * that point to the same memory.
1343
   */
1344
  snew(xa,n);
1345
  snew(xb,n);
1346
  snew(xc,n);
1347
  snew(fa,n);
1348
  snew(fb,n);
1349
  snew(fc,n);
1350
  snew(frozen,n);
1351
  
1352
  xx = (real *)state->x;
1353
  ff = (real *)f;
1354

    
1355
        printf("x0: %20.12g %20.12g %20.12g\n",xx[0],xx[1],xx[2]);
1356

    
1357
  snew(p,n); 
1358
  snew(lastx,n); 
1359
  snew(lastf,n); 
1360
  snew(rho,nmaxcorr);
1361
  snew(alpha,nmaxcorr);
1362
  
1363
  snew(dx,nmaxcorr);
1364
  for(i=0;i<nmaxcorr;i++)
1365
    snew(dx[i],n);
1366
  
1367
  snew(dg,nmaxcorr);
1368
  for(i=0;i<nmaxcorr;i++)
1369
    snew(dg[i],n);
1370

    
1371
  step = 0;
1372
  neval = 0; 
1373

    
1374
  /* Init em */
1375
  init_em(fplog,LBFGS,cr,inputrec,
1376
          state,top_global,&ems,&top,f,&buf,&f,
1377
          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr,
1378
          nfile,fnm,&fp_trn,&fp_ene,&mdebin);
1379
  /* Do_lbfgs is not completely updated like do_steep and do_cg,
1380
   * so we free some memory again.
1381
   */
1382
  sfree(ems.s.x);
1383
  sfree(ems.f);
1384

    
1385
  start = mdatoms->start;
1386
  end   = mdatoms->homenr + start;
1387
    
1388
  /* Print to log file */
1389
  start_t=print_date_and_time(fplog,cr->nodeid,
1390
                              "Started Low-Memory BFGS Minimization");
1391
  wallcycle_start(wcycle,ewcRUN);
1392
  
1393
  do_log = do_ene = do_x = do_f = TRUE;
1394
  
1395
  /* Max number of steps */
1396
  number_steps=inputrec->nsteps;
1397

    
1398
  /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1399
  gf = 0;
1400
  for(i=start; i<end; i++) {
1401
    if (mdatoms->cFREEZE)
1402
      gf = mdatoms->cFREEZE[i];
1403
     for(m=0; m<DIM; m++) 
1404
       frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];  
1405
  }
1406
  if (MASTER(cr))
1407
    sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1408
  if (fplog)
1409
    sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1410
  
1411
  if (vsite)
1412
    construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1413
                     top->idef.iparams,top->idef.il,
1414
                     fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1415
  
1416
  /* Call the force routine and some auxiliary (neighboursearching etc.) */
1417
  /* do_force always puts the charge groups in the box and shifts again
1418
   * We do not unshift, so molecules are always whole
1419
   */
1420
  neval++;
1421
  ems.s.x = state->x;
1422
  ems.f = f;
1423
  evaluate_energy(fplog,bVerbose,cr,
1424
                  state,top_global,&ems,&buf,top,
1425
                  inputrec,nrnb,wcycle,
1426
                  vsite,constr,fcd,graph,mdatoms,fr,
1427
                  mu_tot,enerd,vir,pres,-1,TRUE);
1428
  where();
1429
        
1430
  if (MASTER(cr)) {
1431
    /* Copy stuff to the energy bin for easy printing etc. */
1432
    upd_mdebin(mdebin,NULL,TRUE,mdatoms->tmass,step,(real)step,
1433
               enerd,state,state->box,
1434
               NULL,NULL,vir,pres,NULL,mu_tot,constr);
1435
    
1436
    print_ebin_header(fplog,step,step,state->lambda);
1437
    print_ebin(fp_ene,TRUE,FALSE,FALSE,fplog,step,step,step,eprNORMAL,
1438
               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1439
  }
1440
  where();
1441
  
1442
  /* This is the starting energy */
1443
  Epot = enerd->term[F_EPOT];
1444
  
1445
  fnorm = ems.fnorm;
1446
  fmax  = ems.fmax;
1447
  nfmax = ems.a_fmax;
1448
  
1449
  /* Set the initial step.
1450
   * since it will be multiplied by the non-normalized search direction 
1451
   * vector (force vector the first time), we scale it by the
1452
   * norm of the force.
1453
   */
1454
  
1455
  if (MASTER(cr)) {
1456
    fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1457
    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1458
    fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1459
    fprintf(stderr,"\n");
1460
    /* and copy to the log file too... */
1461
    fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1462
    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1463
    fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1464
    fprintf(fplog,"\n");
1465
  }   
1466
  
1467
  point=0;
1468
  for(i=0;i<n;i++)
1469
    if(!frozen[i])
1470
      dx[point][i] = ff[i];  /* Initial search direction */
1471
    else
1472
      dx[point][i] = 0;
1473

    
1474
  stepsize = 1.0/fnorm;
1475
  converged = FALSE;
1476
  
1477
  /* Start the loop over BFGS steps.                
1478
   * Each successful step is counted, and we continue until
1479
   * we either converge or reach the max number of steps.
1480
   */
1481
  
1482
  ncorr=0;
1483

    
1484
  /* Set the gradient from the force */
1485
  for(step=0,converged=FALSE;( step<=number_steps || number_steps==0) && !converged;step++) {
1486
    
1487
    /* Write coordinates if necessary */
1488
    do_x = do_per_step(step,inputrec->nstxout);
1489
    do_f = do_per_step(step,inputrec->nstfout);
1490
    
1491
    write_em_traj(fplog,cr,fp_trn,do_x,do_f,NULL,
1492
                  top_global,inputrec,step,
1493
                  &ems,state,f);
1494

    
1495
    /* Do the linesearching in the direction dx[point][0..(n-1)] */
1496
    
1497
    /* pointer to current direction - point=0 first time here */
1498
    s=dx[point];
1499
    
1500
    /* calculate line gradient */
1501
    for(gpa=0,i=0;i<n;i++) 
1502
        gpa-=s[i]*ff[i];
1503

    
1504
    /* Calculate minimum allowed stepsize, before the average (norm) 
1505
     * relative change in coordinate is smaller than precision 
1506
     */
1507
    for(minstep=0,i=0;i<n;i++) {
1508
      tmp=fabs(xx[i]);
1509
      if(tmp<1.0)
1510
        tmp=1.0;
1511
      tmp = s[i]/tmp;
1512
      minstep += tmp*tmp;
1513
    }
1514
    minstep = GMX_REAL_EPS/sqrt(minstep/n);
1515
    
1516
    if(stepsize<minstep) {
1517
      converged=TRUE;
1518
      break;
1519
    }
1520
    
1521
    /* Store old forces and coordinates */
1522
    for(i=0;i<n;i++) {
1523
      lastx[i]=xx[i];
1524
      lastf[i]=ff[i];
1525
    }
1526
    Epot0=Epot;
1527
    
1528
    first=TRUE;
1529
    
1530
    for(i=0;i<n;i++)
1531
      xa[i]=xx[i];
1532
    
1533
    /* Take a step downhill.
1534
     * In theory, we should minimize the function along this direction.
1535
     * That is quite possible, but it turns out to take 5-10 function evaluations
1536
     * for each line. However, we dont really need to find the exact minimum -
1537
     * it is much better to start a new BFGS step in a modified direction as soon
1538
     * as we are close to it. This will save a lot of energy evaluations.
1539
     *
1540
     * In practice, we just try to take a single step.
1541
     * If it worked (i.e. lowered the energy), we increase the stepsize but
1542
     * the continue straight to the next BFGS step without trying to find any minimum.
1543
     * If it didn't work (higher energy), there must be a minimum somewhere between
1544
     * the old position and the new one.
1545
     * 
1546
     * Due to the finite numerical accuracy, it turns out that it is a good idea
1547
     * to even accept a SMALL increase in energy, if the derivative is still downhill.
1548
     * This leads to lower final energies in the tests I've done. / Erik 
1549
     */
1550
    foundlower=FALSE;
1551
    EpotA = Epot0;
1552
    a = 0.0;
1553
    c = a + stepsize; /* reference position along line is zero */
1554

    
1555
    /* Check stepsize first. We do not allow displacements 
1556
     * larger than emstep.
1557
     */
1558
    do {
1559
      c = a + stepsize;
1560
      maxdelta=0;
1561
      for(i=0;i<n;i++) {
1562
        delta=c*s[i];
1563
        if(delta>maxdelta)
1564
          maxdelta=delta;
1565
      }
1566
      if(maxdelta>inputrec->em_stepsize)
1567
        stepsize*=0.1;
1568
    } while(maxdelta>inputrec->em_stepsize);
1569

    
1570
    /* Take a trial step */
1571
    for (i=0; i<n; i++)
1572
      xc[i] = lastx[i] + c*s[i];
1573
    
1574
    neval++;
1575
    /* Calculate energy for the trial step */
1576
    ems.s.x = (rvec *)xc;
1577
    ems.f   = (rvec *)fc;
1578
    evaluate_energy(fplog,bVerbose,cr,
1579
                    state,top_global,&ems,&buf,top,
1580
                    inputrec,nrnb,wcycle,
1581
                    vsite,constr,fcd,graph,mdatoms,fr,
1582
                    mu_tot,enerd,vir,pres,step,FALSE);
1583
    EpotC = ems.epot;
1584
    
1585
    /* Calc derivative along line */
1586
    for(gpc=0,i=0; i<n; i++) {
1587
        gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
1588
    }
1589
    /* Sum the gradient along the line across CPUs */
1590
    if (PAR(cr))
1591
      gmx_sumd(1,&gpc,cr);
1592
    
1593
     /* This is the max amount of increase in energy we tolerate */
1594
   tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1595
    
1596
    /* Accept the step if the energy is lower, or if it is not significantly higher
1597
     * and the line derivative is still negative.
1598
     */
1599
    if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1600
      foundlower = TRUE;
1601
      /* Great, we found a better energy. Increase step for next iteration
1602
       * if we are still going down, decrease it otherwise
1603
       */
1604
      if(gpc<0)
1605
        stepsize *= 1.618034;  /* The golden section */
1606
      else
1607
        stepsize *= 0.618034;  /* 1/golden section */
1608
    } else {
1609
      /* New energy is the same or higher. We will have to do some work
1610
       * to find a smaller value in the interval. Take smaller step next time!
1611
       */
1612
      foundlower = FALSE;
1613
      stepsize *= 0.618034;
1614
    }    
1615
    
1616
    /* OK, if we didn't find a lower value we will have to locate one now - there must
1617
     * be one in the interval [a=0,c].
1618
     * The same thing is valid here, though: Don't spend dozens of iterations to find
1619
     * the line minimum. We try to interpolate based on the derivative at the endpoints,
1620
     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1621
     *
1622
     * I also have a safeguard for potentially really patological functions so we never
1623
     * take more than 20 steps before we give up ...
1624
     *
1625
     * If we already found a lower value we just skip this step and continue to the update.
1626
     */
1627

    
1628
    if(!foundlower) {
1629
     
1630
      nminstep=0;
1631
      do {
1632
        /* Select a new trial point.
1633
         * If the derivatives at points a & c have different sign we interpolate to zero,
1634
         * otherwise just do a bisection.
1635
         */
1636
        
1637
        if(gpa<0 && gpc>0)
1638
          b = a + gpa*(a-c)/(gpc-gpa);
1639
        else
1640
          b = 0.5*(a+c);                
1641
        
1642
        /* safeguard if interpolation close to machine accuracy causes errors:
1643
         * never go outside the interval
1644
         */
1645
        if(b<=a || b>=c)
1646
          b = 0.5*(a+c);
1647
        
1648
        /* Take a trial step */
1649
        for (i=0; i<n; i++) 
1650
          xb[i] = lastx[i] + b*s[i];
1651
        
1652
        neval++;
1653
        /* Calculate energy for the trial step */
1654
        ems.s.x = (rvec *)xb;
1655
        ems.f   = (rvec *)fb;
1656
        evaluate_energy(fplog,bVerbose,cr,
1657
                        state,top_global,&ems,&buf,top,
1658
                        inputrec,nrnb,wcycle,
1659
                        vsite,constr,fcd,graph,mdatoms,fr,
1660
                        mu_tot,enerd,vir,pres,step,FALSE);
1661
        EpotB = ems.epot;
1662
        
1663
        fnorm = ems.fnorm;
1664
        
1665
        for(gpb=0,i=0; i<n; i++) 
1666
          gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
1667
        
1668
        /* Sum the gradient along the line across CPUs */
1669
        if (PAR(cr))
1670
          gmx_sumd(1,&gpb,cr);
1671
        
1672
        /* Keep one of the intervals based on the value of the derivative at the new point */
1673
        if(gpb>0) {
1674
          /* Replace c endpoint with b */
1675
          EpotC = EpotB;
1676
          c = b;
1677
          gpc = gpb;
1678
          /* swap coord pointers b/c */
1679
          xtmp = xb; 
1680
          ftmp = fb;
1681
          xb = xc; 
1682
          fb = fc;
1683
          xc = xtmp;
1684
          fc = ftmp;
1685
        } else {
1686
          /* Replace a endpoint with b */
1687
          EpotA = EpotB;
1688
          a = b;
1689
          gpa = gpb;
1690
          /* swap coord pointers a/b */
1691
          xtmp = xb; 
1692
          ftmp = fb;
1693
          xb = xa; 
1694
          fb = fa;
1695
          xa = xtmp; 
1696
          fa = ftmp;
1697
        }
1698
        
1699
        /* 
1700
         * Stop search as soon as we find a value smaller than the endpoints,
1701
         * or if the tolerance is below machine precision.
1702
         * Never run more than 20 steps, no matter what.
1703
         */
1704
        nminstep++; 
1705
      } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1706

    
1707
      if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1708
        /* OK. We couldn't find a significantly lower energy.
1709
         * If ncorr==0 this was steepest descent, and then we give up.
1710
         * If not, reset memory to restart as steepest descent before quitting.
1711
         */
1712
        if(ncorr==0) {
1713
        /* Converged */
1714
          converged=TRUE;
1715
          break;
1716
        } else {
1717
          /* Reset memory */
1718
          ncorr=0;
1719
          /* Search in gradient direction */
1720
          for(i=0;i<n;i++)
1721
            dx[point][i]=ff[i];
1722
          /* Reset stepsize */
1723
          stepsize = 1.0/fnorm;
1724
          continue;
1725
        }
1726
      }
1727
      
1728
      /* Select min energy state of A & C, put the best in xx/ff/Epot
1729
       */
1730
      if(EpotC<EpotA) {
1731
        Epot = EpotC;
1732
        /* Use state C */
1733
        for(i=0;i<n;i++) {
1734
          xx[i]=xc[i];
1735
          ff[i]=fc[i];
1736
        }
1737
        stepsize=c;
1738
      } else {
1739
        Epot = EpotA;
1740
        /* Use state A */
1741
        for(i=0;i<n;i++) {
1742
          xx[i]=xa[i];
1743
          ff[i]=fa[i];
1744
        }
1745
        stepsize=a;
1746
      }
1747
      
1748
    } else {
1749
      /* found lower */
1750
      Epot = EpotC;
1751
      /* Use state C */
1752
      for(i=0;i<n;i++) {
1753
        xx[i]=xc[i];
1754
        ff[i]=fc[i];
1755
      }
1756
      stepsize=c;
1757
    }
1758

    
1759
    /* Update the memory information, and calculate a new 
1760
     * approximation of the inverse hessian 
1761
     */
1762
    
1763
    /* Have new data in Epot, xx, ff */        
1764
    if(ncorr<nmaxcorr)
1765
      ncorr++;
1766

    
1767
    for(i=0;i<n;i++) {
1768
      dg[point][i]=lastf[i]-ff[i];
1769
      dx[point][i]*=stepsize;
1770
    }
1771
    
1772
    dgdg=0;
1773
    dgdx=0;        
1774
    for(i=0;i<n;i++) {
1775
      dgdg+=dg[point][i]*dg[point][i];
1776
      dgdx+=dg[point][i]*dx[point][i];
1777
    }
1778
    
1779
    diag=dgdx/dgdg;
1780
    
1781
    rho[point]=1.0/dgdx;
1782
    point++;
1783
    
1784
    if(point>=nmaxcorr)
1785
      point=0;
1786
    
1787
    /* Update */
1788
    for(i=0;i<n;i++)
1789
      p[i]=ff[i];
1790
    
1791
    cp=point;
1792
    
1793
    /* Recursive update. First go back over the memory points */
1794
    for(k=0;k<ncorr;k++) {
1795
      cp--;
1796
      if(cp<0) 
1797
        cp=ncorr-1;
1798
      
1799
      sq=0;
1800
      for(i=0;i<n;i++)
1801
        sq+=dx[cp][i]*p[i];
1802
      
1803
      alpha[cp]=rho[cp]*sq;
1804
      
1805
      for(i=0;i<n;i++)
1806
        p[i] -= alpha[cp]*dg[cp][i];                
1807
    }
1808
    
1809
    for(i=0;i<n;i++)
1810
      p[i] *= diag;
1811
    
1812
    /* And then go forward again */
1813
    for(k=0;k<ncorr;k++) {
1814
      yr = 0;
1815
      for(i=0;i<n;i++)
1816
        yr += p[i]*dg[cp][i];
1817
      
1818
      beta = rho[cp]*yr;            
1819
      beta = alpha[cp]-beta;
1820
      
1821
      for(i=0;i<n;i++)
1822
        p[i] += beta*dx[cp][i];
1823
      
1824
      cp++;        
1825
      if(cp>=ncorr)
1826
        cp=0;
1827
    }
1828
    
1829
    for(i=0;i<n;i++)
1830
      if(!frozen[i])
1831
        dx[point][i] = p[i];
1832
      else
1833
        dx[point][i] = 0;
1834

    
1835
    stepsize=1.0;
1836
    
1837
    /* Test whether the convergence criterion is met */
1838
    get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1839
    
1840
    /* Print it if necessary */
1841
    if (MASTER(cr)) {
1842
      if(bVerbose)
1843
        fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1844
                step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1845
      /* Store the new (lower) energies */
1846
      upd_mdebin(mdebin,NULL,TRUE,mdatoms->tmass,step,(real)step,
1847
                 enerd,state,state->box,
1848
                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1849
      do_log = do_per_step(step,inputrec->nstlog);
1850
      do_ene = do_per_step(step,inputrec->nstenergy);
1851
      if(do_log)
1852
        print_ebin_header(fplog,step,step,state->lambda);
1853
      print_ebin(fp_ene,do_ene,FALSE,FALSE,
1854
                 do_log ? fplog : NULL,step,step,step,eprNORMAL,
1855
                 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1856
    }
1857
    
1858
    /* Stop when the maximum force lies below tolerance.
1859
     * If we have reached machine precision, converged is already set to true.
1860
     */
1861
    
1862
    converged = converged || (fmax < inputrec->em_tol);
1863
    
1864
  } /* End of the loop */
1865
  
1866
  if(converged)        
1867
    step--; /* we never took that last step in this case */
1868
  
1869
  if(fmax>inputrec->em_tol) {
1870
    if (MASTER(cr)) {
1871
      warn_step(stderr,inputrec->em_tol,FALSE);
1872
      warn_step(fplog,inputrec->em_tol,FALSE);
1873
    }
1874
    converged = FALSE; 
1875
  }
1876
  
1877
  /* If we printed energy and/or logfile last step (which was the last step)
1878
   * we don't have to do it again, but otherwise print the final values.
1879
   */
1880
  if(!do_log) /* Write final value to log since we didn't do anythin last step */
1881
    print_ebin_header(fplog,step,step,state->lambda);
1882
  if(!do_ene || !do_log) /* Write final energy file entries */
1883
    print_ebin(fp_ene,!do_ene,FALSE,FALSE,
1884
               !do_log ? fplog : NULL,step,step,step,eprNORMAL,
1885
               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1886
  
1887
  /* Print some stuff... */
1888
  if (MASTER(cr))
1889
    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1890
  
1891
  /* IMPORTANT!
1892
   * For accurate normal mode calculation it is imperative that we
1893
   * store the last conformation into the full precision binary trajectory.
1894
   *
1895
   * However, we should only do it if we did NOT already write this step
1896
   * above (which we did if do_x or do_f was true).
1897
   */  
1898
  do_x = !do_per_step(step,inputrec->nstxout);
1899
  do_f = !do_per_step(step,inputrec->nstfout);
1900
  write_em_traj(fplog,cr,fp_trn,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1901
                top_global,inputrec,step,
1902
                &ems,state,f);
1903
  
1904
  if (MASTER(cr)) {
1905
    print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
1906
                    number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1907
    print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
1908
                    number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1909
    
1910
    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1911
  }
1912
  
1913
  finish_em(fplog,cr,fp_trn,fp_ene);
1914

    
1915
  /* To print the actual number of steps we needed somewhere */
1916
  *nsteps_done = step;
1917

    
1918
  return start_t;
1919
} /* That's all folks */
1920

    
1921

    
1922
time_t do_steep(FILE *fplog,t_commrec *cr,
1923
                int nfile,t_filenm fnm[],
1924
                bool bVerbose,bool bCompact,
1925
                gmx_vsite_t *vsite,gmx_constr_t constr,
1926
                int stepout,
1927
                t_inputrec *inputrec,
1928
                gmx_mtop_t *top_global,t_fcdata *fcd,
1929
                t_state *state_global,rvec f[],
1930
                rvec buf[],t_mdatoms *mdatoms,
1931
                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1932
                gmx_edsam_t ed,
1933
                t_forcerec *fr,
1934
                int repl_ex_nst,int repl_ex_seed,
1935
                real cpt_period,real max_hours,
1936
                unsigned long Flags,
1937
                int *nsteps_done)
1938
{ 
1939
  const char *SD="Steepest Descents";
1940
  em_state_t *s_min,*s_try;
1941
  rvec       *f_global;
1942
  gmx_localtop_t *top;
1943
  gmx_enerdata_t *enerd;
1944
  t_graph    *graph;
1945
  real   stepsize,constepsize;
1946
  real   ustep,dvdlambda,fnormn;
1947
  int        fp_trn,fp_ene; 
1948
  t_mdebin   *mdebin; 
1949
  bool   bDone,bAbort,do_x,do_f; 
1950
  time_t start_t; 
1951
  tensor vir,pres; 
1952
  rvec   mu_tot;
1953
  int    nsteps;
1954
  int    count=0; 
1955
  int    steps_accepted=0; 
1956
  /* not used */
1957
  real   terminate=0;
1958

    
1959
  s_min = init_em_state();
1960
  s_try = init_em_state();
1961

    
1962
  /* Init em and store the local state in s_try */
1963
  init_em(fplog,SD,cr,inputrec,
1964
          state_global,top_global,s_try,&top,f,&buf,&f_global,
1965
          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr,
1966
          nfile,fnm,&fp_trn,&fp_ene,&mdebin);
1967

    
1968
  /* Print to log file  */
1969
  start_t=print_date_and_time(fplog,cr->nodeid,"Started Steepest Descents");
1970
  wallcycle_start(wcycle,ewcRUN);
1971
    
1972
  /* Set variables for stepsize (in nm). This is the largest  
1973
   * step that we are going to make in any direction. 
1974
   */
1975
  ustep = inputrec->em_stepsize; 
1976
  stepsize = 0;
1977
  
1978
  /* Max number of steps  */
1979
  nsteps = inputrec->nsteps; 
1980
  
1981
  if (MASTER(cr)) 
1982
    /* Print to the screen  */
1983
    sp_header(stderr,SD,inputrec->em_tol,nsteps);
1984
  if (fplog)
1985
    sp_header(fplog,SD,inputrec->em_tol,nsteps);
1986
    
1987
  /**** HERE STARTS THE LOOP ****
1988
   * count is the counter for the number of steps 
1989
   * bDone will be TRUE when the minimization has converged
1990
   * bAbort will be TRUE when nsteps steps have been performed or when
1991
   * the stepsize becomes smaller than is reasonable for machine precision
1992
   */
1993
  count  = 0;
1994
  bDone  = FALSE;
1995
  bAbort = FALSE;
1996
  while( !bDone && !bAbort ) {
1997
    bAbort = (nsteps > 0) && (count==nsteps);
1998
    
1999
    /* set new coordinates, except for first step */
2000
    if (count > 0) {
2001
      do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
2002
                 constr,top,nrnb,wcycle,count);
2003
    }
2004
    
2005
    evaluate_energy(fplog,bVerbose,cr,
2006
                    state_global,top_global,s_try,&buf,top,
2007
                    inputrec,nrnb,wcycle,
2008
                    vsite,constr,fcd,graph,mdatoms,fr,
2009
                    mu_tot,enerd,vir,pres,count,count==0);
2010
    
2011
    if (MASTER(cr))
2012
      print_ebin_header(fplog,count,count,s_try->s.lambda);
2013

    
2014
    if (count == 0)
2015
      s_min->epot = s_try->epot + 1;
2016
    
2017
    /* Print it if necessary  */
2018
    if (MASTER(cr)) {
2019
      if (bVerbose) {
2020
        fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2021
                count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2022
                (s_try->epot < s_min->epot) ? '\n' : '\r');
2023
      }
2024
      
2025
      if (s_try->epot < s_min->epot) {
2026
        /* Store the new (lower) energies  */
2027
        upd_mdebin(mdebin,NULL,TRUE,mdatoms->tmass,count,(real)count,
2028
                   enerd,&s_try->s,s_try->s.box,
2029
                   NULL,NULL,vir,pres,NULL,mu_tot,constr);
2030
        print_ebin(fp_ene,TRUE,
2031
                   do_per_step(steps_accepted,inputrec->nstdisreout),
2032
                   do_per_step(steps_accepted,inputrec->nstorireout),
2033
                   fplog,count,count,count,eprNORMAL,TRUE,
2034
                   mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2035
        fflush(fplog);
2036
      }
2037
    } 
2038
    
2039
    /* Now if the new energy is smaller than the previous...  
2040
     * or if this is the first step!
2041
     * or if we did random steps! 
2042
     */
2043
    
2044
    if ( (count==0) || (s_try->epot < s_min->epot) ) {
2045
      steps_accepted++; 
2046

    
2047
      /* Test whether the convergence criterion is met...  */
2048
      bDone = (s_try->fmax < inputrec->em_tol);
2049
      
2050
      /* Copy the arrays for force, positions and energy  */
2051
      /* The 'Min' array always holds the coords and forces of the minimal 
2052
         sampled energy  */
2053
      swap_em_state(s_min,s_try);
2054
      if (count > 0)
2055
        ustep *= 1.2;
2056

    
2057
      /* Write to trn, if necessary */
2058
      do_x = do_per_step(steps_accepted,inputrec->nstxout);
2059
      do_f = do_per_step(steps_accepted,inputrec->nstfout);
2060
      write_em_traj(fplog,cr,fp_trn,do_x,do_f,NULL,
2061
                    top_global,inputrec,count,
2062
                    s_min,state_global,f_global);
2063
    } 
2064
    else {
2065
      /* If energy is not smaller make the step smaller...  */
2066
      ustep *= 0.5;
2067

    
2068
      if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2069
        /* Reload the old state */
2070
        em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2071
                               s_min,&buf,top,mdatoms,fr,vsite,constr,
2072
                               nrnb,wcycle);
2073
      }
2074
    }
2075
    
2076
    /* Determine new step  */
2077
    stepsize = ustep/s_min->fmax;
2078
    
2079
    /* Check if stepsize is too small, with 1 nm as a characteristic length */
2080
#ifdef GMX_DOUBLE
2081
    if (ustep < 1e-12)
2082
#else
2083
    if (ustep < 1e-6)
2084
#endif
2085
      {
2086
        if (MASTER(cr)) {
2087
          warn_step(stderr,inputrec->em_tol,constr!=NULL);
2088
          warn_step(fplog,inputrec->em_tol,constr!=NULL);
2089
        }
2090
        bAbort=TRUE;
2091
      }
2092
    
2093
    count++;
2094
  } /* End of the loop  */
2095
  
2096
    /* Print some shit...  */
2097
  if (MASTER(cr)) 
2098
    fprintf(stderr,"\nwriting lowest energy coordinates.\n"); 
2099
  write_em_traj(fplog,cr,fp_trn,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2100
                top_global,inputrec,count,
2101
                s_min,state_global,f_global);
2102

    
2103
  fnormn = s_min->fnorm/sqrt(state_global->natoms);
2104

    
2105
  if (MASTER(cr)) {
2106
    print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2107
                    s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2108
    print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2109
                    s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2110
  }
2111

    
2112
  finish_em(fplog,cr,fp_trn,fp_ene);
2113
  
2114
  /* To print the actual number of steps we needed somewhere */
2115
  inputrec->nsteps=count;
2116

    
2117
  *nsteps_done = count;
2118
  
2119
  return start_t;
2120
} /* That's all folks */
2121

    
2122

    
2123
time_t do_nm(FILE *fplog,t_commrec *cr,
2124
             int nfile,t_filenm fnm[],
2125
             bool bVerbose,bool bCompact,
2126
             gmx_vsite_t *vsite,gmx_constr_t constr,
2127
             int stepout,
2128
             t_inputrec *inputrec,
2129
             gmx_mtop_t *top_global,t_fcdata *fcd,
2130
             t_state *state_global,rvec f[],
2131
             rvec buf[],t_mdatoms *mdatoms,
2132
             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2133
             gmx_edsam_t ed,
2134
             t_forcerec *fr,
2135
             int repl_ex_nst,int repl_ex_seed,
2136
             real cpt_period,real max_hours,
2137
             unsigned long Flags,
2138
             int *nsteps_done)
2139
{
2140
    t_mdebin   *mdebin;
2141
        const char *NM = "Normal Mode Analysis";
2142
    int        fp_ene,step,i;
2143
    time_t     start_t;
2144
    rvec       *f_global;
2145
    gmx_localtop_t *top;
2146
    gmx_enerdata_t *enerd;
2147
    t_graph    *graph;
2148
    real       t,lambda,t0,lam0;
2149
    bool       bNS;
2150
    tensor     vir,pres;
2151
    int        nfmax,count;
2152
    rvec       mu_tot;
2153
    rvec       *fneg,*fpos;
2154
    bool       bSparse; /* use sparse matrix storage format */
2155
    size_t     sz;
2156
    gmx_sparsematrix_t * sparse_matrix = NULL;
2157
    real *     full_matrix             = NULL;
2158
        em_state_t *   state_work;
2159
        
2160
    /* added with respect to mdrun */
2161
    int        idum,jdum,kdum,row,col;
2162
    real       der_range=10.0*sqrt(GMX_REAL_EPS);
2163
    real       fnorm,fmax;
2164
    real       dfdx;
2165
    
2166
    state_work = init_em_state();
2167
    
2168
    /* Init em and store the local state in state_minimum */
2169
    init_em(fplog,NM,cr,inputrec,
2170
            state_global,top_global,state_work,&top,
2171
            f,&buf,&f_global,
2172
            nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr,
2173
            nfile,fnm,NULL,&fp_ene,&mdebin);
2174
    
2175
    snew(fneg,top_global->natoms);
2176
    snew(fpos,top_global->natoms);
2177
    
2178
#ifndef GMX_DOUBLE
2179
    fprintf(stderr,
2180
            "NOTE: This version of Gromacs has been compiled in single precision,\n"
2181
            "      which MIGHT not be accurate enough for normal mode analysis.\n"
2182
            "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
2183
            "      are fairly modest even if you recompile in double precision.\n\n");
2184
#endif
2185
    
2186
    /* Check if we can/should use sparse storage format.
2187
     *
2188
     * Sparse format is only useful when the Hessian itself is sparse, which it
2189
      * will be when we use a cutoff.    
2190
      * For small systems (n<1000) it is easier to always use full matrix format, though.
2191
      */
2192
    if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2193
    {
2194
        fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2195
        bSparse = FALSE;
2196
    }
2197
    else if(top_global->natoms < 1000)
2198
    {
2199
        fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2200
        bSparse = FALSE;
2201
    }
2202
    else
2203
    {
2204
        fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2205
        bSparse = TRUE;
2206
    }
2207
    
2208
    sz = DIM*top_global->natoms;
2209
    
2210
    fprintf(stderr,"Allocating Hessian memory...\n\n");
2211

    
2212
    if(bSparse)
2213
    {
2214
        sparse_matrix=gmx_sparsematrix_init(sz);
2215
        sparse_matrix->compressed_symmetric = TRUE;
2216
    }
2217
    else
2218
    {
2219
        snew(full_matrix,sz*sz);
2220
    }
2221
    
2222
    /* Initial values */
2223
    t0           = inputrec->init_t;
2224
    lam0         = inputrec->init_lambda;
2225
    t            = t0;
2226
    lambda       = lam0;
2227
    
2228
    init_nrnb(nrnb);
2229
    
2230
    where();
2231
    
2232
    /* Write start time and temperature */
2233
    start_t=print_date_and_time(fplog,cr->nodeid,"Started nmrun");
2234
    wallcycle_start(wcycle,ewcRUN);
2235
    if (MASTER(cr)) 
2236
    {
2237
        fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",*(top_global->name),
2238
                top_global->natoms);
2239
    }
2240
    
2241
    count = 0;
2242
    evaluate_energy(fplog,bVerbose,cr,
2243
                    state_global,top_global,state_work,&buf,top,
2244
                    inputrec,nrnb,wcycle,
2245
                    vsite,constr,fcd,graph,mdatoms,fr,
2246
                    mu_tot,enerd,vir,pres,count,count==0);
2247
    count++;
2248

    
2249
    /* if forces are not small, warn user */
2250
    get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2251

    
2252
    fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2253
    if (state_work->fmax > 1.0e-3) 
2254
    {
2255
        fprintf(stderr,"Maximum force probably not small enough to");
2256
        fprintf(stderr," ensure that you are in an \nenergy well. ");
2257
        fprintf(stderr,"Be aware that negative eigenvalues may occur");
2258
        fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2259
    }
2260
    
2261
    /***********************************************************
2262
     *
2263
     *      Loop over all pairs in matrix 
2264
     * 
2265
     *      do_force called twice. Once with positive and 
2266
     *      once with negative displacement 
2267
     *
2268
     ************************************************************/
2269

    
2270
    /* fudge nr of steps to nr of atoms */
2271
    
2272
    inputrec->nsteps = top_global->natoms;
2273

    
2274
    for (step=0; (step<inputrec->nsteps); step++) 
2275
    {
2276
        
2277
        for (idum=0; (idum<DIM); idum++) 
2278
        {
2279
          row = DIM*step+idum;
2280
          
2281
          state_work->s.x[step][idum] -= der_range;
2282
          
2283
          evaluate_energy(fplog,bVerbose,cr,
2284
                          state_global,top_global,state_work,&buf,top,
2285
                          inputrec,nrnb,wcycle,
2286
                          vsite,constr,fcd,graph,mdatoms,fr,
2287
                          mu_tot,enerd,vir,pres,count,count==0);
2288
          count++;
2289
                        
2290
          for ( i=0 ; i < top_global->natoms ; i++ )
2291
            {
2292
              copy_rvec ( state_work->f[i] , fneg[i] );
2293
            }
2294
          
2295
          state_work->s.x[step][idum] += 2*der_range;
2296
          
2297
          evaluate_energy(fplog,bVerbose,cr,
2298
                          state_global,top_global,state_work,&buf,top,
2299
                          inputrec,nrnb,wcycle,
2300
                          vsite,constr,fcd,graph,mdatoms,fr,
2301
                          mu_tot,enerd,vir,pres,count,count==0);
2302
          count++;
2303
          
2304
          for ( i=0 ; i < top_global->natoms ; i++ )
2305
            {
2306
              copy_rvec ( state_work->f[i] , fpos[i] );
2307
            }
2308
          
2309
          for (jdum=0; (jdum<top_global->natoms); jdum++) 
2310
            {
2311
                for (kdum=0; (kdum<DIM); kdum++) 
2312
                {
2313
                    dfdx=-(fpos[jdum][kdum]-fneg[jdum][kdum])/(2*der_range);
2314
                    col = DIM*jdum+kdum;
2315
                    
2316
                    if(bSparse)
2317
                    {
2318
                        if(col>=row && dfdx!=0.0)
2319
                            gmx_sparsematrix_increment_value(sparse_matrix,row,col,dfdx);
2320
                    }
2321
                    else
2322
                    {
2323
                        full_matrix[row*sz+col] = dfdx;
2324
                    }
2325
                }
2326
            }
2327
            
2328
            /* x is restored to original */
2329
                        state_work->s.x[step][idum] -= der_range;
2330
            
2331
            if (bVerbose && fplog)
2332
                fflush(fplog);            
2333
        }
2334
        /* write progress */
2335
        if (MASTER(cr) && bVerbose) 
2336
        {
2337
            fprintf(stderr,"\rFinished step %d out of %d",
2338
                    step+1,top_global->natoms); 
2339
            fflush(stderr);
2340
        }
2341
    }
2342
    t=t0+step*inputrec->delta_t;
2343
    lambda=lam0+step*inputrec->delta_lambda;
2344
    
2345
    if (MASTER(cr)) 
2346
    {
2347
        print_ebin(-1,FALSE,FALSE,FALSE,fplog,step,step,t,eprAVER,
2348
                   FALSE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2349
        print_ebin(-1,FALSE,FALSE,FALSE,fplog,step,step,t,eprRMS,
2350
                   FALSE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2351
    }
2352
      
2353
    fprintf(stderr,"\n\nWriting Hessian...\n");
2354
    gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2355

    
2356
    *nsteps_done = step;
2357
    
2358
    return start_t;
2359
}
2360

    
2361

    
2362
static void global_max(t_commrec *cr,int *n)
2363
{
2364
  int *sum,i;
2365

    
2366
  snew(sum,cr->nnodes);
2367
  sum[cr->nodeid] = *n;
2368
  gmx_sumi(cr->nnodes,sum,cr);
2369
  for(i=0; i<cr->nnodes; i++)
2370
    *n = max(*n,sum[i]);
2371
  
2372
  sfree(sum);
2373
}
2374

    
2375
static void realloc_bins(double **bin,int *nbin,int nbin_new)
2376
{
2377
  int i;
2378

    
2379
  if (nbin_new != *nbin) {
2380
    srenew(*bin,nbin_new);
2381
    for(i=*nbin; i<nbin_new; i++)
2382
      (*bin)[i] = 0;
2383
    *nbin = nbin_new;
2384
  }
2385
}
2386

    
2387
time_t do_tpi(FILE *fplog,t_commrec *cr,
2388
              int nfile,t_filenm fnm[],
2389
              bool bVerbose,bool bCompact,
2390
              gmx_vsite_t *vsite,gmx_constr_t constr,
2391
              int stepout,
2392
              t_inputrec *inputrec,
2393
              gmx_mtop_t *top_global,t_fcdata *fcd,
2394
              t_state *state,rvec f[],
2395
              rvec buf[],t_mdatoms *mdatoms,
2396
              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2397
              gmx_edsam_t ed,
2398
              t_forcerec *fr,
2399
              int repl_ex_nst,int repl_ex_seed,
2400
              real cpt_period,real max_hours,
2401
              unsigned long Flags,
2402
              int *nsteps_done)
2403
{
2404
  const char *TPI="Test Particle Insertion"; 
2405
  gmx_localtop_t *top;
2406
  gmx_groups_t *groups;
2407
  gmx_enerdata_t *enerd;
2408
  real   lambda,t,temp,beta,drmax,epot;
2409
  double embU,sum_embU,*sum_UgembU,V,V_all,VembU_all;
2410
  int    status;
2411
  t_trxframe rerun_fr;
2412
  bool   bDispCorr,bCharge,bRFExcl,bNotLastFrame,bStateChanged,bNS,bOurStep;
2413
  time_t start_t; 
2414
  tensor force_vir,shake_vir,vir,pres;
2415
  int    cg_tp,a_tp0,a_tp1,ngid,gid_tp,nener,e;
2416
  rvec   *x_mol;
2417
  rvec   mu_tot,x_init,dx,x_tp;
2418
  int    nnodes,frame,nsteps,step;
2419
  int    i,start,end;
2420
  static gmx_rng_t tpi_rand;
2421
  FILE   *fp_tpi=NULL;
2422
  char   *ptr,*dump_pdb,**leg,str[STRLEN],str2[STRLEN];
2423
  double dbl,dump_ener;
2424
  bool   bCavity;
2425
  int    nat_cavity=0,d;
2426
  real   *mass_cavity=NULL,mass_tot;
2427
  int    nbin;
2428
  double invbinw,*bin,refvolshift,logV,bUlogV;
2429
  char   *tpid_leg[2]={"direct","reweighted"};
2430

    
2431
  /* Since numerical problems can lead to extreme negative energies
2432
   * when atoms overlap, we need to set a lower limit for beta*U.
2433
   */
2434
  real bU_neg_limit = -50;
2435

    
2436
  /* Since there is no upper limit to the insertion energies,
2437
   * we need to set an upper limit for the distribution output.
2438
   */
2439
  real bU_bin_limit      = 50;
2440
  real bU_logV_bin_limit = bU_bin_limit + 10;
2441

    
2442
  nnodes = cr->nnodes;
2443

    
2444
  top = gmx_mtop_generate_local_top(top_global,inputrec);
2445

    
2446
  groups = &top_global->groups;
2447

    
2448
  bCavity = (inputrec->eI == eiTPIC);
2449
  if (bCavity) {
2450
    ptr = getenv("GMX_TPIC_MASSES");
2451
    if (ptr == NULL) {
2452
      nat_cavity = 1;
2453
    } else {
2454
      /* Read (multiple) masses from env var GMX_TPIC_MASSES,
2455
       * The center of mass of the last atoms is then used for TPIC.
2456
       */
2457
      nat_cavity = 0;
2458
      while (sscanf(ptr,"%lf%n",&dbl,&i) > 0) {
2459
        srenew(mass_cavity,nat_cavity+1);
2460
        mass_cavity[nat_cavity] = dbl;
2461
        fprintf(fplog,"mass[%d] = %f\n",
2462
                nat_cavity+1,mass_cavity[nat_cavity]);
2463
        nat_cavity++;
2464
        ptr += i;
2465
      }
2466
      if (nat_cavity == 0)
2467
        gmx_fatal(FARGS,"Found %d masses in GMX_TPIC_MASSES",nat_cavity);
2468
    }
2469
  }
2470

    
2471
  /*
2472
  init_em(fplog,TPI,inputrec,&lambda,nrnb,mu_tot,
2473
  state->box,fr,mdatoms,top,cr,nfile,fnm,NULL,NULL);*/
2474
  /* We never need full pbc for TPI */
2475
  fr->ePBC = epbcXYZ;
2476
  /* Determine the temperature for the Boltzmann weighting */
2477
  temp = inputrec->opts.ref_t[0];
2478
  if (fplog) {
2479
    for(i=1; (i<inputrec->opts.ngtc); i++) {
2480
      if (inputrec->opts.ref_t[i] != temp) {
2481
        fprintf(fplog,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
2482
        fprintf(stderr,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
2483
      }
2484
    }
2485
    fprintf(fplog,
2486
            "\n  The temperature for test particle insertion is %.3f K\n\n",
2487
            temp);
2488
  }
2489
  beta = 1.0/(BOLTZ*temp);
2490

    
2491
  /* Number of insertions per frame */
2492
  nsteps = inputrec->nsteps; 
2493

    
2494
  /* Use the same neighborlist with more insertions points
2495
   * in a sphere of radius drmax around the initial point
2496
   */
2497
  /* This should be a proper mdp parameter */
2498
  drmax = inputrec->rtpi;
2499

    
2500
  /* An environment variable can be set to dump all configurations
2501
   * to pdb with an insertion energy <= this value.
2502
   */
2503
  dump_pdb = getenv("GMX_TPI_DUMP");
2504
  dump_ener = 0;
2505
  if (dump_pdb)
2506
    sscanf(dump_pdb,"%lf",&dump_ener);
2507

    
2508
  atoms2md(top_global,inputrec,0,NULL,0,top_global->natoms,mdatoms);
2509
  update_mdatoms(mdatoms,inputrec->init_lambda);
2510

    
2511
  snew(enerd,1);
2512
  init_enerdata(fplog,groups->grps[egcENER].nr,enerd);
2513

    
2514
  /* Print to log file  */
2515
  start_t=print_date_and_time(fplog,cr->nodeid,
2516
                              "Started Test Particle Insertion"); 
2517
  wallcycle_start(wcycle,ewcRUN);
2518

    
2519
  /* The last charge group is the group to be inserted */
2520
  cg_tp = top->cgs.nr - 1;
2521
  a_tp0 = top->cgs.index[cg_tp];
2522
  a_tp1 = top->cgs.index[cg_tp+1];
2523
  if (debug)
2524
    fprintf(debug,"TPI cg %d, atoms %d-%d\n",cg_tp,a_tp0,a_tp1);
2525
  if (a_tp1 - a_tp0 > 1 &&
2526
      (inputrec->rlist < inputrec->rcoulomb ||
2527
       inputrec->rlist < inputrec->rvdw))
2528
    gmx_fatal(FARGS,"Can not do TPI for multi-atom molecule with a twin-range cut-off");
2529
  snew(x_mol,a_tp1-a_tp0);
2530

    
2531
  bDispCorr = (inputrec->eDispCorr != edispcNO);
2532
  bCharge = FALSE;
2533
  for(i=a_tp0; i<a_tp1; i++) {
2534
    /* Copy the coordinates of the molecule to be insterted */
2535
    copy_rvec(state->x[i],x_mol[i-a_tp0]);
2536
    /* Check if we need to print electrostatic energies */
2537
    bCharge |= (mdatoms->chargeA[i] != 0 ||
2538
                (mdatoms->chargeB && mdatoms->chargeB[i] != 0));
2539
  }
2540
  bRFExcl = (bCharge && EEL_RF(fr->eeltype) && fr->eeltype!=eelRF_NEC);
2541

    
2542
  calc_cgcm(fplog,cg_tp,cg_tp+1,&(top->cgs),state->x,fr->cg_cm);
2543
  if (bCavity) {
2544
    if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog) {
2545
      fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
2546
      fprintf(stderr,"WARNING: Your TPI molecule is not centered at 0,0,0\n");
2547
    }
2548
  } else {
2549
    /* Center the molecule to be inserted at zero */
2550
     for(i=0; i<a_tp1-a_tp0; i++)
2551
      rvec_dec(x_mol[i],fr->cg_cm[cg_tp]);
2552
  }
2553

    
2554
  if (fplog) {
2555
    fprintf(fplog,"\nWill insert %d atoms %s partial charges\n",
2556
            a_tp1-a_tp0,bCharge ? "with" : "without");
2557
    
2558
    fprintf(fplog,"\nWill insert %d times in each frame of %s\n",
2559
            nsteps,opt2fn("-rerun",nfile,fnm));
2560
  }
2561
  
2562
  if (!bCavity) {
2563
    if (inputrec->nstlist > 1) {
2564
      if (drmax==0 && a_tp1-a_tp0==1) {
2565
        gmx_fatal(FARGS,"Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense",inputrec->nstlist,drmax);
2566
      }
2567
      if (fplog)
2568
        fprintf(fplog,"Will use the same neighborlist for %d insertions in a sphere of radius %f\n",inputrec->nstlist,drmax);
2569
    }
2570
  } else {
2571
    if (fplog)
2572
      fprintf(fplog,"Will insert randomly in a sphere of radius %f around the center of the cavity\n",drmax);
2573
  }
2574

    
2575
  ngid = groups->grps[egcENER].nr;
2576
  gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]);
2577
  nener = 1 + ngid;
2578
  if (bDispCorr)
2579
    nener += 1;
2580
  if (bCharge) {
2581
    nener += ngid;
2582
    if (bRFExcl)
2583
      nener += 1;
2584
  }
2585
  snew(sum_UgembU,nener);
2586

    
2587
  /* Initialize random generator */
2588
  tpi_rand = gmx_rng_init(inputrec->ld_seed);
2589

    
2590
  if (MASTER(cr)) {
2591
    fp_tpi = xvgropen(opt2fn("-tpi",nfile,fnm),
2592
                      "TPI energies","Time (ps)",
2593
                      "(kJ mol\\S-1\\N) / (nm\\S3\\N)");
2594
    xvgr_subtitle(fp_tpi,"f. are averages over one frame");
2595
    snew(leg,4+nener);
2596
    e = 0;
2597
    sprintf(str,"-kT log(<Ve\\S-\\8b\\4U\\N>/<V>)");
2598
    leg[e++] = strdup(str);
2599
    sprintf(str,"f. -kT log<e\\S-\\8b\\4U\\N>");
2600
    leg[e++] = strdup(str);
2601
    sprintf(str,"f. <e\\S-\\8b\\4U\\N>");
2602
    leg[e++] = strdup(str);
2603
    sprintf(str,"f. V");
2604
    leg[e++] = strdup(str);
2605
    sprintf(str,"f. <Ue\\S-\\8b\\4U\\N>");
2606
    leg[e++] = strdup(str);
2607
    for(i=0; i<ngid; i++) {
2608
      sprintf(str,"f. <U\\sVdW %s\\Ne\\S-\\8b\\4U\\N>",
2609
              *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
2610
      leg[e++] = strdup(str);
2611
    }
2612
    if (bDispCorr) {
2613
      sprintf(str,"f. <U\\sdisp c\\Ne\\S-\\8b\\4U\\N>");
2614
      leg[e++] = strdup(str);
2615
    }
2616
    if (bCharge) {
2617
      for(i=0; i<ngid; i++) {
2618
        sprintf(str,"f. <U\\sCoul %s\\Ne\\S-\\8b\\4U\\N>",
2619
                *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
2620
        leg[e++] = strdup(str);
2621
      }
2622
      if (bRFExcl) {
2623
        sprintf(str,"f. <U\\sRF excl\\Ne\\S-\\8b\\4U\\N>");
2624
        leg[e++] = strdup(str);
2625
      }
2626
    }
2627
    xvgr_legend(fp_tpi,4+nener,leg);
2628
    for(i=0; i<4+nener; i++)
2629
      sfree(leg[i]);
2630
    sfree(leg);
2631
  }
2632
  clear_rvec(x_init);
2633
  V_all = 0;
2634
  VembU_all = 0;
2635

    
2636
  invbinw = 10;
2637
  nbin = 10;
2638
  snew(bin,nbin);
2639

    
2640
  start_time();
2641

    
2642
  bNotLastFrame = read_first_frame(&status,opt2fn("-rerun",nfile,fnm),
2643
                                   &rerun_fr,TRX_NEED_X);
2644
  frame = 0;
2645

    
2646
  if (rerun_fr.natoms - (bCavity ? nat_cavity : 0) !=
2647
      mdatoms->nr - (a_tp1 - a_tp0))
2648
    gmx_fatal(FARGS,"Number of atoms in trajectory (%d)%s "
2649
              "is not equal the number in the run input file (%d) "
2650
              "minus the number of atoms to insert (%d)\n",
2651
              rerun_fr.natoms,bCavity ? " minus one" : "",
2652
              mdatoms->nr,a_tp1-a_tp0);
2653

    
2654
  refvolshift = log(det(rerun_fr.box));
2655
  
2656
  while (bNotLastFrame) {
2657
    lambda = rerun_fr.lambda;
2658
    t = rerun_fr.time;
2659
    
2660
    sum_embU = 0;
2661
    for(e=0; e<nener; e++)
2662
      sum_UgembU[e] = 0;
2663

    
2664
    /* Copy the coordinates from the input trajectory */
2665
    for(i=0; i<rerun_fr.natoms; i++)
2666
      copy_rvec(rerun_fr.x[i],state->x[i]);
2667
      
2668
    V = det(rerun_fr.box);
2669
    logV = log(V);
2670

    
2671
    bStateChanged = TRUE;
2672
    bNS = TRUE;
2673
    for(step=0; step<nsteps; step++) {
2674
      /* In parallel all nodes generate all random configurations.
2675
       * In that way the result is identical to a single cpu tpi run.
2676
       */
2677
      if (!bCavity) {
2678
        /* Random insertion in the whole volume */
2679
        bNS = (step % inputrec->nstlist == 0);
2680
        if (bNS) {
2681
          /* Generate a random position in the box */
2682
          x_init[XX] = gmx_rng_uniform_real(tpi_rand)*state->box[XX][XX];
2683
          x_init[YY] = gmx_rng_uniform_real(tpi_rand)*state->box[YY][YY];
2684
          x_init[ZZ] = gmx_rng_uniform_real(tpi_rand)*state->box[ZZ][ZZ];
2685
        }
2686
        if (inputrec->nstlist == 1) {
2687
          copy_rvec(x_init,x_tp);
2688
        } else {
2689
          /* Generate coordinates within |dx|=drmax of x_init */
2690
          do {
2691
            dx[XX] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2692
            dx[YY] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2693
            dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2694
          } while (norm2(dx) > drmax*drmax);
2695
          rvec_add(x_init,dx,x_tp);
2696
        }
2697
      } else {
2698
        /* Random insertion around a cavity location
2699
         * given by the last coordinate of the trajectory.
2700
         */
2701
        if (step == 0) {
2702
          if (nat_cavity == 1) {
2703
            /* Copy the location of the cavity */
2704
            copy_rvec(rerun_fr.x[rerun_fr.natoms-1],x_init);
2705
          } else {
2706
            /* Determine the center of mass of the last molecule */
2707
            clear_rvec(x_init);
2708
            mass_tot = 0;
2709
            for(i=0; i<nat_cavity; i++) {
2710
              for(d=0; d<DIM; d++)
2711
                x_init[d] +=
2712
                  mass_cavity[i]*rerun_fr.x[rerun_fr.natoms-nat_cavity+i][d];
2713
              mass_tot += mass_cavity[i];
2714
            }
2715
            for(d=0; d<DIM; d++)
2716
              x_init[d] /= mass_tot;
2717
          }
2718
        }
2719
        /* Generate coordinates within |dx|=drmax of x_init */
2720
        do {
2721
          dx[XX] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2722
          dx[YY] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2723
          dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2724
        } while (norm2(dx) > drmax*drmax);
2725
        rvec_add(x_init,dx,x_tp);
2726
      }
2727

    
2728
      if (a_tp1 - a_tp0 == 1) {
2729
        /* Insert a single atom, just copy the insertion location */
2730
        copy_rvec(x_tp,state->x[a_tp0]);
2731
      } else {
2732
        /* Copy the coordinates from the top file */
2733
        for(i=a_tp0; i<a_tp1; i++)
2734
          copy_rvec(x_mol[i-a_tp0],state->x[i]);
2735
        /* Rotate the molecule randomly */
2736
        rotate_conf(a_tp1-a_tp0,state->x+a_tp0,NULL,
2737
                    2*M_PI*gmx_rng_uniform_real(tpi_rand),
2738
                    2*M_PI*gmx_rng_uniform_real(tpi_rand),
2739
                    2*M_PI*gmx_rng_uniform_real(tpi_rand));
2740
        /* Shift to the insertion location */
2741
        for(i=a_tp0; i<a_tp1; i++)
2742
          rvec_inc(state->x[i],x_tp);
2743
      }
2744

    
2745
      /* Check if this insertion belongs to this node */
2746
      bOurStep = TRUE;
2747
      if (PAR(cr)) {
2748
        switch (inputrec->eI) {
2749
        case eiTPI:
2750
          bOurStep = ((step / inputrec->nstlist) % nnodes == cr->nodeid);
2751
          break;
2752
        case eiTPIC:
2753
          bOurStep = (step % nnodes == cr->nodeid);
2754
          break;
2755
        default:
2756
          gmx_fatal(FARGS,"Unknown integrator %s",ei_names[inputrec->eI]);
2757
        }
2758
      }
2759
      if (bOurStep) {
2760
        /* Clear some matrix variables  */
2761
        clear_mat(force_vir); 
2762
        clear_mat(shake_vir);
2763
        clear_mat(vir);
2764
        clear_mat(pres);
2765
        
2766
        /* Set the charge group center of mass of the test particle */
2767
        copy_rvec(x_init,fr->cg_cm[top->cgs.nr-1]);
2768

    
2769
        /* Calc energy (no forces) on new positions.
2770
         * Since we only need the intermolecular energy
2771
         * and the RF exclusion terms of the inserted molecule occur
2772
         * within a single charge group we can pass NULL for the graph.
2773
         * This also avoids shifts that would move charge groups
2774
         * out of the box.
2775
         */
2776
        /* Make do_force do a single node fore calculation */
2777
        cr->nnodes = 1;
2778
        do_force(fplog,cr,inputrec,
2779
                 step,nrnb,wcycle,top,&top_global->groups,
2780
                 rerun_fr.box,state->x,&state->hist,
2781
                 f,buf,force_vir,mdatoms,enerd,fcd,
2782
                 lambda,NULL,fr,NULL,mu_tot,t,NULL,NULL,
2783
                 GMX_FORCE_NONBONDED |
2784
                 (bNS ? GMX_FORCE_NS : 0) |
2785
                 (bStateChanged ? GMX_FORCE_STATECHANGED : 0)); 
2786
        cr->nnodes = nnodes;
2787
        bStateChanged = FALSE;
2788
        bNS = FALSE;
2789

    
2790
        /* Calculate long range corrections to pressure and energy */
2791
        calc_dispcorr(fplog,inputrec,fr,step,mdatoms->nr,rerun_fr.box,lambda,
2792
                      pres,vir,enerd->term);
2793
        
2794
        /* If the compiler doesn't optimize this check away
2795
         * we catch the NAN energies.
2796
         * With tables extreme negative energies might occur close to r=0.
2797
         */
2798
        epot = enerd->term[F_EPOT];
2799
        if (epot != epot || epot*beta < bU_neg_limit) {
2800
          if (debug)
2801
            fprintf(debug,"\n  time %.3f, step %d: non-finite energy %f, using exp(-bU)=0\n",t,step,epot);
2802
          embU = 0;
2803
        } else {
2804
          embU = exp(-beta*epot);
2805
          sum_embU += embU;
2806
          /* Determine the weighted energy contributions of each energy group */
2807
          e = 0;
2808
          sum_UgembU[e++] += epot*embU;
2809
          if (fr->bBHAM) {
2810
            for(i=0; i<ngid; i++)
2811
              sum_UgembU[e++] +=
2812
                (enerd->grpp.ener[egBHAMSR][GID(i,gid_tp,ngid)] +
2813
                 enerd->grpp.ener[egBHAMLR][GID(i,gid_tp,ngid)])*embU;
2814
          } else {
2815
            for(i=0; i<ngid; i++)
2816
              sum_UgembU[e++] +=
2817
                (enerd->grpp.ener[egLJSR][GID(i,gid_tp,ngid)] +
2818
                 enerd->grpp.ener[egLJLR][GID(i,gid_tp,ngid)])*embU;
2819
          }
2820
          if (bDispCorr)
2821
            sum_UgembU[e++] += enerd->term[F_DISPCORR]*embU;
2822
          if (bCharge) {
2823
            for(i=0; i<ngid; i++)
2824
              sum_UgembU[e++] +=
2825
                (enerd->grpp.ener[egCOULSR][GID(i,gid_tp,ngid)] +
2826
                 enerd->grpp.ener[egCOULLR][GID(i,gid_tp,ngid)])*embU;
2827
            if (bRFExcl)
2828
              sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU;
2829
          }
2830
        }
2831
        
2832
        if (embU == 0 || beta*epot > bU_bin_limit) {
2833
          bin[0]++;
2834
        } else {
2835
          i = (int)((bU_logV_bin_limit
2836
                     - (beta*epot - logV + refvolshift))*invbinw
2837
                    + 0.5);
2838
          if (i < 0)
2839
            i = 0;
2840
          if (i >= nbin)
2841
            realloc_bins(&bin,&nbin,i+10);
2842
          bin[i]++;
2843
        }
2844

    
2845
        if (debug)
2846
          fprintf(debug,"TPI %7d %12.5e %12.5f %12.5f %12.5f\n",
2847
                  step,epot,x_tp[XX],x_tp[YY],x_tp[ZZ]);
2848

    
2849
        if (dump_pdb && epot <= dump_ener) {
2850
          sprintf(str,"t%g_step%d.pdb",t,step);
2851
          sprintf(str2,"t: %f step %d ener: %f",t,step,epot);
2852
          write_sto_conf_mtop(str,str2,top_global,state->x,state->v,
2853
                              inputrec->ePBC,state->box);
2854
        }
2855
      }
2856
    }
2857

    
2858
    if (PAR(cr)) {
2859
      /* When running in parallel sum the energies over the processes */
2860
      gmx_sumd(1,    &sum_embU, cr);
2861
      gmx_sumd(nener,sum_UgembU,cr);
2862
    }
2863

    
2864
    frame++;
2865
    V_all += V;
2866
    VembU_all += V*sum_embU/nsteps;
2867

    
2868
    if (fp_tpi) {
2869
      if (bVerbose || frame%10==0 || frame<10)
2870
        fprintf(stderr,"mu %10.3e <mu> %10.3e\n",
2871
                -log(sum_embU/nsteps)/beta,-log(VembU_all/V_all)/beta);
2872
      
2873
      fprintf(fp_tpi,"%10.3f %12.5e %12.5e %12.5e %12.5e",
2874
              t,
2875
              VembU_all==0 ? 20/beta : -log(VembU_all/V_all)/beta,
2876
              sum_embU==0  ? 20/beta : -log(sum_embU/nsteps)/beta,
2877
              sum_embU/nsteps,V);
2878
      for(e=0; e<nener; e++)
2879
        fprintf(fp_tpi," %12.5e",sum_UgembU[e]/nsteps);
2880
      fprintf(fp_tpi,"\n");
2881
      fflush(fp_tpi);
2882
    }
2883

    
2884
    bNotLastFrame = read_next_frame(status,&rerun_fr);
2885
  } /* End of the loop  */
2886

    
2887
  update_time();
2888

    
2889
  close_trj(status);
2890

    
2891
  if (fp_tpi)
2892
    gmx_fio_fclose(fp_tpi);
2893

    
2894
  if (fplog) {
2895
    fprintf(fplog,"\n");
2896
    fprintf(fplog,"  <V>  = %12.5e nm^3\n",V_all/frame);
2897
    fprintf(fplog,"  <mu> = %12.5e kJ/mol\n",-log(VembU_all/V_all)/beta);
2898
  }
2899
  
2900
  /* Write the Boltzmann factor histogram */
2901
  if (PAR(cr)) {
2902
    /* When running in parallel sum the bins over the processes */
2903
    i = nbin;
2904
    global_max(cr,&i);
2905
    realloc_bins(&bin,&nbin,i);
2906
    gmx_sumd(nbin,bin,cr);
2907
  }
2908
  fp_tpi = xvgropen(opt2fn("-tpid",nfile,fnm),
2909
                    "TPI energy distribution",
2910
                    "\\8b\\4U - log(V/<V>)","count");
2911
  sprintf(str,"number \\8b\\4U > %g: %9.3e",bU_bin_limit,bin[0]);
2912
  xvgr_subtitle(fp_tpi,str);
2913
  xvgr_legend(fp_tpi,2,tpid_leg);
2914
  for(i=nbin-1; i>0; i--) {
2915
    bUlogV = -i/invbinw + bU_logV_bin_limit - refvolshift + log(V_all/frame);
2916
    fprintf(fp_tpi,"%6.2f %10d %12.5e\n",
2917
            bUlogV,
2918
            (int)(bin[i]+0.5),
2919
            bin[i]*exp(-bUlogV)*V_all/VembU_all);
2920
  }
2921
  gmx_fio_fclose(fp_tpi);
2922
  sfree(bin);
2923

    
2924
  sfree(sum_UgembU);
2925

    
2926
  *nsteps_done = frame*inputrec->nsteps;
2927
  
2928
  return start_t;
2929
}