Project

General

Profile

minimize_r1.131.2.3.c

fixed? src/mdlib/minimize.c - Berk Hess, 12/12/2008 04:35 PM

 
1
/*
2
 * $Id: minimize.c,v 1.131.2.3 2008/11/21 20:23:53 hess Exp $
3
 * 
4
 *                This source code is part of
5
 * 
6
 *                 G   R   O   M   A   C   S
7
 * 
8
 *          GROningen MAchine for Chemical Simulations
9
 * 
10
 *                        VERSION 3.2.0
11
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13
 * Copyright (c) 2001-2004, The GROMACS development team,
14
 * check out http://www.gromacs.org for more information.
15

16
 * This program is free software; you can redistribute it and/or
17
 * modify it under the terms of the GNU General Public License
18
 * as published by the Free Software Foundation; either version 2
19
 * of the License, or (at your option) any later version.
20
 * 
21
 * If you want to redistribute modifications, please consider that
22
 * scientific software is very special. Version control is crucial -
23
 * bugs must be traceable. We will be happy to consider code for
24
 * inclusion in the official distribution, but derived work must not
25
 * be called official GROMACS. Details are found in the README & COPYING
26
 * files - if they are missing, get the official version at www.gromacs.org.
27
 * 
28
 * To help us fund GROMACS development, we humbly ask that you cite
29
 * the papers on the package - you can find them in the top README file.
30
 * 
31
 * For more info, check our website at http://www.gromacs.org
32
 * 
33
 * And Hey:
34
 * GROwing Monsters And Cloning Shrimps
35
 */
36
#ifdef HAVE_CONFIG_H
37
#include <config.h>
38
#endif
39

    
40
#include <string.h>
41
#include <time.h>
42
#include <math.h>
43
#include "sysstuff.h"
44
#include "string2.h"
45
#include "network.h"
46
#include "confio.h"
47
#include "copyrite.h"
48
#include "smalloc.h"
49
#include "nrnb.h"
50
#include "main.h"
51
#include "pbc.h"
52
#include "force.h"
53
#include "macros.h"
54
#include "random.h"
55
#include "names.h"
56
#include "gmx_fatal.h"
57
#include "txtdump.h"
58
#include "typedefs.h"
59
#include "update.h"
60
#include "random.h"
61
#include "constr.h"
62
#include "vec.h"
63
#include "statutil.h"
64
#include "tgroup.h"
65
#include "mdebin.h"
66
#include "vsite.h"
67
#include "force.h"
68
#include "mdrun.h"
69
#include "domdec.h"
70
#include "partdec.h"
71
#include "trnio.h"
72
#include "sparsematrix.h"
73
#include "mtxio.h"
74
#include "gmx_random.h"
75
#include "physics.h"
76
#include "xvgr.h"
77
#include "mdatoms.h"
78
#include "gbutil.h"
79
#include "ns.h"
80
#include "gmx_wallcycle.h"
81
#include "mtop_util.h"
82
#include "gmxfio.h"
83
#include "pme.h"
84

    
85
typedef struct {
86
  t_state s;
87
  rvec    *f;
88
  real    epot;
89
  real    fnorm;
90
  real    fmax;
91
  int     a_fmax;
92
} em_state_t;
93

    
94
static em_state_t *init_em_state()
95
{
96
  em_state_t *ems;
97
  
98
  snew(ems,1);
99

    
100
  return ems;
101
}
102

    
103
static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
104
{
105
  fprintf(out,"%s:\n",minimizer);
106
  fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
107
  fprintf(out,"   Number of steps    = %12d\n",nsteps);
108
}
109

    
110
static void warn_step(FILE *fp,real ftol,bool bConstrain)
111
{
112
  fprintf(fp,"\nStepsize too small, or no change in energy.\n"
113
          "Converged to machine precision,\n"
114
          "but not to the requested precision Fmax < %g\n",
115
          ftol);
116
  if (sizeof(real)<sizeof(double))
117
      fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
118
        
119
  if (bConstrain)
120
    fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
121
            "off constraints alltogether (set constraints = none in mdp file)\n");
122
}
123

    
124

    
125

    
126
static void print_converged(FILE *fp,const char *alg,real ftol,int count,bool bDone,
127
                            int nsteps,real epot,real fmax, int nfmax, real fnorm)
128
{
129
  if (bDone)
130
    fprintf(fp,"\n%s converged to Fmax < %g in %d steps\n",alg,ftol,count); 
131
  else if(count<nsteps)
132
    fprintf(fp,"\n%s converged to machine precision in %d steps,\n"
133
               "but did not reach the requested Fmax < %g.\n",alg,count,ftol);
134
  else 
135
    fprintf(fp,"\n%s did not converge to Fmax < %g in %d steps.\n",alg,ftol,count);
136

    
137
#ifdef GMX_DOUBLE
138
  fprintf(fp,"Potential Energy  = %21.14e\n",epot); 
139
  fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1); 
140
  fprintf(fp,"Norm of force     = %21.14e\n",fnorm); 
141
#else
142
  fprintf(fp,"Potential Energy  = %14.7e\n",epot); 
143
  fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1); 
144
  fprintf(fp,"Norm of force     = %14.7e\n",fnorm); 
145
#endif
146
}
147

    
148
static void get_f_norm_max(t_commrec *cr,
149
                           t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
150
                           real *fnorm,real *fmax,int *a_fmax)
151
{
152
  double fnorm2,*sum;
153
  real fmax2,fmax2_0,fam;
154
  int  la_max,a_max,start,end,i,m,gf;
155

    
156
  /* This routine finds the largest force and returns it.
157
   * On parallel machines the global max is taken.
158
   */
159
  fnorm2 = 0;
160
  fmax2 = 0;
161
  la_max = -1;
162
  gf = 0;
163
  start = mdatoms->start;
164
  end   = mdatoms->homenr + start;
165
  if (mdatoms->cFREEZE) {
166
    for(i=start; i<end; i++) {
167
      gf = mdatoms->cFREEZE[i];
168
      fam = 0;
169
      for(m=0; m<DIM; m++)
170
        if (!opts->nFreeze[gf][m])
171
          fam += sqr(f[i][m]);
172
      fnorm2 += fam;
173
      if (fam > fmax2) {
174
        fmax2  = fam;
175
        la_max = i;
176
      }
177
    }
178
  } else {
179
    for(i=start; i<end; i++) {
180
      fam = norm2(f[i]);
181
      fnorm2 += fam;
182
      if (fam > fmax2) {
183
        fmax2  = fam;
184
        la_max = i;
185
      }
186
    }
187
  }
188

    
189
  if (la_max >= 0 && DOMAINDECOMP(cr)) {
190
    a_max = cr->dd->gatindex[la_max];
191
  } else {
192
    a_max = la_max;
193
  }
194
  if (PAR(cr)) {
195
    snew(sum,2*cr->nnodes+1);
196
    sum[2*cr->nodeid]   = fmax2;
197
    sum[2*cr->nodeid+1] = a_max;
198
    sum[2*cr->nnodes]   = fnorm2;
199
    gmx_sumd(2*cr->nnodes+1,sum,cr);
200
    fnorm2 = sum[2*cr->nnodes];
201
    /* Determine the global maximum */
202
    for(i=0; i<cr->nnodes; i++) {
203
      if (sum[2*i] > fmax2) {
204
        fmax2 = sum[2*i];
205
        a_max = (int)(sum[2*i+1] + 0.5);
206
      }
207
    }
208
    sfree(sum);
209
  }
210

    
211
  if (fnorm)
212
    *fnorm = sqrt(fnorm2);
213
  if (fmax)
214
    *fmax  = sqrt(fmax2);
215
  if (a_fmax)
216
    *a_fmax = a_max;
217
}
218

    
219
static void get_state_f_norm_max(t_commrec *cr,
220
                           t_grpopts *opts,t_mdatoms *mdatoms,
221
                           em_state_t *ems)
222
{
223
  get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
224
}
225

    
226
void init_em(FILE *fplog,const char *title,
227
             t_commrec *cr,t_inputrec *ir,
228
             t_state *state_global,gmx_mtop_t *top_global,
229
             em_state_t *ems,gmx_localtop_t **top,
230
             rvec *f,rvec **buf,rvec **f_global,
231
             t_nrnb *nrnb,rvec mu_tot,
232
             t_forcerec *fr,gmx_enerdata_t **enerd,
233
             t_graph **graph,t_mdatoms *mdatoms,
234
             gmx_vsite_t *vsite,gmx_constr_t constr,
235
             int nfile,t_filenm fnm[],int *fp_trn,int *fp_ene,
236
             t_mdebin **mdebin)
237
{
238
  int  start,homenr,i;
239
  real dvdlambda;
240

    
241
  if (fplog)
242
    fprintf(fplog,"Initiating %s\n",title);
243

    
244
  state_global->ngtc = 0;
245
  if (ir->eI == eiCG) {
246
    state_global->flags |= (1<<estCGP);
247
    snew(state_global->cg_p,state_global->nalloc);
248
  }
249

    
250
  /* Initiate some variables */
251
  if (ir->efep != efepNO)
252
    state_global->lambda = ir->init_lambda;
253
  else 
254
    state_global->lambda = 0.0;
255

    
256
  init_nrnb(nrnb);
257

    
258
  if (DOMAINDECOMP(cr)) {
259
    *top = dd_init_local_top(top_global);
260

    
261
    dd_init_local_state(cr->dd,state_global,&ems->s);
262

    
263
    /* Distribute the charge groups over the nodes from the master node */
264
    dd_partition_system(fplog,ir->init_step,cr,TRUE,
265
                        state_global,top_global,ir,
266
                        &ems->s,&ems->f,buf,mdatoms,*top,
267
                        fr,vsite,NULL,constr,
268
                        nrnb,NULL,FALSE);
269
    dd_store_state(cr->dd,&ems->s);
270
    
271
    if (ir->nstfout) {
272
      snew(*f_global,top_global->natoms);
273
    } else {
274
      *f_global = NULL;
275
    }
276
    *graph = NULL;
277
  } else {
278
    /* Just copy the state */
279
    ems->s = *state_global;
280
    snew(ems->s.x,ems->s.nalloc);
281
    snew(ems->f,ems->s.nalloc);
282
    for(i=0; i<state_global->natoms; i++)
283
      copy_rvec(state_global->x[i],ems->s.x[i]);
284
    copy_mat(state_global->box,ems->s.box);
285

    
286
    if (PAR(cr)) {
287
      /* Initialize the particle decomposition and split the topology */
288
      *top = split_system(fplog,top_global,ir,cr);
289
      
290
      pd_cg_range(cr,&fr->cg0,&fr->hcg);
291
    } else {
292
      *top = gmx_mtop_generate_local_top(top_global,ir);
293
    }
294
    *f_global = f;
295

    
296
    if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) {
297
      *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
298
    } else {
299
      *graph = NULL;
300
    }
301
  }
302

    
303
  clear_rvec(mu_tot);
304
  calc_shifts(ems->s.box,fr->shift_vec);
305

    
306
  if (PARTDECOMP(cr)) {
307
    pd_at_range(cr,&start,&homenr);
308
    homenr -= start;
309
  } else {
310
    start  = 0;
311
    homenr = top_global->natoms;
312
  }
313
  atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
314
  update_mdatoms(mdatoms,state_global->lambda);
315

    
316
  if (vsite && !DOMAINDECOMP(cr)) {
317
    set_vsite_top(vsite,*top,mdatoms,cr);
318
  }
319

    
320
  if (constr) {
321
    if (ir->eConstrAlg == econtSHAKE &&
322
        gmx_mtop_ftype_count(top_global,F_CONSTR) > 0) {
323
      gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
324
                econstr_names[econtSHAKE],econstr_names[econtLINCS]);
325
    }
326

    
327
    if (!DOMAINDECOMP(cr))
328
      set_constraints(constr,*top,ir,mdatoms,NULL);
329

    
330
    /* Constrain the starting coordinates */
331
    dvdlambda=0;
332
    constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
333
              ir,cr,-1,0,mdatoms,
334
              ems->s.x,ems->s.x,NULL,ems->s.box,ems->s.lambda,&dvdlambda,
335
              NULL,NULL,nrnb,econqCoord);
336
  }
337

    
338
  if (MASTER(cr)) {
339
    if (fp_trn)
340
      *fp_trn = open_trn(ftp2fn(efTRN,nfile,fnm),"w");
341
    if (fp_ene)
342
      *fp_ene = open_enx(ftp2fn(efENX,nfile,fnm),"w");
343
  } else {
344
    if (fp_trn)
345
      *fp_trn = -1;
346
    if (fp_ene)
347
      *fp_ene = -1;
348
  }
349

    
350
  snew(*enerd,1);
351
  init_enerdata(fplog,top_global->groups.grps[egcENER].nr,*enerd);
352

    
353
  /* Init bin for energy stuff */
354
  *mdebin = init_mdebin(*fp_ene,top_global,ir); 
355
}
356

    
357
static void finish_em(FILE *fplog,t_commrec *cr,
358
                      int fp_traj,int fp_ene)
359
{
360
  if (!(cr->duty & DUTY_PME)) {
361
    /* Tell the PME only node to finish */
362
    gmx_pme_finish(cr);
363
  }
364

    
365
  if (MASTER(cr)) {
366
    close_trn(fp_traj);
367
    close_enx(fp_ene);
368
  }
369
}
370

    
371
static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
372
{
373
  em_state_t tmp;
374

    
375
  tmp   = *ems1;
376
  *ems1 = *ems2;
377
  *ems2 = tmp;
378
}
379

    
380
static void copy_em_coords_back(em_state_t *ems,t_state *state)
381
{
382
  int i;
383

    
384
  for(i=0; (i<state->natoms); i++)
385
    copy_rvec(ems->s.x[i],state->x[i]);
386
}
387

    
388
static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
389
                       em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
390
                       gmx_constr_t constr,gmx_localtop_t *top,
391
                       t_nrnb *nrnb,gmx_wallcycle_t wcycle,
392
                       int count)
393

    
394
{
395
  t_state *s1,*s2;
396
  int  start,end,gf,i,m;
397
  rvec *x1,*x2;
398
  real dvdlambda;
399

    
400
  s1 = &ems1->s;
401
  s2 = &ems2->s;
402

    
403
  if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
404
    gmx_incons("state mismatch in do_em_step");
405

    
406
  s2->flags = s1->flags;
407

    
408
  if (s2->nalloc != s1->nalloc) {
409
    s2->nalloc = s1->nalloc;
410
    srenew(s2->x,s1->nalloc);
411
    srenew(ems2->f,  s1->nalloc);
412
    if (s2->flags & (1<<estCGP))
413
      srenew(s2->cg_p,  s1->nalloc);
414
  }
415
  
416
  s2->natoms = s1->natoms;
417
  s2->lambda = s1->lambda;
418
  copy_mat(s1->box,s2->box);
419

    
420
  start = md->start;
421
  end   = md->start + md->homenr;
422

    
423
  x1 = s1->x;
424
  x2 = s2->x;
425
  gf = 0;
426
  for(i=start; i<end; i++) {
427
    if (md->cFREEZE)
428
      gf = md->cFREEZE[i];
429
    for(m=0; m<DIM; m++) {
430
      if (ir->opts.nFreeze[gf][m])
431
        x2[i][m] = x1[i][m];
432
      else
433
        x2[i][m] = x1[i][m] + a*f[i][m];
434
    }
435
  }
436

    
437
  if (s2->flags & (1<<estCGP)) {
438
    /* Copy the CG p vector */
439
    x1 = s1->cg_p;
440
    x2 = s2->cg_p;
441
    for(i=start; i<end; i++)
442
      copy_rvec(x1[i],x2[i]);
443
  }
444

    
445
  if (DOMAINDECOMP(cr)) {
446
    s2->ddp_count = s1->ddp_count;
447
    if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
448
      s2->cg_gl_nalloc = s1->cg_gl_nalloc;
449
      srenew(s2->cg_gl,s2->cg_gl_nalloc);
450
    }
451
    s2->ncg_gl = s1->ncg_gl;
452
    for(i=0; i<s2->ncg_gl; i++)
453
      s2->cg_gl[i] = s1->cg_gl[i];
454
    s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
455
  }
456

    
457
  if (constr) {
458
    wallcycle_start(wcycle,ewcCONSTR);
459
    dvdlambda = 0;
460
    constrain(NULL,TRUE,TRUE,constr,&top->idef,        
461
              ir,cr,count,0,md,
462
              s1->x,s2->x,NULL,s2->box,s2->lambda,
463
              &dvdlambda,NULL,NULL,nrnb,econqCoord);
464
    wallcycle_stop(wcycle,ewcCONSTR);
465
  }
466
}
467

    
468
static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2)
469

    
470
{
471
  int  start,end,i,m;
472

    
473
  if (DOMAINDECOMP(cr)) {
474
    start = 0;
475
    end   = cr->dd->nat_home;
476
  } else if (PARTDECOMP(cr)) {
477
    pd_at_range(cr,&start,&end);
478
  } else {
479
    start = 0;
480
    end   = n;
481
  }
482

    
483
  for(i=start; i<end; i++) {
484
    for(m=0; m<DIM; m++) {
485
      x2[i][m] = x1[i][m] + a*f[i][m];
486
    }
487
  }
488
}
489

    
490
static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f)
491

    
492
{
493
  int  start,end,i,m;
494

    
495
  if (DOMAINDECOMP(cr)) {
496
    start = 0;
497
    end   = cr->dd->nat_home;
498
  } else if (PARTDECOMP(cr)) {
499
    pd_at_range(cr,&start,&end);
500
  } else {
501
    start = 0;
502
    end   = n;
503
  }
504

    
505
  for(i=start; i<end; i++) {
506
    for(m=0; m<DIM; m++) {
507
      f[i][m] = (x1[i][m] - x2[i][m])*a;
508
    }
509
  }
510
}
511

    
512
static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
513
                                   gmx_mtop_t *top_global,t_inputrec *ir,
514
                                   em_state_t *ems,rvec **buf,gmx_localtop_t *top,
515
                                   t_mdatoms *mdatoms,t_forcerec *fr,
516
                                   gmx_vsite_t *vsite,gmx_constr_t constr,
517
                                   t_nrnb *nrnb,gmx_wallcycle_t wcycle)
518
{
519
  /* Repartition the domain decomposition */
520
  wallcycle_start(wcycle,ewcDOMDEC);
521
  dd_partition_system(fplog,step,cr,FALSE,
522
                      NULL,top_global,ir,
523
                      &ems->s,&ems->f,buf,
524
                      mdatoms,top,fr,vsite,NULL,constr,
525
                      nrnb,wcycle,FALSE);
526
  dd_store_state(cr->dd,&ems->s);
527
  wallcycle_stop(wcycle,ewcDOMDEC);
528
}
529
    
530
static void evaluate_energy(FILE *fplog,bool bVerbose,t_commrec *cr,
531
                            t_state *state_global,gmx_mtop_t *top_global,
532
                            em_state_t *ems,rvec **buf,gmx_localtop_t *top,
533
                            t_inputrec *inputrec,
534
                            t_nrnb *nrnb,gmx_wallcycle_t wcycle,
535
                            gmx_vsite_t *vsite,gmx_constr_t constr,
536
                            t_fcdata *fcd,
537
                            t_graph *graph,t_mdatoms *mdatoms,
538
                            t_forcerec *fr, rvec mu_tot,
539
                            gmx_enerdata_t *enerd,tensor vir,tensor pres,
540
                            int count,bool bFirst)
541
{
542
  real t;
543
  bool bNS;
544
  int  nabnsb;
545
  tensor force_vir,shake_vir,ekin;
546
  real dvdl;
547
  real terminate=0;
548
  t_state *state;
549
  
550
  /* Set the time to the initial time, the time does not change during EM */
551
  t = inputrec->init_t;
552

    
553
  if (bFirst ||
554
      (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
555
    /* This the first state or an old state used before the last ns */
556
    bNS = TRUE;
557
  } else {
558
    bNS = FALSE;
559
    if (inputrec->nstlist > 0) {
560
      bNS = TRUE;
561
    } else if (inputrec->nstlist == -1) {
562
      nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
563
      if (PAR(cr))
564
        gmx_sumi(1,&nabnsb,cr);
565
      bNS = (nabnsb > 0);
566
    }
567
  }
568

    
569
  if (vsite)
570
    construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
571
                     top->idef.iparams,top->idef.il,
572
                     fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
573

    
574
  if (DOMAINDECOMP(cr)) {
575
    if (bNS) {
576
      /* Repartition the domain decomposition */
577
      em_dd_partition_system(fplog,count,cr,top_global,inputrec,
578
                             ems,buf,top,mdatoms,fr,vsite,constr,
579
                             nrnb,wcycle);
580
    }
581
  }
582
      
583
  /* Calc force & energy on new trial position  */
584
  /* do_force always puts the charge groups in the box and shifts again
585
   * We do not unshift, so molecules are always whole in congrad.c
586
   */
587
  do_force(fplog,cr,inputrec,
588
           count,nrnb,wcycle,top,&top_global->groups,
589
           ems->s.box,ems->s.x,&ems->s.hist,
590
           ems->f,*buf,force_vir,mdatoms,enerd,fcd,
591
           ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,
592
           GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
593
           (bNS ? GMX_FORCE_NS : 0));
594

    
595
  /* Clear the unused shake virial and pressure */
596
  clear_mat(shake_vir);
597
  clear_mat(pres);
598

    
599
  /* Calculate long range corrections to pressure and energy */
600
  calc_dispcorr(fplog,inputrec,fr,count,mdatoms->nr,ems->s.box,ems->s.lambda,
601
                pres,force_vir,enerd->term);
602

    
603
  /* Communicate stuff when parallel */
604
  if (PAR(cr)) {
605
    wallcycle_start(wcycle,ewcMoveE);
606

    
607
    global_stat(fplog,cr,enerd,force_vir,shake_vir,mu_tot,
608
                inputrec,NULL,FALSE,NULL,NULL,NULL,NULL,&terminate);
609

    
610
    wallcycle_stop(wcycle,ewcMoveE);
611
  }
612

    
613
  ems->epot = enerd->term[F_EPOT];
614
  
615
  if (constr) {
616
    /* Project out the constraint components of the force */
617
    wallcycle_start(wcycle,ewcCONSTR);
618
    dvdl = 0;
619
    constrain(NULL,FALSE,FALSE,constr,&top->idef,
620
              inputrec,cr,count,0,mdatoms,
621
              ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
622
              NULL,&shake_vir,nrnb,econqForce);
623
    if (fr->bSepDVDL && fplog)
624
      fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
625
    enerd->term[F_DVDL] += dvdl;
626
    m_add(force_vir,shake_vir,vir);
627
    wallcycle_stop(wcycle,ewcCONSTR);
628
  } else {
629
    copy_mat(force_vir,vir);
630
  }
631

    
632
  clear_mat(ekin);
633
  enerd->term[F_PRES] =
634
    calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres,
635
              (fr->eeltype==eelPPPM)?enerd->term[F_COUL_RECIP]:0.0);
636

    
637
  get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
638
}
639

    
640
static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
641
                              gmx_mtop_t *mtop,
642
                              em_state_t *s_min,em_state_t *s_b)
643
{
644
  rvec *fm,*fb,*fmg;
645
  t_block *cgs_gl;
646
  int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
647
  double partsum;
648
  unsigned char *grpnrFREEZE;
649

    
650
  if (debug)
651
    fprintf(debug,"Doing reorder_partsum\n");
652

    
653
  fm = s_min->f;
654
  fb = s_b->f;
655

    
656
  cgs_gl = dd_charge_groups_global(cr->dd);
657
  index = cgs_gl->index;
658

    
659
  /* Collect fm in a global vector fmg.
660
   * This conflics with the spirit of domain decomposition,
661
   * but to fully optimize this a much more complicated algorithm is required.
662
   */
663
  snew(fmg,mtop->natoms);
664
  
665
  ncg   = s_min->s.ncg_gl;
666
  cg_gl = s_min->s.cg_gl;
667
  i = 0;
668
  for(c=0; c<ncg; c++) {
669
    cg = cg_gl[c];
670
    a0 = index[cg];
671
    a1 = index[cg+1];
672
    for(a=a0; a<a1; a++) {
673
      copy_rvec(fm[i],fmg[a]);
674
      i++;
675
    }
676
  }
677
  gmx_sum(mtop->natoms*3,fmg[0],cr);
678

    
679
  /* Now we will determine the part of the sum for the cgs in state s_b */
680
  ncg   = s_b->s.ncg_gl;
681
  cg_gl = s_b->s.cg_gl;
682
  partsum = 0;
683
  i = 0;
684
  gf = 0;
685
  grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
686
  for(c=0; c<ncg; c++) {
687
    cg = cg_gl[c];
688
    a0 = index[cg];
689
    a1 = index[cg+1];
690
    for(a=a0; a<a1; a++) {
691
      if (mdatoms->cFREEZE && grpnrFREEZE) {
692
        gf = grpnrFREEZE[i];
693
      }
694
      for(m=0; m<DIM; m++) {
695
        if (!opts->nFreeze[gf][m]) {
696
          partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
697
        }
698
      }
699
      i++;
700
    }
701
  }
702
  
703
  sfree(fmg);
704

    
705
  return partsum;
706
}
707

    
708
static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
709
                    gmx_mtop_t *mtop,
710
                    em_state_t *s_min,em_state_t *s_b)
711
{
712
  rvec *fm,*fb;
713
  double sum;
714
  int  gf,i,m;
715

    
716
  /* This is just the classical Polak-Ribiere calculation of beta;
717
   * it looks a bit complicated since we take freeze groups into account,
718
   * and might have to sum it in parallel runs.
719
   */
720
  
721
  if (!DOMAINDECOMP(cr) ||
722
      (s_min->s.ddp_count == cr->dd->ddp_count &&
723
       s_b->s.ddp_count   == cr->dd->ddp_count)) {
724
    fm = s_min->f;
725
    fb = s_b->f;
726
    sum = 0;
727
    gf = 0;
728
    /* This part of code can be incorrect with DD,
729
     * since the atom ordering in s_b and s_min might differ.
730
     */
731
    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
732
      if (mdatoms->cFREEZE)
733
        gf = mdatoms->cFREEZE[i];
734
      for(m=0; m<DIM; m++)
735
        if (!opts->nFreeze[gf][m]) {
736
          sum += (fb[i][m] - fm[i][m])*fb[i][m];
737
        } 
738
    }
739
  } else {
740
    /* We need to reorder cgs while summing */
741
    sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
742
  }
743
  if (PAR(cr))
744
    gmx_sumd(1,&sum,cr);
745

    
746
  return sum/sqr(s_min->fnorm);
747
}
748

    
749
time_t do_cg(FILE *fplog,t_commrec *cr,
750
             int nfile,t_filenm fnm[],
751
             bool bVerbose,bool bCompact,
752
             gmx_vsite_t *vsite,gmx_constr_t constr,
753
             int stepout,
754
             t_inputrec *inputrec,
755
             gmx_mtop_t *top_global,t_fcdata *fcd,
756
             t_state *state_global,rvec f[],
757
             rvec buf[],t_mdatoms *mdatoms,
758
             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
759
             gmx_edsam_t ed,
760
             t_forcerec *fr,
761
             int repl_ex_nst,int repl_ex_seed,
762
             real cpt_period,real max_hours,
763
             unsigned long Flags)
764
{
765
  const char *CG="Polak-Ribiere Conjugate Gradients";
766

    
767
  em_state_t *s_min,*s_a,*s_b,*s_c;
768
  gmx_localtop_t *top;
769
  gmx_enerdata_t *enerd;
770
  t_graph    *graph;
771
  rvec   *f_global,*p,*sf,*sfm;
772
  double gpa,gpb,gpc,tmp,sum[2],minstep;
773
  real   fnormn;
774
  real   stepsize;        
775
  real   a,b,c,beta=0.0;
776
  real   epot_repl=0;
777
  real   pnorm;
778
  t_mdebin   *mdebin;
779
  bool   converged,foundlower;
780
  rvec   mu_tot;
781
  time_t start_t;
782
  bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
783
  tensor vir,pres;
784
  int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
785
  int    fp_trn,fp_ene;
786
  int    i,m,gf,step,nminstep;
787
  real   terminate=0;  
788

    
789
  step=0;
790

    
791
  s_min = init_em_state();
792
  s_a   = init_em_state();
793
  s_b   = init_em_state();
794
  s_c   = init_em_state();
795

    
796
  /* Init em and store the local state in s_min */
797
  init_em(fplog,CG,cr,inputrec,
798
          state_global,top_global,s_min,&top,f,&buf,&f_global,
799
          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr,
800
          nfile,fnm,&fp_trn,&fp_ene,&mdebin);
801

    
802
  /* Print to log file */
803
  start_t=print_date_and_time(fplog,cr->nodeid,
804
                              "Started Polak-Ribiere Conjugate Gradients");
805
  wallcycle_start(wcycle,ewcRUN);
806
  
807
  /* Max number of steps */
808
  number_steps=inputrec->nsteps;
809

    
810
  if (MASTER(cr))
811
    sp_header(stderr,CG,inputrec->em_tol,number_steps);
812
  if (fplog)
813
    sp_header(fplog,CG,inputrec->em_tol,number_steps);
814

    
815
  /* Call the force routine and some auxiliary (neighboursearching etc.) */
816
  /* do_force always puts the charge groups in the box and shifts again
817
   * We do not unshift, so molecules are always whole in congrad.c
818
   */
819
  evaluate_energy(fplog,bVerbose,cr,
820
                  state_global,top_global,s_min,&buf,top,
821
                  inputrec,nrnb,wcycle,
822
                  vsite,constr,fcd,graph,mdatoms,fr,
823
                  mu_tot,enerd,vir,pres,-1,TRUE);
824
  where();
825

    
826
  if (MASTER(cr)) {
827
    /* Copy stuff to the energy bin for easy printing etc. */
828
    upd_mdebin(mdebin,NULL,TRUE,mdatoms->tmass,step,(real)step,
829
               enerd,&s_min->s,s_min->s.box,
830
               NULL,NULL,vir,pres,NULL,mu_tot,constr);
831
    
832
    print_ebin_header(fplog,step,step,s_min->s.lambda);
833
    print_ebin(fp_ene,TRUE,FALSE,FALSE,fplog,step,step,step,eprNORMAL,
834
               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
835
  }
836
  where();
837

    
838
  /* Estimate/guess the initial stepsize */
839
  stepsize = inputrec->em_stepsize/s_min->fnorm;
840
 
841
  if (MASTER(cr)) {
842
    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
843
            s_min->fmax,s_min->a_fmax+1);
844
    fprintf(stderr,"   F-Norm            = %12.5e\n",
845
            s_min->fnorm/sqrt(state_global->natoms));
846
    fprintf(stderr,"\n");
847
    /* and copy to the log file too... */
848
    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
849
            s_min->fmax,s_min->a_fmax+1);
850
    fprintf(fplog,"   F-Norm            = %12.5e\n",
851
            s_min->fnorm/sqrt(state_global->natoms));
852
    fprintf(fplog,"\n");
853
  }  
854
  /* Start the loop over CG steps.                
855
   * Each successful step is counted, and we continue until
856
   * we either converge or reach the max number of steps.
857
   */
858
  for(step=0,converged=FALSE;( step<=number_steps || number_steps==0) && !converged;step++) {
859
    
860
    /* start taking steps in a new direction 
861
     * First time we enter the routine, beta=0, and the direction is 
862
     * simply the negative gradient.
863
     */
864

    
865
    /* Calculate the new direction in p, and the gradient in this direction, gpa */
866
    p  = s_min->s.cg_p;
867
    sf = s_min->f;
868
    gpa = 0;
869
    gf = 0;
870
    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
871
      if (mdatoms->cFREEZE) 
872
        gf = mdatoms->cFREEZE[i];
873
      for(m=0; m<DIM; m++) {
874
        if (!inputrec->opts.nFreeze[gf][m]) {
875
          p[i][m] = sf[i][m] + beta*p[i][m];
876
          gpa -= p[i][m]*sf[i][m];
877
          /* f is negative gradient, thus the sign */
878
        } else {
879
          p[i][m] = 0;
880
        }
881
      }
882
    }
883
    
884
    /* Sum the gradient along the line across CPUs */
885
    if (PAR(cr))
886
      gmx_sumd(1,&gpa,cr);
887

    
888
    /* Calculate the norm of the search vector */
889
    get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
890
    
891
    /* Just in case stepsize reaches zero due to numerical precision... */
892
    if(stepsize<=0)          
893
      stepsize = inputrec->em_stepsize/pnorm;
894
    
895
    /* 
896
     * Double check the value of the derivative in the search direction.
897
     * If it is positive it must be due to the old information in the
898
     * CG formula, so just remove that and start over with beta=0.
899
     * This corresponds to a steepest descent step.
900
     */
901
    if(gpa>0) {
902
      beta = 0;
903
      step--; /* Don't count this step since we are restarting */
904
      continue; /* Go back to the beginning of the big for-loop */
905
    }
906

    
907
    /* Calculate minimum allowed stepsize, before the average (norm)
908
     * relative change in coordinate is smaller than precision
909
     */
910
    minstep=0;
911
    for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
912
      for(m=0; m<DIM; m++) {
913
        tmp = fabs(s_min->s.x[i][m]);
914
        if(tmp < 1.0)
915
          tmp = 1.0;
916
        tmp = p[i][m]/tmp;
917
        minstep += tmp*tmp;
918
      }
919
    }
920
    /* Add up from all CPUs */
921
    if(PAR(cr))
922
      gmx_sumd(1,&minstep,cr);
923

    
924
    minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
925

    
926
    if(stepsize<minstep) {
927
      converged=TRUE;
928
      break;
929
    }
930
    
931
    /* Write coordinates if necessary */
932
    do_x = do_per_step(step,inputrec->nstxout);
933
    do_f = do_per_step(step,inputrec->nstfout);
934
    
935
    write_traj(fplog,cr,fp_trn,do_x,FALSE,do_f,0,FALSE,0,NULL,FALSE,
936
               top_global,inputrec->eI,inputrec->simulation_part,step,(double)step,
937
               &s_min->s,state_global,s_min->f,f_global);
938
    
939
    /* Take a step downhill.
940
     * In theory, we should minimize the function along this direction.
941
     * That is quite possible, but it turns out to take 5-10 function evaluations
942
     * for each line. However, we dont really need to find the exact minimum -
943
     * it is much better to start a new CG step in a modified direction as soon
944
     * as we are close to it. This will save a lot of energy evaluations.
945
     *
946
     * In practice, we just try to take a single step.
947
     * If it worked (i.e. lowered the energy), we increase the stepsize but
948
     * the continue straight to the next CG step without trying to find any minimum.
949
     * If it didn't work (higher energy), there must be a minimum somewhere between
950
     * the old position and the new one.
951
     * 
952
     * Due to the finite numerical accuracy, it turns out that it is a good idea
953
     * to even accept a SMALL increase in energy, if the derivative is still downhill.
954
     * This leads to lower final energies in the tests I've done. / Erik 
955
     */
956
    s_a->epot = s_min->epot;
957
    a = 0.0;
958
    c = a + stepsize; /* reference position along line is zero */
959
    
960
    if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
961
      em_dd_partition_system(fplog,step,cr,top_global,inputrec,
962
                             s_min,&buf,top,mdatoms,fr,vsite,constr,
963
                             nrnb,wcycle);
964
    }
965

    
966
    /* Take a trial step (new coords in s_c) */
967
    do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
968
               constr,top,nrnb,wcycle,-1);
969
    
970
    neval++;
971
    /* Calculate energy for the trial step */
972
    evaluate_energy(fplog,bVerbose,cr,
973
                    state_global,top_global,s_c,&buf,top,
974
                    inputrec,nrnb,wcycle,
975
                    vsite,constr,fcd,graph,mdatoms,fr,
976
                    mu_tot,enerd,vir,pres,-1,FALSE);
977
    
978
    /* Calc derivative along line */
979
    p  = s_c->s.cg_p;
980
    sf = s_c->f;
981
    gpc=0;
982
    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
983
      for(m=0; m<DIM; m++) 
984
          gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
985
    }
986
    /* Sum the gradient along the line across CPUs */
987
    if (PAR(cr))
988
      gmx_sumd(1,&gpc,cr);
989

    
990
    /* This is the max amount of increase in energy we tolerate */
991
    tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
992

    
993
    /* Accept the step if the energy is lower, or if it is not significantly higher
994
     * and the line derivative is still negative.
995
     */
996
    if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
997
      foundlower = TRUE;
998
      /* Great, we found a better energy. Increase step for next iteration
999
       * if we are still going down, decrease it otherwise
1000
       */
1001
      if(gpc<0)
1002
        stepsize *= 1.618034;  /* The golden section */
1003
      else
1004
        stepsize *= 0.618034;  /* 1/golden section */
1005
    } else {
1006
      /* New energy is the same or higher. We will have to do some work
1007
       * to find a smaller value in the interval. Take smaller step next time!
1008
       */
1009
      foundlower = FALSE;
1010
      stepsize *= 0.618034;
1011
    }    
1012

    
1013

    
1014

    
1015
    
1016
    /* OK, if we didn't find a lower value we will have to locate one now - there must
1017
     * be one in the interval [a=0,c].
1018
     * The same thing is valid here, though: Don't spend dozens of iterations to find
1019
     * the line minimum. We try to interpolate based on the derivative at the endpoints,
1020
     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1021
     *
1022
     * I also have a safeguard for potentially really patological functions so we never
1023
     * take more than 20 steps before we give up ...
1024
     *
1025
     * If we already found a lower value we just skip this step and continue to the update.
1026
     */
1027
    if (!foundlower) {
1028
      nminstep=0;
1029

    
1030
      do {
1031
        /* Select a new trial point.
1032
         * If the derivatives at points a & c have different sign we interpolate to zero,
1033
         * otherwise just do a bisection.
1034
         */
1035
        if(gpa<0 && gpc>0)
1036
          b = a + gpa*(a-c)/(gpc-gpa);
1037
        else
1038
          b = 0.5*(a+c);                
1039
        
1040
        /* safeguard if interpolation close to machine accuracy causes errors:
1041
         * never go outside the interval
1042
         */
1043
        if(b<=a || b>=c)
1044
          b = 0.5*(a+c);
1045
        
1046
        if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1047
          /* Reload the old state */
1048
          em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1049
                                 s_min,&buf,top,mdatoms,fr,vsite,constr,
1050
                                 nrnb,wcycle);
1051
        }
1052

    
1053
        /* Take a trial step to this new point - new coords in s_b */
1054
        do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
1055
                   constr,top,nrnb,wcycle,-1);
1056
        
1057
        neval++;
1058
        /* Calculate energy for the trial step */
1059
        evaluate_energy(fplog,bVerbose,cr,
1060
                        state_global,top_global,s_b,&buf,top,
1061
                        inputrec,nrnb,wcycle,
1062
                        vsite,constr,fcd,graph,mdatoms,fr,
1063
                        mu_tot,enerd,vir,pres,-1,FALSE);
1064
        
1065
        /* p does not change within a step, but since the domain decomposition
1066
         * might change, we have to use cg_p of s_b here.
1067
         */
1068
        p  = s_b->s.cg_p;
1069
        sf = s_b->f;
1070
        gpb=0;
1071
        for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1072
          for(m=0; m<DIM; m++)
1073
              gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
1074
        }
1075
        /* Sum the gradient along the line across CPUs */
1076
        if (PAR(cr))
1077
          gmx_sumd(1,&gpb,cr);
1078
        
1079
        if (debug)
1080
          fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1081
                  s_a->epot,s_b->epot,s_c->epot,gpb);
1082

    
1083
        epot_repl = s_b->epot;
1084
        
1085
        /* Keep one of the intervals based on the value of the derivative at the new point */
1086
        if (gpb > 0) {
1087
          /* Replace c endpoint with b */
1088
          swap_em_state(s_b,s_c);
1089
          c = b;
1090
          gpc = gpb;
1091
        } else {
1092
          /* Replace a endpoint with b */
1093
          swap_em_state(s_b,s_a);
1094
          a = b;
1095
          gpa = gpb;
1096
        }
1097
        
1098
        /* 
1099
         * Stop search as soon as we find a value smaller than the endpoints.
1100
         * Never run more than 20 steps, no matter what.
1101
         */
1102
        nminstep++;
1103
      } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1104
               (nminstep < 20));     
1105
      
1106
      if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1107
          nminstep >= 20) {
1108
        /* OK. We couldn't find a significantly lower energy.
1109
         * If beta==0 this was steepest descent, and then we give up.
1110
         * If not, set beta=0 and restart with steepest descent before quitting.
1111
         */
1112
        if (beta == 0.0) {
1113
          /* Converged */
1114
          converged = TRUE;
1115
          break;
1116
        } else {
1117
          /* Reset memory before giving up */
1118
          beta = 0.0;
1119
          continue;
1120
        }
1121
      }
1122
      
1123
      /* Select min energy state of A & C, put the best in B.
1124
       */
1125
      if (s_c->epot < s_a->epot) {
1126
        if (debug)
1127
          fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1128
                  s_c->epot,s_a->epot);
1129
        swap_em_state(s_b,s_c);
1130
        gpb = gpc;
1131
        b = c;
1132
      } else {
1133
        if (debug)
1134
          fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1135
                  s_a->epot,s_c->epot);
1136
        swap_em_state(s_b,s_a);
1137
        gpb = gpa;
1138
        b = a;
1139
      }
1140
      
1141
    } else {
1142
      if (debug)
1143
        fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1144
                s_c->epot);
1145
      swap_em_state(s_b,s_c);
1146
      gpb = gpc;
1147
      b = c;
1148
    }
1149
    
1150
    /* new search direction */
1151
    /* beta = 0 means forget all memory and restart with steepest descents. */
1152
    if (nstcg && ((step % nstcg)==0)) 
1153
      beta = 0.0;
1154
    else {
1155
      /* s_min->fnorm cannot be zero, because then we would have converged
1156
       * and broken out.
1157
       */
1158

    
1159
      /* Polak-Ribiere update.
1160
       * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1161
       */
1162
      beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1163
    }
1164
    /* Limit beta to prevent oscillations */
1165
    if (fabs(beta) > 5.0)
1166
      beta = 0.0;
1167
    
1168
    
1169
    /* update positions */
1170
    swap_em_state(s_min,s_b);
1171
    gpa = gpb;
1172
    
1173
    /* Print it if necessary */
1174
    if (MASTER(cr)) {
1175
      if(bVerbose)
1176
        fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1177
                step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1178
                s_min->fmax,s_min->a_fmax+1);
1179
      /* Store the new (lower) energies */
1180
      upd_mdebin(mdebin,NULL,TRUE,mdatoms->tmass,step,(real)step,
1181
                 enerd,&s_min->s,s_min->s.box,
1182
                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1183
      do_log = do_per_step(step,inputrec->nstlog);
1184
      do_ene = do_per_step(step,inputrec->nstenergy);
1185
      if(do_log)
1186
        print_ebin_header(fplog,step,step,s_min->s.lambda);
1187
      print_ebin(fp_ene,do_ene,FALSE,FALSE,
1188
                 do_log ? fplog : NULL,step,step,step,eprNORMAL,
1189
                 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1190
    }
1191
    
1192
    /* Stop when the maximum force lies below tolerance.
1193
     * If we have reached machine precision, converged is already set to true.
1194
     */        
1195
    converged = converged || (s_min->fmax < inputrec->em_tol);
1196
    
1197
  } /* End of the loop */
1198
  
1199
  if (converged)        
1200
    step--; /* we never took that last step in this case */
1201
  
1202
  if (s_min->fmax > inputrec->em_tol) {
1203
    if (MASTER(cr)) {
1204
      warn_step(stderr,inputrec->em_tol,FALSE);
1205
      warn_step(fplog,inputrec->em_tol,FALSE);
1206
    }
1207
    converged = FALSE; 
1208
  }
1209
  
1210
  if (MASTER(cr)) {
1211
    /* If we printed energy and/or logfile last step (which was the last step)
1212
     * we don't have to do it again, but otherwise print the final values.
1213
     */
1214
    if(!do_log) {
1215
      /* Write final value to log since we didn't do anything the last step */
1216
      print_ebin_header(fplog,step,step,s_min->s.lambda);
1217
    }
1218
    if (!do_ene || !do_log) {
1219
      /* Write final energy file entries */
1220
      print_ebin(fp_ene,!do_ene,FALSE,FALSE,
1221
                 !do_log ? fplog : NULL,step,step,step,eprNORMAL,
1222
                 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1223
    }
1224
  }
1225
  
1226
  if (!PAR(cr)) {
1227
    copy_em_coords_back(s_min,state_global);
1228
  }
1229

    
1230
  /* Print some stuff... */
1231
  if (MASTER(cr))
1232
    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1233
  
1234
  /* IMPORTANT!
1235
   * For accurate normal mode calculation it is imperative that we
1236
   * store the last conformation into the full precision binary trajectory.
1237
   *
1238
   * However, we should only do it if we did NOT already write this step
1239
   * above (which we did if do_x or do_f was true).
1240
   */  
1241
  do_x = !do_per_step(step,inputrec->nstxout);
1242
  do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1243
  
1244
  write_traj(fplog,cr,fp_trn,do_x,FALSE,do_f,
1245
             0,FALSE,0,NULL,FALSE,
1246
             top_global,inputrec->eI,inputrec->simulation_part,step,(real)step,
1247
             &s_min->s,state_global,s_min->f,f_global);
1248
  if (MASTER(cr)) {
1249
    write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
1250
                        *top_global->name,top_global,
1251
                        state_global->x,NULL,inputrec->ePBC,state_global->box);
1252
  }
1253
  
1254
  fnormn = s_min->fnorm/sqrt(state_global->natoms);
1255
  
1256
  if (MASTER(cr)) {
1257
    print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1258
                    s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1259
    print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1260
                    s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1261
    
1262
    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1263
  }
1264
  
1265
  finish_em(fplog,cr,fp_trn,fp_ene);
1266
  
1267
  /* To print the actual number of steps we needed somewhere */
1268
  inputrec->nsteps=step;
1269

    
1270
  return start_t;
1271
} /* That's all folks */
1272

    
1273

    
1274
time_t do_lbfgs(FILE *fplog,t_commrec *cr,
1275
                int nfile,t_filenm fnm[],
1276
                bool bVerbose,bool bCompact,
1277
                gmx_vsite_t *vsite,gmx_constr_t constr,
1278
                int stepout,
1279
                t_inputrec *inputrec,
1280
                gmx_mtop_t *top_global,t_fcdata *fcd,
1281
                t_state *state,rvec f[],
1282
                rvec buf[],t_mdatoms *mdatoms,
1283
                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1284
                gmx_edsam_t ed,
1285
                t_forcerec *fr,
1286
                int repl_ex_nst,int repl_ex_seed,
1287
                real cpt_period,real max_hours,
1288
                unsigned long Flags)
1289
{
1290
  static char *LBFGS="Low-Memory BFGS Minimizer";
1291
  em_state_t ems;
1292
  gmx_localtop_t *top;
1293
  gmx_enerdata_t *enerd;
1294
  t_graph    *graph;
1295
  int    ncorr,nmaxcorr,point,cp,neval,nminstep;
1296
  double stepsize,gpa,gpb,gpc,tmp,minstep;
1297
  real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;        
1298
  real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1299
  real   a,b,c,maxdelta,delta;
1300
  real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
1301
  real   dgdx,dgdg,sq,yr,beta;
1302
  t_mdebin   *mdebin;
1303
  bool   converged,first;
1304
  rvec   mu_tot;
1305
  real   fnorm,fmax;
1306
  time_t start_t;
1307
  bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
1308
  tensor vir,pres;
1309
  int    fp_trn,fp_ene,start,end,number_steps;
1310
  int    i,k,m,n,nfmax,gf,step;
1311
  /* not used */
1312
  real   terminate;
1313

    
1314
  if (PAR(cr))
1315
    gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1316
  
1317
  n = 3*state->natoms;
1318
  nmaxcorr = inputrec->nbfgscorr;
1319
  
1320
  /* Allocate memory */
1321
  snew(f,state->natoms);
1322
  /* Use pointers to real so we dont have to loop over both atoms and
1323
   * dimensions all the time...
1324
   * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1325
   * that point to the same memory.
1326
   */
1327
  snew(xa,n);
1328
  snew(xb,n);
1329
  snew(xc,n);
1330
  snew(fa,n);
1331
  snew(fb,n);
1332
  snew(fc,n);
1333
  snew(frozen,n);
1334
  
1335
  xx = (real *)state->x;
1336
  ff = (real *)f;
1337

    
1338
        printf("x0: %20.12g %20.12g %20.12g\n",xx[0],xx[1],xx[2]);
1339

    
1340
  snew(p,n); 
1341
  snew(lastx,n); 
1342
  snew(lastf,n); 
1343
  snew(rho,nmaxcorr);
1344
  snew(alpha,nmaxcorr);
1345
  
1346
  snew(dx,nmaxcorr);
1347
  for(i=0;i<nmaxcorr;i++)
1348
    snew(dx[i],n);
1349
  
1350
  snew(dg,nmaxcorr);
1351
  for(i=0;i<nmaxcorr;i++)
1352
    snew(dg[i],n);
1353

    
1354
  step = 0;
1355
  neval = 0; 
1356

    
1357
  /* Init em */
1358
  init_em(fplog,LBFGS,cr,inputrec,
1359
          state,top_global,&ems,&top,f,&buf,&f,
1360
          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr,
1361
          nfile,fnm,&fp_trn,&fp_ene,&mdebin);
1362
  /* Do_lbfgs is not completely updated like do_steep and do_cg,
1363
   * so we free some memory again.
1364
   */
1365
  sfree(ems.s.x);
1366
  sfree(ems.f);
1367

    
1368
  start = mdatoms->start;
1369
  end   = mdatoms->homenr + start;
1370
    
1371
  /* Print to log file */
1372
  start_t=print_date_and_time(fplog,cr->nodeid,
1373
                              "Started Low-Memory BFGS Minimization");
1374
  wallcycle_start(wcycle,ewcRUN);
1375
  
1376
  do_log = do_ene = do_x = do_f = TRUE;
1377
  
1378
  /* Max number of steps */
1379
  number_steps=inputrec->nsteps;
1380

    
1381
  /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1382
  gf = 0;
1383
  for(i=start; i<end; i++) {
1384
    if (mdatoms->cFREEZE)
1385
      gf = mdatoms->cFREEZE[i];
1386
     for(m=0; m<DIM; m++) 
1387
       frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];  
1388
  }
1389
  if (MASTER(cr))
1390
    sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1391
  if (fplog)
1392
    sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1393
  
1394
  if (vsite)
1395
    construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1396
                     top->idef.iparams,top->idef.il,
1397
                     fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1398
  
1399
  /* Call the force routine and some auxiliary (neighboursearching etc.) */
1400
  /* do_force always puts the charge groups in the box and shifts again
1401
   * We do not unshift, so molecules are always whole
1402
   */
1403
  neval++;
1404
  ems.s.x = state->x;
1405
  ems.f = f;
1406
  evaluate_energy(fplog,bVerbose,cr,
1407
                  state,top_global,&ems,&buf,top,
1408
                  inputrec,nrnb,wcycle,
1409
                  vsite,constr,fcd,graph,mdatoms,fr,
1410
                  mu_tot,enerd,vir,pres,-1,TRUE);
1411
  where();
1412
        
1413
  if (MASTER(cr)) {
1414
    /* Copy stuff to the energy bin for easy printing etc. */
1415
    upd_mdebin(mdebin,NULL,TRUE,mdatoms->tmass,step,(real)step,
1416
               enerd,state,state->box,
1417
               NULL,NULL,vir,pres,NULL,mu_tot,constr);
1418
    
1419
    print_ebin_header(fplog,step,step,state->lambda);
1420
    print_ebin(fp_ene,TRUE,FALSE,FALSE,fplog,step,step,step,eprNORMAL,
1421
               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1422
  }
1423
  where();
1424
  
1425
  /* This is the starting energy */
1426
  Epot = enerd->term[F_EPOT];
1427
  
1428
  fnorm = ems.fnorm;
1429
  fmax  = ems.fmax;
1430
  nfmax = ems.a_fmax;
1431
  
1432
  /* Set the initial step.
1433
   * since it will be multiplied by the non-normalized search direction 
1434
   * vector (force vector the first time), we scale it by the
1435
   * norm of the force.
1436
   */
1437
  
1438
  if (MASTER(cr)) {
1439
    fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1440
    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1441
    fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1442
    fprintf(stderr,"\n");
1443
    /* and copy to the log file too... */
1444
    fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1445
    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1446
    fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1447
    fprintf(fplog,"\n");
1448
  }   
1449
  
1450
  point=0;
1451
  for(i=0;i<n;i++)
1452
    if(!frozen[i])
1453
      dx[point][i] = ff[i];  /* Initial search direction */
1454
    else
1455
      dx[point][i] = 0;
1456

    
1457
  stepsize = 1.0/fnorm;
1458
  converged = FALSE;
1459
  
1460
  /* Start the loop over BFGS steps.                
1461
   * Each successful step is counted, and we continue until
1462
   * we either converge or reach the max number of steps.
1463
   */
1464
  
1465
  ncorr=0;
1466

    
1467
  /* Set the gradient from the force */
1468
  for(step=0,converged=FALSE;( step<=number_steps || number_steps==0) && !converged;step++) {
1469
    
1470
    /* Write coordinates if necessary */
1471
    do_x = do_per_step(step,inputrec->nstxout);
1472
    do_f = do_per_step(step,inputrec->nstfout);
1473
    
1474
    write_traj(fplog,cr,fp_trn,do_x,FALSE,do_f,0,FALSE,0,NULL,FALSE,
1475
               top_global,inputrec->eI,inputrec->simulation_part,step,(real)step,state,state,f,f);
1476

    
1477
    /* Do the linesearching in the direction dx[point][0..(n-1)] */
1478
    
1479
    /* pointer to current direction - point=0 first time here */
1480
    s=dx[point];
1481
    
1482
    /* calculate line gradient */
1483
    for(gpa=0,i=0;i<n;i++) 
1484
        gpa-=s[i]*ff[i];
1485

    
1486
    /* Calculate minimum allowed stepsize, before the average (norm) 
1487
     * relative change in coordinate is smaller than precision 
1488
     */
1489
    for(minstep=0,i=0;i<n;i++) {
1490
      tmp=fabs(xx[i]);
1491
      if(tmp<1.0)
1492
        tmp=1.0;
1493
      tmp = s[i]/tmp;
1494
      minstep += tmp*tmp;
1495
    }
1496
    minstep = GMX_REAL_EPS/sqrt(minstep/n);
1497
    
1498
    if(stepsize<minstep) {
1499
      converged=TRUE;
1500
      break;
1501
    }
1502
    
1503
    /* Store old forces and coordinates */
1504
    for(i=0;i<n;i++) {
1505
      lastx[i]=xx[i];
1506
      lastf[i]=ff[i];
1507
    }
1508
    Epot0=Epot;
1509
    
1510
    first=TRUE;
1511
    
1512
    for(i=0;i<n;i++)
1513
      xa[i]=xx[i];
1514
    
1515
    /* Take a step downhill.
1516
     * In theory, we should minimize the function along this direction.
1517
     * That is quite possible, but it turns out to take 5-10 function evaluations
1518
     * for each line. However, we dont really need to find the exact minimum -
1519
     * it is much better to start a new BFGS step in a modified direction as soon
1520
     * as we are close to it. This will save a lot of energy evaluations.
1521
     *
1522
     * In practice, we just try to take a single step.
1523
     * If it worked (i.e. lowered the energy), we increase the stepsize but
1524
     * the continue straight to the next BFGS step without trying to find any minimum.
1525
     * If it didn't work (higher energy), there must be a minimum somewhere between
1526
     * the old position and the new one.
1527
     * 
1528
     * Due to the finite numerical accuracy, it turns out that it is a good idea
1529
     * to even accept a SMALL increase in energy, if the derivative is still downhill.
1530
     * This leads to lower final energies in the tests I've done. / Erik 
1531
     */
1532
    foundlower=FALSE;
1533
    EpotA = Epot0;
1534
    a = 0.0;
1535
    c = a + stepsize; /* reference position along line is zero */
1536

    
1537
    /* Check stepsize first. We do not allow displacements 
1538
     * larger than emstep.
1539
     */
1540
    do {
1541
      c = a + stepsize;
1542
      maxdelta=0;
1543
      for(i=0;i<n;i++) {
1544
        delta=c*s[i];
1545
        if(delta>maxdelta)
1546
          maxdelta=delta;
1547
      }
1548
      if(maxdelta>inputrec->em_stepsize)
1549
        stepsize*=0.1;
1550
    } while(maxdelta>inputrec->em_stepsize);
1551

    
1552
    /* Take a trial step */
1553
    for (i=0; i<n; i++)
1554
      xc[i] = lastx[i] + c*s[i];
1555
    
1556
    neval++;
1557
    /* Calculate energy for the trial step */
1558
    ems.s.x = (rvec *)xc;
1559
    ems.f   = (rvec *)fc;
1560
    evaluate_energy(fplog,bVerbose,cr,
1561
                    state,top_global,&ems,&buf,top,
1562
                    inputrec,nrnb,wcycle,
1563
                    vsite,constr,fcd,graph,mdatoms,fr,
1564
                    mu_tot,enerd,vir,pres,step,FALSE);
1565
    EpotC = ems.epot;
1566
    
1567
    /* Calc derivative along line */
1568
    for(gpc=0,i=0; i<n; i++) {
1569
        gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
1570
    }
1571
    /* Sum the gradient along the line across CPUs */
1572
    if (PAR(cr))
1573
      gmx_sumd(1,&gpc,cr);
1574
    
1575
     /* This is the max amount of increase in energy we tolerate */
1576
   tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1577
    
1578
    /* Accept the step if the energy is lower, or if it is not significantly higher
1579
     * and the line derivative is still negative.
1580
     */
1581
    if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1582
      foundlower = TRUE;
1583
      /* Great, we found a better energy. Increase step for next iteration
1584
       * if we are still going down, decrease it otherwise
1585
       */
1586
      if(gpc<0)
1587
        stepsize *= 1.618034;  /* The golden section */
1588
      else
1589
        stepsize *= 0.618034;  /* 1/golden section */
1590
    } else {
1591
      /* New energy is the same or higher. We will have to do some work
1592
       * to find a smaller value in the interval. Take smaller step next time!
1593
       */
1594
      foundlower = FALSE;
1595
      stepsize *= 0.618034;
1596
    }    
1597
    
1598
    /* OK, if we didn't find a lower value we will have to locate one now - there must
1599
     * be one in the interval [a=0,c].
1600
     * The same thing is valid here, though: Don't spend dozens of iterations to find
1601
     * the line minimum. We try to interpolate based on the derivative at the endpoints,
1602
     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1603
     *
1604
     * I also have a safeguard for potentially really patological functions so we never
1605
     * take more than 20 steps before we give up ...
1606
     *
1607
     * If we already found a lower value we just skip this step and continue to the update.
1608
     */
1609

    
1610
    if(!foundlower) {
1611
     
1612
      nminstep=0;
1613
      do {
1614
        /* Select a new trial point.
1615
         * If the derivatives at points a & c have different sign we interpolate to zero,
1616
         * otherwise just do a bisection.
1617
         */
1618
        
1619
        if(gpa<0 && gpc>0)
1620
          b = a + gpa*(a-c)/(gpc-gpa);
1621
        else
1622
          b = 0.5*(a+c);                
1623
        
1624
        /* safeguard if interpolation close to machine accuracy causes errors:
1625
         * never go outside the interval
1626
         */
1627
        if(b<=a || b>=c)
1628
          b = 0.5*(a+c);
1629
        
1630
        /* Take a trial step */
1631
        for (i=0; i<n; i++) 
1632
          xb[i] = lastx[i] + b*s[i];
1633
        
1634
        neval++;
1635
        /* Calculate energy for the trial step */
1636
        ems.s.x = (rvec *)xb;
1637
        ems.f   = (rvec *)fb;
1638
        evaluate_energy(fplog,bVerbose,cr,
1639
                        state,top_global,&ems,&buf,top,
1640
                        inputrec,nrnb,wcycle,
1641
                        vsite,constr,fcd,graph,mdatoms,fr,
1642
                        mu_tot,enerd,vir,pres,step,FALSE);
1643
        EpotB = ems.epot;
1644
        
1645
        fnorm = ems.fnorm;
1646
        
1647
        for(gpb=0,i=0; i<n; i++) 
1648
          gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
1649
        
1650
        /* Sum the gradient along the line across CPUs */
1651
        if (PAR(cr))
1652
          gmx_sumd(1,&gpb,cr);
1653
        
1654
        /* Keep one of the intervals based on the value of the derivative at the new point */
1655
        if(gpb>0) {
1656
          /* Replace c endpoint with b */
1657
          EpotC = EpotB;
1658
          c = b;
1659
          gpc = gpb;
1660
          /* swap coord pointers b/c */
1661
          xtmp = xb; 
1662
          ftmp = fb;
1663
          xb = xc; 
1664
          fb = fc;
1665
          xc = xtmp;
1666
          fc = ftmp;
1667
        } else {
1668
          /* Replace a endpoint with b */
1669
          EpotA = EpotB;
1670
          a = b;
1671
          gpa = gpb;
1672
          /* swap coord pointers a/b */
1673
          xtmp = xb; 
1674
          ftmp = fb;
1675
          xb = xa; 
1676
          fb = fa;
1677
          xa = xtmp; 
1678
          fa = ftmp;
1679
        }
1680
        
1681
        /* 
1682
         * Stop search as soon as we find a value smaller than the endpoints,
1683
         * or if the tolerance is below machine precision.
1684
         * Never run more than 20 steps, no matter what.
1685
         */
1686
        nminstep++; 
1687
      } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1688

    
1689
      if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1690
        /* OK. We couldn't find a significantly lower energy.
1691
         * If ncorr==0 this was steepest descent, and then we give up.
1692
         * If not, reset memory to restart as steepest descent before quitting.
1693
         */
1694
        if(ncorr==0) {
1695
        /* Converged */
1696
          converged=TRUE;
1697
          break;
1698
        } else {
1699
          /* Reset memory */
1700
          ncorr=0;
1701
          /* Search in gradient direction */
1702
          for(i=0;i<n;i++)
1703
            dx[point][i]=ff[i];
1704
          /* Reset stepsize */
1705
          stepsize = 1.0/fnorm;
1706
          continue;
1707
        }
1708
      }
1709
      
1710
      /* Select min energy state of A & C, put the best in xx/ff/Epot
1711
       */
1712
      if(EpotC<EpotA) {
1713
        Epot = EpotC;
1714
        /* Use state C */
1715
        for(i=0;i<n;i++) {
1716
          xx[i]=xc[i];
1717
          ff[i]=fc[i];
1718
        }
1719
        stepsize=c;
1720
      } else {
1721
        Epot = EpotA;
1722
        /* Use state A */
1723
        for(i=0;i<n;i++) {
1724
          xx[i]=xa[i];
1725
          ff[i]=fa[i];
1726
        }
1727
        stepsize=a;
1728
      }
1729
      
1730
    } else {
1731
      /* found lower */
1732
      Epot = EpotC;
1733
      /* Use state C */
1734
      for(i=0;i<n;i++) {
1735
        xx[i]=xc[i];
1736
        ff[i]=fc[i];
1737
      }
1738
      stepsize=c;
1739
    }
1740

    
1741
    /* Update the memory information, and calculate a new 
1742
     * approximation of the inverse hessian 
1743
     */
1744
    
1745
    /* Have new data in Epot, xx, ff */        
1746
    if(ncorr<nmaxcorr)
1747
      ncorr++;
1748

    
1749
    for(i=0;i<n;i++) {
1750
      dg[point][i]=lastf[i]-ff[i];
1751
      dx[point][i]*=stepsize;
1752
    }
1753
    
1754
    dgdg=0;
1755
    dgdx=0;        
1756
    for(i=0;i<n;i++) {
1757
      dgdg+=dg[point][i]*dg[point][i];
1758
      dgdx+=dg[point][i]*dx[point][i];
1759
    }
1760
    
1761
    diag=dgdx/dgdg;
1762
    
1763
    rho[point]=1.0/dgdx;
1764
    point++;
1765
    
1766
    if(point>=nmaxcorr)
1767
      point=0;
1768
    
1769
    /* Update */
1770
    for(i=0;i<n;i++)
1771
      p[i]=ff[i];
1772
    
1773
    cp=point;
1774
    
1775
    /* Recursive update. First go back over the memory points */
1776
    for(k=0;k<ncorr;k++) {
1777
      cp--;
1778
      if(cp<0) 
1779
        cp=ncorr-1;
1780
      
1781
      sq=0;
1782
      for(i=0;i<n;i++)
1783
        sq+=dx[cp][i]*p[i];
1784
      
1785
      alpha[cp]=rho[cp]*sq;
1786
      
1787
      for(i=0;i<n;i++)
1788
        p[i] -= alpha[cp]*dg[cp][i];                
1789
    }
1790
    
1791
    for(i=0;i<n;i++)
1792
      p[i] *= diag;
1793
    
1794
    /* And then go forward again */
1795
    for(k=0;k<ncorr;k++) {
1796
      yr = 0;
1797
      for(i=0;i<n;i++)
1798
        yr += p[i]*dg[cp][i];
1799
      
1800
      beta = rho[cp]*yr;            
1801
      beta = alpha[cp]-beta;
1802
      
1803
      for(i=0;i<n;i++)
1804
        p[i] += beta*dx[cp][i];
1805
      
1806
      cp++;        
1807
      if(cp>=ncorr)
1808
        cp=0;
1809
    }
1810
    
1811
    for(i=0;i<n;i++)
1812
      if(!frozen[i])
1813
        dx[point][i] = p[i];
1814
      else
1815
        dx[point][i] = 0;
1816

    
1817
    stepsize=1.0;
1818
    
1819
    /* Test whether the convergence criterion is met */
1820
    get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1821
    
1822
    /* Print it if necessary */
1823
    if (MASTER(cr)) {
1824
      if(bVerbose)
1825
        fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1826
                step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1827
      /* Store the new (lower) energies */
1828
      upd_mdebin(mdebin,NULL,TRUE,mdatoms->tmass,step,(real)step,
1829
                 enerd,state,state->box,
1830
                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1831
      do_log = do_per_step(step,inputrec->nstlog);
1832
      do_ene = do_per_step(step,inputrec->nstenergy);
1833
      if(do_log)
1834
        print_ebin_header(fplog,step,step,state->lambda);
1835
      print_ebin(fp_ene,do_ene,FALSE,FALSE,
1836
                 do_log ? fplog : NULL,step,step,step,eprNORMAL,
1837
                 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1838
    }
1839
    
1840
    /* Stop when the maximum force lies below tolerance.
1841
     * If we have reached machine precision, converged is already set to true.
1842
     */
1843
    
1844
    converged = converged || (fmax < inputrec->em_tol);
1845
    
1846
  } /* End of the loop */
1847
  
1848
  if(converged)        
1849
    step--; /* we never took that last step in this case */
1850
  
1851
  if(fmax>inputrec->em_tol) {
1852
    if (MASTER(cr)) {
1853
      warn_step(stderr,inputrec->em_tol,FALSE);
1854
      warn_step(fplog,inputrec->em_tol,FALSE);
1855
    }
1856
    converged = FALSE; 
1857
  }
1858
  
1859
  /* If we printed energy and/or logfile last step (which was the last step)
1860
   * we don't have to do it again, but otherwise print the final values.
1861
   */
1862
  if(!do_log) /* Write final value to log since we didn't do anythin last step */
1863
    print_ebin_header(fplog,step,step,state->lambda);
1864
  if(!do_ene || !do_log) /* Write final energy file entries */
1865
    print_ebin(fp_ene,!do_ene,FALSE,FALSE,
1866
               !do_log ? fplog : NULL,step,step,step,eprNORMAL,
1867
               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1868
  
1869
  /* Print some stuff... */
1870
  if (MASTER(cr))
1871
    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1872
  
1873
  /* IMPORTANT!
1874
   * For accurate normal mode calculation it is imperative that we
1875
   * store the last conformation into the full precision binary trajectory.
1876
   *
1877
   * However, we should only do it if we did NOT already write this step
1878
   * above (which we did if do_x or do_f was true).
1879
   */  
1880
  do_x = !do_per_step(step,inputrec->nstxout);
1881
  do_f = !do_per_step(step,inputrec->nstfout);
1882
  write_traj(fplog,cr,fp_trn,do_x,FALSE,do_f,0,FALSE,0,NULL,FALSE,
1883
             top_global,inputrec->eI,inputrec->simulation_part,step,(real)step,state,state,f,f);
1884
  
1885
  if (MASTER(cr)) {
1886
    write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
1887
                        *top_global->name,top_global,
1888
                        state->x,NULL,inputrec->ePBC,state->box);
1889
  }
1890
  
1891
  if (MASTER(cr)) {
1892
    print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
1893
                    number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1894
    print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
1895
                    number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1896
    
1897
    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1898
  }
1899
  
1900
  finish_em(fplog,cr,fp_trn,fp_ene);
1901

    
1902
  /* To print the actual number of steps we needed somewhere */
1903
  inputrec->nsteps=step;
1904

    
1905
  return start_t;
1906
} /* That's all folks */
1907

    
1908

    
1909
time_t do_steep(FILE *fplog,t_commrec *cr,
1910
                int nfile,t_filenm fnm[],
1911
                bool bVerbose,bool bCompact,
1912
                gmx_vsite_t *vsite,gmx_constr_t constr,
1913
                int stepout,
1914
                t_inputrec *inputrec,
1915
                gmx_mtop_t *top_global,t_fcdata *fcd,
1916
                t_state *state_global,rvec f[],
1917
                rvec buf[],t_mdatoms *mdatoms,
1918
                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1919
                gmx_edsam_t ed,
1920
                t_forcerec *fr,
1921
                int repl_ex_nst,int repl_ex_seed,
1922
                real cpt_period,real max_hours,
1923
                unsigned long Flags)
1924
{ 
1925
  const char *SD="Steepest Descents";
1926
  em_state_t *s_min,*s_try;
1927
  rvec       *f_global;
1928
  gmx_localtop_t *top;
1929
  gmx_enerdata_t *enerd;
1930
  t_graph    *graph;
1931
  real   stepsize,constepsize;
1932
  real   ustep,dvdlambda,fnormn;
1933
  int        fp_trn,fp_ene; 
1934
  t_mdebin   *mdebin; 
1935
  bool   bDone,bAbort,do_x,do_f; 
1936
  time_t start_t; 
1937
  tensor vir,pres; 
1938
  rvec   mu_tot;
1939
  int    nsteps;
1940
  int    count=0; 
1941
  int    steps_accepted=0; 
1942
  /* not used */
1943
  real   terminate=0;
1944

    
1945
  s_min = init_em_state();
1946
  s_try = init_em_state();
1947

    
1948
  /* Init em and store the local state in s_try */
1949
  init_em(fplog,SD,cr,inputrec,
1950
          state_global,top_global,s_try,&top,f,&buf,&f_global,
1951
          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr,
1952
          nfile,fnm,&fp_trn,&fp_ene,&mdebin);
1953

    
1954
  /* Print to log file  */
1955
  start_t=print_date_and_time(fplog,cr->nodeid,"Started Steepest Descents");
1956
  wallcycle_start(wcycle,ewcRUN);
1957
    
1958
  /* Set variables for stepsize (in nm). This is the largest  
1959
   * step that we are going to make in any direction. 
1960
   */
1961
  ustep = inputrec->em_stepsize; 
1962
  stepsize = 0;
1963
  
1964
  /* Max number of steps  */
1965
  nsteps = inputrec->nsteps; 
1966
  
1967
  if (MASTER(cr)) 
1968
    /* Print to the screen  */
1969
    sp_header(stderr,SD,inputrec->em_tol,nsteps);
1970
  if (fplog)
1971
    sp_header(fplog,SD,inputrec->em_tol,nsteps);
1972
    
1973
  /**** HERE STARTS THE LOOP ****
1974
   * count is the counter for the number of steps 
1975
   * bDone will be TRUE when the minimization has converged
1976
   * bAbort will be TRUE when nsteps steps have been performed or when
1977
   * the stepsize becomes smaller than is reasonable for machine precision
1978
   */
1979
  count  = 0;
1980
  bDone  = FALSE;
1981
  bAbort = FALSE;
1982
  while( !bDone && !bAbort ) {
1983
    bAbort = (nsteps > 0) && (count==nsteps);
1984
    
1985
    /* set new coordinates, except for first step */
1986
    if (count > 0) {
1987
      do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
1988
                 constr,top,nrnb,wcycle,count);
1989
    }
1990
    
1991
    evaluate_energy(fplog,bVerbose,cr,
1992
                    state_global,top_global,s_try,&buf,top,
1993
                    inputrec,nrnb,wcycle,
1994
                    vsite,constr,fcd,graph,mdatoms,fr,
1995
                    mu_tot,enerd,vir,pres,count,count==0);
1996
    
1997
    if (MASTER(cr))
1998
      print_ebin_header(fplog,count,count,s_try->s.lambda);
1999

    
2000
    if (count == 0)
2001
      s_min->epot = s_try->epot + 1;
2002
    
2003
    /* Print it if necessary  */
2004
    if (MASTER(cr)) {
2005
      if (bVerbose) {
2006
        fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2007
                count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2008
                (s_try->epot < s_min->epot) ? '\n' : '\r');
2009
      }
2010
      
2011
      if (s_try->epot < s_min->epot) {
2012
        /* Store the new (lower) energies  */
2013
        upd_mdebin(mdebin,NULL,TRUE,mdatoms->tmass,count,(real)count,
2014
                   enerd,&s_try->s,s_try->s.box,
2015
                   NULL,NULL,vir,pres,NULL,mu_tot,constr);
2016
        print_ebin(fp_ene,TRUE,
2017
                   do_per_step(steps_accepted,inputrec->nstdisreout),
2018
                   do_per_step(steps_accepted,inputrec->nstorireout),
2019
                   fplog,count,count,count,eprNORMAL,TRUE,
2020
                   mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2021
        fflush(fplog);
2022
      }
2023
    } 
2024
    
2025
    /* Now if the new energy is smaller than the previous...  
2026
     * or if this is the first step!
2027
     * or if we did random steps! 
2028
     */
2029
    
2030
    if ( (count==0) || (s_try->epot < s_min->epot) ) {
2031
      steps_accepted++; 
2032

    
2033
      /* Test whether the convergence criterion is met...  */
2034
      bDone = (s_try->fmax < inputrec->em_tol);
2035
      
2036
      /* Copy the arrays for force, positions and energy  */
2037
      /* The 'Min' array always holds the coords and forces of the minimal 
2038
         sampled energy  */
2039
      swap_em_state(s_min,s_try);
2040
      if (count > 0)
2041
        ustep *= 1.2;
2042

    
2043
      /* Write to trn, if necessary */
2044
      do_x = do_per_step(steps_accepted,inputrec->nstxout);
2045
      do_f = do_per_step(steps_accepted,inputrec->nstfout);
2046
      write_traj(fplog,cr,fp_trn,do_x,FALSE,do_f,0,FALSE,0,NULL,FALSE,
2047
                 top_global,inputrec->eI,inputrec->simulation_part,count,(real)count,
2048
                 &s_min->s,state_global,s_min->f,f_global);
2049
    } 
2050
    else {
2051
      /* If energy is not smaller make the step smaller...  */
2052
      ustep *= 0.5;
2053

    
2054
      if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2055
        /* Reload the old state */
2056
        em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2057
                               s_min,&buf,top,mdatoms,fr,vsite,constr,
2058
                               nrnb,wcycle);
2059
      }
2060
    }
2061
    
2062
    /* Determine new step  */
2063
    stepsize = ustep/s_min->fmax;
2064
    
2065
    /* Check if stepsize is too small, with 1 nm as a characteristic length */
2066
#ifdef GMX_DOUBLE
2067
    if (ustep < 1e-12)
2068
#else
2069
    if (ustep < 1e-6)
2070
#endif
2071
      {
2072
        if (MASTER(cr)) {
2073
          warn_step(stderr,inputrec->em_tol,constr!=NULL);
2074
          warn_step(fplog,inputrec->em_tol,constr!=NULL);
2075
        }
2076
        bAbort=TRUE;
2077
      }
2078
    
2079
    count++;
2080
  } /* End of the loop  */
2081
  
2082
  /* In parallel write_traj copies back the coordinates,
2083
   * otherwise we have to do it explicitly.
2084
   */
2085
  if (!PAR(cr)) {
2086
    copy_em_coords_back(s_min,state_global);
2087
  }
2088

    
2089
    /* Print some shit...  */
2090
  if (MASTER(cr)) 
2091
    fprintf(stderr,"\nwriting lowest energy coordinates.\n"); 
2092
  write_traj(fplog,cr,fp_trn,TRUE,FALSE,inputrec->nstfout,0,FALSE,0,NULL,FALSE,
2093
             top_global,inputrec->eI,inputrec->simulation_part,count,(real)count,
2094
             &s_min->s,state_global,s_min->f,f_global);
2095

    
2096
  fnormn = s_min->fnorm/sqrt(state_global->natoms);
2097

    
2098
  if (MASTER(cr)) {
2099
    write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
2100
                        *top_global->name,top_global,
2101
                        state_global->x,NULL,inputrec->ePBC,state_global->box);
2102
    
2103
    print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2104
                    s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2105
    print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2106
                    s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2107
  }
2108

    
2109
  finish_em(fplog,cr,fp_trn,fp_ene);
2110
  
2111
  /* To print the actual number of steps we needed somewhere */
2112
  inputrec->nsteps=count;
2113
  
2114
  return start_t;
2115
} /* That's all folks */
2116

    
2117

    
2118
time_t do_nm(FILE *fplog,t_commrec *cr,
2119
             int nfile,t_filenm fnm[],
2120
             bool bVerbose,bool bCompact,
2121
             gmx_vsite_t *vsite,gmx_constr_t constr,
2122
             int stepout,
2123
             t_inputrec *inputrec,
2124
             gmx_mtop_t *top_global,t_fcdata *fcd,
2125
             t_state *state_global,rvec f[],
2126
             rvec buf[],t_mdatoms *mdatoms,
2127
             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2128
             gmx_edsam_t ed,
2129
             t_forcerec *fr,
2130
             int repl_ex_nst,int repl_ex_seed,
2131
             real cpt_period,real max_hours,
2132
             unsigned long Flags)
2133
{
2134
    t_mdebin   *mdebin;
2135
        const char *NM = "Normal Mode Analysis";
2136
    int        fp_ene,step,i;
2137
    time_t     start_t;
2138
    rvec       *f_global;
2139
    gmx_localtop_t *top;
2140
    gmx_enerdata_t *enerd;
2141
    t_graph    *graph;
2142
    real       t,lambda,t0,lam0;
2143
    bool       bNS;
2144
    tensor     vir,pres;
2145
    int        nfmax,count;
2146
    rvec       mu_tot;
2147
    rvec       *fneg,*fpos;
2148
    bool       bSparse; /* use sparse matrix storage format */
2149
    size_t     sz;
2150
    gmx_sparsematrix_t * sparse_matrix = NULL;
2151
    real *     full_matrix             = NULL;
2152
        em_state_t *   state_work;
2153
        
2154
    /* added with respect to mdrun */
2155
    int        idum,jdum,kdum,row,col;
2156
    real       der_range=10.0*sqrt(GMX_REAL_EPS);
2157
    real       fnorm,fmax;
2158
    real       dfdx;
2159
    
2160
    state_work = init_em_state();
2161
    
2162
    /* Init em and store the local state in state_minimum */
2163
    init_em(fplog,NM,cr,inputrec,
2164
            state_global,top_global,state_work,&top,
2165
            f,&buf,&f_global,
2166
            nrnb,mu_tot,fr,&enerd,&graph,mdatoms,vsite,constr,
2167
            nfile,fnm,NULL,&fp_ene,&mdebin);
2168
    
2169
    snew(fneg,top_global->natoms);
2170
    snew(fpos,top_global->natoms);
2171
    
2172
#ifndef GMX_DOUBLE
2173
    fprintf(stderr,
2174
            "NOTE: This version of Gromacs has been compiled in single precision,\n"
2175
            "      which MIGHT not be accurate enough for normal mode analysis.\n"
2176
            "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
2177
            "      are fairly modest even if you recompile in double precision.\n\n");
2178
#endif
2179
    
2180
    /* Check if we can/should use sparse storage format.
2181
     *
2182
     * Sparse format is only useful when the Hessian itself is sparse, which it
2183
      * will be when we use a cutoff.    
2184
      * For small systems (n<1000) it is easier to always use full matrix format, though.
2185
      */
2186
    if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2187
    {
2188
        fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2189
        bSparse = FALSE;
2190
    }
2191
    else if(top_global->natoms < 1000)
2192
    {
2193
        fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2194
        bSparse = FALSE;
2195
    }
2196
    else
2197
    {
2198
        fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2199
        bSparse = TRUE;
2200
    }
2201
    
2202
    sz = DIM*top_global->natoms;
2203
    
2204
    fprintf(stderr,"Allocating Hessian memory...\n\n");
2205

    
2206
    if(bSparse)
2207
    {
2208
        sparse_matrix=gmx_sparsematrix_init(sz);
2209
        sparse_matrix->compressed_symmetric = TRUE;
2210
    }
2211
    else
2212
    {
2213
        snew(full_matrix,sz*sz);
2214
    }
2215
    
2216
    /* Initial values */
2217
    t0           = inputrec->init_t;
2218
    lam0         = inputrec->init_lambda;
2219
    t            = t0;
2220
    lambda       = lam0;
2221
    
2222
    init_nrnb(nrnb);
2223
    
2224
    where();
2225
    
2226
    /* Write start time and temperature */
2227
    start_t=print_date_and_time(fplog,cr->nodeid,"Started nmrun");
2228
    wallcycle_start(wcycle,ewcRUN);
2229
    if (MASTER(cr)) 
2230
    {
2231
        fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",*(top_global->name),
2232
                top_global->natoms);
2233
    }
2234
    
2235
    count = 0;
2236
    evaluate_energy(fplog,bVerbose,cr,
2237
                    state_global,top_global,state_work,&buf,top,
2238
                    inputrec,nrnb,wcycle,
2239
                    vsite,constr,fcd,graph,mdatoms,fr,
2240
                    mu_tot,enerd,vir,pres,count,count==0);
2241
    count++;
2242

    
2243
    /* if forces are not small, warn user */
2244
    get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2245

    
2246
    fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2247
    if (state_work->fmax > 1.0e-3) 
2248
    {
2249
        fprintf(stderr,"Maximum force probably not small enough to");
2250
        fprintf(stderr," ensure that you are in an \nenergy well. ");
2251
        fprintf(stderr,"Be aware that negative eigenvalues may occur");
2252
        fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2253
    }
2254
    
2255
    /***********************************************************
2256
     *
2257
     *      Loop over all pairs in matrix 
2258
     * 
2259
     *      do_force called twice. Once with positive and 
2260
     *      once with negative displacement 
2261
     *
2262
     ************************************************************/
2263

    
2264
    /* fudge nr of steps to nr of atoms */
2265
    
2266
    inputrec->nsteps = top_global->natoms;
2267

    
2268
    for (step=0; (step<inputrec->nsteps); step++) 
2269
    {
2270
        
2271
        for (idum=0; (idum<DIM); idum++) 
2272
        {
2273
          row = DIM*step+idum;
2274
          
2275
          state_work->s.x[step][idum] -= der_range;
2276
          
2277
          evaluate_energy(fplog,bVerbose,cr,
2278
                          state_global,top_global,state_work,&buf,top,
2279
                          inputrec,nrnb,wcycle,
2280
                          vsite,constr,fcd,graph,mdatoms,fr,
2281
                          mu_tot,enerd,vir,pres,count,count==0);
2282
          count++;
2283
                        
2284
          for ( i=0 ; i < top_global->natoms ; i++ )
2285
            {
2286
              copy_rvec ( state_work->f[i] , fneg[i] );
2287
            }
2288
          
2289
          state_work->s.x[step][idum] += 2*der_range;
2290
          
2291
          evaluate_energy(fplog,bVerbose,cr,
2292
                          state_global,top_global,state_work,&buf,top,
2293
                          inputrec,nrnb,wcycle,
2294
                          vsite,constr,fcd,graph,mdatoms,fr,
2295
                          mu_tot,enerd,vir,pres,count,count==0);
2296
          count++;
2297
          
2298
          for ( i=0 ; i < top_global->natoms ; i++ )
2299
            {
2300
              copy_rvec ( state_work->f[i] , fpos[i] );
2301
            }
2302
          
2303
          for (jdum=0; (jdum<top_global->natoms); jdum++) 
2304
            {
2305
                for (kdum=0; (kdum<DIM); kdum++) 
2306
                {
2307
                    dfdx=-(fpos[jdum][kdum]-fneg[jdum][kdum])/(2*der_range);
2308
                    col = DIM*jdum+kdum;
2309
                    
2310
                    if(bSparse)
2311
                    {
2312
                        if(col>=row && dfdx!=0.0)
2313
                            gmx_sparsematrix_increment_value(sparse_matrix,row,col,dfdx);
2314
                    }
2315
                    else
2316
                    {
2317
                        full_matrix[row*sz+col] = dfdx;
2318
                    }
2319
                }
2320
            }
2321
            
2322
            /* x is restored to original */
2323
                        state_work->s.x[step][idum] -= der_range;
2324
            
2325
            if (bVerbose && fplog)
2326
                fflush(fplog);            
2327
        }
2328
        /* write progress */
2329
        if (MASTER(cr) && bVerbose) 
2330
        {
2331
            fprintf(stderr,"\rFinished step %d out of %d",
2332
                    step+1,top_global->natoms); 
2333
            fflush(stderr);
2334
        }
2335
    }
2336
    t=t0+step*inputrec->delta_t;
2337
    lambda=lam0+step*inputrec->delta_lambda;
2338
    
2339
    if (MASTER(cr)) 
2340
    {
2341
        print_ebin(-1,FALSE,FALSE,FALSE,fplog,step,step,t,eprAVER,
2342
                   FALSE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2343
        print_ebin(-1,FALSE,FALSE,FALSE,fplog,step,step,t,eprRMS,
2344
                   FALSE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2345
    }
2346
      
2347
    fprintf(stderr,"\n\nWriting Hessian...\n");
2348
    gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2349
    
2350
    
2351
    return start_t;
2352
}
2353

    
2354

    
2355
static void global_max(t_commrec *cr,int *n)
2356
{
2357
  int *sum,i;
2358

    
2359
  snew(sum,cr->nnodes);
2360
  sum[cr->nodeid] = *n;
2361
  gmx_sumi(cr->nnodes,sum,cr);
2362
  for(i=0; i<cr->nnodes; i++)
2363
    *n = max(*n,sum[i]);
2364
  
2365
  sfree(sum);
2366
}
2367

    
2368
static void realloc_bins(double **bin,int *nbin,int nbin_new)
2369
{
2370
  int i;
2371

    
2372
  if (nbin_new != *nbin) {
2373
    srenew(*bin,nbin_new);
2374
    for(i=*nbin; i<nbin_new; i++)
2375
      (*bin)[i] = 0;
2376
    *nbin = nbin_new;
2377
  }
2378
}
2379

    
2380
time_t do_tpi(FILE *fplog,t_commrec *cr,
2381
              int nfile,t_filenm fnm[],
2382
              bool bVerbose,bool bCompact,
2383
              gmx_vsite_t *vsite,gmx_constr_t constr,
2384
              int stepout,
2385
              t_inputrec *inputrec,
2386
              gmx_mtop_t *top_global,t_fcdata *fcd,
2387
              t_state *state,rvec f[],
2388
              rvec buf[],t_mdatoms *mdatoms,
2389
              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2390
              gmx_edsam_t ed,
2391
              t_forcerec *fr,
2392
              int repl_ex_nst,int repl_ex_seed,
2393
              real cpt_period,real max_hours,
2394
              unsigned long Flags)
2395
{
2396
  const char *TPI="Test Particle Insertion"; 
2397
  gmx_localtop_t *top;
2398
  gmx_groups_t *groups;
2399
  gmx_enerdata_t *enerd;
2400
  real   lambda,t,temp,beta,drmax,epot;
2401
  double embU,sum_embU,*sum_UgembU,V,V_all,VembU_all;
2402
  int    status;
2403
  t_trxframe rerun_fr;
2404
  bool   bDispCorr,bCharge,bRFExcl,bNotLastFrame,bStateChanged,bNS,bOurStep;
2405
  time_t start_t; 
2406
  tensor force_vir,shake_vir,vir,pres;
2407
  int    cg_tp,a_tp0,a_tp1,ngid,gid_tp,nener,e;
2408
  rvec   *x_mol;
2409
  rvec   mu_tot,x_init,dx,x_tp;
2410
  int    nnodes,frame,nsteps,step;
2411
  int    i,start,end;
2412
  static gmx_rng_t tpi_rand;
2413
  FILE   *fp_tpi=NULL;
2414
  char   *ptr,*dump_pdb,**leg,str[STRLEN],str2[STRLEN];
2415
  double dbl,dump_ener;
2416
  bool   bCavity;
2417
  int    nat_cavity=0,d;
2418
  real   *mass_cavity=NULL,mass_tot;
2419
  int    nbin;
2420
  double invbinw,*bin,refvolshift,logV,bUlogV;
2421
  char   *tpid_leg[2]={"direct","reweighted"};
2422

    
2423
  /* Since numerical problems can lead to extreme negative energies
2424
   * when atoms overlap, we need to set a lower limit for beta*U.
2425
   */
2426
  real bU_neg_limit = -50;
2427

    
2428
  /* Since there is no upper limit to the insertion energies,
2429
   * we need to set an upper limit for the distribution output.
2430
   */
2431
  real bU_bin_limit      = 50;
2432
  real bU_logV_bin_limit = bU_bin_limit + 10;
2433

    
2434
  nnodes = cr->nnodes;
2435

    
2436
  top = gmx_mtop_generate_local_top(top_global,inputrec);
2437

    
2438
  groups = &top_global->groups;
2439

    
2440
  bCavity = (inputrec->eI == eiTPIC);
2441
  if (bCavity) {
2442
    ptr = getenv("GMX_TPIC_MASSES");
2443
    if (ptr == NULL) {
2444
      nat_cavity = 1;
2445
    } else {
2446
      /* Read (multiple) masses from env var GMX_TPIC_MASSES,
2447
       * The center of mass of the last atoms is then used for TPIC.
2448
       */
2449
      nat_cavity = 0;
2450
      while (sscanf(ptr,"%lf%n",&dbl,&i) > 0) {
2451
        srenew(mass_cavity,nat_cavity+1);
2452
        mass_cavity[nat_cavity] = dbl;
2453
        fprintf(fplog,"mass[%d] = %f\n",
2454
                nat_cavity+1,mass_cavity[nat_cavity]);
2455
        nat_cavity++;
2456
        ptr += i;
2457
      }
2458
      if (nat_cavity == 0)
2459
        gmx_fatal(FARGS,"Found %d masses in GMX_TPIC_MASSES",nat_cavity);
2460
    }
2461
  }
2462

    
2463
  /*
2464
  init_em(fplog,TPI,inputrec,&lambda,nrnb,mu_tot,
2465
  state->box,fr,mdatoms,top,cr,nfile,fnm,NULL,NULL);*/
2466
  /* We never need full pbc for TPI */
2467
  fr->ePBC = epbcXYZ;
2468
  /* Determine the temperature for the Boltzmann weighting */
2469
  temp = inputrec->opts.ref_t[0];
2470
  if (fplog) {
2471
    for(i=1; (i<inputrec->opts.ngtc); i++) {
2472
      if (inputrec->opts.ref_t[i] != temp) {
2473
        fprintf(fplog,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
2474
        fprintf(stderr,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
2475
      }
2476
    }
2477
    fprintf(fplog,
2478
            "\n  The temperature for test particle insertion is %.3f K\n\n",
2479
            temp);
2480
  }
2481
  beta = 1.0/(BOLTZ*temp);
2482

    
2483
  /* Number of insertions per frame */
2484
  nsteps = inputrec->nsteps; 
2485

    
2486
  /* Use the same neighborlist with more insertions points
2487
   * in a sphere of radius drmax around the initial point
2488
   */
2489
  /* This should be a proper mdp parameter */
2490
  drmax = inputrec->rtpi;
2491

    
2492
  /* An environment variable can be set to dump all configurations
2493
   * to pdb with an insertion energy <= this value.
2494
   */
2495
  dump_pdb = getenv("GMX_TPI_DUMP");
2496
  dump_ener = 0;
2497
  if (dump_pdb)
2498
    sscanf(dump_pdb,"%lf",&dump_ener);
2499

    
2500
  atoms2md(top_global,inputrec,0,NULL,0,top_global->natoms,mdatoms);
2501
  update_mdatoms(mdatoms,inputrec->init_lambda);
2502

    
2503
  snew(enerd,1);
2504
  init_enerdata(fplog,groups->grps[egcENER].nr,enerd);
2505

    
2506
  /* Print to log file  */
2507
  start_t=print_date_and_time(fplog,cr->nodeid,
2508
                              "Started Test Particle Insertion"); 
2509
  wallcycle_start(wcycle,ewcRUN);
2510

    
2511
  /* The last charge group is the group to be inserted */
2512
  cg_tp = top->cgs.nr - 1;
2513
  a_tp0 = top->cgs.index[cg_tp];
2514
  a_tp1 = top->cgs.index[cg_tp+1];
2515
  if (debug)
2516
    fprintf(debug,"TPI cg %d, atoms %d-%d\n",cg_tp,a_tp0,a_tp1);
2517
  if (a_tp1 - a_tp0 > 1 &&
2518
      (inputrec->rlist < inputrec->rcoulomb ||
2519
       inputrec->rlist < inputrec->rvdw))
2520
    gmx_fatal(FARGS,"Can not do TPI for multi-atom molecule with a twin-range cut-off");
2521
  snew(x_mol,a_tp1-a_tp0);
2522

    
2523
  bDispCorr = (inputrec->eDispCorr != edispcNO);
2524
  bCharge = FALSE;
2525
  for(i=a_tp0; i<a_tp1; i++) {
2526
    /* Copy the coordinates of the molecule to be insterted */
2527
    copy_rvec(state->x[i],x_mol[i-a_tp0]);
2528
    /* Check if we need to print electrostatic energies */
2529
    bCharge |= (mdatoms->chargeA[i] != 0 ||
2530
                (mdatoms->chargeB && mdatoms->chargeB[i] != 0));
2531
  }
2532
  bRFExcl = (bCharge && EEL_RF(fr->eeltype) && fr->eeltype!=eelRF_NEC);
2533

    
2534
  calc_cgcm(fplog,cg_tp,cg_tp+1,&(top->cgs),state->x,fr->cg_cm);
2535
  if (bCavity) {
2536
    if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog) {
2537
      fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
2538
      fprintf(stderr,"WARNING: Your TPI molecule is not centered at 0,0,0\n");
2539
    }
2540
  } else {
2541
    /* Center the molecule to be inserted at zero */
2542
     for(i=0; i<a_tp1-a_tp0; i++)
2543
      rvec_dec(x_mol[i],fr->cg_cm[cg_tp]);
2544
  }
2545

    
2546
  if (fplog) {
2547
    fprintf(fplog,"\nWill insert %d atoms %s partial charges\n",
2548
            a_tp1-a_tp0,bCharge ? "with" : "without");
2549
    
2550
    fprintf(fplog,"\nWill insert %d times in each frame of %s\n",
2551
            nsteps,opt2fn("-rerun",nfile,fnm));
2552
  }
2553
  
2554
  if (!bCavity) {
2555
    if (inputrec->nstlist > 1) {
2556
      if (drmax==0 && a_tp1-a_tp0==1) {
2557
        gmx_fatal(FARGS,"Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense",inputrec->nstlist,drmax);
2558
      }
2559
      if (fplog)
2560
        fprintf(fplog,"Will use the same neighborlist for %d insertions in a sphere of radius %f\n",inputrec->nstlist,drmax);
2561
    }
2562
  } else {
2563
    if (fplog)
2564
      fprintf(fplog,"Will insert randomly in a sphere of radius %f around the center of the cavity\n",drmax);
2565
  }
2566

    
2567
  ngid = groups->grps[egcENER].nr;
2568
  gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]);
2569
  nener = 1 + ngid;
2570
  if (bDispCorr)
2571
    nener += 1;
2572
  if (bCharge) {
2573
    nener += ngid;
2574
    if (bRFExcl)
2575
      nener += 1;
2576
  }
2577
  snew(sum_UgembU,nener);
2578

    
2579
  /* Initialize random generator */
2580
  tpi_rand = gmx_rng_init(inputrec->ld_seed);
2581

    
2582
  if (MASTER(cr)) {
2583
    fp_tpi = xvgropen(opt2fn("-tpi",nfile,fnm),
2584
                      "TPI energies","Time (ps)",
2585
                      "(kJ mol\\S-1\\N) / (nm\\S3\\N)");
2586
    xvgr_subtitle(fp_tpi,"f. are averages over one frame");
2587
    snew(leg,4+nener);
2588
    e = 0;
2589
    sprintf(str,"-kT log(<Ve\\S-\\8b\\4U\\N>/<V>)");
2590
    leg[e++] = strdup(str);
2591
    sprintf(str,"f. -kT log<e\\S-\\8b\\4U\\N>");
2592
    leg[e++] = strdup(str);
2593
    sprintf(str,"f. <e\\S-\\8b\\4U\\N>");
2594
    leg[e++] = strdup(str);
2595
    sprintf(str,"f. V");
2596
    leg[e++] = strdup(str);
2597
    sprintf(str,"f. <Ue\\S-\\8b\\4U\\N>");
2598
    leg[e++] = strdup(str);
2599
    for(i=0; i<ngid; i++) {
2600
      sprintf(str,"f. <U\\sVdW %s\\Ne\\S-\\8b\\4U\\N>",
2601
              *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
2602
      leg[e++] = strdup(str);
2603
    }
2604
    if (bDispCorr) {
2605
      sprintf(str,"f. <U\\sdisp c\\Ne\\S-\\8b\\4U\\N>");
2606
      leg[e++] = strdup(str);
2607
    }
2608
    if (bCharge) {
2609
      for(i=0; i<ngid; i++) {
2610
        sprintf(str,"f. <U\\sCoul %s\\Ne\\S-\\8b\\4U\\N>",
2611
                *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
2612
        leg[e++] = strdup(str);
2613
      }
2614
      if (bRFExcl) {
2615
        sprintf(str,"f. <U\\sRF excl\\Ne\\S-\\8b\\4U\\N>");
2616
        leg[e++] = strdup(str);
2617
      }
2618
    }
2619
    xvgr_legend(fp_tpi,4+nener,leg);
2620
    for(i=0; i<4+nener; i++)
2621
      sfree(leg[i]);
2622
    sfree(leg);
2623
  }
2624
  clear_rvec(x_init);
2625
  V_all = 0;
2626
  VembU_all = 0;
2627

    
2628
  invbinw = 10;
2629
  nbin = 10;
2630
  snew(bin,nbin);
2631

    
2632
  start_time();
2633

    
2634
  bNotLastFrame = read_first_frame(&status,opt2fn("-rerun",nfile,fnm),
2635
                                   &rerun_fr,TRX_NEED_X);
2636
  frame = 0;
2637

    
2638
  if (rerun_fr.natoms - (bCavity ? nat_cavity : 0) !=
2639
      mdatoms->nr - (a_tp1 - a_tp0))
2640
    gmx_fatal(FARGS,"Number of atoms in trajectory (%d)%s "
2641
              "is not equal the number in the run input file (%d) "
2642
              "minus the number of atoms to insert (%d)\n",
2643
              rerun_fr.natoms,bCavity ? " minus one" : "",
2644
              mdatoms->nr,a_tp1-a_tp0);
2645

    
2646
  refvolshift = log(det(rerun_fr.box));
2647
  
2648
  while (bNotLastFrame) {
2649
    lambda = rerun_fr.lambda;
2650
    t = rerun_fr.time;
2651
    
2652
    sum_embU = 0;
2653
    for(e=0; e<nener; e++)
2654
      sum_UgembU[e] = 0;
2655

    
2656
    /* Copy the coordinates from the input trajectory */
2657
    for(i=0; i<rerun_fr.natoms; i++)
2658
      copy_rvec(rerun_fr.x[i],state->x[i]);
2659
      
2660
    V = det(rerun_fr.box);
2661
    logV = log(V);
2662

    
2663
    bStateChanged = TRUE;
2664
    bNS = TRUE;
2665
    for(step=0; step<nsteps; step++) {
2666
      /* In parallel all nodes generate all random configurations.
2667
       * In that way the result is identical to a single cpu tpi run.
2668
       */
2669
      if (!bCavity) {
2670
        /* Random insertion in the whole volume */
2671
        bNS = (step % inputrec->nstlist == 0);
2672
        if (bNS) {
2673
          /* Generate a random position in the box */
2674
          x_init[XX] = gmx_rng_uniform_real(tpi_rand)*state->box[XX][XX];
2675
          x_init[YY] = gmx_rng_uniform_real(tpi_rand)*state->box[YY][YY];
2676
          x_init[ZZ] = gmx_rng_uniform_real(tpi_rand)*state->box[ZZ][ZZ];
2677
        }
2678
        if (inputrec->nstlist == 1) {
2679
          copy_rvec(x_init,x_tp);
2680
        } else {
2681
          /* Generate coordinates within |dx|=drmax of x_init */
2682
          do {
2683
            dx[XX] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2684
            dx[YY] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2685
            dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2686
          } while (norm2(dx) > drmax*drmax);
2687
          rvec_add(x_init,dx,x_tp);
2688
        }
2689
      } else {
2690
        /* Random insertion around a cavity location
2691
         * given by the last coordinate of the trajectory.
2692
         */
2693
        if (step == 0) {
2694
          if (nat_cavity == 1) {
2695
            /* Copy the location of the cavity */
2696
            copy_rvec(rerun_fr.x[rerun_fr.natoms-1],x_init);
2697
          } else {
2698
            /* Determine the center of mass of the last molecule */
2699
            clear_rvec(x_init);
2700
            mass_tot = 0;
2701
            for(i=0; i<nat_cavity; i++) {
2702
              for(d=0; d<DIM; d++)
2703
                x_init[d] +=
2704
                  mass_cavity[i]*rerun_fr.x[rerun_fr.natoms-nat_cavity+i][d];
2705
              mass_tot += mass_cavity[i];
2706
            }
2707
            for(d=0; d<DIM; d++)
2708
              x_init[d] /= mass_tot;
2709
          }
2710
        }
2711
        /* Generate coordinates within |dx|=drmax of x_init */
2712
        do {
2713
          dx[XX] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2714
          dx[YY] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2715
          dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2716
        } while (norm2(dx) > drmax*drmax);
2717
        rvec_add(x_init,dx,x_tp);
2718
      }
2719

    
2720
      if (a_tp1 - a_tp0 == 1) {
2721
        /* Insert a single atom, just copy the insertion location */
2722
        copy_rvec(x_tp,state->x[a_tp0]);
2723
      } else {
2724
        /* Copy the coordinates from the top file */
2725
        for(i=a_tp0; i<a_tp1; i++)
2726
          copy_rvec(x_mol[i-a_tp0],state->x[i]);
2727
        /* Rotate the molecule randomly */
2728
        rotate_conf(a_tp1-a_tp0,state->x+a_tp0,NULL,
2729
                    2*M_PI*gmx_rng_uniform_real(tpi_rand),
2730
                    2*M_PI*gmx_rng_uniform_real(tpi_rand),
2731
                    2*M_PI*gmx_rng_uniform_real(tpi_rand));
2732
        /* Shift to the insertion location */
2733
        for(i=a_tp0; i<a_tp1; i++)
2734
          rvec_inc(state->x[i],x_tp);
2735
      }
2736

    
2737
      /* Check if this insertion belongs to this node */
2738
      bOurStep = TRUE;
2739
      if (PAR(cr)) {
2740
        switch (inputrec->eI) {
2741
        case eiTPI:
2742
          bOurStep = ((step / inputrec->nstlist) % nnodes == cr->nodeid);
2743
          break;
2744
        case eiTPIC:
2745
          bOurStep = (step % nnodes == cr->nodeid);
2746
          break;
2747
        default:
2748
          gmx_fatal(FARGS,"Unknown integrator %s",ei_names[inputrec->eI]);
2749
        }
2750
      }
2751
      if (bOurStep) {
2752
        /* Clear some matrix variables  */
2753
        clear_mat(force_vir); 
2754
        clear_mat(shake_vir);
2755
        clear_mat(vir);
2756
        clear_mat(pres);
2757
        
2758
        /* Set the charge group center of mass of the test particle */
2759
        copy_rvec(x_init,fr->cg_cm[top->cgs.nr-1]);
2760

    
2761
        /* Calc energy (no forces) on new positions.
2762
         * Since we only need the intermolecular energy
2763
         * and the RF exclusion terms of the inserted molecule occur
2764
         * within a single charge group we can pass NULL for the graph.
2765
         * This also avoids shifts that would move charge groups
2766
         * out of the box.
2767
         */
2768
        /* Make do_force do a single node fore calculation */
2769
        cr->nnodes = 1;
2770
        do_force(fplog,cr,inputrec,
2771
                 step,nrnb,wcycle,top,&top_global->groups,
2772
                 rerun_fr.box,state->x,&state->hist,
2773
                 f,buf,force_vir,mdatoms,enerd,fcd,
2774
                 lambda,NULL,fr,NULL,mu_tot,t,NULL,NULL,
2775
                 GMX_FORCE_NONBONDED |
2776
                 (bNS ? GMX_FORCE_NS : 0) |
2777
                 (bStateChanged ? GMX_FORCE_STATECHANGED : 0)); 
2778
        cr->nnodes = nnodes;
2779
        bStateChanged = FALSE;
2780
        bNS = FALSE;
2781

    
2782
        /* Calculate long range corrections to pressure and energy */
2783
        calc_dispcorr(fplog,inputrec,fr,step,mdatoms->nr,rerun_fr.box,lambda,
2784
                      pres,vir,enerd->term);
2785
        
2786
        /* If the compiler doesn't optimize this check away
2787
         * we catch the NAN energies.
2788
         * With tables extreme negative energies might occur close to r=0.
2789
         */
2790
        epot = enerd->term[F_EPOT];
2791
        if (epot != epot || epot*beta < bU_neg_limit) {
2792
          if (debug)
2793
            fprintf(debug,"\n  time %.3f, step %d: non-finite energy %f, using exp(-bU)=0\n",t,step,epot);
2794
          embU = 0;
2795
        } else {
2796
          embU = exp(-beta*epot);
2797
          sum_embU += embU;
2798
          /* Determine the weighted energy contributions of each energy group */
2799
          e = 0;
2800
          sum_UgembU[e++] += epot*embU;
2801
          if (fr->bBHAM) {
2802
            for(i=0; i<ngid; i++)
2803
              sum_UgembU[e++] +=
2804
                (enerd->grpp.ener[egBHAMSR][GID(i,gid_tp,ngid)] +
2805
                 enerd->grpp.ener[egBHAMLR][GID(i,gid_tp,ngid)])*embU;
2806
          } else {
2807
            for(i=0; i<ngid; i++)
2808
              sum_UgembU[e++] +=
2809
                (enerd->grpp.ener[egLJSR][GID(i,gid_tp,ngid)] +
2810
                 enerd->grpp.ener[egLJLR][GID(i,gid_tp,ngid)])*embU;
2811
          }
2812
          if (bDispCorr)
2813
            sum_UgembU[e++] += enerd->term[F_DISPCORR]*embU;
2814
          if (bCharge) {
2815
            for(i=0; i<ngid; i++)
2816
              sum_UgembU[e++] +=
2817
                (enerd->grpp.ener[egCOULSR][GID(i,gid_tp,ngid)] +
2818
                 enerd->grpp.ener[egCOULLR][GID(i,gid_tp,ngid)])*embU;
2819
            if (bRFExcl)
2820
              sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU;
2821
          }
2822
        }
2823
        
2824
        if (embU == 0 || beta*epot > bU_bin_limit) {
2825
          bin[0]++;
2826
        } else {
2827
          i = (int)((bU_logV_bin_limit
2828
                     - (beta*epot - logV + refvolshift))*invbinw
2829
                    + 0.5);
2830
          if (i < 0)
2831
            i = 0;
2832
          if (i >= nbin)
2833
            realloc_bins(&bin,&nbin,i+10);
2834
          bin[i]++;
2835
        }
2836

    
2837
        if (debug)
2838
          fprintf(debug,"TPI %7d %12.5e %12.5f %12.5f %12.5f\n",
2839
                  step,epot,x_tp[XX],x_tp[YY],x_tp[ZZ]);
2840

    
2841
        if (dump_pdb && epot <= dump_ener) {
2842
          sprintf(str,"t%g_step%d.pdb",t,step);
2843
          sprintf(str2,"t: %f step %d ener: %f",t,step,epot);
2844
          write_sto_conf_mtop(str,str2,top_global,state->x,state->v,
2845
                              inputrec->ePBC,state->box);
2846
        }
2847
      }
2848
    }
2849

    
2850
    if (PAR(cr)) {
2851
      /* When running in parallel sum the energies over the processes */
2852
      gmx_sumd(1,    &sum_embU, cr);
2853
      gmx_sumd(nener,sum_UgembU,cr);
2854
    }
2855

    
2856
    frame++;
2857
    V_all += V;
2858
    VembU_all += V*sum_embU/nsteps;
2859

    
2860
    if (fp_tpi) {
2861
      if (bVerbose || frame%10==0 || frame<10)
2862
        fprintf(stderr,"mu %10.3e <mu> %10.3e\n",
2863
                -log(sum_embU/nsteps)/beta,-log(VembU_all/V_all)/beta);
2864
      
2865
      fprintf(fp_tpi,"%10.3f %12.5e %12.5e %12.5e %12.5e",
2866
              t,
2867
              VembU_all==0 ? 20/beta : -log(VembU_all/V_all)/beta,
2868
              sum_embU==0  ? 20/beta : -log(sum_embU/nsteps)/beta,
2869
              sum_embU/nsteps,V);
2870
      for(e=0; e<nener; e++)
2871
        fprintf(fp_tpi," %12.5e",sum_UgembU[e]/nsteps);
2872
      fprintf(fp_tpi,"\n");
2873
      fflush(fp_tpi);
2874
    }
2875

    
2876
    bNotLastFrame = read_next_frame(status,&rerun_fr);
2877
  } /* End of the loop  */
2878

    
2879
  update_time();
2880

    
2881
  close_trj(status);
2882

    
2883
  if (fp_tpi)
2884
    gmx_fio_fclose(fp_tpi);
2885

    
2886
  if (fplog) {
2887
    fprintf(fplog,"\n");
2888
    fprintf(fplog,"  <V>  = %12.5e nm^3\n",V_all/frame);
2889
    fprintf(fplog,"  <mu> = %12.5e kJ/mol\n",-log(VembU_all/V_all)/beta);
2890
  }
2891
  
2892
  /* Write the Boltzmann factor histogram */
2893
  if (PAR(cr)) {
2894
    /* When running in parallel sum the bins over the processes */
2895
    i = nbin;
2896
    global_max(cr,&i);
2897
    realloc_bins(&bin,&nbin,i);
2898
    gmx_sumd(nbin,bin,cr);
2899
  }
2900
  fp_tpi = xvgropen(opt2fn("-tpid",nfile,fnm),
2901
                    "TPI energy distribution",
2902
                    "\\8b\\4U - log(V/<V>)","count");
2903
  sprintf(str,"number \\8b\\4U > %g: %9.3e",bU_bin_limit,bin[0]);
2904
  xvgr_subtitle(fp_tpi,str);
2905
  xvgr_legend(fp_tpi,2,tpid_leg);
2906
  for(i=nbin-1; i>0; i--) {
2907
    bUlogV = -i/invbinw + bU_logV_bin_limit - refvolshift + log(V_all/frame);
2908
    fprintf(fp_tpi,"%6.2f %10d %12.5e\n",
2909
            bUlogV,
2910
            (int)(bin[i]+0.5),
2911
            bin[i]*exp(-bUlogV)*V_all/VembU_all);
2912
  }
2913
  gmx_fio_fclose(fp_tpi);
2914
  sfree(bin);
2915

    
2916
  sfree(sum_UgembU);
2917

    
2918
  return start_t;
2919
}