Project

General

Profile

gmx_cluster.cpp

Boris Timofeev, 08/07/2018 10:01 AM

 
1
/*
2
 * This file is part of the GROMACS molecular simulation package.
3
 *
4
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5
 * Copyright (c) 2001-2004, The GROMACS development team.
6
 * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
7
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8
 * and including many others, as listed in the AUTHORS file in the
9
 * top-level source directory and at http://www.gromacs.org.
10
 *
11
 * GROMACS is free software; you can redistribute it and/or
12
 * modify it under the terms of the GNU Lesser General Public License
13
 * as published by the Free Software Foundation; either version 2.1
14
 * of the License, or (at your option) any later version.
15
 *
16
 * GROMACS is distributed in the hope that it will be useful,
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
 * Lesser General Public License for more details.
20
 *
21
 * You should have received a copy of the GNU Lesser General Public
22
 * License along with GROMACS; if not, see
23
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25
 *
26
 * If you want to redistribute modifications to GROMACS, please
27
 * consider that scientific software is very special. Version
28
 * control is crucial - bugs must be traceable. We will be happy to
29
 * consider code for inclusion in the official distribution, but
30
 * derived work must not be called official GROMACS. Details are found
31
 * in the README & COPYING files - if they are missing, get the
32
 * official version at http://www.gromacs.org.
33
 *
34
 * To help us fund GROMACS development, we humbly ask that you cite
35
 * the research papers on the package. Check out http://www.gromacs.org.
36
 */
37
#include "gmxpre.h"
38

    
39
#include <cmath>
40
#include <cstdlib>
41
#include <cstring>
42

    
43
#include <algorithm>
44

    
45
#include "gromacs/commandline/pargs.h"
46
#include "gromacs/commandline/viewit.h"
47
#include "gromacs/fileio/confio.h"
48
#include "gromacs/fileio/matio.h"
49
#include "gromacs/fileio/trxio.h"
50
#include "gromacs/fileio/xvgr.h"
51
#include "gromacs/gmxana/cmat.h"
52
#include "gromacs/gmxana/gmx_ana.h"
53
#include "gromacs/linearalgebra/eigensolver.h"
54
#include "gromacs/math/do_fit.h"
55
#include "gromacs/math/vec.h"
56
#include "gromacs/pbcutil/pbc.h"
57
#include "gromacs/pbcutil/rmpbc.h"
58
#include "gromacs/random/threefry.h"
59
#include "gromacs/random/uniformintdistribution.h"
60
#include "gromacs/random/uniformrealdistribution.h"
61
#include "gromacs/topology/index.h"
62
#include "gromacs/topology/topology.h"
63
#include "gromacs/utility/arraysize.h"
64
#include "gromacs/utility/cstringutil.h"
65
#include "gromacs/utility/fatalerror.h"
66
#include "gromacs/utility/futil.h"
67
#include "gromacs/utility/smalloc.h"
68

    
69
/* print to two file pointers at once (i.e. stderr and log) */
70
static gmx_inline
71
void lo_ffprintf(FILE *fp1, FILE *fp2, const char *buf)
72
{
73
    fprintf(fp1, "%s", buf);
74
    fprintf(fp2, "%s", buf);
75
}
76

    
77
/* just print a prepared buffer to fp1 and fp2 */
78
static gmx_inline
79
void ffprintf(FILE *fp1, FILE *fp2, const char *buf)
80
{
81
    lo_ffprintf(fp1, fp2, buf);
82
}
83

    
84
/* prepare buffer with one argument, then print to fp1 and fp2 */
85
static gmx_inline
86
void ffprintf_d(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg)
87
{
88
    sprintf(buf, fmt, arg);
89
    lo_ffprintf(fp1, fp2, buf);
90
}
91

    
92
/* prepare buffer with one argument, then print to fp1 and fp2 */
93
static gmx_inline
94
void ffprintf_g(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg)
95
{
96
    sprintf(buf, fmt, arg);
97
    lo_ffprintf(fp1, fp2, buf);
98
}
99

    
100
/* prepare buffer with one argument, then print to fp1 and fp2 */
101
static gmx_inline
102
void ffprintf_s(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg)
103
{
104
    sprintf(buf, fmt, arg);
105
    lo_ffprintf(fp1, fp2, buf);
106
}
107

    
108
/* prepare buffer with two arguments, then print to fp1 and fp2 */
109
static gmx_inline
110
void ffprintf_dd(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg1, int arg2)
111
{
112
    sprintf(buf, fmt, arg1, arg2);
113
    lo_ffprintf(fp1, fp2, buf);
114
}
115

    
116
/* prepare buffer with two arguments, then print to fp1 and fp2 */
117
static gmx_inline
118
void ffprintf_gg(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg1, real arg2)
119
{
120
    sprintf(buf, fmt, arg1, arg2);
121
    lo_ffprintf(fp1, fp2, buf);
122
}
123

    
124
/* prepare buffer with two arguments, then print to fp1 and fp2 */
125
static gmx_inline
126
void ffprintf_ss(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg1, const char *arg2)
127
{
128
    sprintf(buf, fmt, arg1, arg2);
129
    lo_ffprintf(fp1, fp2, buf);
130
}
131

    
132
typedef struct {
133
    int  ncl;
134
    int *cl;
135
} t_clusters;
136

    
137
typedef struct {
138
    int  nr;
139
    int *nb;
140
} t_nnb;
141

    
142
void cp_index(int nn, int from[], int to[])
143
{
144
    int i;
145

    
146
    for (i = 0; (i < nn); i++)
147
    {
148
        to[i] = from[i];
149
    }
150
}
151

    
152
void mc_optimize(FILE *log, t_mat *m, real *time,
153
                 int maxiter, int nrandom,
154
                 int seed, real kT,
155
                 const char *conv, gmx_output_env_t *oenv)
156
{
157
    FILE      *fp = NULL;
158
    real       ecur, enext, emin, prob, enorm;
159
    int        i, j, iswap, jswap, nn, nuphill = 0;
160
    t_mat     *minimum;
161

    
162
    if (seed == 0)
163
    {
164
        seed = static_cast<int>(gmx::makeRandomSeed());
165
    }
166
    gmx::DefaultRandomEngine rng(seed);
167

    
168
    if (m->n1 != m->nn)
169
    {
170
        fprintf(stderr, "Can not do Monte Carlo optimization with a non-square matrix.\n");
171
        return;
172
    }
173
    printf("\nDoing Monte Carlo optimization to find the smoothest trajectory\n");
174
    printf("by reordering the frames to minimize the path between the two structures\n");
175
    printf("that have the largest pairwise RMSD.\n");
176
    printf("Using random seed %d.\n", seed);
177

    
178
    iswap = jswap = -1;
179
    enorm = m->mat[0][0];
180
    for (i = 0; (i < m->n1); i++)
181
    {
182
        for (j = 0; (j < m->nn); j++)
183
        {
184
            if (m->mat[i][j] > enorm)
185
            {
186
                enorm   = m->mat[i][j];
187
                iswap   = i;
188
                jswap   = j;
189
            }
190
        }
191
    }
192
    if ((iswap == -1) || (jswap == -1))
193
    {
194
        fprintf(stderr, "Matrix contains identical values in all fields\n");
195
        return;
196
    }
197
    swap_rows(m, 0, iswap);
198
    swap_rows(m, m->n1-1, jswap);
199
    emin = ecur = mat_energy(m);
200
    printf("Largest distance %g between %d and %d. Energy: %g.\n",
201
           enorm, iswap, jswap, emin);
202

    
203
    nn  = m->nn;
204

    
205
    /* Initiate and store global minimum */
206
    minimum     = init_mat(nn, m->b1D);
207
    minimum->nn = nn;
208
    copy_t_mat(minimum, m);
209

    
210
    if (NULL != conv)
211
    {
212
        fp = xvgropen(conv, "Convergence of the MC optimization",
213
                      "Energy", "Step", oenv);
214
    }
215

    
216
    gmx::UniformIntDistribution<int>     intDistNN(1, nn-2); // [1,nn-2]
217
    gmx::UniformRealDistribution<real>   realDistOne;        // [0,1)
218

    
219
    for (i = 0; (i < maxiter); i++)
220
    {
221
        /* Generate new swapping candidates */
222
        do
223
        {
224
            iswap = intDistNN(rng);
225
            jswap = intDistNN(rng);
226
        }
227
        while ((iswap == jswap) || (iswap >= nn-1) || (jswap >= nn-1));
228

    
229
        /* Apply swap and compute energy */
230
        swap_rows(m, iswap, jswap);
231
        enext = mat_energy(m);
232

    
233
        /* Compute probability */
234
        prob = 0;
235
        if ((enext < ecur) || (i < nrandom))
236
        {
237
            prob = 1;
238
            if (enext < emin)
239
            {
240
                /* Store global minimum */
241
                copy_t_mat(minimum, m);
242
                emin = enext;
243
            }
244
        }
245
        else if (kT > 0)
246
        {
247
            /* Try Monte Carlo step */
248
            prob = std::exp(-(enext-ecur)/(enorm*kT));
249
        }
250

    
251
        if (prob == 1 || realDistOne(rng) < prob)
252
        {
253
            if (enext > ecur)
254
            {
255
                nuphill++;
256
            }
257

    
258
            fprintf(log, "Iter: %d Swapped %4d and %4d (energy: %g prob: %g)\n",
259
                    i, iswap, jswap, enext, prob);
260
            if (NULL != fp)
261
            {
262
                fprintf(fp, "%6d  %10g\n", i, enext);
263
            }
264
            ecur = enext;
265
        }
266
        else
267
        {
268
            swap_rows(m, jswap, iswap);
269
        }
270
    }
271
    fprintf(log, "%d uphill steps were taken during optimization\n",
272
            nuphill);
273

    
274
    /* Now swap the matrix to get it into global minimum mode */
275
    copy_t_mat(m, minimum);
276

    
277
    fprintf(log, "Global minimum energy %g\n", mat_energy(minimum));
278
    fprintf(log, "Global minimum energy %g\n", mat_energy(m));
279
    fprintf(log, "Swapped time and frame indices and RMSD to next neighbor:\n");
280
    for (i = 0; (i < m->nn); i++)
281
    {
282
        fprintf(log, "%10g  %5d  %10g\n",
283
                time[m->m_ind[i]],
284
                m->m_ind[i],
285
                (i < m->nn-1) ? m->mat[m->m_ind[i]][m->m_ind[i+1]] : 0);
286
    }
287

    
288
    if (NULL != fp)
289
    {
290
        xvgrclose(fp);
291
    }
292
}
293

    
294
static void calc_dist(int nind, rvec x[], real **d)
295
{
296
    int      i, j;
297
    real    *xi;
298
    rvec     dx;
299

    
300
    for (i = 0; (i < nind-1); i++)
301
    {
302
        xi = x[i];
303
        for (j = i+1; (j < nind); j++)
304
        {
305
            /* Should use pbc_dx when analysing multiple molecueles,
306
             * but the box is not stored for every frame.
307
             */
308
            rvec_sub(xi, x[j], dx);
309
            d[i][j] = norm(dx);
310
        }
311
    }
312
}
313

    
314
static real rms_dist(int isize, real **d, real **d_r)
315
{
316
    int  i, j;
317
    real r, r2;
318

    
319
    r2 = 0.0;
320
    for (i = 0; (i < isize-1); i++)
321
    {
322
        for (j = i+1; (j < isize); j++)
323
        {
324
            r   = d[i][j]-d_r[i][j];
325
            r2 += r*r;
326
        }
327
    }
328
    r2 /= (isize*(isize-1))/2;
329

    
330
    return std::sqrt(r2);
331
}
332

    
333
static int rms_dist_comp(const void *a, const void *b)
334
{
335
    t_dist *da, *db;
336

    
337
    da = (t_dist *)a;
338
    db = (t_dist *)b;
339

    
340
    if (da->dist - db->dist < 0)
341
    {
342
        return -1;
343
    }
344
    else if (da->dist - db->dist > 0)
345
    {
346
        return 1;
347
    }
348
    return 0;
349
}
350

    
351
static int clust_id_comp(const void *a, const void *b)
352
{
353
    t_clustid *da, *db;
354

    
355
    da = (t_clustid *)a;
356
    db = (t_clustid *)b;
357

    
358
    return da->clust - db->clust;
359
}
360

    
361
static int nrnb_comp(const void *a, const void *b)
362
{
363
    t_nnb *da, *db;
364

    
365
    da = (t_nnb *)a;
366
    db = (t_nnb *)b;
367

    
368
    /* return the b-a, we want highest first */
369
    return db->nr - da->nr;
370
}
371

    
372
void gather(t_mat *m, real cutoff, t_clusters *clust)
373
{
374
    t_clustid *c;
375
    t_dist    *d;
376
    int        i, j, k, nn, cid, n1, diff;
377
    gmx_bool   bChange;
378

    
379
    /* First we sort the entries in the RMSD matrix */
380
    n1 = m->nn;
381
    nn = ((n1-1)*n1)/2;
382
    snew(d, nn);
383
    for (i = k = 0; (i < n1); i++)
384
    {
385
        for (j = i+1; (j < n1); j++, k++)
386
        {
387
            d[k].i    = i;
388
            d[k].j    = j;
389
            d[k].dist = m->mat[i][j];
390
        }
391
    }
392
    if (k != nn)
393
    {
394
        gmx_incons("gather algortihm");
395
    }
396
    qsort(d, nn, sizeof(d[0]), rms_dist_comp);
397

    
398
    /* Now we make a cluster index for all of the conformations */
399
    c = new_clustid(n1);
400

    
401
    /* Now we check the closest structures, and equalize their cluster numbers */
402
    fprintf(stderr, "Linking structures ");
403
    do
404
    {
405
        fprintf(stderr, "*");
406
        bChange = FALSE;
407
        for (k = 0; (k < nn) && (d[k].dist < cutoff); k++)
408
        {
409
            diff = c[d[k].j].clust - c[d[k].i].clust;
410
            if (diff)
411
            {
412
                bChange = TRUE;
413
                if (diff > 0)
414
                {
415
                    c[d[k].j].clust = c[d[k].i].clust;
416
                }
417
                else
418
                {
419
                    c[d[k].i].clust = c[d[k].j].clust;
420
                }
421
            }
422
        }
423
    }
424
    while (bChange);
425
    fprintf(stderr, "\nSorting and renumbering clusters\n");
426
    /* Sort on cluster number */
427
    qsort(c, n1, sizeof(c[0]), clust_id_comp);
428

    
429
    /* Renumber clusters */
430
    cid = 1;
431
    for (k = 1; k < n1; k++)
432
    {
433
        if (c[k].clust != c[k-1].clust)
434
        {
435
            c[k-1].clust = cid;
436
            cid++;
437
        }
438
        else
439
        {
440
            c[k-1].clust = cid;
441
        }
442
    }
443
    c[k-1].clust = cid;
444
    if (debug)
445
    {
446
        for (k = 0; (k < n1); k++)
447
        {
448
            fprintf(debug, "Cluster index for conformation %d: %d\n",
449
                    c[k].conf, c[k].clust);
450
        }
451
    }
452
    clust->ncl = cid;
453
    for (k = 0; k < n1; k++)
454
    {
455
        clust->cl[c[k].conf] = c[k].clust;
456
    }
457

    
458
    sfree(c);
459
    sfree(d);
460
}
461

    
462
gmx_bool jp_same(int **nnb, int i, int j, int P)
463
{
464
    gmx_bool bIn;
465
    int      k, ii, jj, pp;
466

    
467
    bIn = FALSE;
468
    for (k = 0; nnb[i][k] >= 0; k++)
469
    {
470
        bIn = bIn || (nnb[i][k] == j);
471
    }
472
    if (!bIn)
473
    {
474
        return FALSE;
475
    }
476

    
477
    bIn = FALSE;
478
    for (k = 0; nnb[j][k] >= 0; k++)
479
    {
480
        bIn = bIn || (nnb[j][k] == i);
481
    }
482
    if (!bIn)
483
    {
484
        return FALSE;
485
    }
486

    
487
    pp = 0;
488
    for (ii = 0; nnb[i][ii] >= 0; ii++)
489
    {
490
        for (jj = 0; nnb[j][jj] >= 0; jj++)
491
        {
492
            if ((nnb[i][ii] == nnb[j][jj]) && (nnb[i][ii] != -1))
493
            {
494
                pp++;
495
            }
496
        }
497
    }
498

    
499
    return (pp >= P);
500
}
501

    
502
static void jarvis_patrick(int n1, real **mat, int M, int P,
503
                           real rmsdcut, t_clusters *clust)
504
{
505
    t_dist     *row;
506
    t_clustid  *c;
507
    int       **nnb;
508
    int         i, j, k, cid, diff, maxval;
509
    gmx_bool    bChange;
510
    real      **mcpy = NULL;
511

    
512
    if (rmsdcut < 0)
513
    {
514
        rmsdcut = 10000;
515
    }
516

    
517
    /* First we sort the entries in the RMSD matrix row by row.
518
     * This gives us the nearest neighbor list.
519
     */
520
    snew(nnb, n1);
521
    snew(row, n1);
522
    for (i = 0; (i < n1); i++)
523
    {
524
        for (j = 0; (j < n1); j++)
525
        {
526
            row[j].j    = j;
527
            row[j].dist = mat[i][j];
528
        }
529
        qsort(row, n1, sizeof(row[0]), rms_dist_comp);
530
        if (M > 0)
531
        {
532
            /* Put the M nearest neighbors in the list */
533
            snew(nnb[i], M+1);
534
            for (j = k = 0; (k < M) && (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
535
            {
536
                if (row[j].j  != i)
537
                {
538
                    nnb[i][k]  = row[j].j;
539
                    k++;
540
                }
541
            }
542
            nnb[i][k] = -1;
543
        }
544
        else
545
        {
546
            /* Put all neighbors nearer than rmsdcut in the list */
547
            maxval = 0;
548
            k      = 0;
549
            for (j = 0; (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
550
            {
551
                if (row[j].j != i)
552
                {
553
                    if (k >= maxval)
554
                    {
555
                        maxval += 10;
556
                        srenew(nnb[i], maxval);
557
                    }
558
                    nnb[i][k] = row[j].j;
559
                    k++;
560
                }
561
            }
562
            if (k == maxval)
563
            {
564
                srenew(nnb[i], maxval+1);
565
            }
566
            nnb[i][k] = -1;
567
        }
568
    }
569
    sfree(row);
570
    if (debug)
571
    {
572
        fprintf(debug, "Nearest neighborlist. M = %d, P = %d\n", M, P);
573
        for (i = 0; (i < n1); i++)
574
        {
575
            fprintf(debug, "i:%5d nbs:", i);
576
            for (j = 0; nnb[i][j] >= 0; j++)
577
            {
578
                fprintf(debug, "%5d[%5.3f]", nnb[i][j], mat[i][nnb[i][j]]);
579
            }
580
            fprintf(debug, "\n");
581
        }
582
    }
583

    
584
    c = new_clustid(n1);
585
    fprintf(stderr, "Linking structures ");
586
    /* Use mcpy for temporary storage of booleans */
587
    mcpy = mk_matrix(n1, n1, FALSE);
588
    for (i = 0; i < n1; i++)
589
    {
590
        for (j = i+1; j < n1; j++)
591
        {
592
            mcpy[i][j] = jp_same(nnb, i, j, P);
593
        }
594
    }
595
    do
596
    {
597
        fprintf(stderr, "*");
598
        bChange = FALSE;
599
        for (i = 0; i < n1; i++)
600
        {
601
            for (j = i+1; j < n1; j++)
602
            {
603
                if (mcpy[i][j])
604
                {
605
                    diff = c[j].clust - c[i].clust;
606
                    if (diff)
607
                    {
608
                        bChange = TRUE;
609
                        if (diff > 0)
610
                        {
611
                            c[j].clust = c[i].clust;
612
                        }
613
                        else
614
                        {
615
                            c[i].clust = c[j].clust;
616
                        }
617
                    }
618
                }
619
            }
620
        }
621
    }
622
    while (bChange);
623

    
624
    fprintf(stderr, "\nSorting and renumbering clusters\n");
625
    /* Sort on cluster number */
626
    qsort(c, n1, sizeof(c[0]), clust_id_comp);
627

    
628
    /* Renumber clusters */
629
    cid = 1;
630
    for (k = 1; k < n1; k++)
631
    {
632
        if (c[k].clust != c[k-1].clust)
633
        {
634
            c[k-1].clust = cid;
635
            cid++;
636
        }
637
        else
638
        {
639
            c[k-1].clust = cid;
640
        }
641
    }
642
    c[k-1].clust = cid;
643
    clust->ncl   = cid;
644
    for (k = 0; k < n1; k++)
645
    {
646
        clust->cl[c[k].conf] = c[k].clust;
647
    }
648
    if (debug)
649
    {
650
        for (k = 0; (k < n1); k++)
651
        {
652
            fprintf(debug, "Cluster index for conformation %d: %d\n",
653
                    c[k].conf, c[k].clust);
654
        }
655
    }
656

    
657
    done_matrix(n1, &mcpy);
658

    
659
    sfree(c);
660
    for (i = 0; (i < n1); i++)
661
    {
662
        sfree(nnb[i]);
663
    }
664
    sfree(nnb);
665
}
666

    
667
static void dump_nnb (FILE *fp, const char *title, int n1, t_nnb *nnb)
668
{
669
    int i, j;
670

    
671
    /* dump neighbor list */
672
    fprintf(fp, "%s", title);
673
    for (i = 0; (i < n1); i++)
674
    {
675
        fprintf(fp, "i:%5d #:%5d nbs:", i, nnb[i].nr);
676
        for (j = 0; j < nnb[i].nr; j++)
677
        {
678
            fprintf(fp, "%5d", nnb[i].nb[j]);
679
        }
680
        fprintf(fp, "\n");
681
    }
682
}
683

    
684
static void gromos(int n1, real **mat, real rmsdcut, t_clusters *clust)
685
{
686
    t_dist *row;
687
    t_nnb  *nnb;
688
    int     i, j, k, j1, maxval;
689

    
690
    /* Put all neighbors nearer than rmsdcut in the list */
691
    fprintf(stderr, "Making list of neighbors within cutoff ");
692
    snew(nnb, n1);
693
    snew(row, n1);
694
    for (i = 0; (i < n1); i++)
695
    {
696
        maxval = 0;
697
        k      = 0;
698
        /* put all neighbors within cut-off in list */
699
        for (j = 0; j < n1; j++)
700
        {
701
            if (mat[i][j] < rmsdcut)
702
            {
703
                if (k >= maxval)
704
                {
705
                    maxval += 10;
706
                    srenew(nnb[i].nb, maxval);
707
                }
708
                nnb[i].nb[k] = j;
709
                k++;
710
            }
711
        }
712
        /* store nr of neighbors, we'll need that */
713
        nnb[i].nr = k;
714
        if (i%(1+n1/100) == 0)
715
        {
716
            fprintf(stderr, "%3d%%\b\b\b\b", (i*100+1)/n1);
717
        }
718
    }
719
    fprintf(stderr, "%3d%%\n", 100);
720
    sfree(row);
721

    
722
    /* sort neighbor list on number of neighbors, largest first */
723
    qsort(nnb, n1, sizeof(nnb[0]), nrnb_comp);
724

    
725
    if (debug)
726
    {
727
        dump_nnb(debug, "Nearest neighborlist after sort.\n", n1, nnb);
728
    }
729

    
730
    /* turn first structure with all its neighbors (largest) into cluster
731
       remove them from pool of structures and repeat for all remaining */
732
    fprintf(stderr, "Finding clusters %4d", 0);
733
    /* cluster id's start at 1: */
734
    k = 1;
735
    while (nnb[0].nr)
736
    {
737
        /* set cluster id (k) for first item in neighborlist */
738
        for (j = 0; j < nnb[0].nr; j++)
739
        {
740
            clust->cl[nnb[0].nb[j]] = k;
741
        }
742
        /* mark as done */
743
        nnb[0].nr = 0;
744
        sfree(nnb[0].nb);
745

    
746
        /* adjust number of neighbors for others, taking removals into account: */
747
        for (i = 1; i < n1 && nnb[i].nr; i++)
748
        {
749
            j1 = 0;
750
            for (j = 0; j < nnb[i].nr; j++)
751
            {
752
                /* if this neighbor wasn't removed */
753
                if (clust->cl[nnb[i].nb[j]] == 0)
754
                {
755
                    /* shift the rest (j1<=j) */
756
                    nnb[i].nb[j1] = nnb[i].nb[j];
757
                    /* next */
758
                    j1++;
759
                }
760
            }
761
            /* now j1 is the new number of neighbors */
762
            nnb[i].nr = j1;
763
        }
764
        /* sort again on nnb[].nr, because we have new # neighbors: */
765
        /* but we only need to sort upto i, i.e. when nnb[].nr>0 */
766
        qsort(nnb, i, sizeof(nnb[0]), nrnb_comp);
767

    
768
        fprintf(stderr, "\b\b\b\b%4d", k);
769
        /* new cluster id */
770
        k++;
771
    }
772
    fprintf(stderr, "\n");
773
    sfree(nnb);
774
    if (debug)
775
    {
776
        fprintf(debug, "Clusters (%d):\n", k);
777
        for (i = 0; i < n1; i++)
778
        {
779
            fprintf(debug, " %3d", clust->cl[i]);
780
        }
781
        fprintf(debug, "\n");
782
    }
783

    
784
    clust->ncl = k-1;
785
}
786

    
787
static rvec **read_whole_trj(const char *fn, int isize, int index[], int skip,
788
        int *nframe, real **time, matrix  **boxes, int **frameindexes, const gmx_output_env_t *oenv, gmx_bool bPBC, gmx_rmpbc_t gpbc)
789
{
790
    rvec       **xx, *x;
791
    matrix       box;
792
    real         t;
793
    int          i, i0, j, max_nf;
794
    int          natom;
795
    t_trxstatus *status;
796

    
797

    
798
    max_nf = 0;
799
    xx     = NULL;
800
    *time  = NULL;
801
        *boxes = NULL;
802

    
803

    
804
    natom  = read_first_x(oenv, &status, fn, &t, &x, box);
805
    i      = 0;
806
    i0     = 0;
807
    do
808
    {
809
        if (bPBC)
810
        {
811
            gmx_rmpbc(gpbc, natom, box, x);
812
        }
813
        if (i0 >= max_nf)
814
        {
815
            max_nf += 10;
816
            srenew(xx, max_nf);
817
            srenew(*time, max_nf);
818
                        srenew(*boxes, max_nf);
819
                        srenew(*frameindexes, max_nf);
820
        }
821
        if ((i % skip) == 0)
822
        {
823
            snew(xx[i0], isize);
824
            /* Store only the interesting atoms */
825
            for (j = 0; (j < isize); j++)
826
            {
827
                copy_rvec(x[index[j]], xx[i0][j]);
828
            }
829
            (*time)[i0] = t;
830
                        memcpy (&(*boxes)[i0], &box, sizeof(box));
831
                        (*frameindexes)[i0] = nframes_read(status);
832
            i0++;
833
        }
834
        i++;
835
    }
836
    while (read_next_x(oenv, status, &t, x, box));
837
    fprintf(stderr, "Allocated %lu bytes for frames\n",
838
            (unsigned long) (max_nf*isize*sizeof(**xx)));
839
    fprintf(stderr, "Read %d frames from trajectory %s\n", i0, fn);
840
    *nframe = i0;
841
    sfree(x);
842

    
843
    return xx;
844
}
845

    
846
static int plot_clusters(int nf, real **mat, t_clusters *clust,
847
                         int minstruct)
848
{
849
    int  i, j, ncluster, ci;
850
    int *cl_id, *nstruct, *strind;
851

    
852
    snew(cl_id, nf);
853
    snew(nstruct, nf);
854
    snew(strind, nf);
855
    for (i = 0; i < nf; i++)
856
    {
857
        strind[i] = 0;
858
        cl_id[i]  = clust->cl[i];
859
        nstruct[cl_id[i]]++;
860
    }
861
    ncluster = 0;
862
    for (i = 0; i < nf; i++)
863
    {
864
        if (nstruct[i] >= minstruct)
865
        {
866
            ncluster++;
867
            for (j = 0; (j < nf); j++)
868
            {
869
                if (cl_id[j] == i)
870
                {
871
                    strind[j] = ncluster;
872
                }
873
            }
874
        }
875
    }
876
    ncluster++;
877
    fprintf(stderr, "There are %d clusters with at least %d conformations\n",
878
            ncluster, minstruct);
879

    
880
    for (i = 0; (i < nf); i++)
881
    {
882
        ci = cl_id[i];
883
        for (j = 0; j < i; j++)
884
        {
885
            if ((ci == cl_id[j]) && (nstruct[ci] >= minstruct))
886
            {
887
                /* color different clusters with different colors, as long as
888
                   we don't run out of colors */
889
                mat[i][j] = strind[i];
890
            }
891
            else
892
            {
893
                mat[i][j] = 0;
894
            }
895
        }
896
    }
897
    sfree(strind);
898
    sfree(nstruct);
899
    sfree(cl_id);
900

    
901
    return ncluster;
902
}
903

    
904
static void mark_clusters(int nf, real **mat, real val, t_clusters *clust)
905
{
906
    int i, j;
907

    
908
    for (i = 0; i < nf; i++)
909
    {
910
        for (j = 0; j < i; j++)
911
        {
912
            if (clust->cl[i] == clust->cl[j])
913
            {
914
                mat[i][j] = val;
915
            }
916
            else
917
            {
918
                mat[i][j] = 0;
919
            }
920
        }
921
    }
922
}
923

    
924
static char *parse_filename(const char *fn, int maxnr)
925
{
926
    int         i;
927
    char       *fnout;
928
    const char *ext;
929
    char        buf[STRLEN];
930

    
931
    if (std::strchr(fn, '%'))
932
    {
933
        gmx_fatal(FARGS, "will not number filename %s containing '%c'", fn, '%');
934
    }
935
    /* number of digits needed in numbering */
936
    i = static_cast<int>((std::log(static_cast<real>(maxnr))/std::log(10.0)) + 1);
937
    /* split fn and ext */
938
    ext = std::strrchr(fn, '.');
939
    if (!ext)
940
    {
941
        gmx_fatal(FARGS, "cannot separate extension in filename %s", fn);
942
    }
943
    ext++;
944
    /* insert e.g. '%03d' between fn and ext */
945
    sprintf(buf, "%s%%0%dd.%s", fn, i, ext);
946
    snew(fnout, std::strlen(buf)+1);
947
    std::strcpy(fnout, buf);
948

    
949
    return fnout;
950
}
951

    
952
static void ana_trans(t_clusters *clust, int nf,
953
                      const char *transfn, const char *ntransfn, FILE *log,
954
                      t_rgb rlo, t_rgb rhi, const gmx_output_env_t *oenv)
955
{
956
    FILE  *fp;
957
    real **trans, *axis;
958
    int   *ntrans;
959
    int    i, ntranst, maxtrans;
960
    char   buf[STRLEN];
961

    
962
    snew(ntrans, clust->ncl);
963
    snew(trans, clust->ncl);
964
    snew(axis, clust->ncl);
965
    for (i = 0; i < clust->ncl; i++)
966
    {
967
        axis[i] = i+1;
968
        snew(trans[i], clust->ncl);
969
    }
970
    ntranst  = 0;
971
    maxtrans = 0;
972
    for (i = 1; i < nf; i++)
973
    {
974
        if (clust->cl[i] != clust->cl[i-1])
975
        {
976
            ntranst++;
977
            ntrans[clust->cl[i-1]-1]++;
978
            ntrans[clust->cl[i]-1]++;
979
            trans[clust->cl[i-1]-1][clust->cl[i]-1]++;
980
            maxtrans = static_cast<int>(std::max(static_cast<real>(maxtrans), trans[clust->cl[i]-1][clust->cl[i-1]-1]));
981
        }
982
    }
983
    ffprintf_dd(stderr, log, buf, "Counted %d transitions in total, "
984
                "max %d between two specific clusters\n", ntranst, maxtrans);
985
    if (transfn)
986
    {
987
        fp = gmx_ffopen(transfn, "w");
988
        i  = std::min(maxtrans+1, 80);
989
        write_xpm(fp, 0, "Cluster Transitions", "# transitions",
990
                  "from cluster", "to cluster",
991
                  clust->ncl, clust->ncl, axis, axis, trans,
992
                  0, maxtrans, rlo, rhi, &i);
993
        gmx_ffclose(fp);
994
    }
995
    if (ntransfn)
996
    {
997
        fp = xvgropen(ntransfn, "Cluster Transitions", "Cluster #", "# transitions",
998
                      oenv);
999
        for (i = 0; i < clust->ncl; i++)
1000
        {
1001
            fprintf(fp, "%5d %5d\n", i+1, ntrans[i]);
1002
        }
1003
        xvgrclose(fp);
1004
    }
1005
    sfree(ntrans);
1006
    for (i = 0; i < clust->ncl; i++)
1007
    {
1008
        sfree(trans[i]);
1009
    }
1010
    sfree(trans);
1011
    sfree(axis);
1012
}
1013

    
1014
static void analyze_clusters(int nf, t_clusters *clust, real **rmsd,
1015
                             int natom, t_atoms *atoms, rvec *xtps,
1016
                             real *mass, rvec **xx, real *time, matrix *boxes, int *frameindexes,
1017
                             int ifsize, int *fitidx,
1018
                             int iosize, int *outidx,
1019
                             const char *trxfn, const char *sizefn,
1020
                             const char *transfn, const char *ntransfn,
1021
                             const char *clustidfn, const char *clustndxfn,               
1022
                                 gmx_bool bAverage,
1023
                             int write_ncl, int write_nst, real rmsmin,
1024
                             gmx_bool bFit, FILE *log, t_rgb rlo, t_rgb rhi,
1025
                             const gmx_output_env_t *oenv)
1026
{
1027
    FILE        *size_fp = NULL;
1028
    char         buf[STRLEN], buf1[40], buf2[40], buf3[40], *trxsfn;
1029
    t_trxstatus *trxout  = NULL;
1030
    t_trxstatus *trxsout = NULL;
1031
    int          i, i1, cl, nstr, *structure, first = 0, midstr;
1032
    gmx_bool    *bWrite = NULL;
1033
    real         r, clrmsd, midrmsd;
1034
    rvec        *xav = NULL;
1035
    matrix       zerobox;
1036

    
1037
    clear_mat(zerobox);
1038

    
1039
    ffprintf_d(stderr, log, buf, "\nFound %d clusters\n\n", clust->ncl);
1040
    trxsfn = NULL;
1041
    if (trxfn)
1042
    {
1043
        /* do we write all structures? */
1044
        if (write_ncl)
1045
        {
1046
            trxsfn = parse_filename(trxfn, std::max(write_ncl, clust->ncl));
1047
            snew(bWrite, nf);
1048
        }
1049
        ffprintf_ss(stderr, log, buf, "Writing %s structure for each cluster to %s\n",
1050
                    bAverage ? "average" : "middle", trxfn);
1051
        if (write_ncl)
1052
        {
1053
            /* find out what we want to tell the user:
1054
               Writing [all structures|structures with rmsd > %g] for
1055
               {all|first %d} clusters {with more than %d structures} to %s     */
1056
            if (rmsmin > 0.0)
1057
            {
1058
                sprintf(buf1, "structures with rmsd > %g", rmsmin);
1059
            }
1060
            else
1061
            {
1062
                sprintf(buf1, "all structures");
1063
            }
1064
            buf2[0] = buf3[0] = '\0';
1065
            if (write_ncl >= clust->ncl)
1066
            {
1067
                if (write_nst == 0)
1068
                {
1069
                    sprintf(buf2, "all ");
1070
                }
1071
            }
1072
            else
1073
            {
1074
                sprintf(buf2, "the first %d ", write_ncl);
1075
            }
1076
            if (write_nst)
1077
            {
1078
                sprintf(buf3, " with more than %d structures", write_nst);
1079
            }
1080
            sprintf(buf, "Writing %s for %sclusters%s to %s\n", buf1, buf2, buf3, trxsfn);
1081
            ffprintf(stderr, log, buf);
1082
        }
1083

    
1084
        /* Prepare a reference structure for the orientation of the clusters  */
1085
        if (bFit)
1086
        {
1087
            reset_x(ifsize, fitidx, natom, NULL, xtps, mass);
1088
        }
1089
        trxout = open_trx(trxfn, "w");
1090
        /* Calculate the average structure in each cluster,               *
1091
         * all structures are fitted to the first struture of the cluster */
1092
        snew(xav, natom);
1093
    }
1094

    
1095
    if (transfn || ntransfn)
1096
    {
1097
        ana_trans(clust, nf, transfn, ntransfn, log, rlo, rhi, oenv);
1098
    }
1099

    
1100
    if (clustidfn)
1101
    {
1102
        FILE *fp = xvgropen(clustidfn, "Clusters", output_env_get_xvgr_tlabel(oenv), "Cluster #", oenv);
1103
        if (output_env_get_print_xvgr_codes(oenv))
1104
        {
1105
            fprintf(fp, "@    s0 symbol 2\n");
1106
            fprintf(fp, "@    s0 symbol size 0.2\n");
1107
            fprintf(fp, "@    s0 linestyle 0\n");
1108
        }
1109
        for (i = 0; i < nf; i++)
1110
        {
1111
            fprintf(fp, "%8g %8d\n", time[i], clust->cl[i]);
1112
        }
1113
        xvgrclose(fp);
1114
    }
1115
    if (sizefn)
1116
    {
1117
        size_fp = xvgropen(sizefn, "Cluster Sizes", "Cluster #", "# Structures", oenv);
1118
        if (output_env_get_print_xvgr_codes(oenv))
1119
        {
1120
            fprintf(size_fp, "@g%d type %s\n", 0, "bar");
1121
        }
1122
    }
1123

    
1124
        FILE *ndxfn = NULL;
1125
        if (clustndxfn) {
1126
                ndxfn = gmx_ffopen(clustndxfn, "w");
1127
        }
1128

    
1129
    snew(structure, nf);
1130
    fprintf(log, "\n%3s | %3s  %4s | %6s %4s | cluster members\n",
1131
            "cl.", "#st", "rmsd", "middle", "rmsd");
1132
    for (cl = 1; cl <= clust->ncl; cl++)
1133
    {
1134
        /* prepare structures (fit, middle, average) */
1135
        if (xav)
1136
        {
1137
            for (i = 0; i < natom; i++)
1138
            {
1139
                clear_rvec(xav[i]);
1140
            }
1141
        }
1142
        nstr = 0;
1143
        for (i1 = 0; i1 < nf; i1++)
1144
        {
1145
            if (clust->cl[i1] == cl)
1146
            {
1147
                structure[nstr] = i1;
1148
                nstr++;
1149
                if (trxfn && (bAverage || write_ncl) )
1150
                {
1151
                    if (bFit)
1152
                    {
1153
                        reset_x(ifsize, fitidx, natom, NULL, xx[i1], mass);
1154
                    }
1155
                    if (nstr == 1)
1156
                    {
1157
                        first = i1;
1158
                    }
1159
                    else if (bFit)
1160
                    {
1161
                        do_fit(natom, mass, xx[first], xx[i1]);
1162
                    }
1163
                    if (xav)
1164
                    {
1165
                        for (i = 0; i < natom; i++)
1166
                        {
1167
                            rvec_inc(xav[i], xx[i1][i]);
1168
                        }
1169
                    }
1170
                }
1171
            }
1172
        }
1173
        if (sizefn)
1174
        {
1175
            fprintf(size_fp, "%8d %8d\n", cl, nstr);
1176
        }
1177

    
1178
                if (ndxfn) {
1179
                        fprintf(ndxfn, "[Cluster_%04d]\n", cl);
1180
                }
1181
        clrmsd  = 0;
1182
        midstr  = 0;
1183
        midrmsd = 10000;
1184
        for (i1 = 0; i1 < nstr; i1++)
1185
        {
1186
            r = 0;
1187
            if (nstr > 1)
1188
            {
1189
                for (i = 0; i < nstr; i++)
1190
                {
1191
                    if (i < i1)
1192
                    {
1193
                        r += rmsd[structure[i]][structure[i1]];
1194
                    }
1195
                    else
1196
                    {
1197
                        r += rmsd[structure[i1]][structure[i]];
1198
                    }
1199
                }
1200
                r /= (nstr - 1);
1201
            }
1202
            if (r < midrmsd)
1203
            {
1204
                midstr  = structure[i1];
1205
                midrmsd = r;
1206
            }
1207
            clrmsd += r;
1208
        }
1209
        clrmsd /= nstr;
1210

    
1211
        /* dump cluster info to logfile */
1212
        if (nstr > 1)
1213
        {
1214
            sprintf(buf1, "%6.3f", clrmsd);
1215
            if (buf1[0] == '0')
1216
            {
1217
                buf1[0] = ' ';
1218
            }
1219
            sprintf(buf2, "%5.3f", midrmsd);
1220
            if (buf2[0] == '0')
1221
            {
1222
                buf2[0] = ' ';
1223
            }
1224
        }
1225
        else
1226
        {
1227
            sprintf(buf1, "%5s", "");
1228
            sprintf(buf2, "%5s", "");
1229
        }
1230
        fprintf(log, "%3d | %3d %s | %6g%s |", cl, nstr, buf1, time[midstr], buf2);
1231
        for (i = 0; i < nstr; i++)
1232
        {
1233
            if ((i % 7 == 0) && i)
1234
            {
1235
                sprintf(buf, "\n%3s | %3s  %4s | %6s %4s |", "", "", "", "", "");
1236
                                if (ndxfn) {
1237
                                        fprintf(ndxfn, "\n");
1238
                                }
1239
                        }
1240
            else
1241
            {
1242
                buf[0] = '\0';
1243
            }
1244
            i1 = structure[i];
1245
            fprintf(log, "%s %6g", buf, time[i1]);
1246
                        if (ndxfn && frameindexes) {
1247
                                fprintf(ndxfn, " %6d", frameindexes[i1]);
1248
                        }
1249
        }
1250
        fprintf(log, "\n");
1251
                if (ndxfn) 
1252
                        fprintf(ndxfn, "\n");
1253
        /* write structures to trajectory file(s) */
1254
        if (trxfn)
1255
        {
1256
            if (write_ncl)
1257
            {
1258
                for (i = 0; i < nstr; i++)
1259
                {
1260
                    bWrite[i] = FALSE;
1261
                }
1262
            }
1263
            if (cl < write_ncl+1 && nstr > write_nst)
1264
            {
1265
                /* Dump all structures for this cluster */
1266
                /* generate numbered filename (there is a %d in trxfn!) */
1267
                sprintf(buf, trxsfn, cl);
1268
                trxsout = open_trx(buf, "w");
1269
                for (i = 0; i < nstr; i++)
1270
                {
1271
                    bWrite[i] = TRUE;
1272
                    if (rmsmin > 0.0)
1273
                    {
1274
                        for (i1 = 0; i1 < i && bWrite[i]; i1++)
1275
                        {
1276
                            if (bWrite[i1])
1277
                            {
1278
                                bWrite[i] = rmsd[structure[i1]][structure[i]] > rmsmin;
1279
                            }
1280
                        }
1281
                    }
1282
                    if (bWrite[i])
1283
                    {
1284
                        write_trx(trxsout, iosize, outidx, atoms, i, time[structure[i]],  boxes[structure[i]],  //zerobox,
1285
                                  xx[structure[i]], NULL, NULL);
1286
                    }
1287
                }
1288
                close_trx(trxsout);
1289
            }
1290
            /* Dump the average structure for this cluster */
1291
            if (bAverage)
1292
            {
1293
                for (i = 0; i < natom; i++)
1294
                {
1295
                    svmul(1.0/nstr, xav[i], xav[i]);
1296
                }
1297
            }
1298
            else
1299
            {
1300
                for (i = 0; i < natom; i++)
1301
                {
1302
                    copy_rvec(xx[midstr][i], xav[i]);
1303
                }
1304
                if (bFit)
1305
                {
1306
                    reset_x(ifsize, fitidx, natom, NULL, xav, mass);
1307
                }
1308
            }
1309
            if (bFit)
1310
            {
1311
                do_fit(natom, mass, xtps, xav);
1312
            }
1313
            write_trx(trxout, iosize, outidx, atoms, cl, time[midstr], 
1314
                                boxes[midstr], // zerobox, 
1315
                                xav, NULL, NULL);
1316
        }
1317
    }
1318
    /* clean up */
1319
    if (trxfn)
1320
    {
1321
        close_trx(trxout);
1322
        sfree(xav);
1323
        if (write_ncl)
1324
        {
1325
            sfree(bWrite);
1326
        }
1327
    }
1328
    sfree(structure);
1329
    if (trxsfn)
1330
    {
1331
        sfree(trxsfn);
1332
    }
1333

    
1334
    if (size_fp)
1335
    {
1336
        xvgrclose(size_fp);
1337
    }
1338
        if (ndxfn)
1339
        {
1340
                gmx_ffclose(ndxfn);
1341
        }
1342
}
1343

    
1344
static void convert_mat(t_matrix *mat, t_mat *rms)
1345
{
1346
    int i, j;
1347

    
1348
    rms->n1 = mat->nx;
1349
    matrix2real(mat, rms->mat);
1350
    /* free input xpm matrix data */
1351
    for (i = 0; i < mat->nx; i++)
1352
    {
1353
        sfree(mat->matrix[i]);
1354
    }
1355
    sfree(mat->matrix);
1356

    
1357
    for (i = 0; i < mat->nx; i++)
1358
    {
1359
        for (j = i; j < mat->nx; j++)
1360
        {
1361
            rms->sumrms += rms->mat[i][j];
1362
            rms->maxrms  = std::max(rms->maxrms, rms->mat[i][j]);
1363
            if (j != i)
1364
            {
1365
                rms->minrms = std::min(rms->minrms, rms->mat[i][j]);
1366
            }
1367
        }
1368
    }
1369
    rms->nn = mat->nx;
1370
}
1371

    
1372
int gmx_cluster(int argc, char *argv[])
1373
{
1374
    const char        *desc[] = {
1375
        "[THISMODULE] can cluster structures using several different methods.",
1376
        "Distances between structures can be determined from a trajectory",
1377
        "or read from an [REF].xpm[ref] matrix file with the [TT]-dm[tt] option.",
1378
        "RMS deviation after fitting or RMS deviation of atom-pair distances",
1379
        "can be used to define the distance between structures.[PAR]",
1380

    
1381
        "single linkage: add a structure to a cluster when its distance to any",
1382
        "element of the cluster is less than [TT]cutoff[tt].[PAR]",
1383

    
1384
        "Jarvis Patrick: add a structure to a cluster when this structure",
1385
        "and a structure in the cluster have each other as neighbors and",
1386
        "they have a least [TT]P[tt] neighbors in common. The neighbors",
1387
        "of a structure are the M closest structures or all structures within",
1388
        "[TT]cutoff[tt].[PAR]",
1389

    
1390
        "Monte Carlo: reorder the RMSD matrix using Monte Carlo such that",
1391
        "the order of the frames is using the smallest possible increments.",
1392
        "With this it is possible to make a smooth animation going from one",
1393
        "structure to another with the largest possible (e.g.) RMSD between",
1394
        "them, however the intermediate steps should be as small as possible.",
1395
        "Applications could be to visualize a potential of mean force",
1396
        "ensemble of simulations or a pulling simulation. Obviously the user",
1397
        "has to prepare the trajectory well (e.g. by not superimposing frames).",
1398
        "The final result can be inspect visually by looking at the matrix",
1399
        "[REF].xpm[ref] file, which should vary smoothly from bottom to top.[PAR]",
1400

    
1401
        "diagonalization: diagonalize the RMSD matrix.[PAR]",
1402

    
1403
        "gromos: use algorithm as described in Daura [IT]et al.[it]",
1404
        "([IT]Angew. Chem. Int. Ed.[it] [BB]1999[bb], [IT]38[it], pp 236-240).",
1405
        "Count number of neighbors using cut-off, take structure with",
1406
        "largest number of neighbors with all its neighbors as cluster",
1407
        "and eliminate it from the pool of clusters. Repeat for remaining",
1408
        "structures in pool.[PAR]",
1409

    
1410
        "When the clustering algorithm assigns each structure to exactly one",
1411
        "cluster (single linkage, Jarvis Patrick and gromos) and a trajectory",
1412
        "file is supplied, the structure with",
1413
        "the smallest average distance to the others or the average structure",
1414
        "or all structures for each cluster will be written to a trajectory",
1415
        "file. When writing all structures, separate numbered files are made",
1416
        "for each cluster.[PAR]",
1417

    
1418
        "Two output files are always written:",
1419
        "",
1420
        " * [TT]-o[tt] writes the RMSD values in the upper left half of the matrix",
1421
        "   and a graphical depiction of the clusters in the lower right half",
1422
        "   When [TT]-minstruct[tt] = 1 the graphical depiction is black",
1423
        "   when two structures are in the same cluster.",
1424
        "   When [TT]-minstruct[tt] > 1 different colors will be used for each",
1425
        "   cluster.",
1426
        " * [TT]-g[tt] writes information on the options used and a detailed list",
1427
        "   of all clusters and their members.",
1428
        "",
1429

    
1430
        "Additionally, a number of optional output files can be written:",
1431
        "",
1432
        " * [TT]-dist[tt] writes the RMSD distribution.",
1433
        " * [TT]-ev[tt] writes the eigenvectors of the RMSD matrix",
1434
        "   diagonalization.",
1435
        " * [TT]-sz[tt] writes the cluster sizes.",
1436
        " * [TT]-tr[tt] writes a matrix of the number transitions between",
1437
        "   cluster pairs.",
1438
        " * [TT]-ntr[tt] writes the total number of transitions to or from",
1439
        "   each cluster.",
1440
        " * [TT]-clid[tt] writes the cluster number as a function of time.",
1441
        " * [TT]-cl[tt] writes average (with option [TT]-av[tt]) or central",
1442
        "   structure of each cluster or writes numbered files with cluster members",
1443
        "   for a selected set of clusters (with option [TT]-wcl[tt], depends on",
1444
        "   [TT]-nst[tt] and [TT]-rmsmin[tt]). The center of a cluster is the",
1445
        "   structure with the smallest average RMSD from all other structures",
1446
        "   of the cluster.",
1447
    };
1448

    
1449
    FILE              *fp, *log;
1450
    int                nf, i, i1, i2, j;
1451
    gmx_int64_t        nrms = 0;
1452

    
1453
    matrix             box;
1454
        matrix            *boxes;
1455
    rvec              *xtps, *usextps, *x1, **xx = NULL;
1456
    const char        *fn, *trx_out_fn;
1457
    t_clusters         clust;
1458
    t_mat             *rms, *orig = NULL;
1459
    real              *eigenvalues;
1460
    t_topology         top;
1461
    int                ePBC;
1462
    t_atoms            useatoms;
1463
    t_matrix          *readmat = NULL;
1464
    real              *eigenvectors;
1465

    
1466
    int                isize = 0, ifsize = 0, iosize = 0;
1467
    int               *index = NULL, *fitidx = NULL, *outidx = NULL, *frameindexes = NULL;
1468
    char              *grpname;
1469
    real               rmsd, **d1, **d2, *time = NULL, time_invfac, *mass = NULL;
1470
    char               buf[STRLEN], buf1[80], title[STRLEN];
1471
    gmx_bool           bAnalyze, bUseRmsdCut, bJP_RMSD = FALSE, bReadMat, bReadTraj, bPBC = TRUE;
1472

    
1473
    int                method, ncluster = 0;
1474
    static const char *methodname[] = {
1475
        NULL, "linkage", "jarvis-patrick", "monte-carlo",
1476
        "diagonalization", "gromos", NULL
1477
    };
1478
    enum {
1479
        m_null, m_linkage, m_jarvis_patrick,
1480
        m_monte_carlo, m_diagonalize, m_gromos, m_nr
1481
    };
1482
    /* Set colors for plotting: white = zero RMS, black = maximum */
1483
    static t_rgb      rlo_top  = { 1.0, 1.0, 1.0 };
1484
    static t_rgb      rhi_top  = { 0.0, 0.0, 0.0 };
1485
    static t_rgb      rlo_bot  = { 1.0, 1.0, 1.0 };
1486
    static t_rgb      rhi_bot  = { 0.0, 0.0, 1.0 };
1487
    static int        nlevels  = 40, skip = 1;
1488
    static real       scalemax = -1.0, rmsdcut = 0.1, rmsmin = 0.0;
1489
    gmx_bool          bRMSdist = FALSE, bBinary = FALSE, bAverage = FALSE, bFit = TRUE;
1490
    static int        niter    = 10000, nrandom = 0, seed = 0, write_ncl = 0, write_nst = 1, minstruct = 1;
1491
    static real       kT       = 1e-3;
1492
    static int        M        = 10, P = 3;
1493
    gmx_output_env_t *oenv;
1494
    gmx_rmpbc_t       gpbc = NULL;
1495

    
1496
    t_pargs           pa[] = {
1497
        { "-dista", FALSE, etBOOL, {&bRMSdist},
1498
          "Use RMSD of distances instead of RMS deviation" },
1499
        { "-nlevels", FALSE, etINT,  {&nlevels},
1500
          "Discretize RMSD matrix in this number of levels" },
1501
        { "-cutoff", FALSE, etREAL, {&rmsdcut},
1502
          "RMSD cut-off (nm) for two structures to be neighbor" },
1503
        { "-fit",   FALSE, etBOOL, {&bFit},
1504
          "Use least squares fitting before RMSD calculation" },
1505
        { "-max",   FALSE, etREAL, {&scalemax},
1506
          "Maximum level in RMSD matrix" },
1507
        { "-skip",  FALSE, etINT,  {&skip},
1508
          "Only analyze every nr-th frame" },
1509
        { "-av",    FALSE, etBOOL, {&bAverage},
1510
          "Write average iso middle structure for each cluster" },
1511
        { "-wcl",   FALSE, etINT,  {&write_ncl},
1512
          "Write the structures for this number of clusters to numbered files" },
1513
        { "-nst",   FALSE, etINT,  {&write_nst},
1514
          "Only write all structures if more than this number of structures per cluster" },
1515
        { "-rmsmin", FALSE, etREAL, {&rmsmin},
1516
          "minimum rms difference with rest of cluster for writing structures" },
1517
        { "-method", FALSE, etENUM, {methodname},
1518
          "Method for cluster determination" },
1519
        { "-minstruct", FALSE, etINT, {&minstruct},
1520
          "Minimum number of structures in cluster for coloring in the [REF].xpm[ref] file" },
1521
        { "-binary", FALSE, etBOOL, {&bBinary},
1522
          "Treat the RMSD matrix as consisting of 0 and 1, where the cut-off "
1523
          "is given by [TT]-cutoff[tt]" },
1524
        { "-M",     FALSE, etINT,  {&M},
1525
          "Number of nearest neighbors considered for Jarvis-Patrick algorithm, "
1526
          "0 is use cutoff" },
1527
        { "-P",     FALSE, etINT,  {&P},
1528
          "Number of identical nearest neighbors required to form a cluster" },
1529
        { "-seed",  FALSE, etINT,  {&seed},
1530
          "Random number seed for Monte Carlo clustering algorithm (0 means generate)" },
1531
        { "-niter", FALSE, etINT,  {&niter},
1532
          "Number of iterations for MC" },
1533
        { "-nrandom", FALSE, etINT,  {&nrandom},
1534
          "The first iterations for MC may be done complete random, to shuffle the frames" },
1535
        { "-kT",    FALSE, etREAL, {&kT},
1536
          "Boltzmann weighting factor for Monte Carlo optimization "
1537
          "(zero turns off uphill steps)" },
1538
        { "-pbc", FALSE, etBOOL,
1539
          { &bPBC }, "PBC check" }
1540
    };
1541
        t_filenm          fnm[] = {
1542
                { efTRX, "-f",     NULL,        ffOPTRD },
1543
                { efTPS, "-s",     NULL,        ffOPTRD },
1544
                { efNDX, NULL,     NULL,        ffOPTRD },
1545
                { efXPM, "-dm",   "rmsd",       ffOPTRD },
1546
                { efXPM, "-om",   "rmsd-raw",   ffWRITE },
1547
                { efXPM, "-o",    "rmsd-clust", ffWRITE },
1548
                { efLOG, "-g",    "cluster",    ffWRITE },
1549
                { efXVG, "-dist", "rmsd-dist",  ffOPTWR },
1550
                { efXVG, "-ev",   "rmsd-eig",   ffOPTWR },
1551
                { efXVG, "-conv", "mc-conv",    ffOPTWR },
1552
                { efXVG, "-sz",   "clust-size", ffOPTWR},
1553
                { efXPM, "-tr",   "clust-trans", ffOPTWR},
1554
                { efXVG, "-ntr",  "clust-trans", ffOPTWR},
1555
                { efXVG, "-clid", "clust-id",   ffOPTWR},
1556
                { efTRX, "-cl",   "clusters.pdb", ffOPTWR },
1557
                { efNDX, "-clndx","clusters.ndx", ffOPTWR }
1558
    };
1559
#define NFILE asize(fnm)
1560

    
1561
    if (!parse_common_args(&argc, argv,
1562
                           PCA_CAN_VIEW | PCA_CAN_TIME | PCA_TIME_UNIT,
1563
                           NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL,
1564
                           &oenv))
1565
    {
1566
        return 0;
1567
    }
1568

    
1569
    /* parse options */
1570
    bReadMat   = opt2bSet("-dm", NFILE, fnm);
1571
    bReadTraj  = opt2bSet("-f", NFILE, fnm) || !bReadMat;
1572
    if (opt2parg_bSet("-av", asize(pa), pa) ||
1573
        opt2parg_bSet("-wcl", asize(pa), pa) ||
1574
        opt2parg_bSet("-nst", asize(pa), pa) ||
1575
        opt2parg_bSet("-rmsmin", asize(pa), pa) ||
1576
        opt2bSet("-cl", NFILE, fnm) )
1577
    {
1578
        trx_out_fn = opt2fn("-cl", NFILE, fnm);
1579
    }
1580
    else
1581
    {
1582
        trx_out_fn = NULL;
1583
    }
1584
    if (bReadMat && output_env_get_time_factor(oenv) != 1)
1585
    {
1586
        fprintf(stderr,
1587
                "\nWarning: assuming the time unit in %s is %s\n",
1588
                opt2fn("-dm", NFILE, fnm), output_env_get_time_unit(oenv));
1589
    }
1590
    if (trx_out_fn && !bReadTraj)
1591
    {
1592
        fprintf(stderr, "\nWarning: "
1593
                "cannot write cluster structures without reading trajectory\n"
1594
                "         ignoring option -cl %s\n", trx_out_fn);
1595
    }
1596

    
1597
    method = 1;
1598
    while (method < m_nr && gmx_strcasecmp(methodname[0], methodname[method]) != 0)
1599
    {
1600
        method++;
1601
    }
1602
    if (method == m_nr)
1603
    {
1604
        gmx_fatal(FARGS, "Invalid method");
1605
    }
1606

    
1607
    bAnalyze = (method == m_linkage || method == m_jarvis_patrick ||
1608
                method == m_gromos );
1609

    
1610
    /* Open log file */
1611
    log = ftp2FILE(efLOG, NFILE, fnm, "w");
1612

    
1613
    fprintf(stderr, "Using %s method for clustering\n", methodname[0]);
1614
    fprintf(log, "Using %s method for clustering\n", methodname[0]);
1615

    
1616
    /* check input and write parameters to log file */
1617
    bUseRmsdCut = FALSE;
1618
    if (method == m_jarvis_patrick)
1619
    {
1620
        bJP_RMSD = (M == 0) || opt2parg_bSet("-cutoff", asize(pa), pa);
1621
        if ((M < 0) || (M == 1))
1622
        {
1623
            gmx_fatal(FARGS, "M (%d) must be 0 or larger than 1", M);
1624
        }
1625
        if (M < 2)
1626
        {
1627
            sprintf(buf1, "Will use P=%d and RMSD cutoff (%g)", P, rmsdcut);
1628
            bUseRmsdCut = TRUE;
1629
        }
1630
        else
1631
        {
1632
            if (P >= M)
1633
            {
1634
                gmx_fatal(FARGS, "Number of neighbors required (P) must be less than M");
1635
            }
1636
            if (bJP_RMSD)
1637
            {
1638
                sprintf(buf1, "Will use P=%d, M=%d and RMSD cutoff (%g)", P, M, rmsdcut);
1639
                bUseRmsdCut = TRUE;
1640
            }
1641
            else
1642
            {
1643
                sprintf(buf1, "Will use P=%d, M=%d", P, M);
1644
            }
1645
        }
1646
        ffprintf_s(stderr, log, buf, "%s for determining the neighbors\n\n", buf1);
1647
    }
1648
    else /* method != m_jarvis */
1649
    {
1650
        bUseRmsdCut = ( bBinary || method == m_linkage || method == m_gromos );
1651
    }
1652
    if (bUseRmsdCut && method != m_jarvis_patrick)
1653
    {
1654
        fprintf(log, "Using RMSD cutoff %g nm\n", rmsdcut);
1655
    }
1656
    if (method == m_monte_carlo)
1657
    {
1658
        fprintf(log, "Using %d iterations\n", niter);
1659
    }
1660

    
1661
    if (skip < 1)
1662
    {
1663
        gmx_fatal(FARGS, "skip (%d) should be >= 1", skip);
1664
    }
1665

    
1666
    /* get input */
1667
    if (bReadTraj)
1668
    {
1669
        /* don't read mass-database as masses (and top) are not used */
1670
        read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &ePBC, &xtps, NULL, box,
1671
                      TRUE);
1672
        if (bPBC)
1673
        {
1674
            gpbc = gmx_rmpbc_init(&top.idef, ePBC, top.atoms.nr);
1675
        }
1676

    
1677
        fprintf(stderr, "\nSelect group for least squares fit%s:\n",
1678
                bReadMat ? "" : " and RMSD calculation");
1679
        get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),
1680
                  1, &ifsize, &fitidx, &grpname);
1681
        if (trx_out_fn)
1682
        {
1683
            fprintf(stderr, "\nSelect group for output:\n");
1684
            get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),
1685
                      1, &iosize, &outidx, &grpname);
1686
            /* merge and convert both index groups: */
1687
            /* first copy outidx to index. let outidx refer to elements in index */
1688
            snew(index, iosize);
1689
            isize = iosize;
1690
            for (i = 0; i < iosize; i++)
1691
            {
1692
                index[i]  = outidx[i];
1693
                outidx[i] = i;
1694
            }
1695
            /* now lookup elements from fitidx in index, add them if necessary
1696
               and also let fitidx refer to elements in index */
1697
            for (i = 0; i < ifsize; i++)
1698
            {
1699
                j = 0;
1700
                while (j < isize && index[j] != fitidx[i])
1701
                {
1702
                    j++;
1703
                }
1704
                if (j >= isize)
1705
                {
1706
                    /* slow this way, but doesn't matter much */
1707
                    isize++;
1708
                    srenew(index, isize);
1709
                }
1710
                index[j]  = fitidx[i];
1711
                fitidx[i] = j;
1712
            }
1713
        }
1714
        else /* !trx_out_fn */
1715
        {
1716
            isize = ifsize;
1717
            snew(index, isize);
1718
            for (i = 0; i < ifsize; i++)
1719
            {
1720
                index[i]  = fitidx[i];
1721
                fitidx[i] = i;
1722
            }
1723
        }
1724
    }
1725

    
1726
    if (bReadTraj)
1727
    {
1728
        /* Loop over first coordinate file */
1729
        fn = opt2fn("-f", NFILE, fnm);
1730

    
1731
        xx = read_whole_trj(fn, isize, index, skip, &nf, &time, &boxes, &frameindexes, oenv, bPBC, gpbc);
1732
        output_env_conv_times(oenv, nf, time);
1733
        if (!bRMSdist || bAnalyze)
1734
        {
1735
            /* Center all frames on zero */
1736
            snew(mass, isize);
1737
            for (i = 0; i < ifsize; i++)
1738
            {
1739
                mass[fitidx[i]] = top.atoms.atom[index[fitidx[i]]].m;
1740
            }
1741
            if (bFit)
1742
            {
1743
                for (i = 0; i < nf; i++)
1744
                {
1745
                    reset_x(ifsize, fitidx, isize, NULL, xx[i], mass);
1746
                }
1747
            }
1748
        }
1749
        if (bPBC)
1750
        {
1751
            gmx_rmpbc_done(gpbc);
1752
        }
1753
    }
1754

    
1755
    if (bReadMat)
1756
    {
1757
        fprintf(stderr, "Reading rms distance matrix ");
1758
        read_xpm_matrix(opt2fn("-dm", NFILE, fnm), &readmat);
1759
        fprintf(stderr, "\n");
1760
        if (readmat[0].nx != readmat[0].ny)
1761
        {
1762
            gmx_fatal(FARGS, "Matrix (%dx%d) is not square",
1763
                      readmat[0].nx, readmat[0].ny);
1764
        }
1765
        if (bReadTraj && bAnalyze && (readmat[0].nx != nf))
1766
        {
1767
            gmx_fatal(FARGS, "Matrix size (%dx%d) does not match the number of "
1768
                      "frames (%d)", readmat[0].nx, readmat[0].ny, nf);
1769
        }
1770

    
1771
        nf = readmat[0].nx;
1772
        sfree(time);
1773
        time        = readmat[0].axis_x;
1774
        time_invfac = output_env_get_time_invfactor(oenv);
1775
        for (i = 0; i < nf; i++)
1776
        {
1777
            time[i] *= time_invfac;
1778
        }
1779

    
1780
        rms = init_mat(readmat[0].nx, method == m_diagonalize);
1781
        convert_mat(&(readmat[0]), rms);
1782

    
1783
        nlevels = readmat[0].nmap;
1784
    }
1785
    else   /* !bReadMat */
1786
    {
1787
        rms  = init_mat(nf, method == m_diagonalize);
1788
        nrms = (static_cast<gmx_int64_t>(nf)*static_cast<gmx_int64_t>(nf-1))/2;
1789
        if (!bRMSdist)
1790
        {
1791
            fprintf(stderr, "Computing %dx%d RMS deviation matrix\n", nf, nf);
1792
            /* Initialize work array */
1793
            snew(x1, isize);
1794
            for (i1 = 0; i1 < nf; i1++)
1795
            {
1796
                for (i2 = i1+1; i2 < nf; i2++)
1797
                {
1798
                    for (i = 0; i < isize; i++)
1799
                    {
1800
                        copy_rvec(xx[i1][i], x1[i]);
1801
                    }
1802
                    if (bFit)
1803
                    {
1804
                        do_fit(isize, mass, xx[i2], x1);
1805
                    }
1806
                    rmsd = rmsdev(isize, mass, xx[i2], x1);
1807
                    set_mat_entry(rms, i1, i2, rmsd);
1808
                }
1809
                nrms -= nf-i1-1;
1810
                fprintf(stderr, "\r# RMSD calculations left: " "%" GMX_PRId64 "   ", nrms);
1811
                fflush(stderr);
1812
            }
1813
            sfree(x1);
1814
        }
1815
        else /* bRMSdist */
1816
        {
1817
            fprintf(stderr, "Computing %dx%d RMS distance deviation matrix\n", nf, nf);
1818

    
1819
            /* Initiate work arrays */
1820
            snew(d1, isize);
1821
            snew(d2, isize);
1822
            for (i = 0; (i < isize); i++)
1823
            {
1824
                snew(d1[i], isize);
1825
                snew(d2[i], isize);
1826
            }
1827
            for (i1 = 0; i1 < nf; i1++)
1828
            {
1829
                calc_dist(isize, xx[i1], d1);
1830
                for (i2 = i1+1; (i2 < nf); i2++)
1831
                {
1832
                    calc_dist(isize, xx[i2], d2);
1833
                    set_mat_entry(rms, i1, i2, rms_dist(isize, d1, d2));
1834
                }
1835
                nrms -= nf-i1-1;
1836
                fprintf(stderr, "\r# RMSD calculations left: " "%" GMX_PRId64 "   ", nrms);
1837
                fflush(stderr);
1838
            }
1839
            /* Clean up work arrays */
1840
            for (i = 0; (i < isize); i++)
1841
            {
1842
                sfree(d1[i]);
1843
                sfree(d2[i]);
1844
            }
1845
            sfree(d1);
1846
            sfree(d2);
1847
        }
1848
        fprintf(stderr, "\n\n");
1849
    }
1850
    ffprintf_gg(stderr, log, buf, "The RMSD ranges from %g to %g nm\n",
1851
                rms->minrms, rms->maxrms);
1852
    ffprintf_g(stderr, log, buf, "Average RMSD is %g\n", 2*rms->sumrms/(nf*(nf-1)));
1853
    ffprintf_d(stderr, log, buf, "Number of structures for matrix %d\n", nf);
1854
    ffprintf_g(stderr, log, buf, "Energy of the matrix is %g.\n", mat_energy(rms));
1855
    if (bUseRmsdCut && (rmsdcut < rms->minrms || rmsdcut > rms->maxrms) )
1856
    {
1857
        fprintf(stderr, "WARNING: rmsd cutoff %g is outside range of rmsd values "
1858
                "%g to %g\n", rmsdcut, rms->minrms, rms->maxrms);
1859
    }
1860
    if (bAnalyze && (rmsmin < rms->minrms) )
1861
    {
1862
        fprintf(stderr, "WARNING: rmsd minimum %g is below lowest rmsd value %g\n",
1863
                rmsmin, rms->minrms);
1864
    }
1865
    if (bAnalyze && (rmsmin > rmsdcut) )
1866
    {
1867
        fprintf(stderr, "WARNING: rmsd minimum %g is above rmsd cutoff %g\n",
1868
                rmsmin, rmsdcut);
1869
    }
1870

    
1871
    /* Plot the rmsd distribution */
1872
    rmsd_distribution(opt2fn("-dist", NFILE, fnm), rms, oenv);
1873

    
1874
    if (bBinary)
1875
    {
1876
        for (i1 = 0; (i1 < nf); i1++)
1877
        {
1878
            for (i2 = 0; (i2 < nf); i2++)
1879
            {
1880
                if (rms->mat[i1][i2] < rmsdcut)
1881
                {
1882
                    rms->mat[i1][i2] = 0;
1883
                }
1884
                else
1885
                {
1886
                    rms->mat[i1][i2] = 1;
1887
                }
1888
            }
1889
        }
1890
    }
1891

    
1892
    snew(clust.cl, nf);
1893
    switch (method)
1894
    {
1895
        case m_linkage:
1896
            /* Now sort the matrix and write it out again */
1897
            gather(rms, rmsdcut, &clust);
1898
            break;
1899
        case m_diagonalize:
1900
            /* Do a diagonalization */
1901
            snew(eigenvalues, nf);
1902
            snew(eigenvectors, nf*nf);
1903
            std::memcpy(eigenvectors, rms->mat[0], nf*nf*sizeof(real));
1904
            eigensolver(eigenvectors, nf, 0, nf, eigenvalues, rms->mat[0]);
1905
            sfree(eigenvectors);
1906

    
1907
            fp = xvgropen(opt2fn("-ev", NFILE, fnm), "RMSD matrix Eigenvalues",
1908
                          "Eigenvector index", "Eigenvalues (nm\\S2\\N)", oenv);
1909
            for (i = 0; (i < nf); i++)
1910
            {
1911
                fprintf(fp, "%10d  %10g\n", i, eigenvalues[i]);
1912
            }
1913
            xvgrclose(fp);
1914
            break;
1915
        case m_monte_carlo:
1916
            orig     = init_mat(rms->nn, FALSE);
1917
            orig->nn = rms->nn;
1918
            copy_t_mat(orig, rms);
1919
            mc_optimize(log, rms, time, niter, nrandom, seed, kT,
1920
                        opt2fn_null("-conv", NFILE, fnm), oenv);
1921
            break;
1922
        case m_jarvis_patrick:
1923
            jarvis_patrick(rms->nn, rms->mat, M, P, bJP_RMSD ? rmsdcut : -1, &clust);
1924
            break;
1925
        case m_gromos:
1926
            gromos(rms->nn, rms->mat, rmsdcut, &clust);
1927
            break;
1928
        default:
1929
            gmx_fatal(FARGS, "DEATH HORROR unknown method \"%s\"", methodname[0]);
1930
    }
1931

    
1932
    if (method == m_monte_carlo || method == m_diagonalize)
1933
    {
1934
        fprintf(stderr, "Energy of the matrix after clustering is %g.\n",
1935
                mat_energy(rms));
1936
    }
1937

    
1938
    if (bAnalyze)
1939
    {
1940
        if (minstruct > 1)
1941
        {
1942
            ncluster = plot_clusters(nf, rms->mat, &clust, minstruct);
1943
        }
1944
        else
1945
        {
1946
            mark_clusters(nf, rms->mat, rms->maxrms, &clust);
1947
        }
1948
        init_t_atoms(&useatoms, isize, FALSE);
1949
        snew(usextps, isize);
1950
        useatoms.resinfo = top.atoms.resinfo;
1951
        for (i = 0; i < isize; i++)
1952
        {
1953
            useatoms.atomname[i]    = top.atoms.atomname[index[i]];
1954
            useatoms.atom[i].resind = top.atoms.atom[index[i]].resind;
1955
            useatoms.nres           = std::max(useatoms.nres, useatoms.atom[i].resind+1);
1956
            copy_rvec(xtps[index[i]], usextps[i]);
1957
        }
1958
        useatoms.nr = isize;
1959
        analyze_clusters(nf, &clust, rms->mat, isize, &useatoms, usextps, mass, xx, time, boxes, frameindexes,
1960
                         ifsize, fitidx, iosize, outidx, 
1961
                         bReadTraj ? trx_out_fn : NULL,
1962
                         opt2fn_null("-sz", NFILE, fnm),
1963
                         opt2fn_null("-tr", NFILE, fnm),
1964
                         opt2fn_null("-ntr", NFILE, fnm),
1965
                         opt2fn_null("-clid", NFILE, fnm),
1966
                                     opt2fn_null("-clndx", NFILE, fnm),
1967
                         bAverage, write_ncl, write_nst, rmsmin, bFit, log,
1968
                         rlo_bot, rhi_bot, oenv);
1969
                sfree(boxes);
1970
                sfree(frameindexes);
1971
    }
1972
    gmx_ffclose(log);
1973

    
1974
    if (bBinary && !bAnalyze)
1975
    {
1976
        /* Make the clustering visible */
1977
        for (i2 = 0; (i2 < nf); i2++)
1978
        {
1979
            for (i1 = i2+1; (i1 < nf); i1++)
1980
            {
1981
                if (rms->mat[i1][i2])
1982
                {
1983
                    rms->mat[i1][i2] = rms->maxrms;
1984
                }
1985
            }
1986
        }
1987
    }
1988

    
1989
    fp = opt2FILE("-o", NFILE, fnm, "w");
1990
    fprintf(stderr, "Writing rms distance/clustering matrix ");
1991
    if (bReadMat)
1992
    {
1993
        write_xpm(fp, 0, readmat[0].title, readmat[0].legend, readmat[0].label_x,
1994
                  readmat[0].label_y, nf, nf, readmat[0].axis_x, readmat[0].axis_y,
1995
                  rms->mat, 0.0, rms->maxrms, rlo_top, rhi_top, &nlevels);
1996
    }
1997
    else
1998
    {
1999
        sprintf(buf, "Time (%s)", output_env_get_time_unit(oenv));
2000
        sprintf(title, "RMS%sDeviation / Cluster Index",
2001
                bRMSdist ? " Distance " : " ");
2002
        if (minstruct > 1)
2003
        {
2004
            write_xpm_split(fp, 0, title, "RMSD (nm)", buf, buf,
2005
                            nf, nf, time, time, rms->mat, 0.0, rms->maxrms, &nlevels,
2006
                            rlo_top, rhi_top, 0.0, ncluster,
2007
                            &ncluster, TRUE, rlo_bot, rhi_bot);
2008
        }
2009
        else
2010
        {
2011
            write_xpm(fp, 0, title, "RMSD (nm)", buf, buf,
2012
                      nf, nf, time, time, rms->mat, 0.0, rms->maxrms,
2013
                      rlo_top, rhi_top, &nlevels);
2014
        }
2015
    }
2016
    fprintf(stderr, "\n");
2017
    gmx_ffclose(fp);
2018
    if (NULL != orig)
2019
    {
2020
        fp = opt2FILE("-om", NFILE, fnm, "w");
2021
        sprintf(buf, "Time (%s)", output_env_get_time_unit(oenv));
2022
        sprintf(title, "RMS%sDeviation", bRMSdist ? " Distance " : " ");
2023
        write_xpm(fp, 0, title, "RMSD (nm)", buf, buf,
2024
                  nf, nf, time, time, orig->mat, 0.0, orig->maxrms,
2025
                  rlo_top, rhi_top, &nlevels);
2026
        gmx_ffclose(fp);
2027
        done_mat(&orig);
2028
        sfree(orig);
2029
    }
2030
    /* now show what we've done */
2031
    do_view(oenv, opt2fn("-o", NFILE, fnm), "-nxy");
2032
    do_view(oenv, opt2fn_null("-sz", NFILE, fnm), "-nxy");
2033
    if (method == m_diagonalize)
2034
    {
2035
        do_view(oenv, opt2fn_null("-ev", NFILE, fnm), "-nxy");
2036
    }
2037
    do_view(oenv, opt2fn("-dist", NFILE, fnm), "-nxy");
2038
    if (bAnalyze)
2039
    {
2040
        do_view(oenv, opt2fn_null("-tr", NFILE, fnm), "-nxy");
2041
        do_view(oenv, opt2fn_null("-ntr", NFILE, fnm), "-nxy");
2042
        do_view(oenv, opt2fn_null("-clid", NFILE, fnm), "-nxy");
2043
    }
2044
    do_view(oenv, opt2fn_null("-conv", NFILE, fnm), NULL);
2045

    
2046
    return 0;
2047
}