1


2


3


4


5


6


7


8


9


10


11


12


13


14


15


16


17


18


19


20


21


22


23


24


25


26


27


28


29


30


31


32


33


34


35


36


37

#include "gmxpre.h"

38


39

#include <cmath>

40

#include <cstdlib>

41

#include <cstring>

42


43

#include <algorithm>

44


45

#include "gromacs/commandline/pargs.h"

46

#include "gromacs/commandline/viewit.h"

47

#include "gromacs/fileio/confio.h"

48

#include "gromacs/fileio/matio.h"

49

#include "gromacs/fileio/trxio.h"

50

#include "gromacs/fileio/xvgr.h"

51

#include "gromacs/gmxana/cmat.h"

52

#include "gromacs/gmxana/gmx_ana.h"

53

#include "gromacs/linearalgebra/eigensolver.h"

54

#include "gromacs/math/do_fit.h"

55

#include "gromacs/math/vec.h"

56

#include "gromacs/pbcutil/pbc.h"

57

#include "gromacs/pbcutil/rmpbc.h"

58

#include "gromacs/random/threefry.h"

59

#include "gromacs/random/uniformintdistribution.h"

60

#include "gromacs/random/uniformrealdistribution.h"

61

#include "gromacs/topology/index.h"

62

#include "gromacs/topology/topology.h"

63

#include "gromacs/utility/arraysize.h"

64

#include "gromacs/utility/cstringutil.h"

65

#include "gromacs/utility/fatalerror.h"

66

#include "gromacs/utility/futil.h"

67

#include "gromacs/utility/smalloc.h"

68


69


70

static gmx_inline

71

void lo_ffprintf(FILE *fp1, FILE *fp2, const char *buf)

72

{

73

fprintf(fp1, "%s", buf);

74

fprintf(fp2, "%s", buf);

75

}

76


77


78

static gmx_inline

79

void ffprintf(FILE *fp1, FILE *fp2, const char *buf)

80

{

81

lo_ffprintf(fp1, fp2, buf);

82

}

83


84


85

static gmx_inline

86

void ffprintf_d(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg)

87

{

88

sprintf(buf, fmt, arg);

89

lo_ffprintf(fp1, fp2, buf);

90

}

91


92


93

static gmx_inline

94

void ffprintf_g(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg)

95

{

96

sprintf(buf, fmt, arg);

97

lo_ffprintf(fp1, fp2, buf);

98

}

99


100


101

static gmx_inline

102

void ffprintf_s(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg)

103

{

104

sprintf(buf, fmt, arg);

105

lo_ffprintf(fp1, fp2, buf);

106

}

107


108


109

static gmx_inline

110

void ffprintf_dd(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg1, int arg2)

111

{

112

sprintf(buf, fmt, arg1, arg2);

113

lo_ffprintf(fp1, fp2, buf);

114

}

115


116


117

static gmx_inline

118

void ffprintf_gg(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg1, real arg2)

119

{

120

sprintf(buf, fmt, arg1, arg2);

121

lo_ffprintf(fp1, fp2, buf);

122

}

123


124


125

static gmx_inline

126

void ffprintf_ss(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg1, const char *arg2)

127

{

128

sprintf(buf, fmt, arg1, arg2);

129

lo_ffprintf(fp1, fp2, buf);

130

}

131


132

typedef struct {

133

int ncl;

134

int *cl;

135

} t_clusters;

136


137

typedef struct {

138

int nr;

139

int *nb;

140

} t_nnb;

141


142

void cp_index(int nn, int from[], int to[])

143

{

144

int i;

145


146

for (i = 0; (i < nn); i++)

147

{

148

to[i] = from[i];

149

}

150

}

151


152

void mc_optimize(FILE *log, t_mat *m, real *time,

153

int maxiter, int nrandom,

154

int seed, real kT,

155

const char *conv, gmx_output_env_t *oenv)

156

{

157

FILE *fp = NULL;

158

real ecur, enext, emin, prob, enorm;

159

int i, j, iswap, jswap, nn, nuphill = 0;

160

t_mat *minimum;

161


162

if (seed == 0)

163

{

164

seed = static_cast<int>(gmx::makeRandomSeed());

165

}

166

gmx::DefaultRandomEngine rng(seed);

167


168

if (m>n1 != m>nn)

169

{

170

fprintf(stderr, "Can not do Monte Carlo optimization with a nonsquare matrix.\n");

171

return;

172

}

173

printf("\nDoing Monte Carlo optimization to find the smoothest trajectory\n");

174

printf("by reordering the frames to minimize the path between the two structures\n");

175

printf("that have the largest pairwise RMSD.\n");

176

printf("Using random seed %d.\n", seed);

177


178

iswap = jswap = 1;

179

enorm = m>mat[0][0];

180

for (i = 0; (i < m>n1); i++)

181

{

182

for (j = 0; (j < m>nn); j++)

183

{

184

if (m>mat[i][j] > enorm)

185

{

186

enorm = m>mat[i][j];

187

iswap = i;

188

jswap = j;

189

}

190

}

191

}

192

if ((iswap == 1)  (jswap == 1))

193

{

194

fprintf(stderr, "Matrix contains identical values in all fields\n");

195

return;

196

}

197

swap_rows(m, 0, iswap);

198

swap_rows(m, m>n11, jswap);

199

emin = ecur = mat_energy(m);

200

printf("Largest distance %g between %d and %d. Energy: %g.\n",

201

enorm, iswap, jswap, emin);

202


203

nn = m>nn;

204


205


206

minimum = init_mat(nn, m>b1D);

207

minimum>nn = nn;

208

copy_t_mat(minimum, m);

209


210

if (NULL != conv)

211

{

212

fp = xvgropen(conv, "Convergence of the MC optimization",

213

"Energy", "Step", oenv);

214

}

215


216

gmx::UniformIntDistribution<int> intDistNN(1, nn2);

217

gmx::UniformRealDistribution<real> realDistOne;

218


219

for (i = 0; (i < maxiter); i++)

220

{

221


222

do

223

{

224

iswap = intDistNN(rng);

225

jswap = intDistNN(rng);

226

}

227

while ((iswap == jswap)  (iswap >= nn1)  (jswap >= nn1));

228


229


230

swap_rows(m, iswap, jswap);

231

enext = mat_energy(m);

232


233


234

prob = 0;

235

if ((enext < ecur)  (i < nrandom))

236

{

237

prob = 1;

238

if (enext < emin)

239

{

240


241

copy_t_mat(minimum, m);

242

emin = enext;

243

}

244

}

245

else if (kT > 0)

246

{

247


248

prob = std::exp((enextecur)/(enorm*kT));

249

}

250


251

if (prob == 1  realDistOne(rng) < prob)

252

{

253

if (enext > ecur)

254

{

255

nuphill++;

256

}

257


258

fprintf(log, "Iter: %d Swapped %4d and %4d (energy: %g prob: %g)\n",

259

i, iswap, jswap, enext, prob);

260

if (NULL != fp)

261

{

262

fprintf(fp, "%6d %10g\n", i, enext);

263

}

264

ecur = enext;

265

}

266

else

267

{

268

swap_rows(m, jswap, iswap);

269

}

270

}

271

fprintf(log, "%d uphill steps were taken during optimization\n",

272

nuphill);

273


274


275

copy_t_mat(m, minimum);

276


277

fprintf(log, "Global minimum energy %g\n", mat_energy(minimum));

278

fprintf(log, "Global minimum energy %g\n", mat_energy(m));

279

fprintf(log, "Swapped time and frame indices and RMSD to next neighbor:\n");

280

for (i = 0; (i < m>nn); i++)

281

{

282

fprintf(log, "%10g %5d %10g\n",

283

time[m>m_ind[i]],

284

m>m_ind[i],

285

(i < m>nn1) ? m>mat[m>m_ind[i]][m>m_ind[i+1]] : 0);

286

}

287


288

if (NULL != fp)

289

{

290

xvgrclose(fp);

291

}

292

}

293


294

static void calc_dist(int nind, rvec x[], real **d)

295

{

296

int i, j;

297

real *xi;

298

rvec dx;

299


300

for (i = 0; (i < nind1); i++)

301

{

302

xi = x[i];

303

for (j = i+1; (j < nind); j++)

304

{

305


306


307


308

rvec_sub(xi, x[j], dx);

309

d[i][j] = norm(dx);

310

}

311

}

312

}

313


314

static real rms_dist(int isize, real **d, real **d_r)

315

{

316

int i, j;

317

real r, r2;

318


319

r2 = 0.0;

320

for (i = 0; (i < isize1); i++)

321

{

322

for (j = i+1; (j < isize); j++)

323

{

324

r = d[i][j]d_r[i][j];

325

r2 += r*r;

326

}

327

}

328

r2 /= (isize*(isize1))/2;

329


330

return std::sqrt(r2);

331

}

332


333

static int rms_dist_comp(const void *a, const void *b)

334

{

335

t_dist *da, *db;

336


337

da = (t_dist *)a;

338

db = (t_dist *)b;

339


340

if (da>dist  db>dist < 0)

341

{

342

return 1;

343

}

344

else if (da>dist  db>dist > 0)

345

{

346

return 1;

347

}

348

return 0;

349

}

350


351

static int clust_id_comp(const void *a, const void *b)

352

{

353

t_clustid *da, *db;

354


355

da = (t_clustid *)a;

356

db = (t_clustid *)b;

357


358

return da>clust  db>clust;

359

}

360


361

static int nrnb_comp(const void *a, const void *b)

362

{

363

t_nnb *da, *db;

364


365

da = (t_nnb *)a;

366

db = (t_nnb *)b;

367


368


369

return db>nr  da>nr;

370

}

371


372

void gather(t_mat *m, real cutoff, t_clusters *clust)

373

{

374

t_clustid *c;

375

t_dist *d;

376

int i, j, k, nn, cid, n1, diff;

377

gmx_bool bChange;

378


379


380

n1 = m>nn;

381

nn = ((n11)*n1)/2;

382

snew(d, nn);

383

for (i = k = 0; (i < n1); i++)

384

{

385

for (j = i+1; (j < n1); j++, k++)

386

{

387

d[k].i = i;

388

d[k].j = j;

389

d[k].dist = m>mat[i][j];

390

}

391

}

392

if (k != nn)

393

{

394

gmx_incons("gather algortihm");

395

}

396

qsort(d, nn, sizeof(d[0]), rms_dist_comp);

397


398


399

c = new_clustid(n1);

400


401


402

fprintf(stderr, "Linking structures ");

403

do

404

{

405

fprintf(stderr, "*");

406

bChange = FALSE;

407

for (k = 0; (k < nn) && (d[k].dist < cutoff); k++)

408

{

409

diff = c[d[k].j].clust  c[d[k].i].clust;

410

if (diff)

411

{

412

bChange = TRUE;

413

if (diff > 0)

414

{

415

c[d[k].j].clust = c[d[k].i].clust;

416

}

417

else

418

{

419

c[d[k].i].clust = c[d[k].j].clust;

420

}

421

}

422

}

423

}

424

while (bChange);

425

fprintf(stderr, "\nSorting and renumbering clusters\n");

426


427

qsort(c, n1, sizeof(c[0]), clust_id_comp);

428


429


430

cid = 1;

431

for (k = 1; k < n1; k++)

432

{

433

if (c[k].clust != c[k1].clust)

434

{

435

c[k1].clust = cid;

436

cid++;

437

}

438

else

439

{

440

c[k1].clust = cid;

441

}

442

}

443

c[k1].clust = cid;

444

if (debug)

445

{

446

for (k = 0; (k < n1); k++)

447

{

448

fprintf(debug, "Cluster index for conformation %d: %d\n",

449

c[k].conf, c[k].clust);

450

}

451

}

452

clust>ncl = cid;

453

for (k = 0; k < n1; k++)

454

{

455

clust>cl[c[k].conf] = c[k].clust;

456

}

457


458

sfree(c);

459

sfree(d);

460

}

461


462

gmx_bool jp_same(int **nnb, int i, int j, int P)

463

{

464

gmx_bool bIn;

465

int k, ii, jj, pp;

466


467

bIn = FALSE;

468

for (k = 0; nnb[i][k] >= 0; k++)

469

{

470

bIn = bIn  (nnb[i][k] == j);

471

}

472

if (!bIn)

473

{

474

return FALSE;

475

}

476


477

bIn = FALSE;

478

for (k = 0; nnb[j][k] >= 0; k++)

479

{

480

bIn = bIn  (nnb[j][k] == i);

481

}

482

if (!bIn)

483

{

484

return FALSE;

485

}

486


487

pp = 0;

488

for (ii = 0; nnb[i][ii] >= 0; ii++)

489

{

490

for (jj = 0; nnb[j][jj] >= 0; jj++)

491

{

492

if ((nnb[i][ii] == nnb[j][jj]) && (nnb[i][ii] != 1))

493

{

494

pp++;

495

}

496

}

497

}

498


499

return (pp >= P);

500

}

501


502

static void jarvis_patrick(int n1, real **mat, int M, int P,

503

real rmsdcut, t_clusters *clust)

504

{

505

t_dist *row;

506

t_clustid *c;

507

int **nnb;

508

int i, j, k, cid, diff, maxval;

509

gmx_bool bChange;

510

real **mcpy = NULL;

511


512

if (rmsdcut < 0)

513

{

514

rmsdcut = 10000;

515

}

516


517


518


519


520

snew(nnb, n1);

521

snew(row, n1);

522

for (i = 0; (i < n1); i++)

523

{

524

for (j = 0; (j < n1); j++)

525

{

526

row[j].j = j;

527

row[j].dist = mat[i][j];

528

}

529

qsort(row, n1, sizeof(row[0]), rms_dist_comp);

530

if (M > 0)

531

{

532


533

snew(nnb[i], M+1);

534

for (j = k = 0; (k < M) && (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)

535

{

536

if (row[j].j != i)

537

{

538

nnb[i][k] = row[j].j;

539

k++;

540

}

541

}

542

nnb[i][k] = 1;

543

}

544

else

545

{

546


547

maxval = 0;

548

k = 0;

549

for (j = 0; (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)

550

{

551

if (row[j].j != i)

552

{

553

if (k >= maxval)

554

{

555

maxval += 10;

556

srenew(nnb[i], maxval);

557

}

558

nnb[i][k] = row[j].j;

559

k++;

560

}

561

}

562

if (k == maxval)

563

{

564

srenew(nnb[i], maxval+1);

565

}

566

nnb[i][k] = 1;

567

}

568

}

569

sfree(row);

570

if (debug)

571

{

572

fprintf(debug, "Nearest neighborlist. M = %d, P = %d\n", M, P);

573

for (i = 0; (i < n1); i++)

574

{

575

fprintf(debug, "i:%5d nbs:", i);

576

for (j = 0; nnb[i][j] >= 0; j++)

577

{

578

fprintf(debug, "%5d[%5.3f]", nnb[i][j], mat[i][nnb[i][j]]);

579

}

580

fprintf(debug, "\n");

581

}

582

}

583


584

c = new_clustid(n1);

585

fprintf(stderr, "Linking structures ");

586


587

mcpy = mk_matrix(n1, n1, FALSE);

588

for (i = 0; i < n1; i++)

589

{

590

for (j = i+1; j < n1; j++)

591

{

592

mcpy[i][j] = jp_same(nnb, i, j, P);

593

}

594

}

595

do

596

{

597

fprintf(stderr, "*");

598

bChange = FALSE;

599

for (i = 0; i < n1; i++)

600

{

601

for (j = i+1; j < n1; j++)

602

{

603

if (mcpy[i][j])

604

{

605

diff = c[j].clust  c[i].clust;

606

if (diff)

607

{

608

bChange = TRUE;

609

if (diff > 0)

610

{

611

c[j].clust = c[i].clust;

612

}

613

else

614

{

615

c[i].clust = c[j].clust;

616

}

617

}

618

}

619

}

620

}

621

}

622

while (bChange);

623


624

fprintf(stderr, "\nSorting and renumbering clusters\n");

625


626

qsort(c, n1, sizeof(c[0]), clust_id_comp);

627


628


629

cid = 1;

630

for (k = 1; k < n1; k++)

631

{

632

if (c[k].clust != c[k1].clust)

633

{

634

c[k1].clust = cid;

635

cid++;

636

}

637

else

638

{

639

c[k1].clust = cid;

640

}

641

}

642

c[k1].clust = cid;

643

clust>ncl = cid;

644

for (k = 0; k < n1; k++)

645

{

646

clust>cl[c[k].conf] = c[k].clust;

647

}

648

if (debug)

649

{

650

for (k = 0; (k < n1); k++)

651

{

652

fprintf(debug, "Cluster index for conformation %d: %d\n",

653

c[k].conf, c[k].clust);

654

}

655

}

656


657

done_matrix(n1, &mcpy);

658


659

sfree(c);

660

for (i = 0; (i < n1); i++)

661

{

662

sfree(nnb[i]);

663

}

664

sfree(nnb);

665

}

666


667

static void dump_nnb (FILE *fp, const char *title, int n1, t_nnb *nnb)

668

{

669

int i, j;

670


671


672

fprintf(fp, "%s", title);

673

for (i = 0; (i < n1); i++)

674

{

675

fprintf(fp, "i:%5d #:%5d nbs:", i, nnb[i].nr);

676

for (j = 0; j < nnb[i].nr; j++)

677

{

678

fprintf(fp, "%5d", nnb[i].nb[j]);

679

}

680

fprintf(fp, "\n");

681

}

682

}

683


684

static void gromos(int n1, real **mat, real rmsdcut, t_clusters *clust)

685

{

686

t_dist *row;

687

t_nnb *nnb;

688

int i, j, k, j1, maxval;

689


690


691

fprintf(stderr, "Making list of neighbors within cutoff ");

692

snew(nnb, n1);

693

snew(row, n1);

694

for (i = 0; (i < n1); i++)

695

{

696

maxval = 0;

697

k = 0;

698


699

for (j = 0; j < n1; j++)

700

{

701

if (mat[i][j] < rmsdcut)

702

{

703

if (k >= maxval)

704

{

705

maxval += 10;

706

srenew(nnb[i].nb, maxval);

707

}

708

nnb[i].nb[k] = j;

709

k++;

710

}

711

}

712


713

nnb[i].nr = k;

714

if (i%(1+n1/100) == 0)

715

{

716

fprintf(stderr, "%3d%%\b\b\b\b", (i*100+1)/n1);

717

}

718

}

719

fprintf(stderr, "%3d%%\n", 100);

720

sfree(row);

721


722


723

qsort(nnb, n1, sizeof(nnb[0]), nrnb_comp);

724


725

if (debug)

726

{

727

dump_nnb(debug, "Nearest neighborlist after sort.\n", n1, nnb);

728

}

729


730


731


732

fprintf(stderr, "Finding clusters %4d", 0);

733


734

k = 1;

735

while (nnb[0].nr)

736

{

737


738

for (j = 0; j < nnb[0].nr; j++)

739

{

740

clust>cl[nnb[0].nb[j]] = k;

741

}

742


743

nnb[0].nr = 0;

744

sfree(nnb[0].nb);

745


746


747

for (i = 1; i < n1 && nnb[i].nr; i++)

748

{

749

j1 = 0;

750

for (j = 0; j < nnb[i].nr; j++)

751

{

752


753

if (clust>cl[nnb[i].nb[j]] == 0)

754

{

755


756

nnb[i].nb[j1] = nnb[i].nb[j];

757


758

j1++;

759

}

760

}

761


762

nnb[i].nr = j1;

763

}

764


765


766

qsort(nnb, i, sizeof(nnb[0]), nrnb_comp);

767


768

fprintf(stderr, "\b\b\b\b%4d", k);

769


770

k++;

771

}

772

fprintf(stderr, "\n");

773

sfree(nnb);

774

if (debug)

775

{

776

fprintf(debug, "Clusters (%d):\n", k);

777

for (i = 0; i < n1; i++)

778

{

779

fprintf(debug, " %3d", clust>cl[i]);

780

}

781

fprintf(debug, "\n");

782

}

783


784

clust>ncl = k1;

785

}

786


787

static rvec **read_whole_trj(const char *fn, int isize, int index[], int skip,

788

int *nframe, real **time, matrix **boxes, int **frameindexes, const gmx_output_env_t *oenv, gmx_bool bPBC, gmx_rmpbc_t gpbc)

789

{

790

rvec **xx, *x;

791

matrix box;

792

real t;

793

int i, i0, j, max_nf;

794

int natom;

795

t_trxstatus *status;

796


797


798

max_nf = 0;

799

xx = NULL;

800

*time = NULL;

801

*boxes = NULL;

802


803


804

natom = read_first_x(oenv, &status, fn, &t, &x, box);

805

i = 0;

806

i0 = 0;

807

do

808

{

809

if (bPBC)

810

{

811

gmx_rmpbc(gpbc, natom, box, x);

812

}

813

if (i0 >= max_nf)

814

{

815

max_nf += 10;

816

srenew(xx, max_nf);

817

srenew(*time, max_nf);

818

srenew(*boxes, max_nf);

819

srenew(*frameindexes, max_nf);

820

}

821

if ((i % skip) == 0)

822

{

823

snew(xx[i0], isize);

824


825

for (j = 0; (j < isize); j++)

826

{

827

copy_rvec(x[index[j]], xx[i0][j]);

828

}

829

(*time)[i0] = t;

830

memcpy (&(*boxes)[i0], &box, sizeof(box));

831

(*frameindexes)[i0] = nframes_read(status);

832

i0++;

833

}

834

i++;

835

}

836

while (read_next_x(oenv, status, &t, x, box));

837

fprintf(stderr, "Allocated %lu bytes for frames\n",

838

(unsigned long) (max_nf*isize*sizeof(**xx)));

839

fprintf(stderr, "Read %d frames from trajectory %s\n", i0, fn);

840

*nframe = i0;

841

sfree(x);

842


843

return xx;

844

}

845


846

static int plot_clusters(int nf, real **mat, t_clusters *clust,

847

int minstruct)

848

{

849

int i, j, ncluster, ci;

850

int *cl_id, *nstruct, *strind;

851


852

snew(cl_id, nf);

853

snew(nstruct, nf);

854

snew(strind, nf);

855

for (i = 0; i < nf; i++)

856

{

857

strind[i] = 0;

858

cl_id[i] = clust>cl[i];

859

nstruct[cl_id[i]]++;

860

}

861

ncluster = 0;

862

for (i = 0; i < nf; i++)

863

{

864

if (nstruct[i] >= minstruct)

865

{

866

ncluster++;

867

for (j = 0; (j < nf); j++)

868

{

869

if (cl_id[j] == i)

870

{

871

strind[j] = ncluster;

872

}

873

}

874

}

875

}

876

ncluster++;

877

fprintf(stderr, "There are %d clusters with at least %d conformations\n",

878

ncluster, minstruct);

879


880

for (i = 0; (i < nf); i++)

881

{

882

ci = cl_id[i];

883

for (j = 0; j < i; j++)

884

{

885

if ((ci == cl_id[j]) && (nstruct[ci] >= minstruct))

886

{

887


888


889

mat[i][j] = strind[i];

890

}

891

else

892

{

893

mat[i][j] = 0;

894

}

895

}

896

}

897

sfree(strind);

898

sfree(nstruct);

899

sfree(cl_id);

900


901

return ncluster;

902

}

903


904

static void mark_clusters(int nf, real **mat, real val, t_clusters *clust)

905

{

906

int i, j;

907


908

for (i = 0; i < nf; i++)

909

{

910

for (j = 0; j < i; j++)

911

{

912

if (clust>cl[i] == clust>cl[j])

913

{

914

mat[i][j] = val;

915

}

916

else

917

{

918

mat[i][j] = 0;

919

}

920

}

921

}

922

}

923


924

static char *parse_filename(const char *fn, int maxnr)

925

{

926

int i;

927

char *fnout;

928

const char *ext;

929

char buf[STRLEN];

930


931

if (std::strchr(fn, '%'))

932

{

933

gmx_fatal(FARGS, "will not number filename %s containing '%c'", fn, '%');

934

}

935


936

i = static_cast<int>((std::log(static_cast<real>(maxnr))/std::log(10.0)) + 1);

937


938

ext = std::strrchr(fn, '.');

939

if (!ext)

940

{

941

gmx_fatal(FARGS, "cannot separate extension in filename %s", fn);

942

}

943

ext++;

944


945

sprintf(buf, "%s%%0%dd.%s", fn, i, ext);

946

snew(fnout, std::strlen(buf)+1);

947

std::strcpy(fnout, buf);

948


949

return fnout;

950

}

951


952

static void ana_trans(t_clusters *clust, int nf,

953

const char *transfn, const char *ntransfn, FILE *log,

954

t_rgb rlo, t_rgb rhi, const gmx_output_env_t *oenv)

955

{

956

FILE *fp;

957

real **trans, *axis;

958

int *ntrans;

959

int i, ntranst, maxtrans;

960

char buf[STRLEN];

961


962

snew(ntrans, clust>ncl);

963

snew(trans, clust>ncl);

964

snew(axis, clust>ncl);

965

for (i = 0; i < clust>ncl; i++)

966

{

967

axis[i] = i+1;

968

snew(trans[i], clust>ncl);

969

}

970

ntranst = 0;

971

maxtrans = 0;

972

for (i = 1; i < nf; i++)

973

{

974

if (clust>cl[i] != clust>cl[i1])

975

{

976

ntranst++;

977

ntrans[clust>cl[i1]1]++;

978

ntrans[clust>cl[i]1]++;

979

trans[clust>cl[i1]1][clust>cl[i]1]++;

980

maxtrans = static_cast<int>(std::max(static_cast<real>(maxtrans), trans[clust>cl[i]1][clust>cl[i1]1]));

981

}

982

}

983

ffprintf_dd(stderr, log, buf, "Counted %d transitions in total, "

984

"max %d between two specific clusters\n", ntranst, maxtrans);

985

if (transfn)

986

{

987

fp = gmx_ffopen(transfn, "w");

988

i = std::min(maxtrans+1, 80);

989

write_xpm(fp, 0, "Cluster Transitions", "# transitions",

990

"from cluster", "to cluster",

991

clust>ncl, clust>ncl, axis, axis, trans,

992

0, maxtrans, rlo, rhi, &i);

993

gmx_ffclose(fp);

994

}

995

if (ntransfn)

996

{

997

fp = xvgropen(ntransfn, "Cluster Transitions", "Cluster #", "# transitions",

998

oenv);

999

for (i = 0; i < clust>ncl; i++)

1000

{

1001

fprintf(fp, "%5d %5d\n", i+1, ntrans[i]);

1002

}

1003

xvgrclose(fp);

1004

}

1005

sfree(ntrans);

1006

for (i = 0; i < clust>ncl; i++)

1007

{

1008

sfree(trans[i]);

1009

}

1010

sfree(trans);

1011

sfree(axis);

1012

}

1013


1014

static void analyze_clusters(int nf, t_clusters *clust, real **rmsd,

1015

int natom, t_atoms *atoms, rvec *xtps,

1016

real *mass, rvec **xx, real *time, matrix *boxes, int *frameindexes,

1017

int ifsize, int *fitidx,

1018

int iosize, int *outidx,

1019

const char *trxfn, const char *sizefn,

1020

const char *transfn, const char *ntransfn,

1021

const char *clustidfn, const char *clustndxfn,

1022

gmx_bool bAverage,

1023

int write_ncl, int write_nst, real rmsmin,

1024

gmx_bool bFit, FILE *log, t_rgb rlo, t_rgb rhi,

1025

const gmx_output_env_t *oenv)

1026

{

1027

FILE *size_fp = NULL;

1028

char buf[STRLEN], buf1[40], buf2[40], buf3[40], *trxsfn;

1029

t_trxstatus *trxout = NULL;

1030

t_trxstatus *trxsout = NULL;

1031

int i, i1, cl, nstr, *structure, first = 0, midstr;

1032

gmx_bool *bWrite = NULL;

1033

real r, clrmsd, midrmsd;

1034

rvec *xav = NULL;

1035

matrix zerobox;

1036


1037

clear_mat(zerobox);

1038


1039

ffprintf_d(stderr, log, buf, "\nFound %d clusters\n\n", clust>ncl);

1040

trxsfn = NULL;

1041

if (trxfn)

1042

{

1043


1044

if (write_ncl)

1045

{

1046

trxsfn = parse_filename(trxfn, std::max(write_ncl, clust>ncl));

1047

snew(bWrite, nf);

1048

}

1049

ffprintf_ss(stderr, log, buf, "Writing %s structure for each cluster to %s\n",

1050

bAverage ? "average" : "middle", trxfn);

1051

if (write_ncl)

1052

{

1053


1054


1055


1056

if (rmsmin > 0.0)

1057

{

1058

sprintf(buf1, "structures with rmsd > %g", rmsmin);

1059

}

1060

else

1061

{

1062

sprintf(buf1, "all structures");

1063

}

1064

buf2[0] = buf3[0] = '\0';

1065

if (write_ncl >= clust>ncl)

1066

{

1067

if (write_nst == 0)

1068

{

1069

sprintf(buf2, "all ");

1070

}

1071

}

1072

else

1073

{

1074

sprintf(buf2, "the first %d ", write_ncl);

1075

}

1076

if (write_nst)

1077

{

1078

sprintf(buf3, " with more than %d structures", write_nst);

1079

}

1080

sprintf(buf, "Writing %s for %sclusters%s to %s\n", buf1, buf2, buf3, trxsfn);

1081

ffprintf(stderr, log, buf);

1082

}

1083


1084


1085

if (bFit)

1086

{

1087

reset_x(ifsize, fitidx, natom, NULL, xtps, mass);

1088

}

1089

trxout = open_trx(trxfn, "w");

1090


1091


1092

snew(xav, natom);

1093

}

1094


1095

if (transfn  ntransfn)

1096

{

1097

ana_trans(clust, nf, transfn, ntransfn, log, rlo, rhi, oenv);

1098

}

1099


1100

if (clustidfn)

1101

{

1102

FILE *fp = xvgropen(clustidfn, "Clusters", output_env_get_xvgr_tlabel(oenv), "Cluster #", oenv);

1103

if (output_env_get_print_xvgr_codes(oenv))

1104

{

1105

fprintf(fp, "@ s0 symbol 2\n");

1106

fprintf(fp, "@ s0 symbol size 0.2\n");

1107

fprintf(fp, "@ s0 linestyle 0\n");

1108

}

1109

for (i = 0; i < nf; i++)

1110

{

1111

fprintf(fp, "%8g %8d\n", time[i], clust>cl[i]);

1112

}

1113

xvgrclose(fp);

1114

}

1115

if (sizefn)

1116

{

1117

size_fp = xvgropen(sizefn, "Cluster Sizes", "Cluster #", "# Structures", oenv);

1118

if (output_env_get_print_xvgr_codes(oenv))

1119

{

1120

fprintf(size_fp, "@g%d type %s\n", 0, "bar");

1121

}

1122

}

1123


1124

FILE *ndxfn = NULL;

1125

if (clustndxfn) {

1126

ndxfn = gmx_ffopen(clustndxfn, "w");

1127

}

1128


1129

snew(structure, nf);

1130

fprintf(log, "\n%3s  %3s %4s  %6s %4s  cluster members\n",

1131

"cl.", "#st", "rmsd", "middle", "rmsd");

1132

for (cl = 1; cl <= clust>ncl; cl++)

1133

{

1134


1135

if (xav)

1136

{

1137

for (i = 0; i < natom; i++)

1138

{

1139

clear_rvec(xav[i]);

1140

}

1141

}

1142

nstr = 0;

1143

for (i1 = 0; i1 < nf; i1++)

1144

{

1145

if (clust>cl[i1] == cl)

1146

{

1147

structure[nstr] = i1;

1148

nstr++;

1149

if (trxfn && (bAverage  write_ncl) )

1150

{

1151

if (bFit)

1152

{

1153

reset_x(ifsize, fitidx, natom, NULL, xx[i1], mass);

1154

}

1155

if (nstr == 1)

1156

{

1157

first = i1;

1158

}

1159

else if (bFit)

1160

{

1161

do_fit(natom, mass, xx[first], xx[i1]);

1162

}

1163

if (xav)

1164

{

1165

for (i = 0; i < natom; i++)

1166

{

1167

rvec_inc(xav[i], xx[i1][i]);

1168

}

1169

}

1170

}

1171

}

1172

}

1173

if (sizefn)

1174

{

1175

fprintf(size_fp, "%8d %8d\n", cl, nstr);

1176

}

1177


1178

if (ndxfn) {

1179

fprintf(ndxfn, "[Cluster_%04d]\n", cl);

1180

}

1181

clrmsd = 0;

1182

midstr = 0;

1183

midrmsd = 10000;

1184

for (i1 = 0; i1 < nstr; i1++)

1185

{

1186

r = 0;

1187

if (nstr > 1)

1188

{

1189

for (i = 0; i < nstr; i++)

1190

{

1191

if (i < i1)

1192

{

1193

r += rmsd[structure[i]][structure[i1]];

1194

}

1195

else

1196

{

1197

r += rmsd[structure[i1]][structure[i]];

1198

}

1199

}

1200

r /= (nstr  1);

1201

}

1202

if (r < midrmsd)

1203

{

1204

midstr = structure[i1];

1205

midrmsd = r;

1206

}

1207

clrmsd += r;

1208

}

1209

clrmsd /= nstr;

1210


1211


1212

if (nstr > 1)

1213

{

1214

sprintf(buf1, "%6.3f", clrmsd);

1215

if (buf1[0] == '0')

1216

{

1217

buf1[0] = ' ';

1218

}

1219

sprintf(buf2, "%5.3f", midrmsd);

1220

if (buf2[0] == '0')

1221

{

1222

buf2[0] = ' ';

1223

}

1224

}

1225

else

1226

{

1227

sprintf(buf1, "%5s", "");

1228

sprintf(buf2, "%5s", "");

1229

}

1230

fprintf(log, "%3d  %3d %s  %6g%s ", cl, nstr, buf1, time[midstr], buf2);

1231

for (i = 0; i < nstr; i++)

1232

{

1233

if ((i % 7 == 0) && i)

1234

{

1235

sprintf(buf, "\n%3s  %3s %4s  %6s %4s ", "", "", "", "", "");

1236

if (ndxfn) {

1237

fprintf(ndxfn, "\n");

1238

}

1239

}

1240

else

1241

{

1242

buf[0] = '\0';

1243

}

1244

i1 = structure[i];

1245

fprintf(log, "%s %6g", buf, time[i1]);

1246

if (ndxfn && frameindexes) {

1247

fprintf(ndxfn, " %6d", frameindexes[i1]);

1248

}

1249

}

1250

fprintf(log, "\n");

1251

if (ndxfn)

1252

fprintf(ndxfn, "\n");

1253


1254

if (trxfn)

1255

{

1256

if (write_ncl)

1257

{

1258

for (i = 0; i < nstr; i++)

1259

{

1260

bWrite[i] = FALSE;

1261

}

1262

}

1263

if (cl < write_ncl+1 && nstr > write_nst)

1264

{

1265


1266


1267

sprintf(buf, trxsfn, cl);

1268

trxsout = open_trx(buf, "w");

1269

for (i = 0; i < nstr; i++)

1270

{

1271

bWrite[i] = TRUE;

1272

if (rmsmin > 0.0)

1273

{

1274

for (i1 = 0; i1 < i && bWrite[i]; i1++)

1275

{

1276

if (bWrite[i1])

1277

{

1278

bWrite[i] = rmsd[structure[i1]][structure[i]] > rmsmin;

1279

}

1280

}

1281

}

1282

if (bWrite[i])

1283

{

1284

write_trx(trxsout, iosize, outidx, atoms, i, time[structure[i]], boxes[structure[i]],

1285

xx[structure[i]], NULL, NULL);

1286

}

1287

}

1288

close_trx(trxsout);

1289

}

1290


1291

if (bAverage)

1292

{

1293

for (i = 0; i < natom; i++)

1294

{

1295

svmul(1.0/nstr, xav[i], xav[i]);

1296

}

1297

}

1298

else

1299

{

1300

for (i = 0; i < natom; i++)

1301

{

1302

copy_rvec(xx[midstr][i], xav[i]);

1303

}

1304

if (bFit)

1305

{

1306

reset_x(ifsize, fitidx, natom, NULL, xav, mass);

1307

}

1308

}

1309

if (bFit)

1310

{

1311

do_fit(natom, mass, xtps, xav);

1312

}

1313

write_trx(trxout, iosize, outidx, atoms, cl, time[midstr],

1314

boxes[midstr],

1315

xav, NULL, NULL);

1316

}

1317

}

1318


1319

if (trxfn)

1320

{

1321

close_trx(trxout);

1322

sfree(xav);

1323

if (write_ncl)

1324

{

1325

sfree(bWrite);

1326

}

1327

}

1328

sfree(structure);

1329

if (trxsfn)

1330

{

1331

sfree(trxsfn);

1332

}

1333


1334

if (size_fp)

1335

{

1336

xvgrclose(size_fp);

1337

}

1338

if (ndxfn)

1339

{

1340

gmx_ffclose(ndxfn);

1341

}

1342

}

1343


1344

static void convert_mat(t_matrix *mat, t_mat *rms)

1345

{

1346

int i, j;

1347


1348

rms>n1 = mat>nx;

1349

matrix2real(mat, rms>mat);

1350


1351

for (i = 0; i < mat>nx; i++)

1352

{

1353

sfree(mat>matrix[i]);

1354

}

1355

sfree(mat>matrix);

1356


1357

for (i = 0; i < mat>nx; i++)

1358

{

1359

for (j = i; j < mat>nx; j++)

1360

{

1361

rms>sumrms += rms>mat[i][j];

1362

rms>maxrms = std::max(rms>maxrms, rms>mat[i][j]);

1363

if (j != i)

1364

{

1365

rms>minrms = std::min(rms>minrms, rms>mat[i][j]);

1366

}

1367

}

1368

}

1369

rms>nn = mat>nx;

1370

}

1371


1372

int gmx_cluster(int argc, char *argv[])

1373

{

1374

const char *desc[] = {

1375

"[THISMODULE] can cluster structures using several different methods.",

1376

"Distances between structures can be determined from a trajectory",

1377

"or read from an [REF].xpm[ref] matrix file with the [TT]dm[tt] option.",

1378

"RMS deviation after fitting or RMS deviation of atompair distances",

1379

"can be used to define the distance between structures.[PAR]",

1380


1381

"single linkage: add a structure to a cluster when its distance to any",

1382

"element of the cluster is less than [TT]cutoff[tt].[PAR]",

1383


1384

"Jarvis Patrick: add a structure to a cluster when this structure",

1385

"and a structure in the cluster have each other as neighbors and",

1386

"they have a least [TT]P[tt] neighbors in common. The neighbors",

1387

"of a structure are the M closest structures or all structures within",

1388

"[TT]cutoff[tt].[PAR]",

1389


1390

"Monte Carlo: reorder the RMSD matrix using Monte Carlo such that",

1391

"the order of the frames is using the smallest possible increments.",

1392

"With this it is possible to make a smooth animation going from one",

1393

"structure to another with the largest possible (e.g.) RMSD between",

1394

"them, however the intermediate steps should be as small as possible.",

1395

"Applications could be to visualize a potential of mean force",

1396

"ensemble of simulations or a pulling simulation. Obviously the user",

1397

"has to prepare the trajectory well (e.g. by not superimposing frames).",

1398

"The final result can be inspect visually by looking at the matrix",

1399

"[REF].xpm[ref] file, which should vary smoothly from bottom to top.[PAR]",

1400


1401

"diagonalization: diagonalize the RMSD matrix.[PAR]",

1402


1403

"gromos: use algorithm as described in Daura [IT]et al.[it]",

1404

"([IT]Angew. Chem. Int. Ed.[it] [BB]1999[bb], [IT]38[it], pp 236240).",

1405

"Count number of neighbors using cutoff, take structure with",

1406

"largest number of neighbors with all its neighbors as cluster",

1407

"and eliminate it from the pool of clusters. Repeat for remaining",

1408

"structures in pool.[PAR]",

1409


1410

"When the clustering algorithm assigns each structure to exactly one",

1411

"cluster (single linkage, Jarvis Patrick and gromos) and a trajectory",

1412

"file is supplied, the structure with",

1413

"the smallest average distance to the others or the average structure",

1414

"or all structures for each cluster will be written to a trajectory",

1415

"file. When writing all structures, separate numbered files are made",

1416

"for each cluster.[PAR]",

1417


1418

"Two output files are always written:",

1419

"",

1420

" * [TT]o[tt] writes the RMSD values in the upper left half of the matrix",

1421

" and a graphical depiction of the clusters in the lower right half",

1422

" When [TT]minstruct[tt] = 1 the graphical depiction is black",

1423

" when two structures are in the same cluster.",

1424

" When [TT]minstruct[tt] > 1 different colors will be used for each",

1425

" cluster.",

1426

" * [TT]g[tt] writes information on the options used and a detailed list",

1427

" of all clusters and their members.",

1428

"",

1429


1430

"Additionally, a number of optional output files can be written:",

1431

"",

1432

" * [TT]dist[tt] writes the RMSD distribution.",

1433

" * [TT]ev[tt] writes the eigenvectors of the RMSD matrix",

1434

" diagonalization.",

1435

" * [TT]sz[tt] writes the cluster sizes.",

1436

" * [TT]tr[tt] writes a matrix of the number transitions between",

1437

" cluster pairs.",

1438

" * [TT]ntr[tt] writes the total number of transitions to or from",

1439

" each cluster.",

1440

" * [TT]clid[tt] writes the cluster number as a function of time.",

1441

" * [TT]cl[tt] writes average (with option [TT]av[tt]) or central",

1442

" structure of each cluster or writes numbered files with cluster members",

1443

" for a selected set of clusters (with option [TT]wcl[tt], depends on",

1444

" [TT]nst[tt] and [TT]rmsmin[tt]). The center of a cluster is the",

1445

" structure with the smallest average RMSD from all other structures",

1446

" of the cluster.",

1447

};

1448


1449

FILE *fp, *log;

1450

int nf, i, i1, i2, j;

1451

gmx_int64_t nrms = 0;

1452


1453

matrix box;

1454

matrix *boxes;

1455

rvec *xtps, *usextps, *x1, **xx = NULL;

1456

const char *fn, *trx_out_fn;

1457

t_clusters clust;

1458

t_mat *rms, *orig = NULL;

1459

real *eigenvalues;

1460

t_topology top;

1461

int ePBC;

1462

t_atoms useatoms;

1463

t_matrix *readmat = NULL;

1464

real *eigenvectors;

1465


1466

int isize = 0, ifsize = 0, iosize = 0;

1467

int *index = NULL, *fitidx = NULL, *outidx = NULL, *frameindexes = NULL;

1468

char *grpname;

1469

real rmsd, **d1, **d2, *time = NULL, time_invfac, *mass = NULL;

1470

char buf[STRLEN], buf1[80], title[STRLEN];

1471

gmx_bool bAnalyze, bUseRmsdCut, bJP_RMSD = FALSE, bReadMat, bReadTraj, bPBC = TRUE;

1472


1473

int method, ncluster = 0;

1474

static const char *methodname[] = {

1475

NULL, "linkage", "jarvispatrick", "montecarlo",

1476

"diagonalization", "gromos", NULL

1477

};

1478

enum {

1479

m_null, m_linkage, m_jarvis_patrick,

1480

m_monte_carlo, m_diagonalize, m_gromos, m_nr

1481

};

1482


1483

static t_rgb rlo_top = { 1.0, 1.0, 1.0 };

1484

static t_rgb rhi_top = { 0.0, 0.0, 0.0 };

1485

static t_rgb rlo_bot = { 1.0, 1.0, 1.0 };

1486

static t_rgb rhi_bot = { 0.0, 0.0, 1.0 };

1487

static int nlevels = 40, skip = 1;

1488

static real scalemax = 1.0, rmsdcut = 0.1, rmsmin = 0.0;

1489

gmx_bool bRMSdist = FALSE, bBinary = FALSE, bAverage = FALSE, bFit = TRUE;

1490

static int niter = 10000, nrandom = 0, seed = 0, write_ncl = 0, write_nst = 1, minstruct = 1;

1491

static real kT = 1e3;

1492

static int M = 10, P = 3;

1493

gmx_output_env_t *oenv;

1494

gmx_rmpbc_t gpbc = NULL;

1495


1496

t_pargs pa[] = {

1497

{ "dista", FALSE, etBOOL, {&bRMSdist},

1498

"Use RMSD of distances instead of RMS deviation" },

1499

{ "nlevels", FALSE, etINT, {&nlevels},

1500

"Discretize RMSD matrix in this number of levels" },

1501

{ "cutoff", FALSE, etREAL, {&rmsdcut},

1502

"RMSD cutoff (nm) for two structures to be neighbor" },

1503

{ "fit", FALSE, etBOOL, {&bFit},

1504

"Use least squares fitting before RMSD calculation" },

1505

{ "max", FALSE, etREAL, {&scalemax},

1506

"Maximum level in RMSD matrix" },

1507

{ "skip", FALSE, etINT, {&skip},

1508

"Only analyze every nrth frame" },

1509

{ "av", FALSE, etBOOL, {&bAverage},

1510

"Write average iso middle structure for each cluster" },

1511

{ "wcl", FALSE, etINT, {&write_ncl},

1512

"Write the structures for this number of clusters to numbered files" },

1513

{ "nst", FALSE, etINT, {&write_nst},

1514

"Only write all structures if more than this number of structures per cluster" },

1515

{ "rmsmin", FALSE, etREAL, {&rmsmin},

1516

"minimum rms difference with rest of cluster for writing structures" },

1517

{ "method", FALSE, etENUM, {methodname},

1518

"Method for cluster determination" },

1519

{ "minstruct", FALSE, etINT, {&minstruct},

1520

"Minimum number of structures in cluster for coloring in the [REF].xpm[ref] file" },

1521

{ "binary", FALSE, etBOOL, {&bBinary},

1522

"Treat the RMSD matrix as consisting of 0 and 1, where the cutoff "

1523

"is given by [TT]cutoff[tt]" },

1524

{ "M", FALSE, etINT, {&M},

1525

"Number of nearest neighbors considered for JarvisPatrick algorithm, "

1526

"0 is use cutoff" },

1527

{ "P", FALSE, etINT, {&P},

1528

"Number of identical nearest neighbors required to form a cluster" },

1529

{ "seed", FALSE, etINT, {&seed},

1530

"Random number seed for Monte Carlo clustering algorithm (0 means generate)" },

1531

{ "niter", FALSE, etINT, {&niter},

1532

"Number of iterations for MC" },

1533

{ "nrandom", FALSE, etINT, {&nrandom},

1534

"The first iterations for MC may be done complete random, to shuffle the frames" },

1535

{ "kT", FALSE, etREAL, {&kT},

1536

"Boltzmann weighting factor for Monte Carlo optimization "

1537

"(zero turns off uphill steps)" },

1538

{ "pbc", FALSE, etBOOL,

1539

{ &bPBC }, "PBC check" }

1540

};

1541

t_filenm fnm[] = {

1542

{ efTRX, "f", NULL, ffOPTRD },

1543

{ efTPS, "s", NULL, ffOPTRD },

1544

{ efNDX, NULL, NULL, ffOPTRD },

1545

{ efXPM, "dm", "rmsd", ffOPTRD },

1546

{ efXPM, "om", "rmsdraw", ffWRITE },

1547

{ efXPM, "o", "rmsdclust", ffWRITE },

1548

{ efLOG, "g", "cluster", ffWRITE },

1549

{ efXVG, "dist", "rmsddist", ffOPTWR },

1550

{ efXVG, "ev", "rmsdeig", ffOPTWR },

1551

{ efXVG, "conv", "mcconv", ffOPTWR },

1552

{ efXVG, "sz", "clustsize", ffOPTWR},

1553

{ efXPM, "tr", "clusttrans", ffOPTWR},

1554

{ efXVG, "ntr", "clusttrans", ffOPTWR},

1555

{ efXVG, "clid", "clustid", ffOPTWR},

1556

{ efTRX, "cl", "clusters.pdb", ffOPTWR },

1557

{ efNDX, "clndx","clusters.ndx", ffOPTWR }

1558

};

1559

#define NFILE asize(fnm)

1560


1561

if (!parse_common_args(&argc, argv,

1562

PCA_CAN_VIEW  PCA_CAN_TIME  PCA_TIME_UNIT,

1563

NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL,

1564

&oenv))

1565

{

1566

return 0;

1567

}

1568


1569


1570

bReadMat = opt2bSet("dm", NFILE, fnm);

1571

bReadTraj = opt2bSet("f", NFILE, fnm)  !bReadMat;

1572

if (opt2parg_bSet("av", asize(pa), pa) 

1573

opt2parg_bSet("wcl", asize(pa), pa) 

1574

opt2parg_bSet("nst", asize(pa), pa) 

1575

opt2parg_bSet("rmsmin", asize(pa), pa) 

1576

opt2bSet("cl", NFILE, fnm) )

1577

{

1578

trx_out_fn = opt2fn("cl", NFILE, fnm);

1579

}

1580

else

1581

{

1582

trx_out_fn = NULL;

1583

}

1584

if (bReadMat && output_env_get_time_factor(oenv) != 1)

1585

{

1586

fprintf(stderr,

1587

"\nWarning: assuming the time unit in %s is %s\n",

1588

opt2fn("dm", NFILE, fnm), output_env_get_time_unit(oenv));

1589

}

1590

if (trx_out_fn && !bReadTraj)

1591

{

1592

fprintf(stderr, "\nWarning: "

1593

"cannot write cluster structures without reading trajectory\n"

1594

" ignoring option cl %s\n", trx_out_fn);

1595

}

1596


1597

method = 1;

1598

while (method < m_nr && gmx_strcasecmp(methodname[0], methodname[method]) != 0)

1599

{

1600

method++;

1601

}

1602

if (method == m_nr)

1603

{

1604

gmx_fatal(FARGS, "Invalid method");

1605

}

1606


1607

bAnalyze = (method == m_linkage  method == m_jarvis_patrick 

1608

method == m_gromos );

1609


1610


1611

log = ftp2FILE(efLOG, NFILE, fnm, "w");

1612


1613

fprintf(stderr, "Using %s method for clustering\n", methodname[0]);

1614

fprintf(log, "Using %s method for clustering\n", methodname[0]);

1615


1616


1617

bUseRmsdCut = FALSE;

1618

if (method == m_jarvis_patrick)

1619

{

1620

bJP_RMSD = (M == 0)  opt2parg_bSet("cutoff", asize(pa), pa);

1621

if ((M < 0)  (M == 1))

1622

{

1623

gmx_fatal(FARGS, "M (%d) must be 0 or larger than 1", M);

1624

}

1625

if (M < 2)

1626

{

1627

sprintf(buf1, "Will use P=%d and RMSD cutoff (%g)", P, rmsdcut);

1628

bUseRmsdCut = TRUE;

1629

}

1630

else

1631

{

1632

if (P >= M)

1633

{

1634

gmx_fatal(FARGS, "Number of neighbors required (P) must be less than M");

1635

}

1636

if (bJP_RMSD)

1637

{

1638

sprintf(buf1, "Will use P=%d, M=%d and RMSD cutoff (%g)", P, M, rmsdcut);

1639

bUseRmsdCut = TRUE;

1640

}

1641

else

1642

{

1643

sprintf(buf1, "Will use P=%d, M=%d", P, M);

1644

}

1645

}

1646

ffprintf_s(stderr, log, buf, "%s for determining the neighbors\n\n", buf1);

1647

}

1648

else

1649

{

1650

bUseRmsdCut = ( bBinary  method == m_linkage  method == m_gromos );

1651

}

1652

if (bUseRmsdCut && method != m_jarvis_patrick)

1653

{

1654

fprintf(log, "Using RMSD cutoff %g nm\n", rmsdcut);

1655

}

1656

if (method == m_monte_carlo)

1657

{

1658

fprintf(log, "Using %d iterations\n", niter);

1659

}

1660


1661

if (skip < 1)

1662

{

1663

gmx_fatal(FARGS, "skip (%d) should be >= 1", skip);

1664

}

1665


1666


1667

if (bReadTraj)

1668

{

1669


1670

read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &ePBC, &xtps, NULL, box,

1671

TRUE);

1672

if (bPBC)

1673

{

1674

gpbc = gmx_rmpbc_init(&top.idef, ePBC, top.atoms.nr);

1675

}

1676


1677

fprintf(stderr, "\nSelect group for least squares fit%s:\n",

1678

bReadMat ? "" : " and RMSD calculation");

1679

get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),

1680

1, &ifsize, &fitidx, &grpname);

1681

if (trx_out_fn)

1682

{

1683

fprintf(stderr, "\nSelect group for output:\n");

1684

get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),

1685

1, &iosize, &outidx, &grpname);

1686


1687


1688

snew(index, iosize);

1689

isize = iosize;

1690

for (i = 0; i < iosize; i++)

1691

{

1692

index[i] = outidx[i];

1693

outidx[i] = i;

1694

}

1695


1696


1697

for (i = 0; i < ifsize; i++)

1698

{

1699

j = 0;

1700

while (j < isize && index[j] != fitidx[i])

1701

{

1702

j++;

1703

}

1704

if (j >= isize)

1705

{

1706


1707

isize++;

1708

srenew(index, isize);

1709

}

1710

index[j] = fitidx[i];

1711

fitidx[i] = j;

1712

}

1713

}

1714

else

1715

{

1716

isize = ifsize;

1717

snew(index, isize);

1718

for (i = 0; i < ifsize; i++)

1719

{

1720

index[i] = fitidx[i];

1721

fitidx[i] = i;

1722

}

1723

}

1724

}

1725


1726

if (bReadTraj)

1727

{

1728


1729

fn = opt2fn("f", NFILE, fnm);

1730


1731

xx = read_whole_trj(fn, isize, index, skip, &nf, &time, &boxes, &frameindexes, oenv, bPBC, gpbc);

1732

output_env_conv_times(oenv, nf, time);

1733

if (!bRMSdist  bAnalyze)

1734

{

1735


1736

snew(mass, isize);

1737

for (i = 0; i < ifsize; i++)

1738

{

1739

mass[fitidx[i]] = top.atoms.atom[index[fitidx[i]]].m;

1740

}

1741

if (bFit)

1742

{

1743

for (i = 0; i < nf; i++)

1744

{

1745

reset_x(ifsize, fitidx, isize, NULL, xx[i], mass);

1746

}

1747

}

1748

}

1749

if (bPBC)

1750

{

1751

gmx_rmpbc_done(gpbc);

1752

}

1753

}

1754


1755

if (bReadMat)

1756

{

1757

fprintf(stderr, "Reading rms distance matrix ");

1758

read_xpm_matrix(opt2fn("dm", NFILE, fnm), &readmat);

1759

fprintf(stderr, "\n");

1760

if (readmat[0].nx != readmat[0].ny)

1761

{

1762

gmx_fatal(FARGS, "Matrix (%dx%d) is not square",

1763

readmat[0].nx, readmat[0].ny);

1764

}

1765

if (bReadTraj && bAnalyze && (readmat[0].nx != nf))

1766

{

1767

gmx_fatal(FARGS, "Matrix size (%dx%d) does not match the number of "

1768

"frames (%d)", readmat[0].nx, readmat[0].ny, nf);

1769

}

1770


1771

nf = readmat[0].nx;

1772

sfree(time);

1773

time = readmat[0].axis_x;

1774

time_invfac = output_env_get_time_invfactor(oenv);

1775

for (i = 0; i < nf; i++)

1776

{

1777

time[i] *= time_invfac;

1778

}

1779


1780

rms = init_mat(readmat[0].nx, method == m_diagonalize);

1781

convert_mat(&(readmat[0]), rms);

1782


1783

nlevels = readmat[0].nmap;

1784

}

1785

else

1786

{

1787

rms = init_mat(nf, method == m_diagonalize);

1788

nrms = (static_cast<gmx_int64_t>(nf)*static_cast<gmx_int64_t>(nf1))/2;

1789

if (!bRMSdist)

1790

{

1791

fprintf(stderr, "Computing %dx%d RMS deviation matrix\n", nf, nf);

1792


1793

snew(x1, isize);

1794

for (i1 = 0; i1 < nf; i1++)

1795

{

1796

for (i2 = i1+1; i2 < nf; i2++)

1797

{

1798

for (i = 0; i < isize; i++)

1799

{

1800

copy_rvec(xx[i1][i], x1[i]);

1801

}

1802

if (bFit)

1803

{

1804

do_fit(isize, mass, xx[i2], x1);

1805

}

1806

rmsd = rmsdev(isize, mass, xx[i2], x1);

1807

set_mat_entry(rms, i1, i2, rmsd);

1808

}

1809

nrms = nfi11;

1810

fprintf(stderr, "\r# RMSD calculations left: " "%" GMX_PRId64 " ", nrms);

1811

fflush(stderr);

1812

}

1813

sfree(x1);

1814

}

1815

else

1816

{

1817

fprintf(stderr, "Computing %dx%d RMS distance deviation matrix\n", nf, nf);

1818


1819


1820

snew(d1, isize);

1821

snew(d2, isize);

1822

for (i = 0; (i < isize); i++)

1823

{

1824

snew(d1[i], isize);

1825

snew(d2[i], isize);

1826

}

1827

for (i1 = 0; i1 < nf; i1++)

1828

{

1829

calc_dist(isize, xx[i1], d1);

1830

for (i2 = i1+1; (i2 < nf); i2++)

1831

{

1832

calc_dist(isize, xx[i2], d2);

1833

set_mat_entry(rms, i1, i2, rms_dist(isize, d1, d2));

1834

}

1835

nrms = nfi11;

1836

fprintf(stderr, "\r# RMSD calculations left: " "%" GMX_PRId64 " ", nrms);

1837

fflush(stderr);

1838

}

1839


1840

for (i = 0; (i < isize); i++)

1841

{

1842

sfree(d1[i]);

1843

sfree(d2[i]);

1844

}

1845

sfree(d1);

1846

sfree(d2);

1847

}

1848

fprintf(stderr, "\n\n");

1849

}

1850

ffprintf_gg(stderr, log, buf, "The RMSD ranges from %g to %g nm\n",

1851

rms>minrms, rms>maxrms);

1852

ffprintf_g(stderr, log, buf, "Average RMSD is %g\n", 2*rms>sumrms/(nf*(nf1)));

1853

ffprintf_d(stderr, log, buf, "Number of structures for matrix %d\n", nf);

1854

ffprintf_g(stderr, log, buf, "Energy of the matrix is %g.\n", mat_energy(rms));

1855

if (bUseRmsdCut && (rmsdcut < rms>minrms  rmsdcut > rms>maxrms) )

1856

{

1857

fprintf(stderr, "WARNING: rmsd cutoff %g is outside range of rmsd values "

1858

"%g to %g\n", rmsdcut, rms>minrms, rms>maxrms);

1859

}

1860

if (bAnalyze && (rmsmin < rms>minrms) )

1861

{

1862

fprintf(stderr, "WARNING: rmsd minimum %g is below lowest rmsd value %g\n",

1863

rmsmin, rms>minrms);

1864

}

1865

if (bAnalyze && (rmsmin > rmsdcut) )

1866

{

1867

fprintf(stderr, "WARNING: rmsd minimum %g is above rmsd cutoff %g\n",

1868

rmsmin, rmsdcut);

1869

}

1870


1871


1872

rmsd_distribution(opt2fn("dist", NFILE, fnm), rms, oenv);

1873


1874

if (bBinary)

1875

{

1876

for (i1 = 0; (i1 < nf); i1++)

1877

{

1878

for (i2 = 0; (i2 < nf); i2++)

1879

{

1880

if (rms>mat[i1][i2] < rmsdcut)

1881

{

1882

rms>mat[i1][i2] = 0;

1883

}

1884

else

1885

{

1886

rms>mat[i1][i2] = 1;

1887

}

1888

}

1889

}

1890

}

1891


1892

snew(clust.cl, nf);

1893

switch (method)

1894

{

1895

case m_linkage:

1896


1897

gather(rms, rmsdcut, &clust);

1898

break;

1899

case m_diagonalize:

1900


1901

snew(eigenvalues, nf);

1902

snew(eigenvectors, nf*nf);

1903

std::memcpy(eigenvectors, rms>mat[0], nf*nf*sizeof(real));

1904

eigensolver(eigenvectors, nf, 0, nf, eigenvalues, rms>mat[0]);

1905

sfree(eigenvectors);

1906


1907

fp = xvgropen(opt2fn("ev", NFILE, fnm), "RMSD matrix Eigenvalues",

1908

"Eigenvector index", "Eigenvalues (nm\\S2\\N)", oenv);

1909

for (i = 0; (i < nf); i++)

1910

{

1911

fprintf(fp, "%10d %10g\n", i, eigenvalues[i]);

1912

}

1913

xvgrclose(fp);

1914

break;

1915

case m_monte_carlo:

1916

orig = init_mat(rms>nn, FALSE);

1917

orig>nn = rms>nn;

1918

copy_t_mat(orig, rms);

1919

mc_optimize(log, rms, time, niter, nrandom, seed, kT,

1920

opt2fn_null("conv", NFILE, fnm), oenv);

1921

break;

1922

case m_jarvis_patrick:

1923

jarvis_patrick(rms>nn, rms>mat, M, P, bJP_RMSD ? rmsdcut : 1, &clust);

1924

break;

1925

case m_gromos:

1926

gromos(rms>nn, rms>mat, rmsdcut, &clust);

1927

break;

1928

default:

1929

gmx_fatal(FARGS, "DEATH HORROR unknown method \"%s\"", methodname[0]);

1930

}

1931


1932

if (method == m_monte_carlo  method == m_diagonalize)

1933

{

1934

fprintf(stderr, "Energy of the matrix after clustering is %g.\n",

1935

mat_energy(rms));

1936

}

1937


1938

if (bAnalyze)

1939

{

1940

if (minstruct > 1)

1941

{

1942

ncluster = plot_clusters(nf, rms>mat, &clust, minstruct);

1943

}

1944

else

1945

{

1946

mark_clusters(nf, rms>mat, rms>maxrms, &clust);

1947

}

1948

init_t_atoms(&useatoms, isize, FALSE);

1949

snew(usextps, isize);

1950

useatoms.resinfo = top.atoms.resinfo;

1951

for (i = 0; i < isize; i++)

1952

{

1953

useatoms.atomname[i] = top.atoms.atomname[index[i]];

1954

useatoms.atom[i].resind = top.atoms.atom[index[i]].resind;

1955

useatoms.nres = std::max(useatoms.nres, useatoms.atom[i].resind+1);

1956

copy_rvec(xtps[index[i]], usextps[i]);

1957

}

1958

useatoms.nr = isize;

1959

analyze_clusters(nf, &clust, rms>mat, isize, &useatoms, usextps, mass, xx, time, boxes, frameindexes,

1960

ifsize, fitidx, iosize, outidx,

1961

bReadTraj ? trx_out_fn : NULL,

1962

opt2fn_null("sz", NFILE, fnm),

1963

opt2fn_null("tr", NFILE, fnm),

1964

opt2fn_null("ntr", NFILE, fnm),

1965

opt2fn_null("clid", NFILE, fnm),

1966

opt2fn_null("clndx", NFILE, fnm),

1967

bAverage, write_ncl, write_nst, rmsmin, bFit, log,

1968

rlo_bot, rhi_bot, oenv);

1969

sfree(boxes);

1970

sfree(frameindexes);

1971

}

1972

gmx_ffclose(log);

1973


1974

if (bBinary && !bAnalyze)

1975

{

1976


1977

for (i2 = 0; (i2 < nf); i2++)

1978

{

1979

for (i1 = i2+1; (i1 < nf); i1++)

1980

{

1981

if (rms>mat[i1][i2])

1982

{

1983

rms>mat[i1][i2] = rms>maxrms;

1984

}

1985

}

1986

}

1987

}

1988


1989

fp = opt2FILE("o", NFILE, fnm, "w");

1990

fprintf(stderr, "Writing rms distance/clustering matrix ");

1991

if (bReadMat)

1992

{

1993

write_xpm(fp, 0, readmat[0].title, readmat[0].legend, readmat[0].label_x,

1994

readmat[0].label_y, nf, nf, readmat[0].axis_x, readmat[0].axis_y,

1995

rms>mat, 0.0, rms>maxrms, rlo_top, rhi_top, &nlevels);

1996

}

1997

else

1998

{

1999

sprintf(buf, "Time (%s)", output_env_get_time_unit(oenv));

2000

sprintf(title, "RMS%sDeviation / Cluster Index",

2001

bRMSdist ? " Distance " : " ");

2002

if (minstruct > 1)

2003

{

2004

write_xpm_split(fp, 0, title, "RMSD (nm)", buf, buf,

2005

nf, nf, time, time, rms>mat, 0.0, rms>maxrms, &nlevels,

2006

rlo_top, rhi_top, 0.0, ncluster,

2007

&ncluster, TRUE, rlo_bot, rhi_bot);

2008

}

2009

else

2010

{

2011

write_xpm(fp, 0, title, "RMSD (nm)", buf, buf,

2012

nf, nf, time, time, rms>mat, 0.0, rms>maxrms,

2013

rlo_top, rhi_top, &nlevels);

2014

}

2015

}

2016

fprintf(stderr, "\n");

2017

gmx_ffclose(fp);

2018

if (NULL != orig)

2019

{

2020

fp = opt2FILE("om", NFILE, fnm, "w");

2021

sprintf(buf, "Time (%s)", output_env_get_time_unit(oenv));

2022

sprintf(title, "RMS%sDeviation", bRMSdist ? " Distance " : " ");

2023

write_xpm(fp, 0, title, "RMSD (nm)", buf, buf,

2024

nf, nf, time, time, orig>mat, 0.0, orig>maxrms,

2025

rlo_top, rhi_top, &nlevels);

2026

gmx_ffclose(fp);

2027

done_mat(&orig);

2028

sfree(orig);

2029

}

2030


2031

do_view(oenv, opt2fn("o", NFILE, fnm), "nxy");

2032

do_view(oenv, opt2fn_null("sz", NFILE, fnm), "nxy");

2033

if (method == m_diagonalize)

2034

{

2035

do_view(oenv, opt2fn_null("ev", NFILE, fnm), "nxy");

2036

}

2037

do_view(oenv, opt2fn("dist", NFILE, fnm), "nxy");

2038

if (bAnalyze)

2039

{

2040

do_view(oenv, opt2fn_null("tr", NFILE, fnm), "nxy");

2041

do_view(oenv, opt2fn_null("ntr", NFILE, fnm), "nxy");

2042

do_view(oenv, opt2fn_null("clid", NFILE, fnm), "nxy");

2043

}

2044

do_view(oenv, opt2fn_null("conv", NFILE, fnm), NULL);

2045


2046

return 0;

2047

}
