Project

General

Profile

gmx_fft_mkl.c

fixed wrong datatype - Hirokazu Kobayashi, 05/14/2008 12:20 PM

 
1
/*
2
 * $Id: gmx_fft_mkl.c,v 1.1.2.5 2008/02/29 07:02:51 spoel Exp $
3
 * 
4
 *                This source code is part of
5
 * 
6
 *                 G   R   O   M   A   C   S
7
 * 
8
 *          GROningen MAchine for Chemical Simulations
9
 * 
10
 *                        VERSION 3.3.3
11
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13
 * Copyright (c) 2001-2008, The GROMACS development team,
14
 * check out http://www.gromacs.org for more information.
15

16
 * This program is free software; you can redistribute it and/or
17
 * modify it under the terms of the GNU General Public License
18
 * as published by the Free Software Foundation; either version 2
19
 * of the License, or (at your option) any later version.
20
 * 
21
 * If you want to redistribute modifications, please consider that
22
 * scientific software is very special. Version control is crucial -
23
 * bugs must be traceable. We will be happy to consider code for
24
 * inclusion in the official distribution, but derived work must not
25
 * be called official GROMACS. Details are found in the README & COPYING
26
 * files - if they are missing, get the official version at www.gromacs.org.
27
 * 
28
 * To help us fund GROMACS development, we humbly ask that you cite
29
 * the papers on the package - you can find them in the top README file.
30
 * 
31
 * For more info, check our website at http://www.gromacs.org
32
 * 
33
 * And Hey:
34
 * Groningen Machine for Chemical Simulation
35
 */
36
#ifdef HAVE_CONFIG_H
37
#include <config.h>
38
#endif
39

    
40
#include <errno.h>
41
#include <stdlib.h>
42

    
43
#include <mkl_dfti.h>
44

    
45
#include "gmx_fft.h"
46
#include "gmx_fatal.h"
47

    
48
/* 
49
 * For MKL version (<10.0), we should define MKL_LONG.
50
 */
51
#ifndef MKL_LONG
52
#define MKL_LONG long int
53
#endif
54

    
55
#ifdef GMX_DOUBLE
56
#define GMX_DFTI_PREC  DFTI_DOUBLE
57
#else
58
#define GMX_DFTI_PREC  DFTI_SINGLE
59
#endif
60

    
61
/* Contents of the Intel MKL FFT fft datatype.
62
 * 
63
 * Note that this is one of several possible implementations of gmx_fft_t.
64
 *
65
 *  The MKL _API_ supports 1D,2D, and 3D transforms, including real-to-complex.
66
 *  Unfortunately the actual library implementation does not support 3D real
67
 *  transforms as of version 7.2, and versions before 7.0 don't support 2D real
68
 *  either. In addition, the multi-dimensional storage format for real data
69
 *  is not compatible with our padding.
70
 *
71
 *  To work around this we roll our own 2D and 3D real-to-complex transforms,
72
 *  using separate X/Y/Z handles defined to perform (ny*nz), (nx*nz), and
73
 *  (nx*ny) transforms at once when necessary. To perform strided multiple
74
 *  transforms out-of-place (i.e., without padding in the last dimension)
75
 *  on the fly we also need to separate the forward and backward
76
 *  handles for real-to-complex/complex-to-real data permutation.
77
 * 
78
 *  This makes it necessary to define 3 handles for in-place FFTs, and 4 for
79
 *  the out-of-place transforms. Still, whenever possible we try to use 
80
 *  a single 3D-transform handle instead.
81
 *
82
 *  So, the handles are enumerated as follows:
83
 *  
84
 *  1D FFT (real too):    Index 0 is the handle for the entire FFT
85
 *  2D complex FFT:       Index 0 is the handle for the entire FFT
86
 *  3D complex FFT:       Index 0 is the handle for the entire FFT
87
 *  2D, inplace real FFT: 0=FFTx, 1=FFTy handle
88
 *  2D, ooplace real FFT: 0=FFTx, 1=real-to-complex FFTy, 2=complex-to-real FFTy                      
89
 *  3D, inplace real FFT: 0=FFTx, 1=FFTy, 2=FFTz handle
90
 *  3D, ooplace real FFT: 0=FFTx, 1=FFTy, 2=r2c FFTz, 3=c2r FFTz                      
91
 *
92
 *  Intel people reading this: Learn from FFTW what a good interface looks like :-)
93
 *
94
 */       
95
struct gmx_fft
96
{
97
    int                ndim;              /**< Number of dimensions in FFT  */
98
    int                nx;                /**< Length of X transform        */
99
    int                ny;                /**< Length of Y transform        */
100
    int                nz;                /**< Length of Z transform        */
101
    int                real_fft;          /**< 1 if real FFT, otherwise 0   */
102
    DFTI_DESCRIPTOR *  inplace[3];        /**< in-place FFT                 */
103
    DFTI_DESCRIPTOR *  ooplace[4];        /**< out-of-place FFT             */
104
    t_complex *    work;                  /**< Enable out-of-place c2r FFT  */
105
};
106

    
107

    
108
int
109
gmx_fft_init_1d(gmx_fft_t *   pfft,
110
                int           nx) 
111
{
112
    gmx_fft_t      fft;
113
    int            d;
114
    int            status;
115
    
116
    if(pfft==NULL)
117
    {
118
        gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
119
        return EINVAL;
120
    }
121
    *pfft = NULL;
122
    
123
    if( (fft = malloc(sizeof(struct gmx_fft))) == NULL)
124
    {
125
        return ENOMEM;
126
    }    
127

    
128
    /* Mark all handles invalid */
129
    for(d=0;d<3;d++)
130
    {
131
        fft->inplace[d] = fft->ooplace[d] = NULL;
132
    }
133
    fft->ooplace[3] = NULL;
134

    
135
    
136
    status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
137

    
138
    if( status == 0 )
139
        status = DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE);
140
    
141
    if( status == 0 )
142
        status = DftiCommitDescriptor(fft->inplace[0]);
143
    
144
    
145
    if( status == 0 )    
146
        status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
147
    
148
    if( status == 0)
149
        DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE);
150
    
151
    if( status == 0)
152
        DftiCommitDescriptor(fft->ooplace[0]);
153
    
154
    
155
    if( status != 0 )
156
    {
157
        gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
158
        gmx_fft_destroy(fft);
159
        return status;
160
    }
161
    
162
    fft->ndim     = 1;
163
    fft->nx       = nx;
164
    fft->real_fft = 0;
165
    fft->work     = NULL;
166
        
167
    *pfft = fft;
168
    return 0;
169
}
170

    
171

    
172

    
173
int
174
gmx_fft_init_1d_real(gmx_fft_t *    pfft,
175
                     int            nx) 
176
{
177
    gmx_fft_t      fft;
178
    int            d;
179
    int            status;
180
    
181
    if(pfft==NULL)
182
    {
183
        gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
184
        return EINVAL;
185
    }
186
    *pfft = NULL;
187

    
188
    if( (fft = malloc(sizeof(struct gmx_fft))) == NULL)
189
    {
190
        return ENOMEM;
191
    }    
192
    
193
    /* Mark all handles invalid */
194
    for(d=0;d<3;d++)
195
    {
196
        fft->inplace[d] = fft->ooplace[d] = NULL;
197
    }
198
    fft->ooplace[3] = NULL;
199
    
200
    status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nx);
201

    
202
    if( status == 0 )
203
        status = DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE);
204

    
205
    if( status == 0 )
206
        status = DftiCommitDescriptor(fft->inplace[0]);
207
    
208

    
209
    if( status == 0 )
210
        status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nx);
211
    
212
    if( status == 0 )
213
        status = DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE);
214

    
215
    if( status == 0 )
216
        status = DftiCommitDescriptor(fft->ooplace[0]);
217

    
218
    
219
    if(status == DFTI_UNIMPLEMENTED)
220
    {
221
        gmx_fatal(FARGS,
222
                  "The linked Intel MKL version (<6.0?) cannot do real FFTs.");
223
        gmx_fft_destroy(fft);
224
        return status;
225
    }
226

    
227
    
228
    if( status != 0 )
229
    {
230
        gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
231
        gmx_fft_destroy(fft);
232
        return status;
233
    }
234
    
235
    fft->ndim     = 1;
236
    fft->nx       = nx;
237
    fft->real_fft = 1;
238
    fft->work     = NULL;
239

    
240
    *pfft = fft;
241
    return 0;
242
}
243

    
244

    
245
            
246
int
247
gmx_fft_init_2d(gmx_fft_t *   pfft,
248
                int           nx, 
249
                int           ny) 
250
{
251
    gmx_fft_t      fft;
252
    int            d;
253
    int            status;
254
    MKL_LONG       length[2];
255
    
256
    if(pfft==NULL)
257
    {
258
        gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
259
        return EINVAL;
260
    }
261
    *pfft = NULL;
262
    
263
    if( (fft = malloc(sizeof(struct gmx_fft))) == NULL)
264
    {
265
        return ENOMEM;
266
    }    
267
    
268
    /* Mark all handles invalid */
269
    for(d=0;d<3;d++)
270
    {
271
        fft->inplace[d] = fft->ooplace[d] = NULL;
272
    }
273
    fft->ooplace[3] = NULL;
274
    
275
    length[0] = (MKL_LONG)nx;
276
    length[1] = (MKL_LONG)ny;
277
    
278
    status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,2,length);
279
    
280
    if( status == 0 )
281
        status = DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE);
282

    
283
    if( status == 0 )
284
        status = DftiCommitDescriptor(fft->inplace[0]);
285

    
286
    
287
    if( status == 0 )
288
        status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,2,length);
289
    
290
    if( status == 0 )
291
        status = DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE);
292

    
293
    if( status == 0 )
294
        status = DftiCommitDescriptor(fft->ooplace[0]);
295

    
296
    
297
    if( status != 0 )
298
    {
299
        gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
300
        gmx_fft_destroy(fft);
301
        return status;
302
    }
303
    
304
    fft->ndim     = 2;
305
    fft->nx       = nx;
306
    fft->ny       = ny;
307
    fft->real_fft = 0;
308
    fft->work     = NULL;
309

    
310
    *pfft = fft;
311
    return 0;
312
}
313

    
314

    
315

    
316
int 
317
gmx_fft_init_2d_real(gmx_fft_t *   pfft,
318
                     int           nx, 
319
                     int           ny) 
320
{
321
    gmx_fft_t      fft;
322
    int            d;
323
    int            status;
324
    MKL_LONG       stride[2];
325
    MKL_LONG       nyc;
326
    
327
    if(pfft==NULL)
328
    {
329
        gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
330
        return EINVAL;
331
    }
332
    *pfft = NULL;
333

    
334
    if( (fft = malloc(sizeof(struct gmx_fft))) == NULL)
335
    {
336
        return ENOMEM;
337
    }    
338
    
339
    nyc = (MKL_LONG)(ny/2 + 1);
340
    
341
    /* Mark all handles invalid */
342
    for(d=0;d<3;d++)
343
    {
344
        fft->inplace[d] = fft->ooplace[d] = NULL;
345
    }
346
    fft->ooplace[3] = NULL;
347
    
348
    /* Roll our own 2D real transform using multiple transforms in MKL,
349
     * since the current MKL versions does not support our storage format,
350
     * and all but the most recent don't even have 2D real FFTs.
351
     */
352
    
353
    /* In-place X FFT */
354
    status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
355
    
356
    if ( status == 0 )
357
    {
358
        stride[0]  = 0;
359
        stride[1]  = nyc;
360
     
361
        status = 
362
            (DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE)    ||
363
             DftiSetValue(fft->inplace[0],DFTI_NUMBER_OF_TRANSFORMS,nyc)  ||
364
             DftiSetValue(fft->inplace[0],DFTI_INPUT_DISTANCE,1)          ||
365
             DftiSetValue(fft->inplace[0],DFTI_INPUT_STRIDES,stride)      ||
366
             DftiSetValue(fft->inplace[0],DFTI_OUTPUT_DISTANCE,1)         ||
367
             DftiSetValue(fft->inplace[0],DFTI_OUTPUT_STRIDES,stride));
368
    }
369
    
370
    if( status == 0 )
371
        status = DftiCommitDescriptor(fft->inplace[0]);
372

    
373
    /* Out-of-place X FFT */
374
    if( status == 0 )
375
        status = DftiCreateDescriptor(&(fft->ooplace[0]),GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
376

    
377
    if( status == 0 )
378
    {
379
        stride[0] = 0;
380
        stride[1] = nyc;
381

    
382
        status =
383
            (DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
384
             DftiSetValue(fft->ooplace[0],DFTI_NUMBER_OF_TRANSFORMS,nyc)   ||
385
             DftiSetValue(fft->ooplace[0],DFTI_INPUT_DISTANCE,1)           ||
386
             DftiSetValue(fft->ooplace[0],DFTI_INPUT_STRIDES,stride)       ||
387
             DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_DISTANCE,1)          ||
388
             DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_STRIDES,stride));
389
    }
390

    
391
    if( status == 0 )
392
        status = DftiCommitDescriptor(fft->ooplace[0]);
393

    
394
   
395
    /* In-place Y FFT  */
396
    if( status == 0 )
397
        status = DftiCreateDescriptor(&fft->inplace[1],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)ny);
398
    
399
    if( status == 0 )
400
    {
401
        stride[0] = 0;
402
        stride[1] = 1;
403
               
404
        status = 
405
            (DftiSetValue(fft->inplace[1],DFTI_PLACEMENT,DFTI_INPLACE)     ||
406
             DftiSetValue(fft->inplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx)    ||
407
             DftiSetValue(fft->inplace[1],DFTI_INPUT_DISTANCE,2*nyc)       ||
408
             DftiSetValue(fft->inplace[1],DFTI_INPUT_STRIDES,stride)       ||
409
             DftiSetValue(fft->inplace[1],DFTI_OUTPUT_DISTANCE,2*nyc)      ||
410
             DftiSetValue(fft->inplace[1],DFTI_OUTPUT_STRIDES,stride)     ||
411
             DftiCommitDescriptor(fft->inplace[1]));
412
    }
413

    
414

    
415
    /* Out-of-place real-to-complex (affects output distance) Y FFT */
416
    if( status == 0 )
417
        status = DftiCreateDescriptor(&fft->ooplace[1],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)ny);
418
    
419
    if( status == 0 )
420
    {
421
        stride[0] = 0;
422
        stride[1] = 1;
423
         
424
        status =
425
            (DftiSetValue(fft->ooplace[1],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
426
             DftiSetValue(fft->ooplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx)    ||
427
             DftiSetValue(fft->ooplace[1],DFTI_INPUT_DISTANCE,(MKL_LONG)ny)          ||
428
             DftiSetValue(fft->ooplace[1],DFTI_INPUT_STRIDES,stride)       ||
429
             DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_DISTANCE,2*nyc)      ||
430
             DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_STRIDES,stride)      ||
431
             DftiCommitDescriptor(fft->ooplace[1]));
432
    }
433

    
434

    
435
    /* Out-of-place complex-to-real (affects output distance) Y FFT */
436
    if( status == 0 )
437
        status = DftiCreateDescriptor(&fft->ooplace[2],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)ny);
438
    
439
    if( status == 0 )
440
    {
441
        stride[0] = 0;
442
        stride[1] = 1;
443
               
444
        status =
445
            (DftiSetValue(fft->ooplace[2],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
446
             DftiSetValue(fft->ooplace[2],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx)    ||
447
             DftiSetValue(fft->ooplace[2],DFTI_INPUT_DISTANCE,2*nyc)       ||
448
             DftiSetValue(fft->ooplace[2],DFTI_INPUT_STRIDES,stride)       ||
449
             DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_DISTANCE,(MKL_LONG)ny)         ||
450
             DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_STRIDES,stride)      ||
451
             DftiCommitDescriptor(fft->ooplace[2]));
452
    }
453
    
454
    
455
    if ( status == 0 )
456
    {
457
        if ((fft->work = malloc(sizeof(t_complex)*(nx*(ny/2+1)))) == NULL)
458
        {
459
            status = ENOMEM;
460
        }
461
    }
462
    
463
    if( status != 0 )
464
    {
465
        gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
466
        gmx_fft_destroy(fft);
467
        return status;
468
    }
469
    
470
    fft->ndim     = 2;
471
    fft->nx       = nx;
472
    fft->ny       = ny;
473
    fft->real_fft = 1;
474
    
475
    *pfft = fft;
476
    return 0;
477
}
478

    
479

    
480

    
481
int
482
gmx_fft_init_3d(gmx_fft_t *   pfft,
483
                int           nx, 
484
                int           ny,
485
                int           nz) 
486
{
487
    gmx_fft_t      fft;
488
    int            d;
489
    MKL_LONG       length[3];
490
    int            status;
491
    
492
    if(pfft==NULL)
493
    {
494
        gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
495
        return EINVAL;
496
    }
497
    *pfft = NULL;
498

    
499
    if( (fft = malloc(sizeof(struct gmx_fft))) == NULL)
500
    {
501
        return ENOMEM;
502
    }    
503
    
504
    /* Mark all handles invalid */
505
    for(d=0;d<3;d++)
506
    {
507
        fft->inplace[d] = fft->ooplace[d] = NULL;
508
    }
509
    fft->ooplace[3] = NULL;
510
    
511
    length[0] = (MKL_LONG)nx;
512
    length[1] = (MKL_LONG)ny;
513
    length[2] = (MKL_LONG)nz;
514
    
515
    status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,(MKL_LONG)3,length);
516
    
517
    if( status == 0 )
518
        status = DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE);
519

    
520
    if( status == 0 )
521
        status = DftiCommitDescriptor(fft->inplace[0]);
522

    
523
    
524
    if( status == 0 )
525
        status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,3,length);
526
    
527
    if( status == 0 )
528
        status = DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE);
529

    
530
    if( status == 0 )
531
        status = DftiCommitDescriptor(fft->ooplace[0]);
532

    
533
    
534
    if( status != 0 )
535
    {
536
        gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
537
        gmx_fft_destroy(fft);
538
        return status;
539
    }
540
    
541
    
542
    fft->ndim     = 3;
543
    fft->nx       = nx;
544
    fft->ny       = ny;
545
    fft->nz       = nz;
546
    fft->real_fft = 0;
547
    fft->work     = NULL;
548

    
549
    *pfft = fft;
550
    return 0;
551
} 
552

    
553

    
554

    
555

    
556
int
557
gmx_fft_init_3d_real(gmx_fft_t *   pfft,
558
                     int           nx, 
559
                     int           ny,
560
                     int           nz) 
561
{
562
    gmx_fft_t      fft;
563
    int            d;
564
    int            status;
565
    MKL_LONG       stride[2];
566
    int            nzc;
567
    
568
    if(pfft==NULL)
569
    {
570
        gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
571
        return EINVAL;
572
    }
573
    *pfft = NULL;
574

    
575
    nzc = (nz/2 + 1);
576
    
577
    if( (fft = malloc(sizeof(struct gmx_fft))) == NULL)
578
    {
579
        return ENOMEM;
580
    }    
581
    
582
    /* Mark all handles invalid */
583
    for(d=0;d<3;d++)
584
    {
585
        fft->inplace[d] = fft->ooplace[d] = NULL;
586
    }
587
    fft->ooplace[3] = NULL;
588
    
589
    /* Roll our own 3D real transform using multiple transforms in MKL,
590
     * since the current MKL versions does not support our storage format
591
     * or 3D real transforms.
592
     */
593
    
594
    /* In-place X FFT.
595
     * ny*nzc complex-to-complex transforms, length nx 
596
     * transform distance: 1
597
     * element strides: ny*nzc
598
     */
599
    status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
600
    
601
    if ( status == 0)
602
    {
603
        stride[0] = 0;
604
        stride[1] = (MKL_LONG)ny*nzc;
605
        
606
        status = 
607
        (DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE)      ||
608
         DftiSetValue(fft->inplace[0],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)ny*nzc) ||
609
         DftiSetValue(fft->inplace[0],DFTI_INPUT_DISTANCE,1)            ||
610
         DftiSetValue(fft->inplace[0],DFTI_INPUT_STRIDES,stride)        ||
611
         DftiSetValue(fft->inplace[0],DFTI_OUTPUT_DISTANCE,1)           ||
612
         DftiSetValue(fft->inplace[0],DFTI_OUTPUT_STRIDES,stride)       ||
613
         DftiCommitDescriptor(fft->inplace[0]));
614
    }
615
    
616
    /* Out-of-place X FFT: 
617
     * ny*nzc complex-to-complex transforms, length nx 
618
     * transform distance: 1
619
     * element strides: ny*nzc
620
     */
621
    if( status == 0 )
622
        status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
623
    
624
    if( status == 0 )
625
    {
626
        stride[0] = 0;
627
        stride[1] = (MKL_LONG)ny*nzc;
628
        
629
        status =
630
        (DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE)    ||
631
         DftiSetValue(fft->ooplace[0],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)ny*nzc)   ||
632
         DftiSetValue(fft->ooplace[0],DFTI_INPUT_DISTANCE,1)              ||
633
         DftiSetValue(fft->ooplace[0],DFTI_INPUT_STRIDES,stride)          ||
634
         DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_DISTANCE,1)             ||
635
         DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_STRIDES,stride)         ||
636
         DftiCommitDescriptor(fft->ooplace[0]));
637
    }
638
    
639
    
640
    /* In-place Y FFT.
641
     * We cannot do all NX*NZC transforms at once, so define a handle to do
642
     * NZC transforms, and then execute it NX times.
643
     * nzc complex-to-complex transforms, length ny 
644
     * transform distance: 1
645
     * element strides: nzc
646
     */
647
    if( status == 0 )
648
        status = DftiCreateDescriptor(&fft->inplace[1],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)ny);
649
    
650
    if( status == 0 )
651
    {
652
        stride[0] = 0;
653
        stride[1] = (MKL_LONG)nzc;
654
        
655
        status = 
656
        (DftiSetValue(fft->inplace[1],DFTI_PLACEMENT,DFTI_INPLACE)      ||
657
         DftiSetValue(fft->inplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nzc)    ||
658
         DftiSetValue(fft->inplace[1],DFTI_INPUT_DISTANCE,1)            ||
659
         DftiSetValue(fft->inplace[1],DFTI_INPUT_STRIDES,stride)        ||
660
         DftiSetValue(fft->inplace[1],DFTI_OUTPUT_DISTANCE,1)           ||
661
         DftiSetValue(fft->inplace[1],DFTI_OUTPUT_STRIDES,stride)       ||
662
         DftiCommitDescriptor(fft->inplace[1]));
663
    }
664
    
665
    
666
    /* Out-of-place Y FFT: 
667
     * We cannot do all NX*NZC transforms at once, so define a handle to do
668
     * NZC transforms, and then execute it NX times.
669
     * nzc complex-to-complex transforms, length ny 
670
     * transform distance: 1
671
     * element strides: nzc
672
     */
673
    if( status == 0 )
674
        status = DftiCreateDescriptor(&fft->ooplace[1],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)ny);
675
    
676
    if( status == 0 )
677
    {
678
        stride[0] = 0;
679
        stride[1] = (MKL_LONG)nzc;
680
        
681
        status =
682
        (DftiSetValue(fft->ooplace[1],DFTI_PLACEMENT,DFTI_NOT_INPLACE)  ||
683
         DftiSetValue(fft->ooplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nzc)    ||
684
         DftiSetValue(fft->ooplace[1],DFTI_INPUT_DISTANCE,1)            ||
685
         DftiSetValue(fft->ooplace[1],DFTI_INPUT_STRIDES,stride)        ||
686
         DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_DISTANCE,1)           ||
687
         DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_STRIDES,stride)       ||
688
         DftiCommitDescriptor(fft->ooplace[1]));
689
    }
690
    
691
    /* In-place Z FFT: 
692
     * nx*ny real-to-complex transforms, length nz
693
     * transform distance: nzc*2 -> nzc*2
694
     * element strides: 1
695
     */
696
    if( status == 0 )
697
        status = DftiCreateDescriptor(&fft->inplace[2],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nz);
698
    
699
    if( status == 0 )
700
    {
701
        stride[0] = 0;
702
        stride[1] = 1;
703
        
704
        status = 
705
        (DftiSetValue(fft->inplace[2],DFTI_PLACEMENT,DFTI_INPLACE)     ||
706
         DftiSetValue(fft->inplace[2],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx*ny) ||
707
         DftiSetValue(fft->inplace[2],DFTI_INPUT_DISTANCE,(MKL_LONG)nzc*2)       ||
708
         DftiSetValue(fft->inplace[2],DFTI_INPUT_STRIDES,stride)       ||
709
         DftiSetValue(fft->inplace[2],DFTI_OUTPUT_DISTANCE,(MKL_LONG)nzc*2)      ||
710
         DftiSetValue(fft->inplace[2],DFTI_OUTPUT_STRIDES,stride)      ||
711
         DftiCommitDescriptor(fft->inplace[2]));
712
    }
713
    
714
    
715
    /* Out-of-place real-to-complex (affects distance) Z FFT: 
716
     * nx*ny real-to-complex transforms, length nz
717
     * transform distance: nz -> nzc*2
718
     * element STRIDES: 1
719
     */
720
    if( status == 0 )
721
        status = DftiCreateDescriptor(&fft->ooplace[2],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nz);
722
    
723
    if( status == 0 )
724
    {
725
        stride[0] = 0;
726
        stride[1] = 1;
727
        
728
        status =
729
        (DftiSetValue(fft->ooplace[2],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
730
         DftiSetValue(fft->ooplace[2],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx*ny) ||
731
         DftiSetValue(fft->ooplace[2],DFTI_INPUT_DISTANCE,(MKL_LONG)nz)          ||
732
         DftiSetValue(fft->ooplace[2],DFTI_INPUT_STRIDES,stride)       ||
733
         DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_DISTANCE,(MKL_LONG)nzc*2)      ||
734
         DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_STRIDES,stride)      ||
735
         DftiCommitDescriptor(fft->ooplace[2]));
736
    }
737

    
738
    
739
    /* Out-of-place complex-to-real (affects distance) Z FFT: 
740
     * nx*ny real-to-complex transforms, length nz
741
     * transform distance: nzc*2 -> nz
742
     * element STRIDES: 1
743
     */
744
    if( status == 0 )
745
        status = DftiCreateDescriptor(&fft->ooplace[3],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nz);
746
    
747
    if( status == 0 )
748
    {
749
        stride[0] = 0;
750
        stride[1] = 1;
751
        
752
        status =
753
            (DftiSetValue(fft->ooplace[3],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
754
             DftiSetValue(fft->ooplace[3],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx*ny) ||
755
             DftiSetValue(fft->ooplace[3],DFTI_INPUT_DISTANCE,(MKL_LONG)nzc*2)       ||
756
             DftiSetValue(fft->ooplace[3],DFTI_INPUT_STRIDES,stride)       ||
757
             DftiSetValue(fft->ooplace[3],DFTI_OUTPUT_DISTANCE,(MKL_LONG)nz)         ||
758
             DftiSetValue(fft->ooplace[3],DFTI_OUTPUT_STRIDES,stride)      ||
759
             DftiCommitDescriptor(fft->ooplace[3]));
760
    }
761
    
762
    
763
    if ( status == 0 )
764
    {
765
        if ((fft->work = malloc(sizeof(t_complex)*(nx*ny*(nz/2+1)))) == NULL)
766
        {
767
            status = ENOMEM;
768
        }
769
    }
770
    
771
    
772
    if( status != 0 ) 
773
    {
774
        gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
775
        gmx_fft_destroy(fft);
776
        return status;
777
    }
778
    
779
    
780
    fft->ndim     = 3;
781
    fft->nx       = nx;
782
    fft->ny       = ny;
783
    fft->nz       = nz;
784
    fft->real_fft = 1;
785

    
786
    *pfft = fft;
787
    return 0;
788
} 
789

    
790
            
791

    
792

    
793
int 
794
gmx_fft_1d(gmx_fft_t                  fft,
795
           enum gmx_fft_direction     dir,
796
           void *                     in_data,
797
           void *                     out_data)
798
{
799
    int inplace = (in_data == out_data);
800
    int status = 0;
801
    
802
    if( (fft->real_fft == 1) || (fft->ndim != 1) ||
803
        ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
804
    {
805
        gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
806
        return EINVAL;
807
    }    
808
    
809
    if(dir==GMX_FFT_FORWARD)
810
    {
811
        if(inplace)
812
        {
813
            status = DftiComputeForward(fft->inplace[0],in_data);
814
        }
815
        else
816
        {
817
            status = DftiComputeForward(fft->ooplace[0],in_data,out_data);
818
        }
819
    }
820
    else
821
    {
822
        if(inplace)
823
        {
824
            status = DftiComputeBackward(fft->inplace[0],in_data);
825
        }
826
        else
827
        {
828
            status = DftiComputeBackward(fft->ooplace[0],in_data,out_data);
829
        }
830
    }
831
    
832
    if( status != 0 )
833
    {
834
        gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
835
        status = -1;
836
    }
837

    
838
    return status;
839
}
840

    
841

    
842

    
843
int 
844
gmx_fft_1d_real(gmx_fft_t                  fft,
845
                enum gmx_fft_direction     dir,
846
                void *                     in_data,
847
                void *                     out_data)
848
{
849
    int inplace = (in_data == out_data);
850
    int status = 0;
851

    
852
    if( (fft->real_fft != 1) || (fft->ndim != 1) ||
853
        ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
854
    {
855
        gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
856
        return EINVAL;
857
    }    
858
    
859
    if(dir==GMX_FFT_REAL_TO_COMPLEX)
860
    {
861
        if(inplace)
862
        {
863
            status = DftiComputeForward(fft->inplace[0],in_data);
864
        }
865
        else
866
        {
867
            status = DftiComputeForward(fft->ooplace[0],in_data,out_data);
868
        }
869
    }
870
    else
871
    {
872
        if(inplace)
873
        {
874
            status = DftiComputeBackward(fft->inplace[0],in_data);
875
        }
876
        else
877
        {
878
            status = DftiComputeBackward(fft->ooplace[0],in_data,out_data);
879
        }
880
    }
881
    
882
    if( status != 0 )
883
    {
884
        gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
885
        status = -1;
886
    }
887
    
888
    return status;
889
}
890

    
891

    
892
int 
893
gmx_fft_2d(gmx_fft_t                  fft,
894
           enum gmx_fft_direction     dir,
895
           void *                     in_data,
896
           void *                     out_data)
897
{
898
    int inplace = (in_data == out_data);
899
    int status = 0;
900

    
901
    if( (fft->real_fft == 1) || (fft->ndim != 2) ||
902
        ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
903
    {
904
        gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
905
        return EINVAL;
906
    }    
907
    
908
    if(dir==GMX_FFT_FORWARD)
909
    {
910
        if(inplace)
911
        {
912
            status = DftiComputeForward(fft->inplace[0],in_data);
913
        }
914
        else
915
        {
916
            status = DftiComputeForward(fft->ooplace[0],in_data,out_data);
917
        }
918
    }
919
    else
920
    {
921
        if(inplace)
922
        {
923
            status = DftiComputeBackward(fft->inplace[0],in_data);
924
        }
925
        else
926
        {
927
            status = DftiComputeBackward(fft->ooplace[0],in_data,out_data);
928
        }
929
    }
930
    
931
    if( status != 0 )
932
    {
933
        gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
934
        status = -1;
935
    }
936
    
937
    return status;
938
}
939

    
940

    
941
int 
942
gmx_fft_2d_real(gmx_fft_t                  fft,
943
                enum gmx_fft_direction     dir,
944
                void *                     in_data,
945
                void *                     out_data)
946
{
947
    int inplace = (in_data == out_data);
948
    int status = 0;
949
        
950
    if( (fft->real_fft != 1) || (fft->ndim != 2) ||
951
        ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
952
    {
953
        gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
954
        return EINVAL;
955
    }    
956
    
957
    if(dir==GMX_FFT_REAL_TO_COMPLEX)
958
    {
959
        if(inplace)
960
        {
961
            /* real-to-complex in Y dimension, in-place */
962
            status = DftiComputeForward(fft->inplace[1],in_data);
963
            
964
            /* complex-to-complex in X dimension, in-place */
965
            if ( status == 0 )
966
                status = DftiComputeForward(fft->inplace[0],in_data);
967
        }
968
        else
969
        {
970
            /* real-to-complex in Y dimension, in_data to out_data */
971
            status = DftiComputeForward(fft->ooplace[1],in_data,out_data);
972
            
973
            /* complex-to-complex in X dimension, in-place in out_data */
974
            if ( status == 0 )
975
                status = DftiComputeForward(fft->inplace[0],out_data);
976
        }
977
    }
978
    else
979
    {
980
        if(inplace)
981
        {
982
            /* complex-to-complex in X dimension, in-place */
983
            status = DftiComputeBackward(fft->inplace[0],in_data);
984
            
985
            /* complex-to-real in Y dimension, in-place */
986
            if ( status == 0 )
987
                status = DftiComputeBackward(fft->inplace[1],in_data);
988
                        
989
        }
990
        else
991
        {
992
            /* complex-to-complex in X dimension, from in_data to work */
993
            status = DftiComputeBackward(fft->ooplace[0],in_data,fft->work);
994
            
995
            /* complex-to-real in Y dimension, from work to out_data */
996
            if ( status == 0 )
997
                status = DftiComputeBackward(fft->ooplace[1],fft->work,out_data);
998
            
999
        }
1000
    }
1001
    
1002
    if( status != 0 )
1003
    {
1004
        gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
1005
        status = -1;
1006
    }
1007
    
1008
    return status;
1009
}
1010

    
1011

    
1012
int 
1013
gmx_fft_3d(gmx_fft_t                  fft,
1014
           enum gmx_fft_direction     dir,
1015
           void *                     in_data,
1016
           void *                     out_data)
1017
{
1018
    int inplace = (in_data == out_data);
1019
    int status = 0;
1020
    
1021
    if( (fft->real_fft == 1) || (fft->ndim != 3) ||
1022
        ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
1023
    {
1024
        gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
1025
        return EINVAL;
1026
    }    
1027
    
1028
    if(dir==GMX_FFT_FORWARD)
1029
    {
1030
        if(inplace)
1031
        {
1032
            status = DftiComputeForward(fft->inplace[0],in_data);
1033
        }
1034
        else
1035
        {
1036
            status = DftiComputeForward(fft->ooplace[0],in_data,out_data);
1037
        }
1038
    }
1039
    else
1040
    {
1041
        if(inplace)
1042
        {
1043
            status = DftiComputeBackward(fft->inplace[0],in_data);
1044
        }
1045
        else
1046
        {
1047
            status = DftiComputeBackward(fft->ooplace[0],in_data,out_data);
1048
        }
1049
    }
1050
    
1051
    if( status != 0 )
1052
    {
1053
        gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
1054
        status = -1;
1055
    }
1056
    
1057
    return status;
1058
}
1059

    
1060

    
1061
int 
1062
gmx_fft_3d_real(gmx_fft_t                  fft,
1063
                enum gmx_fft_direction     dir,
1064
                void *                     in_data,
1065
                void *                     out_data)
1066
{
1067
    int inplace = (in_data == out_data);
1068
    int status = 0;
1069
    int i;
1070
    int nx,ny,nzc;
1071
    
1072
    nx  = fft->nx;
1073
    ny  = fft->ny;
1074
    nzc = fft->nz/2 + 1;
1075
    
1076
    if( (fft->real_fft != 1) || (fft->ndim != 3) ||
1077
        ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
1078
    {
1079
        gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
1080
        return EINVAL;
1081
    }    
1082
    
1083
    if(dir==GMX_FFT_REAL_TO_COMPLEX)
1084
    {
1085
        if(inplace)
1086
        {
1087
            /* real-to-complex in Z dimension, in-place */
1088
            status = DftiComputeForward(fft->inplace[2],in_data);
1089
            
1090
            /* complex-to-complex in Y dimension, in-place */
1091
            for(i=0;i<nx;i++)
1092
            {
1093
                if ( status == 0 )
1094
                    status = DftiComputeForward(fft->inplace[1],(t_complex *)in_data+i*ny*nzc);
1095
            }
1096

    
1097
            /* complex-to-complex in X dimension, in-place */
1098
            if ( status == 0 )
1099
                status = DftiComputeForward(fft->inplace[0],in_data);
1100
        }
1101
        else
1102
        {
1103
            /* real-to-complex in Z dimension, from in_data to out_data */
1104
            status = DftiComputeForward(fft->ooplace[2],in_data,out_data);
1105
            
1106
            /* complex-to-complex in Y dimension, in-place */
1107
            for(i=0;i<nx;i++)
1108
            {
1109
                if ( status == 0 )
1110
                    status = DftiComputeForward(fft->inplace[1],(t_complex *)out_data+i*ny*nzc);
1111
            }
1112
            
1113
            /* complex-to-complex in X dimension, in-place */
1114
            if ( status == 0 )
1115
                status = DftiComputeForward(fft->inplace[0],out_data);
1116
        }
1117
    }
1118
    else
1119
    {
1120
        if(inplace)
1121
        {
1122
            /* complex-to-complex in X dimension, in-place */
1123
            status = DftiComputeBackward(fft->inplace[0],in_data);
1124
            
1125
            /* complex-to-complex in Y dimension, in-place */
1126
            for(i=0;i<nx;i++)
1127
            {
1128
                if ( status == 0 )
1129
                    status = DftiComputeBackward(fft->inplace[1],(t_complex *)in_data+i*ny*nzc);
1130
            }
1131
            
1132
            /* complex-to-real in Z dimension, in-place */
1133
            if ( status == 0 )
1134
                status = DftiComputeBackward(fft->inplace[2],in_data);
1135
        }
1136
        else
1137
        {
1138
            /* complex-to-complex in X dimension, from in_data to work */
1139
            status = DftiComputeBackward(fft->ooplace[0],in_data,fft->work);
1140
            
1141
            /* complex-to-complex in Y dimension, in-place */
1142
            for(i=0;i<nx;i++)
1143
            {
1144
                if ( status == 0 )
1145
                    status = DftiComputeBackward(fft->inplace[1],fft->work+i*ny*nzc);
1146
            }
1147
            
1148
            /* complex-to-real in Z dimension, work to out_data */
1149
            if ( status == 0 )
1150
                status = DftiComputeBackward(fft->ooplace[2],fft->work,out_data);
1151
        }
1152
    }
1153
    
1154
    if( status != 0 )
1155
    {
1156
        gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
1157
        status = -1;
1158
    }
1159
    
1160
    return status;
1161
}
1162

    
1163

    
1164

    
1165
void
1166
gmx_fft_destroy(gmx_fft_t    fft)
1167
{
1168
    int d;
1169
    
1170
    if(fft != NULL)
1171
    {
1172
        for(d=0;d<3;d++)
1173
        {
1174
            if(fft->inplace[d] != NULL)
1175
            {
1176
                DftiFreeDescriptor(&fft->inplace[d]);
1177
            }
1178
            if(fft->ooplace[d] != NULL)
1179
            {
1180
                DftiFreeDescriptor(&fft->ooplace[d]);
1181
            }
1182
        }
1183
        if(fft->ooplace[3] != NULL)
1184
        {
1185
            DftiFreeDescriptor(&fft->ooplace[3]);
1186
        }
1187
        free(fft);
1188
    }
1189
}