/***************************************************************************
 *   Copyright (C) 2012 by Oliver Bock,Heinz-Bernd Eggenstein              *
 *   oliver.bock[AT]aei.mpg.de                                             *
 *   heinz-bernd.eggenstein[AT]aei.mpg.de                                  *
 *                                                                         *
 *   This file is part of libclfft (originally for Einstein@Home)          *
 *   Derived from clFFT,  (C) Apple, see notice below.                     *
 *                                                                         *
 *                                                                         *
 *   libclfft  is distributed in the hope that it will be useful,          *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See              *
 *   notice below for more details.                                        *
 *                                                                         *
 ***************************************************************************/
//
// File:       fft_internal.h
//
// Version:    <1.0>
//
// Disclaimer: IMPORTANT:  This Apple software is supplied to you by Apple Inc. ("Apple")
//             in consideration of your agreement to the following terms, and your use,
//             installation, modification or redistribution of this Apple software
//             constitutes acceptance of these terms.  If you do not agree with these
//             terms, please do not use, install, modify or redistribute this Apple
//             software.
//
//             In consideration of your agreement to abide by the following terms, and
//             subject to these terms, Apple grants you a personal, non - exclusive
//             license, under Apple's copyrights in this original Apple software ( the
//             "Apple Software" ), to use, reproduce, modify and redistribute the Apple
//             Software, with or without modifications, in source and / or binary forms;
//             provided that if you redistribute the Apple Software in its entirety and
//             without modifications, you must retain this notice and the following text
//             and disclaimers in all such redistributions of the Apple Software. Neither
//             the name, trademarks, service marks or logos of Apple Inc. may be used to
//             endorse or promote products derived from the Apple Software without specific
//             prior written permission from Apple.  Except as expressly stated in this
//             notice, no other rights or licenses, express or implied, are granted by
//             Apple herein, including but not limited to any patent rights that may be
//             infringed by your derivative works or by other works in which the Apple
//             Software may be incorporated.
//
//             The Apple Software is provided by Apple on an "AS IS" basis.  APPLE MAKES NO
//             WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
//             WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
//             PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
//             ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
//
//             IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
//             CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
//             SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
//             INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
//             AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
//             UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
//             OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
//
////////////////////////////////////////////////////////////////////////////////////////////////////


#ifndef __CLFFT_INTERNAL_H
#define __CLFFT_INTERNAL_H

#include "clFFT.h"
#include <iostream>
#include <string>
#include <sstream>

using namespace std;

// Direction tag attached to an FFT kernel: x, y or z.
// The enumerators carry no explicit initialisers, so they take the default
// values 0, 1 and 2 in declaration order.
typedef enum kernel_dir_t
{
    cl_fft_kernel_x,
    cl_fft_kernel_y,
    cl_fft_kernel_z
}cl_fft_kernel_dir;

typedef struct kernel_info_t
{
Oliver Bock's avatar
Oliver Bock committed
82
83
84
85
    cl_kernel kernel;
    char *kernel_name;
    size_t lmem_size;
    size_t num_workgroups;
86
    size_t num_xforms_per_workgroup;
Oliver Bock's avatar
Oliver Bock committed
87
88
89
90
    size_t num_workitems_per_workgroup;
    cl_fft_kernel_dir dir;
    int in_place_possible;
    kernel_info_t *next;
91
92
}cl_fft_kernel_info;

Oliver Bock's avatar
Oliver Bock committed
93
typedef struct
94
{
Oliver Bock's avatar
Oliver Bock committed
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
    // context in which fft resources are created and kernels are executed
    cl_context              context;

    // size of signal
    clFFT_Dim3              n;

    // dimension of transform ... must be either 1D, 2D or 3D
    clFFT_Dimension         dim;

    // data format ... must be either interleaved or plannar
    clFFT_DataFormat        format;

    // string containing kernel source. Generated at runtime based on
    // n, dim, format and other parameters
    string                  *kernel_string;

111
    // CL program containing source and kernel for this particular
Oliver Bock's avatar
Oliver Bock committed
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
    // n, dim, data format
    cl_program              program;

    // linked list of kernels which needs to be executed for this fft
    cl_fft_kernel_info      *kernel_info;

    // number of kernels
    int                     num_kernels;

    // twist kernel for virtualizing fft of very large sizes that do not
    // fit in GPU global memory
    cl_kernel               twist_kernel;

    // flag indicating if temporary intermediate buffer is needed or not.
    // this depends on fft kernels being executed and if transform is
    // in-place or out-of-place. e.g. Local memory fft (say 1D 1024 ...
    // one that does not require global transpose do not need temporary buffer)
    // 2D 1024x1024 out-of-place fft however do require intermediate buffer.
    // If temp buffer is needed, its allocation is lazy i.e. its not allocated
    // until its needed
    cl_int                  temp_buffer_needed;

    // Batch size is runtime parameter and size of temporary buffer (if needed)
    // depends on batch size. Allocation of temporary buffer is lazy i.e. its
    // only created when needed. Once its created at first call of clFFT_Executexxx
    // it is not allocated next time if next time clFFT_Executexxx is called with
    // batch size different than the first call. last_batch_size caches the last
    // batch size with which this plan is used so that we dont keep allocating/deallocating
    // temp buffer if same batch size is used again and again.
    size_t                  last_batch_size;

    // temporary buffer for interleaved plan
    cl_mem                  tempmemobj;

    // temporary buffer for planner plan. Only one of tempmemobj or
    // (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending
    // data format of plan (plannar or interleaved)
    cl_mem                  tempmemobj_real, tempmemobj_imag;

151
152
153
154

    // precomputed lookup tables for sin,cos calculations, each of size 
    // sqrt(n) or 2*sqrt(n), n is size of signal;
 	
155
156
    cl_mem                  cossin_LUT_d1;
    cl_mem                  cossin_LUT_d2;
157
158
159
160
    int                     logN1;
    int                     logN2;
    size_t                  N1; 
    size_t                  N2;
161
    clFFT_TwiddleFactorMethod twiddleMethod;
162
    
Oliver Bock's avatar
Oliver Bock committed
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
    // Maximum size of signal for which local memory transposed based
    // fft is sufficient i.e. no global mem transpose (communication)
    // is needed
    size_t                  max_localmem_fft_size;

    // Maximum work items per work group allowed. This, along with max_radix below controls
    // maximum local memory being used by fft kernels of this plan. Set to 256 by default
    size_t                  max_work_item_per_workgroup;

    // Maximum base radix for local memory fft ... this controls the maximum register
    // space used by work items. Currently defaults to 16
    size_t                  max_radix;

    // Device depended parameter that tells how many work-items need to be read consecutive
    // values to make sure global memory access by work-items of a work-group result in
    // coalesced memory access to utilize full bandwidth e.g. on NVidia tesla, this is 16
    size_t                  min_mem_coalesce_width;

    // Number of local memory banks. This is used to geneate kernel with local memory
    // transposes with appropriate padding to avoid bank conflicts to local memory
    // e.g. on NVidia it is 16.
    size_t                  num_local_mem_banks;
185
186
187
188
}cl_fft_plan;

// Operates on `plan` for the direction `dir`; the definition lives outside
// this header.
// NOTE(review): presumably generates the 1D FFT kernel(s) for `plan` along
// `dir` and records them in the plan -- confirm against the implementation.
void FFT1D(cl_fft_plan *plan, cl_fft_kernel_dir dir);

#endif