Skip to content
Snippets Groups Projects
Select Git revision
  • 64e15583a967d59e307df968feb3c8aac9303ba1
  • master default
  • mingw_gcc44
  • release_ABP1_012
  • release_ABP1_008
  • release_ABP1_006
  • release_ABP1_007
  • release_ABP1_005
  • release_ABP1_004
  • release_ABP1_003
  • pre_release_0.15
  • release_ABP1_001
  • release_ABP1_002
  • pre_release_0.13
  • pre_release_0.14
  • pre_release_0.11
  • pre_release_0.12
  • pre_release_0.10
  • pre_release_0.09
  • pre_release_0.08
20 results

footer.html

Blame
  • Forked from einsteinathome / graphicsframework
    Source project has a limited visibility.
    fft_internal.h 7.16 KiB
    
    //
    // File:       fft_internal.h
    //
    // Version:    <1.0>
    //
    // Disclaimer: IMPORTANT:  This Apple software is supplied to you by Apple Inc. ("Apple")
    //             in consideration of your agreement to the following terms, and your use,
    //             installation, modification or redistribution of this Apple software
    //             constitutes acceptance of these terms.  If you do not agree with these
    //             terms, please do not use, install, modify or redistribute this Apple
    //             software.
    //
    //             In consideration of your agreement to abide by the following terms, and
    //             subject to these terms, Apple grants you a personal, non - exclusive
    //             license, under Apple's copyrights in this original Apple software ( the
    //             "Apple Software" ), to use, reproduce, modify and redistribute the Apple
    //             Software, with or without modifications, in source and / or binary forms;
    //             provided that if you redistribute the Apple Software in its entirety and
    //             without modifications, you must retain this notice and the following text
    //             and disclaimers in all such redistributions of the Apple Software. Neither
    //             the name, trademarks, service marks or logos of Apple Inc. may be used to
    //             endorse or promote products derived from the Apple Software without specific
    //             prior written permission from Apple.  Except as expressly stated in this
    //             notice, no other rights or licenses, express or implied, are granted by
    //             Apple herein, including but not limited to any patent rights that may be
    //             infringed by your derivative works or by other works in which the Apple
    //             Software may be incorporated.
    //
    //             The Apple Software is provided by Apple on an "AS IS" basis.  APPLE MAKES NO
    //             WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
    //             WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
    //             PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
    //             ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
    //
    //             IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
    //             CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    //             SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    //             INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
    //             AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
    //             UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
    //             OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    //
    // Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
    //
    ////////////////////////////////////////////////////////////////////////////////////////////////////
    
    
    #ifndef __CLFFT_INTERNAL_H
    #define __CLFFT_INTERNAL_H
    
    #include "clFFT.h"
    #include <iostream>
    #include <string>
    #include <sstream>
    
    using namespace std;
    
    typedef enum kernel_dir_t
    {
        cl_fft_kernel_x,
        cl_fft_kernel_y,
        cl_fft_kernel_z
    }cl_fft_kernel_dir;
    
    typedef struct kernel_info_t
    {
        cl_kernel kernel;
        char *kernel_name;
        size_t lmem_size;
        size_t num_workgroups;
        size_t num_xforms_per_workgroup;
        size_t num_workitems_per_workgroup;
        cl_fft_kernel_dir dir;
        int in_place_possible;
        kernel_info_t *next;
    }cl_fft_kernel_info;
    
    typedef struct
    {
        // context in which fft resources are created and kernels are executed
        cl_context              context;
    
        // size of signal
        clFFT_Dim3              n;
    
        // dimension of transform ... must be either 1D, 2D or 3D
        clFFT_Dimension         dim;
    
        // data format ... must be either interleaved or plannar
        clFFT_DataFormat        format;
    
        // string containing kernel source. Generated at runtime based on
        // n, dim, format and other parameters
        string                  *kernel_string;
    
        // CL program containing source and kernel this particular
        // n, dim, data format
        cl_program              program;
    
        // linked list of kernels which needs to be executed for this fft
        cl_fft_kernel_info      *kernel_info;
    
        // number of kernels
        int                     num_kernels;
    
        // twist kernel for virtualizing fft of very large sizes that do not
        // fit in GPU global memory
        cl_kernel               twist_kernel;
    
        // flag indicating if temporary intermediate buffer is needed or not.
        // this depends on fft kernels being executed and if transform is
        // in-place or out-of-place. e.g. Local memory fft (say 1D 1024 ...
        // one that does not require global transpose do not need temporary buffer)
        // 2D 1024x1024 out-of-place fft however do require intermediate buffer.
        // If temp buffer is needed, its allocation is lazy i.e. its not allocated
        // until its needed
        cl_int                  temp_buffer_needed;
    
        // Batch size is runtime parameter and size of temporary buffer (if needed)
        // depends on batch size. Allocation of temporary buffer is lazy i.e. its
        // only created when needed. Once its created at first call of clFFT_Executexxx
        // it is not allocated next time if next time clFFT_Executexxx is called with
        // batch size different than the first call. last_batch_size caches the last
        // batch size with which this plan is used so that we dont keep allocating/deallocating
        // temp buffer if same batch size is used again and again.
        size_t                  last_batch_size;
    
        // temporary buffer for interleaved plan
        cl_mem                  tempmemobj;
    
        // temporary buffer for planner plan. Only one of tempmemobj or
        // (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending
        // data format of plan (plannar or interleaved)
        cl_mem                  tempmemobj_real, tempmemobj_imag;
    
        // Maximum size of signal for which local memory transposed based
        // fft is sufficient i.e. no global mem transpose (communication)
        // is needed
        size_t                  max_localmem_fft_size;
    
        // Maximum work items per work group allowed. This, along with max_radix below controls
        // maximum local memory being used by fft kernels of this plan. Set to 256 by default
        size_t                  max_work_item_per_workgroup;
    
        // Maximum base radix for local memory fft ... this controls the maximum register
        // space used by work items. Currently defaults to 16
        size_t                  max_radix;
    
        // Device depended parameter that tells how many work-items need to be read consecutive
        // values to make sure global memory access by work-items of a work-group result in
        // coalesced memory access to utilize full bandwidth e.g. on NVidia tesla, this is 16
        size_t                  min_mem_coalesce_width;
    
        // Number of local memory banks. This is used to geneate kernel with local memory
        // transposes with appropriate padding to avoid bank conflicts to local memory
        // e.g. on NVidia it is 16.
        size_t                  num_local_mem_banks;
    }cl_fft_plan;
    
    void FFT1D(cl_fft_plan *plan, cl_fft_kernel_dir dir);
    
    #endif