Skip to content
Snippets Groups Projects
Select Git revision
  • f65d1927ae97dfd8aa73598c3582b04866e86d9b
  • master default protected
  • fix_Makefile.mingw#2
  • update_Makefile.mingw
  • fix_Makefile.mingw
  • fix_API_for_C_apps
  • fix_procinfo_mac
  • boinccmd_gpu_mode_always_until_sigterm
  • fgrp_osx_hotfix
  • fix_boinc_master@f8250782
  • eah_wrapper_improvements
  • diagnostics_win-hotfix
  • diagnostics_win-hotfix-old
  • current_fgrp_apps
  • testing_gw_apps
  • gw_app_darwin_15
  • current_brp_apps
  • current_brp_apps_android10
  • current_gfx_apps
  • current_server
  • current_gw_apps
  • previous_fgrp_apps
  • previous_gw_apps
  • testing_brp_apps
  • apps_FGRP3_1.07
  • apps_FGRP3_1.08
26 results

boinc_api.C

Blame
  • fft_internal.h 4.53 KiB
    #ifndef __CLFFT_INTERNAL_H 
    #define __CLFFT_INTERNAL_H 
      
    #include "clFFT.h" 
    #include <iostream> 
    #include <string> 
    #include <sstream> 
      
    // NOTE(review): this using-directive sits at header scope, so it is visible
    // in every translation unit that includes this file. It cannot simply be
    // removed: cl_fft_plan below declares its kernel_string member with the
    // unqualified name `string`, which resolves through this directive.
    using namespace std; 
      
    // Direction tag attached to an FFT kernel. There is one enumerator per
    // axis name (x, y, z); the value is stored in cl_fft_kernel_info::dir and
    // is the second argument of FFT1D().
    // The numeric values are the defaults the compiler would assign (0, 1, 2);
    // they are spelled out only for readability.
    enum kernel_dir_t
    {
        cl_fft_kernel_x = 0,
        cl_fft_kernel_y = 1,
        cl_fft_kernel_z = 2
    };
    typedef enum kernel_dir_t cl_fft_kernel_dir;
      
    // One node of the singly linked list of kernels that belongs to an FFT
    // plan (cl_fft_plan::kernel_info points at the head of that list).
    // The struct only carries data; who allocates/frees the nodes and the
    // kernel_name buffer is not visible in this header.
    typedef struct kernel_info_t 
    { 
        // OpenCL kernel object for this entry.
        cl_kernel kernel; 
        // Name of the kernel as a C string.
        // NOTE(review): ownership/lifetime of this buffer is not shown here - confirm.
        char *kernel_name; 
        // presumably the local-memory size (lmem) the kernel needs; units not
        // visible here - TODO confirm against the code that fills it in.
        size_t lmem_size; 
        // Launch-geometry parameters, as named: number of work-groups,
        // transforms handled per work-group, and work-items per work-group.
        // NOTE(review): exact semantics are defined by the kernel generator - confirm.
        size_t num_workgroups; 
        size_t num_xforms_per_workgroup; 
        size_t num_workitems_per_workgroup; 
        // Direction tag of this kernel (cl_fft_kernel_x / _y / _z).
        cl_fft_kernel_dir dir; 
        // presumably a boolean flag (non-zero = the kernel may run in place);
        // stored as int - TODO confirm.
        int in_place_possible; 
        // Next node in the kernel list.
        // NOTE(review): end-of-list marker (likely a null pointer) is not shown here.
        kernel_info_t *next; 
    }cl_fft_kernel_info; 
      
    // Internal state of a single FFT plan: the OpenCL objects it uses, the
    // transform description (size, dimension, data format), the generated
    // kernel source and kernel list, the lazily allocated temporary buffers,
    // and the tuning parameters used when generating kernels.
    typedef struct  
    { 
        // Context in which the FFT resources are created and the kernels are executed.
        cl_context              context; 
         
        // Size of the signal.
        clFFT_Dim3              n; 
         
        // Dimension of the transform; must be 1D, 2D or 3D.
        clFFT_Dimension         dim; 
         
        // Data format; must be either interleaved or planar.
        clFFT_DataFormat        format; 
         
        // String containing the kernel source. Generated at runtime based on
        // n, dim, format and other parameters.
        // NOTE: the unqualified `string` relies on the header-scope
        // `using namespace std;` earlier in this file.
        string                  *kernel_string; 
         
        // CL program containing the source and kernels for this particular
        // n, dim and data format.
        cl_program              program; 
         
        // Linked list of the kernels that need to be executed for this FFT.
        cl_fft_kernel_info      *kernel_info; 
         
        // Number of kernels.
        int                     num_kernels; 
         
        // Twist kernel for virtualizing FFTs of very large sizes that do not
        // fit in GPU global memory.
        cl_kernel               twist_kernel; 
         
        // Flag indicating whether a temporary intermediate buffer is needed.
        // This depends on the FFT kernels being executed and on whether the
        // transform is in-place or out-of-place. For example, a local-memory
        // FFT (say 1D 1024, one that does not require a global transpose) does
        // not need a temporary buffer, whereas a 2D 1024x1024 out-of-place FFT
        // does require an intermediate buffer.
        // If the temporary buffer is needed, its allocation is lazy, i.e. it is
        // not allocated until it is needed.
        cl_int                  temp_buffer_needed; 
         
        // The batch size is a runtime parameter, and the size of the temporary
        // buffer (if needed) depends on it. The buffer is allocated lazily, at
        // the first clFFT_Executexxx call that needs it. last_batch_size caches
        // the batch size this plan was last used with, so that the temporary
        // buffer is not allocated/deallocated over and over while the same
        // batch size keeps being used.
        // NOTE(review): the original wording about later calls with a
        // *different* batch size was ambiguous - confirm the reallocation rule
        // in the clFFT_Executexxx implementation.
        size_t                  last_batch_size; 
         
        // Temporary buffer for an interleaved plan.
        cl_mem                  tempmemobj; 
         
        // Temporary buffers for a planar plan. Only one of tempmemobj or the
        // (tempmemobj_real, tempmemobj_imag) pair is valid (allocated),
        // depending on the data format of the plan (planar or interleaved).
        cl_mem                  tempmemobj_real, tempmemobj_imag; 
         
        // Maximum signal size for which a local-memory-transpose based FFT is
        // sufficient, i.e. no global memory transpose (communication) is needed.
        size_t                  max_localmem_fft_size; 
         
        // Maximum number of work-items per work-group allowed. Together with
        // max_radix below, this controls the maximum local memory used by the
        // FFT kernels of this plan. Set to 256 by default.
        size_t                  max_work_item_per_workgroup; 
         
        // Maximum base radix for the local-memory FFT; this controls the
        // maximum register space used by work-items. Currently defaults to 16.
        size_t                  max_radix; 
         
        // Device-dependent parameter that tells how many work-items need to
        // read consecutive values so that global memory accesses by the
        // work-items of a work-group are coalesced and use the full bandwidth,
        // e.g. on NVidia Tesla this is 16.
        size_t                  min_mem_coalesce_width; 
         
        // Number of local memory banks. Used to generate kernels whose local
        // memory transposes are padded appropriately to avoid local-memory
        // bank conflicts, e.g. on NVidia it is 16.
        size_t                  num_local_mem_banks; 
    }cl_fft_plan; 
      
    // Declared here, defined elsewhere. Takes the plan to operate on and the
    // kernel direction (cl_fft_kernel_x / _y / _z); returns nothing, so any
    // result must be delivered through *plan.
    // NOTE(review): presumably generates the 1D FFT kernel(s) for the given
    // direction and records them in the plan (kernel_string / kernel_info) -
    // confirm against the definition.
    void FFT1D(cl_fft_plan *plan, cl_fft_kernel_dir dir); 
      
    #endif