Skip to content
Snippets Groups Projects
Select Git revision
  • a36e2db135652e89c4193e2ff0d1b815d1866025
  • master default protected
  • CLFFT_NO_MAD_ENABLE
  • BRP_build_fixes
  • override_cl_compile_options
  • improve_Makefile
  • HSA
  • clmathfft
  • longer_dft_support
  • current_brp_apps
  • current_fgrp_apps
11 results

fft_execute.cpp

Blame
  • fft_execute.cpp 13.11 KiB
    #include "fft_internal.h" 
    #include <clFFT.h> 
    #include <stdlib.h> 
    #include <stdio.h> 
    #include <math.h> 
      
    /* Function-like max/min helpers. Each argument appears twice in the
     * expansion, so an argument with side effects (e.g. i++) may be evaluated
     * twice; pass only side-effect-free expressions. */
    #define max(a,b) (((a)>(b)) ? (a) : (b)) 
    #define min(a,b) (((a)<(b)) ? (a) : (b)) 
      
    /*
     * Ensure the plan owns an interleaved temporary buffer sized for batchSize.
     *
     * Does nothing unless plan->temp_buffer_needed is set. Otherwise the buffer
     * is (re)created when batchSize differs from plan->last_batch_size, or when
     * plan->tempmemobj does not exist yet.
     *
     * The buffer holds n.x * n.y * n.z * batchSize * 2 cl_float values.
     *
     * Returns CL_SUCCESS, or the error code reported by clCreateBuffer. On
     * failure plan->tempmemobj is NULL and plan->last_batch_size is unchanged,
     * so the next call retries the allocation instead of reporting success
     * without a buffer.
     */
    static cl_int
    allocateTemporaryBufferInterleaved(cl_fft_plan *plan, cl_uint batchSize)
    {
        cl_int err = CL_SUCCESS;
        size_t tmpLength;

        if(!plan->temp_buffer_needed)
            return CL_SUCCESS;

        /* last_batch_size is also written by allocateTemporaryBufferPlannar, so
         * a matching value alone does not prove that this layout's buffer
         * exists; check the handle as well.
         * NOTE(review): because the field is shared, a non-NULL tempmemobj may
         * still have been sized for an older batch size if the planar path
         * changed last_batch_size in between. Fixing that needs a per-layout
         * size field in cl_fft_plan -- confirm against the struct definition. */
        if(plan->last_batch_size == batchSize && plan->tempmemobj)
            return CL_SUCCESS;

        /* Widen to size_t before multiplying so the product is not computed
         * (and possibly wrapped) in the narrower type of the operands. */
        tmpLength = (size_t)plan->n.x * plan->n.y * plan->n.z * batchSize * 2 * sizeof(cl_float);

        if(plan->tempmemobj)
        {
            clReleaseMemObject(plan->tempmemobj);
            plan->tempmemobj = NULL;
        }

        plan->tempmemobj = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &err);
        if(err != CL_SUCCESS)
        {
            /* Keep the handle NULL so a later call allocates again. */
            plan->tempmemobj = NULL;
            return err;
        }

        /* Record the size only once the buffer really exists. */
        plan->last_batch_size = batchSize;
        return CL_SUCCESS;
    }
      
    /*
     * Ensure the plan owns the two planar temporary buffers (real and
     * imaginary parts) sized for batchSize.
     *
     * Does nothing unless plan->temp_buffer_needed is set. Otherwise both
     * buffers are (re)created when batchSize differs from
     * plan->last_batch_size, or when either buffer does not exist yet.
     *
     * Each buffer holds n.x * n.y * n.z * batchSize cl_float values.
     *
     * Returns CL_SUCCESS, or the error code of the first clCreateBuffer call
     * that failed. On failure both handles are NULL and plan->last_batch_size
     * is unchanged, so the next call retries the allocation.
     */
    static cl_int
    allocateTemporaryBufferPlannar(cl_fft_plan *plan, cl_uint batchSize)
    {
        cl_int err = CL_SUCCESS;
        size_t tmpLength;

        if(!plan->temp_buffer_needed)
            return CL_SUCCESS;

        /* last_batch_size is also written by allocateTemporaryBufferInterleaved,
         * so a matching value alone does not prove that the planar buffers
         * exist; check both handles as well.
         * NOTE(review): because the field is shared, non-NULL planar buffers
         * may still have been sized for an older batch size if the interleaved
         * path changed last_batch_size in between. Fixing that needs a
         * per-layout size field in cl_fft_plan -- confirm against the struct
         * definition. */
        if(plan->last_batch_size == batchSize && plan->tempmemobj_real && plan->tempmemobj_imag)
            return CL_SUCCESS;

        /* Widen to size_t before multiplying so the product is not computed
         * (and possibly wrapped) in the narrower type of the operands. */
        tmpLength = (size_t)plan->n.x * plan->n.y * plan->n.z * batchSize * sizeof(cl_float);

        if(plan->tempmemobj_real)
        {
            clReleaseMemObject(plan->tempmemobj_real);
            plan->tempmemobj_real = NULL;
        }

        if(plan->tempmemobj_imag)
        {
            clReleaseMemObject(plan->tempmemobj_imag);
            plan->tempmemobj_imag = NULL;
        }

        plan->tempmemobj_real = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &err);
        if(err != CL_SUCCESS)
        {
            plan->tempmemobj_real = NULL;
            return err;
        }

        plan->tempmemobj_imag = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &err);
        if(err != CL_SUCCESS)
        {
            /* Do not keep half of the pair: drop the real buffer too, and
             * return the actual status code rather than OR-ing two codes
             * together, which would yield an unrelated value. */
            clReleaseMemObject(plan->tempmemobj_real);
            plan->tempmemobj_real = NULL;
            plan->tempmemobj_imag = NULL;
            return err;
        }

        /* Record the size only once both buffers really exist. */
        plan->last_batch_size = batchSize;
        return CL_SUCCESS;
    }
      
    void 
    getKernelWorkDimensions(cl_fft_plan *plan, cl_fft_kernel_info *kernelInfo, cl_int *batchSize, size_t *gWorkItems, size_t *lWorkItems) 
    { 
        *lWorkItems = kernelInfo->num_workitems_per_workgroup; 
        int numWorkGroups = kernelInfo->num_workgroups; 
        int numXFormsPerWG = kernelInfo->num_xforms_per_workgroup; 
         
        switch(kernelInfo->dir) 
        { 
            case cl_fft_kernel_x: 
                *batchSize *= (plan->n.y * plan->n.z); 
                numWorkGroups = (*batchSize % numXFormsPerWG) ? (*batchSize/numXFormsPerWG + 1) : (*batchSize/numXFormsPerWG); 
                numWorkGroups *= kernelInfo->num_workgroups; 
                break; 
            case cl_fft_kernel_y: 
                *batchSize *= plan->n.z; 
                numWorkGroups *= *batchSize; 
                break; 
            case cl_fft_kernel_z: 
                numWorkGroups *= *batchSize; 
                break;