diff --git a/include/clFFT.h b/include/clFFT.h index 32dbbd97949ba8cd4108789c416ef9a7fe9717a9..3fafb66010d508a2e7bf71237377510d7ec4cfc9 100644 --- a/include/clFFT.h +++ b/include/clFFT.h @@ -89,7 +89,11 @@ typedef enum clFFT_native_trig = 0, clFFT_sincosfunc = 1, clFFT_BigLUT = 2, - clFFT_TaylorLUT = 3 + clFFT_TaylorLUT = 3, + clFFT_RFU4 = 4, + clFFT_RFU5 = 5, + clFFT_RFU6 = 6, + clFFT_RFU7 = 7 } clFFT_TwiddleFactorMethod; typedef struct @@ -115,6 +119,8 @@ typedef void* clFFT_Plan; clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ); +clFFT_Plan clFFT_CreatePlanAdv( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, unsigned long flags, cl_int *error_code ); + void clFFT_DestroyPlan( clFFT_Plan plan ); cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir, diff --git a/src/fft_kernelstring.cpp b/src/fft_kernelstring.cpp index f1260395dae249b8de585b07289312a264aecd40..4f21738644ba9ea2b2bd4b18e6faba9f45d0f578 100644 --- a/src/fft_kernelstring.cpp +++ b/src/fft_kernelstring.cpp @@ -757,25 +757,25 @@ insertLocalSinCosLUT(string & kernel_string, cl_fft_plan *plan, int workgroupsiz // LUT holds grid values for Taylor seres approx kernel_string += string(" __local float2 cossin_T_LUT[256];\n"); - int m = (int) ceilf(256.0 / (float) workgroupsize); + int sizeLUT= plan->N2; + + int m = (int) ceilf((float) sizeLUT / (float) workgroupsize); kernel_string += string(" int lLUTind= lId; \n"); - if (256 % workgroupsize != 0) kernel_string += string(" if(lLUTind < 256) { \n"); + if (sizeLUT % workgroupsize != 0) kernel_string += string(" if(lLUTind < ") + num2str(sizeLUT) + string("){ \n"); kernel_string += string(" cossin_T_LUT[lLUTind]=cossinLUT2[lLUTind]; \n"); - if (256 % workgroupsize != 0) kernel_string += string(" } \n"); + if (sizeLUT % workgroupsize != 0) kernel_string += string(" }\n"); for(int k= 1 ; k < m ; k++) { kernel_string += string(" lLUTind+=") + num2str(workgroupsize) + string(";\n"); - if (256 % workgroupsize != 0) kernel_string += string(" if(lLUTind < 256) { \n"); + if (sizeLUT % workgroupsize != 0) kernel_string += string(" if(lLUTind < ") + num2str(sizeLUT) + string("){ \n"); kernel_string += string(" cossin_T_LUT[lLUTind]=cossinLUT2[lLUTind]; \n"); - if (256 % workgroupsize != 0) kernel_string += string(" }\n"); + if (sizeLUT % workgroupsize != 0) kernel_string += string(" }\n"); } kernel_string += string(" barrier(CLK_LOCAL_MEM_FENCE);\n"); - -// TODO remove kernel_string += string(" __global float2 * cossin_T_LUT = cossinLUT2;\n"); } } diff --git a/src/fft_setup.cpp b/src/fft_setup.cpp index fed40b05e71570d556fcf89c9d3398c91b39d7b2..57e06b471fa137379fb3c0924abfe5fe954da639 100644 --- a/src/fft_setup.cpp +++ b/src/fft_setup.cpp @@ -334,9 +334,15 @@ int precomputeSinCosLUTs(cl_fft_plan * plan,cl_int *error_code) { } - clFFT_Plan -clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ) +clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ) { + return clFFT_CreatePlanAdv( context,n, dim, dataFormat, 0,error_code ); +} + + + +clFFT_Plan +clFFT_CreatePlanAdv( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, unsigned long flags, cl_int *error_code ) { int i; cl_int err; @@ -388,9 +394,7 @@ clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_Da plan->min_mem_coalesce_width = 16; plan->num_local_mem_banks = 16; - - //TODO: restore native as default - plan->twiddleMethod = clFFT_TaylorLUT; + plan->twiddleMethod = (clFFT_TwiddleFactorMethod)(flags & 7); precomputeSinCosLUTs(plan,error_code);