From 7d3bdca6c02a62f5258d0d55e31318bbd61ecc26 Mon Sep 17 00:00:00 2001 From: Heinz-Bernd Eggenstein <heinz-bernd.eggenstein@aei.mpg.de> Date: Tue, 26 Jun 2012 17:08:07 +0200 Subject: [PATCH] Bug #1608: clFFT use of native_sin, native_cos can cause validation problems. Added a plan creation function (clFFT_CreatePlanAdv) that accepts flags to direct code generation; the flags are currently limited to selecting one of 4 methods to compute twiddle factors: - native_sin/native_cos functions - sincos() function - set of two LUTs in global memory - Taylor series approximation via a smaller LUT in shared memory --- include/clFFT.h | 8 +++++++- src/fft_kernelstring.cpp | 14 +++++++------- src/fft_setup.cpp | 14 +++++++++----- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/include/clFFT.h b/include/clFFT.h index 32dbbd9..3fafb66 100644 --- a/include/clFFT.h +++ b/include/clFFT.h @@ -89,7 +89,11 @@ typedef enum clFFT_native_trig = 0, clFFT_sincosfunc = 1, clFFT_BigLUT = 2, - clFFT_TaylorLUT = 3 + clFFT_TaylorLUT = 3, + clFFT_RFU4 = 4, + clFFT_RFU5 = 5, + clFFT_RFU6 = 6, + clFFT_RFU7 = 7 } clFFT_TwiddleFactorMethod; typedef struct @@ -115,6 +119,8 @@ typedef void* clFFT_Plan; clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ); +clFFT_Plan clFFT_CreatePlanAdv( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, unsigned long flags, cl_int *error_code ); + void clFFT_DestroyPlan( clFFT_Plan plan ); cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir, diff --git a/src/fft_kernelstring.cpp b/src/fft_kernelstring.cpp index f126039..4f21738 100644 --- a/src/fft_kernelstring.cpp +++ b/src/fft_kernelstring.cpp @@ -757,25 +757,25 @@ insertLocalSinCosLUT(string & kernel_string, cl_fft_plan *plan, int workgroupsiz // LUT holds grid values for Taylor seres approx kernel_string += string(" __local float2 cossin_T_LUT[256];\n"); - int m = (int) 
ceilf(256.0 / (float) workgroupsize); + int sizeLUT= plan->N2; + + int m = (int) ceilf((float) sizeLUT / (float) workgroupsize); kernel_string += string(" int lLUTind= lId; \n"); - if (256 % workgroupsize != 0) kernel_string += string(" if(lLUTind < 256) { \n"); + if (sizeLUT % workgroupsize != 0) kernel_string += string(" if(lLUTind < ") + num2str(sizeLUT) + string("){ \n"); kernel_string += string(" cossin_T_LUT[lLUTind]=cossinLUT2[lLUTind]; \n"); - if (256 % workgroupsize != 0) kernel_string += string(" } \n"); + if (sizeLUT % workgroupsize != 0) kernel_string += string(" }\n"); for(int k= 1 ; k < m ; k++) { kernel_string += string(" lLUTind+=") + num2str(workgroupsize) + string(";\n"); - if (256 % workgroupsize != 0) kernel_string += string(" if(lLUTind < 256) { \n"); + if (sizeLUT % workgroupsize != 0) kernel_string += string(" if(lLUTind < ") + num2str(sizeLUT) + string("){ \n"); kernel_string += string(" cossin_T_LUT[lLUTind]=cossinLUT2[lLUTind]; \n"); - if (256 % workgroupsize != 0) kernel_string += string(" }\n"); + if (sizeLUT % workgroupsize != 0) kernel_string += string(" }\n"); } kernel_string += string(" barrier(CLK_LOCAL_MEM_FENCE);\n"); - -// TODO remove kernel_string += string(" __global float2 * cossin_T_LUT = cossinLUT2;\n"); } } diff --git a/src/fft_setup.cpp b/src/fft_setup.cpp index fed40b0..57e06b4 100644 --- a/src/fft_setup.cpp +++ b/src/fft_setup.cpp @@ -334,9 +334,15 @@ int precomputeSinCosLUTs(cl_fft_plan * plan,cl_int *error_code) { } - clFFT_Plan -clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ) +clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ) { + return clFFT_CreatePlanAdv( context,n, dim, dataFormat, 0,error_code ); +} + + + +clFFT_Plan +clFFT_CreatePlanAdv( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, unsigned long flags, cl_int *error_code ) { int i; 
cl_int err; @@ -388,9 +394,7 @@ clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_Da plan->min_mem_coalesce_width = 16; plan->num_local_mem_banks = 16; - - //TODO: restore native as default - plan->twiddleMethod = clFFT_TaylorLUT; + plan->twiddleMethod = (clFFT_TwiddleFactorMethod)(flags & 7); precomputeSinCosLUTs(plan,error_code); -- GitLab