Commit 7d3bdca6 authored by Heinz-Bernd Eggenstein
Browse files

Bug #1608: clFFT use of native_sin, native_cos can cause validation problems

Added a plan class creation method that allows setting flags to direct code generation.
Currently limited to selecting among 4 methods to compute twiddle factors:
- native_sin, native_cos functions
- sincos() function
- set of two LUTs in global memory
- Taylor series approximation via a smaller LUT in shared memory
parent 8d6fe913
...@@ -89,7 +89,11 @@ typedef enum ...@@ -89,7 +89,11 @@ typedef enum
clFFT_native_trig = 0, clFFT_native_trig = 0,
clFFT_sincosfunc = 1, clFFT_sincosfunc = 1,
clFFT_BigLUT = 2, clFFT_BigLUT = 2,
clFFT_TaylorLUT = 3 clFFT_TaylorLUT = 3,
clFFT_RFU4 = 4,
clFFT_RFU5 = 5,
clFFT_RFU6 = 6,
clFFT_RFU7 = 7
} clFFT_TwiddleFactorMethod; } clFFT_TwiddleFactorMethod;
typedef struct typedef struct
...@@ -115,6 +119,8 @@ typedef void* clFFT_Plan; ...@@ -115,6 +119,8 @@ typedef void* clFFT_Plan;
clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ); clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code );
clFFT_Plan clFFT_CreatePlanAdv( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, unsigned long flags, cl_int *error_code );
void clFFT_DestroyPlan( clFFT_Plan plan ); void clFFT_DestroyPlan( clFFT_Plan plan );
cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir, cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
......
...@@ -757,25 +757,25 @@ insertLocalSinCosLUT(string & kernel_string, cl_fft_plan *plan, int workgroupsiz ...@@ -757,25 +757,25 @@ insertLocalSinCosLUT(string & kernel_string, cl_fft_plan *plan, int workgroupsiz
// LUT holds grid values for Taylor series approx // LUT holds grid values for Taylor series approx
kernel_string += string(" __local float2 cossin_T_LUT[256];\n"); kernel_string += string(" __local float2 cossin_T_LUT[256];\n");
int m = (int) ceilf(256.0 / (float) workgroupsize); int sizeLUT= plan->N2;
int m = (int) ceilf((float) sizeLUT / (float) workgroupsize);
kernel_string += string(" int lLUTind= lId; \n"); kernel_string += string(" int lLUTind= lId; \n");
if (256 % workgroupsize != 0) kernel_string += string(" if(lLUTind < 256) { \n"); if (sizeLUT % workgroupsize != 0) kernel_string += string(" if(lLUTind < ") + num2str(sizeLUT) + string("){ \n");
kernel_string += string(" cossin_T_LUT[lLUTind]=cossinLUT2[lLUTind]; \n"); kernel_string += string(" cossin_T_LUT[lLUTind]=cossinLUT2[lLUTind]; \n");
if (256 % workgroupsize != 0) kernel_string += string(" } \n"); if (sizeLUT % workgroupsize != 0) kernel_string += string(" }\n");
for(int k= 1 ; k < m ; k++) { for(int k= 1 ; k < m ; k++) {
kernel_string += string(" lLUTind+=") + num2str(workgroupsize) + string(";\n"); kernel_string += string(" lLUTind+=") + num2str(workgroupsize) + string(";\n");
if (256 % workgroupsize != 0) kernel_string += string(" if(lLUTind < 256) { \n"); if (sizeLUT % workgroupsize != 0) kernel_string += string(" if(lLUTind < ") + num2str(sizeLUT) + string("){ \n");
kernel_string += string(" cossin_T_LUT[lLUTind]=cossinLUT2[lLUTind]; \n"); kernel_string += string(" cossin_T_LUT[lLUTind]=cossinLUT2[lLUTind]; \n");
if (256 % workgroupsize != 0) kernel_string += string(" }\n"); if (sizeLUT % workgroupsize != 0) kernel_string += string(" }\n");
} }
kernel_string += string(" barrier(CLK_LOCAL_MEM_FENCE);\n"); kernel_string += string(" barrier(CLK_LOCAL_MEM_FENCE);\n");
// TODO remove kernel_string += string(" __global float2 * cossin_T_LUT = cossinLUT2;\n");
} }
} }
......
...@@ -334,9 +334,15 @@ int precomputeSinCosLUTs(cl_fft_plan * plan,cl_int *error_code) { ...@@ -334,9 +334,15 @@ int precomputeSinCosLUTs(cl_fft_plan * plan,cl_int *error_code) {
} }
clFFT_Plan clFFT_Plan
clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ) clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ) {
return clFFT_CreatePlanAdv( context,n, dim, dataFormat, 0,error_code );
}
clFFT_Plan
clFFT_CreatePlanAdv( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, unsigned long flags, cl_int *error_code )
{ {
int i; int i;
cl_int err; cl_int err;
...@@ -388,9 +394,7 @@ clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_Da ...@@ -388,9 +394,7 @@ clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_Da
plan->min_mem_coalesce_width = 16; plan->min_mem_coalesce_width = 16;
plan->num_local_mem_banks = 16; plan->num_local_mem_banks = 16;
plan->twiddleMethod = (clFFT_TwiddleFactorMethod)(flags & 7);
//TODO: restore native as default
plan->twiddleMethod = clFFT_TaylorLUT;
precomputeSinCosLUTs(plan,error_code); precomputeSinCosLUTs(plan,error_code);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment