Commit 7d3bdca6 authored by Heinz-Bernd Eggenstein
Browse files

Bug #1608: clFFT use of native_sin, native_cos can cause validation problems

Added a plan creation method that allows setting flags to direct code generation.
Currently limited to selecting among 4 methods to compute twiddle factors:
-native_sin,native_cos function
-sincos() function
-set of two LUTs in global memory
-Taylor series approx via a smaller LUT in shared memory
parent 8d6fe913
......@@ -89,7 +89,11 @@ typedef enum
clFFT_native_trig = 0,
clFFT_sincosfunc = 1,
clFFT_BigLUT = 2,
clFFT_TaylorLUT = 3
clFFT_TaylorLUT = 3,
clFFT_RFU4 = 4,
clFFT_RFU5 = 5,
clFFT_RFU6 = 6,
clFFT_RFU7 = 7
} clFFT_TwiddleFactorMethod;
typedef struct
......@@ -115,6 +119,8 @@ typedef void* clFFT_Plan;
clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code );
clFFT_Plan clFFT_CreatePlanAdv( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, unsigned long flags, cl_int *error_code );
void clFFT_DestroyPlan( clFFT_Plan plan );
cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
......
......@@ -757,25 +757,25 @@ insertLocalSinCosLUT(string & kernel_string, cl_fft_plan *plan, int workgroupsiz
// LUT holds grid values for Taylor series approx
kernel_string += string(" __local float2 cossin_T_LUT[256];\n");
int m = (int) ceilf(256.0 / (float) workgroupsize);
int sizeLUT= plan->N2;
int m = (int) ceilf((float) sizeLUT / (float) workgroupsize);
kernel_string += string(" int lLUTind= lId; \n");
if (256 % workgroupsize != 0) kernel_string += string(" if(lLUTind < 256) { \n");
if (sizeLUT % workgroupsize != 0) kernel_string += string(" if(lLUTind < ") + num2str(sizeLUT) + string("){ \n");
kernel_string += string(" cossin_T_LUT[lLUTind]=cossinLUT2[lLUTind]; \n");
if (256 % workgroupsize != 0) kernel_string += string(" } \n");
if (sizeLUT % workgroupsize != 0) kernel_string += string(" }\n");
for(int k= 1 ; k < m ; k++) {
kernel_string += string(" lLUTind+=") + num2str(workgroupsize) + string(";\n");
if (256 % workgroupsize != 0) kernel_string += string(" if(lLUTind < 256) { \n");
if (sizeLUT % workgroupsize != 0) kernel_string += string(" if(lLUTind < ") + num2str(sizeLUT) + string("){ \n");
kernel_string += string(" cossin_T_LUT[lLUTind]=cossinLUT2[lLUTind]; \n");
if (256 % workgroupsize != 0) kernel_string += string(" }\n");
if (sizeLUT % workgroupsize != 0) kernel_string += string(" }\n");
}
kernel_string += string(" barrier(CLK_LOCAL_MEM_FENCE);\n");
// TODO remove kernel_string += string(" __global float2 * cossin_T_LUT = cossinLUT2;\n");
}
}
......
......@@ -334,9 +334,15 @@ int precomputeSinCosLUTs(cl_fft_plan * plan,cl_int *error_code) {
}
clFFT_Plan
clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code )
clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ) {
return clFFT_CreatePlanAdv( context,n, dim, dataFormat, 0,error_code );
}
clFFT_Plan
clFFT_CreatePlanAdv( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, unsigned long flags, cl_int *error_code )
{
int i;
cl_int err;
......@@ -388,9 +394,7 @@ clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_Da
plan->min_mem_coalesce_width = 16;
plan->num_local_mem_banks = 16;
//TODO: restore native as default
plan->twiddleMethod = clFFT_TaylorLUT;
plan->twiddleMethod = (clFFT_TwiddleFactorMethod)(flags & 7);
precomputeSinCosLUTs(plan,error_code);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment