From 7d3bdca6c02a62f5258d0d55e31318bbd61ecc26 Mon Sep 17 00:00:00 2001
From: Heinz-Bernd Eggenstein <heinz-bernd.eggenstein@aei.mpg.de>
Date: Tue, 26 Jun 2012 17:08:07 +0200
Subject: [PATCH] Bug #1608: clFFT use of native_sin, native_cos can cause
 validation problems

Added a plan creation function, clFFT_CreatePlanAdv(), that accepts flags to
direct code generation. The flags are currently limited to selecting one of
four methods for computing twiddle factors:
- native_sin / native_cos functions
- sincos() function
- a set of two LUTs in global memory
- Taylor series approximation via a smaller LUT in shared memory
clFFT_CreatePlan() now forwards to clFFT_CreatePlanAdv() with flags = 0.
---
 include/clFFT.h          |  8 +++++++-
 src/fft_kernelstring.cpp | 14 +++++++-------
 src/fft_setup.cpp        | 14 +++++++++-----
 3 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/include/clFFT.h b/include/clFFT.h
index 32dbbd9..3fafb66 100644
--- a/include/clFFT.h
+++ b/include/clFFT.h
@@ -89,7 +89,11 @@ typedef enum
   clFFT_native_trig       = 0,
   clFFT_sincosfunc        = 1,
   clFFT_BigLUT            = 2,
-  clFFT_TaylorLUT         = 3             
+  clFFT_TaylorLUT         = 3,
+  clFFT_RFU4              = 4,
+  clFFT_RFU5              = 5,   
+  clFFT_RFU6              = 6,  
+  clFFT_RFU7              = 7    
 } clFFT_TwiddleFactorMethod;
 
 typedef struct
@@ -115,6 +119,8 @@ typedef void* clFFT_Plan;
 
 clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code );
 
+clFFT_Plan clFFT_CreatePlanAdv( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, unsigned long flags, cl_int *error_code );
+
 void clFFT_DestroyPlan( clFFT_Plan plan );
 
 cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
diff --git a/src/fft_kernelstring.cpp b/src/fft_kernelstring.cpp
index f126039..4f21738 100644
--- a/src/fft_kernelstring.cpp
+++ b/src/fft_kernelstring.cpp
@@ -757,25 +757,25 @@ insertLocalSinCosLUT(string & kernel_string, cl_fft_plan *plan, int workgroupsiz
         // LUT holds grid values for Taylor seres approx
         kernel_string += string(" __local  float2  cossin_T_LUT[256];\n");
         
-        int m = (int) ceilf(256.0 / (float) workgroupsize);
+        int sizeLUT= plan->N2;
+        
+        int m = (int) ceilf((float) sizeLUT / (float) workgroupsize);
         
         
         kernel_string += string(" int lLUTind= lId;       \n");     
         
-        if (256 % workgroupsize != 0) kernel_string += string(" if(lLUTind < 256) {     \n");
+        if (sizeLUT % workgroupsize != 0) kernel_string += string(" if(lLUTind < ") + num2str(sizeLUT) + string("){ \n");
         kernel_string += string("     cossin_T_LUT[lLUTind]=cossinLUT2[lLUTind]; \n");
-        if (256 % workgroupsize  != 0)  kernel_string += string(" }                                      \n");
+        if (sizeLUT % workgroupsize  != 0)  kernel_string += string(" }\n");
         
         for(int k= 1 ; k < m ; k++) {
             kernel_string += string(" lLUTind+=") + num2str(workgroupsize) + string(";\n");
-            if (256 % workgroupsize != 0) kernel_string += string(" if(lLUTind < 256) { \n");
+            if (sizeLUT % workgroupsize != 0) kernel_string += string(" if(lLUTind < ") + num2str(sizeLUT) + string("){ \n");
             kernel_string += string("     cossin_T_LUT[lLUTind]=cossinLUT2[lLUTind]; \n");
-            if (256 % workgroupsize != 0) kernel_string += string(" }\n");
+            if (sizeLUT % workgroupsize != 0) kernel_string += string(" }\n");
         }
         
         kernel_string += string(" barrier(CLK_LOCAL_MEM_FENCE);\n");
-        
-// TODO remove        kernel_string += string(" __global float2 * cossin_T_LUT =  cossinLUT2;\n");
     }
 }
 
diff --git a/src/fft_setup.cpp b/src/fft_setup.cpp
index fed40b0..57e06b4 100644
--- a/src/fft_setup.cpp
+++ b/src/fft_setup.cpp
@@ -334,9 +334,15 @@ int precomputeSinCosLUTs(cl_fft_plan * plan,cl_int *error_code) {
 }
 
 
-
 clFFT_Plan
-clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code )
+clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ) {
+    return clFFT_CreatePlanAdv( context,n, dim, dataFormat, 0,error_code );
+}
+
+
+
+clFFT_Plan 
+clFFT_CreatePlanAdv( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, unsigned long flags, cl_int *error_code )
 {
     int i;
     cl_int err;
@@ -388,9 +394,7 @@ clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_Da
     plan->min_mem_coalesce_width = 16;
     plan->num_local_mem_banks = 16;
 
-    
-    //TODO: restore native as default
-    plan->twiddleMethod = clFFT_TaylorLUT;    
+    plan->twiddleMethod = (clFFT_TwiddleFactorMethod)(flags & 7);    
     
     precomputeSinCosLUTs(plan,error_code);
 
-- 
GitLab