Skip to content
Snippets Groups Projects
Commit 8d6fe913 authored by Heinz-Bernd Eggenstein's avatar Heinz-Bernd Eggenstein
Browse files

Bug #1608: clFFT use of native_sin , native_cos can cause validation problems

experimental: improved Taylor series approximation by copying the LUT to shared memory.
               TODO: clean up, expose the sin/cos method on the plan creation interface,
                     do a proper calculation of the shared memory available for the sin/cos LUT
parent 20314512
No related branches found
No related tags found
No related merge requests found
......@@ -748,6 +748,38 @@ insertLocalStoreIndexArithmatic(string &kernelString, int numWorkItemsReq, int n
}
// Appends OpenCL source to kernel_string that stages the Taylor-series
// sin/cos grid table in a __local array.
//
// Only acts when plan->twiddleMethod is clFFT_TaylorLUT; for any other
// method kernel_string is left untouched. The emitted code:
//   1. declares  __local float2 cossin_T_LUT[256],
//   2. copies cossinLUT2[] into it, each work item starting at index lId
//      and advancing by workgroupsize per round,
//   3. ends with barrier(CLK_LOCAL_MEM_FENCE).
//
// kernel_string  generated kernel source, appended to in place
// plan           FFT plan; only twiddleMethod is read here
// workgroupsize  stride between copy rounds; used as a divisor, so it must
//                be non-zero (presumably the work-group size -- the callers
//                pass numWorkItemsPerWG / threadsPerBlock)
static void
insertLocalSinCosLUT(string & kernel_string, cl_fft_plan *plan, int workgroupsize) {
    if(plan->twiddleMethod != clFFT_TaylorLUT) {
        return;
    }

    // Fragments shared by every copy round.
    const string guardOpen(" if(lLUTind < 256) { \n");
    const string copyEntry(" cossin_T_LUT[lLUTind]=cossinLUT2[lLUTind]; \n");

    // LUT holds grid values for the Taylor series approximation
    kernel_string.append(" __local float2 cossin_T_LUT[256];\n");

    // Number of strided rounds needed to touch all 256 entries.
    const int copyRounds = (int) ceilf(256.0 / (float) workgroupsize);
    // A bounds check is emitted only when workgroupsize does not divide 256,
    // i.e. when a round could step past the end of the table.
    const bool guardNeeded = (256 % workgroupsize) != 0;

    // Round 0 is always emitted and also declares the running index.
    kernel_string.append(" int lLUTind= lId; \n");
    if(guardNeeded) {
        kernel_string.append(guardOpen);
    }
    kernel_string.append(copyEntry);
    if(guardNeeded) {
        kernel_string.append(" } \n");
    }

    // Remaining rounds advance the index by workgroupsize each time.
    int round = 1;
    while(round < copyRounds) {
        kernel_string.append(string(" lLUTind+=") + num2str(workgroupsize) + string(";\n"));
        if(guardNeeded) {
            kernel_string.append(guardOpen);
        }
        kernel_string.append(copyEntry);
        if(guardNeeded) {
            kernel_string.append(" }\n");
        }
        ++round;
    }

    // The barrier is emitted outside every guard, after all copy rounds.
    kernel_string.append(" barrier(CLK_LOCAL_MEM_FENCE);\n");
}
static void
createLocalMemfftKernelString(cl_fft_plan *plan)
{
......@@ -818,6 +850,7 @@ createLocalMemfftKernelString(cl_fft_plan *plan)
unsigned int lMemSize = 0;
insertVariables(localString, maxRadix);
insertLocalSinCosLUT(localString, plan, numWorkItemsPerWG);
lMemSize = insertGlobalLoadsAndTranspose(localString, n, numWorkItemsPerXForm, numXFormsPerWG, maxRadix, plan->min_mem_coalesce_width, dataFormat);
(*kInfo)->lmem_size = (lMemSize > (*kInfo)->lmem_size) ? lMemSize : (*kInfo)->lmem_size;
......@@ -1140,16 +1173,7 @@ insertSinCos(string & kernel_string, cl_fft_plan *plan, int num, int denom , str
}
// Appends a declaration to kernel_string that makes cossin_T_LUT an alias
// for cossinLUT2 (a __global float2 pointer) when plan->twiddleMethod is
// clFFT_TaylorLUT; otherwise appends nothing.
//
// workgroupsize is accepted but not used by this implementation.
static void
insertLocalSinCosLUT(string & kernel_string, cl_fft_plan *plan, int workgroupsize) {
    // TODO: conditionally copy to local (shared) memory
    if(plan->twiddleMethod != clFFT_TaylorLUT) {
        return;
    }

    // second LUT holds grid values for the Taylor series approximation
    kernel_string.append(" __global float2 * cossin_T_LUT = cossinLUT2;\n");
}
static void
createGlobalFFTKernelString(cl_fft_plan *plan, int n, int BS, cl_fft_kernel_dir dir, int vertBS)
......@@ -1256,6 +1280,9 @@ createGlobalFFTKernelString(cl_fft_plan *plan, int n, int BS, cl_fft_kernel_dir
insertVariables(localString, R1);
if((R2 > 1) || (passNum < (numPasses - 1))) {
insertLocalSinCosLUT(localString, plan, threadsPerBlock);
}
if(vertical)
{
localString += string("xNum = groupId >> ") + num2str((int)log2(numBlocksPerXForm)) + string(";\n");
......@@ -1310,7 +1337,7 @@ createGlobalFFTKernelString(cl_fft_plan *plan, int n, int BS, cl_fft_kernel_dir
localString += string("a[") + num2str(j) + string("] = in[") + num2str(j*gInInc*strideI) + string("];\n");
}
insertLocalSinCosLUT(localString, plan, threadsPerBlock);
localString += string("fftKernel") + num2str(R1) + string("(a, dir);\n");
......@@ -1368,7 +1395,6 @@ createGlobalFFTKernelString(cl_fft_plan *plan, int n, int BS, cl_fft_kernel_dir
insertSinCos(localString, plan, 1, N , expr, varRes) ;
// localString += string("ang = ang1*(k + ") + num2str((t%R2)*R1 + (t/R2)) + string(");\n");
// localString += string("w = (float2)(native_cos(ang), native_sin(ang));\n");
localString += string("a[") + num2str(t) + string("] = complexMul(a[") + num2str(t) + string("], w);\n");
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment