Commit 7df18002 authored by Oliver Bock
Browse files

Removing trailing whitespace

parent bc1bc3e9
This diff is collapsed.
...@@ -61,11 +61,11 @@ extern "C" { ...@@ -61,11 +61,11 @@ extern "C" {
#endif #endif
// XForm type // XForm type
typedef enum typedef enum
{ {
clFFT_Forward = -1, clFFT_Forward = -1,
clFFT_Inverse = 1 clFFT_Inverse = 1
}clFFT_Direction; }clFFT_Direction;
// XForm dimension // XForm dimension
...@@ -74,7 +74,7 @@ typedef enum ...@@ -74,7 +74,7 @@ typedef enum
clFFT_1D = 0, clFFT_1D = 0,
clFFT_2D = 1, clFFT_2D = 1,
clFFT_3D = 3 clFFT_3D = 3
}clFFT_Dimension; }clFFT_Dimension;
// XForm Data type // XForm Data type
...@@ -89,8 +89,8 @@ typedef struct ...@@ -89,8 +89,8 @@ typedef struct
unsigned int x; unsigned int x;
unsigned int y; unsigned int y;
unsigned int z; unsigned int z;
}clFFT_Dim3; }clFFT_Dim3;
typedef struct typedef struct
{ {
float *real; float *real;
...@@ -103,31 +103,31 @@ typedef struct ...@@ -103,31 +103,31 @@ typedef struct
float imag; float imag;
}clFFT_Complex; }clFFT_Complex;
typedef void* clFFT_Plan; typedef void* clFFT_Plan;
clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ); clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code );
void clFFT_DestroyPlan( clFFT_Plan plan ); void clFFT_DestroyPlan( clFFT_Plan plan );
cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir, cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in, cl_mem data_out, cl_mem data_in, cl_mem data_out,
cl_int num_events, cl_event *event_list, cl_event *event ); cl_int num_events, cl_event *event_list, cl_event *event );
cl_int clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir, cl_int clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag, cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag,
cl_int num_events, cl_event *event_list, cl_event *event ); cl_int num_events, cl_event *event_list, cl_event *event );
cl_int clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array, cl_int clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array,
size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir); size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir);
cl_int clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag, cl_int clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag,
size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir); size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir);
void clFFT_DumpPlan( clFFT_Plan plan, FILE *file); void clFFT_DumpPlan( clFFT_Plan plan, FILE *file);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif #endif
...@@ -59,7 +59,7 @@ static string baseKernels = string( ...@@ -59,7 +59,7 @@ static string baseKernels = string(
"#endif\n" "#endif\n"
"#define complexMul(a,b) ((float2)(mad(-(a).y, (b).y, (a).x * (b).x), mad((a).y, (b).x, (a).x * (b).y)))\n" "#define complexMul(a,b) ((float2)(mad(-(a).y, (b).y, (a).x * (b).x), mad((a).y, (b).x, (a).x * (b).y)))\n"
"#define conj(a) ((float2)((a).x, -(a).y))\n" "#define conj(a) ((float2)((a).x, -(a).y))\n"
"#define conjTransp(a) ((float2)(-(a).y, (a).x))\n" "#define conjTransp(a) ((float2)(-(a).y, (a).x))\n"
"\n" "\n"
"#define fftKernel2(a,dir) \\\n" "#define fftKernel2(a,dir) \\\n"
"{ \\\n" "{ \\\n"
...@@ -67,14 +67,14 @@ static string baseKernels = string( ...@@ -67,14 +67,14 @@ static string baseKernels = string(
" (a)[0] = c + (a)[1]; \\\n" " (a)[0] = c + (a)[1]; \\\n"
" (a)[1] = c - (a)[1]; \\\n" " (a)[1] = c - (a)[1]; \\\n"
"}\n" "}\n"
"\n" "\n"
"#define fftKernel2S(d1,d2,dir) \\\n" "#define fftKernel2S(d1,d2,dir) \\\n"
"{ \\\n" "{ \\\n"
" float2 c = (d1); \\\n" " float2 c = (d1); \\\n"
" (d1) = c + (d2); \\\n" " (d1) = c + (d2); \\\n"
" (d2) = c - (d2); \\\n" " (d2) = c - (d2); \\\n"
"}\n" "}\n"
"\n" "\n"
"#define fftKernel4(a,dir) \\\n" "#define fftKernel4(a,dir) \\\n"
"{ \\\n" "{ \\\n"
" fftKernel2S((a)[0], (a)[2], dir); \\\n" " fftKernel2S((a)[0], (a)[2], dir); \\\n"
...@@ -86,7 +86,7 @@ static string baseKernels = string( ...@@ -86,7 +86,7 @@ static string baseKernels = string(
" (a)[1] = (a)[2]; \\\n" " (a)[1] = (a)[2]; \\\n"
" (a)[2] = c; \\\n" " (a)[2] = c; \\\n"
"}\n" "}\n"
"\n" "\n"
"#define fftKernel4s(a0,a1,a2,a3,dir) \\\n" "#define fftKernel4s(a0,a1,a2,a3,dir) \\\n"
"{ \\\n" "{ \\\n"
" fftKernel2S((a0), (a2), dir); \\\n" " fftKernel2S((a0), (a2), dir); \\\n"
...@@ -96,9 +96,9 @@ static string baseKernels = string( ...@@ -96,9 +96,9 @@ static string baseKernels = string(
" fftKernel2S((a2), (a3), dir); \\\n" " fftKernel2S((a2), (a3), dir); \\\n"
" float2 c = (a1); \\\n" " float2 c = (a1); \\\n"
" (a1) = (a2); \\\n" " (a1) = (a2); \\\n"
" (a2) = c; \\\n" " (a2) = c; \\\n"
"}\n" "}\n"
"\n" "\n"
"#define bitreverse8(a) \\\n" "#define bitreverse8(a) \\\n"
"{ \\\n" "{ \\\n"
" float2 c; \\\n" " float2 c; \\\n"
...@@ -109,7 +109,7 @@ static string baseKernels = string( ...@@ -109,7 +109,7 @@ static string baseKernels = string(
" (a)[3] = (a)[6]; \\\n" " (a)[3] = (a)[6]; \\\n"
" (a)[6] = c; \\\n" " (a)[6] = c; \\\n"
"}\n" "}\n"
"\n" "\n"
"#define fftKernel8(a,dir) \\\n" "#define fftKernel8(a,dir) \\\n"
"{ \\\n" "{ \\\n"
" const float2 w1 = (float2)(0x1.6a09e6p-1f, dir*0x1.6a09e6p-1f); \\\n" " const float2 w1 = (float2)(0x1.6a09e6p-1f, dir*0x1.6a09e6p-1f); \\\n"
...@@ -134,7 +134,7 @@ static string baseKernels = string( ...@@ -134,7 +134,7 @@ static string baseKernels = string(
" fftKernel2S((a)[6], (a)[7], dir); \\\n" " fftKernel2S((a)[6], (a)[7], dir); \\\n"
" bitreverse8((a)); \\\n" " bitreverse8((a)); \\\n"
"}\n" "}\n"
"\n" "\n"
"#define bitreverse4x4(a) \\\n" "#define bitreverse4x4(a) \\\n"
"{ \\\n" "{ \\\n"
" float2 c; \\\n" " float2 c; \\\n"
...@@ -145,7 +145,7 @@ static string baseKernels = string( ...@@ -145,7 +145,7 @@ static string baseKernels = string(
" c = (a)[7]; (a)[7] = (a)[13]; (a)[13] = c; \\\n" " c = (a)[7]; (a)[7] = (a)[13]; (a)[13] = c; \\\n"
" c = (a)[11]; (a)[11] = (a)[14]; (a)[14] = c; \\\n" " c = (a)[11]; (a)[11] = (a)[14]; (a)[14] = c; \\\n"
"}\n" "}\n"
"\n" "\n"
"#define fftKernel16(a,dir) \\\n" "#define fftKernel16(a,dir) \\\n"
"{ \\\n" "{ \\\n"
" const float w0 = 0x1.d906bcp-1f; \\\n" " const float w0 = 0x1.d906bcp-1f; \\\n"
...@@ -170,7 +170,7 @@ static string baseKernels = string( ...@@ -170,7 +170,7 @@ static string baseKernels = string(
" fftKernel4((a) + 12, dir); \\\n" " fftKernel4((a) + 12, dir); \\\n"
" bitreverse4x4((a)); \\\n" " bitreverse4x4((a)); \\\n"
"}\n" "}\n"
"\n" "\n"
"#define bitreverse32(a) \\\n" "#define bitreverse32(a) \\\n"
"{ \\\n" "{ \\\n"
" float2 c1, c2; \\\n" " float2 c1, c2; \\\n"
...@@ -181,7 +181,7 @@ static string baseKernels = string( ...@@ -181,7 +181,7 @@ static string baseKernels = string(
" c1 = (a)[22]; (a)[22] = (a)[11]; c2 = (a)[13]; (a)[13] = c1; c1 = (a)[26]; (a)[26] = c2; c2 = (a)[21]; (a)[21] = c1; (a)[11] = c2; \\\n" " c1 = (a)[22]; (a)[22] = (a)[11]; c2 = (a)[13]; (a)[13] = c1; c1 = (a)[26]; (a)[26] = c2; c2 = (a)[21]; (a)[21] = c1; (a)[11] = c2; \\\n"
" c1 = (a)[30]; (a)[30] = (a)[15]; c2 = (a)[29]; (a)[29] = c1; c1 = (a)[27]; (a)[27] = c2; c2 = (a)[23]; (a)[23] = c1; (a)[15] = c2; \\\n" " c1 = (a)[30]; (a)[30] = (a)[15]; c2 = (a)[29]; (a)[29] = c1; c1 = (a)[27]; (a)[27] = c2; c2 = (a)[23]; (a)[23] = c1; (a)[15] = c2; \\\n"
"}\n" "}\n"
"\n" "\n"
"#define fftKernel32(a,dir) \\\n" "#define fftKernel32(a,dir) \\\n"
"{ \\\n" "{ \\\n"
" fftKernel2S((a)[0], (a)[16], dir); \\\n" " fftKernel2S((a)[0], (a)[16], dir); \\\n"
...@@ -270,7 +270,7 @@ static string twistKernelPlannar = string( ...@@ -270,7 +270,7 @@ static string twistKernelPlannar = string(
" } \\\n" " } \\\n"
" } \\\n" " } \\\n"
"} \\\n" "} \\\n"
); );
......
...@@ -59,17 +59,17 @@ static cl_int ...@@ -59,17 +59,17 @@ static cl_int
allocateTemporaryBufferInterleaved(cl_fft_plan *plan, cl_uint batchSize) allocateTemporaryBufferInterleaved(cl_fft_plan *plan, cl_uint batchSize)
{ {
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
if(plan->temp_buffer_needed && plan->last_batch_size != batchSize) if(plan->temp_buffer_needed && plan->last_batch_size != batchSize)
{ {
plan->last_batch_size = batchSize; plan->last_batch_size = batchSize;
size_t tmpLength = plan->n.x * plan->n.y * plan->n.z * batchSize * 2 * sizeof(cl_float); size_t tmpLength = plan->n.x * plan->n.y * plan->n.z * batchSize * 2 * sizeof(cl_float);
if(plan->tempmemobj) if(plan->tempmemobj)
clReleaseMemObject(plan->tempmemobj); clReleaseMemObject(plan->tempmemobj);
plan->tempmemobj = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &err); plan->tempmemobj = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &err);
} }
return err; return err;
} }
static cl_int static cl_int
...@@ -77,21 +77,21 @@ allocateTemporaryBufferPlannar(cl_fft_plan *plan, cl_uint batchSize) ...@@ -77,21 +77,21 @@ allocateTemporaryBufferPlannar(cl_fft_plan *plan, cl_uint batchSize)
{ {
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
cl_int terr; cl_int terr;
if(plan->temp_buffer_needed && plan->last_batch_size != batchSize) if(plan->temp_buffer_needed && plan->last_batch_size != batchSize)
{ {
plan->last_batch_size = batchSize; plan->last_batch_size = batchSize;
size_t tmpLength = plan->n.x * plan->n.y * plan->n.z * batchSize * sizeof(cl_float); size_t tmpLength = plan->n.x * plan->n.y * plan->n.z * batchSize * sizeof(cl_float);
if(plan->tempmemobj_real) if(plan->tempmemobj_real)
clReleaseMemObject(plan->tempmemobj_real); clReleaseMemObject(plan->tempmemobj_real);
if(plan->tempmemobj_imag) if(plan->tempmemobj_imag)
clReleaseMemObject(plan->tempmemobj_imag); clReleaseMemObject(plan->tempmemobj_imag);
plan->tempmemobj_real = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &err); plan->tempmemobj_real = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &err);
plan->tempmemobj_imag = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &terr); plan->tempmemobj_imag = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &terr);
err |= terr; err |= terr;
} }
return err; return err;
} }
...@@ -101,7 +101,7 @@ getKernelWorkDimensions(cl_fft_plan *plan, cl_fft_kernel_info *kernelInfo, cl_in ...@@ -101,7 +101,7 @@ getKernelWorkDimensions(cl_fft_plan *plan, cl_fft_kernel_info *kernelInfo, cl_in
*lWorkItems = kernelInfo->num_workitems_per_workgroup; *lWorkItems = kernelInfo->num_workitems_per_workgroup;
int numWorkGroups = kernelInfo->num_workgroups; int numWorkGroups = kernelInfo->num_workgroups;
int numXFormsPerWG = kernelInfo->num_xforms_per_workgroup; int numXFormsPerWG = kernelInfo->num_xforms_per_workgroup;
switch(kernelInfo->dir) switch(kernelInfo->dir)
{ {
case cl_fft_kernel_x: case cl_fft_kernel_x:
...@@ -117,45 +117,45 @@ getKernelWorkDimensions(cl_fft_plan *plan, cl_fft_kernel_info *kernelInfo, cl_in ...@@ -117,45 +117,45 @@ getKernelWorkDimensions(cl_fft_plan *plan, cl_fft_kernel_info *kernelInfo, cl_in
numWorkGroups *= *batchSize; numWorkGroups *= *batchSize;
break; break;
} }
*gWorkItems = numWorkGroups * *lWorkItems; *gWorkItems = numWorkGroups * *lWorkItems;
} }
cl_int cl_int
clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir, clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in, cl_mem data_out, cl_mem data_in, cl_mem data_out,
cl_int num_events, cl_event *event_list, cl_event *event ) cl_int num_events, cl_event *event_list, cl_event *event )
{ {
int s; int s;
cl_fft_plan *plan = (cl_fft_plan *) Plan; cl_fft_plan *plan = (cl_fft_plan *) Plan;
if(plan->format != clFFT_InterleavedComplexFormat) if(plan->format != clFFT_InterleavedComplexFormat)
return CL_INVALID_VALUE; return CL_INVALID_VALUE;
cl_int err; cl_int err;
size_t gWorkItems, lWorkItems; size_t gWorkItems, lWorkItems;
int inPlaceDone = -1; int inPlaceDone = -1;
cl_int isInPlace = data_in == data_out ? 1 : 0; cl_int isInPlace = data_in == data_out ? 1 : 0;
if((err = allocateTemporaryBufferInterleaved(plan, batchSize)) != CL_SUCCESS) if((err = allocateTemporaryBufferInterleaved(plan, batchSize)) != CL_SUCCESS)
return err; return err;
cl_mem memObj[3]; cl_mem memObj[3];
memObj[0] = data_in; memObj[0] = data_in;
memObj[1] = data_out; memObj[1] = data_out;
memObj[2] = plan->tempmemobj; memObj[2] = plan->tempmemobj;
cl_fft_kernel_info *kernelInfo = plan->kernel_info; cl_fft_kernel_info *kernelInfo = plan->kernel_info;
int numKernels = plan->num_kernels; int numKernels = plan->num_kernels;
int numKernelsOdd = numKernels & 1; int numKernelsOdd = numKernels & 1;
int currRead = 0; int currRead = 0;
int currWrite = 1; int currWrite = 1;
// at least one external dram shuffle (transpose) required // at least one external dram shuffle (transpose) required
if(plan->temp_buffer_needed) if(plan->temp_buffer_needed)
{ {
// in-place transform // in-place transform
if(isInPlace) if(isInPlace)
{ {
inPlaceDone = 0; inPlaceDone = 0;
currRead = 1; currRead = 1;
...@@ -165,36 +165,36 @@ clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchS ...@@ -165,36 +165,36 @@ clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchS
{ {
currWrite = (numKernels & 1) ? 1 : 2; currWrite = (numKernels & 1) ? 1 : 2;
} }
while(kernelInfo) while(kernelInfo)
{ {
if( isInPlace && numKernelsOdd && !inPlaceDone && kernelInfo->in_place_possible) if( isInPlace && numKernelsOdd && !inPlaceDone && kernelInfo->in_place_possible)
{ {
currWrite = currRead; currWrite = currRead;
inPlaceDone = 1; inPlaceDone = 1;
} }
s = batchSize; s = batchSize;
getKernelWorkDimensions(plan, kernelInfo, &s, &gWorkItems, &lWorkItems); getKernelWorkDimensions(plan, kernelInfo, &s, &gWorkItems, &lWorkItems);
err |= clSetKernelArg(kernelInfo->kernel, 0, sizeof(cl_mem), &memObj[currRead]); err |= clSetKernelArg(kernelInfo->kernel, 0, sizeof(cl_mem), &memObj[currRead]);
err |= clSetKernelArg(kernelInfo->kernel, 1, sizeof(cl_mem), &memObj[currWrite]); err |= clSetKernelArg(kernelInfo->kernel, 1, sizeof(cl_mem), &memObj[currWrite]);
err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_int), &dir); err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_int), &dir);
err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_int), &s); err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_int), &s);
err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, 0, NULL, NULL); err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, 0, NULL, NULL);
if(err) if(err)
return err; return err;
currRead = (currWrite == 1) ? 1 : 2; currRead = (currWrite == 1) ? 1 : 2;
currWrite = (currWrite == 1) ? 2 : 1; currWrite = (currWrite == 1) ? 2 : 1;
kernelInfo = kernelInfo->next; kernelInfo = kernelInfo->next;
} }
} }
// no dram shuffle (transpose required) transform // no dram shuffle (transpose required) transform
// all kernels can execute in-place. // all kernels can execute in-place.
else { else {
while(kernelInfo) while(kernelInfo)
{ {
s = batchSize; s = batchSize;
...@@ -203,41 +203,41 @@ clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchS ...@@ -203,41 +203,41 @@ clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchS
err |= clSetKernelArg(kernelInfo->kernel, 1, sizeof(cl_mem), &memObj[currWrite]); err |= clSetKernelArg(kernelInfo->kernel, 1, sizeof(cl_mem), &memObj[currWrite]);
err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_int), &dir); err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_int), &dir);
err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_int), &s); err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_int), &s);
err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, 0, NULL, NULL); err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, 0, NULL, NULL);
if(err) if(err)
return err; return err;
currRead = 1; currRead = 1;
currWrite = 1; currWrite = 1;
kernelInfo = kernelInfo->next; kernelInfo = kernelInfo->next;
} }
} }
return err; return err;
} }
cl_int cl_int
clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir, clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag, cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag,
cl_int num_events, cl_event *event_list, cl_event *event) cl_int num_events, cl_event *event_list, cl_event *event)
{ {
int s; int s;
cl_fft_plan *plan = (cl_fft_plan *) Plan; cl_fft_plan *plan = (cl_fft_plan *) Plan;
if(plan->format != clFFT_SplitComplexFormat) if(plan->format != clFFT_SplitComplexFormat)
return CL_INVALID_VALUE; return CL_INVALID_VALUE;
cl_int err; cl_int err;
size_t gWorkItems, lWorkItems; size_t gWorkItems, lWorkItems;
int inPlaceDone = -1; int inPlaceDone = -1;
cl_int isInPlace = ((data_in_real == data_out_real) && (data_in_imag == data_out_imag)) ? 1 : 0; cl_int isInPlace = ((data_in_real == data_out_real) && (data_in_imag == data_out_imag)) ? 1 : 0;
if((err = allocateTemporaryBufferPlannar(plan, batchSize)) != CL_SUCCESS) if((err = allocateTemporaryBufferPlannar(plan, batchSize)) != CL_SUCCESS)
return err; return err;
cl_mem memObj_real[3]; cl_mem memObj_real[3];
cl_mem memObj_imag[3]; cl_mem memObj_imag[3];
memObj_real[0] = data_in_real; memObj_real[0] = data_in_real;
...@@ -246,19 +246,19 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, ...@@ -246,19 +246,19 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize,
memObj_imag[0] = data_in_imag; memObj_imag[0] = data_in_imag;
memObj_imag[1] = data_out_imag; memObj_imag[1] = data_out_imag;
memObj_imag[2] = plan->tempmemobj_imag; memObj_imag[2] = plan->tempmemobj_imag;
cl_fft_kernel_info *kernelInfo = plan->kernel_info; cl_fft_kernel_info *kernelInfo = plan->kernel_info;
int numKernels = plan->num_kernels; int numKernels = plan->num_kernels;
int numKernelsOdd = numKernels & 1; int numKernelsOdd = numKernels & 1;
int currRead = 0; int currRead = 0;
int currWrite = 1; int currWrite = 1;
// at least one external dram shuffle (transpose) required // at least one external dram shuffle (transpose) required
if(plan->temp_buffer_needed) if(plan->temp_buffer_needed)
{ {
// in-place transform // in-place transform
if(isInPlace) if(isInPlace)
{ {
inPlaceDone = 0; inPlaceDone = 0;
currRead = 1; currRead = 1;
...@@ -268,15 +268,15 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, ...@@ -268,15 +268,15 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize,
{ {
currWrite = (numKernels & 1) ? 1 : 2; currWrite = (numKernels & 1) ? 1 : 2;
} }
while(kernelInfo) while(kernelInfo)
{ {
if( isInPlace && numKernelsOdd && !inPlaceDone && kernelInfo->in_place_possible) if( isInPlace && numKernelsOdd && !inPlaceDone && kernelInfo->in_place_possible)
{ {
currWrite = currRead; currWrite = currRead;
inPlaceDone = 1; inPlaceDone = 1;
} }