Commit 7df18002 authored by Oliver Bock's avatar Oliver Bock
Browse files

Removing trailing whitespaces

parent bc1bc3e9
This diff is collapsed.
......@@ -61,11 +61,11 @@ extern "C" {
#endif
// XForm type
typedef enum
typedef enum
{
clFFT_Forward = -1,
clFFT_Inverse = 1
}clFFT_Direction;
// XForm dimension
......@@ -74,7 +74,7 @@ typedef enum
clFFT_1D = 0,
clFFT_2D = 1,
clFFT_3D = 3
}clFFT_Dimension;
// XForm Data type
......@@ -89,8 +89,8 @@ typedef struct
unsigned int x;
unsigned int y;
unsigned int z;
}clFFT_Dim3;
}clFFT_Dim3;
typedef struct
{
float *real;
......@@ -103,31 +103,31 @@ typedef struct
float imag;
}clFFT_Complex;
typedef void* clFFT_Plan;
typedef void* clFFT_Plan;
clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code );
void clFFT_DestroyPlan( clFFT_Plan plan );
cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in, cl_mem data_out,
cl_int num_events, cl_event *event_list, cl_event *event );
cl_int clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
cl_int clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag,
cl_int num_events, cl_event *event_list, cl_event *event );
cl_int clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array,
cl_int clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array,
size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir);
cl_int clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag,
cl_int clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag,
size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir);
void clFFT_DumpPlan( clFFT_Plan plan, FILE *file);
void clFFT_DumpPlan( clFFT_Plan plan, FILE *file);
#ifdef __cplusplus
}
#endif
#endif
#endif
......@@ -59,7 +59,7 @@ static string baseKernels = string(
"#endif\n"
"#define complexMul(a,b) ((float2)(mad(-(a).y, (b).y, (a).x * (b).x), mad((a).y, (b).x, (a).x * (b).y)))\n"
"#define conj(a) ((float2)((a).x, -(a).y))\n"
"#define conjTransp(a) ((float2)(-(a).y, (a).x))\n"
"#define conjTransp(a) ((float2)(-(a).y, (a).x))\n"
"\n"
"#define fftKernel2(a,dir) \\\n"
"{ \\\n"
......@@ -67,14 +67,14 @@ static string baseKernels = string(
" (a)[0] = c + (a)[1]; \\\n"
" (a)[1] = c - (a)[1]; \\\n"
"}\n"
"\n"
"\n"
"#define fftKernel2S(d1,d2,dir) \\\n"
"{ \\\n"
" float2 c = (d1); \\\n"
" (d1) = c + (d2); \\\n"
" (d2) = c - (d2); \\\n"
"}\n"
"\n"
"\n"
"#define fftKernel4(a,dir) \\\n"
"{ \\\n"
" fftKernel2S((a)[0], (a)[2], dir); \\\n"
......@@ -86,7 +86,7 @@ static string baseKernels = string(
" (a)[1] = (a)[2]; \\\n"
" (a)[2] = c; \\\n"
"}\n"
"\n"
"\n"
"#define fftKernel4s(a0,a1,a2,a3,dir) \\\n"
"{ \\\n"
" fftKernel2S((a0), (a2), dir); \\\n"
......@@ -96,9 +96,9 @@ static string baseKernels = string(
" fftKernel2S((a2), (a3), dir); \\\n"
" float2 c = (a1); \\\n"
" (a1) = (a2); \\\n"
" (a2) = c; \\\n"
" (a2) = c; \\\n"
"}\n"
"\n"
"\n"
"#define bitreverse8(a) \\\n"
"{ \\\n"
" float2 c; \\\n"
......@@ -109,7 +109,7 @@ static string baseKernels = string(
" (a)[3] = (a)[6]; \\\n"
" (a)[6] = c; \\\n"
"}\n"
"\n"
"\n"
"#define fftKernel8(a,dir) \\\n"
"{ \\\n"
" const float2 w1 = (float2)(0x1.6a09e6p-1f, dir*0x1.6a09e6p-1f); \\\n"
......@@ -134,7 +134,7 @@ static string baseKernels = string(
" fftKernel2S((a)[6], (a)[7], dir); \\\n"
" bitreverse8((a)); \\\n"
"}\n"
"\n"
"\n"
"#define bitreverse4x4(a) \\\n"
"{ \\\n"
" float2 c; \\\n"
......@@ -145,7 +145,7 @@ static string baseKernels = string(
" c = (a)[7]; (a)[7] = (a)[13]; (a)[13] = c; \\\n"
" c = (a)[11]; (a)[11] = (a)[14]; (a)[14] = c; \\\n"
"}\n"
"\n"
"\n"
"#define fftKernel16(a,dir) \\\n"
"{ \\\n"
" const float w0 = 0x1.d906bcp-1f; \\\n"
......@@ -170,7 +170,7 @@ static string baseKernels = string(
" fftKernel4((a) + 12, dir); \\\n"
" bitreverse4x4((a)); \\\n"
"}\n"
"\n"
"\n"
"#define bitreverse32(a) \\\n"
"{ \\\n"
" float2 c1, c2; \\\n"
......@@ -181,7 +181,7 @@ static string baseKernels = string(
" c1 = (a)[22]; (a)[22] = (a)[11]; c2 = (a)[13]; (a)[13] = c1; c1 = (a)[26]; (a)[26] = c2; c2 = (a)[21]; (a)[21] = c1; (a)[11] = c2; \\\n"
" c1 = (a)[30]; (a)[30] = (a)[15]; c2 = (a)[29]; (a)[29] = c1; c1 = (a)[27]; (a)[27] = c2; c2 = (a)[23]; (a)[23] = c1; (a)[15] = c2; \\\n"
"}\n"
"\n"
"\n"
"#define fftKernel32(a,dir) \\\n"
"{ \\\n"
" fftKernel2S((a)[0], (a)[16], dir); \\\n"
......@@ -270,7 +270,7 @@ static string twistKernelPlannar = string(
" } \\\n"
" } \\\n"
"} \\\n"
);
);
......
......@@ -59,17 +59,17 @@ static cl_int
allocateTemporaryBufferInterleaved(cl_fft_plan *plan, cl_uint batchSize)
{
cl_int err = CL_SUCCESS;
if(plan->temp_buffer_needed && plan->last_batch_size != batchSize)
if(plan->temp_buffer_needed && plan->last_batch_size != batchSize)
{
plan->last_batch_size = batchSize;
plan->last_batch_size = batchSize;
size_t tmpLength = plan->n.x * plan->n.y * plan->n.z * batchSize * 2 * sizeof(cl_float);
if(plan->tempmemobj)
clReleaseMemObject(plan->tempmemobj);
plan->tempmemobj = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &err);
}
return err;
return err;
}
static cl_int
......@@ -77,21 +77,21 @@ allocateTemporaryBufferPlannar(cl_fft_plan *plan, cl_uint batchSize)
{
cl_int err = CL_SUCCESS;
cl_int terr;
if(plan->temp_buffer_needed && plan->last_batch_size != batchSize)
if(plan->temp_buffer_needed && plan->last_batch_size != batchSize)
{
plan->last_batch_size = batchSize;
plan->last_batch_size = batchSize;
size_t tmpLength = plan->n.x * plan->n.y * plan->n.z * batchSize * sizeof(cl_float);
if(plan->tempmemobj_real)
clReleaseMemObject(plan->tempmemobj_real);
if(plan->tempmemobj_imag)
clReleaseMemObject(plan->tempmemobj_imag);
clReleaseMemObject(plan->tempmemobj_imag);
plan->tempmemobj_real = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &err);
plan->tempmemobj_imag = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &terr);
err |= terr;
}
}
return err;
}
......@@ -101,7 +101,7 @@ getKernelWorkDimensions(cl_fft_plan *plan, cl_fft_kernel_info *kernelInfo, cl_in
*lWorkItems = kernelInfo->num_workitems_per_workgroup;
int numWorkGroups = kernelInfo->num_workgroups;
int numXFormsPerWG = kernelInfo->num_xforms_per_workgroup;
switch(kernelInfo->dir)
{
case cl_fft_kernel_x:
......@@ -117,45 +117,45 @@ getKernelWorkDimensions(cl_fft_plan *plan, cl_fft_kernel_info *kernelInfo, cl_in
numWorkGroups *= *batchSize;
break;
}
*gWorkItems = numWorkGroups * *lWorkItems;
}
cl_int
clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in, cl_mem data_out,
cl_int
clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in, cl_mem data_out,
cl_int num_events, cl_event *event_list, cl_event *event )
{
{
int s;
cl_fft_plan *plan = (cl_fft_plan *) Plan;
if(plan->format != clFFT_InterleavedComplexFormat)
return CL_INVALID_VALUE;
cl_int err;
size_t gWorkItems, lWorkItems;
int inPlaceDone = -1;
cl_int isInPlace = data_in == data_out ? 1 : 0;
if((err = allocateTemporaryBufferInterleaved(plan, batchSize)) != CL_SUCCESS)
return err;
return err;
cl_mem memObj[3];
memObj[0] = data_in;
memObj[1] = data_out;
memObj[2] = plan->tempmemobj;
cl_fft_kernel_info *kernelInfo = plan->kernel_info;
int numKernels = plan->num_kernels;
int numKernelsOdd = numKernels & 1;
int currRead = 0;
int currWrite = 1;
// at least one external dram shuffle (transpose) required
if(plan->temp_buffer_needed)
if(plan->temp_buffer_needed)
{
// in-place transform
if(isInPlace)
if(isInPlace)
{
inPlaceDone = 0;
currRead = 1;
......@@ -165,36 +165,36 @@ clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchS
{
currWrite = (numKernels & 1) ? 1 : 2;
}
while(kernelInfo)
while(kernelInfo)
{
if( isInPlace && numKernelsOdd && !inPlaceDone && kernelInfo->in_place_possible)
if( isInPlace && numKernelsOdd && !inPlaceDone && kernelInfo->in_place_possible)
{
currWrite = currRead;
inPlaceDone = 1;
}
s = batchSize;
getKernelWorkDimensions(plan, kernelInfo, &s, &gWorkItems, &lWorkItems);
err |= clSetKernelArg(kernelInfo->kernel, 0, sizeof(cl_mem), &memObj[currRead]);
err |= clSetKernelArg(kernelInfo->kernel, 1, sizeof(cl_mem), &memObj[currWrite]);
err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_int), &dir);
err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_int), &s);
err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, 0, NULL, NULL);
if(err)
return err;
currRead = (currWrite == 1) ? 1 : 2;
currWrite = (currWrite == 1) ? 2 : 1;
currWrite = (currWrite == 1) ? 2 : 1;
kernelInfo = kernelInfo->next;
}
}
}
// no dram shuffle (transpose required) transform
// all kernels can execute in-place.
else {
while(kernelInfo)
{
s = batchSize;
......@@ -203,41 +203,41 @@ clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchS
err |= clSetKernelArg(kernelInfo->kernel, 1, sizeof(cl_mem), &memObj[currWrite]);
err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_int), &dir);
err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_int), &s);
err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, 0, NULL, NULL);
if(err)
return err;
return err;
currRead = 1;
currWrite = 1;
kernelInfo = kernelInfo->next;
}
}
return err;
}
cl_int
clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir,
cl_int
clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag,
cl_int num_events, cl_event *event_list, cl_event *event)
{
{
int s;
cl_fft_plan *plan = (cl_fft_plan *) Plan;
if(plan->format != clFFT_SplitComplexFormat)
return CL_INVALID_VALUE;
cl_int err;
size_t gWorkItems, lWorkItems;
int inPlaceDone = -1;
cl_int isInPlace = ((data_in_real == data_out_real) && (data_in_imag == data_out_imag)) ? 1 : 0;
if((err = allocateTemporaryBufferPlannar(plan, batchSize)) != CL_SUCCESS)
return err;
return err;
cl_mem memObj_real[3];
cl_mem memObj_imag[3];
memObj_real[0] = data_in_real;
......@@ -246,19 +246,19 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize,
memObj_imag[0] = data_in_imag;
memObj_imag[1] = data_out_imag;
memObj_imag[2] = plan->tempmemobj_imag;
cl_fft_kernel_info *kernelInfo = plan->kernel_info;
int numKernels = plan->num_kernels;
int numKernelsOdd = numKernels & 1;
int currRead = 0;
int currWrite = 1;
// at least one external dram shuffle (transpose) required
if(plan->temp_buffer_needed)
if(plan->temp_buffer_needed)
{
// in-place transform
if(isInPlace)
if(isInPlace)
{
inPlaceDone = 0;
currRead = 1;
......@@ -268,15 +268,15 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize,
{
currWrite = (numKernels & 1) ? 1 : 2;
}
while(kernelInfo)
while(kernelInfo)
{
if( isInPlace && numKernelsOdd && !inPlaceDone && kernelInfo->in_place_possible)
if( isInPlace && numKernelsOdd && !inPlaceDone && kernelInfo->in_place_possible)
{
currWrite = currRead;
inPlaceDone = 1;
}
s = batchSize;
getKernelWorkDimensions(plan, kernelInfo, &s, &gWorkItems, &lWorkItems);
err |= clSetKernelArg(kernelInfo->kernel, 0, sizeof(cl_mem), &memObj_real[currRead]);
......@@ -285,20 +285,20 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize,
err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_mem), &memObj_imag[currWrite]);
err |= clSetKernelArg(kernelInfo->kernel, 4, sizeof(cl_int), &dir);
err |= clSetKernelArg(kernelInfo->kernel, 5, sizeof(cl_int), &s);
err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, 0, NULL, NULL);
if(err)
return err;
return err;
currRead = (currWrite == 1) ? 1 : 2;
currWrite = (currWrite == 1) ? 2 : 1;
currWrite = (currWrite == 1) ? 2 : 1;
kernelInfo = kernelInfo->next;
}
}
}
// no dram shuffle (transpose required) transform
else {
while(kernelInfo)
{
s = batchSize;
......@@ -309,87 +309,87 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize,
err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_mem), &memObj_imag[currWrite]);
err |= clSetKernelArg(kernelInfo->kernel, 4, sizeof(cl_int), &dir);
err |= clSetKernelArg(kernelInfo->kernel, 5, sizeof(cl_int), &s);
err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, 0, NULL, NULL);
if(err)
return err;
return err;
currRead = 1;
currWrite = 1;
kernelInfo = kernelInfo->next;
}
}
return err;
}
cl_int
clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array,
cl_int
clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array,
size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir)
{
cl_fft_plan *plan = (cl_fft_plan *) Plan;
unsigned int N = numRows*numCols;
unsigned int nCols = numCols;
unsigned int sRow = startRow;
unsigned int rToProcess = rowsToProcess;
int d = dir;
int err = 0;
cl_device_id device_id;
err = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device_id, NULL);
if(err)
return err;
size_t gSize;
err = clGetKernelWorkGroupInfo(plan->twist_kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &gSize, NULL);
if(err)
return err;
gSize = min(128, gSize);
size_t numGlobalThreads[1] = { max(numCols / gSize, 1)*gSize };
size_t numLocalThreads[1] = { gSize };
err |= clSetKernelArg(plan->twist_kernel, 0, sizeof(cl_mem), &array);
err |= clSetKernelArg(plan->twist_kernel, 1, sizeof(unsigned int), &sRow);
err |= clSetKernelArg(plan->twist_kernel, 2, sizeof(unsigned int), &nCols);
err |= clSetKernelArg(plan->twist_kernel, 3, sizeof(unsigned int), &N);
err |= clSetKernelArg(plan->twist_kernel, 4, sizeof(unsigned int), &rToProcess);
err |= clSetKernelArg(plan->twist_kernel, 5, sizeof(int), &d);
err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL);
return err;
err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL);
return err;
}
cl_int
clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag,
cl_int
clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag,
size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir)
{
cl_fft_plan *plan = (cl_fft_plan *) Plan;
unsigned int N = numRows*numCols;
unsigned int nCols = numCols;
unsigned int sRow = startRow;
unsigned int rToProcess = rowsToProcess;
int d = dir;
int err = 0;
cl_device_id device_id;
err = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device_id, NULL);
if(err)
return err;
size_t gSize;
err = clGetKernelWorkGroupInfo(plan->twist_kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &gSize, NULL);
if(err)
return err;
gSize = min(128, gSize);
size_t numGlobalThreads[1] = { max(numCols / gSize, 1)*gSize };
size_t numLocalThreads[1] = { gSize };
err |= clSetKernelArg(plan->twist_kernel, 0, sizeof(cl_mem), &array_real);
err |= clSetKernelArg(plan->twist_kernel, 1, sizeof(cl_mem), &array_imag);
err |= clSetKernelArg(plan->twist_kernel, 2, sizeof(unsigned int), &sRow);
......@@ -397,9 +397,9 @@ clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real,
err |= clSetKernelArg(plan->twist_kernel, 4, sizeof(unsigned int), &N);
err |= clSetKernelArg(plan->twist_kernel, 5, sizeof(unsigned int), &rToProcess);
err |= clSetKernelArg(plan->twist_kernel, 6, sizeof(int), &d);
err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL);
return err;
err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL);
return err;
}
......@@ -76,83 +76,83 @@ typedef struct kernel_info_t
kernel_info_t *next;
}cl_fft_kernel_info;
typedef struct
typedef struct
{
// context in which fft resources are created and kernels are executed
cl_context context;
// size of signal
clFFT_Dim3 n;
// dimension of transform ... must be either 1D, 2D or 3D
clFFT_Dimension dim;
// data format ... must be either interleaved or plannar
clFFT_DataFormat format;
// string containing kernel source. Generated at runtime based on
// n, dim, format and other parameters
string *kernel_string;
// CL program containing source and kernel this particular
// CL program containing source and kernel this particular
// n, dim, data format
cl_program program;
// linked list of kernels which needs to be executed for this fft
cl_fft_kernel_info *kernel_info;
// number of kernels