Commit 05167ff1 authored by Oliver Bock
Browse files

Detabbed sources

parent 7df18002
This diff is collapsed.
...@@ -63,44 +63,44 @@ extern "C" { ...@@ -63,44 +63,44 @@ extern "C" {
// XForm type
//
// Direction of the transform requested from an execute/twist call.
// NOTE(review): the -1 / +1 values presumably mirror the sign convention
// used by the generated kernels -- confirm before relying on the numbers.
typedef enum
{
    clFFT_Forward = -1,
    clFFT_Inverse =  1
} clFFT_Direction;
// XForm dimension
//
// Selects a 1D, 2D or 3D transform.
// The enumerator values are NOT contiguous (clFFT_3D is 3, not 2), so do
// not compute the rank as "value + 1" and do not use the value as a dense
// array index.
typedef enum
{
    clFFT_1D = 0,
    clFFT_2D = 1,
    clFFT_3D = 3
} clFFT_Dimension;
// XForm Data type
//
// Memory layout of the complex samples handled by a plan:
//   - split:       real and imaginary parts live in separate arrays
//                  (compare clFFT_SplitComplex)
//   - interleaved: each sample is a (real, imag) pair
//                  (compare clFFT_Complex)
typedef enum
{
    clFFT_SplitComplexFormat       = 0,
    clFFT_InterleavedComplexFormat = 1
} clFFT_DataFormat;
// Triple of unsigned extents, one per axis (x, y, z).
// It is passed as the `n` argument of clFFT_CreatePlan.
// NOTE(review): how unused axes must be filled for 1D/2D plans is not
// visible here -- confirm against clFFT_CreatePlan.
typedef struct
{
    unsigned int x;
    unsigned int y;
    unsigned int z;
} clFFT_Dim3;
// Split (planar) complex signal: two separate float arrays, one holding
// the real parts and one holding the imaginary parts.
// The struct only stores the pointers; nothing here defines who owns or
// frees the arrays.
typedef struct
{
    float *real;
    float *imag;
} clFFT_SplitComplex;
// One interleaved complex sample: the real part followed by the
// imaginary part, both single precision.
typedef struct
{
    float real;
    float imag;
} clFFT_Complex;
// Opaque handle to an FFT plan. Callers receive it from clFFT_CreatePlan
// and pass it back to the other clFFT_* entry points; the concrete type
// behind the pointer is private to the implementation.
typedef void *clFFT_Plan;
...@@ -110,19 +110,19 @@ clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension d ...@@ -110,19 +110,19 @@ clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension d
// Destroys `plan`.
// NOTE(review): presumably the handle must come from clFFT_CreatePlan and
// must not be used afterwards -- the implementation is not visible here.
void clFFT_DestroyPlan( clFFT_Plan plan );
cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir, cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in, cl_mem data_out, cl_mem data_in, cl_mem data_out,
cl_int num_events, cl_event *event_list, cl_event *event ); cl_int num_events, cl_event *event_list, cl_event *event );
cl_int clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir, cl_int clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag, cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag,
cl_int num_events, cl_event *event_list, cl_event *event ); cl_int num_events, cl_event *event_list, cl_event *event );
// Twist step on an interleaved buffer `array`, viewed as numRows x numCols,
// for the row range given by startRow and rowsToProcess.
// NOTE(review): presumably this is the twiddle multiplication used when a
// very large 1D FFT is split into passes (cl_fft_plan::twist_kernel is
// commented as serving that purpose) -- confirm against the implementation.
//
// Returns a cl_int status code.
cl_int clFFT_1DTwistInterleaved( clFFT_Plan Plan, cl_command_queue queue, cl_mem array,
                                 size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir );
// Planar (split) counterpart of clFFT_1DTwistInterleaved: the signal is
// passed as separate real and imaginary buffers, viewed as
// numRows x numCols, for the row range given by startRow and rowsToProcess.
// The spelling "Plannar" is part of the public symbol name and is kept
// as-is.
//
// Returns a cl_int status code.
cl_int clFFT_1DTwistPlannar( clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag,
                             size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir );
// Writes a description of `plan` to the stdio stream `file`.
// NOTE(review): the output format and the handling of a NULL `file` are
// not visible here.
void clFFT_DumpPlan( clFFT_Plan plan, FILE *file );
......
This diff is collapsed.
This diff is collapsed.
...@@ -58,104 +58,104 @@ using namespace std; ...@@ -58,104 +58,104 @@ using namespace std;
// Axis a generated FFT kernel operates along (x, y or z).
// The enumerators take the default values 0, 1 and 2.
typedef enum kernel_dir_t
{
    cl_fft_kernel_x,
    cl_fft_kernel_y,
    cl_fft_kernel_z
} cl_fft_kernel_dir;
typedef struct kernel_info_t typedef struct kernel_info_t
{ {
cl_kernel kernel; cl_kernel kernel;
char *kernel_name; char *kernel_name;
size_t lmem_size; size_t lmem_size;
size_t num_workgroups; size_t num_workgroups;
size_t num_xforms_per_workgroup; size_t num_xforms_per_workgroup;
size_t num_workitems_per_workgroup; size_t num_workitems_per_workgroup;
cl_fft_kernel_dir dir; cl_fft_kernel_dir dir;
int in_place_possible; int in_place_possible;
kernel_info_t *next; kernel_info_t *next;
}cl_fft_kernel_info; }cl_fft_kernel_info;
typedef struct typedef struct
{ {
// context in which fft resources are created and kernels are executed // context in which fft resources are created and kernels are executed
cl_context context; cl_context context;
// size of signal // size of signal
clFFT_Dim3 n; clFFT_Dim3 n;
// dimension of transform ... must be either 1D, 2D or 3D // dimension of transform ... must be either 1D, 2D or 3D
clFFT_Dimension dim; clFFT_Dimension dim;
// data format ... must be either interleaved or plannar // data format ... must be either interleaved or plannar
clFFT_DataFormat format; clFFT_DataFormat format;
// string containing kernel source. Generated at runtime based on // string containing kernel source. Generated at runtime based on
// n, dim, format and other parameters // n, dim, format and other parameters
string *kernel_string; string *kernel_string;
// CL program containing source and kernel this particular // CL program containing source and kernel this particular
// n, dim, data format // n, dim, data format
cl_program program; cl_program program;
// linked list of kernels which needs to be executed for this fft // linked list of kernels which needs to be executed for this fft
cl_fft_kernel_info *kernel_info; cl_fft_kernel_info *kernel_info;
// number of kernels // number of kernels
int num_kernels; int num_kernels;
// twist kernel for virtualizing fft of very large sizes that do not // twist kernel for virtualizing fft of very large sizes that do not
// fit in GPU global memory // fit in GPU global memory
cl_kernel twist_kernel; cl_kernel twist_kernel;
// flag indicating if temporary intermediate buffer is needed or not. // flag indicating if temporary intermediate buffer is needed or not.
// this depends on fft kernels being executed and if transform is // this depends on fft kernels being executed and if transform is
// in-place or out-of-place. e.g. Local memory fft (say 1D 1024 ... // in-place or out-of-place. e.g. Local memory fft (say 1D 1024 ...
// one that does not require global transpose do not need temporary buffer) // one that does not require global transpose do not need temporary buffer)
// 2D 1024x1024 out-of-place fft however do require intermediate buffer. // 2D 1024x1024 out-of-place fft however do require intermediate buffer.
// If temp buffer is needed, its allocation is lazy i.e. its not allocated // If temp buffer is needed, its allocation is lazy i.e. its not allocated
// until its needed // until its needed
cl_int temp_buffer_needed; cl_int temp_buffer_needed;
// Batch size is runtime parameter and size of temporary buffer (if needed) // Batch size is runtime parameter and size of temporary buffer (if needed)
// depends on batch size. Allocation of temporary buffer is lazy i.e. its // depends on batch size. Allocation of temporary buffer is lazy i.e. its
// only created when needed. Once its created at first call of clFFT_Executexxx // only created when needed. Once its created at first call of clFFT_Executexxx
// it is not allocated next time if next time clFFT_Executexxx is called with // it is not allocated next time if next time clFFT_Executexxx is called with
// batch size different than the first call. last_batch_size caches the last // batch size different than the first call. last_batch_size caches the last
// batch size with which this plan is used so that we dont keep allocating/deallocating // batch size with which this plan is used so that we dont keep allocating/deallocating
// temp buffer if same batch size is used again and again. // temp buffer if same batch size is used again and again.
size_t last_batch_size; size_t last_batch_size;
// temporary buffer for interleaved plan // temporary buffer for interleaved plan
cl_mem tempmemobj; cl_mem tempmemobj;
// temporary buffer for planner plan. Only one of tempmemobj or // temporary buffer for planner plan. Only one of tempmemobj or
// (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending // (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending
// data format of plan (plannar or interleaved) // data format of plan (plannar or interleaved)
cl_mem tempmemobj_real, tempmemobj_imag; cl_mem tempmemobj_real, tempmemobj_imag;
// Maximum size of signal for which local memory transposed based // Maximum size of signal for which local memory transposed based
// fft is sufficient i.e. no global mem transpose (communication) // fft is sufficient i.e. no global mem transpose (communication)
// is needed // is needed
size_t max_localmem_fft_size; size_t max_localmem_fft_size;
// Maximum work items per work group allowed. This, along with max_radix below controls // Maximum work items per work group allowed. This, along with max_radix below controls
// maximum local memory being used by fft kernels of this plan. Set to 256 by default // maximum local memory being used by fft kernels of this plan. Set to 256 by default
size_t max_work_item_per_workgroup; size_t max_work_item_per_workgroup;
// Maximum base radix for local memory fft ... this controls the maximum register // Maximum base radix for local memory fft ... this controls the maximum register
// space used by work items. Currently defaults to 16 // space used by work items. Currently defaults to 16
size_t max_radix; size_t max_radix;
// Device depended parameter that tells how many work-items need to be read consecutive // Device depended parameter that tells how many work-items need to be read consecutive
// values to make sure global memory access by work-items of a work-group result in // values to make sure global memory access by work-items of a work-group result in
// coalesced memory access to utilize full bandwidth e.g. on NVidia tesla, this is 16 // coalesced memory access to utilize full bandwidth e.g. on NVidia tesla, this is 16
size_t min_mem_coalesce_width; size_t min_mem_coalesce_width;
// Number of local memory banks. This is used to geneate kernel with local memory // Number of local memory banks. This is used to geneate kernel with local memory
// transposes with appropriate padding to avoid bank conflicts to local memory // transposes with appropriate padding to avoid bank conflicts to local memory
// e.g. on NVidia it is 16. // e.g. on NVidia it is 16.
size_t num_local_mem_banks; size_t num_local_mem_banks;
}cl_fft_plan; }cl_fft_plan;
// Declaration only; the definition is outside this excerpt.
// NOTE(review): presumably this builds the 1D kernel(s) for `plan` along
// axis `dir` -- confirm against the definition.
void FFT1D( cl_fft_plan *plan, cl_fft_kernel_dir dir );
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment