Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
einsteinathome
libclfft
Commits
05167ff1
Commit
05167ff1
authored
Mar 18, 2011
by
Oliver Bock
Browse files
Detabbed sources
parent
7df18002
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
example/main.cpp
View file @
05167ff1
This diff is collapsed.
Click to expand it.
include/clFFT.h
View file @
05167ff1
...
@@ -63,44 +63,44 @@ extern "C" {
...
@@ -63,44 +63,44 @@ extern "C" {
// XForm type
// XForm type
typedef
enum
typedef
enum
{
{
clFFT_Forward
=
-
1
,
clFFT_Forward
=
-
1
,
clFFT_Inverse
=
1
clFFT_Inverse
=
1
}
clFFT_Direction
;
}
clFFT_Direction
;
// XForm dimension
// XForm dimension
typedef
enum
typedef
enum
{
{
clFFT_1D
=
0
,
clFFT_1D
=
0
,
clFFT_2D
=
1
,
clFFT_2D
=
1
,
clFFT_3D
=
3
clFFT_3D
=
3
}
clFFT_Dimension
;
}
clFFT_Dimension
;
// XForm Data type
// XForm Data type
typedef
enum
typedef
enum
{
{
clFFT_SplitComplexFormat
=
0
,
clFFT_SplitComplexFormat
=
0
,
clFFT_InterleavedComplexFormat
=
1
clFFT_InterleavedComplexFormat
=
1
}
clFFT_DataFormat
;
}
clFFT_DataFormat
;
typedef
struct
typedef
struct
{
{
unsigned
int
x
;
unsigned
int
x
;
unsigned
int
y
;
unsigned
int
y
;
unsigned
int
z
;
unsigned
int
z
;
}
clFFT_Dim3
;
}
clFFT_Dim3
;
typedef
struct
typedef
struct
{
{
float
*
real
;
float
*
real
;
float
*
imag
;
float
*
imag
;
}
clFFT_SplitComplex
;
}
clFFT_SplitComplex
;
typedef
struct
typedef
struct
{
{
float
real
;
float
real
;
float
imag
;
float
imag
;
}
clFFT_Complex
;
}
clFFT_Complex
;
typedef
void
*
clFFT_Plan
;
typedef
void
*
clFFT_Plan
;
...
@@ -110,19 +110,19 @@ clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension d
...
@@ -110,19 +110,19 @@ clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension d
void
clFFT_DestroyPlan
(
clFFT_Plan
plan
);
void
clFFT_DestroyPlan
(
clFFT_Plan
plan
);
cl_int
clFFT_ExecuteInterleaved
(
cl_command_queue
queue
,
clFFT_Plan
plan
,
cl_int
batchSize
,
clFFT_Direction
dir
,
cl_int
clFFT_ExecuteInterleaved
(
cl_command_queue
queue
,
clFFT_Plan
plan
,
cl_int
batchSize
,
clFFT_Direction
dir
,
cl_mem
data_in
,
cl_mem
data_out
,
cl_mem
data_in
,
cl_mem
data_out
,
cl_int
num_events
,
cl_event
*
event_list
,
cl_event
*
event
);
cl_int
num_events
,
cl_event
*
event_list
,
cl_event
*
event
);
cl_int
clFFT_ExecutePlannar
(
cl_command_queue
queue
,
clFFT_Plan
plan
,
cl_int
batchSize
,
clFFT_Direction
dir
,
cl_int
clFFT_ExecutePlannar
(
cl_command_queue
queue
,
clFFT_Plan
plan
,
cl_int
batchSize
,
clFFT_Direction
dir
,
cl_mem
data_in_real
,
cl_mem
data_in_imag
,
cl_mem
data_out_real
,
cl_mem
data_out_imag
,
cl_mem
data_in_real
,
cl_mem
data_in_imag
,
cl_mem
data_out_real
,
cl_mem
data_out_imag
,
cl_int
num_events
,
cl_event
*
event_list
,
cl_event
*
event
);
cl_int
num_events
,
cl_event
*
event_list
,
cl_event
*
event
);
cl_int
clFFT_1DTwistInterleaved
(
clFFT_Plan
Plan
,
cl_command_queue
queue
,
cl_mem
array
,
cl_int
clFFT_1DTwistInterleaved
(
clFFT_Plan
Plan
,
cl_command_queue
queue
,
cl_mem
array
,
size_t
numRows
,
size_t
numCols
,
size_t
startRow
,
size_t
rowsToProcess
,
clFFT_Direction
dir
);
size_t
numRows
,
size_t
numCols
,
size_t
startRow
,
size_t
rowsToProcess
,
clFFT_Direction
dir
);
cl_int
clFFT_1DTwistPlannar
(
clFFT_Plan
Plan
,
cl_command_queue
queue
,
cl_mem
array_real
,
cl_mem
array_imag
,
cl_int
clFFT_1DTwistPlannar
(
clFFT_Plan
Plan
,
cl_command_queue
queue
,
cl_mem
array_real
,
cl_mem
array_imag
,
size_t
numRows
,
size_t
numCols
,
size_t
startRow
,
size_t
rowsToProcess
,
clFFT_Direction
dir
);
size_t
numRows
,
size_t
numCols
,
size_t
startRow
,
size_t
rowsToProcess
,
clFFT_Direction
dir
);
void
clFFT_DumpPlan
(
clFFT_Plan
plan
,
FILE
*
file
);
void
clFFT_DumpPlan
(
clFFT_Plan
plan
,
FILE
*
file
);
...
...
src/fft_base_kernels.h
View file @
05167ff1
This diff is collapsed.
Click to expand it.
src/fft_execute.cpp
View file @
05167ff1
This diff is collapsed.
Click to expand it.
src/fft_internal.h
View file @
05167ff1
...
@@ -58,104 +58,104 @@ using namespace std;
...
@@ -58,104 +58,104 @@ using namespace std;
typedef
enum
kernel_dir_t
typedef
enum
kernel_dir_t
{
{
cl_fft_kernel_x
,
cl_fft_kernel_x
,
cl_fft_kernel_y
,
cl_fft_kernel_y
,
cl_fft_kernel_z
cl_fft_kernel_z
}
cl_fft_kernel_dir
;
}
cl_fft_kernel_dir
;
typedef
struct
kernel_info_t
typedef
struct
kernel_info_t
{
{
cl_kernel
kernel
;
cl_kernel
kernel
;
char
*
kernel_name
;
char
*
kernel_name
;
size_t
lmem_size
;
size_t
lmem_size
;
size_t
num_workgroups
;
size_t
num_workgroups
;
size_t
num_xforms_per_workgroup
;
size_t
num_xforms_per_workgroup
;
size_t
num_workitems_per_workgroup
;
size_t
num_workitems_per_workgroup
;
cl_fft_kernel_dir
dir
;
cl_fft_kernel_dir
dir
;
int
in_place_possible
;
int
in_place_possible
;
kernel_info_t
*
next
;
kernel_info_t
*
next
;
}
cl_fft_kernel_info
;
}
cl_fft_kernel_info
;
typedef
struct
typedef
struct
{
{
// context in which fft resources are created and kernels are executed
// context in which fft resources are created and kernels are executed
cl_context
context
;
cl_context
context
;
// size of signal
// size of signal
clFFT_Dim3
n
;
clFFT_Dim3
n
;
// dimension of transform ... must be either 1D, 2D or 3D
// dimension of transform ... must be either 1D, 2D or 3D
clFFT_Dimension
dim
;
clFFT_Dimension
dim
;
// data format ... must be either interleaved or plannar
// data format ... must be either interleaved or plannar
clFFT_DataFormat
format
;
clFFT_DataFormat
format
;
// string containing kernel source. Generated at runtime based on
// string containing kernel source. Generated at runtime based on
// n, dim, format and other parameters
// n, dim, format and other parameters
string
*
kernel_string
;
string
*
kernel_string
;
// CL program containing source and kernel this particular
// CL program containing source and kernel this particular
// n, dim, data format
// n, dim, data format
cl_program
program
;
cl_program
program
;
// linked list of kernels which needs to be executed for this fft
// linked list of kernels which needs to be executed for this fft
cl_fft_kernel_info
*
kernel_info
;
cl_fft_kernel_info
*
kernel_info
;
// number of kernels
// number of kernels
int
num_kernels
;
int
num_kernels
;
// twist kernel for virtualizing fft of very large sizes that do not
// twist kernel for virtualizing fft of very large sizes that do not
// fit in GPU global memory
// fit in GPU global memory
cl_kernel
twist_kernel
;
cl_kernel
twist_kernel
;
// flag indicating if temporary intermediate buffer is needed or not.
// flag indicating if temporary intermediate buffer is needed or not.
// this depends on fft kernels being executed and if transform is
// this depends on fft kernels being executed and if transform is
// in-place or out-of-place. e.g. Local memory fft (say 1D 1024 ...
// in-place or out-of-place. e.g. Local memory fft (say 1D 1024 ...
// one that does not require global transpose do not need temporary buffer)
// one that does not require global transpose do not need temporary buffer)
// 2D 1024x1024 out-of-place fft however do require intermediate buffer.
// 2D 1024x1024 out-of-place fft however do require intermediate buffer.
// If temp buffer is needed, its allocation is lazy i.e. its not allocated
// If temp buffer is needed, its allocation is lazy i.e. its not allocated
// until its needed
// until its needed
cl_int
temp_buffer_needed
;
cl_int
temp_buffer_needed
;
// Batch size is runtime parameter and size of temporary buffer (if needed)
// Batch size is runtime parameter and size of temporary buffer (if needed)
// depends on batch size. Allocation of temporary buffer is lazy i.e. its
// depends on batch size. Allocation of temporary buffer is lazy i.e. its
// only created when needed. Once its created at first call of clFFT_Executexxx
// only created when needed. Once its created at first call of clFFT_Executexxx
// it is not allocated next time if next time clFFT_Executexxx is called with
// it is not allocated next time if next time clFFT_Executexxx is called with
// batch size different than the first call. last_batch_size caches the last
// batch size different than the first call. last_batch_size caches the last
// batch size with which this plan is used so that we dont keep allocating/deallocating
// batch size with which this plan is used so that we dont keep allocating/deallocating
// temp buffer if same batch size is used again and again.
// temp buffer if same batch size is used again and again.
size_t
last_batch_size
;
size_t
last_batch_size
;
// temporary buffer for interleaved plan
// temporary buffer for interleaved plan
cl_mem
tempmemobj
;
cl_mem
tempmemobj
;
// temporary buffer for planner plan. Only one of tempmemobj or
// temporary buffer for planner plan. Only one of tempmemobj or
// (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending
// (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending
// data format of plan (plannar or interleaved)
// data format of plan (plannar or interleaved)
cl_mem
tempmemobj_real
,
tempmemobj_imag
;
cl_mem
tempmemobj_real
,
tempmemobj_imag
;
// Maximum size of signal for which local memory transposed based
// Maximum size of signal for which local memory transposed based
// fft is sufficient i.e. no global mem transpose (communication)
// fft is sufficient i.e. no global mem transpose (communication)
// is needed
// is needed
size_t
max_localmem_fft_size
;
size_t
max_localmem_fft_size
;
// Maximum work items per work group allowed. This, along with max_radix below controls
// Maximum work items per work group allowed. This, along with max_radix below controls
// maximum local memory being used by fft kernels of this plan. Set to 256 by default
// maximum local memory being used by fft kernels of this plan. Set to 256 by default
size_t
max_work_item_per_workgroup
;
size_t
max_work_item_per_workgroup
;
// Maximum base radix for local memory fft ... this controls the maximum register
// Maximum base radix for local memory fft ... this controls the maximum register
// space used by work items. Currently defaults to 16
// space used by work items. Currently defaults to 16
size_t
max_radix
;
size_t
max_radix
;
// Device depended parameter that tells how many work-items need to be read consecutive
// Device depended parameter that tells how many work-items need to be read consecutive
// values to make sure global memory access by work-items of a work-group result in
// values to make sure global memory access by work-items of a work-group result in
// coalesced memory access to utilize full bandwidth e.g. on NVidia tesla, this is 16
// coalesced memory access to utilize full bandwidth e.g. on NVidia tesla, this is 16
size_t
min_mem_coalesce_width
;
size_t
min_mem_coalesce_width
;
// Number of local memory banks. This is used to geneate kernel with local memory
// Number of local memory banks. This is used to geneate kernel with local memory
// transposes with appropriate padding to avoid bank conflicts to local memory
// transposes with appropriate padding to avoid bank conflicts to local memory
// e.g. on NVidia it is 16.
// e.g. on NVidia it is 16.
size_t
num_local_mem_banks
;
size_t
num_local_mem_banks
;
}
cl_fft_plan
;
}
cl_fft_plan
;
void
FFT1D
(
cl_fft_plan
*
plan
,
cl_fft_kernel_dir
dir
);
void
FFT1D
(
cl_fft_plan
*
plan
,
cl_fft_kernel_dir
dir
);
...
...
src/fft_kernelstring.cpp
View file @
05167ff1
This diff is collapsed.
Click to expand it.
src/fft_setup.cpp
View file @
05167ff1
This diff is collapsed.
Click to expand it.
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment