Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
einsteinathome
libclfft
Commits
7df18002
Commit
7df18002
authored
Mar 18, 2011
by
Oliver Bock
Browse files
Removing trailing whitespaces
parent
bc1bc3e9
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
example/main.cpp
View file @
7df18002
This diff is collapsed.
Click to expand it.
include/clFFT.h
View file @
7df18002
...
...
@@ -61,11 +61,11 @@ extern "C" {
#endif
// XForm type
typedef
enum
typedef
enum
{
clFFT_Forward
=
-
1
,
clFFT_Inverse
=
1
}
clFFT_Direction
;
// XForm dimension
...
...
@@ -74,7 +74,7 @@ typedef enum
clFFT_1D
=
0
,
clFFT_2D
=
1
,
clFFT_3D
=
3
}
clFFT_Dimension
;
// XForm Data type
...
...
@@ -89,8 +89,8 @@ typedef struct
unsigned
int
x
;
unsigned
int
y
;
unsigned
int
z
;
}
clFFT_Dim3
;
}
clFFT_Dim3
;
typedef
struct
{
float
*
real
;
...
...
@@ -103,31 +103,31 @@ typedef struct
float
imag
;
}
clFFT_Complex
;
typedef
void
*
clFFT_Plan
;
typedef
void
*
clFFT_Plan
;
clFFT_Plan
clFFT_CreatePlan
(
cl_context
context
,
clFFT_Dim3
n
,
clFFT_Dimension
dim
,
clFFT_DataFormat
dataFormat
,
cl_int
*
error_code
);
void
clFFT_DestroyPlan
(
clFFT_Plan
plan
);
cl_int
clFFT_ExecuteInterleaved
(
cl_command_queue
queue
,
clFFT_Plan
plan
,
cl_int
batchSize
,
clFFT_Direction
dir
,
cl_int
clFFT_ExecuteInterleaved
(
cl_command_queue
queue
,
clFFT_Plan
plan
,
cl_int
batchSize
,
clFFT_Direction
dir
,
cl_mem
data_in
,
cl_mem
data_out
,
cl_int
num_events
,
cl_event
*
event_list
,
cl_event
*
event
);
cl_int
clFFT_ExecutePlannar
(
cl_command_queue
queue
,
clFFT_Plan
plan
,
cl_int
batchSize
,
clFFT_Direction
dir
,
cl_int
clFFT_ExecutePlannar
(
cl_command_queue
queue
,
clFFT_Plan
plan
,
cl_int
batchSize
,
clFFT_Direction
dir
,
cl_mem
data_in_real
,
cl_mem
data_in_imag
,
cl_mem
data_out_real
,
cl_mem
data_out_imag
,
cl_int
num_events
,
cl_event
*
event_list
,
cl_event
*
event
);
cl_int
clFFT_1DTwistInterleaved
(
clFFT_Plan
Plan
,
cl_command_queue
queue
,
cl_mem
array
,
cl_int
clFFT_1DTwistInterleaved
(
clFFT_Plan
Plan
,
cl_command_queue
queue
,
cl_mem
array
,
size_t
numRows
,
size_t
numCols
,
size_t
startRow
,
size_t
rowsToProcess
,
clFFT_Direction
dir
);
cl_int
clFFT_1DTwistPlannar
(
clFFT_Plan
Plan
,
cl_command_queue
queue
,
cl_mem
array_real
,
cl_mem
array_imag
,
cl_int
clFFT_1DTwistPlannar
(
clFFT_Plan
Plan
,
cl_command_queue
queue
,
cl_mem
array_real
,
cl_mem
array_imag
,
size_t
numRows
,
size_t
numCols
,
size_t
startRow
,
size_t
rowsToProcess
,
clFFT_Direction
dir
);
void
clFFT_DumpPlan
(
clFFT_Plan
plan
,
FILE
*
file
);
void
clFFT_DumpPlan
(
clFFT_Plan
plan
,
FILE
*
file
);
#ifdef __cplusplus
}
#endif
#endif
#endif
src/fft_base_kernels.h
View file @
7df18002
...
...
@@ -59,7 +59,7 @@ static string baseKernels = string(
"#endif
\n
"
"#define complexMul(a,b) ((float2)(mad(-(a).y, (b).y, (a).x * (b).x), mad((a).y, (b).x, (a).x * (b).y)))
\n
"
"#define conj(a) ((float2)((a).x, -(a).y))
\n
"
"#define conjTransp(a) ((float2)(-(a).y, (a).x))
\n
"
"#define conjTransp(a) ((float2)(-(a).y, (a).x))
\n
"
"
\n
"
"#define fftKernel2(a,dir)
\\\n
"
"{
\\\n
"
...
...
@@ -67,14 +67,14 @@ static string baseKernels = string(
" (a)[0] = c + (a)[1];
\\\n
"
" (a)[1] = c - (a)[1];
\\\n
"
"}
\n
"
"
\n
"
"
\n
"
"#define fftKernel2S(d1,d2,dir)
\\\n
"
"{
\\\n
"
" float2 c = (d1);
\\\n
"
" (d1) = c + (d2);
\\\n
"
" (d2) = c - (d2);
\\\n
"
"}
\n
"
"
\n
"
"
\n
"
"#define fftKernel4(a,dir)
\\\n
"
"{
\\\n
"
" fftKernel2S((a)[0], (a)[2], dir);
\\\n
"
...
...
@@ -86,7 +86,7 @@ static string baseKernels = string(
" (a)[1] = (a)[2];
\\\n
"
" (a)[2] = c;
\\\n
"
"}
\n
"
"
\n
"
"
\n
"
"#define fftKernel4s(a0,a1,a2,a3,dir)
\\\n
"
"{
\\\n
"
" fftKernel2S((a0), (a2), dir);
\\\n
"
...
...
@@ -96,9 +96,9 @@ static string baseKernels = string(
" fftKernel2S((a2), (a3), dir);
\\\n
"
" float2 c = (a1);
\\\n
"
" (a1) = (a2);
\\\n
"
" (a2) = c;
\\\n
"
" (a2) = c;
\\\n
"
"}
\n
"
"
\n
"
"
\n
"
"#define bitreverse8(a)
\\\n
"
"{
\\\n
"
" float2 c;
\\\n
"
...
...
@@ -109,7 +109,7 @@ static string baseKernels = string(
" (a)[3] = (a)[6];
\\\n
"
" (a)[6] = c;
\\\n
"
"}
\n
"
"
\n
"
"
\n
"
"#define fftKernel8(a,dir)
\\\n
"
"{
\\\n
"
" const float2 w1 = (float2)(0x1.6a09e6p-1f, dir*0x1.6a09e6p-1f);
\\\n
"
...
...
@@ -134,7 +134,7 @@ static string baseKernels = string(
" fftKernel2S((a)[6], (a)[7], dir);
\\\n
"
" bitreverse8((a));
\\\n
"
"}
\n
"
"
\n
"
"
\n
"
"#define bitreverse4x4(a)
\\\n
"
"{
\\\n
"
" float2 c;
\\\n
"
...
...
@@ -145,7 +145,7 @@ static string baseKernels = string(
" c = (a)[7]; (a)[7] = (a)[13]; (a)[13] = c;
\\\n
"
" c = (a)[11]; (a)[11] = (a)[14]; (a)[14] = c;
\\\n
"
"}
\n
"
"
\n
"
"
\n
"
"#define fftKernel16(a,dir)
\\\n
"
"{
\\\n
"
" const float w0 = 0x1.d906bcp-1f;
\\\n
"
...
...
@@ -170,7 +170,7 @@ static string baseKernels = string(
" fftKernel4((a) + 12, dir);
\\\n
"
" bitreverse4x4((a));
\\\n
"
"}
\n
"
"
\n
"
"
\n
"
"#define bitreverse32(a)
\\\n
"
"{
\\\n
"
" float2 c1, c2;
\\\n
"
...
...
@@ -181,7 +181,7 @@ static string baseKernels = string(
" c1 = (a)[22]; (a)[22] = (a)[11]; c2 = (a)[13]; (a)[13] = c1; c1 = (a)[26]; (a)[26] = c2; c2 = (a)[21]; (a)[21] = c1; (a)[11] = c2;
\\\n
"
" c1 = (a)[30]; (a)[30] = (a)[15]; c2 = (a)[29]; (a)[29] = c1; c1 = (a)[27]; (a)[27] = c2; c2 = (a)[23]; (a)[23] = c1; (a)[15] = c2;
\\\n
"
"}
\n
"
"
\n
"
"
\n
"
"#define fftKernel32(a,dir)
\\\n
"
"{
\\\n
"
" fftKernel2S((a)[0], (a)[16], dir);
\\\n
"
...
...
@@ -270,7 +270,7 @@ static string twistKernelPlannar = string(
" }
\\\n
"
" }
\\\n
"
"}
\\\n
"
);
);
...
...
src/fft_execute.cpp
View file @
7df18002
...
...
@@ -59,17 +59,17 @@ static cl_int
allocateTemporaryBufferInterleaved
(
cl_fft_plan
*
plan
,
cl_uint
batchSize
)
{
cl_int
err
=
CL_SUCCESS
;
if
(
plan
->
temp_buffer_needed
&&
plan
->
last_batch_size
!=
batchSize
)
if
(
plan
->
temp_buffer_needed
&&
plan
->
last_batch_size
!=
batchSize
)
{
plan
->
last_batch_size
=
batchSize
;
plan
->
last_batch_size
=
batchSize
;
size_t
tmpLength
=
plan
->
n
.
x
*
plan
->
n
.
y
*
plan
->
n
.
z
*
batchSize
*
2
*
sizeof
(
cl_float
);
if
(
plan
->
tempmemobj
)
clReleaseMemObject
(
plan
->
tempmemobj
);
plan
->
tempmemobj
=
clCreateBuffer
(
plan
->
context
,
CL_MEM_READ_WRITE
,
tmpLength
,
NULL
,
&
err
);
}
return
err
;
return
err
;
}
static
cl_int
...
...
@@ -77,21 +77,21 @@ allocateTemporaryBufferPlannar(cl_fft_plan *plan, cl_uint batchSize)
{
cl_int
err
=
CL_SUCCESS
;
cl_int
terr
;
if
(
plan
->
temp_buffer_needed
&&
plan
->
last_batch_size
!=
batchSize
)
if
(
plan
->
temp_buffer_needed
&&
plan
->
last_batch_size
!=
batchSize
)
{
plan
->
last_batch_size
=
batchSize
;
plan
->
last_batch_size
=
batchSize
;
size_t
tmpLength
=
plan
->
n
.
x
*
plan
->
n
.
y
*
plan
->
n
.
z
*
batchSize
*
sizeof
(
cl_float
);
if
(
plan
->
tempmemobj_real
)
clReleaseMemObject
(
plan
->
tempmemobj_real
);
if
(
plan
->
tempmemobj_imag
)
clReleaseMemObject
(
plan
->
tempmemobj_imag
);
clReleaseMemObject
(
plan
->
tempmemobj_imag
);
plan
->
tempmemobj_real
=
clCreateBuffer
(
plan
->
context
,
CL_MEM_READ_WRITE
,
tmpLength
,
NULL
,
&
err
);
plan
->
tempmemobj_imag
=
clCreateBuffer
(
plan
->
context
,
CL_MEM_READ_WRITE
,
tmpLength
,
NULL
,
&
terr
);
err
|=
terr
;
}
}
return
err
;
}
...
...
@@ -101,7 +101,7 @@ getKernelWorkDimensions(cl_fft_plan *plan, cl_fft_kernel_info *kernelInfo, cl_in
*
lWorkItems
=
kernelInfo
->
num_workitems_per_workgroup
;
int
numWorkGroups
=
kernelInfo
->
num_workgroups
;
int
numXFormsPerWG
=
kernelInfo
->
num_xforms_per_workgroup
;
switch
(
kernelInfo
->
dir
)
{
case
cl_fft_kernel_x
:
...
...
@@ -117,45 +117,45 @@ getKernelWorkDimensions(cl_fft_plan *plan, cl_fft_kernel_info *kernelInfo, cl_in
numWorkGroups
*=
*
batchSize
;
break
;
}
*
gWorkItems
=
numWorkGroups
*
*
lWorkItems
;
}
cl_int
clFFT_ExecuteInterleaved
(
cl_command_queue
queue
,
clFFT_Plan
Plan
,
cl_int
batchSize
,
clFFT_Direction
dir
,
cl_mem
data_in
,
cl_mem
data_out
,
cl_int
clFFT_ExecuteInterleaved
(
cl_command_queue
queue
,
clFFT_Plan
Plan
,
cl_int
batchSize
,
clFFT_Direction
dir
,
cl_mem
data_in
,
cl_mem
data_out
,
cl_int
num_events
,
cl_event
*
event_list
,
cl_event
*
event
)
{
{
int
s
;
cl_fft_plan
*
plan
=
(
cl_fft_plan
*
)
Plan
;
if
(
plan
->
format
!=
clFFT_InterleavedComplexFormat
)
return
CL_INVALID_VALUE
;
cl_int
err
;
size_t
gWorkItems
,
lWorkItems
;
int
inPlaceDone
=
-
1
;
cl_int
isInPlace
=
data_in
==
data_out
?
1
:
0
;
if
((
err
=
allocateTemporaryBufferInterleaved
(
plan
,
batchSize
))
!=
CL_SUCCESS
)
return
err
;
return
err
;
cl_mem
memObj
[
3
];
memObj
[
0
]
=
data_in
;
memObj
[
1
]
=
data_out
;
memObj
[
2
]
=
plan
->
tempmemobj
;
cl_fft_kernel_info
*
kernelInfo
=
plan
->
kernel_info
;
int
numKernels
=
plan
->
num_kernels
;
int
numKernelsOdd
=
numKernels
&
1
;
int
currRead
=
0
;
int
currWrite
=
1
;
// at least one external dram shuffle (transpose) required
if
(
plan
->
temp_buffer_needed
)
if
(
plan
->
temp_buffer_needed
)
{
// in-place transform
if
(
isInPlace
)
if
(
isInPlace
)
{
inPlaceDone
=
0
;
currRead
=
1
;
...
...
@@ -165,36 +165,36 @@ clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchS
{
currWrite
=
(
numKernels
&
1
)
?
1
:
2
;
}
while
(
kernelInfo
)
while
(
kernelInfo
)
{
if
(
isInPlace
&&
numKernelsOdd
&&
!
inPlaceDone
&&
kernelInfo
->
in_place_possible
)
if
(
isInPlace
&&
numKernelsOdd
&&
!
inPlaceDone
&&
kernelInfo
->
in_place_possible
)
{
currWrite
=
currRead
;
inPlaceDone
=
1
;
}
s
=
batchSize
;
getKernelWorkDimensions
(
plan
,
kernelInfo
,
&
s
,
&
gWorkItems
,
&
lWorkItems
);
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
0
,
sizeof
(
cl_mem
),
&
memObj
[
currRead
]);
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
1
,
sizeof
(
cl_mem
),
&
memObj
[
currWrite
]);
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
2
,
sizeof
(
cl_int
),
&
dir
);
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
3
,
sizeof
(
cl_int
),
&
s
);
err
|=
clEnqueueNDRangeKernel
(
queue
,
kernelInfo
->
kernel
,
1
,
NULL
,
&
gWorkItems
,
&
lWorkItems
,
0
,
NULL
,
NULL
);
if
(
err
)
return
err
;
currRead
=
(
currWrite
==
1
)
?
1
:
2
;
currWrite
=
(
currWrite
==
1
)
?
2
:
1
;
currWrite
=
(
currWrite
==
1
)
?
2
:
1
;
kernelInfo
=
kernelInfo
->
next
;
}
}
}
// no dram shuffle (transpose required) transform
// all kernels can execute in-place.
else
{
while
(
kernelInfo
)
{
s
=
batchSize
;
...
...
@@ -203,41 +203,41 @@ clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchS
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
1
,
sizeof
(
cl_mem
),
&
memObj
[
currWrite
]);
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
2
,
sizeof
(
cl_int
),
&
dir
);
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
3
,
sizeof
(
cl_int
),
&
s
);
err
|=
clEnqueueNDRangeKernel
(
queue
,
kernelInfo
->
kernel
,
1
,
NULL
,
&
gWorkItems
,
&
lWorkItems
,
0
,
NULL
,
NULL
);
if
(
err
)
return
err
;
return
err
;
currRead
=
1
;
currWrite
=
1
;
kernelInfo
=
kernelInfo
->
next
;
}
}
return
err
;
}
cl_int
clFFT_ExecutePlannar
(
cl_command_queue
queue
,
clFFT_Plan
Plan
,
cl_int
batchSize
,
clFFT_Direction
dir
,
cl_int
clFFT_ExecutePlannar
(
cl_command_queue
queue
,
clFFT_Plan
Plan
,
cl_int
batchSize
,
clFFT_Direction
dir
,
cl_mem
data_in_real
,
cl_mem
data_in_imag
,
cl_mem
data_out_real
,
cl_mem
data_out_imag
,
cl_int
num_events
,
cl_event
*
event_list
,
cl_event
*
event
)
{
{
int
s
;
cl_fft_plan
*
plan
=
(
cl_fft_plan
*
)
Plan
;
if
(
plan
->
format
!=
clFFT_SplitComplexFormat
)
return
CL_INVALID_VALUE
;
cl_int
err
;
size_t
gWorkItems
,
lWorkItems
;
int
inPlaceDone
=
-
1
;
cl_int
isInPlace
=
((
data_in_real
==
data_out_real
)
&&
(
data_in_imag
==
data_out_imag
))
?
1
:
0
;
if
((
err
=
allocateTemporaryBufferPlannar
(
plan
,
batchSize
))
!=
CL_SUCCESS
)
return
err
;
return
err
;
cl_mem
memObj_real
[
3
];
cl_mem
memObj_imag
[
3
];
memObj_real
[
0
]
=
data_in_real
;
...
...
@@ -246,19 +246,19 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize,
memObj_imag
[
0
]
=
data_in_imag
;
memObj_imag
[
1
]
=
data_out_imag
;
memObj_imag
[
2
]
=
plan
->
tempmemobj_imag
;
cl_fft_kernel_info
*
kernelInfo
=
plan
->
kernel_info
;
int
numKernels
=
plan
->
num_kernels
;
int
numKernelsOdd
=
numKernels
&
1
;
int
currRead
=
0
;
int
currWrite
=
1
;
// at least one external dram shuffle (transpose) required
if
(
plan
->
temp_buffer_needed
)
if
(
plan
->
temp_buffer_needed
)
{
// in-place transform
if
(
isInPlace
)
if
(
isInPlace
)
{
inPlaceDone
=
0
;
currRead
=
1
;
...
...
@@ -268,15 +268,15 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize,
{
currWrite
=
(
numKernels
&
1
)
?
1
:
2
;
}
while
(
kernelInfo
)
while
(
kernelInfo
)
{
if
(
isInPlace
&&
numKernelsOdd
&&
!
inPlaceDone
&&
kernelInfo
->
in_place_possible
)
if
(
isInPlace
&&
numKernelsOdd
&&
!
inPlaceDone
&&
kernelInfo
->
in_place_possible
)
{
currWrite
=
currRead
;
inPlaceDone
=
1
;
}
s
=
batchSize
;
getKernelWorkDimensions
(
plan
,
kernelInfo
,
&
s
,
&
gWorkItems
,
&
lWorkItems
);
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
0
,
sizeof
(
cl_mem
),
&
memObj_real
[
currRead
]);
...
...
@@ -285,20 +285,20 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize,
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
3
,
sizeof
(
cl_mem
),
&
memObj_imag
[
currWrite
]);
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
4
,
sizeof
(
cl_int
),
&
dir
);
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
5
,
sizeof
(
cl_int
),
&
s
);
err
|=
clEnqueueNDRangeKernel
(
queue
,
kernelInfo
->
kernel
,
1
,
NULL
,
&
gWorkItems
,
&
lWorkItems
,
0
,
NULL
,
NULL
);
if
(
err
)
return
err
;
return
err
;
currRead
=
(
currWrite
==
1
)
?
1
:
2
;
currWrite
=
(
currWrite
==
1
)
?
2
:
1
;
currWrite
=
(
currWrite
==
1
)
?
2
:
1
;
kernelInfo
=
kernelInfo
->
next
;
}
}
}
// no dram shuffle (transpose required) transform
else
{
while
(
kernelInfo
)
{
s
=
batchSize
;
...
...
@@ -309,87 +309,87 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize,
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
3
,
sizeof
(
cl_mem
),
&
memObj_imag
[
currWrite
]);
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
4
,
sizeof
(
cl_int
),
&
dir
);
err
|=
clSetKernelArg
(
kernelInfo
->
kernel
,
5
,
sizeof
(
cl_int
),
&
s
);
err
|=
clEnqueueNDRangeKernel
(
queue
,
kernelInfo
->
kernel
,
1
,
NULL
,
&
gWorkItems
,
&
lWorkItems
,
0
,
NULL
,
NULL
);
if
(
err
)
return
err
;
return
err
;
currRead
=
1
;
currWrite
=
1
;
kernelInfo
=
kernelInfo
->
next
;
}
}
return
err
;
}
cl_int
clFFT_1DTwistInterleaved
(
clFFT_Plan
Plan
,
cl_command_queue
queue
,
cl_mem
array
,
cl_int
clFFT_1DTwistInterleaved
(
clFFT_Plan
Plan
,
cl_command_queue
queue
,
cl_mem
array
,
size_t
numRows
,
size_t
numCols
,
size_t
startRow
,
size_t
rowsToProcess
,
clFFT_Direction
dir
)
{
cl_fft_plan
*
plan
=
(
cl_fft_plan
*
)
Plan
;
unsigned
int
N
=
numRows
*
numCols
;
unsigned
int
nCols
=
numCols
;
unsigned
int
sRow
=
startRow
;
unsigned
int
rToProcess
=
rowsToProcess
;
int
d
=
dir
;
int
err
=
0
;
cl_device_id
device_id
;
err
=
clGetCommandQueueInfo
(
queue
,
CL_QUEUE_DEVICE
,
sizeof
(
cl_device_id
),
&
device_id
,
NULL
);
if
(
err
)
return
err
;
size_t
gSize
;
err
=
clGetKernelWorkGroupInfo
(
plan
->
twist_kernel
,
device_id
,
CL_KERNEL_WORK_GROUP_SIZE
,
sizeof
(
size_t
),
&
gSize
,
NULL
);
if
(
err
)
return
err
;
gSize
=
min
(
128
,
gSize
);
size_t
numGlobalThreads
[
1
]
=
{
max
(
numCols
/
gSize
,
1
)
*
gSize
};
size_t
numLocalThreads
[
1
]
=
{
gSize
};
err
|=
clSetKernelArg
(
plan
->
twist_kernel
,
0
,
sizeof
(
cl_mem
),
&
array
);
err
|=
clSetKernelArg
(
plan
->
twist_kernel
,
1
,
sizeof
(
unsigned
int
),
&
sRow
);
err
|=
clSetKernelArg
(
plan
->
twist_kernel
,
2
,
sizeof
(
unsigned
int
),
&
nCols
);
err
|=
clSetKernelArg
(
plan
->
twist_kernel
,
3
,
sizeof
(
unsigned
int
),
&
N
);
err
|=
clSetKernelArg
(
plan
->
twist_kernel
,
4
,
sizeof
(
unsigned
int
),
&
rToProcess
);
err
|=
clSetKernelArg
(
plan
->
twist_kernel
,
5
,
sizeof
(
int
),
&
d
);
err
|=
clEnqueueNDRangeKernel
(
queue
,
plan
->
twist_kernel
,
1
,
NULL
,
numGlobalThreads
,
numLocalThreads
,
0
,
NULL
,
NULL
);
return
err
;
err
|=
clEnqueueNDRangeKernel
(
queue
,
plan
->
twist_kernel
,
1
,
NULL
,
numGlobalThreads
,
numLocalThreads
,
0
,
NULL
,
NULL
);
return
err
;
}
cl_int
clFFT_1DTwistPlannar
(
clFFT_Plan
Plan
,
cl_command_queue
queue
,
cl_mem
array_real
,
cl_mem
array_imag
,
cl_int
clFFT_1DTwistPlannar
(
clFFT_Plan
Plan
,
cl_command_queue
queue
,
cl_mem
array_real
,
cl_mem
array_imag
,
size_t
numRows
,
size_t
numCols
,
size_t
startRow
,
size_t
rowsToProcess
,
clFFT_Direction
dir
)
{
cl_fft_plan
*
plan
=
(
cl_fft_plan
*
)
Plan
;
unsigned
int
N
=
numRows
*
numCols
;
unsigned
int
nCols
=
numCols
;
unsigned
int
sRow
=
startRow
;
unsigned
int
rToProcess
=
rowsToProcess
;
int
d
=
dir
;
int
err
=
0
;
cl_device_id
device_id
;
err
=
clGetCommandQueueInfo
(
queue
,
CL_QUEUE_DEVICE
,
sizeof
(
cl_device_id
),
&
device_id
,
NULL
);
if
(
err
)
return
err
;
size_t
gSize
;
err
=
clGetKernelWorkGroupInfo
(
plan
->
twist_kernel
,
device_id
,
CL_KERNEL_WORK_GROUP_SIZE
,
sizeof
(
size_t
),
&
gSize
,
NULL
);
if
(
err
)
return
err
;
gSize
=
min
(
128
,
gSize
);
size_t
numGlobalThreads
[
1
]
=
{
max
(
numCols
/
gSize
,
1
)
*
gSize
};
size_t
numLocalThreads
[
1
]
=
{
gSize
};
err
|=
clSetKernelArg
(
plan
->
twist_kernel
,
0
,
sizeof
(
cl_mem
),
&
array_real
);
err
|=
clSetKernelArg
(
plan
->
twist_kernel
,
1
,
sizeof
(
cl_mem
),
&
array_imag
);
err
|=
clSetKernelArg
(
plan
->
twist_kernel
,
2
,
sizeof
(
unsigned
int
),
&
sRow
);
...
...
@@ -397,9 +397,9 @@ clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real,
err
|=
clSetKernelArg
(
plan
->
twist_kernel
,
4
,
sizeof
(
unsigned
int
),
&
N
);
err
|=
clSetKernelArg
(
plan
->
twist_kernel
,
5
,
sizeof
(
unsigned
int
),
&
rToProcess
);
err
|=
clSetKernelArg
(
plan
->
twist_kernel
,
6
,
sizeof
(
int
),
&
d
);
err
|=
clEnqueueNDRangeKernel
(
queue
,
plan
->
twist_kernel
,
1
,
NULL
,
numGlobalThreads
,
numLocalThreads
,
0
,
NULL
,
NULL
);
return
err
;
err
|=
clEnqueueNDRangeKernel
(
queue
,
plan
->
twist_kernel
,
1
,
NULL
,
numGlobalThreads
,
numLocalThreads
,
0
,
NULL
,
NULL
);
return
err
;
}
src/fft_internal.h
View file @
7df18002
...
...
@@ -76,83 +76,83 @@ typedef struct kernel_info_t
kernel_info_t
*
next
;
}
cl_fft_kernel_info
;
typedef
struct
typedef
struct
{
// context in which fft resources are created and kernels are executed
cl_context
context
;
// size of signal
clFFT_Dim3
n
;
// dimension of transform ... must be either 1D, 2D or 3D
clFFT_Dimension
dim
;
// data format ... must be either interleaved or plannar
clFFT_DataFormat
format
;
// string containing kernel source. Generated at runtime based on
// n, dim, format and other parameters
string
*
kernel_string
;
// CL program containing source and kernel this particular
// CL program containing source and kernel this particular
// n, dim, data format
cl_program
program
;
// linked list of kernels which needs to be executed for this fft
cl_fft_kernel_info
*
kernel_info
;