Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
einsteinathome
libclfft
Commits
05167ff1
Commit
05167ff1
authored
Mar 18, 2011
by
Oliver Bock
Browse files
Detabbed sources
parent
7df18002
Changes
7
Hide whitespace changes
Inline
Side-by-side
example/main.cpp
View file @
05167ff1
...
...
@@ -66,23 +66,23 @@
#define eps_avg 10.0
/* Larger of two values. Function-like macro: each argument may be evaluated
 * twice, so do not pass expressions with side effects (e.g. MAX(i++, j)). */
#define MAX( _a, _b)    ((_a)>(_b)?(_a) : (_b))
/* Selects whether a test run uses separate input/output buffers or a single
 * buffer. Enumerator values are the implicit 0 and 1. */
typedef enum {
    clFFT_OUT_OF_PLACE,
    clFFT_IN_PLACE,
} clFFT_TestType;
/* One double-precision complex value (real part first, then imaginary). */
typedef struct {
    double real;
    double imag;
} clFFT_ComplexDouble;
/* Double-precision complex data in split form: one array holding the real
 * parts and a second array holding the imaginary parts. The struct only
 * stores the two pointers; it does not own or size the arrays. */
typedef struct {
    double *real;
    double *imag;
} clFFT_SplitComplexDouble;
/* File-scope variable of type cl_device_id. Where it is assigned and used is
 * outside the visible excerpt. */
cl_device_id
device_id
;
...
...
@@ -100,9 +100,9 @@ double subtractTimes( uint64_t endTime, uint64_t startTime )
mach_timebase_info_data_t
info
;
kern_return_t
err
=
mach_timebase_info
(
&
info
);
//Convert the timebase into seconds
//Convert the timebase into seconds
if
(
err
==
0
)
conversion
=
1e-9
*
(
double
)
info
.
numer
/
(
double
)
info
.
denom
;
conversion
=
1e-9
*
(
double
)
info
.
numer
/
(
double
)
info
.
denom
;
}
return
conversion
*
(
double
)
difference
;
...
...
@@ -111,583 +111,583 @@ double subtractTimes( uint64_t endTime, uint64_t startTime )
#ifdef __APPLE__
/*
 * Single-precision reference FFT built on the vDSP routines.
 *
 * The transform is applied to the split-complex arrays referenced by `out`:
 * pointers into out->real / out->imag are handed to vDSP_fft_zip, one 1-D
 * transform at a time.
 *
 *   out        split-complex data; indexed as
 *              x + y*n.x + z*n.x*n.y + batch*n.x*n.y*n.z
 *   n          extent in x, y and z; log2() of each extent is truncated to an
 *              integer, so the code presumably expects powers of two
 *              (NOTE(review): confirm with callers)
 *   batchSize  number of consecutive data sets stored in `out`
 *   dim        clFFT_1D / clFFT_2D / clFFT_3D; any other value transforms
 *              nothing
 *   dir        clFFT_Forward selects FFT_FORWARD, anything else FFT_INVERSE
 *
 * The result of vDSP_create_fftsetup is not checked, matching the original
 * behavior.
 */
void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
                       unsigned int batchSize, clFFT_Dimension dim, clFFT_Direction dir)
{
    FFTSetup plan_vdsp;
    DSPSplitComplex out_vdsp;
    FFTDirection dir_vdsp = dir == clFFT_Forward ? FFT_FORWARD : FFT_INVERSE;

    unsigned int i, j, k;
    unsigned int offset;    /* index of the first element of the current 1-D line */
    unsigned int log2Nx = (unsigned int) log2(n.x);
    unsigned int log2Ny = (unsigned int) log2(n.y);
    unsigned int log2Nz = (unsigned int) log2(n.z);
    unsigned int log2N;

    /* One setup sized for the longest axis is shared by every pass. */
    log2N = log2Nx;
    log2N = log2N > log2Ny ? log2N : log2Ny;
    log2N = log2N > log2Nz ? log2N : log2Nz;

    /* NOTE(review): the literal 2 is passed as the radix selector exactly as
     * in the original; confirm it against the FFTRadix constants. */
    plan_vdsp = vDSP_create_fftsetup(log2N, 2);

    switch (dim)
    {
        case clFFT_1D:
            for (i = 0; i < batchSize; i++)
            {
                offset = i * n.x;
                out_vdsp.realp = out->real + offset;
                out_vdsp.imagp = out->imag + offset;

                vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
            }
            break;

        case clFFT_2D:
            /* Pass 1: transform along x (unit stride) for every row. */
            for (i = 0; i < batchSize; i++)
            {
                for (j = 0; j < n.y; j++)
                {
                    offset = j * n.x + i * n.x * n.y;
                    out_vdsp.realp = out->real + offset;
                    out_vdsp.imagp = out->imag + offset;

                    vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
                }
            }
            /* Pass 2: transform along y (stride n.x) for every column. */
            for (i = 0; i < batchSize; i++)
            {
                for (j = 0; j < n.x; j++)
                {
                    offset = j + i * n.x * n.y;
                    out_vdsp.realp = out->real + offset;
                    out_vdsp.imagp = out->imag + offset;

                    vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
                }
            }
            break;

        case clFFT_3D:
            /* Pass 1: transform along x (unit stride). */
            for (i = 0; i < batchSize; i++)
            {
                for (j = 0; j < n.z; j++)
                {
                    for (k = 0; k < n.y; k++)
                    {
                        offset = k * n.x + j * n.x * n.y + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + offset;
                        out_vdsp.imagp = out->imag + offset;

                        vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
                    }
                }
            }
            /* Pass 2: transform along y (stride n.x). */
            for (i = 0; i < batchSize; i++)
            {
                for (j = 0; j < n.z; j++)
                {
                    for (k = 0; k < n.x; k++)
                    {
                        offset = k + j * n.x * n.y + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + offset;
                        out_vdsp.imagp = out->imag + offset;

                        vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
                    }
                }
            }
            /* Pass 3: transform along z (stride n.x * n.y). */
            for (i = 0; i < batchSize; i++)
            {
                for (j = 0; j < n.y; j++)
                {
                    for (k = 0; k < n.x; k++)
                    {
                        offset = k + j * n.x + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + offset;
                        out_vdsp.imagp = out->imag + offset;

                        vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x * n.y, log2Nz, dir_vdsp);
                    }
                }
            }
            break;

        default:
            /* Unknown dimension: nothing is transformed. */
            break;
    }

    vDSP_destroy_fftsetup(plan_vdsp);
}
#endif
#ifdef __APPLE__
/*
 * Double-precision reference FFT built on the vDSP "D" routines.
 *
 * Mirrors computeReferenceF but operates on double data: pointers into
 * out->real / out->imag are handed to vDSP_fft_zipD, one 1-D transform at a
 * time.
 *
 *   out        split-complex double data; indexed as
 *              x + y*n.x + z*n.x*n.y + batch*n.x*n.y*n.z
 *   n          extent in x, y and z; log2() of each extent is truncated to an
 *              integer, so the code presumably expects powers of two
 *              (NOTE(review): confirm with callers)
 *   batchSize  number of consecutive data sets stored in `out`
 *   dim        clFFT_1D / clFFT_2D / clFFT_3D; any other value transforms
 *              nothing
 *   dir        clFFT_Forward selects FFT_FORWARD, anything else FFT_INVERSE
 *
 * The result of vDSP_create_fftsetupD is not checked, matching the original
 * behavior.
 */
void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
                       unsigned int batchSize, clFFT_Dimension dim, clFFT_Direction dir)
{
    FFTSetupD plan_vdsp;
    DSPDoubleSplitComplex out_vdsp;
    FFTDirection dir_vdsp = dir == clFFT_Forward ? FFT_FORWARD : FFT_INVERSE;

    unsigned int i, j, k;
    unsigned int offset;    /* index of the first element of the current 1-D line */
    unsigned int log2Nx = (unsigned int) log2(n.x);
    unsigned int log2Ny = (unsigned int) log2(n.y);
    unsigned int log2Nz = (unsigned int) log2(n.z);
    unsigned int log2N;

    /* One setup sized for the longest axis is shared by every pass. */
    log2N = log2Nx;
    log2N = log2N > log2Ny ? log2N : log2Ny;
    log2N = log2N > log2Nz ? log2N : log2Nz;

    /* NOTE(review): the literal 2 is passed as the radix selector exactly as
     * in the original; confirm it against the FFTRadix constants. */
    plan_vdsp = vDSP_create_fftsetupD(log2N, 2);

    switch (dim)
    {
        case clFFT_1D:
            for (i = 0; i < batchSize; i++)
            {
                offset = i * n.x;
                out_vdsp.realp = out->real + offset;
                out_vdsp.imagp = out->imag + offset;

                vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
            }
            break;

        case clFFT_2D:
            /* Pass 1: transform along x (unit stride) for every row. */
            for (i = 0; i < batchSize; i++)
            {
                for (j = 0; j < n.y; j++)
                {
                    offset = j * n.x + i * n.x * n.y;
                    out_vdsp.realp = out->real + offset;
                    out_vdsp.imagp = out->imag + offset;

                    vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
                }
            }
            /* Pass 2: transform along y (stride n.x) for every column. */
            for (i = 0; i < batchSize; i++)
            {
                for (j = 0; j < n.x; j++)
                {
                    offset = j + i * n.x * n.y;
                    out_vdsp.realp = out->real + offset;
                    out_vdsp.imagp = out->imag + offset;

                    vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
                }
            }
            break;

        case clFFT_3D:
            /* Pass 1: transform along x (unit stride). */
            for (i = 0; i < batchSize; i++)
            {
                for (j = 0; j < n.z; j++)
                {
                    for (k = 0; k < n.y; k++)
                    {
                        offset = k * n.x + j * n.x * n.y + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + offset;
                        out_vdsp.imagp = out->imag + offset;

                        vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
                    }
                }
            }
            /* Pass 2: transform along y (stride n.x). */
            for (i = 0; i < batchSize; i++)
            {
                for (j = 0; j < n.z; j++)
                {
                    for (k = 0; k < n.x; k++)
                    {
                        offset = k + j * n.x * n.y + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + offset;
                        out_vdsp.imagp = out->imag + offset;

                        vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
                    }
                }
            }
            /* Pass 3: transform along z (stride n.x * n.y). */
            for (i = 0; i < batchSize; i++)
            {
                for (j = 0; j < n.y; j++)
                {
                    for (k = 0; k < n.x; k++)
                    {
                        offset = k + j * n.x + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + offset;
                        out_vdsp.imagp = out->imag + offset;

                        vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x * n.y, log2Nz, dir_vdsp);
                    }
                }
            }
            break;

        default:
            /* Unknown dimension: nothing is transformed. */
            break;
    }

    vDSP_destroy_fftsetupD(plan_vdsp);
}
#endif
/* Squared magnitude of a complex value: real^2 + imag^2 (no square root). */
double complexNormSq(clFFT_ComplexDouble a)
{
    return (a.real * a.real + a.imag * a.imag);
}
/*
 * Relative L2 error of `data` against `data_ref`, evaluated per batch.
 *
 * For each of the `batchSize` data sets of `n` elements this computes
 *     sqrt( sum |ref - data|^2 / sum |ref|^2 ) / FLT_EPSILON
 * i.e. the relative error expressed in multiples of FLT_EPSILON.
 *
 *   data       values under test (split complex)
 *   data_ref   double-precision reference values (split complex)
 *   n          number of complex elements per batch
 *   batchSize  number of batches
 *   max_diff   out: largest per-batch error (0.0 if there are no batches)
 *   min_diff   out: smallest per-batch error (0x1.0p1000 if there are no
 *              batches)
 *
 * Returns the mean of the per-batch errors.
 *
 * No guard is applied for a batch whose reference is all zeros (norm_ref == 0)
 * or for batchSize == 0; both lead to a floating-point division by zero, as
 * in the original code.
 */
double computeL2Error(clFFT_SplitComplex *data, clFFT_SplitComplexDouble *data_ref,
                      int n, int batchSize, double *max_diff, double *min_diff)
{
    int i, j;
    double avg_norm = 0.0;

    *max_diff = 0.0;
    *min_diff = 0x1.0p1000;     /* large sentinel so any real error is smaller */

    for (j = 0; j < batchSize; j++)
    {
        double norm_ref = 0.0;  /* sum of |ref|^2 over this batch */
        double norm = 0.0;      /* sum of |ref - data|^2 over this batch */

        for (i = 0; i < n; i++)
        {
            int index = j * n + i;
            clFFT_ComplexDouble diff = {
                data_ref->real[index] - data->real[index],
                data_ref->imag[index] - data->imag[index]
            };
            double norm_tmp = complexNormSq(diff);

            norm += norm_tmp;
            norm_ref += (data_ref->real[index] * data_ref->real[index]
                       + data_ref->imag[index] * data_ref->imag[index]);
        }

        double curr_norm = sqrt(norm / norm_ref) / FLT_EPSILON;

        avg_norm += curr_norm;
        *max_diff = *max_diff < curr_norm ? curr_norm : *max_diff;
        *min_diff = *min_diff > curr_norm ? curr_norm : *min_diff;
    }

    return avg_norm / batchSize;
}
/*
 * Copies `length` interleaved complex values from `data_cl` into the separate
 * real and imaginary arrays referenced by `result_split`.
 *
 * The destination arrays must already exist and hold at least `length`
 * elements each; nothing is allocated here. A non-positive `length` copies
 * nothing.
 */
void convertInterleavedToSplit(clFFT_SplitComplex *result_split, clFFT_Complex *data_cl, int length)
{
    int i;

    for (i = 0; i < length; i++)
    {
        result_split->real[i] = data_cl[i].real;
        result_split->imag[i] = data_cl[i].imag;
    }
}
int
runTest
(
clFFT_Dim3
n
,
int
batchSize
,
clFFT_Direction
dir
,
clFFT_Dimension
dim
,
clFFT_DataFormat
dataFormat
,
int
numIter
,
clFFT_TestType
testType
)
clFFT_DataFormat
dataFormat
,
int
numIter
,
clFFT_TestType
testType
)
{
cl_int
err
=
CL_SUCCESS
;
int
iter
;
cl_int
err
=
CL_SUCCESS
;
int
iter
;
#ifdef __APPLE__
double
t
;
uint64_t
t0
,
t1
;
double
t
;
uint64_t
t0
,
t1
;
int
mx
=
log2
(
n
.
x
);
int
my
=
log2
(
n
.
y
);
int
mz
=
log2
(
n
.
z
);
int
mx
=
log2
(
n
.
x
);
int
my
=
log2
(
n
.
y
);
int
mz
=
log2
(
n
.
z
);
double
gflops
=
5e-9
*
((
double
)
mx
+
(
double
)
my
+
(
double
)
mz
)
*
(
double
)
n
.
x
*
(
double
)
n
.
y
*
(
double
)
n
.
z
*
(
double
)
batchSize
*
(
double
)
numIter
;
double
gflops
=
5e-9
*
((
double
)
mx
+
(
double
)
my
+
(
double
)
mz
)
*
(
double
)
n
.
x
*
(
double
)
n
.
y
*
(
double
)
n
.
z
*
(
double
)
batchSize
*
(
double
)
numIter
;
#endif
int
length
=
n
.
x
*
n
.
y
*
n
.
z
*
batchSize
;
clFFT_SplitComplex
data_i_split
=
(
clFFT_SplitComplex
)
{
NULL
,
NULL
};
clFFT_SplitComplex
data_cl_split
=
(
clFFT_SplitComplex
)
{
NULL
,
NULL
};
clFFT_Complex
*
data_i
=
NULL
;
clFFT_Complex
*
data_cl
=
NULL
;
clFFT_SplitComplexDouble
data_iref
=
(
clFFT_SplitComplexDouble
)
{
NULL
,
NULL
};
clFFT_SplitComplexDouble
data_oref
=
(
clFFT_SplitComplexDouble
)
{
NULL
,
NULL
};
clFFT_Plan
plan
=
NULL
;
cl_mem
data_in
=
NULL
;
cl_mem
data_out
=
NULL
;