Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Oliver Behnke
libclfft
Commits
7df18002
Commit
7df18002
authored
Mar 18, 2011
by
Oliver Bock
Browse files
Removing trailing whitespaces
parent
bc1bc3e9
Changes
7
Hide whitespace changes
Inline
Side-by-side
example/main.cpp
View file @
7df18002
...
...
@@ -94,58 +94,58 @@ double subtractTimes( uint64_t endTime, uint64_t startTime )
{
uint64_t
difference
=
endTime
-
startTime
;
static
double
conversion
=
0.0
;
if
(
conversion
==
0.0
)
{
mach_timebase_info_data_t
info
;
kern_return_t
err
=
mach_timebase_info
(
&
info
);
//Convert the timebase into seconds
if
(
err
==
0
)
conversion
=
1e-9
*
(
double
)
info
.
numer
/
(
double
)
info
.
denom
;
}
return
conversion
*
(
double
)
difference
;
}
#endif
#ifdef __APPLE__
void
computeReferenceF
(
clFFT_SplitComplex
*
out
,
clFFT_Dim3
n
,
void
computeReferenceF
(
clFFT_SplitComplex
*
out
,
clFFT_Dim3
n
,
unsigned
int
batchSize
,
clFFT_Dimension
dim
,
clFFT_Direction
dir
)
{
FFTSetup
plan_vdsp
;
DSPSplitComplex
out_vdsp
;
FFTDirection
dir_vdsp
=
dir
==
clFFT_Forward
?
FFT_FORWARD
:
FFT_INVERSE
;
unsigned
int
i
,
j
,
k
;
unsigned
int
stride
;
unsigned
int
log2Nx
=
(
unsigned
int
)
log2
(
n
.
x
);
unsigned
int
log2Ny
=
(
unsigned
int
)
log2
(
n
.
y
);
unsigned
int
log2Nz
=
(
unsigned
int
)
log2
(
n
.
z
);
unsigned
int
log2N
;
log2N
=
log2Nx
;
log2N
=
log2N
>
log2Ny
?
log2N
:
log2Ny
;
log2N
=
log2N
>
log2Nz
?
log2N
:
log2Nz
;
plan_vdsp
=
vDSP_create_fftsetup
(
log2N
,
2
);
switch
(
dim
)
{
case
clFFT_1D
:
for
(
i
=
0
;
i
<
batchSize
;
i
++
)
{
stride
=
i
*
n
.
x
;
out_vdsp
.
realp
=
out
->
real
+
stride
;
out_vdsp
.
imagp
=
out
->
imag
+
stride
;
vDSP_fft_zip
(
plan_vdsp
,
&
out_vdsp
,
1
,
log2Nx
,
dir_vdsp
);
}
break
;
case
clFFT_2D
:
for
(
i
=
0
;
i
<
batchSize
;
i
++
)
{
for
(
j
=
0
;
j
<
n
.
y
;
j
++
)
...
...
@@ -153,7 +153,7 @@ void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
stride
=
j
*
n
.
x
+
i
*
n
.
x
*
n
.
y
;
out_vdsp
.
realp
=
out
->
real
+
stride
;
out_vdsp
.
imagp
=
out
->
imag
+
stride
;
vDSP_fft_zip
(
plan_vdsp
,
&
out_vdsp
,
1
,
log2Nx
,
dir_vdsp
);
}
}
...
...
@@ -164,14 +164,14 @@ void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
stride
=
j
+
i
*
n
.
x
*
n
.
y
;
out_vdsp
.
realp
=
out
->
real
+
stride
;
out_vdsp
.
imagp
=
out
->
imag
+
stride
;
vDSP_fft_zip
(
plan_vdsp
,
&
out_vdsp
,
n
.
x
,
log2Ny
,
dir_vdsp
);
}
}
break
;
case
clFFT_3D
:
for
(
i
=
0
;
i
<
batchSize
;
i
++
)
{
for
(
j
=
0
;
j
<
n
.
z
;
j
++
)
...
...
@@ -181,7 +181,7 @@ void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
stride
=
k
*
n
.
x
+
j
*
n
.
x
*
n
.
y
+
i
*
n
.
x
*
n
.
y
*
n
.
z
;
out_vdsp
.
realp
=
out
->
real
+
stride
;
out_vdsp
.
imagp
=
out
->
imag
+
stride
;
vDSP_fft_zip
(
plan_vdsp
,
&
out_vdsp
,
1
,
log2Nx
,
dir_vdsp
);
}
}
...
...
@@ -195,7 +195,7 @@ void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
stride
=
k
+
j
*
n
.
x
*
n
.
y
+
i
*
n
.
x
*
n
.
y
*
n
.
z
;
out_vdsp
.
realp
=
out
->
real
+
stride
;
out_vdsp
.
imagp
=
out
->
imag
+
stride
;
vDSP_fft_zip
(
plan_vdsp
,
&
out_vdsp
,
n
.
x
,
log2Ny
,
dir_vdsp
);
}
}
...
...
@@ -209,55 +209,55 @@ void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
stride
=
k
+
j
*
n
.
x
+
i
*
n
.
x
*
n
.
y
*
n
.
z
;
out_vdsp
.
realp
=
out
->
real
+
stride
;
out_vdsp
.
imagp
=
out
->
imag
+
stride
;
vDSP_fft_zip
(
plan_vdsp
,
&
out_vdsp
,
n
.
x
*
n
.
y
,
log2Nz
,
dir_vdsp
);
}
}
}
break
;
}
vDSP_destroy_fftsetup
(
plan_vdsp
);
}
#endif
#ifdef __APPLE__
void
computeReferenceD
(
clFFT_SplitComplexDouble
*
out
,
clFFT_Dim3
n
,
void
computeReferenceD
(
clFFT_SplitComplexDouble
*
out
,
clFFT_Dim3
n
,
unsigned
int
batchSize
,
clFFT_Dimension
dim
,
clFFT_Direction
dir
)
{
FFTSetupD
plan_vdsp
;
DSPDoubleSplitComplex
out_vdsp
;
FFTDirection
dir_vdsp
=
dir
==
clFFT_Forward
?
FFT_FORWARD
:
FFT_INVERSE
;
unsigned
int
i
,
j
,
k
;
unsigned
int
stride
;
unsigned
int
log2Nx
=
(
int
)
log2
(
n
.
x
);
unsigned
int
log2Ny
=
(
int
)
log2
(
n
.
y
);
unsigned
int
log2Nz
=
(
int
)
log2
(
n
.
z
);
unsigned
int
log2N
;
log2N
=
log2Nx
;
log2N
=
log2N
>
log2Ny
?
log2N
:
log2Ny
;
log2N
=
log2N
>
log2Nz
?
log2N
:
log2Nz
;
plan_vdsp
=
vDSP_create_fftsetupD
(
log2N
,
2
);
switch
(
dim
)
{
case
clFFT_1D
:
for
(
i
=
0
;
i
<
batchSize
;
i
++
)
{
stride
=
i
*
n
.
x
;
out_vdsp
.
realp
=
out
->
real
+
stride
;
out_vdsp
.
imagp
=
out
->
imag
+
stride
;
vDSP_fft_zipD
(
plan_vdsp
,
&
out_vdsp
,
1
,
log2Nx
,
dir_vdsp
);
}
break
;
case
clFFT_2D
:
for
(
i
=
0
;
i
<
batchSize
;
i
++
)
{
for
(
j
=
0
;
j
<
n
.
y
;
j
++
)
...
...
@@ -265,7 +265,7 @@ void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
stride
=
j
*
n
.
x
+
i
*
n
.
x
*
n
.
y
;
out_vdsp
.
realp
=
out
->
real
+
stride
;
out_vdsp
.
imagp
=
out
->
imag
+
stride
;
vDSP_fft_zipD
(
plan_vdsp
,
&
out_vdsp
,
1
,
log2Nx
,
dir_vdsp
);
}
}
...
...
@@ -276,14 +276,14 @@ void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
stride
=
j
+
i
*
n
.
x
*
n
.
y
;
out_vdsp
.
realp
=
out
->
real
+
stride
;
out_vdsp
.
imagp
=
out
->
imag
+
stride
;
vDSP_fft_zipD
(
plan_vdsp
,
&
out_vdsp
,
n
.
x
,
log2Ny
,
dir_vdsp
);
}
}
break
;
case
clFFT_3D
:
for
(
i
=
0
;
i
<
batchSize
;
i
++
)
{
for
(
j
=
0
;
j
<
n
.
z
;
j
++
)
...
...
@@ -293,7 +293,7 @@ void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
stride
=
k
*
n
.
x
+
j
*
n
.
x
*
n
.
y
+
i
*
n
.
x
*
n
.
y
*
n
.
z
;
out_vdsp
.
realp
=
out
->
real
+
stride
;
out_vdsp
.
imagp
=
out
->
imag
+
stride
;
vDSP_fft_zipD
(
plan_vdsp
,
&
out_vdsp
,
1
,
log2Nx
,
dir_vdsp
);
}
}
...
...
@@ -307,7 +307,7 @@ void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
stride
=
k
+
j
*
n
.
x
*
n
.
y
+
i
*
n
.
x
*
n
.
y
*
n
.
z
;
out_vdsp
.
realp
=
out
->
real
+
stride
;
out_vdsp
.
imagp
=
out
->
imag
+
stride
;
vDSP_fft_zipD
(
plan_vdsp
,
&
out_vdsp
,
n
.
x
,
log2Ny
,
dir_vdsp
);
}
}
...
...
@@ -321,14 +321,14 @@ void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
stride
=
k
+
j
*
n
.
x
+
i
*
n
.
x
*
n
.
y
*
n
.
z
;
out_vdsp
.
realp
=
out
->
real
+
stride
;
out_vdsp
.
imagp
=
out
->
imag
+
stride
;
vDSP_fft_zipD
(
plan_vdsp
,
&
out_vdsp
,
n
.
x
*
n
.
y
,
log2Nz
,
dir_vdsp
);
}
}
}
break
;
}
vDSP_destroy_fftsetupD
(
plan_vdsp
);
}
#endif
...
...
@@ -344,12 +344,12 @@ double computeL2Error(clFFT_SplitComplex *data, clFFT_SplitComplexDouble *data_r
double
avg_norm
=
0.0
;
*
max_diff
=
0.0
;
*
min_diff
=
0x1
.0
p1000
;
for
(
j
=
0
;
j
<
batchSize
;
j
++
)
{
double
norm_ref
=
0.0
;
double
norm
=
0.0
;
for
(
i
=
0
;
i
<
n
;
i
++
)
for
(
i
=
0
;
i
<
n
;
i
++
)
{
int
index
=
j
*
n
+
i
;
clFFT_ComplexDouble
diff
=
(
clFFT_ComplexDouble
)
{
data_ref
->
real
[
index
]
-
data
->
real
[
index
],
data_ref
->
imag
[
index
]
-
data
->
imag
[
index
]
};
...
...
@@ -362,7 +362,7 @@ double computeL2Error(clFFT_SplitComplex *data, clFFT_SplitComplexDouble *data_r
*
max_diff
=
*
max_diff
<
curr_norm
?
curr_norm
:
*
max_diff
;
*
min_diff
=
*
min_diff
>
curr_norm
?
curr_norm
:
*
min_diff
;
}
return
avg_norm
/
batchSize
;
}
...
...
@@ -375,9 +375,9 @@ void convertInterleavedToSplit(clFFT_SplitComplex *result_split, clFFT_Complex *
}
}
int
runTest
(
clFFT_Dim3
n
,
int
batchSize
,
clFFT_Direction
dir
,
clFFT_Dimension
dim
,
int
runTest
(
clFFT_Dim3
n
,
int
batchSize
,
clFFT_Direction
dir
,
clFFT_Dimension
dim
,
clFFT_DataFormat
dataFormat
,
int
numIter
,
clFFT_TestType
testType
)
{
{
cl_int
err
=
CL_SUCCESS
;
int
iter
;
...
...
@@ -393,14 +393,14 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
#endif
int
length
=
n
.
x
*
n
.
y
*
n
.
z
*
batchSize
;
clFFT_SplitComplex
data_i_split
=
(
clFFT_SplitComplex
)
{
NULL
,
NULL
};
clFFT_SplitComplex
data_cl_split
=
(
clFFT_SplitComplex
)
{
NULL
,
NULL
};
clFFT_Complex
*
data_i
=
NULL
;
clFFT_Complex
*
data_cl
=
NULL
;
clFFT_SplitComplexDouble
data_iref
=
(
clFFT_SplitComplexDouble
)
{
NULL
,
NULL
};
clFFT_SplitComplexDouble
data_iref
=
(
clFFT_SplitComplexDouble
)
{
NULL
,
NULL
};
clFFT_SplitComplexDouble
data_oref
=
(
clFFT_SplitComplexDouble
)
{
NULL
,
NULL
};
clFFT_Plan
plan
=
NULL
;
cl_mem
data_in
=
NULL
;
cl_mem
data_out
=
NULL
;
...
...
@@ -408,7 +408,7 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
cl_mem
data_in_imag
=
NULL
;
cl_mem
data_out_real
=
NULL
;
cl_mem
data_out_imag
=
NULL
;
if
(
dataFormat
==
clFFT_SplitComplexFormat
)
{
data_i_split
.
real
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
length
);
data_i_split
.
imag
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
length
);
...
...
@@ -431,11 +431,11 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
goto
cleanup
;
}
}
data_iref
.
real
=
(
double
*
)
malloc
(
sizeof
(
double
)
*
length
);
data_iref
.
imag
=
(
double
*
)
malloc
(
sizeof
(
double
)
*
length
);
data_oref
.
real
=
(
double
*
)
malloc
(
sizeof
(
double
)
*
length
);
data_oref
.
imag
=
(
double
*
)
malloc
(
sizeof
(
double
)
*
length
);
data_oref
.
imag
=
(
double
*
)
malloc
(
sizeof
(
double
)
*
length
);
if
(
!
data_iref
.
real
||
!
data_iref
.
imag
||
!
data_oref
.
real
||
!
data_oref
.
imag
)
{
err
=
-
3
;
...
...
@@ -450,11 +450,11 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
data_i_split
.
real
[
i
]
=
2.0
f
*
(
float
)
rand
()
/
(
float
)
RAND_MAX
-
1.0
f
;
data_i_split
.
imag
[
i
]
=
2.0
f
*
(
float
)
rand
()
/
(
float
)
RAND_MAX
-
1.0
f
;
data_cl_split
.
real
[
i
]
=
0.0
f
;
data_cl_split
.
imag
[
i
]
=
0.0
f
;
data_cl_split
.
imag
[
i
]
=
0.0
f
;
data_iref
.
real
[
i
]
=
data_i_split
.
real
[
i
];
data_iref
.
imag
[
i
]
=
data_i_split
.
imag
[
i
];
data_oref
.
real
[
i
]
=
data_iref
.
real
[
i
];
data_oref
.
imag
[
i
]
=
data_iref
.
imag
[
i
];
data_oref
.
imag
[
i
]
=
data_iref
.
imag
[
i
];
}
}
else
{
...
...
@@ -463,54 +463,54 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
data_i
[
i
].
real
=
2.0
f
*
(
float
)
rand
()
/
(
float
)
RAND_MAX
-
1.0
f
;
data_i
[
i
].
imag
=
2.0
f
*
(
float
)
rand
()
/
(
float
)
RAND_MAX
-
1.0
f
;
data_cl
[
i
].
real
=
0.0
f
;
data_cl
[
i
].
imag
=
0.0
f
;
data_cl
[
i
].
imag
=
0.0
f
;
data_iref
.
real
[
i
]
=
data_i
[
i
].
real
;
data_iref
.
imag
[
i
]
=
data_i
[
i
].
imag
;
data_oref
.
real
[
i
]
=
data_iref
.
real
[
i
];
data_oref
.
imag
[
i
]
=
data_iref
.
imag
[
i
];
}
data_oref
.
imag
[
i
]
=
data_iref
.
imag
[
i
];
}
}
plan
=
clFFT_CreatePlan
(
context
,
n
,
dim
,
dataFormat
,
&
err
);
if
(
!
plan
||
err
)
if
(
!
plan
||
err
)
{
log_error
(
"clFFT_CreatePlan failed
\n
"
);
goto
cleanup
;
}
//clFFT_DumpPlan(plan, stdout);
if
(
dataFormat
==
clFFT_SplitComplexFormat
)
{
data_in_real
=
clCreateBuffer
(
context
,
CL_MEM_READ_WRITE
|
CL_MEM_COPY_HOST_PTR
,
length
*
sizeof
(
float
),
data_i_split
.
real
,
&
err
);
if
(
!
data_in_real
||
err
)
if
(
!
data_in_real
||
err
)
{
log_error
(
"clCreateBuffer failed
\n
"
);
goto
cleanup
;
}
data_in_imag
=
clCreateBuffer
(
context
,
CL_MEM_READ_WRITE
|
CL_MEM_COPY_HOST_PTR
,
length
*
sizeof
(
float
),
data_i_split
.
imag
,
&
err
);
if
(
!
data_in_imag
||
err
)
if
(
!
data_in_imag
||
err
)
{
log_error
(
"clCreateBuffer failed
\n
"
);
goto
cleanup
;
}
if
(
testType
==
clFFT_OUT_OF_PLACE
)
{
data_out_real
=
clCreateBuffer
(
context
,
CL_MEM_READ_WRITE
|
CL_MEM_COPY_HOST_PTR
,
length
*
sizeof
(
float
),
data_cl_split
.
real
,
&
err
);
if
(
!
data_out_real
||
err
)
if
(
!
data_out_real
||
err
)
{
log_error
(
"clCreateBuffer failed
\n
"
);
goto
cleanup
;
}
data_out_imag
=
clCreateBuffer
(
context
,
CL_MEM_READ_WRITE
|
CL_MEM_COPY_HOST_PTR
,
length
*
sizeof
(
float
),
data_cl_split
.
imag
,
&
err
);
if
(
!
data_out_imag
||
err
)
if
(
!
data_out_imag
||
err
)
{
log_error
(
"clCreateBuffer failed
\n
"
);
goto
cleanup
;
}
}
}
else
{
...
...
@@ -521,7 +521,7 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
else
{
data_in
=
clCreateBuffer
(
context
,
CL_MEM_READ_WRITE
|
CL_MEM_COPY_HOST_PTR
,
length
*
sizeof
(
float
)
*
2
,
data_i
,
&
err
);
if
(
!
data_in
)
if
(
!
data_in
)
{
log_error
(
"clCreateBuffer failed
\n
"
);
goto
cleanup
;
...
...
@@ -529,17 +529,17 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
if
(
testType
==
clFFT_OUT_OF_PLACE
)
{
data_out
=
clCreateBuffer
(
context
,
CL_MEM_READ_WRITE
|
CL_MEM_COPY_HOST_PTR
,
length
*
sizeof
(
float
)
*
2
,
data_cl
,
&
err
);
if
(
!
data_out
)
if
(
!
data_out
)
{
log_error
(
"clCreateBuffer failed
\n
"
);
goto
cleanup
;
}
}
}
else
data_out
=
data_in
;
}
err
=
CL_SUCCESS
;
#ifdef __APPLE__
...
...
@@ -552,20 +552,20 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
}
else
{
for
(
iter
=
0
;
iter
<
numIter
;
iter
++
)
for
(
iter
=
0
;
iter
<
numIter
;
iter
++
)
err
|=
clFFT_ExecuteInterleaved
(
queue
,
plan
,
batchSize
,
dir
,
data_in
,
data_out
,
0
,
NULL
,
NULL
);
}
err
|=
clFinish
(
queue
);
if
(
err
)
if
(
err
)
{
log_error
(
"clFFT_Execute
\n
"
);
goto
cleanup
;
goto
cleanup
;
}
#ifdef __APPLE__
t1
=
mach_absolute_time
();
t1
=
mach_absolute_time
();
t
=
subtractTimes
(
t1
,
t0
);
char
temp
[
100
];
sprintf
(
temp
,
"GFlops achieved for n = (%d, %d, %d), batchsize = %d"
,
n
.
x
,
n
.
y
,
n
.
z
,
batchSize
);
...
...
@@ -573,7 +573,7 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
#endif
if
(
dataFormat
==
clFFT_SplitComplexFormat
)
{
{
err
|=
clEnqueueReadBuffer
(
queue
,
data_out_real
,
CL_TRUE
,
0
,
length
*
sizeof
(
float
),
data_cl_split
.
real
,
0
,
NULL
,
NULL
);
err
|=
clEnqueueReadBuffer
(
queue
,
data_out_imag
,
CL_TRUE
,
0
,
length
*
sizeof
(
float
),
data_cl_split
.
imag
,
0
,
NULL
,
NULL
);
}
...
...
@@ -581,23 +581,23 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
{
err
|=
clEnqueueReadBuffer
(
queue
,
data_out
,
CL_TRUE
,
0
,
length
*
sizeof
(
float
)
*
2
,
data_cl
,
0
,
NULL
,
NULL
);
}
if
(
err
)
if
(
err
)
{
log_error
(
"clEnqueueReadBuffer failed
\n
"
);
goto
cleanup
;
}
}
#ifdef __APPLE__
computeReferenceD
(
&
data_oref
,
n
,
batchSize
,
dim
,
dir
);
double
diff_avg
,
diff_max
,
diff_min
;
if
(
dataFormat
==
clFFT_SplitComplexFormat
)
{
diff_avg
=
computeL2Error
(
&
data_cl_split
,
&
data_oref
,
n
.
x
*
n
.
y
*
n
.
z
,
batchSize
,
&
diff_max
,
&
diff_min
);
if
(
diff_avg
>
eps_avg
)
log_error
(
"Test failed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)
\n
"
,
n
.
x
,
n
.
y
,
n
.
z
,
batchSize
,
(
testType
==
clFFT_OUT_OF_PLACE
)
?
"out-of-place"
:
"in-place"
,
diff_avg
,
diff_max
,
diff_min
);
else
log_info
(
"Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)
\n
"
,
n
.
x
,
n
.
y
,
n
.
z
,
batchSize
,
(
testType
==
clFFT_OUT_OF_PLACE
)
?
"out-of-place"
:
"in-place"
,
diff_avg
,
diff_max
,
diff_min
);
log_info
(
"Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)
\n
"
,
n
.
x
,
n
.
y
,
n
.
z
,
batchSize
,
(
testType
==
clFFT_OUT_OF_PLACE
)
?
"out-of-place"
:
"in-place"
,
diff_avg
,
diff_max
,
diff_min
);
}
else
{
clFFT_SplitComplex
result_split
;
...
...
@@ -605,19 +605,19 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
result_split
.
imag
=
(
float
*
)
malloc
(
length
*
sizeof
(
float
));
convertInterleavedToSplit
(
&
result_split
,
data_cl
,
length
);
diff_avg
=
computeL2Error
(
&
result_split
,
&
data_oref
,
n
.
x
*
n
.
y
*
n
.
z
,
batchSize
,
&
diff_max
,
&
diff_min
);
if
(
diff_avg
>
eps_avg
)
log_error
(
"Test failed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)
\n
"
,
n
.
x
,
n
.
y
,
n
.
z
,
batchSize
,
(
testType
==
clFFT_OUT_OF_PLACE
)
?
"out-of-place"
:
"in-place"
,
diff_avg
,
diff_max
,
diff_min
);
else
log_info
(
"Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)
\n
"
,
n
.
x
,
n
.
y
,
n
.
z
,
batchSize
,
(
testType
==
clFFT_OUT_OF_PLACE
)
?
"out-of-place"
:
"in-place"
,
diff_avg
,
diff_max
,
diff_min
);
log_info
(
"Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)
\n
"
,
n
.
x
,
n
.
y
,
n
.
z
,
batchSize
,
(
testType
==
clFFT_OUT_OF_PLACE
)
?
"out-of-place"
:
"in-place"
,
diff_avg
,
diff_max
,
diff_min
);
free
(
result_split
.
real
);
free
(
result_split
.
imag
);
}
#endif
cleanup:
clFFT_DestroyPlan
(
plan
);
if
(
dataFormat
==
clFFT_SplitComplexFormat
)
clFFT_DestroyPlan
(
plan
);
if
(
dataFormat
==
clFFT_SplitComplexFormat
)
{
if
(
data_i_split
.
real
)
free
(
data_i_split
.
real
);
...
...
@@ -627,7 +627,7 @@ cleanup:
free
(
data_cl_split
.
real
);
if
(
data_cl_split
.
imag
)
free
(
data_cl_split
.
imag
);
if
(
data_in_real
)
clReleaseMemObject
(
data_in_real
);
if
(
data_in_imag
)
...
...
@@ -637,28 +637,28 @@ cleanup:
if
(
data_out_imag
&&
clFFT_OUT_OF_PLACE
)
clReleaseMemObject
(
data_out_imag
);
}
else
else
{
if
(
data_i
)
free
(
data_i
);
if
(
data_cl
)
free
(
data_cl
);
if
(
data_in
)
clReleaseMemObject
(
data_in
);
if
(
data_out
&&
testType
==
clFFT_OUT_OF_PLACE
)
clReleaseMemObject
(
data_out
);
}
if
(
data_iref
.
real
)
free
(
data_iref
.
real
);
if
(
data_iref
.
imag
)
free
(
data_iref
.
imag
);
free
(
data_iref
.
imag
);
if
(
data_oref
.
real
)
free
(
data_oref
.
real
);
if
(
data_oref
.
imag
)
free
(
data_oref
.
imag
);
return
err
;
}
...
...
@@ -690,7 +690,7 @@ cl_device_type getGlobalDeviceType()
return
CL_DEVICE_TYPE_GPU
;
}
void
void
notify_callback
(
const
char
*
errinfo
,
const
void
*
private_info
,
size_t
cb
,
void
*
user_data
)
{
printf
(
"ERROR: %s
\n
"
,
errinfo
);
...
...
@@ -708,9 +708,9 @@ checkMemRequirements(clFFT_Dim3 n, int batchSize, clFFT_TestType testType, cl_ul
}
int
main
(
int
argc
,
char
*
const
argv
[])
{
test_start
();
cl_ulong
gMemSize
;
clFFT_Direction
dir
=
clFFT_Forward
;
int
numIter
=
1
;
...
...
@@ -720,28 +720,28 @@ int main (int argc, char * const argv[]) {
clFFT_Dimension
dim
=
clFFT_1D
;
clFFT_TestType
testType
=
clFFT_OUT_OF_PLACE
;
cl_device_id
device_ids
[
16
];
FILE
*
paramFile
;
cl_int
err
;
unsigned
int
num_devices
;
cl_device_type
device_type
=
getGlobalDeviceType
();
if
(
device_type
!=
CL_DEVICE_TYPE_GPU
)
cl_device_type
device_type
=
getGlobalDeviceType
();
if
(
device_type
!=
CL_DEVICE_TYPE_GPU
)
{
log_info
(
"Test only supported on DEVICE_TYPE_GPU
\n
"
);
test_finish
();
exit
(
0
);
}
err
=
clGetDeviceIDs
(
NULL
,
device_type
,
sizeof
(
device_ids
),
device_ids
,
&
num_devices
);
if
(
err
)
{
if
(
err
)
{
printf
(
"ERROR: clGetDeviceIDs failed with error: %d
\n
"
,
err
);
test_finish
();
return
-
1
;
}
device_id
=
NULL
;
unsigned
int
i
=
0
;
...
...
@@ -794,7 +794,7 @@ int main (int argc, char * const argv[]) {
}
}
}
if
(
!
device_id
)
{
log_error
(
"None of the devices available for compute ... aborting test
\n
"
);
test_finish
();
...
...
@@ -812,13 +812,13 @@ int main (int argc, char * const argv[]) {
}
context
=
clCreateContext
(
0
,
1
,
&
device_id
,
NULL
,
NULL
,
&
err
);