Select Git revision
Forked from
einsteinathome / libclfft
27 commits behind the upstream repository.
-
Oliver Bock authored
main.cpp 35.00 KiB
//
// File: main.cpp
//
// Version: <1.0>
//
// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple")
// in consideration of your agreement to the following terms, and your use,
// installation, modification or redistribution of this Apple software
// constitutes acceptance of these terms. If you do not agree with these
// terms, please do not use, install, modify or redistribute this Apple
// software.
//
// In consideration of your agreement to abide by the following terms, and
// subject to these terms, Apple grants you a personal, non - exclusive
// license, under Apple's copyrights in this original Apple software ( the
// "Apple Software" ), to use, reproduce, modify and redistribute the Apple
// Software, with or without modifications, in source and / or binary forms;
// provided that if you redistribute the Apple Software in its entirety and
// without modifications, you must retain this notice and the following text
// and disclaimers in all such redistributions of the Apple Software. Neither
// the name, trademarks, service marks or logos of Apple Inc. may be used to
// endorse or promote products derived from the Apple Software without specific
// prior written permission from Apple. Except as expressly stated in this
// notice, no other rights or licenses, express or implied, are granted by
// Apple herein, including but not limited to any patent rights that may be
// infringed by your derivative works or by other works in which the Apple
// Software may be incorporated.
//
// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
// ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
//
// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
//
////////////////////////////////////////////////////////////////////////////////////////////////////
#include <string.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef __APPLE__
#include <OpenCL/cl.h>
#include <mach/mach_time.h>
#include <Accelerate/Accelerate.h>
#else
#include <CL/cl.h>
#endif
#include <clFFT.h>
#include "procs.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <stdint.h>
#include <float.h>
// Pass/fail threshold for the averaged relative L2 error reported by
// computeL2Error(); that error is expressed in multiples of FLT_EPSILON.
#define eps_avg 10.0
// Capacity of the device list that main() asks clGetDeviceIDs() to fill.
#define MAX_DEVICES 16
// Larger of two values. Both arguments may be evaluated more than once, so
// do not pass expressions with side effects.
#define MAX( _a, _b) ((_a)>(_b)?(_a) : (_b))
// Selects whether runTest() gives the FFT separate output buffers or reuses
// the input buffers as the output.
typedef enum {
// Dedicated output buffers are created in addition to the input buffers.
clFFT_OUT_OF_PLACE,
// The output buffer handles alias the input buffer handles.
clFFT_IN_PLACE,
}clFFT_TestType;
// A single complex value held in double precision.
typedef struct
{
// Real part.
double real;
// Imaginary part.
double imag;
}clFFT_ComplexDouble;
// Double-precision complex data in split layout: the real parts and the
// imaginary parts live in two separate arrays indexed in parallel.
typedef struct
{
// Array of real parts.
double *real;
// Array of imaginary parts.
double *imag;
}clFFT_SplitComplexDouble;
// OpenCL state shared between main() and runTest(). main() selects the
// device and creates the context and command queue; runTest() uses the
// context for plan/buffer creation and the queue for execution and read-back.
cl_device_id device_id;
cl_context context;
cl_command_queue queue;
#ifdef __APPLE__
// Returns the interval between two timestamps (runTest() passes values
// obtained from mach_absolute_time()), scaled into seconds.
//
// The tick-to-seconds factor is fetched from mach_timebase_info() on first
// use and cached in a function-local static. While that query has not
// succeeded the factor stays 0.0, so the function returns 0.0.
double subtractTimes( uint64_t endTime, uint64_t startTime )
{
    static double conversion = 0.0;

    if( conversion == 0.0 )
    {
        mach_timebase_info_data_t timebase;
        // numer/denom scale ticks to nanoseconds; 1e-9 takes that to seconds.
        if( mach_timebase_info( &timebase ) == 0 )
            conversion = 1e-9 * (double) timebase.numer / (double) timebase.denom;
    }

    const uint64_t elapsedTicks = endTime - startTime;
    return conversion * (double) elapsedTicks;
}
#endif
#ifdef __APPLE__
// Computes a single-precision reference FFT in place using vDSP.
//
//   out       - split-complex data, transformed in place; holds batchSize
//               consecutive signals of n.x * n.y * n.z samples each
//   n         - signal extent per dimension (the log2 of each extent is
//               truncated to an integer and passed to vDSP)
//   batchSize - number of consecutive signals in out
//   dim       - clFFT_1D, clFFT_2D or clFFT_3D; other values leave out untouched
//   dir       - clFFT_Forward selects FFT_FORWARD, anything else FFT_INVERSE
//
// Multi-dimensional transforms are done as successive 1D passes: along x
// (stride 1), then y (stride n.x), then z (stride n.x * n.y).
// If the vDSP setup cannot be created an error is logged and out is left
// unmodified.
void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
unsigned int batchSize, clFFT_Dimension dim, clFFT_Direction dir)
{
    FFTSetup plan_vdsp;
    DSPSplitComplex out_vdsp;
    FFTDirection dir_vdsp = dir == clFFT_Forward ? FFT_FORWARD : FFT_INVERSE;
    unsigned int i, j, k;
    unsigned int stride;
    unsigned int log2Nx = (unsigned int) log2(n.x);
    unsigned int log2Ny = (unsigned int) log2(n.y);
    unsigned int log2Nz = (unsigned int) log2(n.z);
    unsigned int log2N;

    // One setup sized for the longest dimension serves all passes.
    log2N = log2Nx;
    log2N = log2N > log2Ny ? log2N : log2Ny;
    log2N = log2N > log2Nz ? log2N : log2Nz;

    plan_vdsp = vDSP_create_fftsetup(log2N, 2);
    if(!plan_vdsp)
    {
        // Without a setup the vDSP calls below would dereference NULL.
        log_error("vDSP_create_fftsetup failed\n");
        return;
    }

    switch(dim)
    {
        case clFFT_1D:
            for(i = 0; i < batchSize; i++)
            {
                stride = i * n.x;
                out_vdsp.realp = out->real + stride;
                out_vdsp.imagp = out->imag + stride;
                vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
            }
            break;

        case clFFT_2D:
            // Pass 1: every row along x.
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.y; j++)
                {
                    stride = j * n.x + i * n.x * n.y;
                    out_vdsp.realp = out->real + stride;
                    out_vdsp.imagp = out->imag + stride;
                    vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
                }
            }
            // Pass 2: every column along y.
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.x; j++)
                {
                    stride = j + i * n.x * n.y;
                    out_vdsp.realp = out->real + stride;
                    out_vdsp.imagp = out->imag + stride;
                    vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
                }
            }
            break;

        case clFFT_3D:
            // Pass 1: along x for every (y, z).
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.z; j++)
                {
                    for(k = 0; k < n.y; k++)
                    {
                        stride = k * n.x + j * n.x * n.y + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + stride;
                        out_vdsp.imagp = out->imag + stride;
                        vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
                    }
                }
            }
            // Pass 2: along y for every (x, z).
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.z; j++)
                {
                    for(k = 0; k < n.x; k++)
                    {
                        stride = k + j * n.x * n.y + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + stride;
                        out_vdsp.imagp = out->imag + stride;
                        vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
                    }
                }
            }
            // Pass 3: along z for every (x, y).
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.y; j++)
                {
                    for(k = 0; k < n.x; k++)
                    {
                        stride = k + j * n.x + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + stride;
                        out_vdsp.imagp = out->imag + stride;
                        vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x*n.y, log2Nz, dir_vdsp);
                    }
                }
            }
            break;

        default:
            break;
    }

    vDSP_destroy_fftsetup(plan_vdsp);
}
#endif
#ifdef __APPLE__
// Computes a double-precision reference FFT in place using vDSP.
//
//   out       - split-complex double data, transformed in place; holds
//               batchSize consecutive signals of n.x * n.y * n.z samples each
//   n         - signal extent per dimension (the log2 of each extent is
//               truncated to an integer and passed to vDSP)
//   batchSize - number of consecutive signals in out
//   dim       - clFFT_1D, clFFT_2D or clFFT_3D; other values leave out untouched
//   dir       - clFFT_Forward selects FFT_FORWARD, anything else FFT_INVERSE
//
// Multi-dimensional transforms are done as successive 1D passes: along x
// (stride 1), then y (stride n.x), then z (stride n.x * n.y).
// If the vDSP setup cannot be created an error is logged and out is left
// unmodified.
void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
unsigned int batchSize, clFFT_Dimension dim, clFFT_Direction dir)
{
    FFTSetupD plan_vdsp;
    DSPDoubleSplitComplex out_vdsp;
    FFTDirection dir_vdsp = dir == clFFT_Forward ? FFT_FORWARD : FFT_INVERSE;
    unsigned int i, j, k;
    unsigned int stride;
    unsigned int log2Nx = (unsigned int) log2(n.x);
    unsigned int log2Ny = (unsigned int) log2(n.y);
    unsigned int log2Nz = (unsigned int) log2(n.z);
    unsigned int log2N;

    // One setup sized for the longest dimension serves all passes.
    log2N = log2Nx;
    log2N = log2N > log2Ny ? log2N : log2Ny;
    log2N = log2N > log2Nz ? log2N : log2Nz;

    plan_vdsp = vDSP_create_fftsetupD(log2N, 2);
    if(!plan_vdsp)
    {
        // Without a setup the vDSP calls below would dereference NULL.
        log_error("vDSP_create_fftsetupD failed\n");
        return;
    }

    switch(dim)
    {
        case clFFT_1D:
            for(i = 0; i < batchSize; i++)
            {
                stride = i * n.x;
                out_vdsp.realp = out->real + stride;
                out_vdsp.imagp = out->imag + stride;
                vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
            }
            break;

        case clFFT_2D:
            // Pass 1: every row along x.
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.y; j++)
                {
                    stride = j * n.x + i * n.x * n.y;
                    out_vdsp.realp = out->real + stride;
                    out_vdsp.imagp = out->imag + stride;
                    vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
                }
            }
            // Pass 2: every column along y.
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.x; j++)
                {
                    stride = j + i * n.x * n.y;
                    out_vdsp.realp = out->real + stride;
                    out_vdsp.imagp = out->imag + stride;
                    vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
                }
            }
            break;

        case clFFT_3D:
            // Pass 1: along x for every (y, z).
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.z; j++)
                {
                    for(k = 0; k < n.y; k++)
                    {
                        stride = k * n.x + j * n.x * n.y + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + stride;
                        out_vdsp.imagp = out->imag + stride;
                        vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
                    }
                }
            }
            // Pass 2: along y for every (x, z).
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.z; j++)
                {
                    for(k = 0; k < n.x; k++)
                    {
                        stride = k + j * n.x * n.y + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + stride;
                        out_vdsp.imagp = out->imag + stride;
                        vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
                    }
                }
            }
            // Pass 3: along z for every (x, y).
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.y; j++)
                {
                    for(k = 0; k < n.x; k++)
                    {
                        stride = k + j * n.x + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + stride;
                        out_vdsp.imagp = out->imag + stride;
                        vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x*n.y, log2Nz, dir_vdsp);
                    }
                }
            }
            break;

        default:
            break;
    }

    vDSP_destroy_fftsetupD(plan_vdsp);
}
#endif
// Returns the squared magnitude of a complex value: real^2 + imag^2.
double complexNormSq(clFFT_ComplexDouble a)
{
    const double sumOfSquares = a.real * a.real + a.imag * a.imag;
    return sumOfSquares;
}
// Compares a single-precision result against a double-precision reference,
// one batch entry at a time.
//
// For each of the batchSize entries (n samples each) the relative L2 error
// sqrt(sum |ref - data|^2 / sum |ref|^2) is computed and divided by
// FLT_EPSILON, i.e. expressed in multiples of float epsilon.
//
//   data      - values under test (split complex, float)
//   data_ref  - reference values (split complex, double)
//   max_diff  - receives the largest per-entry error (starts at 0.0)
//   min_diff  - receives the smallest per-entry error (starts at 2^1000)
//
// Returns the per-entry errors averaged over batchSize.
double computeL2Error(clFFT_SplitComplex *data, clFFT_SplitComplexDouble *data_ref, int n, int batchSize, double *max_diff, double *min_diff)
{
    double errorSum = 0.0;
    int batch;

    *max_diff = 0.0;
    *min_diff = 0x1.0p1000;

    for(batch = 0; batch < batchSize; batch++)
    {
        double diffEnergy = 0.0;
        double refEnergy = 0.0;
        int k;

        for(k = 0; k < n; k++)
        {
            const int pos = batch * n + k;
            const double refRe = data_ref->real[pos];
            const double refIm = data_ref->imag[pos];
            const double dRe = refRe - data->real[pos];
            const double dIm = refIm - data->imag[pos];
            const double diffSq = dRe * dRe + dIm * dIm;

            diffEnergy += diffSq;
            refEnergy += (refRe * refRe + refIm * refIm);
        }

        const double relErr = sqrt( diffEnergy / refEnergy ) / FLT_EPSILON;
        errorSum += relErr;

        if(*max_diff < relErr)
            *max_diff = relErr;
        if(*min_diff > relErr)
            *min_diff = relErr;
    }

    return errorSum / batchSize;
}
// Copies `length` interleaved complex samples from data_cl into the separate
// real and imaginary arrays of result_split, which the caller must have
// allocated with room for at least `length` floats each.
void convertInterleavedToSplit(clFFT_SplitComplex *result_split, clFFT_Complex *data_cl, int length)
{
    int pos = 0;
    while(pos < length)
    {
        result_split->real[pos] = data_cl[pos].real;
        result_split->imag[pos] = data_cl[pos].imag;
        ++pos;
    }
}
int runTest(FILE *input, clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension dim,
clFFT_DataFormat dataFormat, int numIter, clFFT_TestType testType, int debugEnabled)
{
cl_int err = CL_SUCCESS;
int iter;
#ifdef __APPLE__
double t;
uint64_t t0, t1;
int mx = log2(n.x);
int my = log2(n.y);
int mz = log2(n.z);
double gflops = 5e-9 * ((double)mx + (double)my + (double)mz) * (double)n.x * (double)n.y * (double)n.z * (double)batchSize * (double)numIter;
#endif
int length = n.x * n.y * n.z * batchSize;
float normFactor = 1.0 / 256.0;
clFFT_SplitComplex data_i_split = (clFFT_SplitComplex) { NULL, NULL };
clFFT_SplitComplex data_cl_split = (clFFT_SplitComplex) { NULL, NULL };
clFFT_Complex *data_i = NULL;
clFFT_Complex *data_cl = NULL;
clFFT_SplitComplexDouble data_iref = (clFFT_SplitComplexDouble) { NULL, NULL };
clFFT_SplitComplexDouble data_oref = (clFFT_SplitComplexDouble) { NULL, NULL };
clFFT_Plan plan = NULL;
cl_mem data_in = NULL;
cl_mem data_out = NULL;
cl_mem data_in_real = NULL;
cl_mem data_in_imag = NULL;
cl_mem data_out_real = NULL;
cl_mem data_out_imag = NULL;
if(dataFormat == clFFT_SplitComplexFormat) {
data_i_split.real = (float *) malloc(sizeof(float) * length);
data_i_split.imag = (float *) malloc(sizeof(float) * length);
data_cl_split.real = (float *) malloc(sizeof(float) * length);
data_cl_split.imag = (float *) malloc(sizeof(float) * length);
if(!data_i_split.real || !data_i_split.imag || !data_cl_split.real || !data_cl_split.imag)
{
err = -1;
log_error("Out-of-Resources\n");
goto cleanup;
}
}
else {
data_i = (clFFT_Complex *) malloc(sizeof(clFFT_Complex)*length);
data_cl = (clFFT_Complex *) malloc(sizeof(clFFT_Complex)*length);
if(!data_i || !data_cl)
{
err = -2;
log_error("Out-of-Resouces\n");
goto cleanup;
}
}
data_iref.real = (double *) malloc(sizeof(double) * length);
data_iref.imag = (double *) malloc(sizeof(double) * length);
data_oref.real = (double *) malloc(sizeof(double) * length);
data_oref.imag = (double *) malloc(sizeof(double) * length);
if(!data_iref.real || !data_iref.imag || !data_oref.real || !data_oref.imag)
{
err = -3;
log_error("Out-of-Resources\n");
goto cleanup;
}
int i;
if(dataFormat == clFFT_SplitComplexFormat) {
for(i = 0; i < length; i++)
{
data_i_split.real[i] = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
data_i_split.imag[i] = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
data_cl_split.real[i] = 0.0f;
data_cl_split.imag[i] = 0.0f;
data_iref.real[i] = data_i_split.real[i];
data_iref.imag[i] = data_i_split.imag[i];
data_oref.real[i] = data_iref.real[i];
data_oref.imag[i] = data_iref.imag[i];
}
}
else if(NULL != input) {
for(i = 0; i < length; i++) {
fscanf(input, "%f\n", &data_i[i].real);
data_i[i].imag = 0.0f;
data_cl[i].real = 0.0f;
data_cl[i].imag = 0.0f;
data_iref.real[i] = data_i[i].real;
data_iref.imag[i] = data_i[i].imag;
data_oref.real[i] = data_iref.real[i];
data_oref.imag[i] = data_iref.imag[i];
}
}
else {
for(i = 0; i < length; i++)
{
data_i[i].real = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
data_i[i].imag = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
data_cl[i].real = 0.0f;
data_cl[i].imag = 0.0f;
data_iref.real[i] = data_i[i].real;
data_iref.imag[i] = data_i[i].imag;
data_oref.real[i] = data_iref.real[i];
data_oref.imag[i] = data_iref.imag[i];
}
}
plan = clFFT_CreatePlan( context, n, dim, dataFormat, &err );
if(!plan || err)
{
log_error("clFFT_CreatePlan failed\n");
goto cleanup;
}
//clFFT_DumpPlan(plan, stdout);
if(dataFormat == clFFT_SplitComplexFormat)
{
data_in_real = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_i_split.real, &err);
if(!data_in_real || err)
{
log_error("clCreateBuffer failed\n");
goto cleanup;
}
data_in_imag = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_i_split.imag, &err);
if(!data_in_imag || err)
{
log_error("clCreateBuffer failed\n");
goto cleanup;
}
if(testType == clFFT_OUT_OF_PLACE)
{
data_out_real = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_cl_split.real, &err);
if(!data_out_real || err)
{
log_error("clCreateBuffer failed\n");
goto cleanup;
}
data_out_imag = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_cl_split.imag, &err);
if(!data_out_imag || err)
{
log_error("clCreateBuffer failed\n");
goto cleanup;
}
}
else
{
data_out_real = data_in_real;
data_out_imag = data_in_imag;
}
}
else
{
data_in = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float)*2, data_i, &err);
if(!data_in)
{
log_error("clCreateBuffer failed\n");
goto cleanup;
}
if(testType == clFFT_OUT_OF_PLACE)
{
data_out = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float)*2, data_cl, &err);
if(!data_out)
{
log_error("clCreateBuffer failed\n");
goto cleanup;
}
}
else
data_out = data_in;
}
err = CL_SUCCESS;
#ifdef __APPLE__
t0 = mach_absolute_time();
#endif
if(dataFormat == clFFT_SplitComplexFormat)
{
for(iter = 0; iter < numIter; iter++)
err |= clFFT_ExecutePlannar(queue, plan, batchSize, dir, data_in_real, data_in_imag, data_out_real, data_out_imag, 0, NULL, NULL);
}
else
{
for(iter = 0; iter < numIter; iter++)
err |= clFFT_ExecuteInterleaved(queue, plan, batchSize, dir, data_in, data_out, 0, NULL, NULL);
}
err |= clFinish(queue);
if(err)
{
log_error("clFFT_Execute\n");
goto cleanup;
}
#ifdef __APPLE__
t1 = mach_absolute_time();
t = subtractTimes(t1, t0);
char temp[100];
sprintf(temp, "GFlops achieved for n = (%d, %d, %d), batchsize = %d", n.x, n.y, n.z, batchSize);
log_perf(gflops / (float) t, 1, "GFlops/s", "%s", temp);
#endif
if(dataFormat == clFFT_SplitComplexFormat)
{
err |= clEnqueueReadBuffer(queue, data_out_real, CL_TRUE, 0, length*sizeof(float), data_cl_split.real, 0, NULL, NULL);
err |= clEnqueueReadBuffer(queue, data_out_imag, CL_TRUE, 0, length*sizeof(float), data_cl_split.imag, 0, NULL, NULL);
}
else
{
err |= clEnqueueReadBuffer(queue, data_out, CL_TRUE, 0, length*sizeof(float)*2, data_cl, 0, NULL, NULL);
}
if(err)
{
log_error("clEnqueueReadBuffer failed\n");
goto cleanup;
}
#ifdef __APPLE__
computeReferenceD(&data_oref, n, batchSize, dim, dir);
double diff_avg, diff_max, diff_min;
if(dataFormat == clFFT_SplitComplexFormat) {
diff_avg = computeL2Error(&data_cl_split, &data_oref, n.x*n.y*n.z, batchSize, &diff_max, &diff_min);
if(diff_avg > eps_avg)
log_error("Test failed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
else
log_info("Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
}
else {
clFFT_SplitComplex result_split;
result_split.real = (float *) malloc(length*sizeof(float));
result_split.imag = (float *) malloc(length*sizeof(float));
convertInterleavedToSplit(&result_split, data_cl, length);
diff_avg = computeL2Error(&result_split, &data_oref, n.x*n.y*n.z, batchSize, &diff_max, &diff_min);
if(diff_avg > eps_avg)
log_error("Test failed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
else
log_info("Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
free(result_split.real);
free(result_split.imag);
}
#endif
if(debugEnabled) {
log_info("Output power spectrum for manual validation (normalized):\n");
if(dataFormat != clFFT_SplitComplexFormat) {
clFFT_SplitComplex result_split;
result_split.real = (float *) malloc(length*sizeof(float));
result_split.imag = (float *) malloc(length*sizeof(float));
convertInterleavedToSplit(&result_split, data_cl, length);
for(int i = 0; i < length; ++i) {
printf("%f\n", normFactor * (result_split.real[i]*result_split.real[i] + result_split.imag[i]*result_split.imag[i]));
}
free(result_split.real);
free(result_split.imag);
}
else {
for(int i = 0; i < length; ++i) {
printf("%f\n", normFactor * (data_cl_split.real[i]*data_cl_split.real[i] + data_cl_split.imag[i]*data_cl_split.imag[i]));
}
}
}
cleanup:
clFFT_DestroyPlan(plan);
if(dataFormat == clFFT_SplitComplexFormat)
{
if(data_i_split.real)
free(data_i_split.real);
if(data_i_split.imag)
free(data_i_split.imag);
if(data_cl_split.real)
free(data_cl_split.real);
if(data_cl_split.imag)
free(data_cl_split.imag);
if(data_in_real)
clReleaseMemObject(data_in_real);
if(data_in_imag)
clReleaseMemObject(data_in_imag);
if(data_out_real && testType == clFFT_OUT_OF_PLACE)
clReleaseMemObject(data_out_real);
if(data_out_imag && clFFT_OUT_OF_PLACE)
clReleaseMemObject(data_out_imag);
}
else
{
if(data_i)
free(data_i);
if(data_cl)
free(data_cl);
if(data_in)
clReleaseMemObject(data_in);
if(data_out && testType == clFFT_OUT_OF_PLACE)
clReleaseMemObject(data_out);
}
if(data_iref.real)
free(data_iref.real);
if(data_iref.imag)
free(data_iref.imag);
if(data_oref.real)
free(data_oref.real);
if(data_oref.imag)
free(data_oref.imag);
return err;
}
// Returns true when the NUL-terminated string `line` contains the sequence
// "//" anywhere (not only at the start), false otherwise.
bool ifLineCommented(const char *line) {
    return strstr(line, "//") != NULL;
}
// Maps the CL_DEVICE_TYPE environment variable to an OpenCL device type.
//
// Recognised values: "gpu"/"CL_DEVICE_TYPE_GPU", "cpu"/"CL_DEVICE_TYPE_CPU",
// "accelerator"/"CL_DEVICE_TYPE_ACCELERATOR" and "CL_DEVICE_TYPE_DEFAULT".
// If the variable is unset or holds any other value, CL_DEVICE_TYPE_GPU is
// returned.
cl_device_type getGlobalDeviceType()
{
    const char *requested = getenv( "CL_DEVICE_TYPE" );

    if( requested == NULL )
        return CL_DEVICE_TYPE_GPU;

    if( !strcmp( requested, "gpu" ) || !strcmp( requested, "CL_DEVICE_TYPE_GPU" ) )
        return CL_DEVICE_TYPE_GPU;

    if( !strcmp( requested, "cpu" ) || !strcmp( requested, "CL_DEVICE_TYPE_CPU" ) )
        return CL_DEVICE_TYPE_CPU;

    if( !strcmp( requested, "accelerator" ) || !strcmp( requested, "CL_DEVICE_TYPE_ACCELERATOR" ) )
        return CL_DEVICE_TYPE_ACCELERATOR;

    if( !strcmp( requested, "CL_DEVICE_TYPE_DEFAULT" ) )
        return CL_DEVICE_TYPE_DEFAULT;

    // Unrecognised value: fall back to the default.
    return CL_DEVICE_TYPE_GPU;
}
// Error-notification callback: prints the supplied error text to stdout,
// prefixed with "ERROR: ". The remaining arguments are ignored.
void
notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
    (void) private_info;
    (void) cb;
    (void) user_data;

    fprintf( stdout, "ERROR: %s\n", errinfo );
}
// Estimates whether a test case fits into device memory.
//
// The estimate is (3 for out-of-place, 2 for in-place) times the size of one
// complex data set of n.x * n.y * n.z * batchSize samples, truncated to whole
// MiB.
//
//   gMemSize - available global memory in MiB (main() divides the byte count
//              by 1024*1024 before calling)
//
// Returns -1 if the estimate is greater than or equal to gMemSize, else 0.
int
checkMemRequirements(clFFT_Dim3 n, int batchSize, clFFT_TestType testType, cl_ulong gMemSize)
{
    cl_ulong memReq = (testType == clFFT_OUT_OF_PLACE) ? 3 : 2;

    // Widen every factor before multiplying so the product of the dimensions
    // cannot wrap around at the width of the dimension fields.
    memReq *= (cl_ulong) n.x;
    memReq *= (cl_ulong) n.y;
    memReq *= (cl_ulong) n.z;
    memReq *= (cl_ulong) sizeof(clFFT_Complex);
    memReq *= (cl_ulong) batchSize;

    // Bytes -> MiB, matching the unit of gMemSize.
    memReq = memReq/1024/1024;

    if(memReq >= gMemSize)
        return -1;
    return 0;
}
// Test driver.
//
// Usage: <program> <parameter-file> [device-index [debug]]
//
//   argv[1] - text file; every line that is neither empty nor contains "//"
//             describes one test run through the options -n <x> <y> <z>,
//             -batchsize, -dir, -dim, -format, -numiter, -testtype and
//             -inputdatafile. Settings carry over from one line to the next.
//   argv[2] - optional index into the list of devices found; without it the
//             first available device is used.
//   argv[3] - "debug" (with exactly three arguments) enables the power
//             spectrum dump in runTest().
//
// Returns the number of runTest() calls that reported an error, or a negative
// value when the OpenCL setup or the parameter file could not be initialised.
int main (int argc, char * const argv[]) {
    test_start();

    FILE *inputData = NULL;
    FILE *paramFile = NULL;
    cl_ulong gMemSize;
    clFFT_Direction dir = clFFT_Forward;
    int numIter = 1;
    clFFT_Dim3 n = { 1024, 1, 1 };
    int batchSize = 1;
    clFFT_DataFormat dataFormat = clFFT_SplitComplexFormat;
    clFFT_Dimension dim = clFFT_1D;
    clFFT_TestType testType = clFFT_OUT_OF_PLACE;
    cl_device_id device_ids[MAX_DEVICES];
    int debugEnabled = 0;
    cl_int err, status;
    unsigned int num_devices;

    cl_device_type device_type = getGlobalDeviceType();
    if(device_type != CL_DEVICE_TYPE_GPU)
    {
        log_info("Test only supported on DEVICE_TYPE_GPU\n");
        test_finish();
        exit(0);
    }

    // ---- platform selection: first platform reporting FULL_PROFILE ----------
    cl_uint numPlatforms;
    cl_platform_id platform = NULL;
    status = clGetPlatformIDs(0, NULL, &numPlatforms);
    if(CL_SUCCESS != status) {
        printf("ERROR: clGetPlatformIDs failed with error: %d\n", status);
        return -1;
    }
    if (0 < numPlatforms) {
        cl_platform_id* platforms = new cl_platform_id[numPlatforms];
        status = clGetPlatformIDs(numPlatforms, platforms, NULL);
        if(CL_SUCCESS != status) {
            printf("ERROR: clGetPlatformIDs failed with error: %d\n", status);
            delete[] platforms;
            return -1;
        }
        for (unsigned p = 0; p < numPlatforms; ++p)
        {
            char profile[32] = {0};
            status = clGetPlatformInfo(platforms[p],
                                       CL_PLATFORM_PROFILE,
                                       sizeof(profile),
                                       profile,
                                       NULL);
            if(CL_SUCCESS != status) {
                printf("ERROR: clGetPlatformInfo failed with error: %d\n", status);
                delete[] platforms;
                return -1;
            }
            if (!strcmp(profile, "FULL_PROFILE"))
            {
                platform = platforms[p];
                break;
            }
        }
        delete[] platforms;
    }
    if( NULL == platform) {
        log_error("No valid OpenCL platform found!\n");
        return -1;
    }

    char vendor[128] = {0};
    status = clGetPlatformInfo(platform,
                               CL_PLATFORM_VENDOR,
                               sizeof(vendor),
                               vendor,
                               NULL);
    if(CL_SUCCESS != status) {
        printf("WARN: clGetPlatformInfo failed with error: %d\n", status);
    }
    else {
        printf("INFO: Using OpenCL platform provided by: %s\n", vendor);
    }

    // ---- device selection ----------------------------------------------------
    err = clGetDeviceIDs(platform, device_type, MAX_DEVICES, device_ids, &num_devices);
    if(err) {
        printf("ERROR: clGetDeviceIDs failed with error: %d\n", err);
        test_finish();
        return -1;
    }
    else {
        printf("INFO: Found %d suitable device(s)...\n", num_devices);
    }
    // The reported count may exceed the number of entries actually written.
    if(num_devices > MAX_DEVICES)
        num_devices = MAX_DEVICES;

    device_id = NULL;
    unsigned int selected = 0;
    if (argc >= 3) {
        // Explicit device index: validate it before indexing device_ids.
        char *end = NULL;
        long requested = strtol(argv[2], &end, 10);
        if(end == argv[2] || requested < 0 || (unsigned long) requested >= num_devices) {
            printf("ERROR: Invalid device # %s (%u device(s) found)\n", argv[2], num_devices);
            return -1;
        }
        selected = (unsigned int) requested;

        cl_bool available;
        err = clGetDeviceInfo(device_ids[selected], CL_DEVICE_AVAILABLE, sizeof(cl_bool), &available, NULL);
        if(err) {
            printf("ERROR: Cannot check device availability of device # %d\n", selected);
            return -1;
        }

        if(available) {
            device_id = device_ids[selected];
        }
        else {
            char name[200];
            err = clGetDeviceInfo(device_ids[selected], CL_DEVICE_NAME, sizeof(name), name, NULL);
            if(err == CL_SUCCESS) {
                printf("ERROR: Device %s not available for compute\n", name);
            }
            else {
                printf("ERROR: Device # %d not available for compute\n", selected);
            }
            return -1;
        }
    }
    else {
        // No index given: take the first device that reports itself available.
        for(selected = 0; selected < num_devices; selected++)
        {
            cl_bool available;
            err = clGetDeviceInfo(device_ids[selected], CL_DEVICE_AVAILABLE, sizeof(cl_bool), &available, NULL);
            if(err) {
                printf("ERROR: Cannot check device availability of device # %d. Continuing with next available device...\n", selected);
                continue;
            }

            if(available) {
                device_id = device_ids[selected];
                break;
            }
            else {
                char name[200];
                err = clGetDeviceInfo(device_ids[selected], CL_DEVICE_NAME, sizeof(name), name, NULL);
                if(err == CL_SUCCESS) {
                    printf("INFO: Device %s not available for compute\n", name);
                }
                else {
                    printf("INFO: Device # %d not available for compute\n", selected);
                }
            }
        }
    }

    if(!device_id) {
        log_error("None of the devices available for compute ... aborting test\n");
        test_finish();
        return -1;
    }
    else {
        char name[200];
        err = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(name), name, NULL);
        if(err == CL_SUCCESS) {
            printf("INFO: Using device %s...\n", name);
        }
        else {
            printf("INFO: Using device # %d...\n", selected);
        }
    }

    // ---- context, queue, memory size ------------------------------------------
    cl_context_properties ctxProps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0};
    context = clCreateContext(ctxProps, 1, &device_id, NULL, NULL, &err);
    if(!context || err)
    {
        log_error("clCreateContext failed\n");
        test_finish();
        return -1;
    }

    queue = clCreateCommandQueue(context, device_id, 0, &err);
    if(!queue || err)
    {
        log_error("clCreateCommandQueue() failed.\n");
        clReleaseContext(context);
        test_finish();
        return -1;
    }

    err = clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &gMemSize, NULL);
    if(err)
    {
        log_error("Failed to get global mem size\n");
        clReleaseCommandQueue(queue);
        clReleaseContext(context);
        test_finish();
        return -2;
    }

    // checkMemRequirements() compares in MiB.
    gMemSize /= (1024*1024);

    char delim[] = " \n";
    char tmpStr[100];
    char line[500];
    char *param, *val;
    int total_errors = 0;

    if(argc == 1) {
        log_error("Need file name with list of parameters to run the test\n");
        clReleaseCommandQueue(queue);
        clReleaseContext(context);
        test_finish();
        return -1;
    }

    if(argc == 4 && strcmp(argv[3], "debug") == 0) {
        debugEnabled = 1;
    }

    if(argc >= 2) { // arguments are supplied in a file; the arguments for a single run are all on the same line
        paramFile = fopen(argv[1], "r");
        if(!paramFile) {
            log_error("Cannot open the parameter file\n");
            clReleaseCommandQueue(queue);
            clReleaseContext(context);
            test_finish();
            return -3;
        }

        while(fgets(line, sizeof(line), paramFile)) {
            if(!strcmp(line, "") || !strcmp(line, "\n") || ifLineCommented(line))
                continue;

            param = strtok(line, delim);
            while(param) {
                val = strtok(NULL, delim);
                if(!val) {
                    // A trailing option without a value cannot be parsed; keep
                    // the settings gathered so far instead of reading NULL.
                    printf("WARN: Ignoring parameter \"%s\" without a value\n", param);
                    break;
                }

                if(!strcmp(param, "-n")) {
                    char *valY = strtok(NULL, delim);
                    char *valZ = strtok(NULL, delim);
                    if(!valY || !valZ) {
                        printf("WARN: Ignoring parameter \"%s\": three values are required\n", param);
                        break;
                    }
                    sscanf(val, "%d", &n.x);
                    sscanf(valY, "%d", &n.y);
                    sscanf(valZ, "%d", &n.z);
                }
                else if(!strcmp(param, "-batchsize"))
                    sscanf(val, "%d", &batchSize);
                else if(!strcmp(param, "-dir")) {
                    // %99s keeps the token within tmpStr[100].
                    sscanf(val, "%99s", tmpStr);
                    if(!strcmp(tmpStr, "forward"))
                        dir = clFFT_Forward;
                    else if(!strcmp(tmpStr, "inverse"))
                        dir = clFFT_Inverse;
                }
                else if(!strcmp(param, "-dim")) {
                    sscanf(val, "%99s", tmpStr);
                    if(!strcmp(tmpStr, "1D"))
                        dim = clFFT_1D;
                    else if(!strcmp(tmpStr, "2D"))
                        dim = clFFT_2D;
                    else if(!strcmp(tmpStr, "3D"))
                        dim = clFFT_3D;
                }
                else if(!strcmp(param, "-format")) {
                    sscanf(val, "%99s", tmpStr);
                    if(!strcmp(tmpStr, "plannar"))
                        dataFormat = clFFT_SplitComplexFormat;
                    else if(!strcmp(tmpStr, "interleaved"))
                        dataFormat = clFFT_InterleavedComplexFormat;
                }
                else if(!strcmp(param, "-numiter"))
                    sscanf(val, "%d", &numIter);
                else if(!strcmp(param, "-testtype")) {
                    sscanf(val, "%99s", tmpStr);
                    if(!strcmp(tmpStr, "out-of-place"))
                        testType = clFFT_OUT_OF_PLACE;
                    else if(!strcmp(tmpStr, "in-place"))
                        testType = clFFT_IN_PLACE;
                }
                else if(!strcmp(param, "-inputdatafile")) {
                    sscanf(val, "%99s", tmpStr);
                    // Do not leak a file opened by an earlier line.
                    if(inputData) {
                        fclose(inputData);
                        inputData = NULL;
                    }
                    inputData = fopen (tmpStr, "r");
                    if(!inputData) {
                        printf("ERROR: Couldn't open input data file (%s)! Using random data instead...\n", val);
                    }
                    else {
                        printf("INFO: Using input data file \"%s\"...\n", val);
                    }
                }

                param = strtok(NULL, delim);
            }

            if(checkMemRequirements(n, batchSize, testType, gMemSize)) {
                log_info("This test cannot run because memory requirements canot be met by the available device\n");
                continue;
            }

            err = runTest(inputData, n, batchSize, dir, dim, dataFormat, numIter, testType, debugEnabled);
            if (err)
                total_errors++;
        }

        fclose(paramFile);
    }

    if(inputData)
        fclose(inputData);

    clReleaseCommandQueue(queue);
    clReleaseContext(context);

    test_finish();

#ifndef __APPLE__
    printf("Test finished (validation only available on Mac OS X)! Total number of errors: %d\n", total_errors);
#endif

    return total_errors;
}