Commit 05167ff1 authored by Oliver Bock
Browse files

Detabbed sources

parent 7df18002
......@@ -66,23 +66,23 @@
/* NOTE(review): presumably the acceptance threshold for the averaged error
 * metric; its use is not visible in this excerpt -- confirm. */
#define eps_avg 10.0

/* Larger of two values. Each argument may be evaluated more than once, so do
 * not pass expressions with side effects. */
#define MAX( _a, _b)    ((_a)>(_b)?(_a) : (_b))
/* Buffer mode of a test run.
 * NOTE(review): presumably selects whether the transform writes to a separate
 * output buffer or overwrites its input -- confirm against runTest(). */
typedef enum {
    clFFT_OUT_OF_PLACE,
    clFFT_IN_PLACE,
} clFFT_TestType;
/* One double-precision complex value stored as a (real, imag) pair. */
typedef struct
{
    double real;
    double imag;
} clFFT_ComplexDouble;
/* Double-precision complex array in split layout: real parts and imaginary
 * parts live in two separate arrays indexed in parallel. */
typedef struct
{
    double *real;
    double *imag;
} clFFT_SplitComplexDouble;
/* File-scope OpenCL device handle; where it is assigned and used is not
 * visible in this excerpt. */
cl_device_id device_id;
......@@ -100,9 +100,9 @@ double subtractTimes( uint64_t endTime, uint64_t startTime )
mach_timebase_info_data_t info;
kern_return_t err = mach_timebase_info( &info );
//Convert the timebase into seconds
//Convert the timebase into seconds
if( err == 0 )
conversion = 1e-9 * (double) info.numer / (double) info.denom;
conversion = 1e-9 * (double) info.numer / (double) info.denom;
}
return conversion * (double) difference;
......@@ -111,583 +111,583 @@ double subtractTimes( uint64_t endTime, uint64_t startTime )
#ifdef __APPLE__
/*
 * Single-precision reference FFT computed with vDSP, used to check results.
 *
 * The transform is applied directly to the arrays referenced by `out`
 * (vDSP_fft_zip is given pointers into out->real / out->imag), one 1D pass
 * per dimension.
 *
 * out       - split-complex data; indexed as x + y*n.x + z*n.x*n.y, with
 *             consecutive batches placed one after another.
 * n         - extent of each dimension. Only floor(log2()) of each extent is
 *             handed to vDSP, so extents are expected to be powers of two.
 * batchSize - number of independent transforms stored in `out`.
 * dim       - clFFT_1D, clFFT_2D or clFFT_3D; any other value performs no
 *             transform.
 * dir       - clFFT_Forward selects FFT_FORWARD, anything else FFT_INVERSE.
 */
void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
                       unsigned int batchSize, clFFT_Dimension dim, clFFT_Direction dir)
{
    FFTSetup plan_vdsp;
    DSPSplitComplex out_vdsp;
    FFTDirection dir_vdsp = dir == clFFT_Forward ? FFT_FORWARD : FFT_INVERSE;

    unsigned int i, j, k;
    unsigned int stride;
    unsigned int log2Nx = (unsigned int) log2(n.x);
    unsigned int log2Ny = (unsigned int) log2(n.y);
    unsigned int log2Nz = (unsigned int) log2(n.z);
    unsigned int log2N;

    /* One setup sized for the longest dimension serves every pass. */
    log2N = log2Nx;
    log2N = log2N > log2Ny ? log2N : log2Ny;
    log2N = log2N > log2Nz ? log2N : log2Nz;

    /* NOTE(review): the second argument is vDSP's radix selector; the literal
     * 2 is kept from the original -- confirm it names the intended FFTRadix. */
    plan_vdsp = vDSP_create_fftsetup(log2N, 2);

    switch(dim)
    {
        case clFFT_1D:

            for(i = 0; i < batchSize; i++)
            {
                stride = i * n.x;
                out_vdsp.realp = out->real + stride;
                out_vdsp.imagp = out->imag + stride;

                vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
            }
            break;

        case clFFT_2D:

            /* Pass 1: transform every row along x (unit element stride). */
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.y; j++)
                {
                    stride = j * n.x + i * n.x * n.y;
                    out_vdsp.realp = out->real + stride;
                    out_vdsp.imagp = out->imag + stride;

                    vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
                }
            }
            /* Pass 2: transform every column along y (element stride n.x). */
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.x; j++)
                {
                    stride = j + i * n.x * n.y;
                    out_vdsp.realp = out->real + stride;
                    out_vdsp.imagp = out->imag + stride;

                    vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
                }
            }
            break;

        case clFFT_3D:

            /* Pass 1: along x for every (y, z) line. */
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.z; j++)
                {
                    for(k = 0; k < n.y; k++)
                    {
                        stride = k * n.x + j * n.x * n.y + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + stride;
                        out_vdsp.imagp = out->imag + stride;

                        vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
                    }
                }
            }
            /* Pass 2: along y for every (x, z) line (element stride n.x). */
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.z; j++)
                {
                    for(k = 0; k < n.x; k++)
                    {
                        stride = k + j * n.x * n.y + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + stride;
                        out_vdsp.imagp = out->imag + stride;

                        vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
                    }
                }
            }
            /* Pass 3: along z for every (x, y) line (element stride n.x*n.y). */
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.y; j++)
                {
                    for(k = 0; k < n.x; k++)
                    {
                        stride = k + j * n.x + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + stride;
                        out_vdsp.imagp = out->imag + stride;

                        vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x*n.y, log2Nz, dir_vdsp);
                    }
                }
            }
            break;
    }

    vDSP_destroy_fftsetup(plan_vdsp);
}
#endif
#ifdef __APPLE__
/*
 * Double-precision reference FFT computed with vDSP, used to check results.
 *
 * The transform is applied directly to the arrays referenced by `out`
 * (vDSP_fft_zipD is given pointers into out->real / out->imag), one 1D pass
 * per dimension.
 *
 * out       - split-complex data; indexed as x + y*n.x + z*n.x*n.y, with
 *             consecutive batches placed one after another.
 * n         - extent of each dimension. Only floor(log2()) of each extent is
 *             handed to vDSP, so extents are expected to be powers of two.
 * batchSize - number of independent transforms stored in `out`.
 * dim       - clFFT_1D, clFFT_2D or clFFT_3D; any other value performs no
 *             transform.
 * dir       - clFFT_Forward selects FFT_FORWARD, anything else FFT_INVERSE.
 */
void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
                       unsigned int batchSize, clFFT_Dimension dim, clFFT_Direction dir)
{
    FFTSetupD plan_vdsp;
    DSPDoubleSplitComplex out_vdsp;
    FFTDirection dir_vdsp = dir == clFFT_Forward ? FFT_FORWARD : FFT_INVERSE;

    unsigned int i, j, k;
    unsigned int stride;
    unsigned int log2Nx = (int) log2(n.x);
    unsigned int log2Ny = (int) log2(n.y);
    unsigned int log2Nz = (int) log2(n.z);
    unsigned int log2N;

    /* One setup sized for the longest dimension serves every pass. */
    log2N = log2Nx;
    log2N = log2N > log2Ny ? log2N : log2Ny;
    log2N = log2N > log2Nz ? log2N : log2Nz;

    /* NOTE(review): the second argument is vDSP's radix selector; the literal
     * 2 is kept from the original -- confirm it names the intended FFTRadix. */
    plan_vdsp = vDSP_create_fftsetupD(log2N, 2);

    switch(dim)
    {
        case clFFT_1D:

            for(i = 0; i < batchSize; i++)
            {
                stride = i * n.x;
                out_vdsp.realp = out->real + stride;
                out_vdsp.imagp = out->imag + stride;

                vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
            }
            break;

        case clFFT_2D:

            /* Pass 1: transform every row along x (unit element stride). */
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.y; j++)
                {
                    stride = j * n.x + i * n.x * n.y;
                    out_vdsp.realp = out->real + stride;
                    out_vdsp.imagp = out->imag + stride;

                    vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
                }
            }
            /* Pass 2: transform every column along y (element stride n.x). */
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.x; j++)
                {
                    stride = j + i * n.x * n.y;
                    out_vdsp.realp = out->real + stride;
                    out_vdsp.imagp = out->imag + stride;

                    vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
                }
            }
            break;

        case clFFT_3D:

            /* Pass 1: along x for every (y, z) line. */
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.z; j++)
                {
                    for(k = 0; k < n.y; k++)
                    {
                        stride = k * n.x + j * n.x * n.y + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + stride;
                        out_vdsp.imagp = out->imag + stride;

                        vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
                    }
                }
            }
            /* Pass 2: along y for every (x, z) line (element stride n.x). */
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.z; j++)
                {
                    for(k = 0; k < n.x; k++)
                    {
                        stride = k + j * n.x * n.y + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + stride;
                        out_vdsp.imagp = out->imag + stride;

                        vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
                    }
                }
            }
            /* Pass 3: along z for every (x, y) line (element stride n.x*n.y). */
            for(i = 0; i < batchSize; i++)
            {
                for(j = 0; j < n.y; j++)
                {
                    for(k = 0; k < n.x; k++)
                    {
                        stride = k + j * n.x + i * n.x * n.y * n.z;
                        out_vdsp.realp = out->real + stride;
                        out_vdsp.imagp = out->imag + stride;

                        vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x*n.y, log2Nz, dir_vdsp);
                    }
                }
            }
            break;
    }

    vDSP_destroy_fftsetupD(plan_vdsp);
}
#endif
/* Squared magnitude of a complex value: real^2 + imag^2. */
double complexNormSq(clFFT_ComplexDouble a)
{
    return (a.real * a.real + a.imag * a.imag);
}
/*
 * Relative L2 error of `data` against `data_ref`, per batch, expressed in
 * multiples of FLT_EPSILON.
 *
 * For each of the `batchSize` batches of `n` elements this computes
 *     sqrt( sum |ref - data|^2 / sum |ref|^2 ) / FLT_EPSILON
 *
 * data      - results under test (split complex).
 * data_ref  - double-precision reference values, same element ordering.
 * n         - number of complex elements per batch.
 * batchSize - number of batches.
 * max_diff  - out: largest per-batch error (starts from 0.0).
 * min_diff  - out: smallest per-batch error (starts from 2^1000).
 *
 * Returns the mean of the per-batch errors. There is no guard against a zero
 * reference norm or a zero batchSize; either yields a non-finite result.
 */
double computeL2Error(clFFT_SplitComplex *data, clFFT_SplitComplexDouble *data_ref, int n, int batchSize, double *max_diff, double *min_diff)
{
    int i, j;
    double avg_norm = 0.0;
    *max_diff = 0.0;
    *min_diff = 0x1.0p1000;    /* large sentinel so the first batch always lowers it */

    for(j = 0; j < batchSize; j++)
    {
        double norm_ref = 0.0;
        double norm = 0.0;
        for(i = 0; i < n; i++)
        {
            int index = j * n + i;
            clFFT_ComplexDouble diff = (clFFT_ComplexDouble) { data_ref->real[index] - data->real[index], data_ref->imag[index] - data->imag[index] };
            double norm_tmp = complexNormSq(diff);
            norm += norm_tmp;
            norm_ref += (data_ref->real[index] * data_ref->real[index] + data_ref->imag[index] * data_ref->imag[index]);
        }
        double curr_norm = sqrt( norm / norm_ref ) / FLT_EPSILON;
        avg_norm += curr_norm;
        *max_diff = *max_diff < curr_norm ? curr_norm : *max_diff;
        *min_diff = *min_diff > curr_norm ? curr_norm : *min_diff;
    }

    return avg_norm / batchSize;
}
/*
 * Copies `length` complex values from the interleaved array `data_cl` into
 * the separate real / imag arrays of `result_split`. The destination arrays
 * must already be allocated with room for `length` elements each.
 */
void convertInterleavedToSplit(clFFT_SplitComplex *result_split, clFFT_Complex *data_cl, int length)
{
    int i;
    for(i = 0; i < length; i++) {
        result_split->real[i] = data_cl[i].real;
        result_split->imag[i] = data_cl[i].imag;
    }
}
int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension dim,
clFFT_DataFormat dataFormat, int numIter, clFFT_TestType testType)
clFFT_DataFormat dataFormat, int numIter, clFFT_TestType testType)
{
cl_int err = CL_SUCCESS;
int iter;
cl_int err = CL_SUCCESS;
int iter;
#ifdef __APPLE__
double t;
uint64_t t0, t1;
double t;
uint64_t t0, t1;
int mx = log2(n.x);
int my = log2(n.y);
int mz = log2(n.z);
int mx = log2(n.x);
int my = log2(n.y);
int mz = log2(n.z);
double gflops = 5e-9 * ((double)mx + (double)my + (double)mz) * (double)n.x * (double)n.y * (double)n.z * (double)batchSize * (double)numIter;
double gflops = 5e-9 * ((double)mx + (double)my + (double)mz) * (double)n.x * (double)n.y * (double)n.z * (double)batchSize * (double)numIter;
#endif
int length = n.x * n.y * n.z * batchSize;
clFFT_SplitComplex data_i_split = (clFFT_SplitComplex) { NULL, NULL };
clFFT_SplitComplex data_cl_split = (clFFT_SplitComplex) { NULL, NULL };
clFFT_Complex *data_i = NULL;
clFFT_Complex *data_cl = NULL;
clFFT_SplitComplexDouble data_iref = (clFFT_SplitComplexDouble) { NULL, NULL };
clFFT_SplitComplexDouble data_oref = (clFFT_SplitComplexDouble) { NULL, NULL };
clFFT_Plan plan = NULL;
cl_mem data_in = NULL;
cl_mem data_out = NULL;