Commit 7df18002 authored by Oliver Bock
Browse files

Removing trailing whitespaces

parent bc1bc3e9
......@@ -94,58 +94,58 @@ double subtractTimes( uint64_t endTime, uint64_t startTime )
{
// Returns the elapsed time between two timestamps, converted to seconds.
// The caller in this file passes values obtained from mach_absolute_time().
// Unsigned subtraction: endTime is expected to be >= startTime.
uint64_t difference = endTime - startTime;
// Tick-to-seconds factor, cached across calls; 0.0 means "not computed yet".
static double conversion = 0.0;
if( conversion == 0.0 )
{
mach_timebase_info_data_t info;
kern_return_t err = mach_timebase_info( &info );
//Convert the timebase into seconds
// If the query fails, conversion stays 0.0: this call returns 0.0 and the
// lookup is attempted again on the next call.
if( err == 0 )
conversion = 1e-9 * (double) info.numer / (double) info.denom;
}
return conversion * (double) difference;
}
#endif
#ifdef __APPLE__
void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
unsigned int batchSize, clFFT_Dimension dim, clFFT_Direction dir)
{
FFTSetup plan_vdsp;
DSPSplitComplex out_vdsp;
FFTDirection dir_vdsp = dir == clFFT_Forward ? FFT_FORWARD : FFT_INVERSE;
unsigned int i, j, k;
unsigned int stride;
unsigned int log2Nx = (unsigned int) log2(n.x);
unsigned int log2Ny = (unsigned int) log2(n.y);
unsigned int log2Nz = (unsigned int) log2(n.z);
unsigned int log2N;
log2N = log2Nx;
log2N = log2N > log2Ny ? log2N : log2Ny;
log2N = log2N > log2Nz ? log2N : log2Nz;
plan_vdsp = vDSP_create_fftsetup(log2N, 2);
switch(dim)
{
case clFFT_1D:
for(i = 0; i < batchSize; i++)
{
stride = i * n.x;
out_vdsp.realp = out->real + stride;
out_vdsp.imagp = out->imag + stride;
vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
}
break;
case clFFT_2D:
for(i = 0; i < batchSize; i++)
{
for(j = 0; j < n.y; j++)
......@@ -153,7 +153,7 @@ void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
stride = j * n.x + i * n.x * n.y;
out_vdsp.realp = out->real + stride;
out_vdsp.imagp = out->imag + stride;
vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
}
}
......@@ -164,14 +164,14 @@ void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
stride = j + i * n.x * n.y;
out_vdsp.realp = out->real + stride;
out_vdsp.imagp = out->imag + stride;
vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
}
}
break;
case clFFT_3D:
for(i = 0; i < batchSize; i++)
{
for(j = 0; j < n.z; j++)
......@@ -181,7 +181,7 @@ void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
stride = k * n.x + j * n.x * n.y + i * n.x * n.y * n.z;
out_vdsp.realp = out->real + stride;
out_vdsp.imagp = out->imag + stride;
vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
}
}
......@@ -195,7 +195,7 @@ void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
stride = k + j * n.x * n.y + i * n.x * n.y * n.z;
out_vdsp.realp = out->real + stride;
out_vdsp.imagp = out->imag + stride;
vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
}
}
......@@ -209,55 +209,55 @@ void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n,
stride = k + j * n.x + i * n.x * n.y * n.z;
out_vdsp.realp = out->real + stride;
out_vdsp.imagp = out->imag + stride;
vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x*n.y, log2Nz, dir_vdsp);
}
}
}
break;
}
vDSP_destroy_fftsetup(plan_vdsp);
}
#endif
#ifdef __APPLE__
void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
unsigned int batchSize, clFFT_Dimension dim, clFFT_Direction dir)
{
FFTSetupD plan_vdsp;
DSPDoubleSplitComplex out_vdsp;
FFTDirection dir_vdsp = dir == clFFT_Forward ? FFT_FORWARD : FFT_INVERSE;
unsigned int i, j, k;
unsigned int stride;
unsigned int log2Nx = (int) log2(n.x);
unsigned int log2Ny = (int) log2(n.y);
unsigned int log2Nz = (int) log2(n.z);
unsigned int log2N;
log2N = log2Nx;
log2N = log2N > log2Ny ? log2N : log2Ny;
log2N = log2N > log2Nz ? log2N : log2Nz;
plan_vdsp = vDSP_create_fftsetupD(log2N, 2);
switch(dim)
{
case clFFT_1D:
for(i = 0; i < batchSize; i++)
{
stride = i * n.x;
out_vdsp.realp = out->real + stride;
out_vdsp.imagp = out->imag + stride;
vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
}
break;
case clFFT_2D:
for(i = 0; i < batchSize; i++)
{
for(j = 0; j < n.y; j++)
......@@ -265,7 +265,7 @@ void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
stride = j * n.x + i * n.x * n.y;
out_vdsp.realp = out->real + stride;
out_vdsp.imagp = out->imag + stride;
vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
}
}
......@@ -276,14 +276,14 @@ void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
stride = j + i * n.x * n.y;
out_vdsp.realp = out->real + stride;
out_vdsp.imagp = out->imag + stride;
vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
}
}
break;
case clFFT_3D:
for(i = 0; i < batchSize; i++)
{
for(j = 0; j < n.z; j++)
......@@ -293,7 +293,7 @@ void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
stride = k * n.x + j * n.x * n.y + i * n.x * n.y * n.z;
out_vdsp.realp = out->real + stride;
out_vdsp.imagp = out->imag + stride;
vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp);
}
}
......@@ -307,7 +307,7 @@ void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
stride = k + j * n.x * n.y + i * n.x * n.y * n.z;
out_vdsp.realp = out->real + stride;
out_vdsp.imagp = out->imag + stride;
vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp);
}
}
......@@ -321,14 +321,14 @@ void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n,
stride = k + j * n.x + i * n.x * n.y * n.z;
out_vdsp.realp = out->real + stride;
out_vdsp.imagp = out->imag + stride;
vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x*n.y, log2Nz, dir_vdsp);
}
}
}
break;
}
vDSP_destroy_fftsetupD(plan_vdsp);
}
#endif
......@@ -344,12 +344,12 @@ double computeL2Error(clFFT_SplitComplex *data, clFFT_SplitComplexDouble *data_r
double avg_norm = 0.0;
*max_diff = 0.0;
*min_diff = 0x1.0p1000;
for(j = 0; j < batchSize; j++)
{
double norm_ref = 0.0;
double norm = 0.0;
for(i = 0; i < n; i++)
for(i = 0; i < n; i++)
{
int index = j * n + i;
clFFT_ComplexDouble diff = (clFFT_ComplexDouble) { data_ref->real[index] - data->real[index], data_ref->imag[index] - data->imag[index] };
......@@ -362,7 +362,7 @@ double computeL2Error(clFFT_SplitComplex *data, clFFT_SplitComplexDouble *data_r
*max_diff = *max_diff < curr_norm ? curr_norm : *max_diff;
*min_diff = *min_diff > curr_norm ? curr_norm : *min_diff;
}
return avg_norm / batchSize;
}
......@@ -375,9 +375,9 @@ void convertInterleavedToSplit(clFFT_SplitComplex *result_split, clFFT_Complex *
}
}
int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension dim,
int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension dim,
clFFT_DataFormat dataFormat, int numIter, clFFT_TestType testType)
{
{
cl_int err = CL_SUCCESS;
int iter;
......@@ -393,14 +393,14 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
#endif
int length = n.x * n.y * n.z * batchSize;
clFFT_SplitComplex data_i_split = (clFFT_SplitComplex) { NULL, NULL };
clFFT_SplitComplex data_cl_split = (clFFT_SplitComplex) { NULL, NULL };
clFFT_Complex *data_i = NULL;
clFFT_Complex *data_cl = NULL;
clFFT_SplitComplexDouble data_iref = (clFFT_SplitComplexDouble) { NULL, NULL };
clFFT_SplitComplexDouble data_iref = (clFFT_SplitComplexDouble) { NULL, NULL };
clFFT_SplitComplexDouble data_oref = (clFFT_SplitComplexDouble) { NULL, NULL };
clFFT_Plan plan = NULL;
cl_mem data_in = NULL;
cl_mem data_out = NULL;
......@@ -408,7 +408,7 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
cl_mem data_in_imag = NULL;
cl_mem data_out_real = NULL;
cl_mem data_out_imag = NULL;
if(dataFormat == clFFT_SplitComplexFormat) {
data_i_split.real = (float *) malloc(sizeof(float) * length);
data_i_split.imag = (float *) malloc(sizeof(float) * length);
......@@ -431,11 +431,11 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
goto cleanup;
}
}
data_iref.real = (double *) malloc(sizeof(double) * length);
data_iref.imag = (double *) malloc(sizeof(double) * length);
data_oref.real = (double *) malloc(sizeof(double) * length);
data_oref.imag = (double *) malloc(sizeof(double) * length);
data_oref.imag = (double *) malloc(sizeof(double) * length);
if(!data_iref.real || !data_iref.imag || !data_oref.real || !data_oref.imag)
{
err = -3;
......@@ -450,11 +450,11 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
data_i_split.real[i] = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
data_i_split.imag[i] = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
data_cl_split.real[i] = 0.0f;
data_cl_split.imag[i] = 0.0f;
data_cl_split.imag[i] = 0.0f;
data_iref.real[i] = data_i_split.real[i];
data_iref.imag[i] = data_i_split.imag[i];
data_oref.real[i] = data_iref.real[i];
data_oref.imag[i] = data_iref.imag[i];
data_oref.imag[i] = data_iref.imag[i];
}
}
else {
......@@ -463,54 +463,54 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
data_i[i].real = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
data_i[i].imag = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
data_cl[i].real = 0.0f;
data_cl[i].imag = 0.0f;
data_cl[i].imag = 0.0f;
data_iref.real[i] = data_i[i].real;
data_iref.imag[i] = data_i[i].imag;
data_oref.real[i] = data_iref.real[i];
data_oref.imag[i] = data_iref.imag[i];
}
data_oref.imag[i] = data_iref.imag[i];
}
}
plan = clFFT_CreatePlan( context, n, dim, dataFormat, &err );
if(!plan || err)
if(!plan || err)
{
log_error("clFFT_CreatePlan failed\n");
goto cleanup;
}
//clFFT_DumpPlan(plan, stdout);
if(dataFormat == clFFT_SplitComplexFormat)
{
data_in_real = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_i_split.real, &err);
if(!data_in_real || err)
if(!data_in_real || err)
{
log_error("clCreateBuffer failed\n");
goto cleanup;
}
data_in_imag = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_i_split.imag, &err);
if(!data_in_imag || err)
if(!data_in_imag || err)
{
log_error("clCreateBuffer failed\n");
goto cleanup;
}
if(testType == clFFT_OUT_OF_PLACE)
{
data_out_real = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_cl_split.real, &err);
if(!data_out_real || err)
if(!data_out_real || err)
{
log_error("clCreateBuffer failed\n");
goto cleanup;
}
data_out_imag = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_cl_split.imag, &err);
if(!data_out_imag || err)
if(!data_out_imag || err)
{
log_error("clCreateBuffer failed\n");
goto cleanup;
}
}
}
else
{
......@@ -521,7 +521,7 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
else
{
data_in = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float)*2, data_i, &err);
if(!data_in)
if(!data_in)
{
log_error("clCreateBuffer failed\n");
goto cleanup;
......@@ -529,17 +529,17 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
if(testType == clFFT_OUT_OF_PLACE)
{
data_out = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float)*2, data_cl, &err);
if(!data_out)
if(!data_out)
{
log_error("clCreateBuffer failed\n");
goto cleanup;
}
}
}
else
data_out = data_in;
}
err = CL_SUCCESS;
#ifdef __APPLE__
......@@ -552,20 +552,20 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
}
else
{
for(iter = 0; iter < numIter; iter++)
for(iter = 0; iter < numIter; iter++)
err |= clFFT_ExecuteInterleaved(queue, plan, batchSize, dir, data_in, data_out, 0, NULL, NULL);
}
err |= clFinish(queue);
if(err)
if(err)
{
log_error("clFFT_Execute\n");
goto cleanup;
goto cleanup;
}
#ifdef __APPLE__
t1 = mach_absolute_time();
t1 = mach_absolute_time();
t = subtractTimes(t1, t0);
char temp[100];
sprintf(temp, "GFlops achieved for n = (%d, %d, %d), batchsize = %d", n.x, n.y, n.z, batchSize);
......@@ -573,7 +573,7 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
#endif
if(dataFormat == clFFT_SplitComplexFormat)
{
{
err |= clEnqueueReadBuffer(queue, data_out_real, CL_TRUE, 0, length*sizeof(float), data_cl_split.real, 0, NULL, NULL);
err |= clEnqueueReadBuffer(queue, data_out_imag, CL_TRUE, 0, length*sizeof(float), data_cl_split.imag, 0, NULL, NULL);
}
......@@ -581,23 +581,23 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
{
err |= clEnqueueReadBuffer(queue, data_out, CL_TRUE, 0, length*sizeof(float)*2, data_cl, 0, NULL, NULL);
}
if(err)
if(err)
{
log_error("clEnqueueReadBuffer failed\n");
goto cleanup;
}
}
#ifdef __APPLE__
computeReferenceD(&data_oref, n, batchSize, dim, dir);
double diff_avg, diff_max, diff_min;
if(dataFormat == clFFT_SplitComplexFormat) {
diff_avg = computeL2Error(&data_cl_split, &data_oref, n.x*n.y*n.z, batchSize, &diff_max, &diff_min);
if(diff_avg > eps_avg)
log_error("Test failed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
else
log_info("Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
log_info("Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
}
else {
clFFT_SplitComplex result_split;
......@@ -605,19 +605,19 @@ int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension di
result_split.imag = (float *) malloc(length*sizeof(float));
convertInterleavedToSplit(&result_split, data_cl, length);
diff_avg = computeL2Error(&result_split, &data_oref, n.x*n.y*n.z, batchSize, &diff_max, &diff_min);
if(diff_avg > eps_avg)
log_error("Test failed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
else
log_info("Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
log_info("Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
free(result_split.real);
free(result_split.imag);
}
#endif
cleanup:
clFFT_DestroyPlan(plan);
if(dataFormat == clFFT_SplitComplexFormat)
clFFT_DestroyPlan(plan);
if(dataFormat == clFFT_SplitComplexFormat)
{
if(data_i_split.real)
free(data_i_split.real);
......@@ -627,7 +627,7 @@ cleanup:
free(data_cl_split.real);
if(data_cl_split.imag)
free(data_cl_split.imag);
if(data_in_real)
clReleaseMemObject(data_in_real);
if(data_in_imag)
......@@ -637,28 +637,28 @@ cleanup:
if(data_out_imag && clFFT_OUT_OF_PLACE)
clReleaseMemObject(data_out_imag);
}
else
else
{
if(data_i)
free(data_i);
if(data_cl)
free(data_cl);
if(data_in)
clReleaseMemObject(data_in);
if(data_out && testType == clFFT_OUT_OF_PLACE)
clReleaseMemObject(data_out);
}
if(data_iref.real)
free(data_iref.real);
if(data_iref.imag)
free(data_iref.imag);
free(data_iref.imag);
if(data_oref.real)
free(data_oref.real);
if(data_oref.imag)
free(data_oref.imag);
return err;
}
......@@ -690,7 +690,7 @@ cl_device_type getGlobalDeviceType()
return CL_DEVICE_TYPE_GPU;
}
void
void
notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
printf( "ERROR: %s\n", errinfo );
......@@ -708,9 +708,9 @@ checkMemRequirements(clFFT_Dim3 n, int batchSize, clFFT_TestType testType, cl_ul
}
int main (int argc, char * const argv[]) {
test_start();
cl_ulong gMemSize;
clFFT_Direction dir = clFFT_Forward;
int numIter = 1;
......@@ -720,28 +720,28 @@ int main (int argc, char * const argv[]) {
clFFT_Dimension dim = clFFT_1D;
clFFT_TestType testType = clFFT_OUT_OF_PLACE;
cl_device_id device_ids[16];
FILE *paramFile;
cl_int err;
unsigned int num_devices;
cl_device_type device_type = getGlobalDeviceType();
if(device_type != CL_DEVICE_TYPE_GPU)
cl_device_type device_type = getGlobalDeviceType();
if(device_type != CL_DEVICE_TYPE_GPU)
{
log_info("Test only supported on DEVICE_TYPE_GPU\n");
test_finish();
exit(0);
}
err = clGetDeviceIDs(NULL, device_type, sizeof(device_ids), device_ids, &num_devices);
if(err)
{
if(err)
{
printf("ERROR: clGetDeviceIDs failed with error: %d\n", err);
test_finish();
return -1;
}
device_id = NULL;
unsigned int i = 0;
......@@ -794,7 +794,7 @@ int main (int argc, char * const argv[]) {
}
}
}
if(!device_id) {
log_error("None of the devices available for compute ... aborting test\n");
test_finish();
......@@ -812,13 +812,13 @@ int main (int argc, char * const argv[]) {
}
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);