Commit 19061ae5 authored by Henning Fehrmann
Browse files

testing quality of results

parent 9a3008f5
/*
* =====================================================================================
*
* Filename: fftw.c
*
* Description: FFTW profiling
*
* Version: 1.0
* Created: 29.01.2021 10:55:14
* Revision: none
* Compiler: gcc
*
* Author: Henning Fehrmann (), henning.fehrmann@aei.mpg.de
* Organization: AEI Hannover
* License: GNU General Public License v2
*
* =====================================================================================
*/
#define __HIP_PLATFORM_HCC__
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <omp.h>
#include "hardware_settings.h"
#include "profiler.h"
/*
 * Allocate `size` bytes with malloc() and store the result in the pointer
 * lvalue P. If the allocation fails, report the call site on stderr and
 * terminate the process with EXIT_FAILURE.
 *
 * The body is wrapped in do { ... } while (0) so that the expansion is a
 * single statement and stays correct under an unbraced if/else. The last
 * line deliberately carries no continuation backslash; otherwise the line
 * following the macro would be spliced into its body.
 *
 * Note: P is evaluated twice, so it must not have side effects.
 */
#define __MALLOC(P, size) \
	do \
	{ \
		(P) = malloc(size); \
		if ((P) == NULL) \
		{ \
			fprintf(stderr, "Allocation failed at line %d in %s\n", __LINE__, __FILE__); \
			exit(EXIT_FAILURE); \
		} \
	} while (0)
/*
 * Fill a host buffer with the constant complex value (1, 0).
 *
 * hA  buffer of at least s elements; every element gets x = 1 and y = 0
 * s   number of elements to write
 *
 * The iterations are independent and are distributed over OpenMP threads.
 */
void
prepare_data
(
	__COMPLEX8__ * hA,
	size_t s
)
{
	#pragma omp parallel for
	for (size_t idx = 0; idx < s; idx++)
	{
		__COMPLEX8__ *sample = hA + idx;

		sample->x = 1.0f;
		sample->y = 0.;
	}
}
/*
 * Run and time a batch of T one-dimensional, single-precision, in-place
 * complex forward FFTs of length N on the device, then print the output of
 * the first transform (N lines of "real<TAB>imag") to stdout.
 *
 * T    number of transforms in the batch
 * N    length of each transform
 * rep  number of timed rounds; the prepared input is uploaded again before
 *      every round after the first, because the transform overwrites its
 *      input. With rep == 0 nothing is transformed and the printed values
 *      are the untouched input.
 *
 * Returns 0. A failed host allocation, plan creation or FFT execution
 * terminates the process; runtime calls are wrapped in __ASSERT, which is
 * defined outside this file (presumably it aborts on error - confirm).
 */
int
run_test
(
	size_t T,
	size_t N,
	unsigned rep
)
{
	const size_t count = N * T;	// complex samples in the whole batch
	int failed = 0;

	struct runtime * timer;
	__MALLOC(timer, sizeof(*timer));

	// Host buffers: hA holds the prepared input, hB receives the result.
	__COMPLEX8__ *hA;
	__COMPLEX8__ *hB;
	__MALLOC(hA, sizeof(*hA) * count);
	__MALLOC(hB, sizeof(*hB) * count);

	// Device buffer, transformed in place.
	__COMPLEX8__ *A;
	__ASSERT(__PREFIX(Malloc)((void**)&A, sizeof(*A) * count));

	// Create FFT plan: T batched 1D transforms of length N on both backends.
	__FFTW_PLAN plan;
	char mes[128];
#ifdef ROC
	size_t length = N;
	failed = rocfft_plan_create
		(
		 &plan,
		 rocfft_placement_inplace,
		 rocfft_transform_type_complex_forward,
		 rocfft_precision_single,
		 1,		// one dimension
		 &length,
		 T,		// number of transforms, matching the cuFFT batch
		 NULL
		) != rocfft_status_success;
#elif CUDA
	int batch = (int) T;		// --- Number of batched executions
	int rank = 1;			// --- 1D FFTs
	int na[] = { (int) N };		// --- Size of the Fourier transform
	int istride = 1, ostride = 1;	// --- Distance between two successive input/output elements
	int idist = (int) N, odist = (int) N;	// --- Distance between batches
	int inembed[] = { 0 };		// --- Input size with pitch (ignored for 1D transforms)
	int onembed[] = { 0 };		// --- Output size with pitch (ignored for 1D transforms)
	failed = cufftPlanMany
		(
		 &plan,
		 rank,
		 na,
		 inembed,
		 istride,
		 idist,
		 onembed,
		 ostride,
		 odist,
		 CUFFT_C2C,
		 batch
		) != CUFFT_SUCCESS;
#endif
	if (failed)
	{
		fprintf(stderr, "FFT plan creation failed at line %d in %s\n", __LINE__, __FILE__);
		exit(EXIT_FAILURE);
	}

	prepare_data(hA, count);

	// Copy the whole batch to the device; the plan reads all N * T samples.
	__ASSERT(__PREFIX(Memcpy)(A, hA, sizeof(*hA) * count, __PREFIX(MemcpyHostToDevice)));

	for (unsigned r = 0; r < rep; r++)
	{
		if (r > 0)
		{
			// The previous round overwrote A; restore the input (outside the timed region).
			__ASSERT(__PREFIX(Memcpy)(A, hA, sizeof(*hA) * count, __PREFIX(MemcpyHostToDevice)));
		}

		// Execute plan
		snprintf(mes, sizeof(mes), "T = %zu n = %zu\t round %u." ,T, N , r);
		timer_start(timer, mes);
#ifdef ROC
		failed = rocfft_execute(plan, (void**) &A, NULL, NULL) != rocfft_status_success;
#elif CUDA
		failed = cufftExecC2C(plan, A, A, CUFFT_FORWARD) != CUFFT_SUCCESS;
#endif
		// Wait for the device so the timer covers the whole transform.
		__ASSERT(__PREFIX(DeviceSynchronize)());
		timer_stop(timer);

		if (failed)
		{
			fprintf(stderr, "FFT execution failed at line %d in %s\n", __LINE__, __FILE__);
			exit(EXIT_FAILURE);
		}
	}

	// Destroy plan
	__DESTROY_PLAN(plan);

	// Only the first transform is inspected, so fetch just its N samples.
	__ASSERT(__PREFIX(Memcpy)(hB, A, sizeof(*A) * N, __PREFIX(MemcpyDeviceToHost)));
	for (size_t i = 0; i < N; i++)
	{
		printf("%g\t%g\n", hB[i].x, hB[i].y);
	}

	// Release device and host resources before returning to the caller.
	__ASSERT(__PREFIX(Free)(A));
	free(hA);
	free(hB);
	free(timer);
	return 0;
}
/*
 * Entry point: sweep over batch sizes T = 2^8 .. 2^11 and transform lengths
 * N = 2^11 .. 2^19, calling run_test() once per (T, N) pair with a single
 * timed round.
 *
 * Returns EXIT_SUCCESS when every run_test() call returns 0, otherwise
 * EXIT_FAILURE after reporting the failing configuration on stderr.
 */
int
main
(
	void
)
{
	const unsigned rep = 1;			// timed rounds per configuration
	const unsigned t_min = 8, t_max = 11;	// log2 range of the batch size
	const unsigned n_min = 11, n_max = 19;	// log2 range of the transform length

	for (unsigned et = t_min; et <= t_max; et++)
	{
		// Shift in size_t so the value is formed in the type run_test() takes.
		const size_t t = (size_t) 1 << et;
		for (unsigned en = n_min; en <= n_max; en++)
		{
			const size_t n = (size_t) 1 << en;
			if (run_test(t, n, rep) != 0)
			{
				fprintf(stderr, "run_test failed for T = %zu n = %zu\n", t, n);
				return EXIT_FAILURE;
			}
		}
	}
	return EXIT_SUCCESS;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment