Commit 7a2b7a7d, authored and committed by Henning Fehrmann
Browse files

loop over parameters and measure the run time in the test routine

parent 582cb735
......@@ -37,12 +37,28 @@
exit(EXIT_FAILURE); \
}\
/*
 * Fill a host buffer with scaled pseudo-random complex samples.
 *
 * hA : destination array; elements 0 .. s-1 are overwritten.
 * s  : number of elements to write.
 *
 * Each component is one xorshf96() draw converted to float and multiplied
 * by the scale factor twice.  The scale factor is derived from the
 * identifiers x and y, which are defined outside this function.
 *
 * NOTE(review): xorshf96() is invoked from inside an OpenMP work-sharing
 * loop.  Its definition is not visible here -- confirm that it is safe to
 * call from several threads at once.
 */
void
prepare_data
(
	__COMPLEX8__ * hA,
	size_t s
)
{
	const float scale = 1.f / (float)x / (float)y / 20.f;

#pragma omp parallel for
	for (size_t idx = 0; idx < s; idx++)
	{
		__COMPLEX8__ *sample = &hA[idx];

		/* Real part is drawn first, imaginary part second. */
		sample->x = (float)xorshf96() * scale * scale;
		sample->y = (float)xorshf96() * scale * scale;
	}
}
int
run_test
(
size_t T,
size_t N,
unsigned rep,
int nofftws
unsigned rep
)
{
......@@ -51,29 +67,21 @@ run_test
// Create HIP device buffer
__COMPLEX8__ *A;
__COMPLEX8__ *hB;
__MALLOC(hB, sizeof(*hB) * N);
__MALLOC(hB, sizeof(*hB) * N * T);
__ASSERT(__PREFIX(Malloc)((void**)&A, sizeof(*A) * N));
__ASSERT(__PREFIX(Malloc)((void**)&A, sizeof(*A) * N * T));
// Initialize data
__COMPLEX8__ * hA;
__MALLOC(hA, sizeof(*hA) * N);
float fact = 1.f/(float)x/(float)y/20.f;
for (size_t i = 0; i < N; i++)
{
hA[i].x = (float)xorshf96()*fact * fact;
hA[i].y = (float)xorshf96()*fact * fact;
}
__MALLOC(hA, sizeof(*hA) * N * T);
// Copy data to device
__ASSERT(__PREFIX(Memcpy)(A, hA, sizeof(*hA) * N, __PREFIX(MemcpyHostToDevice)));
// Create FFT plan
__FFTW_PLAN plan;
size_t length = N;
char mes[128];
sprintf(mes, "dim: %zu\tPlan generation." ,N);
timer_start(timer, mes);
//sprintf(mes, "dim: %zu\tPlan generation." ,N);
//timer_start(timer, mes);
#ifdef ROC
rocfft_plan_create
......@@ -88,34 +96,46 @@ run_test
NULL
);
#elif CUDA
cufftPlan1d( &plan, N, CUFFT_C2C, 1);
int batch = T; // --- Number of batched executions
int rank = 1; // --- 1D FFTs
int na[] = { N }; // --- Size of the Fourier transform
int istride = 1, ostride = 1; // --- Distance between two successive input/output elements
int idist = N, odist = N; // --- Distance between batches
int inembed[] = { 0 }; // --- Input size with pitch (ignored for 1D transforms)
int onembed[] = { 0 }; // --- Output size with pitch (ignored for 1D transforms)
cufftPlanMany
(
&plan,
rank,
na,
inembed,
istride,
idist,
onembed,
ostride,
odist,
CUFFT_C2C,
batch
);
#endif
timer_stop(timer);
for (int r = 0 ; r < 2; r++)
prepare_data(hA, N * T);
// Copy data to device
__ASSERT(__PREFIX(Memcpy)(A, hA, sizeof(*hA) * N, __PREFIX(MemcpyHostToDevice)));
sprintf(mes, "T = %zu n = %zu\t 1nd total %d rounds." ,T, N, rep );
timer_start(timer, mes);
for (int r = 0 ; r < rep; r++)
{
// Execute plan
sprintf(mes, "dim: %zu\tExecute plan round %d." ,N , r);
timer_start(timer, mes);
#ifdef ROC
rocfft_execute(plan, (void**) &A, NULL, NULL);
#elif CUDA
cufftExecC2C(plan, A, A, CUFFT_FORWARD);
#endif
timer_stop(timer);
// Wait for execution to finish
sprintf(mes, "dim: %zu\tSynchronize round %d." ,N , r);
timer_start(timer, mes);
#ifdef ROC
hipDeviceSynchronize();
#endif
timer_stop(timer);
__PREFIX(DeviceSynchronize)();
}
timer_stop(timer);
// Destroy plan
sprintf(mes, "dim: %zu\tDestroy plan." ,N);
timer_start(timer, mes);
__DESTROY_PLAN(plan);
timer_stop(timer);
__ASSERT(__PREFIX(Memcpy)(hB, A, sizeof(*A) * N, __PREFIX(MemcpyDeviceToHost)));
......@@ -131,23 +151,19 @@ main
(
)
{
int rep = 1;
int min_dim = 8;
int max_dim = 28;
int nofftws = 128;
float * res = malloc(sizeof(*res) * (size_t)((max_dim - min_dim) * rep));
if (res == NULL)
{
fprintf(stderr, "Couldn't allocate res\n");
exit(1);
}
int rep = 1000;
int t_min = 8;
int t_max = 11;
int n_min = 11;
int n_max = 19;
for (int i = min_dim; i < max_dim; i++)
for (int et = t_min; et <= t_max; et ++)
{
size_t dim = 1 << i;
//int ind = (i - min_dim) * rep;
run_test(dim, rep, nofftws);
int t = 1 << et;
for (int en = n_min; en <= n_max; en++)
{
size_t n = 1 << en;
run_test(t, n, rep);
}
}
free(res);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment