Skip to content
Snippets Groups Projects
Commit 5e73acb9 authored by Henning Fehrmann's avatar Henning Fehrmann Committed by Henning Fehrmann
Browse files

fftw code and outsourced code lines

parent fd2bf5ef
No related branches found
No related tags found
No related merge requests found
fftw.c 0 → 100644
#define __HIP_PLATFORM_HCC__
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <omp.h>
#include <hip/hip_runtime_api.h>
#include <hip/hip_vector_types.h>
#include "rocfft.h"
#include "hardware_settings.h"
#include "profiler.h"
#define __MALLOC(P, size) P = malloc(size); \
if (P == NULL) \
{\
fprintf(stderr, "Allocation failed at line %d in %s\n", __LINE__, __FILE__); \
exit(EXIT_FAILURE); \
}\
int
run_test
(
size_t N,
unsigned rep,
int nofftws
)
{
struct runtime * timer;
__MALLOC(timer, sizeof(*timer));
// Create HIP device buffer
__COMPLEX8__ *A;
__COMPLEX8__ *hB;
__MALLOC(hB, sizeof(*hB) * N);
__ASSERT(__PREFIX(Malloc)((void**)&A, sizeof(*A) * N));
// Initialize data
__COMPLEX8__ * hA;
__MALLOC(hA, sizeof(*hA) * N);
float fact = 1.f/(float)x/(float)y/20.f;
for (size_t i = 0; i < N; i++)
{
hA[i].x = (float)xorshf96()*fact * fact;
hA[i].y = (float)xorshf96()*fact * fact;
}
// Copy data to device
hipMemcpy(A, hA, sizeof(*hA) * N, hipMemcpyHostToDevice);
// Create FFT plan
__FFTW_PLAN plan = NULL;
size_t length = N;
char mes[128];
sprintf(mes, "dim: %zu\tPlan generation." ,N);
timer_start(timer, mes);
#ifdef ROC
rocfft_plan_create
(
&plan,
rocfft_placement_inplace,
rocfft_transform_type_complex_forward,
rocfft_precision_single,
1,
&length,
1,
NULL
);
#elif CUDA
cufftPlan1d( &plan, N, CUFFT_C2C, 1);
#endif
timer_stop(timer);
for (int r = 0 ; r < 2; r++)
{
// Execute plan
sprintf(mes, "dim: %zu\tExecute plan round %d." ,N , r);
timer_start(timer, mes);
#ifdef ROC
rocfft_execute(plan, (void**) &A, NULL, NULL);
#elif CUDA
cufftExecC2C(plan, A, A);
#endif
timer_stop(timer);
// Wait for execution to finish
sprintf(mes, "dim: %zu\tSynchronize round %d." ,N , r);
timer_start(timer, mes);
hipDeviceSynchronize();
timer_stop(timer);
}
// Destroy plan
sprintf(mes, "dim: %zu\tDestroy plan." ,N);
timer_start(timer, mes);
__DESTROY_PLAN(plan);
timer_stop(timer);
__ASSERT(__PREFIX(Memcpy)(hB, A, sizeof(*A) * N, hipMemcpyDeviceToHost));
__ASSERT(__PREFIX(Free)(A));
free(hA);
free(hB);
free(timer);
return 0;
}
int
main
(
)
{
int rep = 1;
int min_dim = 12;
int max_dim = 28;
int nofftws = 128;
float * res = malloc(sizeof(*res) * (size_t)((max_dim - min_dim) * rep));
if (res == NULL)
{
fprintf(stderr, "Couldn't allocate res\n");
exit(1);
}
for (int i = min_dim; i < max_dim; i++)
{
size_t dim = 1 << i;
//int ind = (i - min_dim) * rep;
run_test(dim, rep, nofftws);
}
free(res);
}
/*
* =====================================================================================
*
* Description: auxiliary functions for profiling
*
* Version: 1.0
* Created: 29.01.2021 08:40:18
* Revision: none
* Compiler: hipc or nvcc
*
* Author: Henning Fehrmann (), henning.fehrmann@aei.mpg.de
* Organization: AEI Hannover
* License: GNU General Public License v2
*
* =====================================================================================
*/
#include <time.h>
struct runtime
{
struct timespec start;
struct timespec stop;
char tag[128];
};
void
timer_start
(
struct runtime * timer,
char tag[128]
)
{
struct timespec start;
sprintf(timer->tag,"%s", tag);
clock_gettime(CLOCK_REALTIME , &start);
timer->start = start;
// printf("--------> start timer: %s\n", timer->tag);
}
static unsigned long x=123456789, y=362436069, z=521288629;
unsigned long
xorshf96
(
void
)
{
// NOT thread save
unsigned long t;
x ^= x << 16;
x ^= x >> 5;
x ^= x << 1;
t = x;
x = y;
y = z;
z = t ^ x ^ y;
return z;
}
double
timer_stop
(
struct runtime * timer
)
{
struct timespec stop;
clock_gettime(CLOCK_REALTIME , &stop);
timer->stop = stop;
double res= (double)
(
(timer->stop).tv_sec - (timer->start).tv_sec
)*1000.
+
(double)
(
(timer->stop).tv_nsec - (timer->start).tv_nsec
)/1000000.
;
printf("%g [ms]\t%s\n", res, timer->tag);
return res;
}
rocfft.h 0 → 100644
/******************************************************************************
* Copyright (c) 2016 - present Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*******************************************************************************/
/*! @file rocfft.h
* rocfft.h defines all the public interfaces and types
* */
#ifndef __ROCFFT_H__
#define __ROCFFT_H__
#include "rocfft-export.h"
#include "rocfft-version.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/*
#ifndef _WIN32
#include <cstddef>
#endif
*/
/*! @brief Pointer type to plan structure
* @details This type is used to declare a plan handle that can be initialized
* with rocfft_plan_create
* */
typedef struct rocfft_plan_t* rocfft_plan;
/*! @brief Pointer type to plan description structure
* @details This type is used to declare a plan description handle that can be
* initialized with rocfft_plan_description_create
* */
typedef struct rocfft_plan_description_t* rocfft_plan_description;
/*! @brief Pointer type to execution info structure
* @details This type is used to declare an execution info handle that can be
* initialized with rocfft_execution_info_create
* */
typedef struct rocfft_execution_info_t* rocfft_execution_info;
/*! @brief rocfft status/error codes */
typedef enum rocfft_status_e
{
rocfft_status_success,
rocfft_status_failure,
rocfft_status_invalid_arg_value,
rocfft_status_invalid_dimensions,
rocfft_status_invalid_array_type,
rocfft_status_invalid_strides,
rocfft_status_invalid_distance,
rocfft_status_invalid_offset,
} rocfft_status;
/*! @brief Type of transform */
typedef enum rocfft_transform_type_e
{
rocfft_transform_type_complex_forward,
rocfft_transform_type_complex_inverse,
rocfft_transform_type_real_forward,
rocfft_transform_type_real_inverse,
} rocfft_transform_type;
/*! @brief Precision */
typedef enum rocfft_precision_e
{
rocfft_precision_single,
rocfft_precision_double,
} rocfft_precision;
/*! @brief Result placement */
typedef enum rocfft_result_placement_e
{
rocfft_placement_inplace,
rocfft_placement_notinplace,
} rocfft_result_placement;
/*! @brief Array type */
typedef enum rocfft_array_type_e
{
rocfft_array_type_complex_interleaved,
rocfft_array_type_complex_planar,
rocfft_array_type_real,
rocfft_array_type_hermitian_interleaved,
rocfft_array_type_hermitian_planar,
rocfft_array_type_unset,
} rocfft_array_type;
/*! @brief Execution mode */
typedef enum rocfft_execution_mode_e
{
rocfft_exec_mode_nonblocking,
rocfft_exec_mode_nonblocking_with_flush,
rocfft_exec_mode_blocking,
} rocfft_execution_mode;
/*! @brief Library setup function, called once in program before start of
* library use */
ROCFFT_EXPORT rocfft_status rocfft_setup();
/*! @brief Library cleanup function, called once in program after end of library
* use */
ROCFFT_EXPORT rocfft_status rocfft_cleanup();
/*! @brief Create an FFT plan
*
* @details This API creates a plan, which the user can execute subsequently.
* This function
* takes many of the fundamental parameters needed to specify a transform. The
* parameters are
* self explanatory. The dimensions parameter can take a value of 1,2 or 3. The
* 'lengths' array specifies
* size of data in each dimension. Note that lengths[0] is the size of the
* innermost dimension, lengths[1]
* is the next higher dimension and so on. The 'number_of_transforms' parameter
* specifies how many transforms
* (of the same kind) needs to be computed. By specifying a value greater than
* 1, a batch of transforms can
* be computed with a single api call. Additionally, a handle to a plan
* description can be passed for more
* detailed transforms. For simple transforms, this parameter can be set to
* null ptr.
*
* @param[out] plan plan handle
* @param[in] placement placement of result
* @param[in] transform_type type of transform
* @param[in] precision precision
* @param[in] dimensions dimensions
* @param[in] lengths dimensions sized array of transform lengths
* @param[in] number_of_transforms number of transforms
* @param[in] description description handle created by
* rocfft_plan_description_create; can be
* null ptr for simple transforms
* */
ROCFFT_EXPORT rocfft_status rocfft_plan_create(rocfft_plan* plan,
rocfft_result_placement placement,
rocfft_transform_type transform_type,
rocfft_precision precision,
size_t dimensions,
const size_t* lengths,
size_t number_of_transforms,
const rocfft_plan_description description);
/*! @brief Execute an FFT plan
*
* @details This API executes an FFT plan on buffers given by the user. If the
* transform is in-place,
* only the input buffer is needed and the output buffer parameter can be set
* to NULL. For not in-place
* transforms, output buffers have to be specified. Note that both input and
* output buffer are arrays of
* pointers, this is to facilitate passing planar buffers where real and
* imaginary parts are in 2 separate
* buffers. For the default interleaved format, just a unit sized array holding
* the pointer to input/output
* buffer need to be passed. The final parameter in this function is an
* execution_info handle. This parameter
* serves as a way for the user to control execution, as well as for the
* library to pass any execution
* related information back to the user.
*
* @param[in] plan plan handle
* @param[in,out] in_buffer array (of size 1 for interleaved data, of size 2
* for planar data) of input buffers
* @param[in,out] out_buffer array (of size 1 for interleaved data, of size 2
* for planar data) of output buffers, can be nullptr for inplace result
* placement
* @param[in] info execution info handle created by
* rocfft_execution_info_create
* */
ROCFFT_EXPORT rocfft_status rocfft_execute(const rocfft_plan plan,
void* in_buffer[],
void* out_buffer[],
rocfft_execution_info info);
/*! @brief Destroy an FFT plan
* @details This API frees the plan. This function destructs a plan after it is
* no longer needed.
* @param[in] plan plan handle
* */
ROCFFT_EXPORT rocfft_status rocfft_plan_destroy(rocfft_plan plan);
#if 0
/*! @brief Set scaling factor in single precision
* @details This is one of plan description functions to specify optional additional plan properties using the description handle. This API specifies scaling factor.
* @param[in] description description handle
* @param[in] scale scaling factor
* */
ROCFFT_EXPORT rocfft_status rocfft_plan_description_set_scale_float( rocfft_plan_description description, const float scale );
/*! @brief Set scaling factor in double precision
* @details This is one of plan description functions to specify optional additional plan properties using the description handle. This API specifies scaling factor.
* @param[in] description description handle
* @param[in] scale scaling factor
* */
ROCFFT_EXPORT rocfft_status rocfft_plan_description_set_scale_double( rocfft_plan_description description, const double scale );
#endif
/*!
* @brief Set data layout
*
* @details This is one of plan description functions to specify
* optional additional plan properties using the description
* handle. This API specifies the layout of buffers.
*
* This function can be used to specify input and output array
* types. Not all combinations of array types are supported and error
* code will be returned for unsupported cases. Additionally, input
* and output buffer offsets can be specified. The function can be
* used to specify custom layout of data, with the ability to specify
* stride between consecutive elements in all dimensions. Also,
* distance between transform data members can be specified. The
* library will choose appropriate defaults if offsets/strides are
* set to null ptr and/or distances set to 0.
*
* @param[in, out] description description handle
* @param[in] in_array_type array type of input buffer
* @param[in] out_array_type array type of output buffer
* @param[in] in_offsets offsets, in element units, to start of data in input buffer
* @param[in] out_offsets offsets, in element units, to start of data in output buffer
* @param[in] in_strides_size size of in_strides array (must be equal to transform dimensions)
* @param[in] in_strides array of strides, in each dimension, of
* input buffer; if set to null ptr library chooses defaults
* @param[in] in_distance distance between start of each data instance in input buffer
* @param[in] out_strides_size size of out_strides array (must be
* equal to transform dimensions)
* @param[in] out_strides array of strides, in each dimension, of
* output buffer; if set to null ptr library chooses defaults
* @param[in] out_distance distance between start of each data instance in output buffer
*/
ROCFFT_EXPORT rocfft_status
rocfft_plan_description_set_data_layout(rocfft_plan_description description,
const rocfft_array_type in_array_type,
const rocfft_array_type out_array_type,
const size_t* in_offsets,
const size_t* out_offsets,
const size_t in_strides_size,
const size_t* in_strides,
const size_t in_distance,
const size_t out_strides_size,
const size_t* out_strides,
const size_t out_distance);
/*! @brief Get library version string
*
* @param[in, out] buf buffer of version string
* @param[in] len the length of input string buffer, expecting minimum 30
*/
ROCFFT_EXPORT rocfft_status rocfft_get_version_string(char* buf, size_t len);
#if 0
/*! @brief Set devices in plan description
* @details This is one of plan description functions to specify optional additional plan properties using the description handle. This API specifies what compute devices to target.
* @param[in] description description handle
* @param[in] devices array of device identifiers
* @param[in] number_of_devices number of devices (size of devices array)
* */
ROCFFT_EXPORT rocfft_status rocfft_plan_description_set_devices( rocfft_plan_description description, void *devices, size_t number_of_devices );
#endif
/*! @brief Get work buffer size
* @details This is one of plan query functions to obtain information regarding
* a plan. This API gets the work buffer size.
* @param[in] plan plan handle
* @param[out] size_in_bytes size of needed work buffer in bytes
* */
ROCFFT_EXPORT rocfft_status rocfft_plan_get_work_buffer_size(const rocfft_plan plan,
size_t* size_in_bytes);
/*! @brief Print all plan information
* @details This is one of plan query functions to obtain information regarding
* a plan. This API prints all plan info to stdout to help user verify plan
* specification.
* @param[in] plan plan handle
* */
ROCFFT_EXPORT rocfft_status rocfft_plan_get_print(const rocfft_plan plan);
/*! @brief Create plan description
* @details This API creates a plan description with which the user can set
* more plan properties
* @param[out] description plan description handle
* */
ROCFFT_EXPORT rocfft_status rocfft_plan_description_create(rocfft_plan_description* description);
/*! @brief Destroy a plan description
* @details This API frees the plan description
* @param[in] description plan description handle
* */
ROCFFT_EXPORT rocfft_status rocfft_plan_description_destroy(rocfft_plan_description description);
/*! @brief Create execution info
* @details This API creates an execution info with which the user can control
* plan execution & retrieve execution information
* @param[out] info execution info handle
* */
ROCFFT_EXPORT rocfft_status rocfft_execution_info_create(rocfft_execution_info* info);
/*! @brief Destroy an execution info
* @details This API frees the execution info
* @param[in] info execution info handle
* */
ROCFFT_EXPORT rocfft_status rocfft_execution_info_destroy(rocfft_execution_info info);
/*! @brief Set work buffer in execution info
*
* @details This is one of the execution info functions to specify optional
* additional information to control execution.
* This API specifies work buffer needed. It has to be called before the call
* to rocfft_execute.
* When a non-zero value is obtained from rocfft_plan_get_work_buffer_size,
* that means the library needs a work buffer
* to compute the transform. In this case, the user has to allocate the work
* buffer and pass it to the library via this api.
*
* @param[in] info execution info handle
* @param[in] work_buffer work buffer
* @param[in] size_in_bytes size of work buffer in bytes
* */
ROCFFT_EXPORT rocfft_status rocfft_execution_info_set_work_buffer(rocfft_execution_info info,
void* work_buffer,
const size_t size_in_bytes);
#if 0
/*! @brief Set execution mode in execution info
* @details This is one of the execution info functions to specify optional additional information to control execution.
* This API specifies execution mode. It has to be called before the call to rocfft_execute.
* Appropriate enumeration value can be specified to control blocking/non-blocking behavior of the rocfft_execute call.
* @param[in] info execution info handle
* @param[in] mode execution mode
* */
ROCFFT_EXPORT rocfft_status rocfft_execution_info_set_mode( rocfft_execution_info info,
const rocfft_execution_mode mode );
#endif
/*! @brief Set stream in execution info
* @details This is one of the execution info functions to specify optional
* additional information to control execution.
* This API specifies compute stream. It has to be called before the call to
* rocfft_execute.
* It is the underlying device queue/stream where the library computations
* would be inserted. The library assumes user
* has created such a stream in the program and merely assigns work to the
* stream. The stream must be of type hipStream_t. It is an error to pass the
* address of a hipStream_t object.
* @param[in] info execution info handle
* @param[in] stream underlying compute stream
* */
ROCFFT_EXPORT rocfft_status rocfft_execution_info_set_stream(rocfft_execution_info info,
void* stream);
#if 0
/*! @brief Get events from execution info
* @details This is one of the execution info functions to retrieve information from execution.
* This API obtains event information. It has to be called after the call to rocfft_execute.
* This gets handles to events that the library created around one or more kernel launches during execution.
* @param[in] info execution info handle
* @param[out] events array of events
* @param[out] number_of_events number of events (size of events array)
* */
ROCFFT_EXPORT rocfft_status rocfft_execution_info_get_events( const rocfft_execution_info info, void **events, size_t *number_of_events );
#endif
/*! \brief Indicates if layer is active with bitmask*/
typedef enum rocfft_layer_mode_
{
rocfft_layer_mode_none = 0b0000000000,
rocfft_layer_mode_log_trace = 0b0000000001,
rocfft_layer_mode_log_bench = 0b0000000010,
rocfft_layer_mode_log_profile = 0b0000000100,
} rocfft_layer_mode;
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* __ROCFFT_H__ */
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment