Commit 3f976760 authored by Bernd Machenschalk
Browse files

HSA version from Christophe Choquet

compile with:
g++ -fPIC -shared -o libclFFT.so -I /opt/hsa/include clFFT.cpp fft_kernelstring.cpp  -L/opt/hsa/lib -lhsa-runtime64
For each new FFT size, uncomment line 318 of clFFT.cpp, grab the OpenCL kernel,
save it, and create a file clFFT_SIZE.cl. Compile & run.
parent 9c5a4b48
This diff is collapsed.
/***************************************************************************
* Copyright (C) 2012 by Oliver Bock,Heinz-Bernd Eggenstein *
* oliver.bock[AT]aei.mpg.de *
* heinz-bernd.eggenstein[AT]aei.mpg.de *
* *
* This file is part of libclfft (originally for Einstein@Home) *
* Derived from clFFT, (C) Apple, see notice below. *
* *
* *
* libclfft is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See *
* notice below for more details. *
* *
***************************************************************************/
//
// File: clFFT.h
//
// Version: <1.0>
//
// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple")
// in consideration of your agreement to the following terms, and your use,
// installation, modification or redistribution of this Apple software
// constitutes acceptance of these terms. If you do not agree with these
// terms, please do not use, install, modify or redistribute this Apple
// software.
//
// In consideration of your agreement to abide by the following terms, and
// subject to these terms, Apple grants you a personal, non - exclusive
// license, under Apple's copyrights in this original Apple software ( the
// "Apple Software" ), to use, reproduce, modify and redistribute the Apple
// Software, with or without modifications, in source and / or binary forms;
// provided that if you redistribute the Apple Software in its entirety and
// without modifications, you must retain this notice and the following text
// and disclaimers in all such redistributions of the Apple Software. Neither
// the name, trademarks, service marks or logos of Apple Inc. may be used to
// endorse or promote products derived from the Apple Software without specific
// prior written permission from Apple. Except as expressly stated in this
// notice, no other rights or licenses, express or implied, are granted by
// Apple herein, including but not limited to any patent rights that may be
// infringed by your derivative works or by other works in which the Apple
// Software may be incorporated.
//
// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
// ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
//
// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
//
////////////////////////////////////////////////////////////////////////////////////////////////////
#ifndef __CLFFT_H
#define __CLFFT_H
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
#include "hsa.h"
#include "hsa_ext_finalize.h"
// Mini OpenCL: local stand-ins for the handful of OpenCL names this API uses.
// Note that the handle types (context, kernel, program) are plain ints here
// and cl_mem is an untyped pointer.
// NOTE(review): presumably this lets the header build without the real
// OpenCL headers -- these typedefs would clash with <CL/cl.h>; confirm the
// two are never included together.

// Scalar types
typedef int          cl_int;
typedef unsigned int cl_uint;
typedef float        cl_float;

// Handle types
typedef int          cl_context;
typedef int          cl_kernel;
typedef int          cl_program;
typedef void        *cl_mem;

// Status codes
#define CL_SUCCESS 0
#define CL_INVALID_VALUE -30
// Transform direction: forward or inverse.
// NOTE(review): the -1 / +1 values presumably mirror the sign used in the
// transform's exponent -- confirm against the kernel generator.
typedef enum {
    clFFT_Forward = -1,
    clFFT_Inverse = +1
} clFFT_Direction;
// Transform dimensionality.
// Caution: the values are NOT consecutive -- clFFT_3D is 3, not 2 -- so an
// enumerator must not be used as "number of dimensions minus one" or as a
// dense array index.
typedef enum {
    clFFT_1D = 0,
    clFFT_2D = 1,
    clFFT_3D = 3
} clFFT_Dimension;
// Layout of the complex data a transform works on.
// NOTE(review): "split" presumably means separate real and imaginary arrays
// (cf. clFFT_SplitComplex) and "interleaved" means adjacent real/imag pairs
// (cf. clFFT_Complex) -- confirm with the plan implementation.
typedef enum {
    clFFT_SplitComplexFormat       = 0,
    clFFT_InterleavedComplexFormat = 1
} clFFT_DataFormat;
// Selects how twiddle factors are obtained. The eight enumerators cover the
// values 0..7, i.e. they fit in three bits.
// NOTE(review): the names suggest native trig intrinsics, a sincos() call, a
// large lookup table, and a lookup table refined by a Taylor step; the RFU*
// entries look like "reserved for future use" placeholders -- confirm with
// the kernel-string generator before relying on any of this.
typedef enum {
    clFFT_native_trig = 0,
    clFFT_sincosfunc  = 1,
    clFFT_BigLUT      = 2,
    clFFT_TaylorLUT   = 3,
    clFFT_RFU4        = 4,
    clFFT_RFU5        = 5,
    clFFT_RFU6        = 6,
    clFFT_RFU7        = 7
} clFFT_TwiddleFactorMethod;
// Three unsigned extents, one per axis. Used as the signal-size argument
// ("n") of the plan-creation functions declared in this header.
typedef struct {
    unsigned int x;  // extent along the first axis
    unsigned int y;  // extent along the second axis
    unsigned int z;  // extent along the third axis
} clFFT_Dim3;
// Complex data held as two separate float arrays: one pointer for the real
// parts and one for the imaginary parts. The struct only stores the
// pointers; it carries no length and implies no ownership by itself.
typedef struct {
    float *real;  // real components
    float *imag;  // imaginary components
} clFFT_SplitComplex;
// One single-precision complex value: real part first, imaginary part second.
typedef struct {
    float real;  // real component
    float imag;  // imaginary component
} clFFT_Complex;
// Argument record for an FFT kernel; the type is forced to 16-byte alignment.
// The member order and widths define the record's memory layout.
// NOTE(review): the six leading 64-bit members look like the implicit
// arguments an HSA-finalized OpenCL kernel expects ahead of its declared
// parameters, and the remaining members look like the kernel's own
// parameters -- confirm against the finalizer ABI and the generated kernel
// signature before changing anything here.
struct __attribute__ ((aligned(16))) fft_args_t {
    // Leading 64-bit slots
    uint64_t global_offset_0;
    uint64_t global_offset_1;
    uint64_t global_offset_2;
    uint64_t printf_buffer;
    uint64_t vqueue_pointer;
    uint64_t aqlwrap_pointer;

    // Data buffers
    float *in;
    float *out;

    // Scalar parameters
    // NOTE(review): dir presumably carries a clFFT_Direction value and S a
    // batch/stride count -- verify against the kernel source.
    int dir;
    int S;

    // Lookup tables
    // NOTE(review): presumably the precomputed cos/sin tables of the plan.
    float *cossinLUT1;
    float *cossinLUT2;
};
// Opaque plan handle; callers only pass it back to the functions below.
typedef void *clFFT_Plan;

// Creates an FFT plan on top of caller-supplied HSA objects.
//   global_region, kernarg_region - HSA memory regions
//   executable, agent, queue      - HSA executable, agent and queue
//   n, dim, dataFormat            - signal extents, dimensionality, layout
//   flags                         - NOTE(review): presumably selects options
//                                   such as the twiddle-factor method; confirm
// Unlike the disabled OpenCL variants in this header there is no error_code
// output parameter.
// NOTE(review): failure is presumably reported through the returned handle
// (e.g. NULL) -- confirm in clFFT.cpp.
clFFT_Plan clFFT_CreatePlanAdvHSA(hsa_region_t global_region,
                                  hsa_executable_t executable,
                                  hsa_agent_t agent,
                                  hsa_region_t kernarg_region,
                                  hsa_queue_t *queue,
                                  clFFT_Dim3 n,
                                  clFFT_Dimension dim,
                                  clFFT_DataFormat dataFormat,
                                  unsigned long flags);

// Runs a transform for a plan on float input/output buffers.
//   Plan      - handle obtained from clFFT_CreatePlanAdvHSA
//   batchSize - number of transforms in the batch
//   dir       - clFFT_Forward or clFFT_Inverse
//   data_in, data_out - source and destination buffers
// NOTE(review): presumably returns CL_SUCCESS on success and another cl_int
// status (e.g. CL_INVALID_VALUE) otherwise; buffers presumably hold
// interleaved real/imag floats -- confirm in clFFT.cpp.
cl_int clFFT_ExecuteInterleavedHSA(clFFT_Plan Plan,
                                   cl_int batchSize,
                                   clFFT_Direction dir,
                                   float *data_in,
                                   float *data_out);

// Releases a plan handle.
void clFFT_DestroyPlan(clFFT_Plan plan);
// The prototypes below are compiled out. Several of them use
// cl_command_queue and cl_event, which the mini OpenCL typedefs in this
// header do not define, so they could not be enabled as-is.
// NOTE(review): presumably these are the original OpenCL entry points kept
// for reference while only the HSA variants above are implemented -- confirm
// before re-enabling or deleting.
#if 0
clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code );
clFFT_Plan clFFT_CreatePlanAdv( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, unsigned long flags, cl_int *error_code );
cl_int clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag,
cl_int num_events, cl_event *event_list, cl_event *event );
cl_int clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array,
size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir);
cl_int clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag,
size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir);
void clFFT_DumpPlan( clFFT_Plan plan, FILE *file);
#endif
#ifdef __cplusplus
}
#endif
#endif
This diff is collapsed.
This diff is collapsed.
......@@ -95,6 +95,21 @@ typedef struct
// context in which fft resources are created and kernels are executed
cl_context context;
// HSA global region
hsa_region_t global_region;
// HSA executable
hsa_executable_t executable;
// HSA agent
hsa_agent_t agent;
// HSA kernel argument region
hsa_region_t kernarg_region;
// HSA current execution queue
hsa_queue_t* queue;
// size of signal
clFFT_Dim3 n;
......@@ -141,25 +156,25 @@ typedef struct
size_t last_batch_size;
// temporary buffer for interleaved plan
cl_mem tempmemobj;
float* tempmemobj;
// temporary buffer for planner plan. Only one of tempmemobj or
// (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending
// data format of plan (plannar or interleaved)
cl_mem tempmemobj_real, tempmemobj_imag;
float *tempmemobj_real, *tempmemobj_imag;
// precomputed lookup tables for sin,cos calculations, each of size
// precomputed lookup tables for sin,cos calculations, each of size
// sqrt(n) or 2*sqrt(n), n is size of signal;
cl_mem cossin_LUT_d1;
cl_mem cossin_LUT_d2;
float* cossin_LUT_d1;
float* cossin_LUT_d2;
int logN1;
int logN2;
size_t N1;
size_t N1;
size_t N2;
clFFT_TwiddleFactorMethod twiddleMethod;
// Maximum size of signal for which local memory transposed based
// fft is sufficient i.e. no global mem transpose (communication)
// is needed
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment