[GPU] Implement (I)RDFT-9 (#12010)

* [GPU] Implement (I)RDFT-9
* Added GPU implementation of RDFT operation
* Added a basic GPU implementation of the IRDFT operation; it still needs adjusting
* Added single layer tests for (I)RDFT
* Added unit tests for (I)RDFT
* Added blocked layout support for all DFT operations

* [GPU] Added changes after review.

* [GPU] Added single-layer test for single axis case for DFTs.

* [GPU] Simplified attach_dft_impl method.

* [GPU] Moved negative axes cases to separate test for DFTs.

* [GPU] Adjusted tests to cover all signal size cases (less than, equal to, and greater than the input size) for DFTs.

* [GPU] Fixed IRDFT implementation.

* [GPU] Used RDFTLayerTest.

* [GPU] Fixes after rebase.

* [GPU] Renamed kind enum to direction.
This commit is contained in:
Mykhailo Hnap
2022-10-04 14:26:50 +03:00
committed by GitHub
parent f82a8cf93a
commit 4946f6d87b
13 changed files with 1139 additions and 288 deletions

View File

@@ -238,6 +238,8 @@ REGISTER_FACTORY(v8, Softmax);
// ------------------------------ Supported v9 ops ------------------------------ //
REGISTER_FACTORY(v9, SoftSign)
REGISTER_FACTORY(v9, ROIAlign);
REGISTER_FACTORY(v9, RDFT);
REGISTER_FACTORY(v9, IRDFT);
// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);

View File

@@ -5,6 +5,7 @@
#pragma once
#include <openvino/core/shape.hpp>
#include <utility>
#include "primitive.hpp"
@@ -16,12 +17,18 @@ namespace cldnn {
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Kind of DFT operation.
enum class dft_kind {
/// @brief Direction of DFT operation.
enum class dft_direction {
forward,
inverse,
};
/// @brief Mode of DFT operation.
/// @details Combined with dft_direction it selects the concrete transform:
/// complex mode is used for DFT/IDFT, real mode is used for RDFT/IRDFT.
enum class dft_mode {
/// Complex mode (DFT and IDFT).
complex,
/// Real mode (RDFT and IRDFT).
real,
};
/// @brief DFT primitive.
struct dft : public primitive_base<dft> {
CLDNN_DECLARE_PRIMITIVE(dft)
@@ -30,22 +37,30 @@ struct dft : public primitive_base<dft> {
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param axes Axes to perform DFT.
/// @param signal_size Signal sizes for 'axes'.
/// @param output_shape Output shape.
/// @param kind Kind of DFT operation.
/// @param direction Direction of DFT operation.
/// @param mode Mode of DFT operation.
dft(const primitive_id& id,
const primitive_id& input,
std::vector<int64_t>&& axes,
std::vector<int64_t> axes,
std::vector<int64_t> signal_size,
const ov::Shape& output_shape,
dft_kind kind,
dft_direction direction,
dft_mode mode,
const padding& output_padding = {})
: primitive_base(id, {input}, output_padding),
axes(std::move(axes)),
signal_size(std::move(signal_size)),
output_shape(output_shape),
kind(kind) {}
direction(direction),
mode(mode) {}
std::vector<int64_t> axes;
std::vector<int64_t> signal_size;
ov::Shape output_shape;
dft_kind kind;
dft_direction direction;
dft_mode mode;
};
} // namespace cldnn

View File

@@ -14,22 +14,27 @@ primitive_type_id dft::type_id() {
return &instance;
}
layout typed_primitive_inst<dft>::calc_output_layout(const dft_node& node, kernel_impl_params const& impl_param) {
auto primitive = impl_param.typed_desc<dft>();
auto input_layout = impl_param.get_input_layout();
layout dft_inst::calc_output_layout(const dft_node& node, const kernel_impl_params& impl_param) {
const auto primitive = impl_param.typed_desc<dft>();
const auto input_layout = impl_param.get_input_layout();
std::vector<tensor::value_type> dims_converted(primitive->output_shape.begin(), primitive->output_shape.end());
auto output_format = input_layout.format;
// Extend output layout for IRDFT case to make output rank match input rank
if (primitive->direction == dft_direction::inverse && primitive->mode == dft_mode::real) {
dims_converted.push_back(1);
}
// Extend shape to 4d by pushing ones before the last dim
for (auto i = dims_converted.size(); i < 4; ++i) {
dims_converted.insert(std::prev(dims_converted.end()), 1);
}
const auto output_format = format::adjust_to_rank(input_layout.format, dims_converted.size());
return {input_layout.data_type, output_format, tensor(output_format, dims_converted)};
}
std::string typed_primitive_inst<dft>::to_string(const dft_node& node) {
std::string dft_inst::to_string(const dft_node& node) {
auto desc = node.get_primitive();
auto node_info = node.desc_to_json();
std::ostringstream os;

View File

@@ -24,11 +24,35 @@ struct dft_impl : typed_primitive_impl_ocl<dft> {
static primitive_impl* create(const dft_node& arg, const kernel_impl_params& impl_param) {
auto params = get_default_params<kernel_selector::dft_params>(impl_param);
auto primitive = arg.get_primitive();
const auto primitive = impl_param.typed_desc<dft>();
params.axes = primitive->axes;
if (primitive->kind == dft_kind::inverse) {
params.kind = kernel_selector::dft_params::inverse;
if (primitive->signal_size.empty()) {
params.signal_size = std::vector<int64_t>(params.axes.size(), -1);
} else {
params.signal_size = primitive->signal_size;
}
if (primitive->direction == dft_direction::inverse) {
params.direction = kernel_selector::dft_params::Direction::inverse;
}
if (primitive->mode == dft_mode::real) {
params.mode = kernel_selector::dft_params::Mode::real;
}
// Extend input layout for RDFT case to make input rank match output rank
if (primitive->direction == dft_direction::forward && primitive->mode == dft_mode::real) {
const auto input_layout = impl_param.get_input_layout();
const auto output_layout = impl_param.output_layout;
// No need to extend layout for input that has less than 4 dimensions
if (input_layout.get_rank() != output_layout.get_rank()) {
auto new_dims = input_layout.get_dims();
new_dims.push_back(1);
const auto new_fmt = format::adjust_to_rank(input_layout.format, new_dims.size());
params.inputs[0] = convert_data_tensor({input_layout.data_type, new_fmt, tensor(new_fmt, new_dims)});
}
}
auto optional_params = get_default_optional_params<kernel_selector::dft_optional_params>(arg.get_program());
auto& kernel_selector = kernel_selector::dft_kernel_selector::Instance();
@@ -46,16 +70,27 @@ struct dft_impl : typed_primitive_impl_ocl<dft> {
namespace detail {
attach_dft_impl::attach_dft_impl() {
implementation_map<dft>::add(impl_types::ocl,
dft_impl::create,
{
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
});
auto types = {data_types::f16, data_types::f32};
auto formats = {
// 4d
format::bfyx,
format::b_fs_yx_fsv16,
format::b_fs_yx_fsv32,
format::bs_fs_yx_bsv16_fsv16,
format::bs_fs_yx_bsv32_fsv32,
format::bs_fs_yx_bsv32_fsv16,
// 5d
format::bfzyx,
format::b_fs_zyx_fsv16,
format::b_fs_zyx_fsv32,
format::bs_fs_zyx_bsv16_fsv32,
format::bs_fs_zyx_bsv16_fsv16,
format::bs_fs_zyx_bsv32_fsv32,
format::bs_fs_zyx_bsv32_fsv16,
// 6d
format::bfwzyx,
};
implementation_map<dft>::add(impl_types::ocl, dft_impl::create, types, formats);
}
} // namespace detail

View File

@@ -26,8 +26,10 @@ class typed_primitive_inst<dft> : public typed_primitive_inst_base<dft> {
public:
using typed_primitive_inst_base::typed_primitive_inst_base;
static layout calc_output_layout(const dft_node& node, kernel_impl_params const& impl_param);
static layout calc_output_layout(const dft_node& node, const kernel_impl_params& impl_param);
static std::string to_string(const dft_node& node);
};
using dft_inst = typed_primitive_inst<dft>;
} // namespace cldnn

View File

@@ -41,6 +41,7 @@
#include "input_layout_inst.h"
#include "shuffle_channels_inst.h"
#include "arg_max_min_inst.h"
#include "dft_inst.h"
#include "lstm_inst.h"
#include "lstm_elt_inst.h"
#include "lstm_gemm_inst.h"
@@ -1414,6 +1415,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
prim.as<mvn>().input().get_output_layout().data_type != data_types::i8)
|| prim.as<mvn>().get_primitive()->across_channels) &&
prim.type() != cldnn::arg_max_min::type_id() &&
prim.type() != cldnn::dft::type_id() &&
prim.type() != cldnn::mutable_data::type_id() &&
prim.type() != cldnn::reduce::type_id() &&
prim.type() != cldnn::strided_slice::type_id() &&
@@ -1449,6 +1451,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
prim.type() != cldnn::reshape::type_id() &&
prim.type() != cldnn::input_layout::type_id() &&
prim.type() != cldnn::activation::type_id() &&
prim.type() != cldnn::dft::type_id() &&
prim.type() != cldnn::softmax::type_id() &&
prim.type() != cldnn::fully_connected::type_id() &&
prim.type() != cldnn::generic_layer::type_id() &&

View File

@@ -6,6 +6,9 @@
#include <kernel_selector_utils.h>
#include <string>
#include <vector>
namespace kernel_selector {
namespace {
@@ -15,30 +18,32 @@ CommonDispatchData SetDefault(const dft_params& params) {
const auto in_layout = params.inputs.front().GetLayout();
const auto& output = params.outputs.front();
const auto out_layout = output.GetLayout();
const auto out_rank = output.Dimentions();
std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws;
// We are skipping X, since it contains complex pairs and always has dimension 2
switch (out_layout) {
case DataLayout::bfyx:
switch (out_rank) {
case 4:
dispatch_data.gws = {output.Y().v, output.Feature().v, output.Batch().v};
dims_by_gws = {{Tensor::DataChannelName::Y},
{Tensor::DataChannelName::FEATURE},
{Tensor::DataChannelName::BATCH}};
break;
case DataLayout::bfzyx:
case 5:
dispatch_data.gws = {output.Y().v, output.Z().v, output.Feature().v * output.Batch().v};
dims_by_gws = {{Tensor::DataChannelName::Y},
{Tensor::DataChannelName::Z},
{Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH}};
break;
case DataLayout::bfwzyx:
case 6:
dispatch_data.gws = {output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v};
dims_by_gws = {{Tensor::DataChannelName::Y},
{Tensor::DataChannelName::Z, Tensor::DataChannelName::W},
{Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH}};
break;
default:
throw std::invalid_argument("Unsupported data layout for dft primitive");
throw std::invalid_argument("Unsupported output rank for dft primitive");
}
dispatch_data.lws =
@@ -48,8 +53,7 @@ CommonDispatchData SetDefault(const dft_params& params) {
}
template <class T>
void MakeJitConstForAxis(JitConstants& jit, const DataLayout& layout, int64_t index, T value) {
std::string name = "AXIS";
void MakeJitConstForParam(JitConstants& jit, const std::string& name, size_t rank, int64_t index, T value) {
switch (index) {
case 0:
jit.AddConstant(MakeJitConstant(name + "_BATCH", value));
@@ -58,18 +62,18 @@ void MakeJitConstForAxis(JitConstants& jit, const DataLayout& layout, int64_t in
jit.AddConstant(MakeJitConstant(name + "_FEATURE", value));
break;
case 2:
if (layout == DataLayout::bfwzyx) {
if (rank == 6) {
jit.AddConstant(MakeJitConstant(name + "_W", value));
} else if (layout == DataLayout::bfzyx) {
} else if (rank == 5) {
jit.AddConstant(MakeJitConstant(name + "_Z", value));
} else { // DataLayout::bfyx
} else { // rank == 4
jit.AddConstant(MakeJitConstant(name + "_Y", value));
}
break;
case 3:
if (layout == DataLayout::bfwzyx) {
if (rank == 6) {
jit.AddConstant(MakeJitConstant(name + "_Z", value));
} else { // DataLayout::bfzyx
} else { // rank == 5
jit.AddConstant(MakeJitConstant(name + "_Y", value));
}
break;
@@ -77,31 +81,94 @@ void MakeJitConstForAxis(JitConstants& jit, const DataLayout& layout, int64_t in
jit.AddConstant(MakeJitConstant(name + "_Y", value));
break;
default:
throw std::invalid_argument("Unsupported axis for dft primitive");
throw std::invalid_argument("Unsupported index for dft primitive");
}
}
} // namespace
KernelsData DFTKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
KernelsData kernels_data;
if (!Validate(params, options)) {
return kernels_data;
return {};
}
kernels_data.push_back(KernelData::Default<dft_params>(params));
KernelData& kernel_data = kernels_data.front();
auto& derived_params = dynamic_cast<dft_params&>(*kernel_data.params.get());
auto dispatch_data = SetDefault(derived_params);
auto entry_point = GetEntryPoint(kernelName, derived_params.layerID, params, options);
auto jit_constants = GetJitConstants(derived_params);
auto jit = CreateJit(kernelName, jit_constants, entry_point);
auto& clKernelData = kernel_data.kernels[0];
FillCLKernelData(clKernelData, dispatch_data, params.engineInfo, kernelName, jit, entry_point);
return kernels_data;
}
KernelsPriority DFTKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
return DONT_USE_IF_HAVE_SOMETHING_ELSE;
KernelData kd = KernelData::Default<dft_params>(params);
const auto& derived_params = dynamic_cast<const dft_params&>(params);
// For IRDFT case we create two kernels with different data
// First, do IDFT on outer axes and input data
// Second, do IRDFT on the last axis and data from the first kernel
if (derived_params.mode == dft_params::Mode::real && derived_params.direction == dft_params::Direction::inverse &&
derived_params.axes.size() > 1) {
// Helper vector
std::vector<std::pair<dft_params, cldnn::arguments_desc>> kernels_params;
// Fill IDFT kernel data
auto idft_params = derived_params;
idft_params.mode = dft_params::Mode::complex;
idft_params.axes.pop_back();
idft_params.signal_size.pop_back();
const cldnn::arguments_desc idft_arguments{{ArgumentDescriptor::Types::INPUT, 0},
{ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}};
auto& idft_input = idft_params.inputs.front();
auto idft_input_sizes = idft_input.LogicalDims();
// NOTE: This is a small workaround for a 3d case
// We always should have first dimension equal to 2, so we swap it with the second dimension
if (idft_input_sizes[0] == 1) {
std::swap(idft_input_sizes[0], idft_input_sizes[1]);
idft_input = DataTensor(idft_input_sizes, idft_input.GetDType(), idft_input.GetLayout());
}
// Calculate IDFT output sizes
auto idft_output_sizes = idft_input_sizes;
auto& idft_output = idft_params.outputs.front();
for (const auto& axis : idft_params.axes) {
auto inverted_axis = idft_output_sizes.size() - 1 - axis;
idft_output_sizes[inverted_axis] = idft_output.LogicalDims()[inverted_axis];
}
idft_output = DataTensor(idft_output_sizes, idft_input.GetDType(), idft_input.GetLayout());
// Set internal buffer
kd.internalBufferDataType = idft_input.GetDType();
kd.internalBufferSizes.push_back(idft_output.PhysicalSizeInBytes());
// Fill IRDFT kernel data
auto irdft_params = derived_params;
irdft_params.inputs.front() = idft_output;
irdft_params.axes = {derived_params.axes.back()};
irdft_params.signal_size = {derived_params.signal_size.back()};
const cldnn::arguments_desc irdft_arguments{{ArgumentDescriptor::Types::INTERNAL_BUFFER, 0},
{ArgumentDescriptor::Types::OUTPUT, 0}};
// Fill kernels
kernels_params.emplace_back(idft_params, idft_arguments);
kernels_params.emplace_back(irdft_params, irdft_arguments);
const auto kKernelsNum = kernels_params.size();
kd.kernels.resize(kKernelsNum);
for (size_t i = 0; i < kKernelsNum; ++i) {
dft_params kernel_params;
cldnn::arguments_desc kernel_arguments;
std::tie(kernel_params, kernel_arguments) = kernels_params[i];
const auto dispatch_data = SetDefault(kernel_params);
const auto entry_point = GetEntryPoint(kernelName, kernel_params.layerID, params, options, i);
const auto jit_constants = GetJitConstants(kernel_params);
const auto jit = CreateJit(kernelName, jit_constants, entry_point);
auto& clKernelData = kd.kernels[i];
FillCLKernelData(clKernelData, dispatch_data, kernel_params.engineInfo, kernelName, jit, entry_point);
clKernelData.params.arguments = kernel_arguments;
}
} else {
const auto dispatch_data = SetDefault(derived_params);
const auto entry_point = GetEntryPoint(kernelName, derived_params.layerID, derived_params, options);
const auto jit_constants = GetJitConstants(derived_params);
const auto jit = CreateJit(kernelName, jit_constants, entry_point);
auto& clKernelData = kd.kernels[0];
FillCLKernelData(clKernelData, dispatch_data, derived_params.engineInfo, kernelName, jit, entry_point);
}
return {kd};
}
ParamsKey DFTKernelRef::GetSupportedKey() const {
@@ -110,12 +177,8 @@ ParamsKey DFTKernelRef::GetSupportedKey() const {
k.EnableInputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::bfzyx);
k.EnableInputLayout(DataLayout::bfwzyx);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfzyx);
k.EnableOutputLayout(DataLayout::bfwzyx);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableBatching();
k.EnableTensorOffset();
k.EnableTensorPitches();
@@ -137,23 +200,49 @@ bool DFTKernelRef::Validate(const Params& p, const optional_params& o) const {
JitConstants DFTKernelRef::GetJitConstants(const dft_params& params) const {
auto jit = MakeBaseParamsJitConstants(params);
const auto out_layout = params.outputs.front().GetLayout();
const auto out_rank = params.outputs.front().Dimentions();
const auto out_sizes = params.outputs.front().LogicalDims();
const auto in_sizes = params.inputs.front().LogicalDims();
// We are skipping X, since it contains complex pairs and should not be in axes
const auto dims_size = in_sizes.size() - 1;
auto signal_sizes = out_sizes;
size_t s = 1;
for (auto axis : params.axes) {
for (size_t i = 0; i < params.axes.size(); ++i) {
// opencl kernels have inverted order of dimensions with respect to axis spec: x is smallest index, b is largest
auto axis = params.axes[i];
auto inverted_axis = dims_size - axis;
s *= out_sizes[inverted_axis];
MakeJitConstForAxis(jit, out_layout, axis, std::min(out_sizes[inverted_axis], in_sizes[inverted_axis]));
auto signal_size = params.signal_size[i];
// For RDFT case, we need to take signal size into account, as output size can be not the same as signal size
if (params.mode == dft_params::Mode::real && params.direction == dft_params::Direction::forward) {
if (signal_size != -1) {
signal_sizes[inverted_axis] = signal_size;
} else {
signal_sizes[inverted_axis] = in_sizes[inverted_axis];
}
}
s *= signal_sizes[inverted_axis];
// NOTE: We can use full signal size as axis value, but this doesn't make much sense, as it will be zero-padded
// So, we take minimum size here and save some dummy cycles in kernel
auto axis_value = std::min(signal_sizes[inverted_axis], in_sizes[inverted_axis]);
// For IRDFT case, we should use full signal size as axis value and interpret input data as Hermitian-symmetric
if (params.mode == dft_params::Mode::real && params.direction == dft_params::Direction::inverse) {
axis_value = signal_sizes[inverted_axis];
MakeJitConstForParam(jit, "SYMMETRIC_AXIS", out_rank, axis, true);
}
MakeJitConstForParam(jit, "AXIS", out_rank, axis, axis_value);
MakeJitConstForParam(jit, "SIGNAL_SIZE", out_rank, axis, signal_sizes[inverted_axis]);
}
if (params.kind == dft_params::inverse) {
if (params.direction == dft_params::Direction::inverse) {
jit.AddConstant(MakeJitConstant("INVERSE_DFT_MULTIPLIER", 1.f / s));
}
if (params.mode == dft_params::Mode::real) {
jit.AddConstant(MakeJitConstant("REAL_DFT", true));
}
return jit;
}

View File

@@ -10,10 +10,15 @@ namespace kernel_selector {
struct dft_params : public base_params {
std::vector<int64_t> axes;
enum Kind {
std::vector<int64_t> signal_size;
enum class Direction {
forward,
inverse,
} kind = forward;
} direction = Direction::forward;
enum class Mode {
complex,
real,
} mode = Mode::complex;
dft_params() : base_params{KernelType::DFT} {}
};
@@ -23,7 +28,6 @@ struct dft_optional_params : optional_params {
class DFTKernelRef : public KernelBaseOpenCL {
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const dft_params& params) const;

View File

@@ -6,16 +6,15 @@
// alternative: https://github.com/OpenCL/ComplexMath/blob/master/clcomplex.h
typedef float2 cfloat;
#define real(a) ((a).s0)
#define imag(a) ((a).s1)
#define cmult(a, b) ((cfloat)(real(a) * real(b) - imag(a) * imag(b), real(a) * imag(b) + imag(a) * real(b)))
#define crmult(a, b) ((cfloat)(real(a) * (b), imag(a) * (b)))
#define cadd(a, b) ((cfloat)(real(a) + real(b), imag(a) + imag(b)))
#define expi(x) ((cfloat)(cos(x), sin(x)))
#define expmi(x) ((cfloat)(cos(x), -sin(x)))
#define cload(p, offset, pitch) ((cfloat)((p)[offset], (p)[(offset) + (pitch)]))
#define cstore(p, offset, pitch, x) ((p)[offset] = real(x), (p)[(offset) + (pitch)] = imag(x))
#define czero() ((cfloat)(0))
#define real(a) ((a).s0)
#define imag(a) ((a).s1)
#define cmult(a, b) ((cfloat)(real(a) * real(b) - imag(a) * imag(b), real(a) * imag(b) + imag(a) * real(b)))
#define crmult(a, b) ((cfloat)(real(a) * (b), imag(a) * (b)))
#define cadd(a, b) ((cfloat)(real(a) + real(b), imag(a) + imag(b)))
#define expi(x) ((cfloat)(cos(x), sin(x)))
#define expmi(x) ((cfloat)(cos(x), -sin(x)))
#define conj(x) ((cfloat)(real(x), -imag(x)))
#define czero() ((cfloat)(0))
// TODO: pregenerate e{r,i} array on host in macro. maybe it could be done with kernel which runs once?
KERNEL(dft_ref)(const __global INPUT0_TYPE* data, __global OUTPUT_TYPE* output) {
@@ -23,105 +22,218 @@ KERNEL(dft_ref)(const __global INPUT0_TYPE* data, __global OUTPUT_TYPE* output)
const uint dim1 = get_global_id(1);
const uint dim2 = get_global_id(2);
const uint x = 0;
const uint y = dim0;
uint y = dim0;
#if OUTPUT_DIMS == 4
# define ORDER b, f, y, x
# define ORDER_K kb, kf, ky, x
const uint f = dim1;
const uint b = dim2;
# define ORDER_REAL b, f, y, 0
# define ORDER_IMAG b, f, y, 1
uint f = dim1;
uint b = dim2;
#elif OUTPUT_DIMS == 5
# define ORDER b, f, z, y, x
# define ORDER_K kb, kf, kz, ky, x
const uint z = dim1;
const uint f = dim2 % OUTPUT_FEATURE_NUM;
const uint b = dim2 / OUTPUT_FEATURE_NUM;
# define ORDER_REAL b, f, z, y, 0
# define ORDER_IMAG b, f, z, y, 1
uint z = dim1;
uint f = dim2 % OUTPUT_FEATURE_NUM;
uint b = dim2 / OUTPUT_FEATURE_NUM;
#elif OUTPUT_DIMS == 6
# define ORDER b, f, w, z, y, x
# define ORDER_K kb, kf, kw, kz, ky, x
const uint z = dim1 % OUTPUT_SIZE_Z;
const uint w = dim1 / OUTPUT_SIZE_Z;
const uint f = dim2 % OUTPUT_FEATURE_NUM;
const uint b = dim2 / OUTPUT_FEATURE_NUM;
# define ORDER_REAL b, f, w, z, y, 0
# define ORDER_IMAG b, f, w, z, y, 1
uint z = dim1 % OUTPUT_SIZE_Z;
uint w = dim1 / OUTPUT_SIZE_Z;
uint f = dim2 % OUTPUT_FEATURE_NUM;
uint b = dim2 / OUTPUT_FEATURE_NUM;
#endif
const uint output_real_index = GET_INDEX(OUTPUT, ORDER_REAL);
#if !defined(REAL_DFT) || !defined(INVERSE_DFT_MULTIPLIER)
const uint output_imag_index = GET_INDEX(OUTPUT, ORDER_IMAG);
#endif
// TODO: use OUTPUT_TYPE for intermediate calculations?
// We don't use it for now as we will lose a lot of precision for f16 and tests won't pass
#if defined(REAL_DFT) && defined(INVERSE_DFT_MULTIPLIER)
float Y = 0;
#else
cfloat Y = czero();
#endif
const float PI2 = M_PI_F * 2;
#ifdef AXIS_Y
const float ay = PI2 * y / OUTPUT_SIZE_Y;
const float ay = PI2 * y / SIGNAL_SIZE_Y;
#endif
#ifdef AXIS_Z
const float az = PI2 * z / OUTPUT_SIZE_Z;
const float az = PI2 * z / SIGNAL_SIZE_Z;
#endif
#ifdef AXIS_W
const float aw = PI2 * w / OUTPUT_SIZE_W;
const float aw = PI2 * w / SIGNAL_SIZE_W;
#endif
#ifdef AXIS_FEATURE
const float af = PI2 * f / OUTPUT_FEATURE_NUM;
const float af = PI2 * f / SIGNAL_SIZE_FEATURE;
#endif
#ifdef AXIS_BATCH
const float ab = PI2 * b / OUTPUT_BATCH_NUM;
const float ab = PI2 * b / SIGNAL_SIZE_BATCH;
#endif
#ifdef AXIS_BATCH
for (uint kb = 0; kb < AXIS_BATCH; ++kb)
#else
# define kb b
for (b = 0; b < AXIS_BATCH; ++b)
#endif
#ifdef AXIS_FEATURE
for (uint kf = 0; kf < AXIS_FEATURE; ++kf)
#else
# define kf f
for (f = 0; f < AXIS_FEATURE; ++f)
#endif
#ifdef AXIS_W
for (uint kw = 0; kw < AXIS_W; ++kw)
#else
# define kw w
for (w = 0; w < AXIS_W; ++w)
#endif
#ifdef AXIS_Z
for (uint kz = 0; kz < AXIS_Z; ++kz)
#else
# define kz z
for (z = 0; z < AXIS_Z; ++z)
#endif
#ifdef AXIS_Y
for (uint ky = 0; ky < AXIS_Y; ++ky)
#else
# define ky y
for (y = 0; y < AXIS_Y; ++y)
#endif
{
float a = 0;
#ifdef AXIS_Y
a += ay * ky;
a += ay * y;
#endif
#ifdef AXIS_Z
a += az * kz;
a += az * z;
#endif
#ifdef AXIS_W
a += aw * kw;
a += aw * w;
#endif
#ifdef AXIS_FEATURE
a += af * kf;
a += af * f;
#endif
#ifdef AXIS_BATCH
a += ab * kb;
a += ab * b;
#endif
const cfloat X = cload(data, GET_INDEX(INPUT0, ORDER_K), INPUT0_X_PITCH);
#ifdef INVERSE_DFT_MULTIPLIER
const cfloat E = expi(a);
#ifdef REAL_DFT
# ifdef INVERSE_DFT_MULTIPLIER
# if OUTPUT_DIMS == 4
# define SYMMETRIC_ORDER_REAL sb, sf, sy, 0
# define SYMMETRIC_ORDER_IMAG sb, sf, sy, 1
# elif OUTPUT_DIMS == 5
# define SYMMETRIC_ORDER_REAL sb, sf, sz, sy, 0
# define SYMMETRIC_ORDER_IMAG sb, sf, sz, sy, 1
# elif OUTPUT_DIMS == 6
# define SYMMETRIC_ORDER_REAL sb, sf, sw, sz, sy, 0
# define SYMMETRIC_ORDER_IMAG sb, sf, sw, sz, sy, 1
# endif
bool is_zero = false;
bool is_conj = false;
# ifdef SYMMETRIC_AXIS_BATCH
uint sb = b;
if (sb > OUTPUT_BATCH_NUM / 2) {
sb = OUTPUT_BATCH_NUM - sb;
is_conj = true;
}
if (sb >= INPUT0_BATCH_NUM) {
is_zero = true;
}
# else
# define sb b
# endif
# ifdef SYMMETRIC_AXIS_FEATURE
uint sf = f;
if (sf > OUTPUT_FEATURE_NUM / 2) {
sf = OUTPUT_FEATURE_NUM - sf;
is_conj = true;
}
if (sf >= INPUT0_FEATURE_NUM) {
is_zero = true;
}
# else
# define sf f
# endif
# ifdef SYMMETRIC_AXIS_W
uint sw = w;
if (sw > OUTPUT_SIZE_W / 2) {
sw = OUTPUT_SIZE_W - sw;
is_conj = true;
}
if (sw >= INPUT0_SIZE_W) {
is_zero = true;
}
# else
# define sw w
# endif
# ifdef SYMMETRIC_AXIS_Z
uint sz = z;
if (sz > OUTPUT_SIZE_Z / 2) {
sz = OUTPUT_SIZE_Z - sz;
is_conj = true;
}
if (sz >= INPUT0_SIZE_Z) {
is_zero = true;
}
# else
# define sz z
# endif
# ifdef SYMMETRIC_AXIS_Y
uint sy = y;
if (sy > OUTPUT_SIZE_Y / 2) {
sy = OUTPUT_SIZE_Y - sy;
is_conj = true;
}
if (sy >= INPUT0_SIZE_Y) {
is_zero = true;
}
# else
# define sy y
# endif
cfloat X = czero();
if (!is_zero) {
const uint input_real_index = GET_INDEX(INPUT0, SYMMETRIC_ORDER_REAL);
const uint input_imag_index = GET_INDEX(INPUT0, SYMMETRIC_ORDER_IMAG);
X = (cfloat)(data[input_real_index], data[input_imag_index]);
if (is_conj) {
X = conj(X);
}
}
# else
const uint input_real_index = GET_INDEX(INPUT0, ORDER_REAL);
const float X = data[input_real_index];
# endif
// clang-format off
#else
const cfloat E = expmi(a);
const uint input_real_index = GET_INDEX(INPUT0, ORDER_REAL);
const uint input_imag_index = GET_INDEX(INPUT0, ORDER_IMAG);
const cfloat X = (cfloat)(data[input_real_index], data[input_imag_index]);
#endif
Y = cadd(Y, cmult(X, E));
}
#ifdef INVERSE_DFT_MULTIPLIER
Y = crmult(Y, INVERSE_DFT_MULTIPLIER);
// No need to calculate E for IRDFT case, as we will calculate manually later
# ifndef REAL_DFT
const cfloat E = expi(a);
# endif
#else
const cfloat E = expmi(a);
#endif
cstore(output, GET_INDEX(OUTPUT, ORDER), OUTPUT_X_PITCH, Y);
#ifdef REAL_DFT
# ifdef INVERSE_DFT_MULTIPLIER
Y += real(X) * cos(a) - imag(X) * sin(a);
# else
Y = cadd(Y, crmult(E, X));
# endif
#else
Y = cadd(Y, cmult(X, E));
#endif
}
// clang-format on
#ifdef INVERSE_DFT_MULTIPLIER
# ifdef REAL_DFT
Y *= INVERSE_DFT_MULTIPLIER;
# else
Y = crmult(Y, INVERSE_DFT_MULTIPLIER);
# endif
#endif
#if defined(REAL_DFT) && defined(INVERSE_DFT_MULTIPLIER)
output[output_real_index] = Y;
#else
output[output_real_index] = real(Y);
output[output_imag_index] = imag(Y);
#endif
}
#undef real
@@ -135,5 +247,5 @@ KERNEL(dft_ref)(const __global INPUT0_TYPE* data, __global OUTPUT_TYPE* output)
#undef cstore
#undef czero
#undef GET_INDEX
#undef ORDER
#undef ORDER_K
#undef ORDER_REAL
#undef ORDER_IMAG

View File

@@ -13,39 +13,64 @@ namespace intel_gpu {
namespace {
void createDft(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::dft_kind kind) {
void createDft(Program& p,
const std::shared_ptr<ngraph::Node>& op,
cldnn::dft_direction direction,
cldnn::dft_mode mode) {
validate_inputs_count(op, {2, 3});
const auto inputs = p.GetInputPrimitiveIDs(op);
const auto layer_name = layer_type_name_ID(op);
const auto& op_friendly_name = op->get_friendly_name();
const auto& friendly_name = op->get_friendly_name();
const auto& out_shape = op->get_output_shape(0);
auto axes_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
if (!axes_constant) {
IE_THROW() << "Unsupported parameter nodes type in " << op_friendly_name << " (" << op->get_type_name() << ")";
IE_THROW() << "Unsupported parameter nodes type in " << friendly_name << " (" << op->get_type_name() << ")";
}
auto axes = axes_constant->cast_vector<int64_t>();
const uint8_t data_rank = out_shape.size();
ov::normalize_axes(op.get(), data_rank - 1, axes);
uint8_t axis_correction = op->get_input_shape(0).size();
if (direction != cldnn::dft_direction::forward || mode != cldnn::dft_mode::real) {
--axis_correction;
}
ov::normalize_axes(op.get(), axis_correction, axes);
const cldnn::dft prim(layer_name, inputs.front(), std::move(axes), out_shape, kind);
std::vector<int64_t> signal_size;
if (op->get_input_size() == 3) {
auto signal_size_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(2));
if (!signal_size_constant) {
IE_THROW() << "Unsupported parameter nodes type in " << friendly_name << " (" << op->get_type_name() << ")";
}
signal_size = signal_size_constant->cast_vector<int64_t>();
}
const cldnn::dft prim(layer_name, inputs.front(), axes, signal_size, out_shape, direction, mode);
p.add_primitive(*op, prim);
}
void CreateDFTOp(Program& p, const std::shared_ptr<ngraph::op::v7::DFT>& op) {
createDft(p, op, cldnn::dft_kind::forward);
createDft(p, op, cldnn::dft_direction::forward, cldnn::dft_mode::complex);
}
void CreateIDFTOp(Program& p, const std::shared_ptr<ngraph::op::v7::IDFT>& op) {
createDft(p, op, cldnn::dft_kind::inverse);
createDft(p, op, cldnn::dft_direction::inverse, cldnn::dft_mode::complex);
}
// Translates an RDFT-9 node into a cldnn::dft primitive: forward direction, real mode.
void CreateRDFTOp(Program& p, const std::shared_ptr<ngraph::op::v9::RDFT>& op) {
    constexpr auto direction = cldnn::dft_direction::forward;
    constexpr auto mode = cldnn::dft_mode::real;
    createDft(p, op, direction, mode);
}
// Translates an IRDFT-9 node into a cldnn::dft primitive: inverse direction, real mode.
void CreateIRDFTOp(Program& p, const std::shared_ptr<ngraph::op::v9::IRDFT>& op) {
    constexpr auto direction = cldnn::dft_direction::inverse;
    constexpr auto mode = cldnn::dft_mode::real;
    createDft(p, op, direction, mode);
}
} // namespace
REGISTER_FACTORY_IMPL(v7, DFT);
REGISTER_FACTORY_IMPL(v7, IDFT);
REGISTER_FACTORY_IMPL(v9, RDFT);
REGISTER_FACTORY_IMPL(v9, IRDFT);
} // namespace intel_gpu
} // namespace ov

View File

@@ -12,7 +12,7 @@ using namespace tests;
namespace {
template <typename vecElementType>
template <class vecElementType>
std::string vec2str(const std::vector<vecElementType>& vec) {
if (!vec.empty()) {
std::ostringstream result;
@@ -30,128 +30,131 @@ std::vector<T> convert(const std::vector<float>& v) {
return result;
}
struct dft_test_params {
// Identifies a DFT variant as a (direction, mode) pair.
struct dft_type {
// Forward or inverse transform.
dft_direction direction;
// Complex or real transform.
dft_mode mode;
};
// Named (direction, mode) combinations, one per DFT variant.
// Forward complex transform.
const dft_type DFT{dft_direction::forward, dft_mode::complex};
// Inverse complex transform.
const dft_type IDFT{dft_direction::inverse, dft_mode::complex};
// Forward real transform.
const dft_type RDFT{dft_direction::forward, dft_mode::real};
// Inverse real transform.
const dft_type IRDFT{dft_direction::inverse, dft_mode::real};
// Per-variant threshold, specialized for each element type T.
// NOTE(review): presumably the tolerance used when comparing results against
// expected values - the call site is outside this view.
template <class T>
float getThreshold(dft_type type);

// Thresholds for float data, selected by (mode, direction).
// Returns 0 if the pair matches none of the four known variants.
template <>
float getThreshold<float>(dft_type type) {
    const bool is_forward = type.direction == dft_direction::forward;
    const bool is_inverse = type.direction == dft_direction::inverse;
    switch (type.mode) {
    case dft_mode::complex:
        if (is_forward) {
            return 1e-4f;
        }
        if (is_inverse) {
            return 4e-6f;
        }
        break;
    case dft_mode::real:
        if (is_forward) {
            return 2e-4f;
        }
        if (is_inverse) {
            return 2e-6f;
        }
        break;
    }
    return 0;
}
// Thresholds for half_t data, selected by (mode, direction).
// Returns 0 if the pair matches none of the four known variants.
template <>
float getThreshold<half_t>(dft_type type) {
    const bool is_forward = type.direction == dft_direction::forward;
    const bool is_inverse = type.direction == dft_direction::inverse;
    switch (type.mode) {
    case dft_mode::complex:
        if (is_forward) {
            return 4e-2f;
        }
        if (is_inverse) {
            return 5e-4f;
        }
        break;
    case dft_mode::real:
        if (is_forward) {
            return 8e-3f;
        }
        if (is_inverse) {
            return 2e-3f;
        }
        break;
    }
    return 0;
}
// Data describing a single DFT test case, independent of formats and of the
// DFT variant being run.
struct dft_params {
// Dimensions used to build the input tensor.
std::vector<int32_t> input_shape;
// Output shape handed to the dft primitive.
std::vector<size_t> output_shape;
// Axes handed to the dft primitive.
std::vector<int64_t> axes;
// Signal sizes handed to the dft primitive.
std::vector<int64_t> signal_size;
// Input values; converted to the tested element type before upload.
std::vector<float> input_values;
// Reference output; converted to the tested element type and compared element-wise.
std::vector<float> expected_values;
// Optional label appended to the generated test name when non-empty.
std::string test_name;
};
// Parameter set for inverse-DFT test cases (getKind<idft_test_params>() yields
// dft_kind::inverse).
// Note the member order: expected_values is declared before input_values.
struct idft_test_params {
// Dimensions used to build the input tensor.
std::vector<int32_t> input_shape;
// Output shape handed to the dft primitive.
std::vector<size_t> output_shape;
// Axes handed to the dft primitive.
std::vector<int64_t> axes;
// Reference output compared element-wise against the result.
std::vector<float> expected_values;
// Values written to the input memory.
std::vector<float> input_values;
// Optional label appended to the generated test name when non-empty.
std::string test_name;
};
// Full parameter tuple of one test instance. The input memory is allocated in
// the plain format and reordered to the blocked format before it reaches the
// dft primitive; the remaining elements select the DFT variant and the case data.
using dft_test_params = std::tuple<format, // plain format
format, // blocked format
dft_type,
dft_params>;
// Maps a test-parameter type P to the DFT kind its tests exercise.
// Only the explicit specializations below are defined.
template <class P>
dft_kind getKind();
// dft_test_params cases run the forward transform.
template <>
dft_kind getKind<dft_test_params>() {
return dft_kind::forward;
}
// idft_test_params cases run the inverse transform.
template <>
dft_kind getKind<idft_test_params>() {
return dft_kind::inverse;
}
// Value-parameterized fixture for the dft primitive. T is the element type the
// case values are converted to and the output is read back as.
template <class P, class T>
struct dft_gpu_test : public testing::TestWithParam<P> {
template <class T>
struct dft_gpu_test : public testing::TestWithParam<dft_test_params> {
public:
// Builds a network around the dft primitive, executes it on the test engine
// and compares every output element against the reference values.
void test() {
// Fetch the parameters of the current test instance.
auto p = testing::TestWithParam<P>::GetParam();
format::type plain_format;
format::type blocked_format;
dft_type type;
dft_params p;
std::tie(plain_format, blocked_format, type, p) = testing::TestWithParam<dft_test_params>::GetParam();
auto& engine = get_test_engine();
// Allocate the input memory and fill it with the converted case values.
const auto input_format = format::get_default_format(p.input_shape.size());
const layout data_layout(type_to_data_type<T>::value, input_format, tensor(input_format, p.input_shape));
auto data_type = type_to_data_type<T>::value;
const layout data_layout(data_type, plain_format, tensor(plain_format, p.input_shape));
auto input = engine.allocate_memory(data_layout);
set_values(input, convert<T>(p.input_values));
// Assemble the topology around the primitive under test.
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(dft("dft", "input", std::move(p.axes), p.output_shape, getKind<P>()));
topology.add(reorder("reorder_input", "input", blocked_format, data_type));
topology.add(dft("dft", "reorder_input", p.axes, p.signal_size, p.output_shape, type.direction, type.mode));
// It's simpler to use "bfwzyx" format for all cases, as input and output can have different ranks
topology.add(reorder("out", "dft", format::bfwzyx, data_type));
network network(engine, topology);
network.set_input_data("input", input);
const auto outputs = network.execute();
// Exactly one network output is expected.
EXPECT_EQ(outputs.size(), size_t(1));
EXPECT_EQ(outputs.begin()->first, "dft");
EXPECT_EQ(outputs.begin()->first, "out");
auto output = outputs.at("dft").get_memory();
auto output = outputs.at("out").get_memory();
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
const auto expected_values = convert<T>(p.expected_values);
// Sizes must match before the element-wise comparison; ASSERT aborts the test otherwise.
ASSERT_EQ(output_ptr.size(), expected_values.size());
for (size_t i = 0; i < output_ptr.size(); ++i) {
EXPECT_NEAR(expected_values[i], output_ptr[i], getThreshold());
EXPECT_NEAR(expected_values[i], output_ptr[i], getThreshold<T>(type));
}
}
// Builds the test-name suffix from the instance parameters.
static std::string PrintToStringParamName(const testing::TestParamInfo<P>& info) {
auto& p = info.param;
static std::string PrintToStringParamName(const testing::TestParamInfo<dft_test_params>& info) {
format::type plain_format;
format::type blocked_format;
dft_type type;
dft_params p;
std::tie(plain_format, blocked_format, type, p) = info.param;
std::ostringstream result;
result << "InputShape=" << vec2str(p.input_shape) << "_";
result << "OutputShape=" << vec2str(p.output_shape) << "_";
result << "Precision=" << data_type_traits::name(type_to_data_type<T>::value) << "_";
result << "Axes=" << vec2str(p.axes) << "_";
result << "Inverse=" << (getKind<P>() == dft_kind::inverse);
result << "SignalSize=" << vec2str(p.signal_size) << "_";
result << "Inverse=" << (type.direction == dft_direction::inverse) << "_";
result << "Real=" << (type.mode == dft_mode::real) << "_";
result << "Format=" << fmt_to_str(blocked_format);
// The free-form label is appended only when the case provides one.
if (!p.test_name.empty()) {
result << "_TestName=" << p.test_name;
}
return result.str();
}
// Tolerance passed to EXPECT_NEAR in test().
float getThreshold() const;
};
// Comparison tolerances, one per (parameter type, element type) fixture.
// dft_test_params with float.
template <>
float dft_gpu_test<dft_test_params, float>::getThreshold() const {
return 1e-4f;
}
// dft_test_params with half_t.
template <>
float dft_gpu_test<dft_test_params, half_t>::getThreshold() const {
return 4e-2f;
}
// idft_test_params with float.
template <>
float dft_gpu_test<idft_test_params, float>::getThreshold() const {
return 4e-6f;
}
// idft_test_params with half_t.
template <>
float dft_gpu_test<idft_test_params, half_t>::getThreshold() const {
return 5e-4f;
}
// Concrete fixtures: one per combination of parameter type and element type.
using dft_gpu_test_f32 = dft_gpu_test<dft_test_params, float>;
using dft_gpu_test_f16 = dft_gpu_test<dft_test_params, half_t>;
using idft_gpu_test_f32 = dft_gpu_test<idft_test_params, float>;
using idft_gpu_test_f16 = dft_gpu_test<idft_test_params, half_t>;
// Parameterized test bodies; each one only runs the fixture's shared test().
TEST_P(dft_gpu_test_f32, dft_f32) {
test();
}
TEST_P(dft_gpu_test_f16, dft_f16) {
test();
}
TEST_P(idft_gpu_test_f32, idft_f32) {
test();
}
TEST_P(idft_gpu_test_f16, idft_f16) {
test();
}
const std::vector<float> input_data = {
0.85943836, 0.009941814, 0.004292889, 0.54598427, 0.8270831, 0.49770153, 0.9035636, 0.19274887,
0.8589833, 0.88759327, 0.72343576, 0.057539318, 0.915801, 0.63455844, 0.25069925, 0.045601673,
@@ -1529,63 +1532,492 @@ const std::vector<float> expected_idft1d_signal_size_results = {
0.08365354, -0.11595764, 0.0041189813, -0.19876777, 0.05539961, -0.03458054, 0.3030866,
0.06549558, 0.100598566, -0.0328084, -0.008204469, -0.20897065, -0.08830016, -0.15857375};
const std::vector<dft_test_params> dft_params = {
static const std::vector<float> rinput_data = {
0.10606491, 0.7454715, 0.57231355, 0.4582412, 0.3847059, 0.27398932, 0.66796243, 0.395475, 0.2815729,
0.7799197, 0.59909415, 0.12294636, 0.38957402, 0.97498834, 0.46759892, 0.14017141, 0.04206858, 0.7279963,
0.61560553, 0.9027321, 0.6226334, 0.2601217, 0.5555177, 0.40498647, 0.14175586, 0.57774633, 0.52652127,
0.9385691, 0.9588788, 0.9844318, 0.23095612, 0.09707925, 0.24574867, 0.6907577, 0.1974319, 0.8295272,
0.34612727, 0.51401484, 0.66115797, 0.9336245, 0.06690067, 0.7468897, 0.39028263, 0.53575844, 0.060429193,
0.8913558, 0.77787375, 0.6701197, 0.7350527, 0.6636995, 0.18176624, 0.8629976, 0.45142895, 0.6497297,
0.159372, 0.40598175, 0.7988516, 0.7291543, 0.07090418, 0.7697132, 0.4972157, 0.7669217, 0.67975855,
0.13026066, 0.6587437, 0.24532892, 0.24545169, 0.83795583, 0.105490535, 0.7264323, 0.94568557, 0.7216649,
0.14389831, 0.7930531, 0.70895344, 0.9724701, 0.9775157, 0.49999878, 0.65569246, 0.26876843, 0.63248956,
0.85201293, 0.5689624, 0.023386303, 0.5546464, 0.36860028, 0.9603114, 0.39123482, 0.0380728, 0.89212376,
0.14387614, 0.63858676, 0.10003748, 0.8906635, 0.06681054, 0.7458642, 0.45452347, 0.54724604, 0.6496482,
0.7818356, 0.6608355, 0.77711326, 0.24588613, 0.013456763, 0.355845, 0.80388206, 0.027993264, 0.73677206,
0.52755004, 0.9052324, 0.54311025, 0.5367805, 0.4131242, 0.7752338, 0.109669454, 0.13664648, 0.7828739,
0.9083969, 0.5247593, 0.7493595, 0.19275227, 0.007190853, 0.6087981, 0.344136, 0.46909887, 0.41924855,
0.7072913, 0.19932869, 0.5303847, 0.651384, 0.06686331, 0.9717932, 0.65702224, 0.11786682, 0.3154073,
0.88923013, 0.5564087, 0.91047823, 0.28466642, 0.0934668, 0.88953066, 0.9919338, 0.18322521, 0.8185455,
0.566391, 0.014207997, 0.29673064, 0.6347744, 0.6801958, 0.39601147, 0.34374171, 0.7216888, 0.6152569,
0.76679546, 0.5860851, 0.4276813, 0.79339284, 0.13130653, 0.68764234, 0.053128112, 0.02611321, 0.2982243,
0.7618372, 0.3331729, 0.5468192, 0.15707079, 0.28592056, 0.15286565, 0.9368963, 0.350671, 0.4336494,
0.08934934, 0.41172776, 0.5850259, 0.70730376, 0.8598349, 0.088788144, 0.26711187, 0.8002491, 0.19422275,
0.8312039, 0.5198718, 0.40111357, 0.98375803, 0.77703434, 0.037818834, 0.704231, 0.689808, 0.17102319,
0.42153922, 0.7278252, 0.8030207, 0.9101717, 0.0199644, 0.13768466, 0.55669, 0.17991355, 0.6720098,
0.7733328, 0.20881335};
static const std::vector<float> expected_rdft1d_results_1 = {
4.6657147, -1.1622906e-06, 0.21456887, -0.14946258, -0.20476034, -0.37063062, -0.31414136,
0.5099413, -1.1779613, 0.07057127, -0.64047664, -1.0058284e-07, 4.982774, -1.1771917e-06,
0.6607505, 0.18829148, -0.9772357, 1.4243596, 0.8640026, 0.34923682, 0.33401352,
0.25859502, -0.7548928, 8.940697e-08, 5.9711604, -1.4901161e-06, 0.5638976, 1.5429841,
-0.52065414, 0.24638398, -0.27140495, 0.5040715, 0.5360231, 0.3234269, -0.36054826,
1.7508864e-07, 4.7464237, -1.2218952e-06, -0.29650804, 0.80609477, -0.161426, 1.0022418,
-0.50812817, 0.7967348, 0.4394225, -0.1588624, -1.3835809, -7.4505806e-08, 5.53836,
-1.7136335e-06, -0.38635445, 0.8284859, -0.23278837, -0.63777345, -0.93614054, 0.3215857,
-0.14075133, -0.67071164, -1.4772836, 2.0861626e-07, 5.0798974, -1.5944242e-06, 0.056767445,
0.03468219, -0.1497254, -0.9672509, 0.2603209, 0.69644475, -0.9208536, 0.006730467,
-1.7552528, 2.682209e-07, 4.893558, -1.6242266e-06, 0.6719861, -0.13982919, 0.064845346,
-0.39896214, 0.21785057, -0.5099982, -0.65526295, 1.4383471, -0.52023906, 2.5331974e-07,
6.687699, -1.5497208e-06, -0.7423769, 0.09968524, 1.052381, -0.21306956, 0.5875206,
-0.3038844, 0.3991575, -1.1895186, 0.17579001, 3.874302e-07, 5.2818384, -1.1026859e-06,
0.5087582, 0.106959194, 1.1816688, -0.87592727, 0.03740315, 0.5197907, -1.3198637,
0.6398836, 0.22712436, 2.2351742e-08, 5.0190897, -1.5646219e-06, -0.087282926, 0.50819266,
-0.28002462, 0.29240948, -0.32303664, 0.38377762, -0.0051696897, -0.99301195, -2.189299,
2.0861626e-07, 5.0545654, -1.5795231e-06, 0.9146397, 0.83839166, 0.870533, 0.17405808,
-0.56308234, -0.7806684, 0.26397777, 0.6880482, -1.4183462, 2.682209e-07, 5.479953,
-1.2665987e-06, 0.49444157, 0.7534672, -0.76784146, -0.4507342, 0.88815784, 0.6985409,
-0.2727425, -0.25027415, -0.7328796, 2.682209e-07, 4.1296124, -5.662441e-07, -0.46133032,
0.30635798, -0.18225375, 0.42515472, -0.5484285, 0.9704039, -0.35255045, 0.17549685,
0.8870368, -3.1292439e-07, 4.8632016, -1.8924475e-06, -0.6926452, 0.025076404, -0.039108217,
-1.7492937, -0.8120377, -0.85315156, -0.0022608787, 0.45002514, -1.1024668, 3.501773e-07,
5.4715447, -1.4901161e-06, 1.1176248, -0.2109062, -0.27492502, 0.08983741, 1.1903813,
-1.007312, -0.20150042, -0.83919466, -0.23939973, 4.917383e-07, 5.1267176, -9.983778e-07,
-0.44803134, -0.8066604, -0.3435102, -0.41692197, -0.22457689, -0.1076939, -0.29129186,
-1.1880502, 0.9255183, -1.6391277e-07, 3.8495903, -5.5134296e-07, 0.09505272, -0.12751618,
-1.1264827, 0.5068884, -1.055237, -0.19516481, -0.34035242, -0.15379356, 1.2655814,
-2.6077032e-07, 4.4372616, -9.23872e-07, -0.72962606, -0.23475963, -0.04278487, 1.1032158,
-0.558924, -0.5300043, 1.0578637, -0.2466627, 0.44617313, -7.8231096e-08, 5.5374002,
-1.4156103e-06, 0.016273111, -0.5989829, -0.19913958, 0.013256833, 1.8512837, 0.14526272,
-0.39700353, -0.07573915, 0.23181, 2.9429793e-07, 4.989425, -1.4901161e-06, 1.0391837,
0.16554561, -0.22647032, -1.0689808, -0.84556, -0.82779336, 0.9430445, 0.37618563,
0.4684292, -9.685755e-08};
static const std::vector<float> expected_rdft1d_results_2 = {2.266797, -8.195639e-08, -0.37842733, -0.41015846,
-0.48980892, -0.10356337, 2.5542018, -2.2351742e-08,
-0.3223713, 0.671882, 0.54300576, -0.35418037,
1.985015, -2.2351742e-08, -0.030243821, -0.20105253,
0.59431964, 0.07358998, 1.4619737, -7.450581e-09,
-0.4356845, 0.35701087, 0.28208786, -0.36424285,
1.8002605, -1.1920929e-07, -0.43280697, -0.56735414,
-0.30007166, -0.541847, 2.3052943, -1.2293458e-07,
-0.39316025, -0.5526293, -0.30507135, -0.6021758,
2.7329001, -6.7055225e-08, 0.28245124, -0.42586988,
-0.40586215, 0.4590181, 3.3132548, -5.9604645e-08,
0.6297612, 0.3694744, 0.077824846, -0.6248544,
2.6314974, -2.9802322e-08, 0.58795106, -0.60349375,
-0.3224758, 0.34408605, 1.8399743, -9.685755e-08,
-0.43963802, -0.079073176, -0.120658875, -1.0880115,
2.0531366, -4.4703484e-08, 0.80112594, -0.53726834,
-0.17560546, -0.026561722, 2.3779182, -9.685755e-08,
-0.21852754, -0.19336401, 0.38734403, -0.5954362,
1.6219761, 7.450581e-09, -0.43100592, 0.28373614,
0.101898566, 0.52321124, 2.128953, -1.4901161e-07,
-0.1622684, -0.94116735, -0.7350497, 0.12695336,
3.449626, -8.940697e-08, 0.56062996, -0.031283244,
-0.06161648, -0.8543532, 3.033568, -8.195639e-08,
-0.37023768, -0.03989461, -0.28719214, -0.22382751,
1.9661667, -1.4901161e-08, -0.59863573, -0.015534669,
-0.31916466, 0.55380434, 2.227056, -5.2154064e-08,
-0.12656188, 0.6895717, 0.097157195, 0.19840825,
3.5129817, -2.1234155e-07, 0.11158541, 0.5870459,
0.20993343, -0.40297145, 2.5986667, 0.0,
0.26602313, -1.1560227, 0.2542065, 0.45556274};
static const std::vector<float> expected_rdft1d_results_3 = {
4.665715, -1.6093254e-06, -0.5430559, -0.5752678, -0.37596112, -1.1571281, -0.46793216,
-0.94566363, 0.6854232, -0.3444838, -0.674704, 0.5946392, -0.64047587, 1.3560057e-06,
4.9827743, -1.7434359e-06, -0.43517, -0.049020194, -1.4773891, -1.0811031, 1.2506557,
0.5371344, 1.2869358, -0.14998645, 0.8555907, 0.3693859, -0.7548918, 1.5944242e-06,
5.971161, -1.5199184e-06, -1.2643411, 0.85635287, -0.1801207, -1.7264944, 0.6412285,
-0.4787441, 0.82227707, 0.65098876, 0.9114491, 0.40323836, -0.36054718, 1.2852252e-06,
4.7464237, -1.66893e-06, -1.5010594, 0.2253451, -0.87915635, -0.4252541, 0.4976693,
-0.6554581, 0.928985, 0.8035921, 0.6578763, -0.15220329, -1.3835799, 1.0430813e-06,
5.5383606, -1.4901161e-06, -1.619024, -0.10987502, 0.20661727, -1.3774645, -0.3057741,
-1.0960662, 0.2971667, 0.46700704, -0.20812088, -0.602368, -1.4772825, 9.3877316e-07,
5.0798974, -1.758337e-06, -0.7421876, -0.61749315, 0.21938956, -1.3415859, -0.838238,
-0.6598083, 1.0601404, -0.7129184, -0.27083004, 0.31763482, -1.7552516, 1.4677644e-06,
4.893558, -1.4975667e-06, -0.06445231, -0.55879503, 0.08908144, -1.2869594, 0.33623943,
-0.7704663, -0.047739983, -1.0678453, 0.48350462, 1.5768427, -0.52023804, 1.1697412e-06,
6.687699, -1.3113022e-06, -1.292419, -1.2920969, 1.2041754, -0.2943018, 1.1889167,
-0.66985166, 1.1336832, -0.13731277, 0.008011267, -0.9506076, 0.1757915, 1.1026859e-06,
5.2818394, -1.4305115e-06, -0.25987166, -0.48605326, 0.90237427, -0.8028362, -0.3040653,
-1.6981151, 1.1215456, -0.7120959, -0.4195284, 1.3941492, 0.22712523, 8.046627e-07,
5.01909, -1.7881393e-06, -1.1856917, -0.10931289, -0.5164983, -0.9724103, 0.30577338,
-0.72837675, 0.89680094, 0.21036407, -0.052024096, -0.9455472, -2.1892984, 1.4305115e-06,
5.054565, -1.5050173e-06, -0.3471575, 0.40542153, 0.36438322, -0.9765247, 1.2703501,
-1.7359983, -0.1160066, -0.25323528, 0.9753329, 0.5339062, -1.418345, 9.834766e-07,
5.4799523, -1.7285347e-06, -0.7905842, 0.093313254, 0.068526804, -1.8504739, -0.01845923,
0.26084417, 1.5358877, -0.4159652, 0.089752786, 0.089908056, -0.7328786, 1.4007092e-06,
4.129612, -9.536743e-07, -1.2393575, -0.28046644, -0.58673245, -0.39608067, -0.12385368,
-0.53435826, 0.77853805, 0.7645384, -0.18040559, 0.6678516, 0.88703763, 8.046627e-07,
4.8632016, -1.0430813e-06, -1.1780663, -1.0952923, 1.1691413, -1.4023741, -0.546494,
-0.92614484, -1.1796933, -0.31762218, 0.25592417, 0.0959474, -1.1024656, 1.013279e-06,
5.471545, -1.6987324e-06, 0.35812324, -0.66833705, 0.07725692, -1.6537004, 1.6561611,
0.051166296, 0.865453, -1.1392289, -0.23588535, -0.5480979, -0.2393986, 1.3411045e-06,
5.126718, -9.23872e-07, -0.6379836, -1.6675751, 0.013057679, -0.9891113, 0.20881936,
-0.30439606, 0.37222707, 0.25244698, -0.9197892, -0.77782196, 0.9255192, 1.1101365e-06,
3.8495903, -7.4505806e-07, -0.63088936, -0.4556699, -1.1905057, -1.2522144, 0.46207082,
-0.31992733, -0.4309795, 0.74295896, -0.6106033, 0.18823686, 1.2655822, 7.748604e-07,
4.4372616, -7.0780516e-07, -1.1016369, -1.0079124, -0.6083025, -0.0011255145, 1.4406854,
-0.2912693, -0.26610214, 0.87299407, 0.69553405, -0.45576566, 0.44617438, 7.4505806e-07,
5.5374007, -1.5944242e-06, -0.32642078, -1.3683549, 0.079301864, -0.83741367, 0.67391664,
0.69433576, 1.6423957, -1.1923066, 0.0334223, 0.37603495, 0.23181117, 1.4156103e-06,
4.9894247, -7.748604e-07, 0.1788401, -0.39274544, 0.78422666, -2.1340246, 0.5487572,
-0.8765497, -0.7899384, 0.5434137, 0.91613716, 0.08274247, 0.46843058, 8.34465e-07};
const std::vector<float> expected_rdft2d_results = {
52.8665, -2.9623508e-05, 1.1642078, 3.826082, -0.22771922, -0.49822173, -0.3857528, 3.2676966,
-2.5112464, -0.27454787, -8.678656, 3.7550926e-06, -0.818072, 0.8330209, 3.4618711, -0.2419473,
1.7408192, 5.744002, 1.8477443, 2.039329, 0.3268112, -2.7421296, 0.6809025, 1.7613728,
-2.294264, -0.8984407, -0.2868184, -3.2426705, -0.801461, -0.58971727, -1.463435, -2.5413132,
0.116907075, -0.5013529, -2.8377397, -2.8455539, -0.13475686, -1.3145845, -2.2820292, -0.199,
-0.056986623, 0.12560216, -0.589707, -1.7577857, -0.5274223, -1.0395792, 0.53813136, -1.7159984,
0.22503978, 2.902198, -1.8643543, -1.8789856, 2.1722724, -2.068454, 0.59446484, 0.6067899,
1.5525781, 1.7612485, 1.1877432, -0.48152098, -0.16525066, 1.5497208e-06, 1.9815066, 0.55218977,
0.80434155, -3.575598, -2.1471107, -0.57691807, -3.004384, 3.8775828, 3.1358109, -6.2584877e-07,
0.22504184, -2.9021916, 1.0378464, 0.9877456, 0.38395065, -1.6089694, -0.5107449, 1.8621777,
-4.960479, -1.8983803, 1.187743, 0.48151842, -0.1347583, 1.3145843, -0.9968031, -1.3782079,
0.9922035, 1.6614089, -0.83039653, -0.043888614, 1.9431384, -1.6448143, 0.5381324, 1.7159982,
-2.2942696, 0.8984335, 1.3057998, -0.26607463, -3.2994738, -1.9240448, 1.4963659, 2.8365738,
-4.691832, 1.2995429, -2.8377357, 2.8455553, -0.8180722, -0.8330165, -1.3755352, 0.34623986,
-3.7555497, -0.9723124, -1.1528367, -0.593254, -0.023679793, 1.8681414, 0.6809023, -1.7613728,
48.939255, -2.4735928e-05, 1.3455832, 0.11001387, -2.3319814, -1.3735183, -0.6780232, -2.4875786,
0.40718403, -1.0639579, 0.7314569, -1.2665987e-07, 0.97006464, -0.30789328, 3.3290033, 2.7749023,
-0.7520597, -0.98800826, 1.3100916, 1.1514524, 1.1085359, 4.348257, -2.839456, 2.4404035,
0.9518837, 2.1538901, 3.8438358, 2.410589, 3.0649068, 0.95690995, 2.2213395, 0.66509914,
-0.4409917, -0.37408838, -0.6316552, -1.5842111, -0.72352415, -2.5862057, 0.2678757, 0.610149,
2.9564474, 0.08470708, -2.0889034, -8.370071, -0.16373271, 2.0413866, -3.3811545, 2.0487003,
0.0316903, -1.078939, -2.5515578, -0.16135174, -0.17406325, 1.2709827, -0.67006403, -1.6342779,
0.42163712, 2.1418998, -0.96614444, 1.9175051, -0.8538456, 2.8014183e-06, 2.0189362, 0.30467552,
0.5074463, 3.7919073, 2.427857, 0.7526233, -2.4620402, 0.65359443, 0.7219074, -2.3841858e-07,
0.03169757, 1.0789458, -2.1129081, -1.0250417, 4.8181386, -0.39162922, -1.2349386, 1.8470186,
-0.49495277, -1.5516026, -0.96614635, -1.9175065, -0.7235237, 2.5862021, 0.677946, 2.0370173,
-0.29536027, 0.6505451, -2.8572361, 2.3176546, 3.4459226, 1.1869265, -3.3811545, -2.048697,
0.95187366, -2.1538982, 1.808088, -1.1755496, -2.7418838, -1.6770658, -3.5766084, -2.8320727,
-0.02944839, -1.6522555, -0.63165283, 1.5842092, 0.9700667, 0.30789307, 0.5195943, 2.4985125,
3.6537378, -0.5842519, -0.4843334, 0.78346854, 0.84766304, 1.1503224, -2.839459, -2.440402};
const std::vector<float> expected_rdft2d_results_2 = {
25.904434, -8.46386e-06, -5.3626504, 0.3475349, -2.7060094, -5.767444, 1.615847, -2.6387978,
4.020789, 1.4271183, 1.5420923, 0.6126925, -4.6167765, 5.5730343e-06, -0.753784, -0.19148755,
1.4881928, -2.7645326, -0.39467168, 1.014636, 0.5598, -1.7654291, -0.91835654, -2.3019042,
-0.49356225, -0.8411435, 0.080773115, -1.2883577, -0.5341466, 1.4913602, -0.30008763, -0.5831754,
1.7365295, 1.821624, -0.08851206, -1.622279, -0.27249795, -0.834725, -0.6706438, 0.4766277,
0.62642634, 0.5483514, -0.5341469, -1.4913592, 0.8286207, 0.35826343, -1.0869694, -1.4876881,
-1.6723244, -0.06565219, 0.16255295, 0.5317876, -0.75649667, 1.2447717, 0.6264261, -0.5483517,
-0.7537827, 0.19148779, 0.6306459, -0.23442982, 0.57131517, -1.366768, -2.7544713, 1.3638397,
0.43463084, -0.5446956, -2.9949086, 1.4802479, 0.080771565, 1.2883584, 24.998875, -7.390976e-06,
-3.1970425, -1.5453612, 1.0925753, -6.279154, 2.237704, -2.8844912, 1.8841789, -1.3615136,
0.90471864, 0.8395144, -2.6060505, 4.976988e-06, 1.1634235, 0.42319643, 2.678257, 2.4692535,
0.34259582, 0.43598562, 2.748452, 0.88622695, 2.2745323, -2.8840196, 1.8120161, -0.27884078,
-1.5445104, -0.7000726, -1.0264511, -0.7026249, -1.071573, 1.062395, -0.64628685, -0.36214483,
-0.5110928, -1.0534683, -2.786768, 2.6113648, 0.94799054, 0.53423727, -0.69832724, 2.1821892,
-1.0264513, 0.70262754, -0.41705567, -0.17140968, 1.4991179, 2.9674625, -0.012362838, -3.8260121,
-1.5786235, -0.32526863, 1.2857957, 1.7469958, -0.6983267, -2.1821907, 1.1634252, -0.42319855,
0.2716269, 0.21222934, -0.46608746, -1.6447732, 1.8890494, -1.8022469, -0.37335354, 0.69326025,
-0.07385725, -0.1723765, -1.5445105, 0.7000739};
const std::vector<float> expected_rdft3d_results = {
101.805756, -5.2273273e-05, 2.5097876, 3.936094, -2.5597036, -1.8717405, -1.0637736, 0.7801182,
-2.1040666, -1.3385094, -7.9471993, 2.026558e-06, 0.15199316, 0.52512753, 6.7908745, 2.5329556,
0.98875976, 4.755993, 3.157838, 3.190782, 1.4353466, 1.6061276, -2.158554, 4.201776,
-1.3423799, 1.2554499, 3.5570183, -0.8320818, 2.263445, 0.36719292, 0.7579028, -1.8762131,
-0.32408538, -0.87544185, -3.4693956, -4.429764, -0.85828185, -3.9007902, -2.0141544, 0.4111499,
2.8994608, 0.21030927, -2.6786098, -10.127857, -0.6911557, 1.0018079, -2.8430226, 0.33270124,
0.25672907, 1.8232578, -4.4159126, -2.040338, 1.9982092, -0.7974717, -0.07559925, -1.0274884,
1.9742157, 3.9031482, 0.22159882, 1.4359848, -1.0190966, 3.2186508e-06, 4.0004425, 0.8568655,
1.3117876, 0.2163087, 0.28074512, 0.17570588, -5.466423, 4.531178, 3.857718, -1.2516975e-06,
0.2567385, -1.823246, -1.0750613, -0.037295938, 5.20209, -2.0005994, -1.7456844, 3.7091968,
-5.45543, -3.4499822, 0.22159535, -1.4359887, -0.8582816, 3.9007854, -0.31885874, 0.65880924,
0.6968423, 2.3119528, -3.6876333, 2.273767, 5.38906, -0.45788872, -2.8430223, -0.33269957,
-1.3423961, -1.2554631, 3.1138885, -1.4416232, -6.0413575, -3.6011095, -2.080242, 0.0045015216,
-4.7212796, -0.3527125, -3.4693892, 4.429763, 0.15199506, -0.52512354, -0.85594195, 2.8447511,
-0.10181111, -1.5565643, -1.6371696, 0.19021615, 0.8239815, 3.018465, -2.158556, -4.2017746,
3.9272437, -3.9339066e-06, -0.18137527, 3.7160687, 2.1042633, 0.8752967, 0.29226887, 5.755277,
-2.9184306, 0.78941, -9.410112, 3.0100346e-06, -1.7881365, 1.140914, 0.13286811, -3.01685,
2.4928799, 6.7320104, 0.5376528, 0.88787735, -0.78172505, -7.0903873, 3.5203578, -0.6790314,
-3.246148, -3.0523329, -4.1306543, -5.653259, -3.866367, -1.5466263, -3.6847744, -3.2064118,
0.5578996, -0.12726665, -2.2060838, -1.2613428, 0.588767, 1.2716217, -2.5499039, -0.8091496,
-3.0134337, 0.0408957, 1.4991964, 6.6122847, -0.36368948, -3.0809648, 3.9192853, -3.764699,
0.19334978, 3.9811373, 0.68720365, -1.717634, 2.346336, -3.3394372, 1.2645291, 2.241068,
1.1309403, -0.3806507, 2.1538877, -2.3990266, 0.6885946, -1.4901161e-06, -0.037429705, 0.24751475,
0.2968948, -7.367506, -4.574969, -1.329541, -0.5423446, 3.2239883, 2.4139037, 2.9802322e-07,
0.19334424, -3.9811373, 3.1507545, 2.0127864, -4.4341884, -1.2173393, 0.72419256, 0.015158802,
-4.4655256, -0.34677732, 2.1538897, 2.3990245, 0.5887663, -1.2716188, -1.6747494, -3.415226,
1.2875631, 1.0108626, 2.0268395, -2.3615427, -1.502785, -2.8317401, 3.919288, 3.764695,
-3.2461433, 3.0523314, -0.5022881, 0.9094755, -0.55759126, -0.24697942, 5.0729737, 5.668646,
-4.662384, 2.9517999, -2.2060819, 1.2613468, -1.7881389, -1.1409098, -1.8951292, -2.1522717,
-7.4092865, -0.38806117, -0.6685039, -1.3767233, -0.8713439, 0.71781945, 3.5203605, 0.6790297};
const std::vector<float> expected_rdft3d_results_2 = {
50.90331, -1.4543533e-05, -8.559692, -1.1978266, -1.6134334, -12.046599, 3.8535514, -5.5232873,
5.9049683, 0.065603495, 2.4468107, 1.4522064, -7.222825, 1.2278557e-05, 0.40963984, 0.231709,
4.16645, -0.29528028, -0.052075505, 1.450621, 3.3082519, -0.8792013, 1.356175, -5.1859245,
1.3184534, -1.1199851, -1.4637363, -1.9884299, -1.5605974, 0.7887349, -1.3716602, 0.47921878,
1.0902424, 1.4594792, -0.59960556, -2.6757474, -3.0592656, 1.7766399, 0.27734682, 1.0108652,
-0.07190053, 2.7305403, -1.5605986, -0.78873086, 0.41156515, 0.18685403, 0.4121489, 1.4797752,
-1.6846865, -3.8916636, -1.4160703, 0.20651829, 0.52929974, 2.9917672, -0.07190076, -2.7305427,
0.4096415, -0.23171037, 0.9022726, -0.022200808, 0.10522783, -3.0115416, -0.8654218, -0.4384073,
0.061277367, 0.14856634, -3.0687659, 1.3078697, -1.4637384, 1.9884316, 25.904425, -24.998884,
-6.9080105, 3.5445771, -8.985163, -6.860018, -1.2686447, -4.8765025, 2.6592734, -0.45706248,
2.3816066, -0.29202732, -4.6167727, 2.6060565, -0.33058774, -1.3549114, 3.9574459, -5.44279,
0.041313916, 0.67204094, 1.446027, -4.5138807, -3.8023772, -4.576436, -0.7724026, -2.6531591,
-0.6192993, 0.25615194, -1.2367722, 2.5178113, 0.7623075, 0.48839718, 1.3743844, 2.4679115,
-1.1419809, -1.1111865, 2.3388672, 1.9520425, -0.13640736, -0.47136223, 2.8086162, 1.2466785,
0.16848034, -0.46490768, 0.6572111, 0.7753189, 1.8804929, -2.9868064, -5.498336, -0.053289652,
-0.16271627, 2.1104114, 0.9904991, -0.041024223, -1.5557647, 0.14997506, -1.1769819, -0.9719368,
0.8428756, -0.5060569, -1.0734584, -0.9006812, -4.556718, -0.5252099, 1.1278908, -0.17134166,
-3.1672862, 1.5541049, 0.78084624, 2.8328683, 0.90555733, -1.3709068e-06, -2.1656086, 1.8928962,
-3.7985847, 0.511709, -0.62185717, 0.24569236, 2.1366088, 2.7886305, 0.6373716, -0.2268233,
-2.0107267, 5.662441e-07, -1.9172084, -0.6146841, -1.1900643, -5.233785, -0.73726743, 0.5786506,
-2.188651, -2.6516552, -3.1928902, 0.58211625, -2.305578, -0.5623034, 1.6252834, -0.58828497,
0.49230486, 2.1939852, 0.7714851, -1.6455705, 2.382816, 2.1837692, 0.4225806, -0.56881106,
2.514269, -3.4460905, -1.618634, -0.057608932, 1.3247533, -1.6338379, 0.49230492, -2.1939862,
1.2456759, 0.5296728, -2.5860875, -4.45515, -1.659962, 3.7603593, 1.7411764, 0.8570565,
-2.0422916, -0.50222373, 1.3247528, 1.633839, -1.9172082, 0.6146865, 0.35901868, -0.44665974,
1.0374024, 0.27800465, -4.6435204, 3.1660864, 0.8079842, -1.2379556, -2.921052, 1.6526239,
1.6252828, 0.588284, 25.90444, 24.998867, -3.817289, -2.8495073, 3.573144, -4.6748676,
4.500339, -0.40109348, 5.382302, 3.3112957, 0.7025763, 1.5174108, -4.616783, -2.6060438,
-1.1769816, 0.97193646, -0.9810596, -0.086276084, -0.83065766, 1.3572321, -0.3264265, 0.9830234,
1.9656628, -0.027371943, -0.2147214, 0.9708719, 0.7808455, -2.8328671, 0.16847888, 0.46490908,
-1.3624828, -1.6547482, 2.0986745, 1.1753378, 0.9649557, -2.1333718, -2.8838634, -3.6214924,
-1.2048804, 1.4246187, -1.5557631, -0.14997569, -1.2367743, -2.5178103, 1.0000296, -0.05879204,
-4.0544314, 0.01142931, 2.153687, -0.078014135, 0.4878212, -1.0468364, -2.503492, 2.5305676,
2.808617, -1.2466786, -0.33058444, 1.3549128, 0.41841656, 0.03719666, 2.216088, -1.8328552,
-0.95222485, 3.2528882, -0.25863037, -0.91804826, -2.822532, 1.4063904, -0.6193025, -0.25615215};
const std::vector<float> expected_irdft1d_results_2 = {
0.10606494, 0.7454715, 0.5723136, 0.45824113, 0.38470596, 0.59909415, 0.12294642, 0.38957405, 0.9749881,
0.46759906, 0.62263334, 0.26012173, 0.5555176, 0.40498644, 0.14175594, 0.23095612, 0.097079255, 0.24574867,
0.6907576, 0.197432, 0.066900685, 0.7468896, 0.39028254, 0.5357583, 0.0604293, 0.18176621, 0.8629975,
0.45142898, 0.64972955, 0.15937212, 0.49721566, 0.7669216, 0.6797584, 0.13026062, 0.6587438, 0.9456854,
0.7216646, 0.14389832, 0.7930529, 0.7089534, 0.6324895, 0.85201263, 0.5689623, 0.023386315, 0.55464643,
0.1438762, 0.63858664, 0.10003753, 0.8906633, 0.06681056, 0.66083544, 0.7771131, 0.24588616, 0.013456774,
0.35584506, 0.54311025, 0.53678054, 0.41312417, 0.7752337, 0.10966951, 0.19275223, 0.007190934, 0.608798,
0.344136, 0.46909887, 0.06686333, 0.971793, 0.65702224, 0.117866985, 0.31540743, 0.8895306, 0.99193364,
0.18322523, 0.81854534, 0.5663911, 0.34374166, 0.72168875, 0.6152569, 0.7667953, 0.58608514, 0.026113158,
0.2982243, 0.76183707, 0.3331729, 0.5468192, 0.43364936, 0.089349344, 0.41172776, 0.5850257, 0.7073037,
0.8312039, 0.5198719, 0.4011136, 0.9837578, 0.7770344, 0.72782516, 0.8030205, 0.9101716, 0.019964492,
0.13768478};
const std::vector<float> expected_irdft1d_results_3 = {
0.80091053, 1.548053, 1.3439665, 0.97278523, 0.65876126, 1.6395509, 1.0939313, 1.5905306, 0.81558955,
1.1096439, 2.0799308, 1.9659967, 0.21628714, 1.2937224, 1.7173465, 1.5190675, 0.62673247, 1.3878733,
2.2457566, 1.2779983, 0.9537279, 1.5238736, 1.6959155, 0.9063804, 1.2134336, 1.4805167, 1.277886,
0.9217217, 1.3267591, 2.0169291, 2.619178, 0.7248324, 1.4161175, 1.3378929, 1.6759893, 0.85183966,
0.53280216, 1.4385536, 1.7184939, 1.3292406, 1.1811583, 0.9698347, 1.5283158, 1.3752562, 0.99182767,
1.3061998, 1.7824118, 1.399513, 0.26604116, 1.3193192, 1.5053986, 1.0388529, 0.9190526, 1.4711612,
2.0971189, 0.37586892, 1.5662622, 1.6827406, 1.208139, 1.0144035, 0.96595216, 2.1122026, 1.6039357,
0.44462752, 0.34932646, 1.487859, 0.9802158, 1.0321891, 0.4064213, 1.7653472, 1.5080582, 0.75743484,
1.2409652, 2.0487022, 1.567386, 0.68034726, 1.5328329, 1.2476723, 1.3539927, 0.8549268};
const std::vector<float> expected_irdft2d_results_1 = {
0.106065355, 0.7454709, 0.5723129, 0.45824066, 0.384706, 0.27398905, 0.6679619, 0.39547434, 0.2815724,
0.779919, 0.59909385, 0.122946456, 0.38957337, 0.97498655, 0.46759892, 0.14017127, 0.04206834, 0.72799486,
0.61560476, 0.9027304, 0.6226336, 0.2601218, 0.5555171, 0.4049862, 0.14175594, 0.57774574, 0.52652067,
0.9385676, 0.958878, 0.9844308, 0.2309568, 0.0970796, 0.24574815, 0.6907565, 0.19743192, 0.8295261,
0.3461272, 0.5140136, 0.66115695, 0.93362343, 0.06690116, 0.74688905, 0.39028272, 0.53575796, 0.060429227,
0.89135474, 0.77787286, 0.67011875, 0.73505205, 0.6636992, 0.18176568, 0.8629964, 0.4514285, 0.6497283,
0.15937214, 0.40598106, 0.7988508, 0.72915316, 0.07090413, 0.76971227, 0.49721542, 0.7669206, 0.6797579,
0.13026048, 0.6587432, 0.24532847, 0.24545121, 0.83795464, 0.10549038, 0.72643167, 0.94568396, 0.72166353,
0.14389817, 0.79305094, 0.7089523, 0.9724684, 0.9775141, 0.49999753, 0.6556916, 0.2687679, 0.6324893,
0.85201234, 0.5689621, 0.023386242, 0.5546462, 0.36860004, 0.9603104, 0.3912346, 0.038073156, 0.8921232,
0.14387667, 0.63858616, 0.10003737, 0.8906622, 0.06681097, 0.74586314, 0.4545233, 0.54724485, 0.6496472,
0.7818348, 0.6608358, 0.77711284, 0.24588637, 0.0134570245, 0.35584468, 0.8038809, 0.027993381, 0.7367708,
0.52754945, 0.90523165, 0.54310995, 0.5367796, 0.41312343, 0.7752323, 0.10966998, 0.13664615, 0.7828726,
0.9083951, 0.524759, 0.7493586, 0.19275239, 0.0071907635, 0.60879755, 0.34413564, 0.4690983, 0.4192482,
0.70729065, 0.1993285, 0.5303842, 0.65138334, 0.06686333, 0.97179186, 0.657022, 0.11786719, 0.3154068,
0.8892283, 0.55640805, 0.9104763, 0.28466636, 0.093467236, 0.88953, 0.9919328, 0.18322526, 0.8185441,
0.56639117, 0.014208457, 0.29673028, 0.6347738, 0.68019533, 0.39601144, 0.34374115, 0.72168803, 0.61525595,
0.76679367, 0.5860848, 0.42768106, 0.7933919, 0.13130645, 0.68764144, 0.05312841, 0.026113434, 0.2982238,
0.7618365, 0.3331724, 0.5468184, 0.15707079, 0.28592035, 0.15286529, 0.9368952, 0.35067078, 0.43364897,
0.089348935, 0.41172677, 0.58502454, 0.7073026, 0.85983366, 0.08878795, 0.2671109, 0.8002475, 0.19422255,
0.83120316, 0.5198712, 0.40111288, 0.98375624, 0.77703446, 0.03781964, 0.7042304, 0.68980736, 0.17102323,
0.42153904, 0.7278248, 0.80301994, 0.91017085, 0.019965423, 0.13768451, 0.556689, 0.17991383, 0.6720085,
0.7733324, 0.20881362};
const std::vector<float> expected_irdft2d_results_2 = {
0.10606504, 0.74547091, 0.57231341, 0.45824085, 0.38470576, 0.27398939, 0.66796227, 0.39547472, 0.28157284,
0.77991920, 0.00000012, 0.00000025, 0.59909402, 0.12294612, 0.38957398, 0.97498753, 0.46759871, 0.14017182,
0.04206866, 0.72799575, 0.61560553, 0.90273150, 0.00000029, 0.00000019, 0.62263335, 0.26012139, 0.55551768,
0.40498611, 0.14175560, 0.57774629, 0.52652119, 0.93856842, 0.95887877, 0.98443111, 0.00000026, 0.00000029,
0.23095626, 0.09707905, 0.24574875, 0.69075717, 0.19743158, 0.82952691, 0.34612741, 0.51401454, 0.66115784,
0.93362381, 0.00000013, 0.00000019, 0.06690087, 0.74688917, 0.39028283, 0.53575807, 0.06042910, 0.89135566,
0.77787371, 0.67011938, 0.73505260, 0.66369919, 0.00000020, 0.00000025, 0.66083517, 0.77711292, 0.24588620,
0.01345654, 0.35584463, 0.80388178, 0.02799342, 0.73677143, 0.52754998, 0.90523178, 0.00000020, 0.00000022,
0.54311002, 0.53678006, 0.41312413, 0.77523314, 0.10966939, 0.13664682, 0.78287364, 0.90839633, 0.52475940,
0.74935884, 0.00000017, 0.00000024, 0.19275220, 0.00719083, 0.60879792, 0.34413568, 0.46909855, 0.41924857,
0.70729118, 0.19932858, 0.53038468, 0.65138356, 0.00000024, 0.00000004, 0.06686326, 0.97179258, 0.65702215,
0.11786667, 0.31540699, 0.88922984, 0.55640881, 0.91047768, 0.28466661, 0.09346649, 0.00000006, 0.00000008,
0.88953045, 0.99193334, 0.18322520, 0.81854497, 0.56639084, 0.01420842, 0.29673067, 0.63477397, 0.68019596,
0.39601113, 0.00000014, 0.00000022};
// Expected values for the IRDFT_params_4d entry with axes {0, 1, 2} and signal size {4, 5, 12}
// (output shape {4, 5, 12}); the table holds 4 * 5 * 12 = 240 floats.
const std::vector<float> expected_irdft3d_results_2 = {
0.29655575, 0.59799123, 0.22431113, 0.46143103, 0.53208175, 0.32705094, 0.59367000, 0.29963828, 0.41763943,
0.24033307, 0.42796425, 0.56577777, 0.37677909, 0.32099129, 0.28778578, 0.50527716, 0.39592624, -0.01477019,
0.46390174, 0.48881302, 0.69299017, 0.69097986, 0.60120016, 0.82729206, -0.09137908, 0.49852066, 0.41157645,
0.50370176, 0.50602146, 0.12422646, 0.66381460, 0.40124601, 0.71138931, 0.66414101, 0.50896081, 0.51854765,
0.21342740, 0.75042767, 0.40385838, 0.28173387, 0.29258505, 0.34233110, 0.44617152, 0.32590713, 0.69813927,
0.27029157, 0.49500125, 0.57849153, 0.52079012, 0.46437605, 0.44842544, 0.21380078, 0.57897044, 0.32123390,
0.46531573, 0.55946432, 0.36995799, 0.19326348, 0.26279333, 0.89411452, 0.45806675, 0.58413552, 0.47982321,
0.40877153, 0.23978246, 0.33369794, 0.56433968, 0.09308288, 0.20574836, 0.51936717, 0.46905154, 0.47775696,
0.17856948, 0.04195880, 0.24284739, 0.63731160, 0.16159543, 0.08925854, 0.50157161, 0.67721677, 0.75653236,
0.50840554, 0.73467008, 0.62163510, 0.00566245, 0.92257200, 0.42133956, 0.45249607, 0.36451271, 0.46674756,
0.65809363, 0.29478180, 0.79919561, 0.37987672, 0.46803394, 0.20036376, 0.30268271, 0.62990812, 0.29745090,
0.46503467, 0.30444576, 0.43581755, 0.38956261, 0.58891618, 0.43936615, 0.12833645, 0.82411153, 0.30960669,
0.24676315, 0.39269569, 0.26772071, 0.46022705, 0.77598541, 0.46882716, 0.40922151, 0.28451272, 0.27156988,
0.32720683, 0.48740341, 0.52519462, 0.47371313, 0.61046947, 0.46505542, 0.04019986, 0.27622309, 0.42926452,
0.49897225, 0.04617115, 0.50902017, 0.74826910, 0.28548445, 0.63409441, 0.13183664, 0.02507987, 0.51695660,
0.50593892, 0.17335312, 0.24157819, 0.45513622, 0.69800550, 0.40604969, 0.47128647, 0.59389774, 0.33534107,
0.50887902, 0.82998967, 0.22642939, 0.32967160, 0.50515564, 0.54070049, 0.28947697, 0.35626388, 0.58235507,
0.30633221, 0.50041779, 0.24975602, 0.38320678, 0.40595842, 0.50651077, 0.42963483, 0.25977121, 0.32014694,
0.37577291, 0.46638206, 0.05511259, 0.45463482, 0.62685054, 0.13046773, 0.49768469, 0.47645129, 0.56182954,
0.74548830, 0.73150766, 0.37579758, 0.14279248, 0.28705593, 0.45403320, 0.50334282, 0.24132925, 0.24104091,
0.31220213, 0.62432518, 0.20954334, 0.09285936, 0.56852238, 0.42261752, 0.52830257, 0.25272655, 0.72091123,
0.46923499, 0.24439716, 0.72211522, 0.33004626, 0.30411236, 0.56189500, 0.37390448, 0.40768394, 0.13754946,
0.41746636, 0.50960175, 0.34250750, 0.65386079, 0.46042782, 0.54099804, 0.41183749, 0.40593833, 0.21666628,
0.38087729, 0.64666439, 0.19817938, 0.29519793, 0.46272810, 0.49454878, 0.59059650, 0.54134465, 0.56793991,
0.29395146, 0.52647797, 0.61291826, 0.24633402, 0.24791051, 0.22666050, 0.43238182, 0.20337301, 0.31388571,
0.59658993, 0.29774026, 0.39935257, 0.77171166, 0.54813165, 0.74253426, 0.49906203, 0.53449270, 0.22820431,
0.19888670, 0.56200754, 0.55242130, 0.36939947, 0.01671917, 0.60996081};
std::vector<dft_params> extendByOneDimension(const std::vector<dft_params>& params_vector) {
std::vector<dft_params> extended_params;
for (auto params : params_vector) {
params.input_shape.insert(params.input_shape.cbegin(), 1);
params.output_shape.insert(params.output_shape.cbegin(), 1);
for (auto& axis : params.axes) {
++axis;
}
extended_params.push_back(std::move(params));
}
return extended_params;
}
// Parameter sets for the forward complex DFT suites; INSTANTIATE_DFT_TEST_SUITE picks this table
// up through the dftType##_params_##dimension token paste (DFT, 4d).
// Every entry starts with {input shape}, {output shape}, {axes}; presumably followed by the
// signal-size list, the input values and the expected values — confirm against dft_params.
// NOTE(review): entries appear both with and without a braced list after the axes; confirm that
// only one of the two forms is meant to remain.
const std::vector<dft_params> DFT_params_4d = {
// With original shape
{{2, 10, 10, 2}, {2, 10, 10, 2}, {2}, input_data, expected_dft1d_results},
{{2, 10, 10, 2}, {2, 10, 10, 2}, {1, 2}, input_data, expected_dft2d_results},
{{2, 10, 10, 2}, {2, 10, 10, 2}, {0, 1, 2}, input_data, expected_dft3d_results},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {2}, input_data_1, expected_dft1d_results_1},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {1, 2}, input_data_1, expected_dft2d_results_1},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1}, input_data_1, expected_dft2d_signal_size_results_2},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 2}, input_data_1, expected_dft2d_signal_size_results_4},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1, 2}, input_data_1, expected_dft3d_results_1},
{{2, 10, 10, 2}, {2, 10, 10, 2}, {2}, {}, input_data, expected_dft1d_results},
{{2, 10, 10, 2}, {2, 10, 10, 2}, {1, 2}, {}, input_data, expected_dft2d_results},
{{2, 10, 10, 2}, {2, 10, 10, 2}, {0, 1, 2}, {}, input_data, expected_dft3d_results},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {2}, {}, input_data_1, expected_dft1d_results_1},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {1, 2}, {}, input_data_1, expected_dft2d_results_1},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1}, {4, 6}, input_data_1, expected_dft2d_signal_size_results_2},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 2}, {4, 8}, input_data_1, expected_dft2d_signal_size_results_4},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1, 2}, {}, input_data_1, expected_dft3d_results_1},
// With changed shape
{{2, 10, 10, 2}, {2, 20, 10, 2}, {1}, input_data, expected_dft1d_signal_size_results},
{{4, 6, 8, 2}, {5, 6, 9, 2}, {0, 2}, input_data_1, expected_dft2d_signal_size_results_1},
{{4, 6, 8, 2}, {3, 6, 4, 2}, {0, 2}, input_data_1, expected_dft2d_signal_size_results_3},
{{4, 6, 8, 2}, {5, 6, 4, 2}, {0, 2}, input_data_1, expected_dft2d_signal_size_results_5},
{{4, 6, 8, 2}, {3, 7, 5, 2}, {0, 1, 2}, input_data_1, expected_dft3d_signal_size_results},
{{2, 10, 10, 2}, {2, 20, 10, 2}, {1}, {20}, input_data, expected_dft1d_signal_size_results},
{{4, 6, 8, 2}, {5, 6, 9, 2}, {0, 2}, {5, 9}, input_data_1, expected_dft2d_signal_size_results_1},
{{4, 6, 8, 2}, {3, 6, 4, 2}, {0, 2}, {3, 4}, input_data_1, expected_dft2d_signal_size_results_3},
{{4, 6, 8, 2}, {5, 6, 4, 2}, {0, 2}, {5, 4}, input_data_1, expected_dft2d_signal_size_results_5},
{{4, 6, 8, 2}, {3, 7, 5, 2}, {0, 1, 2}, {3, 7, 5}, input_data_1, expected_dft3d_signal_size_results},
};
// The same cases with a leading dimension of 1 added by extendByOneDimension.
const std::vector<dft_params> DFT_params_5d = extendByOneDimension(DFT_params_4d);
// Parameter sets for the inverse complex DFT suites (IDFT, 4d / 5d).
// NOTE(review): the idft_params opener below is immediately followed by the IDFT_params_4d opener
// and has no initializer of its own — confirm which declaration is intended to remain.
// NOTE(review): entries appear both with and without a braced list after the axes, and the two
// forms list the data tables in opposite order — confirm which form matches dft_params.
const std::vector<idft_test_params> idft_params = {
const std::vector<dft_params> IDFT_params_4d = {
// Inversed to DFT with original shape cases
{{2, 10, 10, 2}, {2, 10, 10, 2}, {2}, input_data, expected_dft1d_results},
{{2, 10, 10, 2}, {2, 10, 10, 2}, {1, 2}, input_data, expected_dft2d_results},
{{2, 10, 10, 2}, {2, 10, 10, 2}, {0, 1, 2}, input_data, expected_dft3d_results},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {2}, input_data_1, expected_dft1d_results_1},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {1, 2}, input_data_1, expected_dft2d_results_1},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1}, input_data_1, expected_dft2d_signal_size_results_2},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 2}, input_data_1, expected_dft2d_signal_size_results_4},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1, 2}, input_data_1, expected_dft3d_results_1},
{{2, 10, 10, 2}, {2, 10, 10, 2}, {2}, {}, expected_dft1d_results, input_data},
{{2, 10, 10, 2}, {2, 10, 10, 2}, {1, 2}, {}, expected_dft2d_results, input_data},
{{2, 10, 10, 2}, {2, 10, 10, 2}, {0, 1, 2}, {}, expected_dft3d_results, input_data},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {2}, {}, expected_dft1d_results_1, input_data_1},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {1, 2}, {}, expected_dft2d_results_1, input_data_1},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1}, {4, 6}, expected_dft2d_signal_size_results_2, input_data_1},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 2}, {4, 8}, expected_dft2d_signal_size_results_4, input_data_1},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1, 2}, {}, expected_dft3d_results_1, input_data_1},
// Other cases with original shape
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1}, expected_idft2d_signal_size_results_2, input_data_1, "v2"},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 2}, expected_idft2d_signal_size_results_4, input_data_1, "v2"},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1}, {4, 6}, input_data_1, expected_idft2d_signal_size_results_2, "v2"},
{{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 2}, {4, 8}, input_data_1, expected_idft2d_signal_size_results_4, "v2"},
// With changed shape
{{4, 6, 8, 2}, {4, 7, 8, 2}, {1}, expected_idft1d_signal_size_results, input_data_1},
{{4, 6, 8, 2}, {5, 6, 9, 2}, {0, 2}, expected_idft2d_signal_size_results_1, input_data_1},
{{4, 6, 8, 2}, {3, 6, 4, 2}, {0, 2}, expected_idft2d_signal_size_results_3, input_data_1},
{{4, 6, 8, 2}, {5, 6, 4, 2}, {0, 2}, expected_idft2d_signal_size_results_5, input_data_1},
{{4, 6, 8, 2}, {3, 7, 5, 2}, {0, 1, 2}, expected_idft3d_signal_size_results, input_data_1},
{{4, 6, 8, 2}, {4, 7, 8, 2}, {1}, {7}, input_data_1, expected_idft1d_signal_size_results},
{{4, 6, 8, 2}, {5, 6, 9, 2}, {0, 2}, {5, 9}, input_data_1, expected_idft2d_signal_size_results_1},
{{4, 6, 8, 2}, {3, 6, 4, 2}, {0, 2}, {3, 4}, input_data_1, expected_idft2d_signal_size_results_3},
{{4, 6, 8, 2}, {5, 6, 4, 2}, {0, 2}, {5, 4}, input_data_1, expected_idft2d_signal_size_results_5},
{{4, 6, 8, 2}, {3, 7, 5, 2}, {0, 1, 2}, {3, 7, 5}, input_data_1, expected_idft3d_signal_size_results},
};
// The same cases with a leading dimension of 1 added by extendByOneDimension.
const std::vector<dft_params> IDFT_params_5d = extendByOneDimension(IDFT_params_4d);
// Parameter sets for the forward real DFT (RDFT) suites. Input shapes are 3D; output shapes gain
// a trailing dimension of 2. In the entries below the last transformed axis has size 6 for signal
// size 10 (or the default), 3 for signal size 5 and 7 for signal size 12.
// The fourth braced list is presumably the signal size (empty = default) — confirm against dft_params.
const std::vector<dft_params> RDFT_params_3d = {
{{2, 10, 10}, {2, 10, 6, 2}, {2}, {}, rinput_data, expected_rdft1d_results_1},
{{2, 10, 10}, {2, 10, 6, 2}, {2}, {10}, rinput_data, expected_rdft1d_results_1},
{{2, 10, 10}, {2, 10, 3, 2}, {2}, {5}, rinput_data, expected_rdft1d_results_2},
{{2, 10, 10}, {2, 10, 7, 2}, {2}, {12}, rinput_data, expected_rdft1d_results_3},
{{2, 10, 10}, {2, 10, 6, 2}, {1, 2}, {}, rinput_data, expected_rdft2d_results},
{{2, 10, 10}, {2, 10, 6, 2}, {1, 2}, {10, 10}, rinput_data, expected_rdft2d_results},
{{2, 10, 10}, {2, 5, 7, 2}, {1, 2}, {5, 12}, rinput_data, expected_rdft2d_results_2},
{{2, 10, 10}, {2, 10, 6, 2}, {0, 1, 2}, {}, rinput_data, expected_rdft3d_results},
{{2, 10, 10}, {4, 5, 7, 2}, {0, 1, 2}, {4, 5, 12}, rinput_data, expected_rdft3d_results_2},
};
// The same cases with a leading dimension of 1 added by extendByOneDimension.
const std::vector<dft_params> RDFT_params_4d = extendByOneDimension(RDFT_params_3d);
// Parameter sets for the inverse real DFT (IRDFT) suites. Input shapes end in a dimension of 2
// and output shapes drop it. Several entries feed an RDFT expected-result table back in and
// expect rinput_data as the result.
// The fourth braced list is presumably the signal size (empty = default) — confirm against
// dft_params. When it is non-empty it carries one value per axis.
const std::vector<dft_params> IRDFT_params_4d = {
    {{2, 10, 6, 2}, {2, 10, 10}, {2}, {}, expected_rdft1d_results_1, rinput_data},
    {{2, 10, 6, 2}, {2, 10, 10}, {2}, {10}, expected_rdft1d_results_1, rinput_data},
    {{2, 10, 3, 2}, {2, 10, 5}, {2}, {5}, expected_rdft1d_results_2, expected_irdft1d_results_2},
    {{2, 10, 7, 2}, {2, 10, 4}, {2}, {4}, expected_rdft1d_results_3, expected_irdft1d_results_3},
    {{2, 10, 6, 2}, {2, 10, 10}, {1, 2}, {}, expected_rdft2d_results, rinput_data},
    {{2, 10, 6, 2}, {2, 10, 10}, {1, 2}, {10, 10}, expected_rdft2d_results, rinput_data},
    {{2, 5, 7, 2}, {2, 5, 12}, {1, 2}, {5, 12}, expected_rdft2d_results_2, expected_irdft2d_results_2},
    // Three axes with the default signal size, mirroring the forward {0, 1, 2} case in
    // RDFT_params_3d. This entry used to pass {10, 10}, i.e. two sizes for three axes.
    {{2, 10, 6, 2}, {2, 10, 10}, {0, 1, 2}, {}, expected_rdft3d_results, rinput_data},
    {{2, 10, 6, 2}, {4, 5, 12}, {0, 1, 2}, {4, 5, 12}, expected_rdft3d_results, expected_irdft3d_results_2},
};
// The same cases with a leading dimension of 1 added by extendByOneDimension.
const std::vector<dft_params> IRDFT_params_5d = extendByOneDimension(IRDFT_params_4d);
// Formats used by the 4d suites: INSTANTIATE_DFT_TEST_SUITE pairs plain_format_4d with every
// entry of blocked_format_4d (which also lists the plain bfyx format itself).
const format plain_format_4d = format::bfyx;
const std::vector<format> blocked_format_4d = {
format::bfyx,
format::b_fs_yx_fsv16,
format::b_fs_yx_fsv32,
format::bs_fs_yx_bsv16_fsv16,
format::bs_fs_yx_bsv32_fsv16,
format::bs_fs_yx_bsv32_fsv32,
};
// Instantiates dft_gpu_test_f32 once per entry of dft_params; case names come from
// PrintToStringParamName.
// NOTE(review): dft_params is used as a value here but as an element type in the
// std::vector<dft_params> tables of this file — confirm this instantiation is still wanted.
INSTANTIATE_TEST_SUITE_P(smoke_dft_float32,
dft_gpu_test_f32,
testing::ValuesIn(dft_params),
dft_gpu_test_f32::PrintToStringParamName);
// Formats used by the 5d suites: INSTANTIATE_DFT_TEST_SUITE pairs plain_format_5d with every
// entry of blocked_format_5d (which also lists the plain bfzyx format itself).
const format plain_format_5d = format::bfzyx;
const std::vector<format> blocked_format_5d = {
format::bfzyx,
format::b_fs_zyx_fsv16,
format::b_fs_zyx_fsv32,
format::bs_fs_zyx_bsv16_fsv32,
format::bs_fs_zyx_bsv16_fsv16,
format::bs_fs_zyx_bsv32_fsv32,
format::bs_fs_zyx_bsv32_fsv16,
};
// Instantiates dft_gpu_test_f16 once per entry of dft_params; case names come from
// PrintToStringParamName.
// NOTE(review): dft_params is used as a value here but as an element type in the
// std::vector<dft_params> tables of this file — confirm this instantiation is still wanted.
INSTANTIATE_TEST_SUITE_P(smoke_dft_float16,
dft_gpu_test_f16,
testing::ValuesIn(dft_params),
dft_gpu_test_f16::PrintToStringParamName);
// Fixture aliases per element type. The suffixes (float, half_t) are the inputType tokens that
// INSTANTIATE_DFT_TEST_SUITE pastes into dft_gpu_test_##inputType.
using dft_gpu_test_float = dft_gpu_test<float>;
using dft_gpu_test_half_t = dft_gpu_test<half_t>;
// Instantiates idft_gpu_test_f32 once per entry of idft_params; case names come from
// PrintToStringParamName.
// NOTE(review): confirm idft_params and idft_gpu_test_f32 are still defined; the parameter tables
// in this file are named <type>_params_<dimension> and the fixtures dft_gpu_test_<type>.
INSTANTIATE_TEST_SUITE_P(smoke_idft_float32,
idft_gpu_test_f32,
testing::ValuesIn(idft_params),
idft_gpu_test_f32::PrintToStringParamName);
// Parameterized test body for the float fixture: delegates to the fixture's test() and asserts
// that the call raised no fatal failure.
TEST_P(dft_gpu_test_float, test) {
ASSERT_NO_FATAL_FAILURE(test());
}
// Instantiates idft_gpu_test_f16 once per entry of idft_params; case names come from
// PrintToStringParamName.
// NOTE(review): confirm idft_params and idft_gpu_test_f16 are still defined; the parameter tables
// in this file are named <type>_params_<dimension> and the fixtures dft_gpu_test_<type>.
INSTANTIATE_TEST_SUITE_P(smoke_idft_float16,
idft_gpu_test_f16,
testing::ValuesIn(idft_params),
idft_gpu_test_f16::PrintToStringParamName);
// Parameterized test body for the half_t fixture: delegates to the fixture's test() and asserts
// that the call raised no fatal failure.
TEST_P(dft_gpu_test_half_t, test) {
ASSERT_NO_FATAL_FAILURE(test());
}
// Instantiates one suite named smoke_<dftType>_<dimension>_<inputType> for the fixture
// dft_gpu_test_<inputType>. The parameters are the combination of plain_format_<dimension>,
// every entry of blocked_format_<dimension>, the dftType token itself, and every entry of
// <dftType>_params_<dimension>. All of those names must already be declared at the point of use.
#define INSTANTIATE_DFT_TEST_SUITE(dftType, dimension, inputType) \
INSTANTIATE_TEST_SUITE_P(smoke_##dftType##_##dimension##_##inputType, \
dft_gpu_test_##inputType, \
testing::Combine(testing::Values(plain_format_##dimension), \
testing::ValuesIn(blocked_format_##dimension), \
testing::Values(dftType), \
testing::ValuesIn(dftType##_params_##dimension)), \
dft_gpu_test_##inputType::PrintToStringParamName);
// Expands INSTANTIATE_DFT_TEST_SUITE twice for the given type and dimension: once for the float
// fixture and once for the half_t fixture.
#define INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(dftType, dimension) \
INSTANTIATE_DFT_TEST_SUITE(dftType, dimension, float) \
INSTANTIATE_DFT_TEST_SUITE(dftType, dimension, half_t)
// Suite instantiations: each line below generates a float and a half_t suite for the named
// operation type and dimension, using the matching <type>_params_<dimension> table.
INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(DFT, 4d)
INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(DFT, 5d)
INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(IDFT, 4d)
INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(IDFT, 5d)
INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(RDFT, 4d)
// We don't have 5d tests for RDFT, as it doesn't support 5d blocked input, because output would need to be 6d blocked
INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(IRDFT, 4d)
INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(IRDFT, 5d)
} // namespace

View File

@@ -8,15 +8,21 @@
namespace {
// Operation directions and input precisions that the DFT layer suites are combined over.
// NOTE(review): opTypes is declared twice in this span, and the precision list appears under both
// inputPrecision and inputPrecisions — confirm which declarations should remain.
const std::vector<ngraph::helpers::DFTOpType> opTypes = {ngraph::helpers::DFTOpType::FORWARD,
ngraph::helpers::DFTOpType::INVERSE};
const std::vector<InferenceEngine::Precision> inputPrecision = {InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16};
const std::vector<ngraph::helpers::DFTOpType> opTypes = {
ngraph::helpers::DFTOpType::FORWARD,
ngraph::helpers::DFTOpType::INVERSE,
};
const std::vector<InferenceEngine::Precision> inputPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16,
};
const auto combine = [](const std::vector<InferenceEngine::SizeVector>& inputShapes,
const std::vector<std::vector<int64_t>>& axes,
const std::vector<std::vector<int64_t>>& signalSizes) {
return testing::Combine(testing::ValuesIn(inputShapes),
testing::ValuesIn(inputPrecision),
testing::ValuesIn(inputPrecisions),
testing::ValuesIn(axes),
testing::ValuesIn(signalSizes),
testing::ValuesIn(opTypes),
@@ -27,39 +33,58 @@ using namespace LayerTestsDefinitions;
// DFTLayerTest suite instantiations, one per input rank or axis layout. Each passes the input
// shapes, axes and signal sizes to combine() and names its cases with getTestCaseName.
// NOTE(review): several instantiations below carry two consecutive combine(...) argument lists
// (one with two input shapes and negative axes, one with a single shape) — confirm that only one
// list per instantiation is meant to remain.
INSTANTIATE_TEST_SUITE_P(smoke_DFT_2d,
DFTLayerTest,
combine({{10, 2}, {1, 2}}, // input shapes
{{0}, {-1}}, // axes
{{}, {5}}), // signal sizes
combine({{10, 2}}, // input shapes
{{0}}, // axes
{{}, {3}}), // signal sizes
DFTLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_DFT_3d,
DFTLayerTest,
combine({{10, 4, 2}, {1, 17, 2}}, // input shapes
{{0, 1}, {-1, -2}}, // axes
{{}, {5, 2}}), // signal sizes
combine({{10, 4, 2}}, // input shapes
{{0, 1}}, // axes
{{}, {3, 10}}), // signal sizes
DFTLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_DFT_4d,
DFTLayerTest,
combine({{10, 4, 8, 2}, {1, 17, 12, 2}}, // input shapes
{{0, 1, 2}, {-1, -2, -3}}, // axes
{{}, {5, 2, 5}}), // signal sizes
combine({{10, 4, 8, 2}}, // input shapes
{{0, 1, 2}}, // axes
{{}, {3, 10, 8}}), // signal sizes
DFTLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_DFT_4d_negative_reversed_axes,
DFTLayerTest,
combine({{10, 4, 8, 2}}, // input shapes
{{-1, -2, -3}}, // axes
{{}, {8, 10, 3}}), // signal sizes
DFTLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_DFT_4d_single_axis,
DFTLayerTest,
combine({{10, 4, 8, 2}}, // input shapes
{{0}, {1}, {2}}, // axes
{{}, {1}, {5}, {20}}), // signal sizes
DFTLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_DFT_5d,
DFTLayerTest,
combine({{10, 4, 8, 2, 2}, {1, 17, 12, 1, 2}}, // input shapes
{{0, 1, 2, 3}, {-1, -2, -3, -4}}, // axes
{{}, {5, 2, 5, 20}}), // signal sizes
combine({{10, 4, 8, 2, 2}}, // input shapes
{{0, 1, 2, 3}}, // axes
{{}, {3, 10, 8, 6}}), // signal sizes
DFTLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_DFT_6d,
DFTLayerTest,
combine({{10, 4, 8, 2, 5, 2}, {1, 17, 12, 1, 7, 2}}, // input shapes
{{0, 1, 2, 3, 4}, {-1, -2, -3, -4, -5}}, // axes
{{}, {5, 2, 5, 20, 10}}), // signal sizes
combine({{10, 4, 8, 2, 5, 2}}, // input shapes
{{0, 1, 2, 3, 4}}, // axes
{{}, {3, 10, 8, 6, 2}}), // signal sizes
DFTLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_DFT_6d_zero,
DFTLayerTest,
combine({{10, 4, 8, 2, 5, 2}, {1, 17, 12, 1, 7, 2}}, // input shapes
{{}}, // axes
{{}}), // signal sizes
combine({{10, 4, 8, 2, 5, 2}}, // input shapes
{{}}, // axes
{{}}), // signal sizes
DFTLayerTest::getTestCaseName);
} // namespace

View File

@@ -0,0 +1,102 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <common_test_utils/test_constants.hpp>
#include <single_layer_tests/rdft.hpp>
#include <vector>
namespace {
// Operation directions that combine() crosses with every shape/axes/signal-size tuple.
const std::vector<ngraph::helpers::DFTOpType> opTypes = {
ngraph::helpers::DFTOpType::FORWARD,
ngraph::helpers::DFTOpType::INVERSE,
};
// Input precisions exercised by every suite in this file.
const std::vector<InferenceEngine::Precision> inputPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16,
};
// Builds the parameter generator shared by the combine()-based suites below: the given shapes,
// axes and signal sizes combined with every entry of inputPrecisions and opTypes, always on the
// GPU device. The argument order of testing::Combine is the order of the test parameter tuple.
const auto combine = [](const std::vector<InferenceEngine::SizeVector>& inputShapes,
                        const std::vector<std::vector<int64_t>>& axes,
                        const std::vector<std::vector<int64_t>>& signalSizes) {
    const auto shapeValues = testing::ValuesIn(inputShapes);
    const auto precisionValues = testing::ValuesIn(inputPrecisions);
    const auto axesValues = testing::ValuesIn(axes);
    const auto signalSizeValues = testing::ValuesIn(signalSizes);
    const auto opTypeValues = testing::ValuesIn(opTypes);
    const auto deviceValues = testing::Values(CommonTestUtils::DEVICE_GPU);
    return testing::Combine(shapeValues,
                            precisionValues,
                            axesValues,
                            signalSizeValues,
                            opTypeValues,
                            deviceValues);
};
using namespace LayerTestsDefinitions;
// RDFT can support 1d
// Forward direction only: a single shape {10} transformed over axis 0 with the default
// (empty) signal size, for every input precision. Built with testing::Combine directly because
// combine() would also add the inverse direction.
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_1d,
RDFTLayerTest,
testing::Combine(testing::Values(InferenceEngine::SizeVector{10}),
testing::ValuesIn(inputPrecisions),
testing::Values(std::vector<int64_t>{0}),
testing::Values(std::vector<int64_t>{}),
testing::Values(ngraph::helpers::DFTOpType::FORWARD),
testing::Values(CommonTestUtils::DEVICE_GPU)),
RDFTLayerTest::getTestCaseName);
// 3D shape transformed over axes {0, 1}, with the default and an explicit {3, 10} signal size;
// combine() adds both directions and both precisions.
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_3d,
RDFTLayerTest,
combine({{10, 4, 2}}, // input shapes
{{0, 1}}, // axes
{{}, {3, 10}}), // signal sizes
RDFTLayerTest::getTestCaseName);
// 4D shape transformed over axes {0, 1, 2}, with the default and an explicit {3, 10, 8} signal
// size; combine() adds both directions and both precisions.
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d,
RDFTLayerTest,
combine({{10, 4, 8, 2}}, // input shapes
{{0, 1, 2}}, // axes
{{}, {3, 10, 8}}), // signal sizes
RDFTLayerTest::getTestCaseName);
// Same 4D shape as smoke_RDFT_4d, but the axes are given as negative indices in reverse order;
// the explicit signal sizes {8, 10, 3} are the {3, 10, 8} list reversed to match.
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d_negative_reversed_axes,
RDFTLayerTest,
combine({{10, 4, 8, 2}}, // input shapes
{{-1, -2, -3}}, // axes
{{}, {8, 10, 3}}), // signal sizes
RDFTLayerTest::getTestCaseName);
// One transformed axis at a time (0, 1 or 2) on the 4D shape, crossed with the default signal
// size and the explicit sizes 1, 5 and 20.
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d_single_axis,
RDFTLayerTest,
combine({{10, 4, 8, 2}}, // input shapes
{{0}, {1}, {2}}, // axes
{{}, {1}, {5}, {20}}), // signal sizes
RDFTLayerTest::getTestCaseName);
// 5D shape transformed over axes {0, 1, 2, 3}, with the default and an explicit {3, 10, 8, 6}
// signal size; combine() adds both directions and both precisions.
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_5d,
RDFTLayerTest,
combine({{10, 4, 8, 2, 2}}, // input shapes
{{0, 1, 2, 3}}, // axes
{{}, {3, 10, 8, 6}}), // signal sizes
RDFTLayerTest::getTestCaseName);
// RDFT can support last axis
// Forward direction only: all five axes of the 5D shape are transformed, including the last
// one (axis 4), with the default and an explicit {3, 10, 8, 6, 2} signal size.
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_5d_last_axis,
RDFTLayerTest,
testing::Combine(testing::Values(InferenceEngine::SizeVector{10, 4, 8, 2, 5}),
testing::ValuesIn(inputPrecisions),
testing::ValuesIn(std::vector<std::vector<int64_t>>{{{0, 1, 2, 3, 4}}}),
testing::ValuesIn(std::vector<std::vector<int64_t>>{{}, {3, 10, 8, 6, 2}}),
testing::Values(ngraph::helpers::DFTOpType::FORWARD),
testing::Values(CommonTestUtils::DEVICE_GPU)),
RDFTLayerTest::getTestCaseName);
// IRDFT can support 6d
// Inverse direction only: a 6D shape transformed over axes {0, 1, 2, 3, 4}, with the default and
// an explicit {3, 10, 8, 6, 2} signal size. The suite keeps the smoke_RDFT_ prefix although only
// DFTOpType::INVERSE is instantiated.
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_6d,
RDFTLayerTest,
testing::Combine(testing::Values(InferenceEngine::SizeVector{10, 4, 8, 2, 5, 2}),
testing::ValuesIn(inputPrecisions),
testing::ValuesIn(std::vector<std::vector<int64_t>>{{{0, 1, 2, 3, 4}}}),
testing::ValuesIn(std::vector<std::vector<int64_t>>{{}, {3, 10, 8, 6, 2}}),
testing::Values(ngraph::helpers::DFTOpType::INVERSE),
testing::Values(CommonTestUtils::DEVICE_GPU)),
RDFTLayerTest::getTestCaseName);
} // namespace