From 4946f6d87b606091d3decb46d13fc0fa2d765c1d Mon Sep 17 00:00:00 2001 From: Mykhailo Hnap Date: Tue, 4 Oct 2022 14:26:50 +0300 Subject: [PATCH] [GPU] Implement (I)RDFT-9 (#12010) * [GPU] Implement (I)RDFT-9 * Added GPU implementation of RDFT operation * Added basic GPU implementation of IRDFT operation, but still needs adjusting * Added single layer tests for (I)RDFT * Added unit tests for (I)RDFT * Added blocked layout support for all DFTs operation * [GPU] Added changes after review. * [GPU] Added single-layer test for single axis case for DFTs. * [GPU] Simplified attach_dft_impl method. * [GPU] Moved negative axes cases to separate test for DFTs. * [GPU] Adjusted tests to cover all signal sizes cases (less, equal and more than input size) for DFTs. * [GPU] Fixed IRDFT implementation. * [GPU] Used RDFTLayerTest. * [GPU] Fixes after rebase. * [GPU] Renamed kind enum to direction. --- .../intel_gpu/plugin/primitives_list.hpp | 2 + .../include/intel_gpu/primitives/dft.hpp | 29 +- src/plugins/intel_gpu/src/graph/dft.cpp | 15 +- .../intel_gpu/src/graph/impls/ocl/dft.cpp | 61 +- .../intel_gpu/src/graph/include/dft_inst.h | 4 +- src/plugins/intel_gpu/src/graph/program.cpp | 3 + .../actual_kernels/dft/dft_kernel_ref.cpp | 171 +++-- .../core/actual_kernels/dft/dft_kernel_ref.h | 10 +- .../core/cl_kernels/dft_ref.cl | 236 ++++-- src/plugins/intel_gpu/src/plugin/ops/dft.cpp | 41 +- .../tests/test_cases/dft_gpu_test.cpp | 682 ++++++++++++++---- .../single_layer_tests/dft.cpp | 71 +- .../single_layer_tests/rdft.cpp | 102 +++ 13 files changed, 1139 insertions(+), 288 deletions(-) create mode 100644 src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/rdft.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index 68508d84700..93f704c20c1 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -238,6 +238,8 @@ REGISTER_FACTORY(v8, Softmax); // ------------------------------ Supported v9 ops ------------------------------ // REGISTER_FACTORY(v9, SoftSign) REGISTER_FACTORY(v9, ROIAlign); +REGISTER_FACTORY(v9, RDFT); +REGISTER_FACTORY(v9, IRDFT); // --------------------------- Supported internal ops --------------------------- // REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/dft.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/dft.hpp index 88d3c542411..5e16b838266 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/dft.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/dft.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include "primitive.hpp" @@ -16,12 +17,18 @@ namespace cldnn { /// @addtogroup cpp_primitives Primitives /// @{ -/// @brief Kind of DFT operation. -enum class dft_kind { +/// @brief Direction of DFT operation. +enum class dft_direction { forward, inverse, }; +/// @brief Mode of DFT operation. +enum class dft_mode { + complex, + real, +}; + /// @brief DFT primitive. struct dft : public primitive_base { CLDNN_DECLARE_PRIMITIVE(dft) @@ -30,22 +37,30 @@ struct dft : public primitive_base { /// @param id This primitive id. /// @param input Input primitive id. /// @param axes Axes to perform DFT. + /// @param signal_size Signal sizes for 'axes'. /// @param output_shape Output shape. - /// @param kind Kind of DFT operation. + /// @param direction Direction of DFT operation. + /// @param mode Mode of DFT operation. dft(const primitive_id& id, const primitive_id& input, - std::vector&& axes, + std::vector axes, + std::vector signal_size, const ov::Shape& output_shape, - dft_kind kind, + dft_direction direction, + dft_mode mode, const padding& output_padding = {}) : primitive_base(id, {input}, output_padding), axes(std::move(axes)), + signal_size(std::move(signal_size)), output_shape(output_shape), - kind(kind) {} + direction(direction), + mode(mode) {} std::vector axes; + std::vector signal_size; ov::Shape output_shape; - dft_kind kind; + dft_direction direction; + dft_mode mode; }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/dft.cpp b/src/plugins/intel_gpu/src/graph/dft.cpp index cb0f9eafd8a..252f7c5a19d 100644 --- a/src/plugins/intel_gpu/src/graph/dft.cpp +++ b/src/plugins/intel_gpu/src/graph/dft.cpp @@ -14,22 +14,27 @@ primitive_type_id dft::type_id() { return &instance; } -layout typed_primitive_inst::calc_output_layout(const dft_node& node, kernel_impl_params const& impl_param) { - auto primitive = impl_param.typed_desc(); - auto input_layout = impl_param.get_input_layout(); +layout dft_inst::calc_output_layout(const dft_node& node, const kernel_impl_params& impl_param) { + const auto primitive = impl_param.typed_desc(); + const auto input_layout = impl_param.get_input_layout(); std::vector dims_converted(primitive->output_shape.begin(), primitive->output_shape.end()); - auto output_format = input_layout.format; + + // Extend output layout for IRDFT case to make output rank match input rank + if (primitive->direction == dft_direction::inverse && primitive->mode == dft_mode::real) { + dims_converted.push_back(1); + } // Extend shape to 4d by pushing ones before the last dim for (auto i = dims_converted.size(); i < 4; ++i) { dims_converted.insert(std::prev(dims_converted.end()), 1); } + const auto output_format = format::adjust_to_rank(input_layout.format, dims_converted.size()); return {input_layout.data_type, output_format, tensor(output_format, dims_converted)}; } -std::string typed_primitive_inst::to_string(const dft_node& node) { +std::string dft_inst::to_string(const dft_node& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); std::ostringstream os; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp index ae724d4c672..619f405f14f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp @@ -24,11 +24,35 @@ struct dft_impl : typed_primitive_impl_ocl { static primitive_impl* create(const dft_node& arg, const kernel_impl_params& impl_param) { auto params = get_default_params(impl_param); - auto primitive = arg.get_primitive(); + const auto primitive = impl_param.typed_desc(); params.axes = primitive->axes; - if (primitive->kind == dft_kind::inverse) { - params.kind = kernel_selector::dft_params::inverse; + + if (primitive->signal_size.empty()) { + params.signal_size = std::vector(params.axes.size(), -1); + } else { + params.signal_size = primitive->signal_size; } + + if (primitive->direction == dft_direction::inverse) { + params.direction = kernel_selector::dft_params::Direction::inverse; + } + if (primitive->mode == dft_mode::real) { + params.mode = kernel_selector::dft_params::Mode::real; + } + + // Extend input layout for RDFT case to make input rank match output rank + if (primitive->direction == dft_direction::forward && primitive->mode == dft_mode::real) { + const auto input_layout = impl_param.get_input_layout(); + const auto output_layout = impl_param.output_layout; + // No need to extend layout for input that has less than 4 dimensions + if (input_layout.get_rank() != output_layout.get_rank()) { + auto new_dims = input_layout.get_dims(); + new_dims.push_back(1); + const auto new_fmt = format::adjust_to_rank(input_layout.format, new_dims.size()); + params.inputs[0] = convert_data_tensor({input_layout.data_type, new_fmt, tensor(new_fmt, new_dims)}); + } + } + auto optional_params = get_default_optional_params(arg.get_program()); auto& kernel_selector = kernel_selector::dft_kernel_selector::Instance(); @@ -46,16 +70,27 @@ struct dft_impl : typed_primitive_impl_ocl { namespace detail { attach_dft_impl::attach_dft_impl() { - implementation_map::add(impl_types::ocl, - dft_impl::create, - { - std::make_tuple(data_types::f16, format::bfyx), - std::make_tuple(data_types::f16, format::bfzyx), - std::make_tuple(data_types::f16, format::bfwzyx), - std::make_tuple(data_types::f32, format::bfyx), - std::make_tuple(data_types::f32, format::bfzyx), - std::make_tuple(data_types::f32, format::bfwzyx), - }); + auto types = {data_types::f16, data_types::f32}; + auto formats = { + // 4d + format::bfyx, + format::b_fs_yx_fsv16, + format::b_fs_yx_fsv32, + format::bs_fs_yx_bsv16_fsv16, + format::bs_fs_yx_bsv32_fsv32, + format::bs_fs_yx_bsv32_fsv16, + // 5d + format::bfzyx, + format::b_fs_zyx_fsv16, + format::b_fs_zyx_fsv32, + format::bs_fs_zyx_bsv16_fsv32, + format::bs_fs_zyx_bsv16_fsv16, + format::bs_fs_zyx_bsv32_fsv32, + format::bs_fs_zyx_bsv32_fsv16, + // 6d + format::bfwzyx, + }; + implementation_map::add(impl_types::ocl, dft_impl::create, types, formats); } } // namespace detail diff --git a/src/plugins/intel_gpu/src/graph/include/dft_inst.h b/src/plugins/intel_gpu/src/graph/include/dft_inst.h index 8cbc0f070bd..86ae5392036 100644 --- a/src/plugins/intel_gpu/src/graph/include/dft_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/dft_inst.h @@ -26,8 +26,10 @@ class typed_primitive_inst : public typed_primitive_inst_base { public: using typed_primitive_inst_base::typed_primitive_inst_base; - static layout calc_output_layout(const dft_node& node, kernel_impl_params const& impl_param); + static layout calc_output_layout(const dft_node& node, const kernel_impl_params& impl_param); static std::string to_string(const dft_node& node); }; +using dft_inst = typed_primitive_inst; + } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index f997bd1c7be..3230269603b 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -41,6 +41,7 @@ #include "input_layout_inst.h" #include "shuffle_channels_inst.h" #include "arg_max_min_inst.h" +#include "dft_inst.h" #include "lstm_inst.h" #include "lstm_elt_inst.h" #include "lstm_gemm_inst.h" @@ -1414,6 +1415,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { prim.as().input().get_output_layout().data_type != data_types::i8) || prim.as().get_primitive()->across_channels) && prim.type() != cldnn::arg_max_min::type_id() && + prim.type() != cldnn::dft::type_id() && prim.type() != cldnn::mutable_data::type_id() && prim.type() != cldnn::reduce::type_id() && prim.type() != cldnn::strided_slice::type_id() && @@ -1449,6 +1451,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { prim.type() != cldnn::reshape::type_id() && prim.type() != cldnn::input_layout::type_id() && prim.type() != cldnn::activation::type_id() && + prim.type() != cldnn::dft::type_id() && prim.type() != cldnn::softmax::type_id() && prim.type() != cldnn::fully_connected::type_id() && prim.type() != cldnn::generic_layer::type_id() && diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_ref.cpp index e2f02e638a2..7a821bb19ba 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_ref.cpp @@ -6,6 +6,9 @@ #include +#include +#include + namespace kernel_selector { namespace { @@ -15,30 +18,32 @@ CommonDispatchData SetDefault(const dft_params& params) { const auto in_layout = params.inputs.front().GetLayout(); const auto& output = params.outputs.front(); const auto out_layout = output.GetLayout(); + const auto out_rank = output.Dimentions(); + std::vector> dims_by_gws; // We are skipping X, since it contains complex pairs and always has dimension 2 - switch (out_layout) { - case DataLayout::bfyx: + switch (out_rank) { + case 4: dispatch_data.gws = {output.Y().v, output.Feature().v, output.Batch().v}; dims_by_gws = {{Tensor::DataChannelName::Y}, {Tensor::DataChannelName::FEATURE}, {Tensor::DataChannelName::BATCH}}; break; - case DataLayout::bfzyx: + case 5: dispatch_data.gws = {output.Y().v, output.Z().v, output.Feature().v * output.Batch().v}; dims_by_gws = {{Tensor::DataChannelName::Y}, {Tensor::DataChannelName::Z}, {Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH}}; break; - case DataLayout::bfwzyx: + case 6: dispatch_data.gws = {output.Y().v, output.Z().v * output.W().v, output.Feature().v * output.Batch().v}; dims_by_gws = {{Tensor::DataChannelName::Y}, {Tensor::DataChannelName::Z, Tensor::DataChannelName::W}, {Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH}}; break; default: - throw std::invalid_argument("Unsupported data layout for dft primitive"); + throw std::invalid_argument("Unsupported output rank for dft primitive"); } dispatch_data.lws = @@ -48,8 +53,7 @@ CommonDispatchData SetDefault(const dft_params& params) { } template -void MakeJitConstForAxis(JitConstants& jit, const DataLayout& layout, int64_t index, T value) { - std::string name = "AXIS"; +void MakeJitConstForParam(JitConstants& jit, const std::string& name, size_t rank, int64_t index, T value) { switch (index) { case 0: jit.AddConstant(MakeJitConstant(name + "_BATCH", value)); @@ -58,18 +62,18 @@ void MakeJitConstForAxis(JitConstants& jit, const DataLayout& layout, int64_t in jit.AddConstant(MakeJitConstant(name + "_FEATURE", value)); break; case 2: - if (layout == DataLayout::bfwzyx) { + if (rank == 6) { jit.AddConstant(MakeJitConstant(name + "_W", value)); - } else if (layout == DataLayout::bfzyx) { + } else if (rank == 5) { jit.AddConstant(MakeJitConstant(name + "_Z", value)); - } else { // DataLayout::bfyx + } else { // rank == 4 jit.AddConstant(MakeJitConstant(name + "_Y", value)); } break; case 3: - if (layout == DataLayout::bfwzyx) { + if (rank == 6) { jit.AddConstant(MakeJitConstant(name + "_Z", value)); - } else { // DataLayout::bfzyx + } else { // rank == 5 jit.AddConstant(MakeJitConstant(name + "_Y", value)); } break; @@ -77,31 +81,94 @@ void MakeJitConstForAxis(JitConstants& jit, const DataLayout& layout, int64_t in jit.AddConstant(MakeJitConstant(name + "_Y", value)); break; default: - throw std::invalid_argument("Unsupported axis for dft primitive"); + throw std::invalid_argument("Unsupported index for dft primitive"); } } } // namespace KernelsData DFTKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { - KernelsData kernels_data; if (!Validate(params, options)) { - return kernels_data; + return {}; } - kernels_data.push_back(KernelData::Default(params)); - KernelData& kernel_data = kernels_data.front(); - auto& derived_params = dynamic_cast(*kernel_data.params.get()); - auto dispatch_data = SetDefault(derived_params); - auto entry_point = GetEntryPoint(kernelName, derived_params.layerID, params, options); - auto jit_constants = GetJitConstants(derived_params); - auto jit = CreateJit(kernelName, jit_constants, entry_point); - auto& clKernelData = kernel_data.kernels[0]; - FillCLKernelData(clKernelData, dispatch_data, params.engineInfo, kernelName, jit, entry_point); - return kernels_data; -} -KernelsPriority DFTKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { - return DONT_USE_IF_HAVE_SOMETHING_ELSE; + KernelData kd = KernelData::Default(params); + const auto& derived_params = dynamic_cast(params); + + // For IRDFT case we create two kernels with different data + // First, do IDFT on outer axes and input data + // Second, do IRDFT on the last axis and data from the first kernel + if (derived_params.mode == dft_params::Mode::real && derived_params.direction == dft_params::Direction::inverse && + derived_params.axes.size() > 1) { + // Helper vector + std::vector> kernels_params; + + // Fill IDFT kernel data + auto idft_params = derived_params; + idft_params.mode = dft_params::Mode::complex; + idft_params.axes.pop_back(); + idft_params.signal_size.pop_back(); + const cldnn::arguments_desc idft_arguments{{ArgumentDescriptor::Types::INPUT, 0}, + {ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}}; + + auto& idft_input = idft_params.inputs.front(); + auto idft_input_sizes = idft_input.LogicalDims(); + // NOTE: This is a small workaround for a 3d case + // We always should have first dimension equal to 2, so we swap it with the second dimension + if (idft_input_sizes[0] == 1) { + std::swap(idft_input_sizes[0], idft_input_sizes[1]); + idft_input = DataTensor(idft_input_sizes, idft_input.GetDType(), idft_input.GetLayout()); + } + + // Calculate IDFT output sizes + auto idft_output_sizes = idft_input_sizes; + auto& idft_output = idft_params.outputs.front(); + for (const auto& axis : idft_params.axes) { + auto inverted_axis = idft_output_sizes.size() - 1 - axis; + idft_output_sizes[inverted_axis] = idft_output.LogicalDims()[inverted_axis]; + } + idft_output = DataTensor(idft_output_sizes, idft_input.GetDType(), idft_input.GetLayout()); + + // Set internal buffer + kd.internalBufferDataType = idft_input.GetDType(); + kd.internalBufferSizes.push_back(idft_output.PhysicalSizeInBytes()); + + // Fill IRDFT kernel data + auto irdft_params = derived_params; + irdft_params.inputs.front() = idft_output; + irdft_params.axes = {derived_params.axes.back()}; + irdft_params.signal_size = {derived_params.signal_size.back()}; + const cldnn::arguments_desc irdft_arguments{{ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}, + {ArgumentDescriptor::Types::OUTPUT, 0}}; + + // Fill kernels + kernels_params.emplace_back(idft_params, idft_arguments); + kernels_params.emplace_back(irdft_params, irdft_arguments); + const auto kKernelsNum = kernels_params.size(); + kd.kernels.resize(kKernelsNum); + for (size_t i = 0; i < kKernelsNum; ++i) { + dft_params kernel_params; + cldnn::arguments_desc kernel_arguments; + std::tie(kernel_params, kernel_arguments) = kernels_params[i]; + + const auto dispatch_data = SetDefault(kernel_params); + const auto entry_point = GetEntryPoint(kernelName, kernel_params.layerID, params, options, i); + const auto jit_constants = GetJitConstants(kernel_params); + const auto jit = CreateJit(kernelName, jit_constants, entry_point); + auto& clKernelData = kd.kernels[i]; + FillCLKernelData(clKernelData, dispatch_data, kernel_params.engineInfo, kernelName, jit, entry_point); + clKernelData.params.arguments = kernel_arguments; + } + } else { + const auto dispatch_data = SetDefault(derived_params); + const auto entry_point = GetEntryPoint(kernelName, derived_params.layerID, derived_params, options); + const auto jit_constants = GetJitConstants(derived_params); + const auto jit = CreateJit(kernelName, jit_constants, entry_point); + auto& clKernelData = kd.kernels[0]; + FillCLKernelData(clKernelData, dispatch_data, derived_params.engineInfo, kernelName, jit, entry_point); + } + + return {kd}; } ParamsKey DFTKernelRef::GetSupportedKey() const { @@ -110,12 +177,8 @@ ParamsKey DFTKernelRef::GetSupportedKey() const { k.EnableInputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); - k.EnableInputLayout(DataLayout::bfyx); - k.EnableInputLayout(DataLayout::bfzyx); - k.EnableInputLayout(DataLayout::bfwzyx); - k.EnableOutputLayout(DataLayout::bfyx); - k.EnableOutputLayout(DataLayout::bfzyx); - k.EnableOutputLayout(DataLayout::bfwzyx); + k.EnableAllInputLayout(); + k.EnableAllOutputLayout(); k.EnableBatching(); k.EnableTensorOffset(); k.EnableTensorPitches(); @@ -137,23 +200,49 @@ bool DFTKernelRef::Validate(const Params& p, const optional_params& o) const { JitConstants DFTKernelRef::GetJitConstants(const dft_params& params) const { auto jit = MakeBaseParamsJitConstants(params); - const auto out_layout = params.outputs.front().GetLayout(); + const auto out_rank = params.outputs.front().Dimentions(); const auto out_sizes = params.outputs.front().LogicalDims(); const auto in_sizes = params.inputs.front().LogicalDims(); - - // We are skipping X, since it contains complex pairs and should not be in axes const auto dims_size = in_sizes.size() - 1; + auto signal_sizes = out_sizes; size_t s = 1; - for (auto axis : params.axes) { + for (size_t i = 0; i < params.axes.size(); ++i) { // opencl kernels have inverted order of dimensions with respect to axis spec: x is smallest index, b is largest + auto axis = params.axes[i]; auto inverted_axis = dims_size - axis; - s *= out_sizes[inverted_axis]; - MakeJitConstForAxis(jit, out_layout, axis, std::min(out_sizes[inverted_axis], in_sizes[inverted_axis])); + auto signal_size = params.signal_size[i]; + + // For RDFT case, we need to take signal size into account, as output size can be not the same as signal size + if (params.mode == dft_params::Mode::real && params.direction == dft_params::Direction::forward) { + if (signal_size != -1) { + signal_sizes[inverted_axis] = signal_size; + } else { + signal_sizes[inverted_axis] = in_sizes[inverted_axis]; + } + } + + s *= signal_sizes[inverted_axis]; + + // NOTE: We can use full signal size as axis value, but this doesn't make much sense, as it will be zero-padded + // So, we take minimum size here and save some dummy cycles in kernel + auto axis_value = std::min(signal_sizes[inverted_axis], in_sizes[inverted_axis]); + + // For IRDFT case, we should use full signal size as axis value and interpret input data as Hermitian-symmetric + if (params.mode == dft_params::Mode::real && params.direction == dft_params::Direction::inverse) { + axis_value = signal_sizes[inverted_axis]; + MakeJitConstForParam(jit, "SYMMETRIC_AXIS", out_rank, axis, true); + } + + MakeJitConstForParam(jit, "AXIS", out_rank, axis, axis_value); + MakeJitConstForParam(jit, "SIGNAL_SIZE", out_rank, axis, signal_sizes[inverted_axis]); } - if (params.kind == dft_params::inverse) { + if (params.direction == dft_params::Direction::inverse) { jit.AddConstant(MakeJitConstant("INVERSE_DFT_MULTIPLIER", 1.f / s)); } + if (params.mode == dft_params::Mode::real) { + jit.AddConstant(MakeJitConstant("REAL_DFT", true)); + } return jit; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_ref.h index 05013d575c9..268b266d453 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/dft/dft_kernel_ref.h @@ -10,10 +10,15 @@ namespace kernel_selector { struct dft_params : public base_params { std::vector axes; - enum Kind { + std::vector signal_size; + enum class Direction { forward, inverse, - } kind = forward; + } direction = Direction::forward; + enum class Mode { + complex, + real, + } mode = Mode::complex; dft_params() : base_params{KernelType::DFT} {} }; @@ -23,7 +28,6 @@ struct dft_optional_params : optional_params { class DFTKernelRef : public KernelBaseOpenCL { KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; - KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; ParamsKey GetSupportedKey() const override; bool Validate(const Params& p, const optional_params& o) const override; JitConstants GetJitConstants(const dft_params& params) const; diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/dft_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/dft_ref.cl index 5eb0170ad09..b0748963998 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/dft_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/dft_ref.cl @@ -6,16 +6,15 @@ // alternative: https://github.com/OpenCL/ComplexMath/blob/master/clcomplex.h typedef float2 cfloat; -#define real(a) ((a).s0) -#define imag(a) ((a).s1) -#define cmult(a, b) ((cfloat)(real(a) * real(b) - imag(a) * imag(b), real(a) * imag(b) + imag(a) * real(b))) -#define crmult(a, b) ((cfloat)(real(a) * (b), imag(a) * (b))) -#define cadd(a, b) ((cfloat)(real(a) + real(b), imag(a) + imag(b))) -#define expi(x) ((cfloat)(cos(x), sin(x))) -#define expmi(x) ((cfloat)(cos(x), -sin(x))) -#define cload(p, offset, pitch) ((cfloat)((p)[offset], (p)[(offset) + (pitch)])) -#define cstore(p, offset, pitch, x) ((p)[offset] = real(x), (p)[(offset) + (pitch)] = imag(x)) -#define czero() ((cfloat)(0)) +#define real(a) ((a).s0) +#define imag(a) ((a).s1) +#define cmult(a, b) ((cfloat)(real(a) * real(b) - imag(a) * imag(b), real(a) * imag(b) + imag(a) * real(b))) +#define crmult(a, b) ((cfloat)(real(a) * (b), imag(a) * (b))) +#define cadd(a, b) ((cfloat)(real(a) + real(b), imag(a) + imag(b))) +#define expi(x) ((cfloat)(cos(x), sin(x))) +#define expmi(x) ((cfloat)(cos(x), -sin(x))) +#define conj(x) ((cfloat)(real(x), -imag(x))) +#define czero() ((cfloat)(0)) // TODO: pregenerate e{r,i} array on host in macro. maybe it could be done with kernel which runs once? KERNEL(dft_ref)(const __global INPUT0_TYPE* data, __global OUTPUT_TYPE* output) { @@ -23,105 +22,218 @@ KERNEL(dft_ref)(const __global INPUT0_TYPE* data, __global OUTPUT_TYPE* output) const uint dim1 = get_global_id(1); const uint dim2 = get_global_id(2); - const uint x = 0; - const uint y = dim0; + uint y = dim0; #if OUTPUT_DIMS == 4 -# define ORDER b, f, y, x -# define ORDER_K kb, kf, ky, x - const uint f = dim1; - const uint b = dim2; +# define ORDER_REAL b, f, y, 0 +# define ORDER_IMAG b, f, y, 1 + uint f = dim1; + uint b = dim2; #elif OUTPUT_DIMS == 5 -# define ORDER b, f, z, y, x -# define ORDER_K kb, kf, kz, ky, x - const uint z = dim1; - const uint f = dim2 % OUTPUT_FEATURE_NUM; - const uint b = dim2 / OUTPUT_FEATURE_NUM; +# define ORDER_REAL b, f, z, y, 0 +# define ORDER_IMAG b, f, z, y, 1 + uint z = dim1; + uint f = dim2 % OUTPUT_FEATURE_NUM; + uint b = dim2 / OUTPUT_FEATURE_NUM; #elif OUTPUT_DIMS == 6 -# define ORDER b, f, w, z, y, x -# define ORDER_K kb, kf, kw, kz, ky, x - const uint z = dim1 % OUTPUT_SIZE_Z; - const uint w = dim1 / OUTPUT_SIZE_Z; - const uint f = dim2 % OUTPUT_FEATURE_NUM; - const uint b = dim2 / OUTPUT_FEATURE_NUM; +# define ORDER_REAL b, f, w, z, y, 0 +# define ORDER_IMAG b, f, w, z, y, 1 + uint z = dim1 % OUTPUT_SIZE_Z; + uint w = dim1 / OUTPUT_SIZE_Z; + uint f = dim2 % OUTPUT_FEATURE_NUM; + uint b = dim2 / OUTPUT_FEATURE_NUM; +#endif + + const uint output_real_index = GET_INDEX(OUTPUT, ORDER_REAL); +#if !defined(REAL_DFT) || !defined(INVERSE_DFT_MULTIPLIER) + const uint output_imag_index = GET_INDEX(OUTPUT, ORDER_IMAG); #endif // TODO: use OUTPUT_TYPE for intermediate calculations? // We don't use it for now as we will lose a lot of precision for f16 and tests won't pass +#if defined(REAL_DFT) && defined(INVERSE_DFT_MULTIPLIER) + float Y = 0; +#else cfloat Y = czero(); +#endif const float PI2 = M_PI_F * 2; #ifdef AXIS_Y - const float ay = PI2 * y / OUTPUT_SIZE_Y; + const float ay = PI2 * y / SIGNAL_SIZE_Y; #endif #ifdef AXIS_Z - const float az = PI2 * z / OUTPUT_SIZE_Z; + const float az = PI2 * z / SIGNAL_SIZE_Z; #endif #ifdef AXIS_W - const float aw = PI2 * w / OUTPUT_SIZE_W; + const float aw = PI2 * w / SIGNAL_SIZE_W; #endif #ifdef AXIS_FEATURE - const float af = PI2 * f / OUTPUT_FEATURE_NUM; + const float af = PI2 * f / SIGNAL_SIZE_FEATURE; #endif #ifdef AXIS_BATCH - const float ab = PI2 * b / OUTPUT_BATCH_NUM; + const float ab = PI2 * b / SIGNAL_SIZE_BATCH; #endif #ifdef AXIS_BATCH - for (uint kb = 0; kb < AXIS_BATCH; ++kb) -#else -# define kb b + for (b = 0; b < AXIS_BATCH; ++b) #endif #ifdef AXIS_FEATURE - for (uint kf = 0; kf < AXIS_FEATURE; ++kf) -#else -# define kf f + for (f = 0; f < AXIS_FEATURE; ++f) #endif #ifdef AXIS_W - for (uint kw = 0; kw < AXIS_W; ++kw) -#else -# define kw w + for (w = 0; w < AXIS_W; ++w) #endif #ifdef AXIS_Z - for (uint kz = 0; kz < AXIS_Z; ++kz) -#else -# define kz z + for (z = 0; z < AXIS_Z; ++z) #endif #ifdef AXIS_Y - for (uint ky = 0; ky < AXIS_Y; ++ky) -#else -# define ky y + for (y = 0; y < AXIS_Y; ++y) #endif { float a = 0; #ifdef AXIS_Y - a += ay * ky; + a += ay * y; #endif #ifdef AXIS_Z - a += az * kz; + a += az * z; #endif #ifdef AXIS_W - a += aw * kw; + a += aw * w; #endif #ifdef AXIS_FEATURE - a += af * kf; + a += af * f; #endif #ifdef AXIS_BATCH - a += ab * kb; + a += ab * b; #endif - const cfloat X = cload(data, GET_INDEX(INPUT0, ORDER_K), INPUT0_X_PITCH); -#ifdef INVERSE_DFT_MULTIPLIER - const cfloat E = expi(a); + +#ifdef REAL_DFT +# ifdef INVERSE_DFT_MULTIPLIER +# if OUTPUT_DIMS == 4 +# define SYMMETRIC_ORDER_REAL sb, sf, sy, 0 +# define SYMMETRIC_ORDER_IMAG sb, sf, sy, 1 +# elif OUTPUT_DIMS == 5 +# define SYMMETRIC_ORDER_REAL sb, sf, sz, sy, 0 +# define SYMMETRIC_ORDER_IMAG sb, sf, sz, sy, 1 +# elif OUTPUT_DIMS == 6 +# define SYMMETRIC_ORDER_REAL sb, sf, sw, sz, sy, 0 +# define SYMMETRIC_ORDER_IMAG sb, sf, sw, sz, sy, 1 +# endif + bool is_zero = false; + bool is_conj = false; +# ifdef SYMMETRIC_AXIS_BATCH + uint sb = b; + if (sb > OUTPUT_BATCH_NUM / 2) { + sb = OUTPUT_BATCH_NUM - sb; + is_conj = true; + } + if (sb >= INPUT0_BATCH_NUM) { + is_zero = true; + } +# else +# define sb b +# endif +# ifdef SYMMETRIC_AXIS_FEATURE + uint sf = f; + if (sf > OUTPUT_FEATURE_NUM / 2) { + sf = OUTPUT_FEATURE_NUM - sf; + is_conj = true; + } + if (sf >= INPUT0_FEATURE_NUM) { + is_zero = true; + } +# else +# define sf f +# endif +# ifdef SYMMETRIC_AXIS_W + uint sw = w; + if (sw > OUTPUT_SIZE_W / 2) { + sw = OUTPUT_SIZE_W - sw; + is_conj = true; + } + if (sw >= INPUT0_SIZE_W) { + is_zero = true; + } +# else +# define sw w +# endif +# ifdef SYMMETRIC_AXIS_Z + uint sz = z; + if (sz > OUTPUT_SIZE_Z / 2) { + sz = OUTPUT_SIZE_Z - sz; + is_conj = true; + } + if (sz >= INPUT0_SIZE_Z) { + is_zero = true; + } +# else +# define sz z +# endif +# ifdef SYMMETRIC_AXIS_Y + uint sy = y; + if (sy > OUTPUT_SIZE_Y / 2) { + sy = OUTPUT_SIZE_Y - sy; + is_conj = true; + } + if (sy >= INPUT0_SIZE_Y) { + is_zero = true; + } +# else +# define sy y +# endif + cfloat X = czero(); + if (!is_zero) { + const uint input_real_index = GET_INDEX(INPUT0, SYMMETRIC_ORDER_REAL); + const uint input_imag_index = GET_INDEX(INPUT0, SYMMETRIC_ORDER_IMAG); + X = (cfloat)(data[input_real_index], data[input_imag_index]); + if (is_conj) { + X = conj(X); + } + } +# else + const uint input_real_index = GET_INDEX(INPUT0, ORDER_REAL); + const float X = data[input_real_index]; +# endif +// clang-format off #else - const cfloat E = expmi(a); + const uint input_real_index = GET_INDEX(INPUT0, ORDER_REAL); + const uint input_imag_index = GET_INDEX(INPUT0, ORDER_IMAG); + const cfloat X = (cfloat)(data[input_real_index], data[input_imag_index]); #endif - Y = cadd(Y, cmult(X, E)); - } #ifdef INVERSE_DFT_MULTIPLIER - Y = crmult(Y, INVERSE_DFT_MULTIPLIER); +// No need to calculate E for IRDFT case, as we will calculate manually later +# ifndef REAL_DFT + const cfloat E = expi(a); +# endif +#else + const cfloat E = expmi(a); #endif - cstore(output, GET_INDEX(OUTPUT, ORDER), OUTPUT_X_PITCH, Y); +#ifdef REAL_DFT +# ifdef INVERSE_DFT_MULTIPLIER + Y += real(X) * cos(a) - imag(X) * sin(a); +# else + Y = cadd(Y, crmult(E, X)); +# endif +#else + Y = cadd(Y, cmult(X, E)); +#endif + } +// clang-format on +#ifdef INVERSE_DFT_MULTIPLIER +# ifdef REAL_DFT + Y *= INVERSE_DFT_MULTIPLIER; +# else + Y = crmult(Y, INVERSE_DFT_MULTIPLIER); +# endif +#endif + +#if defined(REAL_DFT) && defined(INVERSE_DFT_MULTIPLIER) + output[output_real_index] = Y; +#else + output[output_real_index] = real(Y); + output[output_imag_index] = imag(Y); +#endif } #undef real @@ -135,5 +247,5 @@ KERNEL(dft_ref)(const __global INPUT0_TYPE* data, __global OUTPUT_TYPE* output) #undef cstore #undef czero #undef GET_INDEX -#undef ORDER -#undef ORDER_K +#undef ORDER_REAL +#undef ORDER_IMAG diff --git a/src/plugins/intel_gpu/src/plugin/ops/dft.cpp b/src/plugins/intel_gpu/src/plugin/ops/dft.cpp index af1d17c3d72..d6533449106 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/dft.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/dft.cpp @@ -13,39 +13,64 @@ namespace intel_gpu { namespace { -void createDft(Program& p, const std::shared_ptr& op, cldnn::dft_kind kind) { +void createDft(Program& p, + const std::shared_ptr& op, + cldnn::dft_direction direction, + cldnn::dft_mode mode) { validate_inputs_count(op, {2, 3}); const auto inputs = p.GetInputPrimitiveIDs(op); const auto layer_name = layer_type_name_ID(op); - const auto& op_friendly_name = op->get_friendly_name(); + const auto& friendly_name = op->get_friendly_name(); const auto& out_shape = op->get_output_shape(0); auto axes_constant = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(1)); if (!axes_constant) { - IE_THROW() << "Unsupported parameter nodes type in " << op_friendly_name << " (" << op->get_type_name() << ")"; + IE_THROW() << "Unsupported parameter nodes type in " << friendly_name << " (" << op->get_type_name() << ")"; } auto axes = axes_constant->cast_vector(); - const uint8_t data_rank = out_shape.size(); - ov::normalize_axes(op.get(), data_rank - 1, axes); + uint8_t axis_correction = op->get_input_shape(0).size(); + if (direction != cldnn::dft_direction::forward || mode != cldnn::dft_mode::real) { + --axis_correction; + } + ov::normalize_axes(op.get(), axis_correction, axes); - const cldnn::dft prim(layer_name, inputs.front(), std::move(axes), out_shape, kind); + std::vector signal_size; + if (op->get_input_size() == 3) { + auto signal_size_constant = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(2)); + if (!signal_size_constant) { + IE_THROW() << "Unsupported parameter nodes type in " << friendly_name << " (" << op->get_type_name() << ")"; + } + signal_size = signal_size_constant->cast_vector(); + } + + const cldnn::dft prim(layer_name, inputs.front(), axes, signal_size, out_shape, direction, mode); p.add_primitive(*op, prim); } void CreateDFTOp(Program& p, const std::shared_ptr& op) { - createDft(p, op, cldnn::dft_kind::forward); + createDft(p, op, cldnn::dft_direction::forward, cldnn::dft_mode::complex); } void CreateIDFTOp(Program& p, const std::shared_ptr& op) { - createDft(p, op, cldnn::dft_kind::inverse); + createDft(p, op, cldnn::dft_direction::inverse, cldnn::dft_mode::complex); +} + +void CreateRDFTOp(Program& p, const std::shared_ptr& op) { + createDft(p, op, cldnn::dft_direction::forward, cldnn::dft_mode::real); +} + +void CreateIRDFTOp(Program& p, const std::shared_ptr& op) { + createDft(p, op, cldnn::dft_direction::inverse, cldnn::dft_mode::real); } } // namespace REGISTER_FACTORY_IMPL(v7, DFT); REGISTER_FACTORY_IMPL(v7, IDFT); +REGISTER_FACTORY_IMPL(v9, RDFT); +REGISTER_FACTORY_IMPL(v9, IRDFT); } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp index aa7ad62497a..1e686757f68 100644 --- a/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp @@ -12,7 +12,7 @@ using namespace tests; namespace { -template +template std::string vec2str(const std::vector& vec) { if (!vec.empty()) { std::ostringstream result; @@ -30,128 +30,131 @@ std::vector convert(const std::vector& v) { return result; } -struct dft_test_params { +struct dft_type { + dft_direction direction; + dft_mode mode; +}; + +const dft_type DFT{dft_direction::forward, dft_mode::complex}; +const dft_type IDFT{dft_direction::inverse, dft_mode::complex}; +const dft_type RDFT{dft_direction::forward, dft_mode::real}; +const dft_type IRDFT{dft_direction::inverse, dft_mode::real}; + +template +float getThreshold(dft_type type); + +template <> +float getThreshold(dft_type type) { + if (type.direction == dft_direction::forward && type.mode == dft_mode::complex) { + return 1e-4f; + } + if (type.direction == dft_direction::inverse && type.mode == dft_mode::complex) { + return 4e-6f; + } + if (type.direction == dft_direction::forward && type.mode == dft_mode::real) { + return 2e-4f; + } + if (type.direction == dft_direction::inverse && type.mode == dft_mode::real) { + return 2e-6f; + } + return 0; +} + +template <> +float getThreshold(dft_type type) { + if (type.direction == dft_direction::forward && type.mode == dft_mode::complex) { + return 4e-2f; + } + if (type.direction == dft_direction::inverse && type.mode == dft_mode::complex) { + return 5e-4f; + } + if (type.direction == dft_direction::forward && type.mode == dft_mode::real) { + return 8e-3f; + } + if (type.direction == dft_direction::inverse && type.mode == dft_mode::real) { + return 2e-3f; + } + return 0; +} + +struct dft_params { std::vector input_shape; std::vector output_shape; std::vector axes; + std::vector signal_size; std::vector input_values; std::vector expected_values; std::string test_name; }; -struct idft_test_params { - std::vector input_shape; - std::vector output_shape; - std::vector axes; - std::vector expected_values; - std::vector input_values; - std::string test_name; -}; +using dft_test_params = std::tuple; -template -dft_kind getKind(); - -template <> -dft_kind getKind() { - return dft_kind::forward; -} - -template <> -dft_kind getKind() { - return dft_kind::inverse; -} - -template -struct dft_gpu_test : public testing::TestWithParam

{ +template +struct dft_gpu_test : public testing::TestWithParam { public: void test() { - auto p = testing::TestWithParam

::GetParam(); + format::type plain_format; + format::type blocked_format; + dft_type type; + dft_params p; + std::tie(plain_format, blocked_format, type, p) = testing::TestWithParam::GetParam(); + auto& engine = get_test_engine(); - const auto input_format = format::get_default_format(p.input_shape.size()); - const layout data_layout(type_to_data_type::value, input_format, tensor(input_format, p.input_shape)); + auto data_type = type_to_data_type::value; + const layout data_layout(data_type, plain_format, tensor(plain_format, p.input_shape)); auto input = engine.allocate_memory(data_layout); set_values(input, convert(p.input_values)); topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(dft("dft", "input", std::move(p.axes), p.output_shape, getKind

())); + topology.add(reorder("reorder_input", "input", blocked_format, data_type)); + topology.add(dft("dft", "reorder_input", p.axes, p.signal_size, p.output_shape, type.direction, type.mode)); + // It's simpler to use "bfwzyx" format for all cases, as input and output can have different ranks + topology.add(reorder("out", "dft", format::bfwzyx, data_type)); network network(engine, topology); network.set_input_data("input", input); const auto outputs = network.execute(); EXPECT_EQ(outputs.size(), size_t(1)); - EXPECT_EQ(outputs.begin()->first, "dft"); + EXPECT_EQ(outputs.begin()->first, "out"); - auto output = outputs.at("dft").get_memory(); + auto output = outputs.at("out").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); const auto expected_values = convert(p.expected_values); ASSERT_EQ(output_ptr.size(), expected_values.size()); for (size_t i = 0; i < output_ptr.size(); ++i) { - EXPECT_NEAR(expected_values[i], output_ptr[i], getThreshold()); + EXPECT_NEAR(expected_values[i], output_ptr[i], getThreshold(type)); } } - static std::string PrintToStringParamName(const testing::TestParamInfo

& info) { - auto& p = info.param; + static std::string PrintToStringParamName(const testing::TestParamInfo& info) { + format::type plain_format; + format::type blocked_format; + dft_type type; + dft_params p; + std::tie(plain_format, blocked_format, type, p) = info.param; + std::ostringstream result; result << "InputShape=" << vec2str(p.input_shape) << "_"; - result << "OutputShape=" << vec2str(p.output_shape) << "_"; result << "Precision=" << data_type_traits::name(type_to_data_type::value) << "_"; result << "Axes=" << vec2str(p.axes) << "_"; - result << "Inverse=" << (getKind

() == dft_kind::inverse); + result << "SignalSize=" << vec2str(p.signal_size) << "_"; + result << "Inverse=" << (type.direction == dft_direction::inverse) << "_"; + result << "Real=" << (type.mode == dft_mode::real) << "_"; + result << "Format=" << fmt_to_str(blocked_format); if (!p.test_name.empty()) { result << "_TestName=" << p.test_name; } return result.str(); } - - float getThreshold() const; }; -template <> -float dft_gpu_test::getThreshold() const { - return 1e-4f; -} - -template <> -float dft_gpu_test::getThreshold() const { - return 4e-2f; -} - -template <> -float dft_gpu_test::getThreshold() const { - return 4e-6f; -} - -template <> -float dft_gpu_test::getThreshold() const { - return 5e-4f; -} - -using dft_gpu_test_f32 = dft_gpu_test; -using dft_gpu_test_f16 = dft_gpu_test; -using idft_gpu_test_f32 = dft_gpu_test; -using idft_gpu_test_f16 = dft_gpu_test; - -TEST_P(dft_gpu_test_f32, dft_f32) { - test(); -} - -TEST_P(dft_gpu_test_f16, dft_f16) { - test(); -} - -TEST_P(idft_gpu_test_f32, idft_f32) { - test(); -} - -TEST_P(idft_gpu_test_f16, idft_f16) { - test(); -} - const std::vector input_data = { 0.85943836, 0.009941814, 0.004292889, 0.54598427, 0.8270831, 0.49770153, 0.9035636, 0.19274887, 0.8589833, 0.88759327, 0.72343576, 0.057539318, 0.915801, 0.63455844, 0.25069925, 0.045601673, @@ -1529,63 +1532,492 @@ const std::vector expected_idft1d_signal_size_results = { 0.08365354, -0.11595764, 0.0041189813, -0.19876777, 0.05539961, -0.03458054, 0.3030866, 0.06549558, 0.100598566, -0.0328084, -0.008204469, -0.20897065, -0.08830016, -0.15857375}; -const std::vector dft_params = { +static const std::vector rinput_data = { + 0.10606491, 0.7454715, 0.57231355, 0.4582412, 0.3847059, 0.27398932, 0.66796243, 0.395475, 0.2815729, + 0.7799197, 0.59909415, 0.12294636, 0.38957402, 0.97498834, 0.46759892, 0.14017141, 0.04206858, 0.7279963, + 0.61560553, 0.9027321, 0.6226334, 0.2601217, 0.5555177, 0.40498647, 0.14175586, 0.57774633, 0.52652127, + 0.9385691, 0.9588788, 0.9844318, 0.23095612, 0.09707925, 0.24574867, 0.6907577, 0.1974319, 0.8295272, + 0.34612727, 0.51401484, 0.66115797, 0.9336245, 0.06690067, 0.7468897, 0.39028263, 0.53575844, 0.060429193, + 0.8913558, 0.77787375, 0.6701197, 0.7350527, 0.6636995, 0.18176624, 0.8629976, 0.45142895, 0.6497297, + 0.159372, 0.40598175, 0.7988516, 0.7291543, 0.07090418, 0.7697132, 0.4972157, 0.7669217, 0.67975855, + 0.13026066, 0.6587437, 0.24532892, 0.24545169, 0.83795583, 0.105490535, 0.7264323, 0.94568557, 0.7216649, + 0.14389831, 0.7930531, 0.70895344, 0.9724701, 0.9775157, 0.49999878, 0.65569246, 0.26876843, 0.63248956, + 0.85201293, 0.5689624, 0.023386303, 0.5546464, 0.36860028, 0.9603114, 0.39123482, 0.0380728, 0.89212376, + 0.14387614, 0.63858676, 0.10003748, 0.8906635, 0.06681054, 0.7458642, 0.45452347, 0.54724604, 0.6496482, + 0.7818356, 0.6608355, 0.77711326, 0.24588613, 0.013456763, 0.355845, 0.80388206, 0.027993264, 0.73677206, + 0.52755004, 0.9052324, 0.54311025, 0.5367805, 0.4131242, 0.7752338, 0.109669454, 0.13664648, 0.7828739, + 0.9083969, 0.5247593, 0.7493595, 0.19275227, 0.007190853, 0.6087981, 0.344136, 0.46909887, 0.41924855, + 0.7072913, 0.19932869, 0.5303847, 0.651384, 0.06686331, 0.9717932, 0.65702224, 0.11786682, 0.3154073, + 0.88923013, 0.5564087, 0.91047823, 0.28466642, 0.0934668, 0.88953066, 0.9919338, 0.18322521, 0.8185455, + 0.566391, 0.014207997, 0.29673064, 0.6347744, 0.6801958, 0.39601147, 0.34374171, 0.7216888, 0.6152569, + 0.76679546, 0.5860851, 0.4276813, 0.79339284, 0.13130653, 0.68764234, 0.053128112, 0.02611321, 0.2982243, + 0.7618372, 0.3331729, 0.5468192, 0.15707079, 0.28592056, 0.15286565, 0.9368963, 0.350671, 0.4336494, + 0.08934934, 0.41172776, 0.5850259, 0.70730376, 0.8598349, 0.088788144, 0.26711187, 0.8002491, 0.19422275, + 0.8312039, 0.5198718, 0.40111357, 0.98375803, 0.77703434, 0.037818834, 0.704231, 0.689808, 0.17102319, + 0.42153922, 0.7278252, 0.8030207, 0.9101717, 0.0199644, 0.13768466, 0.55669, 0.17991355, 0.6720098, + 0.7733328, 0.20881335}; + +static const std::vector expected_rdft1d_results_1 = { + 4.6657147, -1.1622906e-06, 0.21456887, -0.14946258, -0.20476034, -0.37063062, -0.31414136, + 0.5099413, -1.1779613, 0.07057127, -0.64047664, -1.0058284e-07, 4.982774, -1.1771917e-06, + 0.6607505, 0.18829148, -0.9772357, 1.4243596, 0.8640026, 0.34923682, 0.33401352, + 0.25859502, -0.7548928, 8.940697e-08, 5.9711604, -1.4901161e-06, 0.5638976, 1.5429841, + -0.52065414, 0.24638398, -0.27140495, 0.5040715, 0.5360231, 0.3234269, -0.36054826, + 1.7508864e-07, 4.7464237, -1.2218952e-06, -0.29650804, 0.80609477, -0.161426, 1.0022418, + -0.50812817, 0.7967348, 0.4394225, -0.1588624, -1.3835809, -7.4505806e-08, 5.53836, + -1.7136335e-06, -0.38635445, 0.8284859, -0.23278837, -0.63777345, -0.93614054, 0.3215857, + -0.14075133, -0.67071164, -1.4772836, 2.0861626e-07, 5.0798974, -1.5944242e-06, 0.056767445, + 0.03468219, -0.1497254, -0.9672509, 0.2603209, 0.69644475, -0.9208536, 0.006730467, + -1.7552528, 2.682209e-07, 4.893558, -1.6242266e-06, 0.6719861, -0.13982919, 0.064845346, + -0.39896214, 0.21785057, -0.5099982, -0.65526295, 1.4383471, -0.52023906, 2.5331974e-07, + 6.687699, -1.5497208e-06, -0.7423769, 0.09968524, 1.052381, -0.21306956, 0.5875206, + -0.3038844, 0.3991575, -1.1895186, 0.17579001, 3.874302e-07, 5.2818384, -1.1026859e-06, + 0.5087582, 0.106959194, 1.1816688, -0.87592727, 0.03740315, 0.5197907, -1.3198637, + 0.6398836, 0.22712436, 2.2351742e-08, 5.0190897, -1.5646219e-06, -0.087282926, 0.50819266, + -0.28002462, 0.29240948, -0.32303664, 0.38377762, -0.0051696897, -0.99301195, -2.189299, + 2.0861626e-07, 5.0545654, -1.5795231e-06, 0.9146397, 0.83839166, 0.870533, 0.17405808, + -0.56308234, -0.7806684, 0.26397777, 0.6880482, -1.4183462, 2.682209e-07, 5.479953, + -1.2665987e-06, 0.49444157, 0.7534672, -0.76784146, -0.4507342, 0.88815784, 0.6985409, + -0.2727425, -0.25027415, -0.7328796, 2.682209e-07, 4.1296124, -5.662441e-07, -0.46133032, + 0.30635798, -0.18225375, 0.42515472, -0.5484285, 0.9704039, -0.35255045, 0.17549685, + 0.8870368, -3.1292439e-07, 4.8632016, -1.8924475e-06, -0.6926452, 0.025076404, -0.039108217, + -1.7492937, -0.8120377, -0.85315156, -0.0022608787, 0.45002514, -1.1024668, 3.501773e-07, + 5.4715447, -1.4901161e-06, 1.1176248, -0.2109062, -0.27492502, 0.08983741, 1.1903813, + -1.007312, -0.20150042, -0.83919466, -0.23939973, 4.917383e-07, 5.1267176, -9.983778e-07, + -0.44803134, -0.8066604, -0.3435102, -0.41692197, -0.22457689, -0.1076939, -0.29129186, + -1.1880502, 0.9255183, -1.6391277e-07, 3.8495903, -5.5134296e-07, 0.09505272, -0.12751618, + -1.1264827, 0.5068884, -1.055237, -0.19516481, -0.34035242, -0.15379356, 1.2655814, + -2.6077032e-07, 4.4372616, -9.23872e-07, -0.72962606, -0.23475963, -0.04278487, 1.1032158, + -0.558924, -0.5300043, 1.0578637, -0.2466627, 0.44617313, -7.8231096e-08, 5.5374002, + -1.4156103e-06, 0.016273111, -0.5989829, -0.19913958, 0.013256833, 1.8512837, 0.14526272, + -0.39700353, -0.07573915, 0.23181, 2.9429793e-07, 4.989425, -1.4901161e-06, 1.0391837, + 0.16554561, -0.22647032, -1.0689808, -0.84556, -0.82779336, 0.9430445, 0.37618563, + 0.4684292, -9.685755e-08}; + +static const std::vector expected_rdft1d_results_2 = {2.266797, -8.195639e-08, -0.37842733, -0.41015846, + -0.48980892, -0.10356337, 2.5542018, -2.2351742e-08, + -0.3223713, 0.671882, 0.54300576, -0.35418037, + 1.985015, -2.2351742e-08, -0.030243821, -0.20105253, + 0.59431964, 0.07358998, 1.4619737, -7.450581e-09, + -0.4356845, 0.35701087, 0.28208786, -0.36424285, + 1.8002605, -1.1920929e-07, -0.43280697, -0.56735414, + -0.30007166, -0.541847, 2.3052943, -1.2293458e-07, + -0.39316025, -0.5526293, -0.30507135, -0.6021758, + 2.7329001, -6.7055225e-08, 0.28245124, -0.42586988, + -0.40586215, 0.4590181, 3.3132548, -5.9604645e-08, + 0.6297612, 0.3694744, 0.077824846, -0.6248544, + 2.6314974, -2.9802322e-08, 0.58795106, -0.60349375, + -0.3224758, 0.34408605, 1.8399743, -9.685755e-08, + -0.43963802, -0.079073176, -0.120658875, -1.0880115, + 2.0531366, -4.4703484e-08, 0.80112594, -0.53726834, + -0.17560546, -0.026561722, 2.3779182, -9.685755e-08, + -0.21852754, -0.19336401, 0.38734403, -0.5954362, + 1.6219761, 7.450581e-09, -0.43100592, 0.28373614, + 0.101898566, 0.52321124, 2.128953, -1.4901161e-07, + -0.1622684, -0.94116735, -0.7350497, 0.12695336, + 3.449626, -8.940697e-08, 0.56062996, -0.031283244, + -0.06161648, -0.8543532, 3.033568, -8.195639e-08, + -0.37023768, -0.03989461, -0.28719214, -0.22382751, + 1.9661667, -1.4901161e-08, -0.59863573, -0.015534669, + -0.31916466, 0.55380434, 2.227056, -5.2154064e-08, + -0.12656188, 0.6895717, 0.097157195, 0.19840825, + 3.5129817, -2.1234155e-07, 0.11158541, 0.5870459, + 0.20993343, -0.40297145, 2.5986667, 0.0, + 0.26602313, -1.1560227, 0.2542065, 0.45556274}; + +static const std::vector expected_rdft1d_results_3 = { + 4.665715, -1.6093254e-06, -0.5430559, -0.5752678, -0.37596112, -1.1571281, -0.46793216, + -0.94566363, 0.6854232, -0.3444838, -0.674704, 0.5946392, -0.64047587, 1.3560057e-06, + 4.9827743, -1.7434359e-06, -0.43517, -0.049020194, -1.4773891, -1.0811031, 1.2506557, + 0.5371344, 1.2869358, -0.14998645, 0.8555907, 0.3693859, -0.7548918, 1.5944242e-06, + 5.971161, -1.5199184e-06, -1.2643411, 0.85635287, -0.1801207, -1.7264944, 0.6412285, + -0.4787441, 0.82227707, 0.65098876, 0.9114491, 0.40323836, -0.36054718, 1.2852252e-06, + 4.7464237, -1.66893e-06, -1.5010594, 0.2253451, -0.87915635, -0.4252541, 0.4976693, + -0.6554581, 0.928985, 0.8035921, 0.6578763, -0.15220329, -1.3835799, 1.0430813e-06, + 5.5383606, -1.4901161e-06, -1.619024, -0.10987502, 0.20661727, -1.3774645, -0.3057741, + -1.0960662, 0.2971667, 0.46700704, -0.20812088, -0.602368, -1.4772825, 9.3877316e-07, + 5.0798974, -1.758337e-06, -0.7421876, -0.61749315, 0.21938956, -1.3415859, -0.838238, + -0.6598083, 1.0601404, -0.7129184, -0.27083004, 0.31763482, -1.7552516, 1.4677644e-06, + 4.893558, -1.4975667e-06, -0.06445231, -0.55879503, 0.08908144, -1.2869594, 0.33623943, + -0.7704663, -0.047739983, -1.0678453, 0.48350462, 1.5768427, -0.52023804, 1.1697412e-06, + 6.687699, -1.3113022e-06, -1.292419, -1.2920969, 1.2041754, -0.2943018, 1.1889167, + -0.66985166, 1.1336832, -0.13731277, 0.008011267, -0.9506076, 0.1757915, 1.1026859e-06, + 5.2818394, -1.4305115e-06, -0.25987166, -0.48605326, 0.90237427, -0.8028362, -0.3040653, + -1.6981151, 1.1215456, -0.7120959, -0.4195284, 1.3941492, 0.22712523, 8.046627e-07, + 5.01909, -1.7881393e-06, -1.1856917, -0.10931289, -0.5164983, -0.9724103, 0.30577338, + -0.72837675, 0.89680094, 0.21036407, -0.052024096, -0.9455472, -2.1892984, 1.4305115e-06, + 5.054565, -1.5050173e-06, -0.3471575, 0.40542153, 0.36438322, -0.9765247, 1.2703501, + -1.7359983, -0.1160066, -0.25323528, 0.9753329, 0.5339062, -1.418345, 9.834766e-07, + 5.4799523, -1.7285347e-06, -0.7905842, 0.093313254, 0.068526804, -1.8504739, -0.01845923, + 0.26084417, 1.5358877, -0.4159652, 0.089752786, 0.089908056, -0.7328786, 1.4007092e-06, + 4.129612, -9.536743e-07, -1.2393575, -0.28046644, -0.58673245, -0.39608067, -0.12385368, + -0.53435826, 0.77853805, 0.7645384, -0.18040559, 0.6678516, 0.88703763, 8.046627e-07, + 4.8632016, -1.0430813e-06, -1.1780663, -1.0952923, 1.1691413, -1.4023741, -0.546494, + -0.92614484, -1.1796933, -0.31762218, 0.25592417, 0.0959474, -1.1024656, 1.013279e-06, + 5.471545, -1.6987324e-06, 0.35812324, -0.66833705, 0.07725692, -1.6537004, 1.6561611, + 0.051166296, 0.865453, -1.1392289, -0.23588535, -0.5480979, -0.2393986, 1.3411045e-06, + 5.126718, -9.23872e-07, -0.6379836, -1.6675751, 0.013057679, -0.9891113, 0.20881936, + -0.30439606, 0.37222707, 0.25244698, -0.9197892, -0.77782196, 0.9255192, 1.1101365e-06, + 3.8495903, -7.4505806e-07, -0.63088936, -0.4556699, -1.1905057, -1.2522144, 0.46207082, + -0.31992733, -0.4309795, 0.74295896, -0.6106033, 0.18823686, 1.2655822, 7.748604e-07, + 4.4372616, -7.0780516e-07, -1.1016369, -1.0079124, -0.6083025, -0.0011255145, 1.4406854, + -0.2912693, -0.26610214, 0.87299407, 0.69553405, -0.45576566, 0.44617438, 7.4505806e-07, + 5.5374007, -1.5944242e-06, -0.32642078, -1.3683549, 0.079301864, -0.83741367, 0.67391664, + 0.69433576, 1.6423957, -1.1923066, 0.0334223, 0.37603495, 0.23181117, 1.4156103e-06, + 4.9894247, -7.748604e-07, 0.1788401, -0.39274544, 0.78422666, -2.1340246, 0.5487572, + -0.8765497, -0.7899384, 0.5434137, 0.91613716, 0.08274247, 0.46843058, 8.34465e-07}; + +const std::vector expected_rdft2d_results = { + 52.8665, -2.9623508e-05, 1.1642078, 3.826082, -0.22771922, -0.49822173, -0.3857528, 3.2676966, + -2.5112464, -0.27454787, -8.678656, 3.7550926e-06, -0.818072, 0.8330209, 3.4618711, -0.2419473, + 1.7408192, 5.744002, 1.8477443, 2.039329, 0.3268112, -2.7421296, 0.6809025, 1.7613728, + -2.294264, -0.8984407, -0.2868184, -3.2426705, -0.801461, -0.58971727, -1.463435, -2.5413132, + 0.116907075, -0.5013529, -2.8377397, -2.8455539, -0.13475686, -1.3145845, -2.2820292, -0.199, + -0.056986623, 0.12560216, -0.589707, -1.7577857, -0.5274223, -1.0395792, 0.53813136, -1.7159984, + 0.22503978, 2.902198, -1.8643543, -1.8789856, 2.1722724, -2.068454, 0.59446484, 0.6067899, + 1.5525781, 1.7612485, 1.1877432, -0.48152098, -0.16525066, 1.5497208e-06, 1.9815066, 0.55218977, + 0.80434155, -3.575598, -2.1471107, -0.57691807, -3.004384, 3.8775828, 3.1358109, -6.2584877e-07, + 0.22504184, -2.9021916, 1.0378464, 0.9877456, 0.38395065, -1.6089694, -0.5107449, 1.8621777, + -4.960479, -1.8983803, 1.187743, 0.48151842, -0.1347583, 1.3145843, -0.9968031, -1.3782079, + 0.9922035, 1.6614089, -0.83039653, -0.043888614, 1.9431384, -1.6448143, 0.5381324, 1.7159982, + -2.2942696, 0.8984335, 1.3057998, -0.26607463, -3.2994738, -1.9240448, 1.4963659, 2.8365738, + -4.691832, 1.2995429, -2.8377357, 2.8455553, -0.8180722, -0.8330165, -1.3755352, 0.34623986, + -3.7555497, -0.9723124, -1.1528367, -0.593254, -0.023679793, 1.8681414, 0.6809023, -1.7613728, + 48.939255, -2.4735928e-05, 1.3455832, 0.11001387, -2.3319814, -1.3735183, -0.6780232, -2.4875786, + 0.40718403, -1.0639579, 0.7314569, -1.2665987e-07, 0.97006464, -0.30789328, 3.3290033, 2.7749023, + -0.7520597, -0.98800826, 1.3100916, 1.1514524, 1.1085359, 4.348257, -2.839456, 2.4404035, + 0.9518837, 2.1538901, 3.8438358, 2.410589, 3.0649068, 0.95690995, 2.2213395, 0.66509914, + -0.4409917, -0.37408838, -0.6316552, -1.5842111, -0.72352415, -2.5862057, 0.2678757, 0.610149, + 2.9564474, 0.08470708, -2.0889034, -8.370071, -0.16373271, 2.0413866, -3.3811545, 2.0487003, + 0.0316903, -1.078939, -2.5515578, -0.16135174, -0.17406325, 1.2709827, -0.67006403, -1.6342779, + 0.42163712, 2.1418998, -0.96614444, 1.9175051, -0.8538456, 2.8014183e-06, 2.0189362, 0.30467552, + 0.5074463, 3.7919073, 2.427857, 0.7526233, -2.4620402, 0.65359443, 0.7219074, -2.3841858e-07, + 0.03169757, 1.0789458, -2.1129081, -1.0250417, 4.8181386, -0.39162922, -1.2349386, 1.8470186, + -0.49495277, -1.5516026, -0.96614635, -1.9175065, -0.7235237, 2.5862021, 0.677946, 2.0370173, + -0.29536027, 0.6505451, -2.8572361, 2.3176546, 3.4459226, 1.1869265, -3.3811545, -2.048697, + 0.95187366, -2.1538982, 1.808088, -1.1755496, -2.7418838, -1.6770658, -3.5766084, -2.8320727, + -0.02944839, -1.6522555, -0.63165283, 1.5842092, 0.9700667, 0.30789307, 0.5195943, 2.4985125, + 3.6537378, -0.5842519, -0.4843334, 0.78346854, 0.84766304, 1.1503224, -2.839459, -2.440402}; + +const std::vector expected_rdft2d_results_2 = { + 25.904434, -8.46386e-06, -5.3626504, 0.3475349, -2.7060094, -5.767444, 1.615847, -2.6387978, + 4.020789, 1.4271183, 1.5420923, 0.6126925, -4.6167765, 5.5730343e-06, -0.753784, -0.19148755, + 1.4881928, -2.7645326, -0.39467168, 1.014636, 0.5598, -1.7654291, -0.91835654, -2.3019042, + -0.49356225, -0.8411435, 0.080773115, -1.2883577, -0.5341466, 1.4913602, -0.30008763, -0.5831754, + 1.7365295, 1.821624, -0.08851206, -1.622279, -0.27249795, -0.834725, -0.6706438, 0.4766277, + 0.62642634, 0.5483514, -0.5341469, -1.4913592, 0.8286207, 0.35826343, -1.0869694, -1.4876881, + -1.6723244, -0.06565219, 0.16255295, 0.5317876, -0.75649667, 1.2447717, 0.6264261, -0.5483517, + -0.7537827, 0.19148779, 0.6306459, -0.23442982, 0.57131517, -1.366768, -2.7544713, 1.3638397, + 0.43463084, -0.5446956, -2.9949086, 1.4802479, 0.080771565, 1.2883584, 24.998875, -7.390976e-06, + -3.1970425, -1.5453612, 1.0925753, -6.279154, 2.237704, -2.8844912, 1.8841789, -1.3615136, + 0.90471864, 0.8395144, -2.6060505, 4.976988e-06, 1.1634235, 0.42319643, 2.678257, 2.4692535, + 0.34259582, 0.43598562, 2.748452, 0.88622695, 2.2745323, -2.8840196, 1.8120161, -0.27884078, + -1.5445104, -0.7000726, -1.0264511, -0.7026249, -1.071573, 1.062395, -0.64628685, -0.36214483, + -0.5110928, -1.0534683, -2.786768, 2.6113648, 0.94799054, 0.53423727, -0.69832724, 2.1821892, + -1.0264513, 0.70262754, -0.41705567, -0.17140968, 1.4991179, 2.9674625, -0.012362838, -3.8260121, + -1.5786235, -0.32526863, 1.2857957, 1.7469958, -0.6983267, -2.1821907, 1.1634252, -0.42319855, + 0.2716269, 0.21222934, -0.46608746, -1.6447732, 1.8890494, -1.8022469, -0.37335354, 0.69326025, + -0.07385725, -0.1723765, -1.5445105, 0.7000739}; + +const std::vector expected_rdft3d_results = { + 101.805756, -5.2273273e-05, 2.5097876, 3.936094, -2.5597036, -1.8717405, -1.0637736, 0.7801182, + -2.1040666, -1.3385094, -7.9471993, 2.026558e-06, 0.15199316, 0.52512753, 6.7908745, 2.5329556, + 0.98875976, 4.755993, 3.157838, 3.190782, 1.4353466, 1.6061276, -2.158554, 4.201776, + -1.3423799, 1.2554499, 3.5570183, -0.8320818, 2.263445, 0.36719292, 0.7579028, -1.8762131, + -0.32408538, -0.87544185, -3.4693956, -4.429764, -0.85828185, -3.9007902, -2.0141544, 0.4111499, + 2.8994608, 0.21030927, -2.6786098, -10.127857, -0.6911557, 1.0018079, -2.8430226, 0.33270124, + 0.25672907, 1.8232578, -4.4159126, -2.040338, 1.9982092, -0.7974717, -0.07559925, -1.0274884, + 1.9742157, 3.9031482, 0.22159882, 1.4359848, -1.0190966, 3.2186508e-06, 4.0004425, 0.8568655, + 1.3117876, 0.2163087, 0.28074512, 0.17570588, -5.466423, 4.531178, 3.857718, -1.2516975e-06, + 0.2567385, -1.823246, -1.0750613, -0.037295938, 5.20209, -2.0005994, -1.7456844, 3.7091968, + -5.45543, -3.4499822, 0.22159535, -1.4359887, -0.8582816, 3.9007854, -0.31885874, 0.65880924, + 0.6968423, 2.3119528, -3.6876333, 2.273767, 5.38906, -0.45788872, -2.8430223, -0.33269957, + -1.3423961, -1.2554631, 3.1138885, -1.4416232, -6.0413575, -3.6011095, -2.080242, 0.0045015216, + -4.7212796, -0.3527125, -3.4693892, 4.429763, 0.15199506, -0.52512354, -0.85594195, 2.8447511, + -0.10181111, -1.5565643, -1.6371696, 0.19021615, 0.8239815, 3.018465, -2.158556, -4.2017746, + 3.9272437, -3.9339066e-06, -0.18137527, 3.7160687, 2.1042633, 0.8752967, 0.29226887, 5.755277, + -2.9184306, 0.78941, -9.410112, 3.0100346e-06, -1.7881365, 1.140914, 0.13286811, -3.01685, + 2.4928799, 6.7320104, 0.5376528, 0.88787735, -0.78172505, -7.0903873, 3.5203578, -0.6790314, + -3.246148, -3.0523329, -4.1306543, -5.653259, -3.866367, -1.5466263, -3.6847744, -3.2064118, + 0.5578996, -0.12726665, -2.2060838, -1.2613428, 0.588767, 1.2716217, -2.5499039, -0.8091496, + -3.0134337, 0.0408957, 1.4991964, 6.6122847, -0.36368948, -3.0809648, 3.9192853, -3.764699, + 0.19334978, 3.9811373, 0.68720365, -1.717634, 2.346336, -3.3394372, 1.2645291, 2.241068, + 1.1309403, -0.3806507, 2.1538877, -2.3990266, 0.6885946, -1.4901161e-06, -0.037429705, 0.24751475, + 0.2968948, -7.367506, -4.574969, -1.329541, -0.5423446, 3.2239883, 2.4139037, 2.9802322e-07, + 0.19334424, -3.9811373, 3.1507545, 2.0127864, -4.4341884, -1.2173393, 0.72419256, 0.015158802, + -4.4655256, -0.34677732, 2.1538897, 2.3990245, 0.5887663, -1.2716188, -1.6747494, -3.415226, + 1.2875631, 1.0108626, 2.0268395, -2.3615427, -1.502785, -2.8317401, 3.919288, 3.764695, + -3.2461433, 3.0523314, -0.5022881, 0.9094755, -0.55759126, -0.24697942, 5.0729737, 5.668646, + -4.662384, 2.9517999, -2.2060819, 1.2613468, -1.7881389, -1.1409098, -1.8951292, -2.1522717, + -7.4092865, -0.38806117, -0.6685039, -1.3767233, -0.8713439, 0.71781945, 3.5203605, 0.6790297}; + +const std::vector expected_rdft3d_results_2 = { + 50.90331, -1.4543533e-05, -8.559692, -1.1978266, -1.6134334, -12.046599, 3.8535514, -5.5232873, + 5.9049683, 0.065603495, 2.4468107, 1.4522064, -7.222825, 1.2278557e-05, 0.40963984, 0.231709, + 4.16645, -0.29528028, -0.052075505, 1.450621, 3.3082519, -0.8792013, 1.356175, -5.1859245, + 1.3184534, -1.1199851, -1.4637363, -1.9884299, -1.5605974, 0.7887349, -1.3716602, 0.47921878, + 1.0902424, 1.4594792, -0.59960556, -2.6757474, -3.0592656, 1.7766399, 0.27734682, 1.0108652, + -0.07190053, 2.7305403, -1.5605986, -0.78873086, 0.41156515, 0.18685403, 0.4121489, 1.4797752, + -1.6846865, -3.8916636, -1.4160703, 0.20651829, 0.52929974, 2.9917672, -0.07190076, -2.7305427, + 0.4096415, -0.23171037, 0.9022726, -0.022200808, 0.10522783, -3.0115416, -0.8654218, -0.4384073, + 0.061277367, 0.14856634, -3.0687659, 1.3078697, -1.4637384, 1.9884316, 25.904425, -24.998884, + -6.9080105, 3.5445771, -8.985163, -6.860018, -1.2686447, -4.8765025, 2.6592734, -0.45706248, + 2.3816066, -0.29202732, -4.6167727, 2.6060565, -0.33058774, -1.3549114, 3.9574459, -5.44279, + 0.041313916, 0.67204094, 1.446027, -4.5138807, -3.8023772, -4.576436, -0.7724026, -2.6531591, + -0.6192993, 0.25615194, -1.2367722, 2.5178113, 0.7623075, 0.48839718, 1.3743844, 2.4679115, + -1.1419809, -1.1111865, 2.3388672, 1.9520425, -0.13640736, -0.47136223, 2.8086162, 1.2466785, + 0.16848034, -0.46490768, 0.6572111, 0.7753189, 1.8804929, -2.9868064, -5.498336, -0.053289652, + -0.16271627, 2.1104114, 0.9904991, -0.041024223, -1.5557647, 0.14997506, -1.1769819, -0.9719368, + 0.8428756, -0.5060569, -1.0734584, -0.9006812, -4.556718, -0.5252099, 1.1278908, -0.17134166, + -3.1672862, 1.5541049, 0.78084624, 2.8328683, 0.90555733, -1.3709068e-06, -2.1656086, 1.8928962, + -3.7985847, 0.511709, -0.62185717, 0.24569236, 2.1366088, 2.7886305, 0.6373716, -0.2268233, + -2.0107267, 5.662441e-07, -1.9172084, -0.6146841, -1.1900643, -5.233785, -0.73726743, 0.5786506, + -2.188651, -2.6516552, -3.1928902, 0.58211625, -2.305578, -0.5623034, 1.6252834, -0.58828497, + 0.49230486, 2.1939852, 0.7714851, -1.6455705, 2.382816, 2.1837692, 0.4225806, -0.56881106, + 2.514269, -3.4460905, -1.618634, -0.057608932, 1.3247533, -1.6338379, 0.49230492, -2.1939862, + 1.2456759, 0.5296728, -2.5860875, -4.45515, -1.659962, 3.7603593, 1.7411764, 0.8570565, + -2.0422916, -0.50222373, 1.3247528, 1.633839, -1.9172082, 0.6146865, 0.35901868, -0.44665974, + 1.0374024, 0.27800465, -4.6435204, 3.1660864, 0.8079842, -1.2379556, -2.921052, 1.6526239, + 1.6252828, 0.588284, 25.90444, 24.998867, -3.817289, -2.8495073, 3.573144, -4.6748676, + 4.500339, -0.40109348, 5.382302, 3.3112957, 0.7025763, 1.5174108, -4.616783, -2.6060438, + -1.1769816, 0.97193646, -0.9810596, -0.086276084, -0.83065766, 1.3572321, -0.3264265, 0.9830234, + 1.9656628, -0.027371943, -0.2147214, 0.9708719, 0.7808455, -2.8328671, 0.16847888, 0.46490908, + -1.3624828, -1.6547482, 2.0986745, 1.1753378, 0.9649557, -2.1333718, -2.8838634, -3.6214924, + -1.2048804, 1.4246187, -1.5557631, -0.14997569, -1.2367743, -2.5178103, 1.0000296, -0.05879204, + -4.0544314, 0.01142931, 2.153687, -0.078014135, 0.4878212, -1.0468364, -2.503492, 2.5305676, + 2.808617, -1.2466786, -0.33058444, 1.3549128, 0.41841656, 0.03719666, 2.216088, -1.8328552, + -0.95222485, 3.2528882, -0.25863037, -0.91804826, -2.822532, 1.4063904, -0.6193025, -0.25615215}; + +const std::vector expected_irdft1d_results_2 = { + 0.10606494, 0.7454715, 0.5723136, 0.45824113, 0.38470596, 0.59909415, 0.12294642, 0.38957405, 0.9749881, + 0.46759906, 0.62263334, 0.26012173, 0.5555176, 0.40498644, 0.14175594, 0.23095612, 0.097079255, 0.24574867, + 0.6907576, 0.197432, 0.066900685, 0.7468896, 0.39028254, 0.5357583, 0.0604293, 0.18176621, 0.8629975, + 0.45142898, 0.64972955, 0.15937212, 0.49721566, 0.7669216, 0.6797584, 0.13026062, 0.6587438, 0.9456854, + 0.7216646, 0.14389832, 0.7930529, 0.7089534, 0.6324895, 0.85201263, 0.5689623, 0.023386315, 0.55464643, + 0.1438762, 0.63858664, 0.10003753, 0.8906633, 0.06681056, 0.66083544, 0.7771131, 0.24588616, 0.013456774, + 0.35584506, 0.54311025, 0.53678054, 0.41312417, 0.7752337, 0.10966951, 0.19275223, 0.007190934, 0.608798, + 0.344136, 0.46909887, 0.06686333, 0.971793, 0.65702224, 0.117866985, 0.31540743, 0.8895306, 0.99193364, + 0.18322523, 0.81854534, 0.5663911, 0.34374166, 0.72168875, 0.6152569, 0.7667953, 0.58608514, 0.026113158, + 0.2982243, 0.76183707, 0.3331729, 0.5468192, 0.43364936, 0.089349344, 0.41172776, 0.5850257, 0.7073037, + 0.8312039, 0.5198719, 0.4011136, 0.9837578, 0.7770344, 0.72782516, 0.8030205, 0.9101716, 0.019964492, + 0.13768478}; + +const std::vector expected_irdft1d_results_3 = { + 0.80091053, 1.548053, 1.3439665, 0.97278523, 0.65876126, 1.6395509, 1.0939313, 1.5905306, 0.81558955, + 1.1096439, 2.0799308, 1.9659967, 0.21628714, 1.2937224, 1.7173465, 1.5190675, 0.62673247, 1.3878733, + 2.2457566, 1.2779983, 0.9537279, 1.5238736, 1.6959155, 0.9063804, 1.2134336, 1.4805167, 1.277886, + 0.9217217, 1.3267591, 2.0169291, 2.619178, 0.7248324, 1.4161175, 1.3378929, 1.6759893, 0.85183966, + 0.53280216, 1.4385536, 1.7184939, 1.3292406, 1.1811583, 0.9698347, 1.5283158, 1.3752562, 0.99182767, + 1.3061998, 1.7824118, 1.399513, 0.26604116, 1.3193192, 1.5053986, 1.0388529, 0.9190526, 1.4711612, + 2.0971189, 0.37586892, 1.5662622, 1.6827406, 1.208139, 1.0144035, 0.96595216, 2.1122026, 1.6039357, + 0.44462752, 0.34932646, 1.487859, 0.9802158, 1.0321891, 0.4064213, 1.7653472, 1.5080582, 0.75743484, + 1.2409652, 2.0487022, 1.567386, 0.68034726, 1.5328329, 1.2476723, 1.3539927, 0.8549268}; + +const std::vector expected_irdft2d_results_1 = { + 0.106065355, 0.7454709, 0.5723129, 0.45824066, 0.384706, 0.27398905, 0.6679619, 0.39547434, 0.2815724, + 0.779919, 0.59909385, 0.122946456, 0.38957337, 0.97498655, 0.46759892, 0.14017127, 0.04206834, 0.72799486, + 0.61560476, 0.9027304, 0.6226336, 0.2601218, 0.5555171, 0.4049862, 0.14175594, 0.57774574, 0.52652067, + 0.9385676, 0.958878, 0.9844308, 0.2309568, 0.0970796, 0.24574815, 0.6907565, 0.19743192, 0.8295261, + 0.3461272, 0.5140136, 0.66115695, 0.93362343, 0.06690116, 0.74688905, 0.39028272, 0.53575796, 0.060429227, + 0.89135474, 0.77787286, 0.67011875, 0.73505205, 0.6636992, 0.18176568, 0.8629964, 0.4514285, 0.6497283, + 0.15937214, 0.40598106, 0.7988508, 0.72915316, 0.07090413, 0.76971227, 0.49721542, 0.7669206, 0.6797579, + 0.13026048, 0.6587432, 0.24532847, 0.24545121, 0.83795464, 0.10549038, 0.72643167, 0.94568396, 0.72166353, + 0.14389817, 0.79305094, 0.7089523, 0.9724684, 0.9775141, 0.49999753, 0.6556916, 0.2687679, 0.6324893, + 0.85201234, 0.5689621, 0.023386242, 0.5546462, 0.36860004, 0.9603104, 0.3912346, 0.038073156, 0.8921232, + 0.14387667, 0.63858616, 0.10003737, 0.8906622, 0.06681097, 0.74586314, 0.4545233, 0.54724485, 0.6496472, + 0.7818348, 0.6608358, 0.77711284, 0.24588637, 0.0134570245, 0.35584468, 0.8038809, 0.027993381, 0.7367708, + 0.52754945, 0.90523165, 0.54310995, 0.5367796, 0.41312343, 0.7752323, 0.10966998, 0.13664615, 0.7828726, + 0.9083951, 0.524759, 0.7493586, 0.19275239, 0.0071907635, 0.60879755, 0.34413564, 0.4690983, 0.4192482, + 0.70729065, 0.1993285, 0.5303842, 0.65138334, 0.06686333, 0.97179186, 0.657022, 0.11786719, 0.3154068, + 0.8892283, 0.55640805, 0.9104763, 0.28466636, 0.093467236, 0.88953, 0.9919328, 0.18322526, 0.8185441, + 0.56639117, 0.014208457, 0.29673028, 0.6347738, 0.68019533, 0.39601144, 0.34374115, 0.72168803, 0.61525595, + 0.76679367, 0.5860848, 0.42768106, 0.7933919, 0.13130645, 0.68764144, 0.05312841, 0.026113434, 0.2982238, + 0.7618365, 0.3331724, 0.5468184, 0.15707079, 0.28592035, 0.15286529, 0.9368952, 0.35067078, 0.43364897, + 0.089348935, 0.41172677, 0.58502454, 0.7073026, 0.85983366, 0.08878795, 0.2671109, 0.8002475, 0.19422255, + 0.83120316, 0.5198712, 0.40111288, 0.98375624, 0.77703446, 0.03781964, 0.7042304, 0.68980736, 0.17102323, + 0.42153904, 0.7278248, 0.80301994, 0.91017085, 0.019965423, 0.13768451, 0.556689, 0.17991383, 0.6720085, + 0.7733324, 0.20881362}; + +const std::vector expected_irdft2d_results_2 = { + 0.10606504, 0.74547091, 0.57231341, 0.45824085, 0.38470576, 0.27398939, 0.66796227, 0.39547472, 0.28157284, + 0.77991920, 0.00000012, 0.00000025, 0.59909402, 0.12294612, 0.38957398, 0.97498753, 0.46759871, 0.14017182, + 0.04206866, 0.72799575, 0.61560553, 0.90273150, 0.00000029, 0.00000019, 0.62263335, 0.26012139, 0.55551768, + 0.40498611, 0.14175560, 0.57774629, 0.52652119, 0.93856842, 0.95887877, 0.98443111, 0.00000026, 0.00000029, + 0.23095626, 0.09707905, 0.24574875, 0.69075717, 0.19743158, 0.82952691, 0.34612741, 0.51401454, 0.66115784, + 0.93362381, 0.00000013, 0.00000019, 0.06690087, 0.74688917, 0.39028283, 0.53575807, 0.06042910, 0.89135566, + 0.77787371, 0.67011938, 0.73505260, 0.66369919, 0.00000020, 0.00000025, 0.66083517, 0.77711292, 0.24588620, + 0.01345654, 0.35584463, 0.80388178, 0.02799342, 0.73677143, 0.52754998, 0.90523178, 0.00000020, 0.00000022, + 0.54311002, 0.53678006, 0.41312413, 0.77523314, 0.10966939, 0.13664682, 0.78287364, 0.90839633, 0.52475940, + 0.74935884, 0.00000017, 0.00000024, 0.19275220, 0.00719083, 0.60879792, 0.34413568, 0.46909855, 0.41924857, + 0.70729118, 0.19932858, 0.53038468, 0.65138356, 0.00000024, 0.00000004, 0.06686326, 0.97179258, 0.65702215, + 0.11786667, 0.31540699, 0.88922984, 0.55640881, 0.91047768, 0.28466661, 0.09346649, 0.00000006, 0.00000008, + 0.88953045, 0.99193334, 0.18322520, 0.81854497, 0.56639084, 0.01420842, 0.29673067, 0.63477397, 0.68019596, + 0.39601113, 0.00000014, 0.00000022}; + +const std::vector expected_irdft3d_results_2 = { + 0.29655575, 0.59799123, 0.22431113, 0.46143103, 0.53208175, 0.32705094, 0.59367000, 0.29963828, 0.41763943, + 0.24033307, 0.42796425, 0.56577777, 0.37677909, 0.32099129, 0.28778578, 0.50527716, 0.39592624, -0.01477019, + 0.46390174, 0.48881302, 0.69299017, 0.69097986, 0.60120016, 0.82729206, -0.09137908, 0.49852066, 0.41157645, + 0.50370176, 0.50602146, 0.12422646, 0.66381460, 0.40124601, 0.71138931, 0.66414101, 0.50896081, 0.51854765, + 0.21342740, 0.75042767, 0.40385838, 0.28173387, 0.29258505, 0.34233110, 0.44617152, 0.32590713, 0.69813927, + 0.27029157, 0.49500125, 0.57849153, 0.52079012, 0.46437605, 0.44842544, 0.21380078, 0.57897044, 0.32123390, + 0.46531573, 0.55946432, 0.36995799, 0.19326348, 0.26279333, 0.89411452, 0.45806675, 0.58413552, 0.47982321, + 0.40877153, 0.23978246, 0.33369794, 0.56433968, 0.09308288, 0.20574836, 0.51936717, 0.46905154, 0.47775696, + 0.17856948, 0.04195880, 0.24284739, 0.63731160, 0.16159543, 0.08925854, 0.50157161, 0.67721677, 0.75653236, + 0.50840554, 0.73467008, 0.62163510, 0.00566245, 0.92257200, 0.42133956, 0.45249607, 0.36451271, 0.46674756, + 0.65809363, 0.29478180, 0.79919561, 0.37987672, 0.46803394, 0.20036376, 0.30268271, 0.62990812, 0.29745090, + 0.46503467, 0.30444576, 0.43581755, 0.38956261, 0.58891618, 0.43936615, 0.12833645, 0.82411153, 0.30960669, + 0.24676315, 0.39269569, 0.26772071, 0.46022705, 0.77598541, 0.46882716, 0.40922151, 0.28451272, 0.27156988, + 0.32720683, 0.48740341, 0.52519462, 0.47371313, 0.61046947, 0.46505542, 0.04019986, 0.27622309, 0.42926452, + 0.49897225, 0.04617115, 0.50902017, 0.74826910, 0.28548445, 0.63409441, 0.13183664, 0.02507987, 0.51695660, + 0.50593892, 0.17335312, 0.24157819, 0.45513622, 0.69800550, 0.40604969, 0.47128647, 0.59389774, 0.33534107, + 0.50887902, 0.82998967, 0.22642939, 0.32967160, 0.50515564, 0.54070049, 0.28947697, 0.35626388, 0.58235507, + 0.30633221, 0.50041779, 0.24975602, 0.38320678, 0.40595842, 0.50651077, 0.42963483, 0.25977121, 0.32014694, + 0.37577291, 0.46638206, 0.05511259, 0.45463482, 0.62685054, 0.13046773, 0.49768469, 0.47645129, 0.56182954, + 0.74548830, 0.73150766, 0.37579758, 0.14279248, 0.28705593, 0.45403320, 0.50334282, 0.24132925, 0.24104091, + 0.31220213, 0.62432518, 0.20954334, 0.09285936, 0.56852238, 0.42261752, 0.52830257, 0.25272655, 0.72091123, + 0.46923499, 0.24439716, 0.72211522, 0.33004626, 0.30411236, 0.56189500, 0.37390448, 0.40768394, 0.13754946, + 0.41746636, 0.50960175, 0.34250750, 0.65386079, 0.46042782, 0.54099804, 0.41183749, 0.40593833, 0.21666628, + 0.38087729, 0.64666439, 0.19817938, 0.29519793, 0.46272810, 0.49454878, 0.59059650, 0.54134465, 0.56793991, + 0.29395146, 0.52647797, 0.61291826, 0.24633402, 0.24791051, 0.22666050, 0.43238182, 0.20337301, 0.31388571, + 0.59658993, 0.29774026, 0.39935257, 0.77171166, 0.54813165, 0.74253426, 0.49906203, 0.53449270, 0.22820431, + 0.19888670, 0.56200754, 0.55242130, 0.36939947, 0.01671917, 0.60996081}; + +std::vector extendByOneDimension(const std::vector& params_vector) { + std::vector extended_params; + for (auto params : params_vector) { + params.input_shape.insert(params.input_shape.cbegin(), 1); + params.output_shape.insert(params.output_shape.cbegin(), 1); + for (auto& axis : params.axes) { + ++axis; + } + extended_params.push_back(std::move(params)); + } + return extended_params; +} + +const std::vector DFT_params_4d = { // With original shape - {{2, 10, 10, 2}, {2, 10, 10, 2}, {2}, input_data, expected_dft1d_results}, - {{2, 10, 10, 2}, {2, 10, 10, 2}, {1, 2}, input_data, expected_dft2d_results}, - {{2, 10, 10, 2}, {2, 10, 10, 2}, {0, 1, 2}, input_data, expected_dft3d_results}, - {{4, 6, 8, 2}, {4, 6, 8, 2}, {2}, input_data_1, expected_dft1d_results_1}, - {{4, 6, 8, 2}, {4, 6, 8, 2}, {1, 2}, input_data_1, expected_dft2d_results_1}, - {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1}, input_data_1, expected_dft2d_signal_size_results_2}, - {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 2}, input_data_1, expected_dft2d_signal_size_results_4}, - {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1, 2}, input_data_1, expected_dft3d_results_1}, + {{2, 10, 10, 2}, {2, 10, 10, 2}, {2}, {}, input_data, expected_dft1d_results}, + {{2, 10, 10, 2}, {2, 10, 10, 2}, {1, 2}, {}, input_data, expected_dft2d_results}, + {{2, 10, 10, 2}, {2, 10, 10, 2}, {0, 1, 2}, {}, input_data, expected_dft3d_results}, + {{4, 6, 8, 2}, {4, 6, 8, 2}, {2}, {}, input_data_1, expected_dft1d_results_1}, + {{4, 6, 8, 2}, {4, 6, 8, 2}, {1, 2}, {}, input_data_1, expected_dft2d_results_1}, + {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1}, {4, 6}, input_data_1, expected_dft2d_signal_size_results_2}, + {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 2}, {4, 8}, input_data_1, expected_dft2d_signal_size_results_4}, + {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1, 2}, {}, input_data_1, expected_dft3d_results_1}, // With changed shape - {{2, 10, 10, 2}, {2, 20, 10, 2}, {1}, input_data, expected_dft1d_signal_size_results}, - {{4, 6, 8, 2}, {5, 6, 9, 2}, {0, 2}, input_data_1, expected_dft2d_signal_size_results_1}, - {{4, 6, 8, 2}, {3, 6, 4, 2}, {0, 2}, input_data_1, expected_dft2d_signal_size_results_3}, - {{4, 6, 8, 2}, {5, 6, 4, 2}, {0, 2}, input_data_1, expected_dft2d_signal_size_results_5}, - {{4, 6, 8, 2}, {3, 7, 5, 2}, {0, 1, 2}, input_data_1, expected_dft3d_signal_size_results}, + {{2, 10, 10, 2}, {2, 20, 10, 2}, {1}, {20}, input_data, expected_dft1d_signal_size_results}, + {{4, 6, 8, 2}, {5, 6, 9, 2}, {0, 2}, {5, 9}, input_data_1, expected_dft2d_signal_size_results_1}, + {{4, 6, 8, 2}, {3, 6, 4, 2}, {0, 2}, {3, 4}, input_data_1, expected_dft2d_signal_size_results_3}, + {{4, 6, 8, 2}, {5, 6, 4, 2}, {0, 2}, {5, 4}, input_data_1, expected_dft2d_signal_size_results_5}, + {{4, 6, 8, 2}, {3, 7, 5, 2}, {0, 1, 2}, {3, 7, 5}, input_data_1, expected_dft3d_signal_size_results}, }; +const std::vector DFT_params_5d = extendByOneDimension(DFT_params_4d); -const std::vector idft_params = { +const std::vector IDFT_params_4d = { // Inversed to DFT with original shape cases - {{2, 10, 10, 2}, {2, 10, 10, 2}, {2}, input_data, expected_dft1d_results}, - {{2, 10, 10, 2}, {2, 10, 10, 2}, {1, 2}, input_data, expected_dft2d_results}, - {{2, 10, 10, 2}, {2, 10, 10, 2}, {0, 1, 2}, input_data, expected_dft3d_results}, - {{4, 6, 8, 2}, {4, 6, 8, 2}, {2}, input_data_1, expected_dft1d_results_1}, - {{4, 6, 8, 2}, {4, 6, 8, 2}, {1, 2}, input_data_1, expected_dft2d_results_1}, - {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1}, input_data_1, expected_dft2d_signal_size_results_2}, - {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 2}, input_data_1, expected_dft2d_signal_size_results_4}, - {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1, 2}, input_data_1, expected_dft3d_results_1}, + {{2, 10, 10, 2}, {2, 10, 10, 2}, {2}, {}, expected_dft1d_results, input_data}, + {{2, 10, 10, 2}, {2, 10, 10, 2}, {1, 2}, {}, expected_dft2d_results, input_data}, + {{2, 10, 10, 2}, {2, 10, 10, 2}, {0, 1, 2}, {}, expected_dft3d_results, input_data}, + {{4, 6, 8, 2}, {4, 6, 8, 2}, {2}, {}, expected_dft1d_results_1, input_data_1}, + {{4, 6, 8, 2}, {4, 6, 8, 2}, {1, 2}, {}, expected_dft2d_results_1, input_data_1}, + {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1}, {4, 6}, expected_dft2d_signal_size_results_2, input_data_1}, + {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 2}, {4, 8}, expected_dft2d_signal_size_results_4, input_data_1}, + {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1, 2}, {}, expected_dft3d_results_1, input_data_1}, // Other cases with original shape - {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1}, expected_idft2d_signal_size_results_2, input_data_1, "v2"}, - {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 2}, expected_idft2d_signal_size_results_4, input_data_1, "v2"}, + {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 1}, {4, 6}, input_data_1, expected_idft2d_signal_size_results_2, "v2"}, + {{4, 6, 8, 2}, {4, 6, 8, 2}, {0, 2}, {4, 8}, input_data_1, expected_idft2d_signal_size_results_4, "v2"}, // With changed shape - {{4, 6, 8, 2}, {4, 7, 8, 2}, {1}, expected_idft1d_signal_size_results, input_data_1}, - {{4, 6, 8, 2}, {5, 6, 9, 2}, {0, 2}, expected_idft2d_signal_size_results_1, input_data_1}, - {{4, 6, 8, 2}, {3, 6, 4, 2}, {0, 2}, expected_idft2d_signal_size_results_3, input_data_1}, - {{4, 6, 8, 2}, {5, 6, 4, 2}, {0, 2}, expected_idft2d_signal_size_results_5, input_data_1}, - {{4, 6, 8, 2}, {3, 7, 5, 2}, {0, 1, 2}, expected_idft3d_signal_size_results, input_data_1}, + {{4, 6, 8, 2}, {4, 7, 8, 2}, {1}, {7}, input_data_1, expected_idft1d_signal_size_results}, + {{4, 6, 8, 2}, {5, 6, 9, 2}, {0, 2}, {5, 9}, input_data_1, expected_idft2d_signal_size_results_1}, + {{4, 6, 8, 2}, {3, 6, 4, 2}, {0, 2}, {3, 4}, input_data_1, expected_idft2d_signal_size_results_3}, + {{4, 6, 8, 2}, {5, 6, 4, 2}, {0, 2}, {5, 4}, input_data_1, expected_idft2d_signal_size_results_5}, + {{4, 6, 8, 2}, {3, 7, 5, 2}, {0, 1, 2}, {3, 7, 5}, input_data_1, expected_idft3d_signal_size_results}, +}; +const std::vector IDFT_params_5d = extendByOneDimension(IDFT_params_4d); + +const std::vector RDFT_params_3d = { + {{2, 10, 10}, {2, 10, 6, 2}, {2}, {}, rinput_data, expected_rdft1d_results_1}, + {{2, 10, 10}, {2, 10, 6, 2}, {2}, {10}, rinput_data, expected_rdft1d_results_1}, + {{2, 10, 10}, {2, 10, 3, 2}, {2}, {5}, rinput_data, expected_rdft1d_results_2}, + {{2, 10, 10}, {2, 10, 7, 2}, {2}, {12}, rinput_data, expected_rdft1d_results_3}, + {{2, 10, 10}, {2, 10, 6, 2}, {1, 2}, {}, rinput_data, expected_rdft2d_results}, + {{2, 10, 10}, {2, 10, 6, 2}, {1, 2}, {10, 10}, rinput_data, expected_rdft2d_results}, + {{2, 10, 10}, {2, 5, 7, 2}, {1, 2}, {5, 12}, rinput_data, expected_rdft2d_results_2}, + {{2, 10, 10}, {2, 10, 6, 2}, {0, 1, 2}, {}, rinput_data, expected_rdft3d_results}, + {{2, 10, 10}, {4, 5, 7, 2}, {0, 1, 2}, {4, 5, 12}, rinput_data, expected_rdft3d_results_2}, +}; +const std::vector RDFT_params_4d = extendByOneDimension(RDFT_params_3d); + +const std::vector IRDFT_params_4d = { + {{2, 10, 6, 2}, {2, 10, 10}, {2}, {}, expected_rdft1d_results_1, rinput_data}, + {{2, 10, 6, 2}, {2, 10, 10}, {2}, {10}, expected_rdft1d_results_1, rinput_data}, + {{2, 10, 3, 2}, {2, 10, 5}, {2}, {5}, expected_rdft1d_results_2, expected_irdft1d_results_2}, + {{2, 10, 7, 2}, {2, 10, 4}, {2}, {4}, expected_rdft1d_results_3, expected_irdft1d_results_3}, + {{2, 10, 6, 2}, {2, 10, 10}, {1, 2}, {}, expected_rdft2d_results, rinput_data}, + {{2, 10, 6, 2}, {2, 10, 10}, {1, 2}, {10, 10}, expected_rdft2d_results, rinput_data}, + {{2, 5, 7, 2}, {2, 5, 12}, {1, 2}, {5, 12}, expected_rdft2d_results_2, expected_irdft2d_results_2}, + {{2, 10, 6, 2}, {2, 10, 10}, {0, 1, 2}, {10, 10}, expected_rdft3d_results, rinput_data}, + {{2, 10, 6, 2}, {4, 5, 12}, {0, 1, 2}, {4, 5, 12}, expected_rdft3d_results, expected_irdft3d_results_2}, +}; +const std::vector IRDFT_params_5d = extendByOneDimension(IRDFT_params_4d); + +const format plain_format_4d = format::bfyx; +const std::vector blocked_format_4d = { + format::bfyx, + format::b_fs_yx_fsv16, + format::b_fs_yx_fsv32, + format::bs_fs_yx_bsv16_fsv16, + format::bs_fs_yx_bsv32_fsv16, + format::bs_fs_yx_bsv32_fsv32, }; -INSTANTIATE_TEST_SUITE_P(smoke_dft_float32, - dft_gpu_test_f32, - testing::ValuesIn(dft_params), - dft_gpu_test_f32::PrintToStringParamName); +const format plain_format_5d = format::bfzyx; +const std::vector blocked_format_5d = { + format::bfzyx, + format::b_fs_zyx_fsv16, + format::b_fs_zyx_fsv32, + format::bs_fs_zyx_bsv16_fsv32, + format::bs_fs_zyx_bsv16_fsv16, + format::bs_fs_zyx_bsv32_fsv32, + format::bs_fs_zyx_bsv32_fsv16, +}; -INSTANTIATE_TEST_SUITE_P(smoke_dft_float16, - dft_gpu_test_f16, - testing::ValuesIn(dft_params), - dft_gpu_test_f16::PrintToStringParamName); +using dft_gpu_test_float = dft_gpu_test; +using dft_gpu_test_half_t = dft_gpu_test; -INSTANTIATE_TEST_SUITE_P(smoke_idft_float32, - idft_gpu_test_f32, - testing::ValuesIn(idft_params), - idft_gpu_test_f32::PrintToStringParamName); +TEST_P(dft_gpu_test_float, test) { + ASSERT_NO_FATAL_FAILURE(test()); +} -INSTANTIATE_TEST_SUITE_P(smoke_idft_float16, - idft_gpu_test_f16, - testing::ValuesIn(idft_params), - idft_gpu_test_f16::PrintToStringParamName); +TEST_P(dft_gpu_test_half_t, test) { + ASSERT_NO_FATAL_FAILURE(test()); +} + +#define INSTANTIATE_DFT_TEST_SUITE(dftType, dimension, inputType) \ + INSTANTIATE_TEST_SUITE_P(smoke_##dftType##_##dimension##_##inputType, \ + dft_gpu_test_##inputType, \ + testing::Combine(testing::Values(plain_format_##dimension), \ + testing::ValuesIn(blocked_format_##dimension), \ + testing::Values(dftType), \ + testing::ValuesIn(dftType##_params_##dimension)), \ + dft_gpu_test_##inputType::PrintToStringParamName); + +#define INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(dftType, dimension) \ + INSTANTIATE_DFT_TEST_SUITE(dftType, dimension, float) \ + INSTANTIATE_DFT_TEST_SUITE(dftType, dimension, half_t) + +INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(DFT, 4d) +INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(DFT, 5d) + +INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(IDFT, 4d) +INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(IDFT, 5d) + +INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(RDFT, 4d) +// We don't have 5d tests for RDFT, as it doesn't support 5d blocked input, because output would need to be 6d blocked + +INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(IRDFT, 4d) +INSTANTIATE_DFT_TEST_SUITE_WITH_TYPES(IRDFT, 5d) } // namespace diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/dft.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/dft.cpp index f73ceff0457..19b6b9eaa97 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/dft.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/dft.cpp @@ -8,15 +8,21 @@ namespace { -const std::vector opTypes = {ngraph::helpers::DFTOpType::FORWARD, - ngraph::helpers::DFTOpType::INVERSE}; -const std::vector inputPrecision = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16}; +const std::vector opTypes = { + ngraph::helpers::DFTOpType::FORWARD, + ngraph::helpers::DFTOpType::INVERSE, +}; + +const std::vector inputPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16, +}; + const auto combine = [](const std::vector& inputShapes, const std::vector>& axes, const std::vector>& signalSizes) { return testing::Combine(testing::ValuesIn(inputShapes), - testing::ValuesIn(inputPrecision), + testing::ValuesIn(inputPrecisions), testing::ValuesIn(axes), testing::ValuesIn(signalSizes), testing::ValuesIn(opTypes), @@ -27,39 +33,58 @@ using namespace LayerTestsDefinitions; INSTANTIATE_TEST_SUITE_P(smoke_DFT_2d, DFTLayerTest, - combine({{10, 2}, {1, 2}}, // input shapes - {{0}, {-1}}, // axes - {{}, {5}}), // signal sizes + combine({{10, 2}}, // input shapes + {{0}}, // axes + {{}, {3}}), // signal sizes DFTLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_DFT_3d, DFTLayerTest, - combine({{10, 4, 2}, {1, 17, 2}}, // input shapes - {{0, 1}, {-1, -2}}, // axes - {{}, {5, 2}}), // signal sizes + combine({{10, 4, 2}}, // input shapes + {{0, 1}}, // axes + {{}, {3, 10}}), // signal sizes DFTLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_DFT_4d, DFTLayerTest, - combine({{10, 4, 8, 2}, {1, 17, 12, 2}}, // input shapes - {{0, 1, 2}, {-1, -2, -3}}, // axes - {{}, {5, 2, 5}}), // signal sizes + combine({{10, 4, 8, 2}}, // input shapes + {{0, 1, 2}}, // axes + {{}, {3, 10, 8}}), // signal sizes DFTLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_DFT_4d_negative_reversed_axes, + DFTLayerTest, + combine({{10, 4, 8, 2}}, // input shapes + {{-1, -2, -3}}, // axes + {{}, {8, 10, 3}}), // signal sizes + DFTLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_DFT_4d_single_axis, + DFTLayerTest, + combine({{10, 4, 8, 2}}, // input shapes + {{0}, {1}, {2}}, // axes + {{}, {1}, {5}, {20}}), // signal sizes + DFTLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_DFT_5d, DFTLayerTest, - combine({{10, 4, 8, 2, 2}, {1, 17, 12, 1, 2}}, // input shapes - {{0, 1, 2, 3}, {-1, -2, -3, -4}}, // axes - {{}, {5, 2, 5, 20}}), // signal sizes + combine({{10, 4, 8, 2, 2}}, // input shapes + {{0, 1, 2, 3}}, // axes + {{}, {3, 10, 8, 6}}), // signal sizes DFTLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_DFT_6d, DFTLayerTest, - combine({{10, 4, 8, 2, 5, 2}, {1, 17, 12, 1, 7, 2}}, // input shapes - {{0, 1, 2, 3, 4}, {-1, -2, -3, -4, -5}}, // axes - {{}, {5, 2, 5, 20, 10}}), // signal sizes + combine({{10, 4, 8, 2, 5, 2}}, // input shapes + {{0, 1, 2, 3, 4}}, // axes + {{}, {3, 10, 8, 6, 2}}), // signal sizes DFTLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_DFT_6d_zero, DFTLayerTest, - combine({{10, 4, 8, 2, 5, 2}, {1, 17, 12, 1, 7, 2}}, // input shapes - {{}}, // axes - {{}}), // signal sizes + combine({{10, 4, 8, 2, 5, 2}}, // input shapes + {{}}, // axes + {{}}), // signal sizes DFTLayerTest::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/rdft.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/rdft.cpp new file mode 100644 index 00000000000..e0cf68e54a0 --- /dev/null +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/rdft.cpp @@ -0,0 +1,102 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +namespace { + +const std::vector opTypes = { + ngraph::helpers::DFTOpType::FORWARD, + ngraph::helpers::DFTOpType::INVERSE, +}; + +const std::vector inputPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16, +}; + +const auto combine = [](const std::vector& inputShapes, + const std::vector>& axes, + const std::vector>& signalSizes) { + return testing::Combine(testing::ValuesIn(inputShapes), + testing::ValuesIn(inputPrecisions), + testing::ValuesIn(axes), + testing::ValuesIn(signalSizes), + testing::ValuesIn(opTypes), + testing::Values(CommonTestUtils::DEVICE_GPU)); +}; + +using namespace LayerTestsDefinitions; + +// RDFT can support 1d +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_1d, + RDFTLayerTest, + testing::Combine(testing::Values(InferenceEngine::SizeVector{10}), + testing::ValuesIn(inputPrecisions), + testing::Values(std::vector{0}), + testing::Values(std::vector{}), + testing::Values(ngraph::helpers::DFTOpType::FORWARD), + testing::Values(CommonTestUtils::DEVICE_GPU)), + RDFTLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_3d, + RDFTLayerTest, + combine({{10, 4, 2}}, // input shapes + {{0, 1}}, // axes + {{}, {3, 10}}), // signal sizes + RDFTLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d, + RDFTLayerTest, + combine({{10, 4, 8, 2}}, // input shapes + {{0, 1, 2}}, // axes + {{}, {3, 10, 8}}), // signal sizes + RDFTLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d_negative_reversed_axes, + RDFTLayerTest, + combine({{10, 4, 8, 2}}, // input shapes + {{-1, -2, -3}}, // axes + {{}, {8, 10, 3}}), // signal sizes + RDFTLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d_single_axis, + RDFTLayerTest, + combine({{10, 4, 8, 2}}, // input shapes + {{0}, {1}, {2}}, // axes + {{}, {1}, {5}, {20}}), // signal sizes + RDFTLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_5d, + RDFTLayerTest, + combine({{10, 4, 8, 2, 2}}, // input shapes + {{0, 1, 2, 3}}, // axes + {{}, {3, 10, 8, 6}}), // signal sizes + RDFTLayerTest::getTestCaseName); + +// RDFT can support last axis +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_5d_last_axis, + RDFTLayerTest, + testing::Combine(testing::Values(InferenceEngine::SizeVector{10, 4, 8, 2, 5}), + testing::ValuesIn(inputPrecisions), + testing::ValuesIn(std::vector>{{{0, 1, 2, 3, 4}}}), + testing::ValuesIn(std::vector>{{}, {3, 10, 8, 6, 2}}), + testing::Values(ngraph::helpers::DFTOpType::FORWARD), + testing::Values(CommonTestUtils::DEVICE_GPU)), + RDFTLayerTest::getTestCaseName); + +// IRDFT can support 6d +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_6d, + RDFTLayerTest, + testing::Combine(testing::Values(InferenceEngine::SizeVector{10, 4, 8, 2, 5, 2}), + testing::ValuesIn(inputPrecisions), + testing::ValuesIn(std::vector>{{{0, 1, 2, 3, 4}}}), + testing::ValuesIn(std::vector>{{}, {3, 10, 8, 6, 2}}), + testing::Values(ngraph::helpers::DFTOpType::INVERSE), + testing::Values(CommonTestUtils::DEVICE_GPU)), + RDFTLayerTest::getTestCaseName); + +} // namespace