diff --git a/src/bindings/python/tests/test_ngraph/test_dft.py b/src/bindings/python/tests/test_ngraph/test_dft.py
index 269a878793e..3176fd1f99f 100644
--- a/src/bindings/python/tests/test_ngraph/test_dft.py
+++ b/src/bindings/python/tests/test_ngraph/test_dft.py
@@ -2,7 +2,8 @@
 # Copyright (C) 2018-2022 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-import openvino.runtime.opset8 as ov
+import openvino.runtime.opset9 as ov
+from openvino.runtime import Shape
 import numpy as np
 from tests.runtime import get_runtime
 
diff --git a/src/bindings/python/tests/test_ngraph/test_rdft.py b/src/bindings/python/tests/test_ngraph/test_rdft.py
new file mode 100644
index 00000000000..3c3392f72aa
--- /dev/null
+++ b/src/bindings/python/tests/test_ngraph/test_rdft.py
@@ -0,0 +1,165 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2018-2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import openvino.runtime.opset9 as ov
+from openvino.runtime import Shape
+import numpy as np
+from tests.runtime import get_runtime
+
+
+np.random.seed(0)
+
+
+def test_rdft_1d():
+    runtime = get_runtime()
+    input_size = 50
+    shape = [input_size]
+    data = np.random.uniform(0, 1, shape).astype(np.float32)
+    param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
+    input_axes = ov.constant(np.array([0], dtype=np.int64))
+
+    node = ov.rdft(param, input_axes)
+    computation = runtime.computation(node, param)
+    actual = computation(data)
+    np_results = np.fft.rfft(data)
+    expected_results = np.stack((np_results.real, np_results.imag), axis=-1)
+    np.testing.assert_allclose(expected_results, actual[0], atol=0.0001)
+
+
+def test_irdft_1d():
+    runtime = get_runtime()
+    signal_size = 50
+    shape = [signal_size // 2 + 1, 2]
+    data = np.random.uniform(0, 1, shape).astype(np.float32)
+    param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
+    input_axes = ov.constant(np.array([0], dtype=np.int64))
+    node = ov.irdft(param, input_axes, ov.constant(np.array([signal_size], dtype=np.int64)))
+    computation = runtime.computation(node, param)
+    actual = computation(data)
+    expected_results = np.fft.irfft(data[:, 0] + 1j * data[:, 1], signal_size)
+    np.testing.assert_allclose(expected_results, actual[0], atol=0.0001)
+
+
+def test_rdft_2d():
+    runtime = get_runtime()
+    shape = [100, 128]
+    data = np.random.uniform(0, 1, shape).astype(np.float32)
+    param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
+    axes = [0, 1]
+    input_axes = ov.constant(np.array(axes, dtype=np.int64))
+    node = ov.rdft(param, input_axes)
+    computation = runtime.computation(node, param)
+    actual = computation(data)
+    np_results = np.fft.rfftn(data, axes=axes)
+    expected_results = np.stack((np_results.real, np_results.imag), axis=-1)
+    np.testing.assert_allclose(expected_results, actual[0], atol=0.0007)
+
+
+def test_rdft_2d_signal_size():
+    runtime = get_runtime()
+    shape = [100, 128]
+    data = np.random.uniform(0, 1, shape).astype(np.float32)
+    param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
+    axes = [0, 1]
+    signal_size = [30, 40]
+    axes_node = ov.constant(np.array(axes, dtype=np.int64))
+    signal_size_node = ov.constant(np.array(signal_size, dtype=np.int64))
+    node = ov.rdft(param, axes_node, signal_size_node)
+    computation = runtime.computation(node, param)
+    actual = computation(data)
+    np_results = np.fft.rfftn(data, s=signal_size, axes=axes)
+    expected_results = np.stack((np_results.real, np_results.imag), axis=-1)
+    np.testing.assert_allclose(expected_results, actual[0], atol=0.0007)
+
+
+def test_irdft_2d():
+    runtime = get_runtime()
+    axes = [0, 1]
+    input_shape = [100, 65, 2]
+    data = np.random.uniform(0, 1, input_shape).astype(np.float32)
+    param = ov.parameter(Shape(input_shape), name="input", dtype=np.float32)
+    input_axes = ov.constant(np.array(axes, dtype=np.int64))
+    node = ov.irdft(param, input_axes)
+    computation = runtime.computation(node, param)
+    actual = computation(data)
+    expected_results = np.fft.irfftn(data[:, :, 0] + 1j * data[:, :, 1], axes=axes)
+    np.testing.assert_allclose(expected_results, actual[0], atol=0.0001)
+
+
+def test_irdft_2d_signal_size():
+    runtime = get_runtime()
+    axes = [0, 1]
+    input_shape = [100, 65, 2]
+    signal_size = [100, 65]
+    data = np.random.uniform(0, 1, input_shape).astype(np.float32)
+    param = ov.parameter(Shape(input_shape), name="input", dtype=np.float32)
+    input_axes = ov.constant(np.array(axes, dtype=np.int64))
+    signal_size_node = ov.constant(np.array(signal_size, dtype=np.int64))
+    node = ov.irdft(param, input_axes, signal_size_node)
+    computation = runtime.computation(node, param)
+    actual = computation(data)
+    expected_results = np.fft.irfftn(data[:, :, 0] + 1j * data[:, :, 1], s=signal_size, axes=axes)
+    np.testing.assert_allclose(expected_results, actual[0], atol=0.0001)
+
+
+def test_rdft_4d():
+    runtime = get_runtime()
+    shape = [1, 192, 36, 64]
+    data = np.random.uniform(0, 1, shape).astype(np.float32)
+    param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
+    axes = [-2, -1]
+    input_axes = ov.constant(np.array(axes, dtype=np.int64))
+    node = ov.rdft(param, input_axes)
+    computation = runtime.computation(node, param)
+    actual = computation(data)
+    np_results = np.fft.rfftn(data, axes=axes)
+    expected_results = np.stack((np_results.real, np_results.imag), axis=-1)
+    np.testing.assert_allclose(expected_results, actual[0], atol=0.0007)
+
+
+def test_rdft_4d_signal_size():
+    runtime = get_runtime()
+    shape = [1, 192, 36, 64]
+    signal_size = [36, 64]
+    data = np.random.uniform(0, 1, shape).astype(np.float32)
+    param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
+    axes = [-2, -1]
+    input_axes = ov.constant(np.array(axes, dtype=np.int64))
+    signal_size_node = ov.constant(np.array(signal_size, dtype=np.int64))
+    node = ov.rdft(param, input_axes, signal_size_node)
+    computation = runtime.computation(node, param)
+    actual = computation(data)
+    np_results = np.fft.rfftn(data, signal_size, axes=axes)
+    expected_results = np.stack((np_results.real, np_results.imag), axis=-1)
+    np.testing.assert_allclose(expected_results, actual[0], atol=0.0007)
+
+
+def test_irdft_4d():
+    runtime = get_runtime()
+    shape = [1, 192, 36, 33, 2]
+    data = np.random.uniform(0, 1, shape).astype(np.float32)
+    param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
+    axes = [-2, -1]
+    input_axes = ov.constant(np.array(axes, dtype=np.int64))
+    node = ov.irdft(param, input_axes)
+    computation = runtime.computation(node, param)
+    actual = computation(data)
+    expected_results = np.fft.irfftn(data[:, :, :, :, 0] + 1j * data[:, :, :, :, 1], axes=axes)
+    np.testing.assert_allclose(expected_results, actual[0], atol=0.0001)
+
+
+def test_irdft_4d_signal_size():
+    runtime = get_runtime()
+    shape = [1, 192, 36, 33, 2]
+    signal_size = [36, 64]
+    data = np.random.uniform(0, 1, shape).astype(np.float32)
+    param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
+    axes = [-2, -1]
+    input_axes = ov.constant(np.array(axes, dtype=np.int64))
+    signal_size_node = ov.constant(np.array(signal_size, dtype=np.int64))
+    node = 
ov.irdft(param, input_axes, signal_size_node) + computation = runtime.computation(node, param) + actual = computation(data) + expected_results = np.fft.irfftn(data[:, :, :, :, 0] + 1j * data[:, :, :, :, 1], signal_size, axes=axes) + np.testing.assert_allclose(expected_results, actual[0], atol=0.0001) diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp index f824285e4d5..e2f8d3a720c 100644 --- a/src/plugins/intel_cpu/src/cpu_types.cpp +++ b/src/plugins/intel_cpu/src/cpu_types.cpp @@ -141,6 +141,8 @@ const InferenceEngine::details::caseless_unordered_map type_t { "ShuffleChannels", Type::ShuffleChannels}, { "DFT", Type::DFT}, { "IDFT", Type::DFT}, + { "RDFT", Type::RDFT}, + { "IRDFT", Type::RDFT}, { "Abs", Type::Math}, { "Acos", Type::Math}, { "Acosh", Type::Math}, @@ -328,6 +330,8 @@ std::string NameFromType(const Type type) { return "ShuffleChannels"; case Type::DFT: return "DFT"; + case Type::RDFT: + return "RDFT"; case Type::Math: return "Math"; case Type::CTCLoss: diff --git a/src/plugins/intel_cpu/src/cpu_types.h b/src/plugins/intel_cpu/src/cpu_types.h index 88b64e9ad9e..a5a680969cc 100644 --- a/src/plugins/intel_cpu/src/cpu_types.h +++ b/src/plugins/intel_cpu/src/cpu_types.h @@ -79,6 +79,7 @@ enum class Type { Reference, ShuffleChannels, DFT, + RDFT, Math, CTCLoss, Bucketize, diff --git a/src/plugins/intel_cpu/src/nodes/kernels/rdft_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/rdft_kernel.cpp new file mode 100644 index 00000000000..3a97d2f5581 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/kernels/rdft_kernel.cpp @@ -0,0 +1,447 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rdft_kernel.hpp" +#include + +namespace ov { +namespace intel_cpu { + +#define GET_OFF(field) offsetof(jit_dft_args, field) + +template +void jit_dft_kernel_f32::generate() { + using namespace Xbyak::util; + using Xbyak::Label; + using Xbyak::Xmm; + using Vmm = typename conditional3::type; + + this->preamble(); + + int input_type_size = 0; + int output_type_size = 0; + + switch (kernel_type_) { + case real_to_complex: + input_type_size = type_size; + output_type_size = complex_type_size(); + break; + case complex_to_complex: + input_type_size = complex_type_size(); + output_type_size = complex_type_size(); + break; + case complex_to_real: + input_type_size = complex_type_size(); + output_type_size = type_size; + break; + } + int vlen = cpu_isa_traits::vlen; + const int simd_size = vlen / output_type_size; + + mov(input_ptr, ptr[param1 + GET_OFF(input)]); + mov(input_size, ptr[param1 + GET_OFF(input_size)]); + mov(twiddles_ptr, ptr[param1 + GET_OFF(twiddles)]); + mov(output_start, ptr[param1 + GET_OFF(output_start)]); + mov(output_end, ptr[param1 + GET_OFF(output_end)]); + + // offset twiddles_ptr by input_size * complex_type_size() * output_start bytes + mov(signal_size, ptr[param1 + GET_OFF(signal_size)]); + mov(rax, signal_size); + lea(rax, ptr[rax * complex_type_size()]); + xor_(rdx, rdx); + mul(output_start); + add(twiddles_ptr, rax); + + // offset output_ptr by output_start * output_type_size bytes + mov(output_ptr, ptr[param1 + GET_OFF(output)]); + lea(output_ptr, ptr[output_ptr + output_type_size * output_start]); + + size_t reg_idx = 0; + Xmm xmm_signal_size = Xmm(reg_idx); + Vmm vmm_signal_size = Vmm(reg_idx); + if (is_inverse_) { + reg_idx++; + uni_vbroadcastss(Vmm(reg_idx), ptr[param1 + GET_OFF(signal_size)]); + uni_vcvtdq2ps(vmm_signal_size, Vmm(reg_idx)); + } + + Vmm vmm_neg_mask = 
Vmm(reg_idx); + Xmm xmm_neg_mask = Xmm(reg_idx); + if (kernel_type_ == complex_to_complex) { + reg_idx++; + if (!is_inverse_) { + mov(rax, 1ULL << 31); + } else { + mov(rax, 1ULL << 63); + } + uni_vmovq(xmm_neg_mask, rax); + uni_vbroadcastsd(vmm_neg_mask, xmm_neg_mask); + } + + mov(rax, signal_size); + and_(rax, 1); + setz(is_signal_size_even); + + Label loop_over_output; + Label loop_over_output_continue; + Label loop_simd; + Label loop_nonsimd; + + auto simd_loop = [this, vlen, simd_size, + input_type_size, reg_idx, + &vmm_signal_size, + &xmm_neg_mask, + &vmm_neg_mask] { + size_t idx = reg_idx; + Vmm result = Vmm(idx++); + Vmm inp_real = Vmm(idx++); + Vmm inp_imag = Vmm(idx++); + const Vmm& input = inp_real; + const Vmm& input_perm = inp_imag; + Vmm twiddles = Vmm(idx++); + const Vmm& cos = twiddles; + Vmm sin = Vmm(idx++); + Xmm tmp = Xmm(idx++); + + uni_vpxor(result, result, result); + + if (kernel_type_ == complex_to_complex && is_inverse_) { + mov(rdx, 1ULL << 63); + uni_vmovq(xmm_neg_mask, rdx); + uni_vbroadcastsd(vmm_neg_mask, xmm_neg_mask); + } + + Label loop; + L(loop); + { + if (kernel_type_ == real_to_complex) { + uni_vbroadcastss(inp_real, ptr[input_ptr]); + uni_vmovups(twiddles, ptr[twiddles_ptr]); + uni_vfmadd231ps(result, inp_real, twiddles); + + add(twiddles_ptr, vlen); + } else if (kernel_type_ == complex_to_real) { + uni_vbroadcastss(inp_real, ptr[input_ptr]); + uni_vbroadcastss(inp_imag, ptr[input_ptr + type_size]); + uni_vmovups(cos, ptr[twiddles_ptr]); + uni_vmovups(sin, ptr[twiddles_ptr + vlen]); + uni_vfmadd231ps(result, inp_real, cos); + uni_vfmadd231ps(result, inp_imag, sin); + + add(twiddles_ptr, 2 * vlen); + } else if (kernel_type_ == complex_to_complex) { + // output_real += input_real * cos(..) - input_imag * sin(..) + // output_imag += input_imag * cos(..) + input_real * sin(..) + uni_vbroadcastsd(input, ptr[input_ptr]); + uni_vpermilps(input_perm, input, 0b10110001); // swap real with imag + uni_vpxor(input_perm, input_perm, vmm_neg_mask); // negate imag part (or real part if is_inverse == true) + load_and_broadcast_every_other_elem(cos, twiddles_ptr, tmp); + load_and_broadcast_every_other_elem(sin, twiddles_ptr + vlen / 2, tmp); + uni_vfmadd231ps(result, input, cos); + uni_vfmadd231ps(result, input_perm, sin); + + add(twiddles_ptr, vlen); + } + + add(input_ptr, input_type_size); + + dec(input_size); + cmp(input_size, 0); + jne(loop, T_NEAR); + } + + if (is_inverse_) { + Label loop_backwards; + Label loop_backwards_exit; + + mov(input_size, signal_size); + sub(input_size, ptr[param1 + GET_OFF(input_size)]); + + if (kernel_type_ == complex_to_complex) { + mov(rdx, 1ULL << 31); + vmovq(xmm_neg_mask, rdx); + uni_vbroadcastsd(vmm_neg_mask, xmm_neg_mask); + } + + test(is_signal_size_even, 1); + jz(loop_backwards); + + sub(input_ptr, input_type_size); + + L(loop_backwards); + { + cmp(input_size, 0); + je(loop_backwards_exit, T_NEAR); + + sub(input_ptr, input_type_size); + if (kernel_type_ == complex_to_real) { + uni_vbroadcastss(inp_real, ptr[input_ptr]); + uni_vbroadcastss(inp_imag, ptr[input_ptr + type_size]); + uni_vmovups(cos, ptr[twiddles_ptr]); + uni_vmovups(sin, ptr[twiddles_ptr + vlen]); + + uni_vfmadd231ps(result, inp_real, cos); + uni_vfnmadd231ps(result, inp_imag, sin); + add(twiddles_ptr, 2 * vlen); + } else if (kernel_type_ == complex_to_complex) { + // output_real += input_real * cos(..) - input_imag * sin(..) + // output_imag += input_imag * cos(..) + input_real * sin(..) 
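+                // This tail walks the stored half of the spectrum backwards and relies on the
+                // conjugate symmetry of a real signal's DFT: the bins that were not materialized
+                // are the conjugates of the stored ones (mirroring the reference implementation's
+                // inverse tail). The pair of FMAs below performs a complex multiply-accumulate on
+                // interleaved (re, im) lanes: 'input_perm' is the swapped copy of 'input' with one
+                // component sign-flipped through vmm_neg_mask, so cos/sin FMAs produce both the
+                // real and imaginary lanes of the product at once.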
+ uni_vbroadcastsd(input, ptr[input_ptr]); + uni_vpermilps(input_perm, input, 0b10110001); // swap real with imag + uni_vpxor(input_perm, input_perm, vmm_neg_mask); // negate imag part + load_and_broadcast_every_other_elem(cos, twiddles_ptr, tmp); + load_and_broadcast_every_other_elem(sin, twiddles_ptr + vlen / 2, tmp); + uni_vfmadd231ps(result, input, cos); + uni_vfmadd231ps(result, input_perm, sin); + add(twiddles_ptr, vlen); + } + + dec(input_size); + jmp(loop_backwards, T_NEAR); + } + L(loop_backwards_exit); + } + + if (is_inverse_) { + uni_vdivps(result, result, vmm_signal_size); + } + // store the results + uni_vmovups(ptr[output_ptr], result); + + add(output_ptr, vlen); + sub(output_end, simd_size); + }; + + auto nonsimd_loop = [this, + input_type_size, + output_type_size, + &xmm_signal_size, + reg_idx] { + size_t idx = reg_idx; + Xmm xmm_inp_real = Xbyak::Xmm(idx++); + Xmm xmm_inp_imag = Xbyak::Xmm(idx++); + Xmm xmm_real = Xbyak::Xmm(idx++); + Xmm xmm_imag = Xbyak::Xmm(idx++); + Xmm xmm_cos = Xbyak::Xmm(idx++); + Xmm xmm_sin = Xbyak::Xmm(idx++); + + if (kernel_type_ != complex_to_real) { + xorps(xmm_real, xmm_real); + xorps(xmm_imag, xmm_imag); + } else { + xorps(xmm_real, xmm_real); + } + + Label loop; + L(loop); + { + movss(xmm_cos, ptr[twiddles_ptr]); + movss(xmm_sin, ptr[twiddles_ptr + type_size]); + if (kernel_type_ == real_to_complex) { + movss(xmm_inp_real, ptr[input_ptr]); + + // output_real += input_real * cos(..) + mulss(xmm_cos, xmm_inp_real); + addss(xmm_real, xmm_cos); + + // output_imag += input_real * sin(..) + mulss(xmm_sin, xmm_inp_real); + addss(xmm_imag, xmm_sin); + } else if (kernel_type_ == complex_to_real) { + movss(xmm_inp_real, ptr[input_ptr]); + movss(xmm_inp_imag, ptr[input_ptr + type_size]); + + // output += real * cos(..) + imag * sin(..) + mulss(xmm_cos, xmm_inp_real); + mulss(xmm_sin, xmm_inp_imag); + addss(xmm_cos, xmm_sin); + addss(xmm_real, xmm_cos); + } else if (kernel_type_ == complex_to_complex) { + // output_real += input_real * cos(..) - input_imag * sin(..) + movss(xmm_inp_real, ptr[input_ptr]); + movss(xmm_inp_imag, ptr[input_ptr + type_size]); + mulss(xmm_inp_real, xmm_cos); + mulss(xmm_inp_imag, xmm_sin); + if (!is_inverse_) { + subss(xmm_inp_real, xmm_inp_imag); + } else { + addss(xmm_inp_real, xmm_inp_imag); + } + addss(xmm_real, xmm_inp_real); + + // output_imag += input_imag * cos(..) + input_real * sin(..) + movss(xmm_inp_real, ptr[input_ptr]); + movss(xmm_inp_imag, ptr[input_ptr + type_size]); + mulss(xmm_inp_imag, xmm_cos); + mulss(xmm_inp_real, xmm_sin); + if (!is_inverse_) { + addss(xmm_inp_imag, xmm_inp_real); + } else { + subss(xmm_inp_imag, xmm_inp_real); + } + addss(xmm_imag, xmm_inp_imag); + } + + // increment indexes for next iteration + add(twiddles_ptr, complex_type_size()); + add(input_ptr, input_type_size); + dec(input_size); + + // continue if input_size > 0 + cmp(input_size, 0); + jg(loop, T_NEAR); + } + if (is_inverse_) { + Label loop_backwards; + Label loop_backwards_exit; + + mov(input_size, signal_size); + sub(input_size, ptr[param1 + GET_OFF(input_size)]); + + test(is_signal_size_even, 1); + jz(loop_backwards); + + sub(input_ptr, input_type_size); + + L(loop_backwards); + { + cmp(input_size, 0); + je(loop_backwards_exit); + + sub(input_ptr, input_type_size); + + movss(xmm_cos, ptr[twiddles_ptr]); + movss(xmm_sin, ptr[twiddles_ptr + type_size]); + movss(xmm_inp_real, ptr[input_ptr]); + movss(xmm_inp_imag, ptr[input_ptr + type_size]); + + if (kernel_type_ == complex_to_real) { + // output += real * cos(..) 
- imag * sin(..) + mulss(xmm_cos, xmm_inp_real); + mulss(xmm_sin, xmm_inp_imag); + subss(xmm_cos, xmm_sin); + addss(xmm_real, xmm_cos); + } else if (kernel_type_ == complex_to_complex) { + // output_real += input_real * cos(..) - input_imag * sin(..) + movss(xmm_inp_real, ptr[input_ptr]); + movss(xmm_inp_imag, ptr[input_ptr + type_size]); + mulss(xmm_inp_real, xmm_cos); + mulss(xmm_inp_imag, xmm_sin); + subss(xmm_inp_real, xmm_inp_imag); + addss(xmm_real, xmm_inp_real); + + // output_imag += input_imag * cos(..) + input_real * sin(..) + movss(xmm_inp_real, ptr[input_ptr]); + movss(xmm_inp_imag, ptr[input_ptr + type_size]); + mulss(xmm_inp_imag, xmm_cos); + mulss(xmm_inp_real, xmm_sin); + addss(xmm_inp_imag, xmm_inp_real); + addss(xmm_imag, xmm_inp_imag); + } + + add(twiddles_ptr, complex_type_size()); + dec(input_size); + jmp(loop_backwards); + } + L(loop_backwards_exit); + } + + if (kernel_type_ == complex_to_real) { + if (is_inverse_) { + divss(xmm_real, xmm_signal_size); + } + // store the result + movss(ptr[output_ptr], xmm_real); + } else { + if (is_inverse_) { + divss(xmm_real, xmm_signal_size); + divss(xmm_imag, xmm_signal_size); + } + // store the results + movss(ptr[output_ptr], xmm_real); + movss(ptr[output_ptr + type_size], xmm_imag); + } + + add(output_ptr, output_type_size); + dec(output_end); + }; + + L(loop_over_output); + { + mov(input_ptr, ptr[param1 + GET_OFF(input)]); + mov(input_size, ptr[param1 + GET_OFF(input_size)]); + + cmp(output_end, simd_size); + jae(loop_simd, T_NEAR); + + jmp(loop_nonsimd, T_NEAR); + + L(loop_simd); + simd_loop(); + jmp(loop_over_output_continue, T_NEAR); + + L(loop_nonsimd); + nonsimd_loop(); + + L(loop_over_output_continue); + cmp(output_end, 0); + ja(loop_over_output, T_NEAR); + } + + this->postamble(); +} + +template +void jit_dft_kernel_f32::uni_vbroadcastsd(const Xbyak::Xmm& x, const Xbyak::Operand& op) { + movsd(x, op); + shufpd(x, x, 0x0); +} + +template +void jit_dft_kernel_f32::uni_vbroadcastsd(const Xbyak::Ymm& x, const Xbyak::Operand& op) { + vbroadcastsd(x, op); +} + +template +void jit_dft_kernel_f32::uni_vpermilps(const Xbyak::Xmm& x, const Xbyak::Operand& op, int8_t control) { + movups(x, op); + shufps(x, x, control); +} + +template +void jit_dft_kernel_f32::uni_vpermilps(const Xbyak::Ymm& x, const Xbyak::Operand& op, int8_t control) { + vpermilps(x, op, control); +} + +template +void jit_dft_kernel_f32::load_and_broadcast_every_other_elem(const Xbyak::Zmm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp) { + for (int i = 0; i < 4; i++) { + movq(tmp, ptr[reg_exp + type_size * i * 2]); + shufps(tmp, tmp, 0b01010000); + vinsertf32x4(x, x, tmp, i); + } +} + +template +void jit_dft_kernel_f32::load_and_broadcast_every_other_elem(const Xbyak::Ymm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp) { + for (int i = 0; i < 2; i++) { + movq(tmp, ptr[reg_exp + type_size * i * 2]); + shufps(tmp, tmp, 0b01010000); + vinsertf128(x, x, tmp, i); + } +} + +template +void jit_dft_kernel_f32::load_and_broadcast_every_other_elem(const Xbyak::Xmm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp) { + movq(x, ptr[reg_exp]); + shufps(x, x, 0b01010000); +} + +template struct jit_dft_kernel_f32; +template struct jit_dft_kernel_f32; +template struct jit_dft_kernel_f32; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/rdft_kernel.hpp b/src/plugins/intel_cpu/src/nodes/kernels/rdft_kernel.hpp new file mode 100644 index 00000000000..19dc090bb1f --- /dev/null +++ 
b/src/plugins/intel_cpu/src/nodes/kernels/rdft_kernel.hpp @@ -0,0 +1,96 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cpu/x64/jit_generator.hpp" +#include + +namespace ov { +namespace intel_cpu { + +using namespace dnnl::impl; +using namespace dnnl::impl::cpu::x64; +using namespace dnnl::impl::utils; + +enum dft_type { + real_to_complex, + complex_to_complex, + complex_to_real, +}; + +template +size_t complex_type_size() { + return sizeof(T) * 2; +} + +struct jit_dft_args { + const void* input; + const void* twiddles; + void* output; + size_t input_size; + size_t signal_size; + size_t output_start; + size_t output_end; +}; + + +struct jit_dft_kernel { + jit_dft_kernel(bool is_inverse, enum dft_type type) : is_inverse_(is_inverse), kernel_type_(type) {} + + void (*ker_)(const jit_dft_args*); + + void operator()(const jit_dft_args* args) { + assert(ker_); + ker_(args); + } + + jit_dft_kernel() : ker_(nullptr) {} + virtual ~jit_dft_kernel() {} + + virtual void create_ker() = 0; + + bool is_inverse_; + enum dft_type kernel_type_; +}; + +template +struct jit_dft_kernel_f32 : public jit_dft_kernel, public jit_generator { + public: + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_dft_kernel_f32) + + jit_dft_kernel_f32(bool is_inverse, enum dft_type type) : jit_dft_kernel(is_inverse, type), jit_generator() {} + + void create_ker() override { + jit_generator::create_kernel(); + ker_ = (decltype(ker_))jit_ker(); + } + + void generate() override; + + private: + void uni_vbroadcastsd(const Xbyak::Xmm& x, const Xbyak::Operand& op); + void uni_vbroadcastsd(const Xbyak::Ymm& x, const Xbyak::Operand& op); + + void uni_vpermilps(const Xbyak::Xmm& x, const Xbyak::Operand& op, int8_t control); + void uni_vpermilps(const Xbyak::Ymm& x, const Xbyak::Operand& op, int8_t control); + + void load_and_broadcast_every_other_elem(const Xbyak::Zmm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp); + void load_and_broadcast_every_other_elem(const Xbyak::Ymm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp); + void load_and_broadcast_every_other_elem(const Xbyak::Xmm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp); + + int type_size = sizeof(float); + + Xbyak::Reg8 is_signal_size_even = al; + Xbyak::Reg64 input_ptr = rbx; + Xbyak::Reg64 input_size = r8; + Xbyak::Reg64 output_ptr = r9; + Xbyak::Reg64 twiddles_ptr = r10; + Xbyak::Reg64 signal_size = r11; + Xbyak::Reg64 output_start = r12; + Xbyak::Reg64 output_end = r13; +}; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/rdft.cpp b/src/plugins/intel_cpu/src/nodes/rdft.cpp new file mode 100644 index 00000000000..8bd1788b402 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/rdft.cpp @@ -0,0 +1,927 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rdft.h" +#include "ie_parallel.hpp" +#include "ie_precision.hpp" + +#include "utils/general_utils.h" +#include "common/cpu_memcpy.h" +#include +#include +#include + +using namespace dnnl; +using namespace InferenceEngine; + +namespace ov { +namespace intel_cpu { +namespace node { + + +static constexpr size_t DATA_INDEX = 0; +static constexpr size_t AXES_INDEX = 1; +static constexpr size_t SIGNAL_SIZE_INDEX = 2; +static constexpr double PI = 3.14159265358979323846; + + +bool RDFT::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + 
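+        // This CPU node only handles the opset9 RDFT/IRDFT operations with static shapes;
+        // unsupported cases are reported through errorMessage instead of throwing.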
if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + const bool isRDFT = is_type(op); + const bool isIRDFT = is_type(op); + + if (!isRDFT && !isIRDFT) { + errorMessage = "Only opset9 RDFT/IRDFT operation is supported"; + return false; + } + } catch (...) { + return false; + } + return true; +} + +static void normalizeAxes(std::vector& axes, size_t rank) { + for (auto& axis : axes) { + if (axis < 0) { + axis += rank; + } + } +} + +static std::vector getDefaultSignalSizes(const VectorDims& inputShape, const std::vector& axes, bool inverse) { + std::vector signalSizes; + signalSizes.reserve(axes.size()); + + for (auto axis : axes) { + signalSizes.push_back(inputShape[axis]); + } + if (inverse) { + signalSizes[signalSizes.size() - 1] = 2 * (inputShape[axes.back()] - 1); + } + + return signalSizes; +} + +RDFT::RDFT(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + std::string errorMsgPrefix = "RDFT layer with name '" + op->get_name() + "'"; + const size_t numInputs = getOriginalInputsNumber(); + if (numInputs != 2 && numInputs != 3) { + IE_THROW() << errorMsgPrefix << " has invalid number of input/output edges: " << numInputs; + } + + const auto axesRank = inputShapes[AXES_INDEX].getRank(); + if (axesRank != 1) { + IE_THROW() << errorMsgPrefix << " has invalid 'axes' input tensor with rank: " << axesRank; + } + + inverse = ov::is_type(op); + + std::shared_ptr signalSizesNode; + if (numInputs > 2) { + const auto signalSizeRank = inputShapes[SIGNAL_SIZE_INDEX].getRank(); + if (signalSizeRank != 1) { + IE_THROW() << errorMsgPrefix << " has invalid 'signalSize' input tensor with rank: " << signalSizeRank; + } + auto signalSizesNode = ov::as_type(op->get_input_node_ptr(2)); + if (!signalSizesNode) + return; + signalSizes = signalSizesNode->cast_vector(); + } + + auto axesNode = ov::as_type(op->get_input_node_ptr(1)); + if (!axesNode) + return; + + axes = axesNode->cast_vector(); + auto rank = inputShapes[DATA_INDEX].getRank() - inverse; + normalizeAxes(axes, rank); + + if (numInputs < 3) { + const auto& inputShape = inputShapes[DATA_INDEX].getStaticDims(); + signalSizes = getDefaultSignalSizes(inputShape, axes, inverse); + } +} + +void RDFT::getSupportedDescriptors() {} + +void RDFT::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + const auto& dataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX); + if (!dataPrecision.is_float()) { + IE_THROW() << errorMsgPrefix << " has unsupported 'data' input precision: " << dataPrecision.name(); + } + + const auto& axesPrecision = getOriginalInputPrecisionAtPort(AXES_INDEX); + if (axesPrecision != Precision::I32 && axesPrecision != Precision::I64) { + IE_THROW() << errorMsgPrefix << " has unsupported 'axes' input precision: " << axesPrecision.name(); + } + + if (inputShapes.size() > SIGNAL_SIZE_INDEX) { + const auto& signalSizePrecision = getOriginalInputPrecisionAtPort(SIGNAL_SIZE_INDEX); + if (signalSizePrecision != Precision::I32 && signalSizePrecision != Precision::I64) { + IE_THROW() << errorMsgPrefix << " has unsupported 'signalSize' input precision: " << signalSizePrecision.name(); + } + } + + std::vector configurators({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}}); + if (inputShapes.size() > SIGNAL_SIZE_INDEX) + 
configurators.push_back({LayoutType::ncsp, Precision::I32}); + + addSupportedPrimDesc(configurators, {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); +} + +void RDFT::execute(dnnl::stream strm) { + const auto& inputMem = getParentEdgeAt(DATA_INDEX)->getMemory(); + const auto& outputMem = getChildEdgeAt(0)->getMemory(); + const auto& inputShape = inputMem.getStaticDims(); + const auto& outputShape = outputMem.getStaticDims(); + + auto inputPtr = reinterpret_cast(inputMem.GetPtr()); + auto outputPtr = reinterpret_cast(outputMem.GetPtr()); + + auto rank = inputShape.size() - inverse; + + if (axes.size() == 0) { + const auto& axesMem = getParentEdgeAt(AXES_INDEX)->getMemoryPtr(); + auto axesPtr = reinterpret_cast(axesMem->GetPtr()); + axes = std::vector(axesPtr, axesPtr + axesMem->getStaticDims()[0]); + normalizeAxes(axes, rank); + } + + if (signalSizes.size() == 0) { + if (SIGNAL_SIZE_INDEX < getOriginalInputsNumber()) { + const auto& signalSizeMem = getParentEdgeAt(SIGNAL_SIZE_INDEX)->getMemoryPtr(); + auto signalPtr = reinterpret_cast(signalSizeMem->GetPtr()); + signalSizes = std::vector(signalPtr, signalPtr + signalSizeMem->getStaticDims()[0]); + } else { + signalSizes = getDefaultSignalSizes(inputShape, axes, inverse); + } + } + + const auto& inputStrides = inputMem.GetDescWithType()->getStrides(); + const auto& outputStrides = outputMem.GetDescWithType()->getStrides(); + + if (twiddles.size() == 0) { + twiddles = executor->generateTwiddles(signalSizes, outputShape, axes); + } + + executor->execute(inputPtr, outputPtr, + twiddles, rank, + axes, signalSizes, + inputShape, outputShape, + inputStrides, outputStrides); +} + +bool RDFT::created() const { + return getType() == Type::RDFT; +} + +static void adjustInputSize(VectorDims& inputShape, + std::vector& signalSizes, + const VectorDims& outputShape, + const std::vector& axes, + bool isInverse) { + for (size_t i = 0; i < axes.size(); i++) { + auto axis = axes[i]; + size_t inputSize = inputShape[axis]; + size_t signalSize = signalSizes[i]; + if (signalSize <= inputSize) { + inputShape[axis] = signalSize; + } else if (!isInverse) { + IE_THROW() << "Signal size greater than input size is not supported yet"; + } + } + if (isInverse) { + inputShape[axes.back()] = signalSizes.back() / 2 + 1; + } +} + +void RDFTExecutor::execute(float* inputPtr, float* outputPtr, + const std::vector>& twiddles, + size_t rank, const std::vector& axes, + std::vector signalSizes, + VectorDims inputShape, const VectorDims& outputShape, + const VectorDims& inputStrides, const VectorDims& outputStrides) { + adjustInputSize(inputShape, signalSizes, outputShape, axes, isInverse); + + if (rank == 1) { + auto twiddlesPtr = twiddles[0].data(); + dftCommon(inputPtr, twiddlesPtr, outputPtr, + inputShape[0], signalSizes[0], outputShape[0], + isInverse ? 
complex_to_real : real_to_complex, + canUseFFT(signalSizes[0]), false); + } else { + if (!isInverse) + rdftNd(inputPtr, outputPtr, twiddles, axes, signalSizes, inputShape, inputStrides, outputShape, outputStrides); + else + irdftNd(inputPtr, outputPtr, twiddles, axes, signalSizes, inputShape, inputStrides, outputShape, outputStrides); + } +} + +static void coordsFromIndex(size_t index, std::vector& coords, const std::vector& shape, int excludeAxis) { + for (size_t i = coords.size(); i > 0; i--) { + if (excludeAxis == i - 1) { + coords[i - 1] = 0; + continue; + } + coords[i - 1] = index % shape[i - 1]; + index /= shape[i - 1]; + } +} + +static size_t getOffset(const std::vector& coords, const std::vector& strides) { + size_t offset = 0; + for (size_t i = 0; i < coords.size(); ++i) { + offset += coords[i] * strides[i]; + } + return offset; +} + +static void gatherReal(float* output, const float* input, size_t axis, + const std::vector& coords, + size_t size, const std::vector& strides) { + size_t inputOffset = getOffset(coords, strides); + + for (size_t i = 0; i < size; i++) { + output[i] = input[inputOffset]; + inputOffset += strides[axis]; + } +} + +static void gatherComplex(float* output, const float* input, size_t axis, + const std::vector& coords, + size_t size, const std::vector& strides) { + size_t inputOffset = getOffset(coords, strides); + + for (size_t i = 0; i < 2 * size; i += 2) { + output[i] = input[inputOffset]; + output[i + 1] = input[inputOffset + 1]; + inputOffset += strides[axis]; + } +} + +static void scatterReal(float* output, const float* input, size_t axis, + const std::vector& coords, + size_t size, const std::vector& strides) { + size_t offset = getOffset(coords, strides); + + for (size_t i = 0; i < size; i++) { + output[offset] = input[i]; + offset += strides[axis]; + } +} + +static void scatterComplex(float* output, const float* input, size_t axis, + const std::vector& coords, + size_t size, const std::vector& strides) { + size_t offset = getOffset(coords, strides); + + for (size_t i = 0; i < 2 * size; i += 2) { + output[offset] = input[i]; + output[offset + 1] = input[i + 1]; + offset += strides[axis]; + } +} + +static bool isPowerOfTwo(size_t n) { + return (n != 0) && (n & (n - 1)) == 0; +} + +static size_t dftSimdSize(int vlen) { + return vlen / (2 * sizeof(float)); +} + +bool RDFTExecutor::canUseFFT(size_t dim) { + return isPowerOfTwo(dim) && dim > 1; +} + +static void fftCopyInverseInputData(float* dst, float* src, size_t inputSize, size_t signalSize, bool parallelize) { + if (!parallelize) { + cpu_memcpy(dst, src, inputSize * complex_type_size()); + src = src + 2 * inputSize - 4; + for (size_t i = inputSize; i < signalSize; i++, src -= 2) { + dst[2 * i] = src[0]; + dst[2 * i + 1] = -src[1]; + } + } else { + parallel_for(signalSize, [&] (size_t i) { + if (i < inputSize) { + dst[2 * i] = src[2 * i]; + dst[2 * i + 1] = src[2 * i + 1]; + } else { + size_t src_idx = 2 * inputSize - 2 - i; + dst[2 * i] = src[2 * src_idx]; + dst[2 * i + 1] = -src[2 * src_idx + 1]; + } + }); + } +} + +static void fftCopyRealInputData(float* dst, float* src, size_t inputSize, bool parallelize) { + if (!parallelize) { + for (size_t i = 0; i < inputSize; i++) { + dst[2 * i] = src[i]; + dst[2 * i + 1] = 0; + } + } else { + parallel_for(inputSize, [&] (size_t i) { + dst[2 * i] = src[i]; + dst[2 * i + 1] = 0; + }); + } +} + +static void fftCopyInverseRealOutput(float* dst, float* src, size_t signalSize, bool parallelize) { + if (!parallelize) { + for (size_t i = 0; i < signalSize; i++) { + 
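+            // keep only the real lanes of the interleaved (re, im) buffer; the imaginary
+            // parts of the inverse transform of a Hermitian-symmetric spectrum are discarded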
dst[i] = src[2 * i]; + } + } else { + parallel_for(signalSize, [&] (size_t i) { + dst[i] = src[2 * i]; + }); + } +} + +void RDFTExecutor::fft(float* input, const float* twiddlesPtr, float* output, + size_t inputSize, size_t signalSize, size_t outputSize, + enum dft_type type, bool parallelize) { + std::vector scratchSpace(4 * signalSize, 0); + + float* inputPtr = input; + float* outputPtr = &scratchSpace[2 * signalSize]; + + if (inputSize < signalSize || type == real_to_complex) { + if (isInverse) + fftCopyInverseInputData(&scratchSpace[0], input, inputSize, signalSize, parallelize); + else if (type == real_to_complex) + fftCopyRealInputData(&scratchSpace[0], input, inputSize, parallelize); + inputPtr = &scratchSpace[0]; + } + + size_t numBlocks = 0; + size_t blockSize = 0; + + auto blockIteration = [&] (size_t block) { + size_t inputOffset = block * blockSize; + size_t outputOffset = block * blockSize / 2; + float cos = twiddlesPtr[2 * block]; + float sin = twiddlesPtr[2 * block + 1]; + if (isInverse) + sin = -sin; + for (size_t pair = 0; pair < blockSize / 2; pair++) { + float evenReal = inputPtr[2 * (inputOffset + pair)]; + float evenImag = inputPtr[2 * (inputOffset + pair) + 1]; + float oddReal = inputPtr[2 * (inputOffset + blockSize / 2 + pair)]; + float oddImag = inputPtr[2 * (inputOffset + blockSize / 2 + pair) + 1]; + outputPtr[2 * (outputOffset + pair)] = evenReal + cos * oddReal - sin * oddImag; + outputPtr[2 * (outputOffset + pair) + 1] = evenImag + cos * oddImag + sin * oddReal; + outputPtr[2 * (outputOffset + signalSize / 2 + pair)] = evenReal - cos * oddReal + sin * oddImag; + outputPtr[2 * (outputOffset + signalSize / 2 + pair) + 1] = evenImag - cos * oddImag - sin * oddReal; + if (isInverse && numBlocks == signalSize / 2) { + outputPtr[2 * (outputOffset + pair)] /= signalSize; + outputPtr[2 * (outputOffset + pair) + 1] /= signalSize; + outputPtr[2 * (outputOffset + signalSize / 2 + pair)] /= signalSize; + outputPtr[2 * (outputOffset + signalSize / 2 + pair) + 1] /= signalSize; + } + } + }; + + for (numBlocks = 1; numBlocks < signalSize; numBlocks *= 2) { + blockSize = signalSize / numBlocks; + if (numBlocks == signalSize / 2 && outputSize == signalSize && type != complex_to_real) { + outputPtr = output; + } + if (parallelize) { + parallel_for(numBlocks, blockIteration); + } else { + for (size_t block = 0; block < numBlocks; block++) { + blockIteration(block); + } + } + twiddlesPtr += numBlocks * 2; + if (numBlocks == 1 && inputPtr == input) + inputPtr = &scratchSpace[0]; + std::swap(inputPtr, outputPtr); + } + + if (type == complex_to_real) { + fftCopyInverseRealOutput(output, inputPtr, signalSize, parallelize); + } else if (outputSize != signalSize) { + cpu_memcpy(output, inputPtr, outputSize * complex_type_size()); + } +} + +void RDFTExecutor::dftCommon(float* inputPtr, const float* twiddlesPtr, float* outputPtr, + size_t inputSize, size_t signalSize, size_t outputSize, + enum dft_type type, bool useFFT, bool parallelize) { + if (useFFT) { + fft(inputPtr, twiddlesPtr, outputPtr, + inputSize, signalSize, outputSize, + type, parallelize); + } else { + dft(inputPtr, twiddlesPtr, outputPtr, + inputSize, signalSize, outputSize, + type, parallelize); + } +} + +void RDFTExecutor::dftOnAxis(enum dft_type type, + float* inputPtr, float* outputPtr, + const float* twiddlesPtr, int axis, + size_t signalSize, + const VectorDims& inputShape, + const VectorDims& inputStrides, + const VectorDims& outputShape, + const VectorDims& outputStrides, + const std::vector& iterationRange) { + 
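+    // Transform one axis at a time: gather a contiguous 1-D line along 'axis', run a single
+    // (R)DFT or FFT on it, and scatter the result back into the strided output. When there are
+    // more outer lines than points in a single transform, the loop over lines is parallelized;
+    // otherwise the 1-D transform itself runs in parallel.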
size_t inputSize = inputShape[axis]; + size_t outputSize = outputShape[axis]; + + void (*gather)(float* output, const float* input, + size_t axis, const std::vector& coords, + size_t size, const std::vector& strides) = nullptr; + void (*scatter)(float* output, const float* input, + size_t axis, const std::vector& coords, + size_t size, const std::vector& strides) = nullptr; + + size_t gatherSize = 0; + size_t scatterSize = 0; + + switch (type) { + case real_to_complex: + scatter = scatterComplex; + gather = gatherReal; + gatherSize = inputSize; + scatterSize = outputSize * 2; + break; + case complex_to_complex: + gather = gatherComplex; + scatter = scatterComplex; + gatherSize = inputSize * 2; + scatterSize = outputSize * 2; + break; + case complex_to_real: + gather = gatherComplex; + scatter = scatterReal; + gatherSize = inputSize * 2; + scatterSize = outputSize; + break; + } + + bool useFFT = canUseFFT(signalSize); + + size_t totalWorkSize = std::accumulate(iterationRange.begin(), + iterationRange.end(), + 1, std::multiplies()) / iterationRange[axis]; + bool parallelizeOuterAxes = totalWorkSize > signalSize; + + if (parallelizeOuterAxes) { + parallel_for(totalWorkSize, [&] (size_t i) { + std::vector coords(iterationRange.size(), 0); + std::vector gatherScatterBuffer(gatherSize + scatterSize); + float* gatherBuffer = &gatherScatterBuffer[0]; + float* scatterBuffer = &gatherScatterBuffer[gatherSize]; + coordsFromIndex(i, coords, iterationRange, axis); + gather(gatherBuffer, inputPtr, + axis, coords, + inputSize, inputStrides); + dftCommon(gatherBuffer, twiddlesPtr, scatterBuffer, + inputSize, signalSize, outputSize, + type, useFFT, !parallelizeOuterAxes); + scatter(outputPtr, scatterBuffer, axis, coords, outputSize, outputStrides); + }); + } else { + std::vector coords(iterationRange.size(), 0); + std::vector gatherScatterBuffer(gatherSize + scatterSize); + float* gatherBuffer = &gatherScatterBuffer[0]; + float* scatterBuffer = &gatherScatterBuffer[gatherSize]; + for (size_t i = 0; i < totalWorkSize; i++) { + coordsFromIndex(i, coords, iterationRange, axis); + gather(gatherBuffer, inputPtr, + axis, coords, + inputSize, inputStrides); + dftCommon(gatherBuffer, twiddlesPtr, scatterBuffer, + inputSize, signalSize, outputSize, + type, useFFT, !parallelizeOuterAxes); + scatter(outputPtr, scatterBuffer, axis, coords, outputSize, outputStrides); + } + } +} + +// N-dimensional real DFT +void RDFTExecutor::rdftNd(float* inputPtr, float* outputPtr, + const std::vector>& twiddles, + const std::vector& axes, + const std::vector& signalSizes, + const VectorDims& inputShape, + const VectorDims& inputStrides, + const VectorDims& outputShape, + const VectorDims& outputStrides) { + const std::vector iterationRange(outputShape.begin(), outputShape.end() - 1); + + dftOnAxis(real_to_complex, inputPtr, outputPtr, + twiddles.back().data(), axes.back(), + signalSizes.back(), + inputShape, inputStrides, + outputShape, outputStrides, + iterationRange); + inputPtr = outputPtr; + + for (size_t i = 0; i < axes.size() - 1; i++) { + auto axis = axes[i]; + dftOnAxis(complex_to_complex, inputPtr, outputPtr, + twiddles[i].data(), axis, + signalSizes[i], + outputShape, outputStrides, + outputShape, outputStrides, + iterationRange); + } +} + +// N-dimensional real inverse DFT +void RDFTExecutor::irdftNd(float* inputPtr, float* outputPtr, + const std::vector>& twiddles, + const std::vector& axes, + const std::vector& signalSizes, + const VectorDims& inputShape, + const VectorDims& originalInputStrides, + const VectorDims& 
outputShape, + const VectorDims& outputStrides) { + const std::vector iterationRange(inputShape.begin(), inputShape.end() - 1); + + if (axes.size() == 1) { + dftOnAxis(complex_to_real, inputPtr, outputPtr, + twiddles[0].data(), axes[0], + signalSizes[0], + inputShape, originalInputStrides, + outputShape, outputStrides, + iterationRange); + return; + } + + float* output = outputPtr; + std::vector tmp; + size_t inputShapeSize = std::accumulate(inputShape.begin(), inputShape.end(), 1, std::multiplies()); + size_t outputShapeSize = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies()); + if (inputShapeSize > outputShapeSize) { + tmp.resize(inputShapeSize); + output = &tmp[0]; + } + + std::vector inputStrides(originalInputStrides.size()); + inputStrides[originalInputStrides.size() - 1] = 1; + for (size_t i = inputStrides.size() - 1; i > 0; i--) { + inputStrides[i - 1] = inputStrides[i] * inputShape[i]; + } + + for (size_t i = 0; i < axes.size() - 1; i++) { + auto axis = axes[i]; + dftOnAxis(complex_to_complex, inputPtr, output, + twiddles[i].data(), axis, + signalSizes[i], + inputShape, originalInputStrides, + inputShape, inputStrides, + iterationRange); + inputPtr = output; + } + dftOnAxis(complex_to_real, inputPtr, outputPtr, + twiddles.back().data(), axes.back(), + signalSizes.back(), + inputShape, inputStrides, + outputShape, outputStrides, + iterationRange); +} + +std::vector RDFTExecutor::generateTwiddlesFFT(size_t N) { + std::vector twiddles; + for (size_t numBlocks = 1; numBlocks < N; numBlocks *= 2) { + for (size_t block = 0; block < numBlocks; block++) { + double angle = 2 * PI * block / (numBlocks * 2); + twiddles.push_back(std::cos(angle)); + twiddles.push_back(-std::sin(angle)); + } + } + return twiddles; +} + +std::vector RDFTExecutor::generateTwiddlesCommon(size_t signalSize, size_t outputSize, + enum dft_type type, bool useFFT) { + if (useFFT) { + return generateTwiddlesFFT(signalSize); + } + return generateTwiddlesDFT(signalSize, outputSize, type); +} + +std::vector> RDFTExecutor::generateTwiddles(const std::vector& signalSizes, + const std::vector& outputShape, + const std::vector& axes) { + std::vector> twiddles; + twiddles.reserve(axes.size()); + for (size_t i = 0; i < axes.size(); i++) { + auto axis = axes[i]; + size_t N = signalSizes[i]; + size_t K = outputShape[axis]; + auto type = complex_to_complex; + if (i == axes.size() - 1) + type = isInverse ? complex_to_real : real_to_complex; + twiddles.push_back(generateTwiddlesCommon(N, K, type, canUseFFT(N))); + } + return twiddles; +} + +struct RDFTJitExecutor : public RDFTExecutor { + RDFTJitExecutor(bool inverse, NodeDesc* primDesc) : RDFTExecutor(inverse) { + enum dft_type rdftType = isInverse ? 
complex_to_real : real_to_complex; + if (mayiuse(cpu::x64::avx512_core)) { + rdftKernel.reset(new jit_dft_kernel_f32(isInverse, rdftType)); + dftKernel.reset(new jit_dft_kernel_f32(isInverse, complex_to_complex)); + vlen = cpu_isa_traits::vlen; + primDesc->setImplementationType(jit_avx512); + } else if (mayiuse(cpu::x64::avx2)) { + rdftKernel.reset(new jit_dft_kernel_f32(isInverse, rdftType)); + dftKernel.reset(new jit_dft_kernel_f32(isInverse, complex_to_complex)); + vlen = cpu_isa_traits::vlen; + primDesc->setImplementationType(jit_avx2); + } else if (mayiuse(cpu::x64::sse41)) { + rdftKernel.reset(new jit_dft_kernel_f32(isInverse, rdftType)); + dftKernel.reset(new jit_dft_kernel_f32(isInverse, complex_to_complex)); + vlen = cpu_isa_traits::vlen; + primDesc->setImplementationType(jit_sse42); + } else { + IE_THROW() << "Can't create RDFT kernel"; + } + + if (rdftKernel) + rdftKernel->create_ker(); + if (dftKernel) + dftKernel->create_ker(); + } + + std::vector generateTwiddlesDFT(size_t inputSize, size_t outputSize, enum dft_type type) override { + std::vector twiddles(inputSize * outputSize * 2); + int simdSize = vlen / sizeof(float); + if (type == real_to_complex || type == complex_to_complex) { + simdSize /= 2; // there are two floats per one complex element in the output + } + + parallel_for2d(outputSize / simdSize, inputSize, [&] (size_t K, size_t n) { + if (type == real_to_complex) { + for (size_t k = 0; k < simdSize; k++) { + double angle = 2 * PI * (K * simdSize + k) * n / inputSize; + twiddles[((K * inputSize + n) * simdSize + k) * 2] = std::cos(angle); + twiddles[((K * inputSize + n) * simdSize + k) * 2 + 1] = -std::sin(angle); + } + } else if (type == complex_to_real || type == complex_to_complex) { + for (size_t k = 0; k < simdSize; k++) { + double angle = 2 * PI * (K * simdSize + k) * n / inputSize; + twiddles[(K * inputSize + n) * 2 * simdSize + k] = std::cos(angle); + } + for (size_t k = 0; k < simdSize; k++) { + double angle = 2 * PI * (K * simdSize + k) * n / inputSize; + twiddles[((K * inputSize + n) * 2 + 1) * simdSize + k] = -std::sin(angle); + } + } + }); + if ((outputSize % simdSize) != 0) { + size_t start = (outputSize / simdSize) * simdSize; + parallel_for2d(outputSize - start, inputSize, [&] (size_t k, size_t n) { + k += start; + double angle = 2 * PI * k * n / inputSize; + twiddles[2 * (k * inputSize + n)] = std::cos(angle); + twiddles[2 * (k * inputSize + n) + 1] = -std::sin(angle); + }); + } + return twiddles; + } + + void dft(float* inputPtr, const float* twiddlesPtr, float* outputPtr, + size_t inputSize, size_t signalSize, size_t outputSize, + enum dft_type type, bool parallelize) override { + jit_dft_kernel* kernel = type == complex_to_complex ? 
dftKernel.get() : rdftKernel.get(); + if (parallelize) { + const int cachelineSize = 64; + size_t blockSize = 4 * cachelineSize / sizeof(float); + size_t numBlocks = (outputSize + blockSize - 1) / blockSize; + parallel_nt(numBlocks, [&] (size_t i, size_t nthr) { + if (numBlocks > nthr) { + auto newBlockSize = (((outputSize / nthr) + blockSize - 1) / blockSize) * blockSize; + blockSize = newBlockSize; + numBlocks = nthr; + } + jit_dft_args args{}; + args.input = inputPtr, + args.twiddles = twiddlesPtr, + args.output = outputPtr, + args.input_size = inputSize, + args.signal_size = signalSize, + args.output_start = i * blockSize, + args.output_end = std::min(outputSize - i * blockSize, blockSize), + (*kernel)(&args); + }); + } else { + jit_dft_args args{}; + args.input = inputPtr, + args.twiddles = twiddlesPtr, + args.output = outputPtr, + args.input_size = inputSize, + args.signal_size = signalSize, + args.output_start = 0, + args.output_end = outputSize, + (*kernel)(&args); + } + } + + std::unique_ptr rdftKernel = nullptr; + std::unique_ptr dftKernel = nullptr; + + int vlen; +}; + + +struct RDFTRefExecutor : public RDFTExecutor { + RDFTRefExecutor(bool inverse) : RDFTExecutor(inverse) {} + + private: + std::vector generateTwiddlesDFT(size_t inputSize, size_t outputSize, enum dft_type type) override { + std::vector twiddles(inputSize * outputSize * 2); + parallel_for2d(outputSize, inputSize, [&] (size_t k, size_t n) { + double angle = 2 * PI * k * n / inputSize; + if (!isInverse) + angle = -angle; + twiddles[(k * inputSize + n) * 2] = std::cos(angle); + twiddles[(k * inputSize + n) * 2 + 1] = std::sin(angle); + }); + return twiddles; + } + + void dftRealToComplex(float* inputPtr, const float* twiddlesPtr, float* outputPtr, + size_t inputSize, size_t outputSize, bool parallelize) { + auto dftIteration = [&] (size_t k) { + float real = 0, imag = 0; + for (size_t n = 0; n < inputSize; n++) { + float cos = twiddlesPtr[2 * (k * inputSize + n)]; + float sin = twiddlesPtr[2 * (k * inputSize + n) + 1]; + real += inputPtr[n] * cos; + imag += inputPtr[n] * sin; + } + outputPtr[2 * k] = real; + outputPtr[2 * k + 1] = imag; + }; + if (parallelize) { + parallel_for(outputSize, dftIteration); + } else { + for (size_t k = 0; k < outputSize; k++) { + dftIteration(k); + } + } + } + + void dftComplexToComplex(float* inputPtr, const float* twiddlesPtr, float* outputPtr, + size_t inputSize, size_t signalSize, size_t outputSize, bool parallelize) { + auto dftIteration = [&] (size_t k) { + float real = 0, imag = 0; + for (size_t n = 0; n < inputSize; n++) { + float cos = twiddlesPtr[2 * (k * outputSize + n)]; + float sin = twiddlesPtr[2 * (k * outputSize + n) + 1]; + float inputReal = inputPtr[2 * n]; + float inputImag = inputPtr[2 * n + 1]; + real += inputReal * cos - inputImag * sin; + imag += inputImag * cos + inputReal * sin; + } + if (isInverse) { + float* inp = inputPtr + 2 * (inputSize - 2 + outputSize % 2); + for (int n = inputSize; n < signalSize; n++, inp -= 2) { + float cos = twiddlesPtr[2 * (k * outputSize + n)]; + float sin = twiddlesPtr[2 * (k * outputSize + n) + 1]; + float inputReal = inp[0]; + float inputImag = -inp[1]; + real += inputReal * cos - inputImag * sin; + imag += inputImag * cos + inputReal * sin; + } + real /= outputSize; + imag /= outputSize; + } + outputPtr[2 * k] = real; + outputPtr[2 * k + 1] = imag; + }; + if (parallelize) { + parallel_for(outputSize, dftIteration); + } else { + for (size_t k = 0; k < outputSize; k++) { + dftIteration(k); + } + } + } + + void 
dftComplexToReal(float* inputPtr, const float* twiddlesPtr, float* outputPtr, + size_t inputSize, size_t signalSize, size_t outputSize, bool parallelize) { + auto dftIteration = [&] (size_t k) { + float real = 0; + for (size_t n = 0; n < inputSize; n++) { + float cos = twiddlesPtr[2 * (k * outputSize + n)]; + float sin = twiddlesPtr[2 * (k * outputSize + n) + 1]; + float inputReal = inputPtr[2 * n]; + float inputImag = inputPtr[2 * n + 1]; + real += inputReal * cos - inputImag * sin; + } + if (isInverse) { + float* inp = inputPtr + 2 * (inputSize - 2 + outputSize % 2); + for (size_t n = inputSize; n < signalSize; n++, inp -= 2) { + float cos = twiddlesPtr[2 * (k * outputSize + n)]; + float sin = twiddlesPtr[2 * (k * outputSize + n) + 1]; + float inputReal = inp[0]; + float inputImag = inp[1]; + real += inputReal * cos + inputImag * sin; + } + real /= outputSize; + } + outputPtr[k] = real; + }; + if (parallelize) { + parallel_for(outputSize, dftIteration); + } else { + for (int k = 0; k < outputSize; k++) { + dftIteration(k); + } + } + } + + void dft(float* inputPtr, const float* twiddlesPtr, float* outputPtr, + size_t inputSize, size_t signalSize, size_t outputSize, + enum dft_type type, bool parallelize) override { + if (type == real_to_complex) { + dftRealToComplex(inputPtr, twiddlesPtr, outputPtr, inputSize, outputSize, parallelize); + } else if (type == complex_to_complex) { + dftComplexToComplex(inputPtr, twiddlesPtr, outputPtr, inputSize, signalSize, outputSize, parallelize); + } else if (type == complex_to_real) { + dftComplexToReal(inputPtr, twiddlesPtr, outputPtr, inputSize, signalSize, outputSize, parallelize); + } + } +}; + +struct RDFTKey { + bool isInverse; + + size_t hash() const { + using namespace dnnl::impl::primitive_hashing; + + size_t seed = 0; + seed = hash_combine(seed, isInverse); + return seed; + } + + bool operator==(const RDFTKey& rhs) const { + return isInverse == rhs.isInverse; + } +}; + +void RDFT::prepareParams() { + RDFTKey key{}; + key.isInverse = inverse; + + auto buildExecutor = [&] (const RDFTKey& key) -> std::shared_ptr { + std::shared_ptr executor; + NodeDesc* primDesc = getSelectedPrimitiveDescriptor(); + if (mayiuse(cpu::x64::sse41)) { + executor = std::make_shared(key.isInverse, primDesc); + } else { + executor = std::make_shared(key.isInverse); + primDesc->setImplementationType(ref_any); + } + return executor; + }; + + auto cache = getRuntimeCache(); + auto result = cache->getOrCreate(key, buildExecutor); + executor = result.first; + if (axes.size() > 0 && signalSizes.size() > 0 && outputShapes[0].isStatic()) { + twiddles = executor->generateTwiddles(signalSizes, outputShapes[0].getStaticDims(), axes); + } +} +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/rdft.h b/src/plugins/intel_cpu/src/nodes/rdft.h new file mode 100644 index 00000000000..da177dec66e --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/rdft.h @@ -0,0 +1,99 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include "kernels/rdft_kernel.hpp" + +namespace ov { +namespace intel_cpu { +namespace node { + +struct RDFTExecutor { + public: + RDFTExecutor(bool inverse) : isInverse(inverse) {} + void execute(float* inputPtr, float* outputPtr, + const std::vector>& twiddles, + size_t rank, const std::vector& axes, + std::vector signalSizes, + VectorDims inputShape, const VectorDims& outputShape, + const VectorDims& inputStrides, 
const VectorDims& outputStrides); + + std::vector> generateTwiddles(const std::vector& signalSizes, + const std::vector& outputShape, + const std::vector& axes); + + protected: + bool isInverse; + + private: + virtual bool canUseFFT(size_t dim); + virtual void dft(float* inputPtr, const float* twiddlesPtr, float* outputPtr, + size_t inputSize, size_t signalSize, size_t outputSize, + enum dft_type type, bool parallelize) = 0; + virtual void fft(float* input, const float* twiddlesPtr, float* output, + size_t inputSize, size_t signalSize, size_t outputSize, + enum dft_type type, bool parallelize); + void dftCommon(float* inputPtr, const float* twiddlesPtr, float* outputPtr, + size_t inputSize, size_t signalSize, size_t outputSize, + enum dft_type type, bool useFFT, bool parallelize); + void dftOnAxis(enum dft_type type, + float* inputPtr, float* outputPtr, + const float* twiddlesPtr, int axis, + size_t signalSize, + const VectorDims& inputShape, + const VectorDims& inputStrides, + const VectorDims& outputShape, + const VectorDims& outputStrides, + const std::vector& iteration_range); + void rdftNd(float* inputPtr, float* outputPtr, + const std::vector>& twiddles, + const std::vector& axes, + const std::vector& signalSizes, + const VectorDims& inputShape, + const VectorDims& inputStrides, + const VectorDims& outputShape, + const VectorDims& outputStrides); + void irdftNd(float* inputPtr, float* outputPtr, + const std::vector>& twiddles, + const std::vector& axes, + const std::vector& signalSizes, + const VectorDims& inputShape, + const VectorDims& inputStrides, + const VectorDims& outputShape, + const VectorDims& outputStrides); + virtual std::vector generateTwiddlesDFT(size_t inputSize, size_t outputSize, enum dft_type type) = 0; + std::vector generateTwiddlesFFT(size_t N); + std::vector generateTwiddlesCommon(size_t inputSize, size_t outputSize, + enum dft_type type, bool useFFT); +}; + +class RDFT : public Node { +public: + RDFT(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache); + + void getSupportedDescriptors() override; + void initSupportedPrimitiveDescriptors() override; + void prepareParams() override; + void execute(dnnl::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + std::string errorMsgPrefix; + bool inverse; + std::vector axes; + std::vector signalSizes; + std::vector> twiddles; + std::shared_ptr executor; +}; + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes_factory.cpp b/src/plugins/intel_cpu/src/nodes_factory.cpp index 2354bf375d6..2f41d3b935d 100644 --- a/src/plugins/intel_cpu/src/nodes_factory.cpp +++ b/src/plugins/intel_cpu/src/nodes_factory.cpp @@ -68,6 +68,7 @@ #include "nodes/log_softmax.h" #include "nodes/strided_slice.h" #include "nodes/dft.h" +#include "nodes/rdft.h" #include "nodes/non_max_suppression.h" #include "nodes/convert.h" #include "nodes/rnn.h" @@ -123,6 +124,7 @@ Node::NodesFactory::NodesFactory() INTEL_CPU_NODE(MemoryOutput, Type::MemoryOutput); INTEL_CPU_NODE(Tile, Type::Tile); INTEL_CPU_NODE(DFT, Type::DFT); + INTEL_CPU_NODE(RDFT, Type::RDFT); INTEL_CPU_NODE(GatherTree, Type::GatherTree); INTEL_CPU_NODE(SpaceToDepth, Type::SpaceToDepth); INTEL_CPU_NODE(FullyConnected, Type::FullyConnected); diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/rdft.cpp 
b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/rdft.cpp new file mode 100644 index 00000000000..3ed297c28f1 --- /dev/null +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/rdft.cpp @@ -0,0 +1,157 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/rdft.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +const std::vector opTypes = { + ngraph::helpers::DFTOpType::FORWARD, + ngraph::helpers::DFTOpType::INVERSE +}; + +static const std::vector inputPrecision = { + InferenceEngine::Precision::FP32, +}; + +const std::vector> shapesForward1d = { + {10}, + {64}, + {100}, +}; + + +const std::vector> signalSizes1d = { + {}, {10}, +}; + +//1D case doesn't work yet on reference implementation +INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_RDFT_1d, RDFTLayerTest, + ::testing::Combine( + ::testing::ValuesIn(shapesForward1d), + ::testing::ValuesIn(inputPrecision), + ::testing::Values(std::vector{0}), + ::testing::ValuesIn(signalSizes1d), + ::testing::Values(ngraph::helpers::DFTOpType::FORWARD), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); + +const std::vector> shapesInverse1d = { + {10, 2}, + {64, 2}, + {100, 2}, +}; + +INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_IRDFT_1d, RDFTLayerTest, + ::testing::Combine( + ::testing::ValuesIn(shapesInverse1d), + ::testing::ValuesIn(inputPrecision), + ::testing::Values(std::vector{0}), + ::testing::ValuesIn(signalSizes1d), + ::testing::Values(ngraph::helpers::DFTOpType::INVERSE), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); + +const std::vector> shapesForward2d = { + {10, 15}, + {64, 32}, + {100, 16}, +}; + +const std::vector> axes2d = { + {0, 1}, {1, 0}, {-2, -1}, +}; + + +const std::vector> signalSizes2d = { + {}, {10, 10}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_2d, RDFTLayerTest, + ::testing::Combine( + ::testing::ValuesIn(shapesForward2d), + ::testing::ValuesIn(inputPrecision), + ::testing::ValuesIn(axes2d), + ::testing::ValuesIn(signalSizes2d), + ::testing::Values(ngraph::helpers::DFTOpType::FORWARD), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); + +const std::vector> shapesInverse2d = { + {10, 15, 2}, + {64, 32, 2}, + {100, 32, 2}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_IRDFT_2d, RDFTLayerTest, + ::testing::Combine( + ::testing::ValuesIn(shapesInverse2d), + ::testing::ValuesIn(inputPrecision), + ::testing::ValuesIn(axes2d), + ::testing::ValuesIn(signalSizes2d), + ::testing::Values(ngraph::helpers::DFTOpType::INVERSE), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); + +const std::vector> shapesForward4d = { + {1, 3, 10, 15}, + {1, 4, 64, 32}, +}; + +const std::vector> axes4d = { + {0, 1, 2, 3}, {1, 0, -2, -1} +}; + + +const std::vector> signalSizes4d = { + {}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d, RDFTLayerTest, + ::testing::Combine( + ::testing::ValuesIn(shapesForward4d), + ::testing::ValuesIn(inputPrecision), + ::testing::ValuesIn(axes4d), + ::testing::ValuesIn(signalSizes4d), + ::testing::Values(ngraph::helpers::DFTOpType::FORWARD), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); + +const std::vector> axes4d_2d = { + {2, 3}, {1, -1} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d_axes_2d, RDFTLayerTest, + ::testing::Combine( + ::testing::ValuesIn(shapesForward4d), + 
::testing::ValuesIn(inputPrecision), + ::testing::ValuesIn(axes4d_2d), + ::testing::Values(std::vector{}), + ::testing::Values(ngraph::helpers::DFTOpType::FORWARD), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); + + +const std::vector> shapesInverse4d = { + {1, 3, 10, 15, 2}, + {1, 4, 64, 32, 2}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_IRDFT_4d, RDFTLayerTest, + ::testing::Combine( + ::testing::ValuesIn(shapesInverse4d), + ::testing::ValuesIn(inputPrecision), + ::testing::ValuesIn(axes4d), + ::testing::ValuesIn(signalSizes4d), + ::testing::Values(ngraph::helpers::DFTOpType::INVERSE), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_IRDFT_4d_axes_2d, RDFTLayerTest, + ::testing::Combine( + ::testing::ValuesIn(shapesInverse4d), + ::testing::ValuesIn(inputPrecision), + ::testing::ValuesIn(axes4d_2d), + ::testing::Values(std::vector{}), + ::testing::Values(ngraph::helpers::DFTOpType::INVERSE), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); + + + diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/rdft.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/rdft.cpp new file mode 100644 index 00000000000..0d1aec181c3 --- /dev/null +++ b/src/tests/functional/plugin/cpu/single_layer_tests/rdft.cpp @@ -0,0 +1,456 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ngraph_functions/builders.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include +#include + +using namespace CPUTestUtils; +using namespace ov::test; +using namespace ov; + +namespace CPULayerTestsDefinitions { + +using RDFTTestCPUParams = std::tuple< + Shape, + std::vector, // axes + std::vector, // signal sizes + bool, // inverse + CPUSpecificParams>; + +class RDFTTestCPU : public testing::WithParamInterface, + virtual public test::SubgraphBaseTest, public CPUTestsBase { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + Shape shape; + std::vector axes; + std::vector signalSizes; + bool inverse; + CPUSpecificParams cpuParams; + + std::tie(shape, axes, signalSizes, inverse, cpuParams) = obj.param; + + std::ostringstream result; + result << "shape=" << shape + << "_axes=" << CommonTestUtils::vec2str(axes) + << "_signalSizes=" << CommonTestUtils::vec2str(signalSizes) + << "_isInverse=" << inverse + << CPUTestsBase::getTestCaseName(cpuParams); + return result.str(); + } + +protected: + void SetUp() override { + Shape shape; + std::vector axes; + std::vector signalSizes; + element::Type_t precision = element::f32; + bool inverse; + CPUSpecificParams cpuParams; + + std::tie(shape, axes, signalSizes, inverse, cpuParams) = GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + selectedType = makeSelectedTypeStr(selectedType, precision); + targetDevice = CommonTestUtils::DEVICE_CPU; + targetStaticShapes.push_back(std::vector{shape}); + + auto param = std::make_shared(precision, shape); + auto axesNode = opset9::Constant::create(element::i64, Shape{axes.size()}, axes); + std::shared_ptr rdft; + if (signalSizes.size() > 0) { + auto signalSizesNode = opset9::Constant::create(element::i64, Shape{signalSizes.size()}, signalSizes); + if (inverse) { + rdft = std::make_shared(param, axesNode, signalSizesNode); + } else { + rdft = std::make_shared(param, axesNode, signalSizesNode); + } + } else { + if (inverse) { + rdft = std::make_shared(param, 
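// Several parameter sets in this file use negative axes (e.g. {-2, -1} or {1, -1}). They are
// counted from the back of the transformed rank, and for the inverse cases the trailing
// dimension of 2 (the packed real/imaginary pair) is not part of that rank, so {-2, -1} on a
// {46, 10, 128, 65, 2} input addresses the 128 and 65 dimensions. A small sketch of that
// normalization (hypothetical helper; int64_t axes assumed):
//
//     std::vector<int64_t> normalizeAxes(std::vector<int64_t> axes, int64_t rank) {
//         // For the inverse transform, rank excludes the trailing re/im pair.
//         for (auto& axis : axes) {
//             if (axis < 0)
//                 axis += rank;  // e.g. -1 -> rank - 1, -2 -> rank - 2
//         }
//         return axes;
//     }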
axesNode); + } else { + rdft = std::make_shared(param, axesNode); + } + } + function = std::make_shared(rdft, ParameterVector{param}); + } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + const auto& funcInputs = function->inputs(); + inputs.clear(); + + for (int i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + runtime::Tensor tensor = test::utils::create_and_fill_tensor_normal_distribution(funcInput.get_element_type(), targetInputStaticShapes[0], 0, 1, 0); + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + } +}; + +TEST_P(RDFTTestCPU, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + run(); + CheckPluginRelatedResults(compiledModel, "RDFT"); +} + +namespace { + +CPUSpecificParams getCPUSpecificParams() { + if (InferenceEngine::with_cpu_x86_avx512_core()) { + return CPUSpecificParams{{}, {}, {"jit_avx512"}, "jit_avx512"}; + } else if (InferenceEngine::with_cpu_x86_avx2()) { + return CPUSpecificParams{{}, {}, {"jit_avx2"}, "jit_avx2"}; + } else if (InferenceEngine::with_cpu_x86_sse42()) { + return CPUSpecificParams{{}, {}, {"jit_sse42"}, "jit_sse42"}; + } else { + return CPUSpecificParams{{}, {}, {"ref"}, "ref"}; + } + return {}; +} + +auto cpuParams = getCPUSpecificParams(); + +std::vector getParams1D() { + if (InferenceEngine::with_cpu_x86_avx512_core()) { + return { + {{14}, {0}, {}, false, cpuParams}, + {{13}, {0}, {}, false, cpuParams}, + {{15}, {0}, {}, false, cpuParams}, + + {{30}, {0}, {}, false, cpuParams}, + {{29}, {0}, {}, false, cpuParams}, + {{31}, {0}, {}, false, cpuParams}, + + {{46}, {0}, {}, false, cpuParams}, + {{45}, {0}, {}, false, cpuParams}, + {{47}, {0}, {}, false, cpuParams}, + + {{126}, {0}, {}, false, cpuParams}, + {{510}, {0}, {}, false, cpuParams}, + {{1022}, {0}, {}, false, cpuParams}, + + {{9, 2}, {0}, {}, true, cpuParams}, + {{8, 2}, {0}, {}, true, cpuParams}, + {{10, 2}, {0}, {}, true, cpuParams}, + + {{17, 2}, {0}, {}, true, cpuParams}, + {{16, 2}, {0}, {}, true, cpuParams}, + {{18, 2}, {0}, {}, true, cpuParams}, + + {{25, 2}, {0}, {}, true, cpuParams}, + {{24, 2}, {0}, {}, true, cpuParams}, + {{26, 2}, {0}, {}, true, cpuParams}, + + {{129, 2}, {0}, {}, true, cpuParams}, + {{513, 2}, {0}, {}, true, cpuParams}, + {{1025, 2}, {0}, {}, true, cpuParams}, + + {{25, 2}, {0}, {32}, true, cpuParams}, + {{24, 2}, {0}, {16}, true, cpuParams}, + }; + } else if (InferenceEngine::with_cpu_x86_avx2()) { + return { + {{6}, {0}, {}, false, cpuParams}, + {{5}, {0}, {}, false, cpuParams}, + {{7}, {0}, {}, false, cpuParams}, + + {{38}, {0}, {}, false, cpuParams}, + {{37}, {0}, {}, false, cpuParams}, + {{39}, {0}, {}, false, cpuParams}, + + {{106}, {0}, {}, false, cpuParams}, + {{246}, {0}, {}, false, cpuParams}, + {{245}, {0}, {118}, false, cpuParams}, + + {{126}, {0}, {}, false, cpuParams}, + {{510}, {0}, {}, false, cpuParams}, + {{1022}, {0}, {}, false, cpuParams}, + + {{5, 2}, {0}, {}, true, cpuParams}, + {{4, 2}, {0}, {}, true, cpuParams}, + {{6, 2}, {0}, {}, true, cpuParams}, + + {{9, 2}, {0}, {}, true, cpuParams}, + {{8, 2}, {0}, {}, true, cpuParams}, + {{10, 2}, {0}, {}, true, cpuParams}, + + {{17, 2}, {0}, {}, true, cpuParams}, + {{33, 2}, {0}, {}, true, cpuParams}, + {{129, 2}, {0}, {}, true, cpuParams}, + {{257, 2}, {0}, {}, true, cpuParams}, + {{513, 2}, {0}, {}, true, cpuParams}, + + {{129, 2}, {0}, {126}, true, cpuParams}, + {{257, 2}, {0}, {254}, true, cpuParams}, + {{513, 2}, {0}, {510}, true, cpuParams}, + }; + } else { + return { + {{1}, {0}, {}, false, cpuParams}, + {{2}, 
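// In these parameter lists a trailing dimension of 2 (e.g. {2, 2}, {129, 2}) marks an inverse
// case: the last axis packs each complex bin as (real, imaginary), which is how the reference
// kernel earlier in this patch reads inputPtr[2 * n] and inputPtr[2 * n + 1]. A sketch of that
// packing (hypothetical helper; the std::complex source is only for illustration):
//
//     #include <complex>
//     #include <vector>
//
//     std::vector<float> packInterleaved(const std::vector<std::complex<float>>& spectrum) {
//         std::vector<float> packed;
//         packed.reserve(2 * spectrum.size());
//         for (const auto& value : spectrum) {
//             packed.push_back(value.real());  // element [..., 0]
//             packed.push_back(value.imag());  // element [..., 1]
//         }
//         return packed;
//     }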
{0}, {}, false, cpuParams}, + {{12}, {0}, {}, false, cpuParams}, + {{14}, {0}, {}, false, cpuParams}, + {{30}, {0}, {}, false, cpuParams}, + {{62}, {0}, {}, false, cpuParams}, + {{126}, {0}, {}, false, cpuParams}, + {{250}, {0}, {}, false, cpuParams}, + {{254}, {0}, {}, false, cpuParams}, + {{62}, {0}, {61}, false, cpuParams}, + {{126}, {0}, {40}, false, cpuParams}, + {{250}, {0}, {200}, false, cpuParams}, + {{254}, {0}, {10}, false, cpuParams}, + + {{2, 2}, {0}, {}, true, cpuParams}, + {{9, 2}, {0}, {}, true, cpuParams}, + {{10, 2}, {0}, {}, true, cpuParams}, + {{17, 2}, {0}, {}, true, cpuParams}, + {{33, 2}, {0}, {}, true, cpuParams}, + {{65, 2}, {0}, {}, true, cpuParams}, + {{129, 2}, {0}, {}, true, cpuParams}, + {{257, 2}, {0}, {}, true, cpuParams}, + {{33, 2}, {0}, {50}, true, cpuParams}, + {{65, 2}, {0}, {20}, true, cpuParams}, + {{129, 2}, {0}, {200}, true, cpuParams}, + {{257, 2}, {0}, {100}, true, cpuParams}, + }; + } + return {}; +} + +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_CPU_1D, RDFTTestCPU, ::testing::ValuesIn(getParams1D()), RDFTTestCPU::getTestCaseName); + +std::vector getParams2D() { + if (InferenceEngine::with_cpu_x86_avx512_core()) { + return { + {{46, 10}, {0}, {}, false, cpuParams}, + {{45, 10}, {0}, {}, false, cpuParams}, + {{47, 10}, {0}, {}, false, cpuParams}, + + {{20, 126}, {1}, {}, false, cpuParams}, + {{20, 510}, {1}, {}, false, cpuParams}, + {{20, 1022}, {1}, {}, false, cpuParams}, + + {{48, 46}, {0, 1}, {}, false, cpuParams}, + {{32, 45}, {0, 1}, {}, false, cpuParams}, + {{64, 47}, {0, 1}, {}, false, cpuParams}, + + {{72, 126}, {0, 1}, {}, false, cpuParams}, + {{32, 510}, {0, 1}, {}, false, cpuParams}, + {{16, 1022}, {0, 1}, {}, false, cpuParams}, + + {{9, 10, 2}, {0}, {}, true, cpuParams}, + {{8, 10, 2}, {0}, {}, true, cpuParams}, + {{10, 20, 2}, {0}, {}, true, cpuParams}, + + {{10, 9, 2}, {1}, {}, true, cpuParams}, + {{10, 8, 2}, {1}, {}, true, cpuParams}, + {{20, 10, 2}, {1}, {}, true, cpuParams}, + + {{129, 16, 2}, {0}, {}, true, cpuParams}, + {{513, 32, 2}, {0}, {}, true, cpuParams}, + {{1025, 72, 2}, {0}, {}, true, cpuParams}, + + {{16, 129, 2}, {1}, {}, true, cpuParams}, + {{32, 513, 2}, {1}, {}, true, cpuParams}, + {{72, 1025, 2}, {1}, {}, true, cpuParams}, + + {{16, 129, 2}, {0, 1}, {}, true, cpuParams}, + {{32, 513, 2}, {0, 1}, {}, true, cpuParams}, + {{72, 1025, 2}, {0, 1}, {}, true, cpuParams}, + + {{16, 129, 2}, {0, 1}, {16, 200}, true, cpuParams}, + {{32, 513, 2}, {0, 1}, {32, 600}, true, cpuParams}, + {{72, 1025, 2}, {0, 1}, {72, 100}, true, cpuParams}, + }; + } else if (InferenceEngine::with_cpu_x86_avx2()) { + return { + {{38, 16}, {0}, {}, false, cpuParams}, + {{37, 8}, {0}, {}, false, cpuParams}, + {{39, 24}, {0}, {}, false, cpuParams}, + + {{16, 38}, {1}, {}, false, cpuParams}, + {{8, 37}, {1}, {}, false, cpuParams}, + {{24, 39}, {1}, {}, false, cpuParams}, + + {{16, 38}, {0, 1}, {}, false, cpuParams}, + {{8, 37}, {0, 1}, {}, false, cpuParams}, + {{24, 39}, {0, 1}, {}, false, cpuParams}, + + {{126, 32}, {0}, {}, false, cpuParams}, + {{510, 64}, {0}, {}, false, cpuParams}, + {{1022, 64}, {0}, {}, false, cpuParams}, + + {{126, 32}, {0, 1}, {}, false, cpuParams}, + {{510, 64}, {0, 1}, {}, false, cpuParams}, + {{1022, 64}, {0, 1}, {}, false, cpuParams}, + + {{38, 16, 2}, {0}, {}, true, cpuParams}, + {{37, 8, 2}, {0}, {}, true, cpuParams}, + {{39, 24, 2}, {0}, {}, true, cpuParams}, + + {{16, 38, 2}, {1}, {}, true, cpuParams}, + {{8, 37, 2}, {1}, {}, true, cpuParams}, + {{24, 39, 2}, {1}, {}, true, cpuParams}, + + {{16, 38, 2}, {0, 1}, {}, true, 
cpuParams}, + {{8, 37, 2}, {0, 1}, {}, true, cpuParams}, + {{24, 39, 2}, {0, 1}, {}, true, cpuParams}, + + {{126, 32, 2}, {0}, {}, true, cpuParams}, + {{510, 64, 2}, {0}, {}, true, cpuParams}, + {{1022, 64, 2}, {0}, {}, true, cpuParams}, + + {{126, 32, 2}, {0, 1}, {}, true, cpuParams}, + {{510, 64, 2}, {0, 1}, {}, true, cpuParams}, + {{1022, 64, 2}, {0, 1}, {}, true, cpuParams}, + + {{129, 32, 2}, {0}, {126}, true, cpuParams}, + {{257, 16, 2}, {0}, {254}, true, cpuParams}, + {{513, 64, 2}, {0}, {510}, true, cpuParams}, + }; + } else { + return { + {{1, 1}, {0}, {}, false, cpuParams}, + {{1, 1}, {1}, {}, false, cpuParams}, + {{1, 1}, {0, 1}, {}, false, cpuParams}, + {{2, 2}, {0}, {}, false, cpuParams}, + {{2, 2}, {1}, {}, false, cpuParams}, + {{2, 2}, {0, 1}, {}, false, cpuParams}, + {{13, 13}, {0}, {}, false, cpuParams}, + {{13, 13}, {1}, {}, false, cpuParams}, + {{13, 13}, {0, 1}, {}, false, cpuParams}, + {{29, 29}, {0}, {}, false, cpuParams}, + {{29, 29}, {1}, {}, false, cpuParams}, + {{29, 29}, {0, 1}, {}, false, cpuParams}, + {{30, 32}, {0}, {}, false, cpuParams}, + {{32, 30}, {1}, {}, false, cpuParams}, + {{32, 30}, {0, 1}, {}, false, cpuParams}, + {{62, 64}, {0}, {}, false, cpuParams}, + {{64, 62}, {1}, {}, false, cpuParams}, + {{64, 62}, {0, 1}, {}, false, cpuParams}, + {{254, 128}, {0}, {}, false, cpuParams}, + {{128, 254}, {1}, {}, false, cpuParams}, + {{128, 254}, {0, 1}, {}, false, cpuParams}, + {{128, 254}, {1}, {10}, false, cpuParams}, + {{128, 254}, {0, 1}, {128, 100}, false, cpuParams}, + + {{1, 1, 2}, {0}, {1}, true, cpuParams}, + {{1, 1, 2}, {1}, {1}, true, cpuParams}, + {{1, 1, 2}, {0, 1}, {1, 1}, true, cpuParams}, + {{2, 2, 2}, {0}, {}, true, cpuParams}, + {{2, 2, 2}, {1}, {}, true, cpuParams}, + {{2, 2, 2}, {0, 1}, {}, true, cpuParams}, + {{13, 13, 2}, {0}, {}, true, cpuParams}, + {{13, 13, 2}, {1}, {}, true, cpuParams}, + {{13, 13, 2}, {0, 1}, {}, true, cpuParams}, + {{29, 29, 2}, {0}, {}, true, cpuParams}, + {{29, 29, 2}, {1}, {}, true, cpuParams}, + {{29, 29, 2}, {0, 1}, {}, true, cpuParams}, + {{30, 32, 2}, {0}, {}, true, cpuParams}, + {{32, 30, 2}, {1}, {}, true, cpuParams}, + {{32, 30, 2}, {0, 1}, {}, true, cpuParams}, + {{62, 64, 2}, {0}, {}, true, cpuParams}, + {{64, 62, 2}, {1}, {}, true, cpuParams}, + {{64, 62, 2}, {0, 1}, {}, true, cpuParams}, + {{254, 128, 2}, {0}, {}, true, cpuParams}, + {{128, 254, 2}, {1}, {}, true, cpuParams}, + {{128, 254, 2}, {0, 1}, {}, true, cpuParams}, + {{128, 254, 2}, {1}, {10}, true, cpuParams}, + {{128, 254, 2}, {0, 1}, {128, 100}, true, cpuParams}, + }; + } + return {}; +} + +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_CPU_2D, RDFTTestCPU, ::testing::ValuesIn(getParams2D()), RDFTTestCPU::getTestCaseName); + + +std::vector getParams4D() { + std::vector params; + if (InferenceEngine::with_cpu_x86_avx512_core()) { + params = { + {{10, 46, 128, 65}, {1}, {}, false, cpuParams}, + {{10, 46, 128, 65}, {0, 1}, {}, false, cpuParams}, + {{46, 10, 128, 65}, {1, 0}, {}, false, cpuParams}, + {{10, 46, 128, 65}, {1, 2}, {}, false, cpuParams}, + {{46, 10, 128, 65}, {-2, -1}, {}, false, cpuParams}, + {{46, 10, 128, 65}, {3, 1}, {}, false, cpuParams}, + {{46, 10, 128, 65}, {0, 1, 2, 3}, {}, false, cpuParams}, + {{46, 10, 128, 65}, {0, 1, 2, 3}, {10, 10, 33, 50}, false, cpuParams}, + + {{10, 46, 128, 65, 2}, {1}, {}, true, cpuParams}, + {{10, 46, 128, 65, 2}, {0, 1}, {}, true, cpuParams}, + {{46, 10, 128, 65, 2}, {1, 0}, {}, true, cpuParams}, + {{10, 46, 128, 65, 2}, {1, 2}, {}, true, cpuParams}, + {{46, 10, 128, 65, 2}, {-2, -1}, {}, true, cpuParams}, 
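// These 4D cases follow the RDFT size rule: a forward transform shrinks the last transformed
// axis from L samples to L / 2 + 1 complex bins and appends the re/im pair dimension, while
// the inverse with no explicit signal size restores 2 * (L' - 1) samples along its last
// transformed axis (for the {-2, -1} entries here, 65 -> 128; the {1, 192, 36, 64} /
// {1, 192, 36, 33, 2} pairs added further down follow 64 -> 33 and 33 -> 64). A sketch of
// that bookkeeping (hypothetical helpers):
//
//     size_t rdftOutputLen(size_t signalLen) { return signalLen / 2 + 1; }   // 128 -> 65, 64 -> 33
//     size_t irdftOutputLen(size_t binCount) { return 2 * (binCount - 1); }  // 65 -> 128, 33 -> 64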
+ {{46, 10, 128, 65, 2}, {3, 1}, {}, true, cpuParams}, + {{46, 10, 128, 65, 2}, {0, 1, 2, 3}, {}, true, cpuParams}, + // TODO: FIXME + //{{46, 10, 128, 65, 2}, {0, 1, 2, 3}, {12, 15, 130, 40}, true, cpuParams}, + }; + } else if (InferenceEngine::with_cpu_x86_avx2()) { + params = { + {{9, 16, 32, 126}, {1}, {}, false, cpuParams}, + {{9, 16, 32, 126}, {1, 0}, {}, false, cpuParams}, + {{9, 16, 32, 126}, {1, 2}, {}, false, cpuParams}, + {{9, 16, 32, 126}, {-2, -1}, {}, false, cpuParams}, + {{9, 16, 32, 126}, {3, 1}, {}, false, cpuParams}, + {{9, 16, 32, 126}, {0, 1, 2, 3}, {}, false, cpuParams}, + {{9, 16, 32, 126}, {0, 1, 2, 3}, {8, 10, 11, 12}, false, cpuParams}, + + {{9, 16, 32, 126, 2}, {1}, {}, true, cpuParams}, + {{9, 16, 32, 126, 2}, {1, 0}, {}, true, cpuParams}, + {{9, 16, 32, 126, 2}, {1, 2}, {}, true, cpuParams}, + {{9, 16, 32, 126, 2}, {-2, -1}, {}, true, cpuParams}, + {{9, 16, 32, 126, 2}, {3, 1}, {}, true, cpuParams}, + {{9, 16, 32, 126, 2}, {0, 1, 2, 3}, {}, true, cpuParams}, + // TODO: FIXME + //{{9, 16, 32, 126, 2}, {0, 1, 2, 3}, {8, 10, 11, 12}, true, cpuParams}, + }; + } else { + params = { + {{1, 2, 13, 30}, {1}, {}, false, cpuParams}, + {{1, 2, 13, 30}, {1, 0}, {}, false, cpuParams}, + {{1, 2, 13, 30}, {1, 2}, {}, false, cpuParams}, + {{1, 2, 13, 30}, {-2, -1}, {}, false, cpuParams}, + {{1, 2, 13, 30}, {3, 2}, {}, false, cpuParams}, + {{1, 2, 13, 30}, {0, 1, 2, 3}, {}, false, cpuParams}, + {{1, 2, 13, 30}, {0, 1, 2, 3}, {1, 2, 3, 13}, false, cpuParams}, + + {{1, 2, 13, 30, 2}, {1}, {}, true, cpuParams}, + {{2, 2, 13, 30, 2}, {1, 0}, {}, true, cpuParams}, + {{1, 2, 13, 30, 2}, {1, 2}, {}, true, cpuParams}, + {{1, 2, 13, 30, 2}, {-2, -1}, {}, true, cpuParams}, + {{1, 2, 13, 30, 2}, {3, 2}, {}, true, cpuParams}, + {{1, 2, 13, 30, 2}, {0, 1, 2, 3}, {}, true, cpuParams}, + // TODO: FIXME + //{{1, 2, 13, 30, 2}, {0, 1, 2, 3}, {1, 2, 3, 13}, true, cpuParams}, + }; + } + params.push_back({{1, 192, 36, 64}, {0}, {}, false, cpuParams}); + params.push_back({{1, 192, 36, 64}, {1}, {}, false, cpuParams}); + params.push_back({{1, 192, 36, 64}, {2}, {}, false, cpuParams}); + params.push_back({{1, 192, 36, 64}, {3}, {}, false, cpuParams}); + params.push_back({{1, 192, 36, 64}, {0, 1}, {}, false, cpuParams}); + params.push_back({{1, 192, 36, 64}, {3, 2}, {}, false, cpuParams}); + params.push_back({{1, 192, 36, 64}, {-2, -1}, {36, 64}, false, cpuParams}); + params.push_back({{1, 192, 36, 64}, {0, 1, 2, 3}, {}, false, cpuParams}); + params.push_back({{2, 192, 36, 33, 2}, {0}, {}, true, cpuParams}); + params.push_back({{1, 192, 36, 33, 2}, {1}, {}, true, cpuParams}); + params.push_back({{1, 192, 36, 33, 2}, {2}, {}, true, cpuParams}); + params.push_back({{1, 192, 36, 33, 2}, {3}, {}, true, cpuParams}); + params.push_back({{1, 192, 36, 33, 2}, {0, 1}, {}, true, cpuParams}); + params.push_back({{1, 192, 36, 33, 2}, {3, 2}, {}, true, cpuParams}); + params.push_back({{1, 192, 36, 33, 2}, {-2, -1}, {36, 64}, true, cpuParams}); + params.push_back({{1, 192, 36, 33, 2}, {0, 1, 2, 3}, {}, true, cpuParams}); + + return params; +} + +INSTANTIATE_TEST_SUITE_P(smoke_RDFT_CPU_4D, RDFTTestCPU, ::testing::ValuesIn(getParams4D()), RDFTTestCPU::getTestCaseName); + +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/src/tests/functional/plugin/shared/include/single_layer_tests/rdft.hpp b/src/tests/functional/plugin/shared/include/single_layer_tests/rdft.hpp new file mode 100644 index 00000000000..3b5685edf1f --- /dev/null +++ 
b/src/tests/functional/plugin/shared/include/single_layer_tests/rdft.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_layer/rdft.hpp" + +namespace LayerTestsDefinitions { + +TEST_P(RDFTLayerTest, CompareWithRefs) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/rdft.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/rdft.hpp new file mode 100644 index 00000000000..8cf98721faf --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/rdft.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" + +namespace LayerTestsDefinitions { + +typedef std::tuple< + InferenceEngine::SizeVector, // Input shapes + InferenceEngine::Precision, // Input precision + std::vector, // Axes + std::vector, // Signal size + ngraph::helpers::DFTOpType, + std::string> RDFTParams; // Device name + +class RDFTLayerTest : public testing::WithParamInterface, virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/src/single_layer/rdft.cpp b/src/tests/functional/shared_test_classes/src/single_layer/rdft.cpp new file mode 100644 index 00000000000..36fe3937bab --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_layer/rdft.cpp @@ -0,0 +1,47 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/rdft.hpp" + +namespace LayerTestsDefinitions { + +std::string RDFTLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + InferenceEngine::SizeVector inputShapes; + InferenceEngine::Precision inputPrecision; + std::vector axes; + std::vector signalSize; + ngraph::helpers::DFTOpType opType; + std::string targetDevice; + std::tie(inputShapes, inputPrecision, axes, signalSize, opType, targetDevice) = obj.param; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "Precision=" << inputPrecision.name() << "_"; + result << "Axes=" << CommonTestUtils::vec2str(axes) << "_"; + result << "SignalSize=" << CommonTestUtils::vec2str(signalSize) << "_"; + result << "Inverse=" << (opType == ngraph::helpers::DFTOpType::INVERSE) << "_"; + result << "TargetDevice=" << targetDevice; + return result.str(); +} + +void RDFTLayerTest::SetUp() { + InferenceEngine::SizeVector inputShapes; + InferenceEngine::Precision inputPrecision; + std::vector axes; + std::vector signalSize; + ngraph::helpers::DFTOpType opType; + std::tie(inputShapes, inputPrecision, axes, signalSize, opType, targetDevice) = this->GetParam(); + auto inType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision); + ngraph::ParameterVector paramVector; + auto paramData = std::make_shared(inType, ngraph::Shape(inputShapes)); + paramVector.push_back(paramData); + + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(paramVector)); + auto rdft = ngraph::builder::makeRDFT(paramOuts[0], 
axes, signalSize, opType);
+
+
+    ngraph::ResultVector results{std::make_shared(rdft)};
+    function = std::make_shared(results, paramVector, "RDFT");
+}
+}  // namespace LayerTestsDefinitions
diff --git a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp
index 938c6c76a59..8c269d40969 100644
--- a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp
+++ b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp
@@ -596,6 +596,11 @@ std::shared_ptr makeDFT(const ngraph::Output &dataNode,
                         const std::vector &signalSize,
                         const ngraph::helpers::DFTOpType opType);
 
+std::shared_ptr makeRDFT(const ngraph::Output &dataNode,
+                         const std::vector &axes,
+                         const std::vector &signalSize,
+                         const ngraph::helpers::DFTOpType opType);
+
 std::shared_ptr makeEinsum(const OutputVector& inputs,
                            const std::string& equation);
 }  // namespace builder
diff --git a/src/tests/ngraph_helpers/ngraph_functions/src/rdft.cpp b/src/tests/ngraph_helpers/ngraph_functions/src/rdft.cpp
new file mode 100644
index 00000000000..33d2a089789
--- /dev/null
+++ b/src/tests/ngraph_helpers/ngraph_functions/src/rdft.cpp
@@ -0,0 +1,40 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+#include
+
+#include "ngraph_functions/builders.hpp"
+
+namespace ngraph {
+namespace builder {
+
+namespace {
+    template <typename ...Args>
+    std::shared_ptr CallDftCtorWithArgs(const ngraph::helpers::DFTOpType opType, Args&&... args) {
+        switch (opType) {
+            case ngraph::helpers::DFTOpType::FORWARD:
+                return std::make_shared(std::forward<Args>(args)...);
+            case ngraph::helpers::DFTOpType::INVERSE:
+                return std::make_shared(std::forward<Args>(args)...);
+            default:
+                throw std::logic_error("Unsupported operation type");
+        }
+    }
+}  // namespace
+
+std::shared_ptr makeRDFT(const ngraph::Output &dataNode,
+                         const std::vector &axes,
+                         const std::vector &signalSize,
+                         const ngraph::helpers::DFTOpType opType) {
+    auto axesNode = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{axes.size()}, axes)->output(0);
+
+    if (!signalSize.empty()) {
+        auto signalSizeNode = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{signalSize.size()}, signalSize)->output(0);
+        return CallDftCtorWithArgs(opType, dataNode, axesNode, signalSizeNode);
+    }
+    return CallDftCtorWithArgs(opType, dataNode, axesNode);
+}
+}  // namespace builder
+}  // namespace ngraph
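
For reference, here is a minimal usage sketch of the new makeRDFT helper, mirroring how the layer test above wires it into an ngraph::Function. The opset1::Parameter/Result choice, the element types of the axis and signal-size arguments, and the buildForwardRDFT name are illustrative assumptions, not part of the patch:

    #include <memory>
    #include <vector>

    #include <ngraph/opsets/opset1.hpp>

    #include "ngraph_functions/builders.hpp"

    std::shared_ptr<ngraph::Function> buildForwardRDFT() {
        // Real-valued input; the two innermost axes are transformed.
        auto data = std::make_shared<ngraph::opset1::Parameter>(
                ngraph::element::f32, ngraph::Shape{1, 192, 36, 64});

        // An empty signal-size vector keeps the default output sizes:
        // 36 stays 36, 64 becomes 64 / 2 + 1 = 33, plus the trailing re/im pair.
        auto rdft = ngraph::builder::makeRDFT(data->output(0),
                                              {2, 3},  // axes
                                              {},      // signal sizes
                                              ngraph::helpers::DFTOpType::FORWARD);

        ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(rdft)};
        return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{data}, "RDFT_example");
    }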