[CPU] Add RDFT and IRDFT operators (#12099)

* [CPU] Add RDFT and IRDFT operators

Tickets: 79178 and 79192

Co-authored-by: Mateusz Bencer <mateusz.bencer@intel.com>
Mateusz Tabaka 2022-07-25 19:23:27 +02:00 committed by GitHub
parent 862aebce71
commit 270051ebce
16 changed files with 2494 additions and 1 deletion
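RDFT computes the discrete Fourier transform of a real-valued signal and keeps only the non-redundant half of the spectrum; IRDFT maps that packed spectrum back to a real signal. The complex values are stored with an extra trailing dimension of size 2 (real, imag), which is the layout the Python tests below compare against numpy. A minimal numpy sketch of that layout (illustrative only, not part of the change):

import numpy as np

signal = np.random.uniform(0, 1, 50).astype(np.float32)
spectrum = np.fft.rfft(signal)                                  # 26 complex bins for a 50-sample signal
packed = np.stack((spectrum.real, spectrum.imag), axis=-1)      # shape (26, 2), the layout RDFT produces
restored = np.fft.irfft(packed[:, 0] + 1j * packed[:, 1], 50)   # the layout IRDFT consumes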


@@ -2,7 +2,8 @@
# Copyright (C) 2018-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
-import openvino.runtime.opset8 as ov
+import openvino.runtime.opset9 as ov
from openvino.runtime import Shape
import numpy as np
from tests.runtime import get_runtime


@@ -0,0 +1,165 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import openvino.runtime.opset9 as ov
from openvino.runtime import Shape
import numpy as np
from tests.runtime import get_runtime
np.random.seed(0)
def test_rdft_1d():
runtime = get_runtime()
input_size = 50
shape = [input_size]
data = np.random.uniform(0, 1, shape).astype(np.float32)
param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
input_axes = ov.constant(np.array([0], dtype=np.int64))
node = ov.rdft(param, input_axes)
computation = runtime.computation(node, param)
actual = computation(data)
np_results = np.fft.rfft(data)
expected_results = np.stack((np_results.real, np_results.imag), axis=-1)
np.testing.assert_allclose(expected_results, actual[0], atol=0.0001)
def test_irdft_1d():
runtime = get_runtime()
signal_size = 50
shape = [signal_size // 2 + 1, 2]
data = np.random.uniform(0, 1, shape).astype(np.float32)
param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
input_axes = ov.constant(np.array([0], dtype=np.int64))
node = ov.irdft(param, input_axes, ov.constant(np.array([signal_size], dtype=np.int64)))
computation = runtime.computation(node, param)
actual = computation(data)
expected_results = np.fft.irfft(data[:, 0] + 1j * data[:, 1], signal_size)
np.testing.assert_allclose(expected_results, actual[0], atol=0.0001)
def test_rdft_2d():
runtime = get_runtime()
shape = [100, 128]
data = np.random.uniform(0, 1, shape).astype(np.float32)
param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
axes = [0, 1]
input_axes = ov.constant(np.array(axes, dtype=np.int64))
node = ov.rdft(param, input_axes)
computation = runtime.computation(node, param)
actual = computation(data)
np_results = np.fft.rfftn(data, axes=axes)
expected_results = np.stack((np_results.real, np_results.imag), axis=-1)
np.testing.assert_allclose(expected_results, actual[0], atol=0.0007)
def test_rdft_2d_signal_size():
runtime = get_runtime()
shape = [100, 128]
data = np.random.uniform(0, 1, shape).astype(np.float32)
param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
axes = [0, 1]
signal_size = [30, 40]
axes_node = ov.constant(np.array(axes, dtype=np.int64))
signal_size_node = ov.constant(np.array(signal_size, dtype=np.int64))
node = ov.rdft(param, axes_node, signal_size_node)
computation = runtime.computation(node, param)
actual = computation(data)
np_results = np.fft.rfftn(data, s=signal_size, axes=axes)
expected_results = np.stack((np_results.real, np_results.imag), axis=-1)
np.testing.assert_allclose(expected_results, actual[0], atol=0.0007)
def test_irdft_2d():
runtime = get_runtime()
axes = [0, 1]
input_shape = [100, 65, 2]
data = np.random.uniform(0, 1, input_shape).astype(np.float32)
param = ov.parameter(Shape(input_shape), name="input", dtype=np.float32)
input_axes = ov.constant(np.array(axes, dtype=np.int64))
node = ov.irdft(param, input_axes)
computation = runtime.computation(node, param)
actual = computation(data)
expected_results = np.fft.irfftn(data[:, :, 0] + 1j * data[:, :, 1], axes=axes)
np.testing.assert_allclose(expected_results, actual[0], atol=0.0001)
def test_irdft_2d_signal_size():
runtime = get_runtime()
axes = [0, 1]
input_shape = [100, 65, 2]
signal_size = [100, 65]
data = np.random.uniform(0, 1, input_shape).astype(np.float32)
param = ov.parameter(Shape(input_shape), name="input", dtype=np.float32)
input_axes = ov.constant(np.array(axes, dtype=np.int64))
signal_size_node = ov.constant(np.array(signal_size, dtype=np.int64))
node = ov.irdft(param, input_axes, signal_size_node)
computation = runtime.computation(node, param)
actual = computation(data)
expected_results = np.fft.irfftn(data[:, :, 0] + 1j * data[:, :, 1], s=signal_size, axes=axes)
np.testing.assert_allclose(expected_results, actual[0], atol=0.0001)
def test_rdft_4d():
runtime = get_runtime()
shape = [1, 192, 36, 64]
data = np.random.uniform(0, 1, shape).astype(np.float32)
param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
axes = [-2, -1]
input_axes = ov.constant(np.array(axes, dtype=np.int64))
node = ov.rdft(param, input_axes)
computation = runtime.computation(node, param)
actual = computation(data)
np_results = np.fft.rfftn(data, axes=axes)
expected_results = np.stack((np_results.real, np_results.imag), axis=-1)
np.testing.assert_allclose(expected_results, actual[0], atol=0.0007)
def test_rdft_4d_signal_size():
runtime = get_runtime()
shape = [1, 192, 36, 64]
signal_size = [36, 64]
data = np.random.uniform(0, 1, shape).astype(np.float32)
param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
axes = [-2, -1]
input_axes = ov.constant(np.array(axes, dtype=np.int64))
signal_size_node = ov.constant(np.array(signal_size, dtype=np.int64))
node = ov.rdft(param, input_axes, signal_size_node)
computation = runtime.computation(node, param)
actual = computation(data)
np_results = np.fft.rfftn(data, signal_size, axes=axes)
expected_results = np.stack((np_results.real, np_results.imag), axis=-1)
np.testing.assert_allclose(expected_results, actual[0], atol=0.0007)
def test_irdft_4d():
runtime = get_runtime()
shape = [1, 192, 36, 33, 2]
data = np.random.uniform(0, 1, shape).astype(np.float32)
param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
axes = [-2, -1]
input_axes = ov.constant(np.array(axes, dtype=np.int64))
node = ov.irdft(param, input_axes)
computation = runtime.computation(node, param)
actual = computation(data)
expected_results = np.fft.irfftn(data[:, :, :, :, 0] + 1j * data[:, :, :, :, 1], axes=axes)
np.testing.assert_allclose(expected_results, actual[0], atol=0.0001)
def test_irdft_4d_signal_size():
runtime = get_runtime()
shape = [1, 192, 36, 33, 2]
signal_size = [36, 64]
data = np.random.uniform(0, 1, shape).astype(np.float32)
param = ov.parameter(Shape(shape), name="input", dtype=np.float32)
axes = [-2, -1]
input_axes = ov.constant(np.array(axes, dtype=np.int64))
signal_size_node = ov.constant(np.array(signal_size, dtype=np.int64))
node = ov.irdft(param, input_axes, signal_size_node)
computation = runtime.computation(node, param)
actual = computation(data)
expected_results = np.fft.irfftn(data[:, :, :, :, 0] + 1j * data[:, :, :, :, 1], signal_size, axes=axes)
np.testing.assert_allclose(expected_results, actual[0], atol=0.0001)
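The shapes in these tests follow the usual real-FFT convention: the last transformed axis shrinks to n // 2 + 1 complex bins and a trailing dimension of size 2 holds (real, imag). A quick sketch of the shape arithmetic (illustrative only):

n = 128
rdft_last_dim = n // 2 + 1                    # 65 bins, so [100, 128] -> [100, 65, 2] above
default_irdft_len = 2 * (rdft_last_dim - 1)   # 128 again when IRDFT is given no signal_size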


@@ -141,6 +141,8 @@ const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_t
{ "ShuffleChannels", Type::ShuffleChannels},
{ "DFT", Type::DFT},
{ "IDFT", Type::DFT},
{ "RDFT", Type::RDFT},
{ "IRDFT", Type::RDFT},
{ "Abs", Type::Math},
{ "Acos", Type::Math},
{ "Acosh", Type::Math},
@@ -328,6 +330,8 @@ std::string NameFromType(const Type type) {
return "ShuffleChannels";
case Type::DFT:
return "DFT";
case Type::RDFT:
return "RDFT";
case Type::Math:
return "Math";
case Type::CTCLoss:


@@ -79,6 +79,7 @@ enum class Type {
Reference,
ShuffleChannels,
DFT,
RDFT,
Math,
CTCLoss,
Bucketize,


@@ -0,0 +1,447 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "rdft_kernel.hpp"
#include <ie_common.h>
namespace ov {
namespace intel_cpu {
#define GET_OFF(field) offsetof(jit_dft_args, field)
template <cpu_isa_t isa>
void jit_dft_kernel_f32<isa>::generate() {
using namespace Xbyak::util;
using Xbyak::Label;
using Xbyak::Xmm;
using Vmm = typename conditional3<isa == cpu::x64::sse41, Xbyak::Xmm,
isa == cpu::x64::avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
this->preamble();
int input_type_size = 0;
int output_type_size = 0;
switch (kernel_type_) {
case real_to_complex:
input_type_size = type_size;
output_type_size = complex_type_size<float>();
break;
case complex_to_complex:
input_type_size = complex_type_size<float>();
output_type_size = complex_type_size<float>();
break;
case complex_to_real:
input_type_size = complex_type_size<float>();
output_type_size = type_size;
break;
}
int vlen = cpu_isa_traits<isa>::vlen;
const int simd_size = vlen / output_type_size;
mov(input_ptr, ptr[param1 + GET_OFF(input)]);
mov(input_size, ptr[param1 + GET_OFF(input_size)]);
mov(twiddles_ptr, ptr[param1 + GET_OFF(twiddles)]);
mov(output_start, ptr[param1 + GET_OFF(output_start)]);
mov(output_end, ptr[param1 + GET_OFF(output_end)]);
// offset twiddles_ptr by input_size * complex_type_size<float>() * output_start bytes
mov(signal_size, ptr[param1 + GET_OFF(signal_size)]);
mov(rax, signal_size);
lea(rax, ptr[rax * complex_type_size<float>()]);
xor_(rdx, rdx);
mul(output_start);
add(twiddles_ptr, rax);
// offset output_ptr by output_start * output_type_size bytes
mov(output_ptr, ptr[param1 + GET_OFF(output)]);
lea(output_ptr, ptr[output_ptr + output_type_size * output_start]);
size_t reg_idx = 0;
Xmm xmm_signal_size = Xmm(reg_idx);
Vmm vmm_signal_size = Vmm(reg_idx);
if (is_inverse_) {
reg_idx++;
uni_vbroadcastss(Vmm(reg_idx), ptr[param1 + GET_OFF(signal_size)]);
uni_vcvtdq2ps(vmm_signal_size, Vmm(reg_idx));
}
Vmm vmm_neg_mask = Vmm(reg_idx);
Xmm xmm_neg_mask = Xmm(reg_idx);
if (kernel_type_ == complex_to_complex) {
reg_idx++;
if (!is_inverse_) {
mov(rax, 1ULL << 31);
} else {
mov(rax, 1ULL << 63);
}
uni_vmovq(xmm_neg_mask, rax);
uni_vbroadcastsd(vmm_neg_mask, xmm_neg_mask);
}
mov(rax, signal_size);
and_(rax, 1);
setz(is_signal_size_even);
Label loop_over_output;
Label loop_over_output_continue;
Label loop_simd;
Label loop_nonsimd;
auto simd_loop = [this, vlen, simd_size,
input_type_size, reg_idx,
&vmm_signal_size,
&xmm_neg_mask,
&vmm_neg_mask] {
size_t idx = reg_idx;
Vmm result = Vmm(idx++);
Vmm inp_real = Vmm(idx++);
Vmm inp_imag = Vmm(idx++);
const Vmm& input = inp_real;
const Vmm& input_perm = inp_imag;
Vmm twiddles = Vmm(idx++);
const Vmm& cos = twiddles;
Vmm sin = Vmm(idx++);
Xmm tmp = Xmm(idx++);
uni_vpxor(result, result, result);
if (kernel_type_ == complex_to_complex && is_inverse_) {
mov(rdx, 1ULL << 63);
uni_vmovq(xmm_neg_mask, rdx);
uni_vbroadcastsd(vmm_neg_mask, xmm_neg_mask);
}
Label loop;
L(loop);
{
if (kernel_type_ == real_to_complex) {
uni_vbroadcastss(inp_real, ptr[input_ptr]);
uni_vmovups(twiddles, ptr[twiddles_ptr]);
uni_vfmadd231ps(result, inp_real, twiddles);
add(twiddles_ptr, vlen);
} else if (kernel_type_ == complex_to_real) {
uni_vbroadcastss(inp_real, ptr[input_ptr]);
uni_vbroadcastss(inp_imag, ptr[input_ptr + type_size]);
uni_vmovups(cos, ptr[twiddles_ptr]);
uni_vmovups(sin, ptr[twiddles_ptr + vlen]);
uni_vfmadd231ps(result, inp_real, cos);
uni_vfmadd231ps(result, inp_imag, sin);
add(twiddles_ptr, 2 * vlen);
} else if (kernel_type_ == complex_to_complex) {
// output_real += input_real * cos(..) - input_imag * sin(..)
// output_imag += input_imag * cos(..) + input_real * sin(..)
uni_vbroadcastsd(input, ptr[input_ptr]);
uni_vpermilps(input_perm, input, 0b10110001); // swap real with imag
uni_vpxor(input_perm, input_perm, vmm_neg_mask); // negate imag part (or real part if is_inverse == true)
load_and_broadcast_every_other_elem(cos, twiddles_ptr, tmp);
load_and_broadcast_every_other_elem(sin, twiddles_ptr + vlen / 2, tmp);
uni_vfmadd231ps(result, input, cos);
uni_vfmadd231ps(result, input_perm, sin);
add(twiddles_ptr, vlen);
}
add(input_ptr, input_type_size);
dec(input_size);
cmp(input_size, 0);
jne(loop, T_NEAR);
}
if (is_inverse_) {
Label loop_backwards;
Label loop_backwards_exit;
mov(input_size, signal_size);
sub(input_size, ptr[param1 + GET_OFF(input_size)]);
if (kernel_type_ == complex_to_complex) {
mov(rdx, 1ULL << 31);
vmovq(xmm_neg_mask, rdx);
uni_vbroadcastsd(vmm_neg_mask, xmm_neg_mask);
}
test(is_signal_size_even, 1);
jz(loop_backwards);
sub(input_ptr, input_type_size);
L(loop_backwards);
{
cmp(input_size, 0);
je(loop_backwards_exit, T_NEAR);
sub(input_ptr, input_type_size);
if (kernel_type_ == complex_to_real) {
uni_vbroadcastss(inp_real, ptr[input_ptr]);
uni_vbroadcastss(inp_imag, ptr[input_ptr + type_size]);
uni_vmovups(cos, ptr[twiddles_ptr]);
uni_vmovups(sin, ptr[twiddles_ptr + vlen]);
uni_vfmadd231ps(result, inp_real, cos);
uni_vfnmadd231ps(result, inp_imag, sin);
add(twiddles_ptr, 2 * vlen);
} else if (kernel_type_ == complex_to_complex) {
// output_real += input_real * cos(..) - input_imag * sin(..)
// output_imag += input_imag * cos(..) + input_real * sin(..)
uni_vbroadcastsd(input, ptr[input_ptr]);
uni_vpermilps(input_perm, input, 0b10110001); // swap real with imag
uni_vpxor(input_perm, input_perm, vmm_neg_mask); // negate imag part
load_and_broadcast_every_other_elem(cos, twiddles_ptr, tmp);
load_and_broadcast_every_other_elem(sin, twiddles_ptr + vlen / 2, tmp);
uni_vfmadd231ps(result, input, cos);
uni_vfmadd231ps(result, input_perm, sin);
add(twiddles_ptr, vlen);
}
dec(input_size);
jmp(loop_backwards, T_NEAR);
}
L(loop_backwards_exit);
}
if (is_inverse_) {
uni_vdivps(result, result, vmm_signal_size);
}
// store the results
uni_vmovups(ptr[output_ptr], result);
add(output_ptr, vlen);
sub(output_end, simd_size);
};
auto nonsimd_loop = [this,
input_type_size,
output_type_size,
&xmm_signal_size,
reg_idx] {
size_t idx = reg_idx;
Xmm xmm_inp_real = Xbyak::Xmm(idx++);
Xmm xmm_inp_imag = Xbyak::Xmm(idx++);
Xmm xmm_real = Xbyak::Xmm(idx++);
Xmm xmm_imag = Xbyak::Xmm(idx++);
Xmm xmm_cos = Xbyak::Xmm(idx++);
Xmm xmm_sin = Xbyak::Xmm(idx++);
if (kernel_type_ != complex_to_real) {
xorps(xmm_real, xmm_real);
xorps(xmm_imag, xmm_imag);
} else {
xorps(xmm_real, xmm_real);
}
Label loop;
L(loop);
{
movss(xmm_cos, ptr[twiddles_ptr]);
movss(xmm_sin, ptr[twiddles_ptr + type_size]);
if (kernel_type_ == real_to_complex) {
movss(xmm_inp_real, ptr[input_ptr]);
// output_real += input_real * cos(..)
mulss(xmm_cos, xmm_inp_real);
addss(xmm_real, xmm_cos);
// output_imag += input_real * sin(..)
mulss(xmm_sin, xmm_inp_real);
addss(xmm_imag, xmm_sin);
} else if (kernel_type_ == complex_to_real) {
movss(xmm_inp_real, ptr[input_ptr]);
movss(xmm_inp_imag, ptr[input_ptr + type_size]);
// output += real * cos(..) + imag * sin(..)
mulss(xmm_cos, xmm_inp_real);
mulss(xmm_sin, xmm_inp_imag);
addss(xmm_cos, xmm_sin);
addss(xmm_real, xmm_cos);
} else if (kernel_type_ == complex_to_complex) {
// output_real += input_real * cos(..) - input_imag * sin(..)
movss(xmm_inp_real, ptr[input_ptr]);
movss(xmm_inp_imag, ptr[input_ptr + type_size]);
mulss(xmm_inp_real, xmm_cos);
mulss(xmm_inp_imag, xmm_sin);
if (!is_inverse_) {
subss(xmm_inp_real, xmm_inp_imag);
} else {
addss(xmm_inp_real, xmm_inp_imag);
}
addss(xmm_real, xmm_inp_real);
// output_imag += input_imag * cos(..) + input_real * sin(..)
movss(xmm_inp_real, ptr[input_ptr]);
movss(xmm_inp_imag, ptr[input_ptr + type_size]);
mulss(xmm_inp_imag, xmm_cos);
mulss(xmm_inp_real, xmm_sin);
if (!is_inverse_) {
addss(xmm_inp_imag, xmm_inp_real);
} else {
subss(xmm_inp_imag, xmm_inp_real);
}
addss(xmm_imag, xmm_inp_imag);
}
// increment indexes for next iteration
add(twiddles_ptr, complex_type_size<float>());
add(input_ptr, input_type_size);
dec(input_size);
// continue if input_size > 0
cmp(input_size, 0);
jg(loop, T_NEAR);
}
if (is_inverse_) {
Label loop_backwards;
Label loop_backwards_exit;
mov(input_size, signal_size);
sub(input_size, ptr[param1 + GET_OFF(input_size)]);
test(is_signal_size_even, 1);
jz(loop_backwards);
sub(input_ptr, input_type_size);
L(loop_backwards);
{
cmp(input_size, 0);
je(loop_backwards_exit);
sub(input_ptr, input_type_size);
movss(xmm_cos, ptr[twiddles_ptr]);
movss(xmm_sin, ptr[twiddles_ptr + type_size]);
movss(xmm_inp_real, ptr[input_ptr]);
movss(xmm_inp_imag, ptr[input_ptr + type_size]);
if (kernel_type_ == complex_to_real) {
// output += real * cos(..) - imag * sin(..)
mulss(xmm_cos, xmm_inp_real);
mulss(xmm_sin, xmm_inp_imag);
subss(xmm_cos, xmm_sin);
addss(xmm_real, xmm_cos);
} else if (kernel_type_ == complex_to_complex) {
// output_real += input_real * cos(..) - input_imag * sin(..)
movss(xmm_inp_real, ptr[input_ptr]);
movss(xmm_inp_imag, ptr[input_ptr + type_size]);
mulss(xmm_inp_real, xmm_cos);
mulss(xmm_inp_imag, xmm_sin);
subss(xmm_inp_real, xmm_inp_imag);
addss(xmm_real, xmm_inp_real);
// output_imag += input_imag * cos(..) + input_real * sin(..)
movss(xmm_inp_real, ptr[input_ptr]);
movss(xmm_inp_imag, ptr[input_ptr + type_size]);
mulss(xmm_inp_imag, xmm_cos);
mulss(xmm_inp_real, xmm_sin);
addss(xmm_inp_imag, xmm_inp_real);
addss(xmm_imag, xmm_inp_imag);
}
add(twiddles_ptr, complex_type_size<float>());
dec(input_size);
jmp(loop_backwards);
}
L(loop_backwards_exit);
}
if (kernel_type_ == complex_to_real) {
if (is_inverse_) {
divss(xmm_real, xmm_signal_size);
}
// store the result
movss(ptr[output_ptr], xmm_real);
} else {
if (is_inverse_) {
divss(xmm_real, xmm_signal_size);
divss(xmm_imag, xmm_signal_size);
}
// store the results
movss(ptr[output_ptr], xmm_real);
movss(ptr[output_ptr + type_size], xmm_imag);
}
add(output_ptr, output_type_size);
dec(output_end);
};
L(loop_over_output);
{
mov(input_ptr, ptr[param1 + GET_OFF(input)]);
mov(input_size, ptr[param1 + GET_OFF(input_size)]);
cmp(output_end, simd_size);
jae(loop_simd, T_NEAR);
jmp(loop_nonsimd, T_NEAR);
L(loop_simd);
simd_loop();
jmp(loop_over_output_continue, T_NEAR);
L(loop_nonsimd);
nonsimd_loop();
L(loop_over_output_continue);
cmp(output_end, 0);
ja(loop_over_output, T_NEAR);
}
this->postamble();
}
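Per output bin, the kernel above is a plain DFT accumulation driven by precomputed twiddles: the SIMD loop broadcasts one input sample, loads a register of twiddles and issues fused multiply-adds over a block of output bins, while the scalar loop handles the remainder. A numpy sketch of the real-to-complex case (illustrative only; the JIT code additionally handles signal-size padding and the inverse paths):

import numpy as np

def naive_rdft(x, n_out):
    x = np.asarray(x, dtype=np.float64)
    N = len(x)
    k = np.arange(n_out)[:, None]
    n = np.arange(N)[None, :]
    w = np.exp(-2j * np.pi * k * n / N)                # the twiddle table, one row per output bin
    spec = (x[None, :] * w).sum(axis=1)                # the multiply-add accumulation
    return np.stack([spec.real, spec.imag], axis=-1)   # interleaved (real, imag), as the kernel stores it

With n_out = len(x) // 2 + 1 this reproduces np.fft.rfft(x) up to float error, packed the way the tests earlier in this change expect.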
template <cpu_isa_t isa>
void jit_dft_kernel_f32<isa>::uni_vbroadcastsd(const Xbyak::Xmm& x, const Xbyak::Operand& op) {
movsd(x, op);
shufpd(x, x, 0x0);
}
template <cpu_isa_t isa>
void jit_dft_kernel_f32<isa>::uni_vbroadcastsd(const Xbyak::Ymm& x, const Xbyak::Operand& op) {
vbroadcastsd(x, op);
}
template <cpu_isa_t isa>
void jit_dft_kernel_f32<isa>::uni_vpermilps(const Xbyak::Xmm& x, const Xbyak::Operand& op, int8_t control) {
movups(x, op);
shufps(x, x, control);
}
template <cpu_isa_t isa>
void jit_dft_kernel_f32<isa>::uni_vpermilps(const Xbyak::Ymm& x, const Xbyak::Operand& op, int8_t control) {
vpermilps(x, op, control);
}
template <cpu_isa_t isa>
void jit_dft_kernel_f32<isa>::load_and_broadcast_every_other_elem(const Xbyak::Zmm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp) {
for (int i = 0; i < 4; i++) {
movq(tmp, ptr[reg_exp + type_size * i * 2]);
shufps(tmp, tmp, 0b01010000);
vinsertf32x4(x, x, tmp, i);
}
}
template <cpu_isa_t isa>
void jit_dft_kernel_f32<isa>::load_and_broadcast_every_other_elem(const Xbyak::Ymm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp) {
for (int i = 0; i < 2; i++) {
movq(tmp, ptr[reg_exp + type_size * i * 2]);
shufps(tmp, tmp, 0b01010000);
vinsertf128(x, x, tmp, i);
}
}
template <cpu_isa_t isa>
void jit_dft_kernel_f32<isa>::load_and_broadcast_every_other_elem(const Xbyak::Xmm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp) {
movq(x, ptr[reg_exp]);
shufps(x, x, 0b01010000);
}
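The load_and_broadcast_every_other_elem overloads above duplicate each loaded scalar into adjacent lanes so a vector of cos (or sin) values lines up with the interleaved (real, imag) accumulator; the shufps control 0b01010000 turns [a, b, ?, ?] into [a, a, b, b]. A tiny numpy illustration of the resulting lane pattern (assumption based on that shuffle control):

import numpy as np
vals = np.array([0.25, -0.5], dtype=np.float32)   # two consecutive floats loaded by movq
lanes = np.repeat(vals, 2)                        # [0.25, 0.25, -0.5, -0.5]: each scalar fills a (real, imag) lane pair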
template struct jit_dft_kernel_f32<cpu::x64::sse41>;
template struct jit_dft_kernel_f32<cpu::x64::avx2>;
template struct jit_dft_kernel_f32<cpu::x64::avx512_core>;
} // namespace intel_cpu
} // namespace ov


@@ -0,0 +1,96 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "cpu/x64/jit_generator.hpp"
#include <dnnl_types.h>
namespace ov {
namespace intel_cpu {
using namespace dnnl::impl;
using namespace dnnl::impl::cpu::x64;
using namespace dnnl::impl::utils;
enum dft_type {
real_to_complex,
complex_to_complex,
complex_to_real,
};
template <typename T>
size_t complex_type_size() {
return sizeof(T) * 2;
}
struct jit_dft_args {
const void* input;
const void* twiddles;
void* output;
size_t input_size;
size_t signal_size;
size_t output_start;
size_t output_end;
};
struct jit_dft_kernel {
jit_dft_kernel(bool is_inverse, enum dft_type type) : is_inverse_(is_inverse), kernel_type_(type) {}
void (*ker_)(const jit_dft_args*);
void operator()(const jit_dft_args* args) {
assert(ker_);
ker_(args);
}
jit_dft_kernel() : ker_(nullptr) {}
virtual ~jit_dft_kernel() {}
virtual void create_ker() = 0;
bool is_inverse_;
enum dft_type kernel_type_;
};
template <cpu_isa_t isa>
struct jit_dft_kernel_f32 : public jit_dft_kernel, public jit_generator {
public:
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_dft_kernel_f32)
jit_dft_kernel_f32(bool is_inverse, enum dft_type type) : jit_dft_kernel(is_inverse, type), jit_generator() {}
void create_ker() override {
jit_generator::create_kernel();
ker_ = (decltype(ker_))jit_ker();
}
void generate() override;
private:
void uni_vbroadcastsd(const Xbyak::Xmm& x, const Xbyak::Operand& op);
void uni_vbroadcastsd(const Xbyak::Ymm& x, const Xbyak::Operand& op);
void uni_vpermilps(const Xbyak::Xmm& x, const Xbyak::Operand& op, int8_t control);
void uni_vpermilps(const Xbyak::Ymm& x, const Xbyak::Operand& op, int8_t control);
void load_and_broadcast_every_other_elem(const Xbyak::Zmm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp);
void load_and_broadcast_every_other_elem(const Xbyak::Ymm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp);
void load_and_broadcast_every_other_elem(const Xbyak::Xmm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp);
int type_size = sizeof(float);
Xbyak::Reg8 is_signal_size_even = al;
Xbyak::Reg64 input_ptr = rbx;
Xbyak::Reg64 input_size = r8;
Xbyak::Reg64 output_ptr = r9;
Xbyak::Reg64 twiddles_ptr = r10;
Xbyak::Reg64 signal_size = r11;
Xbyak::Reg64 output_start = r12;
Xbyak::Reg64 output_end = r13;
};
} // namespace intel_cpu
} // namespace ov


@@ -0,0 +1,927 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include <vector>
#include <cmath>
#include <dnnl_extension_utils.h>
#include <onednn/dnnl.h>
#include <cpu/x64/cpu_isa_traits.hpp>
#include <cpu/x64/jit_generator.hpp>
#include <common/primitive_hashing_utils.hpp>
#include "rdft.h"
#include "ie_parallel.hpp"
#include "ie_precision.hpp"
#include "utils/general_utils.h"
#include "common/cpu_memcpy.h"
#include <openvino/op/rdft.hpp>
#include <openvino/op/irdft.hpp>
#include <openvino/op/constant.hpp>
using namespace dnnl;
using namespace InferenceEngine;
namespace ov {
namespace intel_cpu {
namespace node {
static constexpr size_t DATA_INDEX = 0;
static constexpr size_t AXES_INDEX = 1;
static constexpr size_t SIGNAL_SIZE_INDEX = 2;
static constexpr double PI = 3.14159265358979323846;
bool RDFT::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
const bool isRDFT = is_type<const ov::op::v9::RDFT>(op);
const bool isIRDFT = is_type<const ov::op::v9::IRDFT>(op);
if (!isRDFT && !isIRDFT) {
errorMessage = "Only opset9 RDFT/IRDFT operation is supported";
return false;
}
} catch (...) {
return false;
}
return true;
}
static void normalizeAxes(std::vector<int>& axes, size_t rank) {
for (auto& axis : axes) {
if (axis < 0) {
axis += rank;
}
}
}
static std::vector<int> getDefaultSignalSizes(const VectorDims& inputShape, const std::vector<int>& axes, bool inverse) {
std::vector<int> signalSizes;
signalSizes.reserve(axes.size());
for (auto axis : axes) {
signalSizes.push_back(inputShape[axis]);
}
if (inverse) {
signalSizes[signalSizes.size() - 1] = 2 * (inputShape[axes.back()] - 1);
}
return signalSizes;
}
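When the signal_size input is omitted, the inverse path assumes the original signal on the last axis had even length: 2 * (m - 1) samples for m packed bins. Odd lengths need an explicit signal_size, which matches numpy's behaviour (illustrative only):

import numpy as np
m = 65                                                  # packed bins on the last IRDFT axis
default_len = 2 * (m - 1)                               # 128
round_trip = np.fft.irfft(np.fft.rfft(np.ones(129)))    # comes back with 128 samples unless s=129 is given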
RDFT::RDFT(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) :
Node(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
errorMsgPrefix = "RDFT layer with name '" + op->get_name() + "'";
const size_t numInputs = getOriginalInputsNumber();
if (numInputs != 2 && numInputs != 3) {
IE_THROW() << errorMsgPrefix << " has invalid number of input/output edges: " << numInputs;
}
const auto axesRank = inputShapes[AXES_INDEX].getRank();
if (axesRank != 1) {
IE_THROW() << errorMsgPrefix << " has invalid 'axes' input tensor with rank: " << axesRank;
}
inverse = ov::is_type<ov::op::v9::IRDFT>(op);
if (numInputs > 2) {
const auto signalSizeRank = inputShapes[SIGNAL_SIZE_INDEX].getRank();
if (signalSizeRank != 1) {
IE_THROW() << errorMsgPrefix << " has invalid 'signalSize' input tensor with rank: " << signalSizeRank;
}
auto signalSizesNode = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(2));
if (!signalSizesNode)
return;
signalSizes = signalSizesNode->cast_vector<int>();
}
auto axesNode = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(1));
if (!axesNode)
return;
axes = axesNode->cast_vector<int>();
auto rank = inputShapes[DATA_INDEX].getRank() - inverse;
normalizeAxes(axes, rank);
if (numInputs < 3) {
const auto& inputShape = inputShapes[DATA_INDEX].getStaticDims();
signalSizes = getDefaultSignalSizes(inputShape, axes, inverse);
}
}
void RDFT::getSupportedDescriptors() {}
void RDFT::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
const auto& dataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX);
if (!dataPrecision.is_float()) {
IE_THROW() << errorMsgPrefix << " has unsupported 'data' input precision: " << dataPrecision.name();
}
const auto& axesPrecision = getOriginalInputPrecisionAtPort(AXES_INDEX);
if (axesPrecision != Precision::I32 && axesPrecision != Precision::I64) {
IE_THROW() << errorMsgPrefix << " has unsupported 'axes' input precision: " << axesPrecision.name();
}
if (inputShapes.size() > SIGNAL_SIZE_INDEX) {
const auto& signalSizePrecision = getOriginalInputPrecisionAtPort(SIGNAL_SIZE_INDEX);
if (signalSizePrecision != Precision::I32 && signalSizePrecision != Precision::I64) {
IE_THROW() << errorMsgPrefix << " has unsupported 'signalSize' input precision: " << signalSizePrecision.name();
}
}
std::vector<PortConfigurator> configurators({{LayoutType::ncsp, Precision::FP32},
{LayoutType::ncsp, Precision::I32}});
if (inputShapes.size() > SIGNAL_SIZE_INDEX)
configurators.push_back({LayoutType::ncsp, Precision::I32});
addSupportedPrimDesc(configurators, {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any);
}
void RDFT::execute(dnnl::stream strm) {
const auto& inputMem = getParentEdgeAt(DATA_INDEX)->getMemory();
const auto& outputMem = getChildEdgeAt(0)->getMemory();
const auto& inputShape = inputMem.getStaticDims();
const auto& outputShape = outputMem.getStaticDims();
auto inputPtr = reinterpret_cast<float*>(inputMem.GetPtr());
auto outputPtr = reinterpret_cast<float*>(outputMem.GetPtr());
auto rank = inputShape.size() - inverse;
if (axes.size() == 0) {
const auto& axesMem = getParentEdgeAt(AXES_INDEX)->getMemoryPtr();
auto axesPtr = reinterpret_cast<const int32_t*>(axesMem->GetPtr());
axes = std::vector<int>(axesPtr, axesPtr + axesMem->getStaticDims()[0]);
normalizeAxes(axes, rank);
}
if (signalSizes.size() == 0) {
if (SIGNAL_SIZE_INDEX < getOriginalInputsNumber()) {
const auto& signalSizeMem = getParentEdgeAt(SIGNAL_SIZE_INDEX)->getMemoryPtr();
auto signalPtr = reinterpret_cast<const int32_t*>(signalSizeMem->GetPtr());
signalSizes = std::vector<int>(signalPtr, signalPtr + signalSizeMem->getStaticDims()[0]);
} else {
signalSizes = getDefaultSignalSizes(inputShape, axes, inverse);
}
}
const auto& inputStrides = inputMem.GetDescWithType<BlockedMemoryDesc>()->getStrides();
const auto& outputStrides = outputMem.GetDescWithType<BlockedMemoryDesc>()->getStrides();
if (twiddles.size() == 0) {
twiddles = executor->generateTwiddles(signalSizes, outputShape, axes);
}
executor->execute(inputPtr, outputPtr,
twiddles, rank,
axes, signalSizes,
inputShape, outputShape,
inputStrides, outputStrides);
}
bool RDFT::created() const {
return getType() == Type::RDFT;
}
static void adjustInputSize(VectorDims& inputShape,
std::vector<int>& signalSizes,
const VectorDims& outputShape,
const std::vector<int>& axes,
bool isInverse) {
for (size_t i = 0; i < axes.size(); i++) {
auto axis = axes[i];
size_t inputSize = inputShape[axis];
size_t signalSize = signalSizes[i];
if (signalSize <= inputSize) {
inputShape[axis] = signalSize;
} else if (!isInverse) {
IE_THROW() << "Signal size greater than input size is not supported yet";
}
}
if (isInverse) {
inputShape[axes.back()] = signalSizes.back() / 2 + 1;
}
}
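adjustInputSize crops the effective input to the requested signal size per axis (a signal size larger than the input is rejected on the forward path), and for the inverse the packed last axis is clamped to signal_size // 2 + 1 complex bins. A sketch of the arithmetic (illustrative only):

input_shape = [100, 65]                  # IRDFT input, complex bins on the last axis
signal_sizes = [100, 100]
bins_read = signal_sizes[-1] // 2 + 1    # 51 of the 65 stored bins are consumed for a 100-sample inverse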
void RDFTExecutor::execute(float* inputPtr, float* outputPtr,
const std::vector<std::vector<float>>& twiddles,
size_t rank, const std::vector<int>& axes,
std::vector<int> signalSizes,
VectorDims inputShape, const VectorDims& outputShape,
const VectorDims& inputStrides, const VectorDims& outputStrides) {
adjustInputSize(inputShape, signalSizes, outputShape, axes, isInverse);
if (rank == 1) {
auto twiddlesPtr = twiddles[0].data();
dftCommon(inputPtr, twiddlesPtr, outputPtr,
inputShape[0], signalSizes[0], outputShape[0],
isInverse ? complex_to_real : real_to_complex,
canUseFFT(signalSizes[0]), false);
} else {
if (!isInverse)
rdftNd(inputPtr, outputPtr, twiddles, axes, signalSizes, inputShape, inputStrides, outputShape, outputStrides);
else
irdftNd(inputPtr, outputPtr, twiddles, axes, signalSizes, inputShape, inputStrides, outputShape, outputStrides);
}
}
static void coordsFromIndex(size_t index, std::vector<size_t>& coords, const std::vector<size_t>& shape, int excludeAxis) {
for (size_t i = coords.size(); i > 0; i--) {
if (excludeAxis == i - 1) {
coords[i - 1] = 0;
continue;
}
coords[i - 1] = index % shape[i - 1];
index /= shape[i - 1];
}
}
static size_t getOffset(const std::vector<size_t>& coords, const std::vector<size_t>& strides) {
size_t offset = 0;
for (size_t i = 0; i < coords.size(); ++i) {
offset += coords[i] * strides[i];
}
return offset;
}
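coordsFromIndex and getOffset together enumerate every 1-D line of the tensor along the transformed axis: a flat index over all the other axes is decoded into coordinates (with the transformed axis pinned to 0) and turned into an element offset via the strides. A direct Python rendering of the two helpers, for readability (illustrative only):

def coords_from_index(index, shape, exclude_axis):
    coords = [0] * len(shape)
    for i in reversed(range(len(shape))):
        if i == exclude_axis:
            continue                      # the transformed axis stays at 0
        coords[i] = index % shape[i]
        index //= shape[i]
    return coords

def offset_of(coords, strides):
    return sum(c * s for c, s in zip(coords, strides))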
static void gatherReal(float* output, const float* input, size_t axis,
const std::vector<size_t>& coords,
size_t size, const std::vector<size_t>& strides) {
size_t inputOffset = getOffset(coords, strides);
for (size_t i = 0; i < size; i++) {
output[i] = input[inputOffset];
inputOffset += strides[axis];
}
}
static void gatherComplex(float* output, const float* input, size_t axis,
const std::vector<size_t>& coords,
size_t size, const std::vector<size_t>& strides) {
size_t inputOffset = getOffset(coords, strides);
for (size_t i = 0; i < 2 * size; i += 2) {
output[i] = input[inputOffset];
output[i + 1] = input[inputOffset + 1];
inputOffset += strides[axis];
}
}
static void scatterReal(float* output, const float* input, size_t axis,
const std::vector<size_t>& coords,
size_t size, const std::vector<size_t>& strides) {
size_t offset = getOffset(coords, strides);
for (size_t i = 0; i < size; i++) {
output[offset] = input[i];
offset += strides[axis];
}
}
static void scatterComplex(float* output, const float* input, size_t axis,
const std::vector<size_t>& coords,
size_t size, const std::vector<size_t>& strides) {
size_t offset = getOffset(coords, strides);
for (size_t i = 0; i < 2 * size; i += 2) {
output[offset] = input[i];
output[offset + 1] = input[i + 1];
offset += strides[axis];
}
}
static bool isPowerOfTwo(size_t n) {
return (n != 0) && (n & (n - 1)) == 0;
}
static size_t dftSimdSize(int vlen) {
return vlen / (2 * sizeof(float));
}
bool RDFTExecutor::canUseFFT(size_t dim) {
return isPowerOfTwo(dim) && dim > 1;
}
static void fftCopyInverseInputData(float* dst, float* src, size_t inputSize, size_t signalSize, bool parallelize) {
if (!parallelize) {
cpu_memcpy(dst, src, inputSize * complex_type_size<float>());
src = src + 2 * inputSize - 4;
for (size_t i = inputSize; i < signalSize; i++, src -= 2) {
dst[2 * i] = src[0];
dst[2 * i + 1] = -src[1];
}
} else {
parallel_for(signalSize, [&] (size_t i) {
if (i < inputSize) {
dst[2 * i] = src[2 * i];
dst[2 * i + 1] = src[2 * i + 1];
} else {
size_t src_idx = 2 * inputSize - 2 - i;
dst[2 * i] = src[2 * src_idx];
dst[2 * i + 1] = -src[2 * src_idx + 1];
}
});
}
}
static void fftCopyRealInputData(float* dst, float* src, size_t inputSize, bool parallelize) {
if (!parallelize) {
for (size_t i = 0; i < inputSize; i++) {
dst[2 * i] = src[i];
dst[2 * i + 1] = 0;
}
} else {
parallel_for(inputSize, [&] (size_t i) {
dst[2 * i] = src[i];
dst[2 * i + 1] = 0;
});
}
}
static void fftCopyInverseRealOutput(float* dst, float* src, size_t signalSize, bool parallelize) {
if (!parallelize) {
for (size_t i = 0; i < signalSize; i++) {
dst[i] = src[2 * i];
}
} else {
parallel_for(signalSize, [&] (size_t i) {
dst[i] = src[2 * i];
});
}
}
void RDFTExecutor::fft(float* input, const float* twiddlesPtr, float* output,
size_t inputSize, size_t signalSize, size_t outputSize,
enum dft_type type, bool parallelize) {
std::vector<float> scratchSpace(4 * signalSize, 0);
float* inputPtr = input;
float* outputPtr = &scratchSpace[2 * signalSize];
if (inputSize < signalSize || type == real_to_complex) {
if (isInverse)
fftCopyInverseInputData(&scratchSpace[0], input, inputSize, signalSize, parallelize);
else if (type == real_to_complex)
fftCopyRealInputData(&scratchSpace[0], input, inputSize, parallelize);
inputPtr = &scratchSpace[0];
}
size_t numBlocks = 0;
size_t blockSize = 0;
auto blockIteration = [&] (size_t block) {
size_t inputOffset = block * blockSize;
size_t outputOffset = block * blockSize / 2;
float cos = twiddlesPtr[2 * block];
float sin = twiddlesPtr[2 * block + 1];
if (isInverse)
sin = -sin;
for (size_t pair = 0; pair < blockSize / 2; pair++) {
float evenReal = inputPtr[2 * (inputOffset + pair)];
float evenImag = inputPtr[2 * (inputOffset + pair) + 1];
float oddReal = inputPtr[2 * (inputOffset + blockSize / 2 + pair)];
float oddImag = inputPtr[2 * (inputOffset + blockSize / 2 + pair) + 1];
outputPtr[2 * (outputOffset + pair)] = evenReal + cos * oddReal - sin * oddImag;
outputPtr[2 * (outputOffset + pair) + 1] = evenImag + cos * oddImag + sin * oddReal;
outputPtr[2 * (outputOffset + signalSize / 2 + pair)] = evenReal - cos * oddReal + sin * oddImag;
outputPtr[2 * (outputOffset + signalSize / 2 + pair) + 1] = evenImag - cos * oddImag - sin * oddReal;
if (isInverse && numBlocks == signalSize / 2) {
outputPtr[2 * (outputOffset + pair)] /= signalSize;
outputPtr[2 * (outputOffset + pair) + 1] /= signalSize;
outputPtr[2 * (outputOffset + signalSize / 2 + pair)] /= signalSize;
outputPtr[2 * (outputOffset + signalSize / 2 + pair) + 1] /= signalSize;
}
}
};
for (numBlocks = 1; numBlocks < signalSize; numBlocks *= 2) {
blockSize = signalSize / numBlocks;
if (numBlocks == signalSize / 2 && outputSize == signalSize && type != complex_to_real) {
outputPtr = output;
}
if (parallelize) {
parallel_for(numBlocks, blockIteration);
} else {
for (size_t block = 0; block < numBlocks; block++) {
blockIteration(block);
}
}
twiddlesPtr += numBlocks * 2;
if (numBlocks == 1 && inputPtr == input)
inputPtr = &scratchSpace[0];
std::swap(inputPtr, outputPtr);
}
if (type == complex_to_real) {
fftCopyInverseRealOutput(output, inputPtr, signalSize, parallelize);
} else if (outputSize != signalSize) {
cpu_memcpy(output, inputPtr, outputSize * complex_type_size<float>());
}
}
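The fft routine above is a radix-2 ping-pong (Stockham-style) transform: each stage combines the even and odd halves of every block with a single per-block twiddle and writes the results into the first and second half of the destination buffer, so no bit-reversal pass is needed and the output comes out in natural order. A numpy sketch of one stage and of chaining the stages (illustrative only; the C++ code also folds in the 1/N scaling for the inverse and ping-pongs between scratch buffers):

import numpy as np

def fft_stage(src, num_blocks, twiddles):
    n = len(src)
    block = n // num_blocks
    dst = np.empty_like(src)
    for b in range(num_blocks):
        w = twiddles[b]                                   # exp(-2j*pi*b / (2*num_blocks)) for the forward transform
        even = src[b * block : b * block + block // 2]
        odd = src[b * block + block // 2 : (b + 1) * block]
        out = b * block // 2
        dst[out : out + block // 2] = even + w * odd
        dst[n // 2 + out : n // 2 + out + block // 2] = even - w * odd
    return dst

x = np.random.rand(8) + 1j * np.random.rand(8)
buf = x.copy()
for nb in (1, 2, 4):
    buf = fft_stage(buf, nb, np.exp(-2j * np.pi * np.arange(nb) / (2 * nb)))
assert np.allclose(buf, np.fft.fft(x))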
void RDFTExecutor::dftCommon(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
size_t inputSize, size_t signalSize, size_t outputSize,
enum dft_type type, bool useFFT, bool parallelize) {
if (useFFT) {
fft(inputPtr, twiddlesPtr, outputPtr,
inputSize, signalSize, outputSize,
type, parallelize);
} else {
dft(inputPtr, twiddlesPtr, outputPtr,
inputSize, signalSize, outputSize,
type, parallelize);
}
}
void RDFTExecutor::dftOnAxis(enum dft_type type,
float* inputPtr, float* outputPtr,
const float* twiddlesPtr, int axis,
size_t signalSize,
const VectorDims& inputShape,
const VectorDims& inputStrides,
const VectorDims& outputShape,
const VectorDims& outputStrides,
const std::vector<size_t>& iterationRange) {
size_t inputSize = inputShape[axis];
size_t outputSize = outputShape[axis];
void (*gather)(float* output, const float* input,
size_t axis, const std::vector<size_t>& coords,
size_t size, const std::vector<size_t>& strides) = nullptr;
void (*scatter)(float* output, const float* input,
size_t axis, const std::vector<size_t>& coords,
size_t size, const std::vector<size_t>& strides) = nullptr;
size_t gatherSize = 0;
size_t scatterSize = 0;
switch (type) {
case real_to_complex:
scatter = scatterComplex;
gather = gatherReal;
gatherSize = inputSize;
scatterSize = outputSize * 2;
break;
case complex_to_complex:
gather = gatherComplex;
scatter = scatterComplex;
gatherSize = inputSize * 2;
scatterSize = outputSize * 2;
break;
case complex_to_real:
gather = gatherComplex;
scatter = scatterReal;
gatherSize = inputSize * 2;
scatterSize = outputSize;
break;
}
bool useFFT = canUseFFT(signalSize);
size_t totalWorkSize = std::accumulate(iterationRange.begin(),
iterationRange.end(),
1, std::multiplies<size_t>()) / iterationRange[axis];
bool parallelizeOuterAxes = totalWorkSize > signalSize;
if (parallelizeOuterAxes) {
parallel_for(totalWorkSize, [&] (size_t i) {
std::vector<size_t> coords(iterationRange.size(), 0);
std::vector<float> gatherScatterBuffer(gatherSize + scatterSize);
float* gatherBuffer = &gatherScatterBuffer[0];
float* scatterBuffer = &gatherScatterBuffer[gatherSize];
coordsFromIndex(i, coords, iterationRange, axis);
gather(gatherBuffer, inputPtr,
axis, coords,
inputSize, inputStrides);
dftCommon(gatherBuffer, twiddlesPtr, scatterBuffer,
inputSize, signalSize, outputSize,
type, useFFT, !parallelizeOuterAxes);
scatter(outputPtr, scatterBuffer, axis, coords, outputSize, outputStrides);
});
} else {
std::vector<size_t> coords(iterationRange.size(), 0);
std::vector<float> gatherScatterBuffer(gatherSize + scatterSize);
float* gatherBuffer = &gatherScatterBuffer[0];
float* scatterBuffer = &gatherScatterBuffer[gatherSize];
for (size_t i = 0; i < totalWorkSize; i++) {
coordsFromIndex(i, coords, iterationRange, axis);
gather(gatherBuffer, inputPtr,
axis, coords,
inputSize, inputStrides);
dftCommon(gatherBuffer, twiddlesPtr, scatterBuffer,
inputSize, signalSize, outputSize,
type, useFFT, !parallelizeOuterAxes);
scatter(outputPtr, scatterBuffer, axis, coords, outputSize, outputStrides);
}
}
}
// N-dimensional real DFT
void RDFTExecutor::rdftNd(float* inputPtr, float* outputPtr,
const std::vector<std::vector<float>>& twiddles,
const std::vector<int>& axes,
const std::vector<int>& signalSizes,
const VectorDims& inputShape,
const VectorDims& inputStrides,
const VectorDims& outputShape,
const VectorDims& outputStrides) {
const std::vector<size_t> iterationRange(outputShape.begin(), outputShape.end() - 1);
dftOnAxis(real_to_complex, inputPtr, outputPtr,
twiddles.back().data(), axes.back(),
signalSizes.back(),
inputShape, inputStrides,
outputShape, outputStrides,
iterationRange);
inputPtr = outputPtr;
for (size_t i = 0; i < axes.size() - 1; i++) {
auto axis = axes[i];
dftOnAxis(complex_to_complex, inputPtr, outputPtr,
twiddles[i].data(), axis,
signalSizes[i],
outputShape, outputStrides,
outputShape, outputStrides,
iterationRange);
}
}
// N-dimensional real inverse DFT
void RDFTExecutor::irdftNd(float* inputPtr, float* outputPtr,
const std::vector<std::vector<float>>& twiddles,
const std::vector<int>& axes,
const std::vector<int>& signalSizes,
const VectorDims& inputShape,
const VectorDims& originalInputStrides,
const VectorDims& outputShape,
const VectorDims& outputStrides) {
const std::vector<size_t> iterationRange(inputShape.begin(), inputShape.end() - 1);
if (axes.size() == 1) {
dftOnAxis(complex_to_real, inputPtr, outputPtr,
twiddles[0].data(), axes[0],
signalSizes[0],
inputShape, originalInputStrides,
outputShape, outputStrides,
iterationRange);
return;
}
float* output = outputPtr;
std::vector<float> tmp;
size_t inputShapeSize = std::accumulate(inputShape.begin(), inputShape.end(), 1, std::multiplies<size_t>());
size_t outputShapeSize = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<size_t>());
if (inputShapeSize > outputShapeSize) {
tmp.resize(inputShapeSize);
output = &tmp[0];
}
std::vector<size_t> inputStrides(originalInputStrides.size());
inputStrides[originalInputStrides.size() - 1] = 1;
for (size_t i = inputStrides.size() - 1; i > 0; i--) {
inputStrides[i - 1] = inputStrides[i] * inputShape[i];
}
for (size_t i = 0; i < axes.size() - 1; i++) {
auto axis = axes[i];
dftOnAxis(complex_to_complex, inputPtr, output,
twiddles[i].data(), axis,
signalSizes[i],
inputShape, originalInputStrides,
inputShape, inputStrides,
iterationRange);
inputPtr = output;
}
dftOnAxis(complex_to_real, inputPtr, outputPtr,
twiddles.back().data(), axes.back(),
signalSizes.back(),
inputShape, inputStrides,
outputShape, outputStrides,
iterationRange);
}
std::vector<float> RDFTExecutor::generateTwiddlesFFT(size_t N) {
std::vector<float> twiddles;
for (size_t numBlocks = 1; numBlocks < N; numBlocks *= 2) {
for (size_t block = 0; block < numBlocks; block++) {
double angle = 2 * PI * block / (numBlocks * 2);
twiddles.push_back(std::cos(angle));
twiddles.push_back(-std::sin(angle));
}
}
return twiddles;
}
std::vector<float> RDFTExecutor::generateTwiddlesCommon(size_t signalSize, size_t outputSize,
enum dft_type type, bool useFFT) {
if (useFFT) {
return generateTwiddlesFFT(signalSize);
}
return generateTwiddlesDFT(signalSize, outputSize, type);
}
std::vector<std::vector<float>> RDFTExecutor::generateTwiddles(const std::vector<int>& signalSizes,
const std::vector<size_t>& outputShape,
const std::vector<int>& axes) {
std::vector<std::vector<float>> twiddles;
twiddles.reserve(axes.size());
for (size_t i = 0; i < axes.size(); i++) {
auto axis = axes[i];
size_t N = signalSizes[i];
size_t K = outputShape[axis];
auto type = complex_to_complex;
if (i == axes.size() - 1)
type = isInverse ? complex_to_real : real_to_complex;
twiddles.push_back(generateTwiddlesCommon(N, K, type, canUseFFT(N)));
}
return twiddles;
}
struct RDFTJitExecutor : public RDFTExecutor {
RDFTJitExecutor(bool inverse, NodeDesc* primDesc) : RDFTExecutor(inverse) {
enum dft_type rdftType = isInverse ? complex_to_real : real_to_complex;
if (mayiuse(cpu::x64::avx512_core)) {
rdftKernel.reset(new jit_dft_kernel_f32<cpu::x64::avx512_core>(isInverse, rdftType));
dftKernel.reset(new jit_dft_kernel_f32<cpu::x64::avx512_core>(isInverse, complex_to_complex));
vlen = cpu_isa_traits<cpu::x64::avx512_core>::vlen;
primDesc->setImplementationType(jit_avx512);
} else if (mayiuse(cpu::x64::avx2)) {
rdftKernel.reset(new jit_dft_kernel_f32<cpu::x64::avx2>(isInverse, rdftType));
dftKernel.reset(new jit_dft_kernel_f32<cpu::x64::avx2>(isInverse, complex_to_complex));
vlen = cpu_isa_traits<cpu::x64::avx2>::vlen;
primDesc->setImplementationType(jit_avx2);
} else if (mayiuse(cpu::x64::sse41)) {
rdftKernel.reset(new jit_dft_kernel_f32<cpu::x64::sse41>(isInverse, rdftType));
dftKernel.reset(new jit_dft_kernel_f32<cpu::x64::sse41>(isInverse, complex_to_complex));
vlen = cpu_isa_traits<cpu::x64::sse41>::vlen;
primDesc->setImplementationType(jit_sse42);
} else {
IE_THROW() << "Can't create RDFT kernel";
}
if (rdftKernel)
rdftKernel->create_ker();
if (dftKernel)
dftKernel->create_ker();
}
std::vector<float> generateTwiddlesDFT(size_t inputSize, size_t outputSize, enum dft_type type) override {
std::vector<float> twiddles(inputSize * outputSize * 2);
int simdSize = vlen / sizeof(float);
if (type == real_to_complex || type == complex_to_complex) {
simdSize /= 2; // there are two floats per one complex element in the output
}
parallel_for2d(outputSize / simdSize, inputSize, [&] (size_t K, size_t n) {
if (type == real_to_complex) {
for (size_t k = 0; k < simdSize; k++) {
double angle = 2 * PI * (K * simdSize + k) * n / inputSize;
twiddles[((K * inputSize + n) * simdSize + k) * 2] = std::cos(angle);
twiddles[((K * inputSize + n) * simdSize + k) * 2 + 1] = -std::sin(angle);
}
} else if (type == complex_to_real || type == complex_to_complex) {
for (size_t k = 0; k < simdSize; k++) {
double angle = 2 * PI * (K * simdSize + k) * n / inputSize;
twiddles[(K * inputSize + n) * 2 * simdSize + k] = std::cos(angle);
}
for (size_t k = 0; k < simdSize; k++) {
double angle = 2 * PI * (K * simdSize + k) * n / inputSize;
twiddles[((K * inputSize + n) * 2 + 1) * simdSize + k] = -std::sin(angle);
}
}
});
if ((outputSize % simdSize) != 0) {
size_t start = (outputSize / simdSize) * simdSize;
parallel_for2d(outputSize - start, inputSize, [&] (size_t k, size_t n) {
k += start;
double angle = 2 * PI * k * n / inputSize;
twiddles[2 * (k * inputSize + n)] = std::cos(angle);
twiddles[2 * (k * inputSize + n) + 1] = -std::sin(angle);
});
}
return twiddles;
}
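The JIT twiddle layout above is tiled for the vector loops: for every input sample n, a full register's worth of twiddles covering simd_size consecutive output bins is stored contiguously, so the kernel can broadcast input[n] and issue one fused multiply-add per register. A reconstruction of the real-to-complex layout in numpy (illustrative only; simd_size here is the number of complex bins per register, i.e. half the float lanes, and the scalar tail for outputSize % simdSize is omitted):

import numpy as np

def r2c_twiddles(signal_size, output_size, simd_size):
    blocks = output_size // simd_size
    tw = np.zeros((blocks, signal_size, simd_size, 2), dtype=np.float32)
    for K in range(blocks):
        for n in range(signal_size):
            for k in range(simd_size):
                angle = 2 * np.pi * (K * simd_size + k) * n / signal_size
                tw[K, n, k] = (np.cos(angle), -np.sin(angle))
    return tw.reshape(-1)   # flat buffer the kernel walks linearly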
void dft(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
size_t inputSize, size_t signalSize, size_t outputSize,
enum dft_type type, bool parallelize) override {
jit_dft_kernel* kernel = type == complex_to_complex ? dftKernel.get() : rdftKernel.get();
if (parallelize) {
const int cachelineSize = 64;
size_t blockSize = 4 * cachelineSize / sizeof(float);
size_t numBlocks = (outputSize + blockSize - 1) / blockSize;
parallel_nt(numBlocks, [&] (size_t i, size_t nthr) {
if (numBlocks > nthr) {
auto newBlockSize = (((outputSize / nthr) + blockSize - 1) / blockSize) * blockSize;
blockSize = newBlockSize;
numBlocks = nthr;
}
jit_dft_args args{};
args.input = inputPtr;
args.twiddles = twiddlesPtr;
args.output = outputPtr;
args.input_size = inputSize;
args.signal_size = signalSize;
args.output_start = i * blockSize;
args.output_end = std::min(outputSize - i * blockSize, blockSize);
(*kernel)(&args);
});
} else {
jit_dft_args args{};
args.input = inputPtr;
args.twiddles = twiddlesPtr;
args.output = outputPtr;
args.input_size = inputSize;
args.signal_size = signalSize;
args.output_start = 0;
args.output_end = outputSize;
(*kernel)(&args);
}
}
std::unique_ptr<jit_dft_kernel> rdftKernel = nullptr;
std::unique_ptr<jit_dft_kernel> dftKernel = nullptr;
int vlen;
};
struct RDFTRefExecutor : public RDFTExecutor {
RDFTRefExecutor(bool inverse) : RDFTExecutor(inverse) {}
private:
std::vector<float> generateTwiddlesDFT(size_t inputSize, size_t outputSize, enum dft_type type) override {
std::vector<float> twiddles(inputSize * outputSize * 2);
parallel_for2d(outputSize, inputSize, [&] (size_t k, size_t n) {
double angle = 2 * PI * k * n / inputSize;
if (!isInverse)
angle = -angle;
twiddles[(k * inputSize + n) * 2] = std::cos(angle);
twiddles[(k * inputSize + n) * 2 + 1] = std::sin(angle);
});
return twiddles;
}
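The reference twiddle layout is simply (cos, sin) interleaved per (k, n) pair, with the angle negated for the forward transform so that the same accumulation loop serves both directions. In numpy terms (illustrative only):

import numpy as np

def ref_twiddles(signal_size, output_size, inverse):
    k = np.arange(output_size)[:, None]
    n = np.arange(signal_size)[None, :]
    angle = 2 * np.pi * k * n / signal_size * (1 if inverse else -1)
    return np.stack([np.cos(angle), np.sin(angle)], axis=-1).reshape(-1).astype(np.float32)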
void dftRealToComplex(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
size_t inputSize, size_t outputSize, bool parallelize) {
auto dftIteration = [&] (size_t k) {
float real = 0, imag = 0;
for (size_t n = 0; n < inputSize; n++) {
float cos = twiddlesPtr[2 * (k * inputSize + n)];
float sin = twiddlesPtr[2 * (k * inputSize + n) + 1];
real += inputPtr[n] * cos;
imag += inputPtr[n] * sin;
}
outputPtr[2 * k] = real;
outputPtr[2 * k + 1] = imag;
};
if (parallelize) {
parallel_for(outputSize, dftIteration);
} else {
for (size_t k = 0; k < outputSize; k++) {
dftIteration(k);
}
}
}
void dftComplexToComplex(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
size_t inputSize, size_t signalSize, size_t outputSize, bool parallelize) {
auto dftIteration = [&] (size_t k) {
float real = 0, imag = 0;
for (size_t n = 0; n < inputSize; n++) {
float cos = twiddlesPtr[2 * (k * outputSize + n)];
float sin = twiddlesPtr[2 * (k * outputSize + n) + 1];
float inputReal = inputPtr[2 * n];
float inputImag = inputPtr[2 * n + 1];
real += inputReal * cos - inputImag * sin;
imag += inputImag * cos + inputReal * sin;
}
if (isInverse) {
float* inp = inputPtr + 2 * (inputSize - 2 + outputSize % 2);
for (int n = inputSize; n < signalSize; n++, inp -= 2) {
float cos = twiddlesPtr[2 * (k * outputSize + n)];
float sin = twiddlesPtr[2 * (k * outputSize + n) + 1];
float inputReal = inp[0];
float inputImag = -inp[1];
real += inputReal * cos - inputImag * sin;
imag += inputImag * cos + inputReal * sin;
}
real /= outputSize;
imag /= outputSize;
}
outputPtr[2 * k] = real;
outputPtr[2 * k + 1] = imag;
};
if (parallelize) {
parallel_for(outputSize, dftIteration);
} else {
for (size_t k = 0; k < outputSize; k++) {
dftIteration(k);
}
}
}
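For the inverse, the reference complex-to-complex loop above rebuilds the spectrum bins that the packed IRDFT input omits using conjugate symmetry: bin (N - n) of a real signal's spectrum is the conjugate of bin n. A quick numpy check of that identity (illustrative only):

import numpy as np
x = np.random.rand(10)
full = np.fft.fft(x)
half = np.fft.rfft(x)                                      # only 6 of the 10 bins are stored
rebuilt = np.concatenate([half, np.conj(half[-2:0:-1])])
assert np.allclose(rebuilt, full)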
void dftComplexToReal(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
size_t inputSize, size_t signalSize, size_t outputSize, bool parallelize) {
auto dftIteration = [&] (size_t k) {
float real = 0;
for (size_t n = 0; n < inputSize; n++) {
float cos = twiddlesPtr[2 * (k * outputSize + n)];
float sin = twiddlesPtr[2 * (k * outputSize + n) + 1];
float inputReal = inputPtr[2 * n];
float inputImag = inputPtr[2 * n + 1];
real += inputReal * cos - inputImag * sin;
}
if (isInverse) {
float* inp = inputPtr + 2 * (inputSize - 2 + outputSize % 2);
for (size_t n = inputSize; n < signalSize; n++, inp -= 2) {
float cos = twiddlesPtr[2 * (k * outputSize + n)];
float sin = twiddlesPtr[2 * (k * outputSize + n) + 1];
float inputReal = inp[0];
float inputImag = inp[1];
real += inputReal * cos + inputImag * sin;
}
real /= outputSize;
}
outputPtr[k] = real;
};
if (parallelize) {
parallel_for(outputSize, dftIteration);
} else {
for (int k = 0; k < outputSize; k++) {
dftIteration(k);
}
}
}
void dft(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
size_t inputSize, size_t signalSize, size_t outputSize,
enum dft_type type, bool parallelize) override {
if (type == real_to_complex) {
dftRealToComplex(inputPtr, twiddlesPtr, outputPtr, inputSize, outputSize, parallelize);
} else if (type == complex_to_complex) {
dftComplexToComplex(inputPtr, twiddlesPtr, outputPtr, inputSize, signalSize, outputSize, parallelize);
} else if (type == complex_to_real) {
dftComplexToReal(inputPtr, twiddlesPtr, outputPtr, inputSize, signalSize, outputSize, parallelize);
}
}
};
struct RDFTKey {
bool isInverse;
size_t hash() const {
using namespace dnnl::impl::primitive_hashing;
size_t seed = 0;
seed = hash_combine(seed, isInverse);
return seed;
}
bool operator==(const RDFTKey& rhs) const {
return isInverse == rhs.isInverse;
}
};
void RDFT::prepareParams() {
RDFTKey key{};
key.isInverse = inverse;
auto buildExecutor = [&] (const RDFTKey& key) -> std::shared_ptr<RDFTExecutor> {
std::shared_ptr<RDFTExecutor> executor;
NodeDesc* primDesc = getSelectedPrimitiveDescriptor();
if (mayiuse(cpu::x64::sse41)) {
executor = std::make_shared<RDFTJitExecutor>(key.isInverse, primDesc);
} else {
executor = std::make_shared<RDFTRefExecutor>(key.isInverse);
primDesc->setImplementationType(ref_any);
}
return executor;
};
auto cache = getRuntimeCache();
auto result = cache->getOrCreate(key, buildExecutor);
executor = result.first;
if (axes.size() > 0 && signalSizes.size() > 0 && outputShapes[0].isStatic()) {
twiddles = executor->generateTwiddles(signalSizes, outputShapes[0].getStaticDims(), axes);
}
}
} // namespace node
} // namespace intel_cpu
} // namespace ov


@@ -0,0 +1,99 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ie_common.h>
#include <node.h>
#include <string>
#include <map>
#include "kernels/rdft_kernel.hpp"
namespace ov {
namespace intel_cpu {
namespace node {
struct RDFTExecutor {
public:
RDFTExecutor(bool inverse) : isInverse(inverse) {}
void execute(float* inputPtr, float* outputPtr,
const std::vector<std::vector<float>>& twiddles,
size_t rank, const std::vector<int>& axes,
std::vector<int> signalSizes,
VectorDims inputShape, const VectorDims& outputShape,
const VectorDims& inputStrides, const VectorDims& outputStrides);
std::vector<std::vector<float>> generateTwiddles(const std::vector<int>& signalSizes,
const std::vector<size_t>& outputShape,
const std::vector<int>& axes);
protected:
bool isInverse;
private:
virtual bool canUseFFT(size_t dim);
virtual void dft(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
size_t inputSize, size_t signalSize, size_t outputSize,
enum dft_type type, bool parallelize) = 0;
virtual void fft(float* input, const float* twiddlesPtr, float* output,
size_t inputSize, size_t signalSize, size_t outputSize,
enum dft_type type, bool parallelize);
void dftCommon(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
size_t inputSize, size_t signalSize, size_t outputSize,
enum dft_type type, bool useFFT, bool parallelize);
void dftOnAxis(enum dft_type type,
float* inputPtr, float* outputPtr,
const float* twiddlesPtr, int axis,
size_t signalSize,
const VectorDims& inputShape,
const VectorDims& inputStrides,
const VectorDims& outputShape,
const VectorDims& outputStrides,
const std::vector<size_t>& iteration_range);
void rdftNd(float* inputPtr, float* outputPtr,
const std::vector<std::vector<float>>& twiddles,
const std::vector<int>& axes,
const std::vector<int>& signalSizes,
const VectorDims& inputShape,
const VectorDims& inputStrides,
const VectorDims& outputShape,
const VectorDims& outputStrides);
void irdftNd(float* inputPtr, float* outputPtr,
const std::vector<std::vector<float>>& twiddles,
const std::vector<int>& axes,
const std::vector<int>& signalSizes,
const VectorDims& inputShape,
const VectorDims& inputStrides,
const VectorDims& outputShape,
const VectorDims& outputStrides);
virtual std::vector<float> generateTwiddlesDFT(size_t inputSize, size_t outputSize, enum dft_type type) = 0;
std::vector<float> generateTwiddlesFFT(size_t N);
std::vector<float> generateTwiddlesCommon(size_t inputSize, size_t outputSize,
enum dft_type type, bool useFFT);
};
class RDFT : public Node {
public:
RDFT(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void prepareParams() override;
void execute(dnnl::stream strm) override;
bool created() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
private:
std::string errorMsgPrefix;
bool inverse;
std::vector<int> axes;
std::vector<int> signalSizes;
std::vector<std::vector<float>> twiddles;
std::shared_ptr<RDFTExecutor> executor;
};
} // namespace node
} // namespace intel_cpu
} // namespace ov


@@ -68,6 +68,7 @@
#include "nodes/log_softmax.h"
#include "nodes/strided_slice.h"
#include "nodes/dft.h"
#include "nodes/rdft.h"
#include "nodes/non_max_suppression.h"
#include "nodes/convert.h"
#include "nodes/rnn.h"
@@ -123,6 +124,7 @@ Node::NodesFactory::NodesFactory()
INTEL_CPU_NODE(MemoryOutput, Type::MemoryOutput);
INTEL_CPU_NODE(Tile, Type::Tile);
INTEL_CPU_NODE(DFT, Type::DFT);
INTEL_CPU_NODE(RDFT, Type::RDFT);
INTEL_CPU_NODE(GatherTree, Type::GatherTree);
INTEL_CPU_NODE(SpaceToDepth, Type::SpaceToDepth);
INTEL_CPU_NODE(FullyConnected, Type::FullyConnected);


@@ -0,0 +1,157 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/rdft.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
const std::vector<ngraph::helpers::DFTOpType> opTypes = {
ngraph::helpers::DFTOpType::FORWARD,
ngraph::helpers::DFTOpType::INVERSE
};
static const std::vector<InferenceEngine::Precision> inputPrecision = {
InferenceEngine::Precision::FP32,
};
const std::vector<std::vector<size_t>> shapesForward1d = {
{10},
{64},
{100},
};
const std::vector<std::vector<int64_t>> signalSizes1d = {
{}, {10},
};
// The 1D case doesn't work yet on the reference implementation
INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_RDFT_1d, RDFTLayerTest,
::testing::Combine(
::testing::ValuesIn(shapesForward1d),
::testing::ValuesIn(inputPrecision),
::testing::Values(std::vector<int64_t>{0}),
::testing::ValuesIn(signalSizes1d),
::testing::Values(ngraph::helpers::DFTOpType::FORWARD),
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
const std::vector<std::vector<size_t>> shapesInverse1d = {
{10, 2},
{64, 2},
{100, 2},
};
INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_IRDFT_1d, RDFTLayerTest,
::testing::Combine(
::testing::ValuesIn(shapesInverse1d),
::testing::ValuesIn(inputPrecision),
::testing::Values(std::vector<int64_t>{0}),
::testing::ValuesIn(signalSizes1d),
::testing::Values(ngraph::helpers::DFTOpType::INVERSE),
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
const std::vector<std::vector<size_t>> shapesForward2d = {
{10, 15},
{64, 32},
{100, 16},
};
const std::vector<std::vector<int64_t>> axes2d = {
{0, 1}, {1, 0}, {-2, -1},
};
const std::vector<std::vector<int64_t>> signalSizes2d = {
{}, {10, 10},
};
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_2d, RDFTLayerTest,
::testing::Combine(
::testing::ValuesIn(shapesForward2d),
::testing::ValuesIn(inputPrecision),
::testing::ValuesIn(axes2d),
::testing::ValuesIn(signalSizes2d),
::testing::Values(ngraph::helpers::DFTOpType::FORWARD),
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
const std::vector<std::vector<size_t>> shapesInverse2d = {
{10, 15, 2},
{64, 32, 2},
{100, 32, 2},
};
INSTANTIATE_TEST_SUITE_P(smoke_IRDFT_2d, RDFTLayerTest,
::testing::Combine(
::testing::ValuesIn(shapesInverse2d),
::testing::ValuesIn(inputPrecision),
::testing::ValuesIn(axes2d),
::testing::ValuesIn(signalSizes2d),
::testing::Values(ngraph::helpers::DFTOpType::INVERSE),
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
const std::vector<std::vector<size_t>> shapesForward4d = {
{1, 3, 10, 15},
{1, 4, 64, 32},
};
const std::vector<std::vector<int64_t>> axes4d = {
{0, 1, 2, 3}, {1, 0, -2, -1}
};
const std::vector<std::vector<int64_t>> signalSizes4d = {
{},
};
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d, RDFTLayerTest,
::testing::Combine(
::testing::ValuesIn(shapesForward4d),
::testing::ValuesIn(inputPrecision),
::testing::ValuesIn(axes4d),
::testing::ValuesIn(signalSizes4d),
::testing::Values(ngraph::helpers::DFTOpType::FORWARD),
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
const std::vector<std::vector<int64_t>> axes4d_2d = {
{2, 3}, {1, -1}
};
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d_axes_2d, RDFTLayerTest,
::testing::Combine(
::testing::ValuesIn(shapesForward4d),
::testing::ValuesIn(inputPrecision),
::testing::ValuesIn(axes4d_2d),
::testing::Values(std::vector<int64_t>{}),
::testing::Values(ngraph::helpers::DFTOpType::FORWARD),
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
const std::vector<std::vector<size_t>> shapesInverse4d = {
{1, 3, 10, 15, 2},
{1, 4, 64, 32, 2},
};
INSTANTIATE_TEST_SUITE_P(smoke_IRDFT_4d, RDFTLayerTest,
::testing::Combine(
::testing::ValuesIn(shapesInverse4d),
::testing::ValuesIn(inputPrecision),
::testing::ValuesIn(axes4d),
::testing::ValuesIn(signalSizes4d),
::testing::Values(ngraph::helpers::DFTOpType::INVERSE),
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_IRDFT_4d_axes_2d, RDFTLayerTest,
::testing::Combine(
::testing::ValuesIn(shapesInverse4d),
::testing::ValuesIn(inputPrecision),
::testing::ValuesIn(axes4d_2d),
::testing::Values(std::vector<int64_t>{}),
::testing::Values(ngraph::helpers::DFTOpType::INVERSE),
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
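For orientation, the inverse-direction shapes above carry a trailing dimension of 2 because opset9 RDFT packs each complex value as a real/imaginary pair and shrinks the last transformed axis to floor(n/2) + 1. A minimal sketch of that layout, using only the opset9 API already exercised by these tests (the wrapper function name is illustrative, not part of this change):

#include <openvino/opsets/opset9.hpp>

std::shared_ptr<ov::Node> rdftShapeSketch() {
    // Real 2D input of shape {64, 32}, transformed over both axes.
    auto data = std::make_shared<ov::opset9::Parameter>(ov::element::f32, ov::Shape{64, 32});
    auto axes = ov::opset9::Constant::create(ov::element::i64, ov::Shape{2}, std::vector<int64_t>{0, 1});
    auto rdft = std::make_shared<ov::opset9::RDFT>(data, axes);
    // Forward output shape: the last transformed axis becomes 32 / 2 + 1 = 17 and a
    // trailing re/im pair is appended, i.e. {64, 17, 2}.
    auto irdft = std::make_shared<ov::opset9::IRDFT>(rdft, axes);
    // The inverse consumes that packed layout and restores {64, 32} when no signal_size is given.
    return irdft;
}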

View File

@ -0,0 +1,456 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ngraph_functions/builders.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include <common_test_utils/ov_tensor_utils.hpp>
#include <openvino/opsets/opset9.hpp>
using namespace CPUTestUtils;
using namespace ov::test;
using namespace ov;
namespace CPULayerTestsDefinitions {
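// Subgraph test that builds a single RDFT/IRDFT node and checks both the numerical results and
// the primitive implementation type selected by the CPU plugin.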
using RDFTTestCPUParams = std::tuple<
Shape,
std::vector<int64_t>, // axes
std::vector<int64_t>, // signal sizes
bool, // inverse
CPUSpecificParams>;
class RDFTTestCPU : public testing::WithParamInterface<RDFTTestCPUParams>,
virtual public test::SubgraphBaseTest, public CPUTestsBase {
public:
static std::string getTestCaseName(const testing::TestParamInfo<RDFTTestCPUParams>& obj) {
Shape shape;
std::vector<int64_t> axes;
std::vector<int64_t> signalSizes;
bool inverse;
CPUSpecificParams cpuParams;
std::tie(shape, axes, signalSizes, inverse, cpuParams) = obj.param;
std::ostringstream result;
result << "shape=" << shape
<< "_axes=" << CommonTestUtils::vec2str(axes)
<< "_signalSizes=" << CommonTestUtils::vec2str(signalSizes)
<< "_isInverse=" << inverse
<< CPUTestsBase::getTestCaseName(cpuParams);
return result.str();
}
protected:
void SetUp() override {
Shape shape;
std::vector<int64_t> axes;
std::vector<int64_t> signalSizes;
element::Type_t precision = element::f32;
bool inverse;
CPUSpecificParams cpuParams;
std::tie(shape, axes, signalSizes, inverse, cpuParams) = GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
selectedType = makeSelectedTypeStr(selectedType, precision);
targetDevice = CommonTestUtils::DEVICE_CPU;
targetStaticShapes.push_back(std::vector<Shape>{shape});
auto param = std::make_shared<opset9::Parameter>(precision, shape);
auto axesNode = opset9::Constant::create(element::i64, Shape{axes.size()}, axes);
std::shared_ptr<Node> rdft;
if (signalSizes.size() > 0) {
auto signalSizesNode = opset9::Constant::create(element::i64, Shape{signalSizes.size()}, signalSizes);
if (inverse) {
rdft = std::make_shared<opset9::IRDFT>(param, axesNode, signalSizesNode);
} else {
rdft = std::make_shared<opset9::RDFT>(param, axesNode, signalSizesNode);
}
} else {
if (inverse) {
rdft = std::make_shared<opset9::IRDFT>(param, axesNode);
} else {
rdft = std::make_shared<opset9::RDFT>(param, axesNode);
}
}
function = std::make_shared<Model>(rdft, ParameterVector{param});
}
void generate_inputs(const std::vector<Shape>& targetInputStaticShapes) override {
const auto& funcInputs = function->inputs();
inputs.clear();
for (size_t i = 0; i < funcInputs.size(); ++i) {
const auto& funcInput = funcInputs[i];
runtime::Tensor tensor = test::utils::create_and_fill_tensor_normal_distribution(funcInput.get_element_type(), targetInputStaticShapes[0], 0, 1, 0);
inputs.insert({funcInput.get_node_shared_ptr(), tensor});
}
}
};
TEST_P(RDFTTestCPU, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
run();
CheckPluginRelatedResults(compiledModel, "RDFT");
}
namespace {
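// The expected primitive implementation type depends on the best ISA available on the machine running the test.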
CPUSpecificParams getCPUSpecificParams() {
if (InferenceEngine::with_cpu_x86_avx512_core()) {
return CPUSpecificParams{{}, {}, {"jit_avx512"}, "jit_avx512"};
} else if (InferenceEngine::with_cpu_x86_avx2()) {
return CPUSpecificParams{{}, {}, {"jit_avx2"}, "jit_avx2"};
} else if (InferenceEngine::with_cpu_x86_sse42()) {
return CPUSpecificParams{{}, {}, {"jit_sse42"}, "jit_sse42"};
} else {
return CPUSpecificParams{{}, {}, {"ref"}, "ref"};
}
return {};
}
auto cpuParams = getCPUSpecificParams();
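// Separate shape sets per ISA: only the cases matching the implementation selected above are generated.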
std::vector<RDFTTestCPUParams> getParams1D() {
if (InferenceEngine::with_cpu_x86_avx512_core()) {
return {
{{14}, {0}, {}, false, cpuParams},
{{13}, {0}, {}, false, cpuParams},
{{15}, {0}, {}, false, cpuParams},
{{30}, {0}, {}, false, cpuParams},
{{29}, {0}, {}, false, cpuParams},
{{31}, {0}, {}, false, cpuParams},
{{46}, {0}, {}, false, cpuParams},
{{45}, {0}, {}, false, cpuParams},
{{47}, {0}, {}, false, cpuParams},
{{126}, {0}, {}, false, cpuParams},
{{510}, {0}, {}, false, cpuParams},
{{1022}, {0}, {}, false, cpuParams},
{{9, 2}, {0}, {}, true, cpuParams},
{{8, 2}, {0}, {}, true, cpuParams},
{{10, 2}, {0}, {}, true, cpuParams},
{{17, 2}, {0}, {}, true, cpuParams},
{{16, 2}, {0}, {}, true, cpuParams},
{{18, 2}, {0}, {}, true, cpuParams},
{{25, 2}, {0}, {}, true, cpuParams},
{{24, 2}, {0}, {}, true, cpuParams},
{{26, 2}, {0}, {}, true, cpuParams},
{{129, 2}, {0}, {}, true, cpuParams},
{{513, 2}, {0}, {}, true, cpuParams},
{{1025, 2}, {0}, {}, true, cpuParams},
{{25, 2}, {0}, {32}, true, cpuParams},
{{24, 2}, {0}, {16}, true, cpuParams},
};
} else if (InferenceEngine::with_cpu_x86_avx2()) {
return {
{{6}, {0}, {}, false, cpuParams},
{{5}, {0}, {}, false, cpuParams},
{{7}, {0}, {}, false, cpuParams},
{{38}, {0}, {}, false, cpuParams},
{{37}, {0}, {}, false, cpuParams},
{{39}, {0}, {}, false, cpuParams},
{{106}, {0}, {}, false, cpuParams},
{{246}, {0}, {}, false, cpuParams},
{{245}, {0}, {118}, false, cpuParams},
{{126}, {0}, {}, false, cpuParams},
{{510}, {0}, {}, false, cpuParams},
{{1022}, {0}, {}, false, cpuParams},
{{5, 2}, {0}, {}, true, cpuParams},
{{4, 2}, {0}, {}, true, cpuParams},
{{6, 2}, {0}, {}, true, cpuParams},
{{9, 2}, {0}, {}, true, cpuParams},
{{8, 2}, {0}, {}, true, cpuParams},
{{10, 2}, {0}, {}, true, cpuParams},
{{17, 2}, {0}, {}, true, cpuParams},
{{33, 2}, {0}, {}, true, cpuParams},
{{129, 2}, {0}, {}, true, cpuParams},
{{257, 2}, {0}, {}, true, cpuParams},
{{513, 2}, {0}, {}, true, cpuParams},
{{129, 2}, {0}, {126}, true, cpuParams},
{{257, 2}, {0}, {254}, true, cpuParams},
{{513, 2}, {0}, {510}, true, cpuParams},
};
} else {
return {
{{1}, {0}, {}, false, cpuParams},
{{2}, {0}, {}, false, cpuParams},
{{12}, {0}, {}, false, cpuParams},
{{14}, {0}, {}, false, cpuParams},
{{30}, {0}, {}, false, cpuParams},
{{62}, {0}, {}, false, cpuParams},
{{126}, {0}, {}, false, cpuParams},
{{250}, {0}, {}, false, cpuParams},
{{254}, {0}, {}, false, cpuParams},
{{62}, {0}, {61}, false, cpuParams},
{{126}, {0}, {40}, false, cpuParams},
{{250}, {0}, {200}, false, cpuParams},
{{254}, {0}, {10}, false, cpuParams},
{{2, 2}, {0}, {}, true, cpuParams},
{{9, 2}, {0}, {}, true, cpuParams},
{{10, 2}, {0}, {}, true, cpuParams},
{{17, 2}, {0}, {}, true, cpuParams},
{{33, 2}, {0}, {}, true, cpuParams},
{{65, 2}, {0}, {}, true, cpuParams},
{{129, 2}, {0}, {}, true, cpuParams},
{{257, 2}, {0}, {}, true, cpuParams},
{{33, 2}, {0}, {50}, true, cpuParams},
{{65, 2}, {0}, {20}, true, cpuParams},
{{129, 2}, {0}, {200}, true, cpuParams},
{{257, 2}, {0}, {100}, true, cpuParams},
};
}
return {};
}
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_CPU_1D, RDFTTestCPU, ::testing::ValuesIn(getParams1D()), RDFTTestCPU::getTestCaseName);
std::vector<RDFTTestCPUParams> getParams2D() {
if (InferenceEngine::with_cpu_x86_avx512_core()) {
return {
{{46, 10}, {0}, {}, false, cpuParams},
{{45, 10}, {0}, {}, false, cpuParams},
{{47, 10}, {0}, {}, false, cpuParams},
{{20, 126}, {1}, {}, false, cpuParams},
{{20, 510}, {1}, {}, false, cpuParams},
{{20, 1022}, {1}, {}, false, cpuParams},
{{48, 46}, {0, 1}, {}, false, cpuParams},
{{32, 45}, {0, 1}, {}, false, cpuParams},
{{64, 47}, {0, 1}, {}, false, cpuParams},
{{72, 126}, {0, 1}, {}, false, cpuParams},
{{32, 510}, {0, 1}, {}, false, cpuParams},
{{16, 1022}, {0, 1}, {}, false, cpuParams},
{{9, 10, 2}, {0}, {}, true, cpuParams},
{{8, 10, 2}, {0}, {}, true, cpuParams},
{{10, 20, 2}, {0}, {}, true, cpuParams},
{{10, 9, 2}, {1}, {}, true, cpuParams},
{{10, 8, 2}, {1}, {}, true, cpuParams},
{{20, 10, 2}, {1}, {}, true, cpuParams},
{{129, 16, 2}, {0}, {}, true, cpuParams},
{{513, 32, 2}, {0}, {}, true, cpuParams},
{{1025, 72, 2}, {0}, {}, true, cpuParams},
{{16, 129, 2}, {1}, {}, true, cpuParams},
{{32, 513, 2}, {1}, {}, true, cpuParams},
{{72, 1025, 2}, {1}, {}, true, cpuParams},
{{16, 129, 2}, {0, 1}, {}, true, cpuParams},
{{32, 513, 2}, {0, 1}, {}, true, cpuParams},
{{72, 1025, 2}, {0, 1}, {}, true, cpuParams},
{{16, 129, 2}, {0, 1}, {16, 200}, true, cpuParams},
{{32, 513, 2}, {0, 1}, {32, 600}, true, cpuParams},
{{72, 1025, 2}, {0, 1}, {72, 100}, true, cpuParams},
};
} else if (InferenceEngine::with_cpu_x86_avx2()) {
return {
{{38, 16}, {0}, {}, false, cpuParams},
{{37, 8}, {0}, {}, false, cpuParams},
{{39, 24}, {0}, {}, false, cpuParams},
{{16, 38}, {1}, {}, false, cpuParams},
{{8, 37}, {1}, {}, false, cpuParams},
{{24, 39}, {1}, {}, false, cpuParams},
{{16, 38}, {0, 1}, {}, false, cpuParams},
{{8, 37}, {0, 1}, {}, false, cpuParams},
{{24, 39}, {0, 1}, {}, false, cpuParams},
{{126, 32}, {0}, {}, false, cpuParams},
{{510, 64}, {0}, {}, false, cpuParams},
{{1022, 64}, {0}, {}, false, cpuParams},
{{126, 32}, {0, 1}, {}, false, cpuParams},
{{510, 64}, {0, 1}, {}, false, cpuParams},
{{1022, 64}, {0, 1}, {}, false, cpuParams},
{{38, 16, 2}, {0}, {}, true, cpuParams},
{{37, 8, 2}, {0}, {}, true, cpuParams},
{{39, 24, 2}, {0}, {}, true, cpuParams},
{{16, 38, 2}, {1}, {}, true, cpuParams},
{{8, 37, 2}, {1}, {}, true, cpuParams},
{{24, 39, 2}, {1}, {}, true, cpuParams},
{{16, 38, 2}, {0, 1}, {}, true, cpuParams},
{{8, 37, 2}, {0, 1}, {}, true, cpuParams},
{{24, 39, 2}, {0, 1}, {}, true, cpuParams},
{{126, 32, 2}, {0}, {}, true, cpuParams},
{{510, 64, 2}, {0}, {}, true, cpuParams},
{{1022, 64, 2}, {0}, {}, true, cpuParams},
{{126, 32, 2}, {0, 1}, {}, true, cpuParams},
{{510, 64, 2}, {0, 1}, {}, true, cpuParams},
{{1022, 64, 2}, {0, 1}, {}, true, cpuParams},
{{129, 32, 2}, {0}, {126}, true, cpuParams},
{{257, 16, 2}, {0}, {254}, true, cpuParams},
{{513, 64, 2}, {0}, {510}, true, cpuParams},
};
} else {
return {
{{1, 1}, {0}, {}, false, cpuParams},
{{1, 1}, {1}, {}, false, cpuParams},
{{1, 1}, {0, 1}, {}, false, cpuParams},
{{2, 2}, {0}, {}, false, cpuParams},
{{2, 2}, {1}, {}, false, cpuParams},
{{2, 2}, {0, 1}, {}, false, cpuParams},
{{13, 13}, {0}, {}, false, cpuParams},
{{13, 13}, {1}, {}, false, cpuParams},
{{13, 13}, {0, 1}, {}, false, cpuParams},
{{29, 29}, {0}, {}, false, cpuParams},
{{29, 29}, {1}, {}, false, cpuParams},
{{29, 29}, {0, 1}, {}, false, cpuParams},
{{30, 32}, {0}, {}, false, cpuParams},
{{32, 30}, {1}, {}, false, cpuParams},
{{32, 30}, {0, 1}, {}, false, cpuParams},
{{62, 64}, {0}, {}, false, cpuParams},
{{64, 62}, {1}, {}, false, cpuParams},
{{64, 62}, {0, 1}, {}, false, cpuParams},
{{254, 128}, {0}, {}, false, cpuParams},
{{128, 254}, {1}, {}, false, cpuParams},
{{128, 254}, {0, 1}, {}, false, cpuParams},
{{128, 254}, {1}, {10}, false, cpuParams},
{{128, 254}, {0, 1}, {128, 100}, false, cpuParams},
{{1, 1, 2}, {0}, {1}, true, cpuParams},
{{1, 1, 2}, {1}, {1}, true, cpuParams},
{{1, 1, 2}, {0, 1}, {1, 1}, true, cpuParams},
{{2, 2, 2}, {0}, {}, true, cpuParams},
{{2, 2, 2}, {1}, {}, true, cpuParams},
{{2, 2, 2}, {0, 1}, {}, true, cpuParams},
{{13, 13, 2}, {0}, {}, true, cpuParams},
{{13, 13, 2}, {1}, {}, true, cpuParams},
{{13, 13, 2}, {0, 1}, {}, true, cpuParams},
{{29, 29, 2}, {0}, {}, true, cpuParams},
{{29, 29, 2}, {1}, {}, true, cpuParams},
{{29, 29, 2}, {0, 1}, {}, true, cpuParams},
{{30, 32, 2}, {0}, {}, true, cpuParams},
{{32, 30, 2}, {1}, {}, true, cpuParams},
{{32, 30, 2}, {0, 1}, {}, true, cpuParams},
{{62, 64, 2}, {0}, {}, true, cpuParams},
{{64, 62, 2}, {1}, {}, true, cpuParams},
{{64, 62, 2}, {0, 1}, {}, true, cpuParams},
{{254, 128, 2}, {0}, {}, true, cpuParams},
{{128, 254, 2}, {1}, {}, true, cpuParams},
{{128, 254, 2}, {0, 1}, {}, true, cpuParams},
{{128, 254, 2}, {1}, {10}, true, cpuParams},
{{128, 254, 2}, {0, 1}, {128, 100}, true, cpuParams},
};
}
return {};
}
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_CPU_2D, RDFTTestCPU, ::testing::ValuesIn(getParams2D()), RDFTTestCPU::getTestCaseName);
std::vector<RDFTTestCPUParams> getParams4D() {
std::vector<RDFTTestCPUParams> params;
if (InferenceEngine::with_cpu_x86_avx512_core()) {
params = {
{{10, 46, 128, 65}, {1}, {}, false, cpuParams},
{{10, 46, 128, 65}, {0, 1}, {}, false, cpuParams},
{{46, 10, 128, 65}, {1, 0}, {}, false, cpuParams},
{{10, 46, 128, 65}, {1, 2}, {}, false, cpuParams},
{{46, 10, 128, 65}, {-2, -1}, {}, false, cpuParams},
{{46, 10, 128, 65}, {3, 1}, {}, false, cpuParams},
{{46, 10, 128, 65}, {0, 1, 2, 3}, {}, false, cpuParams},
{{46, 10, 128, 65}, {0, 1, 2, 3}, {10, 10, 33, 50}, false, cpuParams},
{{10, 46, 128, 65, 2}, {1}, {}, true, cpuParams},
{{10, 46, 128, 65, 2}, {0, 1}, {}, true, cpuParams},
{{46, 10, 128, 65, 2}, {1, 0}, {}, true, cpuParams},
{{10, 46, 128, 65, 2}, {1, 2}, {}, true, cpuParams},
{{46, 10, 128, 65, 2}, {-2, -1}, {}, true, cpuParams},
{{46, 10, 128, 65, 2}, {3, 1}, {}, true, cpuParams},
{{46, 10, 128, 65, 2}, {0, 1, 2, 3}, {}, true, cpuParams},
// TODO: FIXME
//{{46, 10, 128, 65, 2}, {0, 1, 2, 3}, {12, 15, 130, 40}, true, cpuParams},
};
} else if (InferenceEngine::with_cpu_x86_avx2()) {
params = {
{{9, 16, 32, 126}, {1}, {}, false, cpuParams},
{{9, 16, 32, 126}, {1, 0}, {}, false, cpuParams},
{{9, 16, 32, 126}, {1, 2}, {}, false, cpuParams},
{{9, 16, 32, 126}, {-2, -1}, {}, false, cpuParams},
{{9, 16, 32, 126}, {3, 1}, {}, false, cpuParams},
{{9, 16, 32, 126}, {0, 1, 2, 3}, {}, false, cpuParams},
{{9, 16, 32, 126}, {0, 1, 2, 3}, {8, 10, 11, 12}, false, cpuParams},
{{9, 16, 32, 126, 2}, {1}, {}, true, cpuParams},
{{9, 16, 32, 126, 2}, {1, 0}, {}, true, cpuParams},
{{9, 16, 32, 126, 2}, {1, 2}, {}, true, cpuParams},
{{9, 16, 32, 126, 2}, {-2, -1}, {}, true, cpuParams},
{{9, 16, 32, 126, 2}, {3, 1}, {}, true, cpuParams},
{{9, 16, 32, 126, 2}, {0, 1, 2, 3}, {}, true, cpuParams},
// TODO: FIXME
//{{9, 16, 32, 126, 2}, {0, 1, 2, 3}, {8, 10, 11, 12}, true, cpuParams},
};
} else {
params = {
{{1, 2, 13, 30}, {1}, {}, false, cpuParams},
{{1, 2, 13, 30}, {1, 0}, {}, false, cpuParams},
{{1, 2, 13, 30}, {1, 2}, {}, false, cpuParams},
{{1, 2, 13, 30}, {-2, -1}, {}, false, cpuParams},
{{1, 2, 13, 30}, {3, 2}, {}, false, cpuParams},
{{1, 2, 13, 30}, {0, 1, 2, 3}, {}, false, cpuParams},
{{1, 2, 13, 30}, {0, 1, 2, 3}, {1, 2, 3, 13}, false, cpuParams},
{{1, 2, 13, 30, 2}, {1}, {}, true, cpuParams},
{{2, 2, 13, 30, 2}, {1, 0}, {}, true, cpuParams},
{{1, 2, 13, 30, 2}, {1, 2}, {}, true, cpuParams},
{{1, 2, 13, 30, 2}, {-2, -1}, {}, true, cpuParams},
{{1, 2, 13, 30, 2}, {3, 2}, {}, true, cpuParams},
{{1, 2, 13, 30, 2}, {0, 1, 2, 3}, {}, true, cpuParams},
// TODO: FIXME
//{{1, 2, 13, 30, 2}, {0, 1, 2, 3}, {1, 2, 3, 13}, true, cpuParams},
};
}
params.push_back({{1, 192, 36, 64}, {0}, {}, false, cpuParams});
params.push_back({{1, 192, 36, 64}, {1}, {}, false, cpuParams});
params.push_back({{1, 192, 36, 64}, {2}, {}, false, cpuParams});
params.push_back({{1, 192, 36, 64}, {3}, {}, false, cpuParams});
params.push_back({{1, 192, 36, 64}, {0, 1}, {}, false, cpuParams});
params.push_back({{1, 192, 36, 64}, {3, 2}, {}, false, cpuParams});
params.push_back({{1, 192, 36, 64}, {-2, -1}, {36, 64}, false, cpuParams});
params.push_back({{1, 192, 36, 64}, {0, 1, 2, 3}, {}, false, cpuParams});
params.push_back({{2, 192, 36, 33, 2}, {0}, {}, true, cpuParams});
params.push_back({{1, 192, 36, 33, 2}, {1}, {}, true, cpuParams});
params.push_back({{1, 192, 36, 33, 2}, {2}, {}, true, cpuParams});
params.push_back({{1, 192, 36, 33, 2}, {3}, {}, true, cpuParams});
params.push_back({{1, 192, 36, 33, 2}, {0, 1}, {}, true, cpuParams});
params.push_back({{1, 192, 36, 33, 2}, {3, 2}, {}, true, cpuParams});
params.push_back({{1, 192, 36, 33, 2}, {-2, -1}, {36, 64}, true, cpuParams});
params.push_back({{1, 192, 36, 33, 2}, {0, 1, 2, 3}, {}, true, cpuParams});
return params;
}
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_CPU_4D, RDFTTestCPU, ::testing::ValuesIn(getParams4D()), RDFTTestCPU::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions

View File

@ -0,0 +1,15 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "shared_test_classes/single_layer/rdft.hpp"
namespace LayerTestsDefinitions {
TEST_P(RDFTLayerTest, CompareWithRefs) {
Run();
}
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,31 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <tuple>
#include <string>
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
namespace LayerTestsDefinitions {
typedef std::tuple<
InferenceEngine::SizeVector, // Input shapes
InferenceEngine::Precision, // Input precision
std::vector<int64_t>, // Axes
std::vector<int64_t>, // Signal size
ngraph::helpers::DFTOpType,
std::string> RDFTParams; // Device name
class RDFTLayerTest : public testing::WithParamInterface<RDFTParams>, virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<RDFTParams>& obj);
protected:
void SetUp() override;
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,47 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/single_layer/rdft.hpp"
namespace LayerTestsDefinitions {
std::string RDFTLayerTest::getTestCaseName(const testing::TestParamInfo<RDFTParams>& obj) {
InferenceEngine::SizeVector inputShapes;
InferenceEngine::Precision inputPrecision;
std::vector<int64_t> axes;
std::vector<int64_t> signalSize;
ngraph::helpers::DFTOpType opType;
std::string targetDevice;
std::tie(inputShapes, inputPrecision, axes, signalSize, opType, targetDevice) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "Precision=" << inputPrecision.name() << "_";
result << "Axes=" << CommonTestUtils::vec2str(axes) << "_";
result << "SignalSize=" << CommonTestUtils::vec2str(signalSize) << "_";
result << "Inverse=" << (opType == ngraph::helpers::DFTOpType::INVERSE) << "_";
result << "TargetDevice=" << targetDevice;
return result.str();
}
void RDFTLayerTest::SetUp() {
InferenceEngine::SizeVector inputShapes;
InferenceEngine::Precision inputPrecision;
std::vector<int64_t> axes;
std::vector<int64_t> signalSize;
ngraph::helpers::DFTOpType opType;
std::tie(inputShapes, inputPrecision, axes, signalSize, opType, targetDevice) = this->GetParam();
auto inType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision);
ngraph::ParameterVector paramVector;
auto paramData = std::make_shared<ngraph::opset1::Parameter>(inType, ngraph::Shape(inputShapes));
paramVector.push_back(paramData);
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(paramVector));
auto rdft = ngraph::builder::makeRDFT(paramOuts[0], axes, signalSize, opType);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(rdft)};
function = std::make_shared<ngraph::Function>(results, paramVector, "RDFT");
}
} // namespace LayerTestsDefinitions

View File

@ -596,6 +596,11 @@ std::shared_ptr<ngraph::Node> makeDFT(const ngraph::Output<Node> &dataNode,
const std::vector<int64_t> &signalSize,
const ngraph::helpers::DFTOpType opType);
std::shared_ptr<ngraph::Node> makeRDFT(const ngraph::Output<Node> &dataNode,
const std::vector<int64_t> &axes,
const std::vector<int64_t> &signalSize,
const ngraph::helpers::DFTOpType opType);
std::shared_ptr<ngraph::Node> makeEinsum(const OutputVector& inputs,
const std::string& equation);
} // namespace builder

View File

@ -0,0 +1,40 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <memory>
#include <stdexcept>
#include "ngraph_functions/builders.hpp"
namespace ngraph {
namespace builder {
namespace {
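// Dispatches to the RDFT or IRDFT constructor depending on the requested transform direction.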
template <typename ...Args>
std::shared_ptr<ngraph::Node> CallDftCtorWithArgs(const ngraph::helpers::DFTOpType opType, Args&&... args) {
switch (opType) {
case ngraph::helpers::DFTOpType::FORWARD:
return std::make_shared<ngraph::op::v9::RDFT>(std::forward<Args>(args)...);
case ngraph::helpers::DFTOpType::INVERSE:
return std::make_shared<ngraph::op::v9::IRDFT>(std::forward<Args>(args)...);
default:
throw std::logic_error("Unsupported operation type");
}
}
} // namespace
std::shared_ptr<ngraph::Node> makeRDFT(const ngraph::Output<Node> &dataNode,
const std::vector<int64_t> &axes,
const std::vector<int64_t> &signalSize,
const ngraph::helpers::DFTOpType opType) {
auto axesNode = std::make_shared<ngraph::op::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{axes.size()}, axes)->output(0);
if (!signalSize.empty()) {
auto signalSizeNode = std::make_shared<ngraph::op::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{signalSize.size()}, signalSize)->output(0);
return CallDftCtorWithArgs(opType, dataNode, axesNode, signalSizeNode);
}
return CallDftCtorWithArgs(opType, dataNode, axesNode);
}
} // namespace builder
} // namespace ngraph
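For reference, a hedged sketch of how this builder is typically invoked (it mirrors the shared test class above; the wrapper function is illustrative only, not part of the change):

#include "ngraph_functions/builders.hpp"

std::shared_ptr<ngraph::Function> makeForwardRdftSketch() {
    // 2D real input, forward RDFT over both axes, no explicit signal sizes.
    auto param = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{64, 32});
    auto rdft = ngraph::builder::makeRDFT(param->output(0), {0, 1}, {}, ngraph::helpers::DFTOpType::FORWARD);
    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(rdft)};
    return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{param}, "RDFT");
}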