[CPU] Add RDFT and IRDFT operators (#12099)
* [CPU] Add RDFT and IRDFT operators Tickets: 79178 and 79192 Co-authored-by: Mateusz Bencer <mateusz.bencer@intel.com>
This commit is contained in:
parent
862aebce71
commit
270051ebce
@ -2,7 +2,8 @@
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import openvino.runtime.opset8 as ov
|
||||
import openvino.runtime.opset9 as ov
|
||||
from openvino.runtime import Shape
|
||||
import numpy as np
|
||||
from tests.runtime import get_runtime
|
||||
|
||||
|
165
src/bindings/python/tests/test_ngraph/test_rdft.py
Normal file
165
src/bindings/python/tests/test_ngraph/test_rdft.py
Normal file
@ -0,0 +1,165 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import openvino.runtime.opset9 as ov
|
||||
from openvino.runtime import Shape
|
||||
import numpy as np
|
||||
from tests.runtime import get_runtime
|
||||
|
||||
|
||||
np.random.seed(0)
|
||||
|
||||
|
||||
def test_rdft_1d():
    """Compare a 1-D RDFT over axis 0 with ``np.fft.rfft``."""
    backend = get_runtime()
    length = 50
    dims = [length]
    signal = np.random.uniform(0, 1, dims).astype(np.float32)
    data_param = ov.parameter(Shape(dims), name="input", dtype=np.float32)
    axes_const = ov.constant(np.array([0], dtype=np.int64))

    rdft_node = ov.rdft(data_param, axes_const)
    run = backend.computation(rdft_node, data_param)
    outputs = run(signal)

    spectrum = np.fft.rfft(signal)
    # The expected tensor carries (real, imag) in a trailing axis of size 2.
    reference = np.stack([spectrum.real, spectrum.imag], axis=-1)
    np.testing.assert_allclose(reference, outputs[0], atol=0.0001)
|
||||
|
||||
|
||||
def test_irdft_1d():
    """Compare a 1-D IRDFT with an explicit signal size against ``np.fft.irfft``."""
    backend = get_runtime()
    output_length = 50
    # Half-spectrum length plus a trailing (real, imag) axis.
    dims = [output_length // 2 + 1, 2]
    packed = np.random.uniform(0, 1, dims).astype(np.float32)
    data_param = ov.parameter(Shape(dims), name="input", dtype=np.float32)
    axes_const = ov.constant(np.array([0], dtype=np.int64))
    size_const = ov.constant(np.array([output_length], dtype=np.int64))

    irdft_node = ov.irdft(data_param, axes_const, size_const)
    run = backend.computation(irdft_node, data_param)
    outputs = run(packed)

    as_complex = packed[..., 0] + 1j * packed[..., 1]
    reference = np.fft.irfft(as_complex, output_length)
    np.testing.assert_allclose(reference, outputs[0], atol=0.0001)
|
||||
|
||||
|
||||
def test_rdft_2d():
    """Compare a 2-D RDFT over both axes with ``np.fft.rfftn``."""
    backend = get_runtime()
    dims = [100, 128]
    signal = np.random.uniform(0, 1, dims).astype(np.float32)
    data_param = ov.parameter(Shape(dims), name="input", dtype=np.float32)
    transform_axes = [0, 1]
    axes_const = ov.constant(np.array(transform_axes, dtype=np.int64))

    rdft_node = ov.rdft(data_param, axes_const)
    run = backend.computation(rdft_node, data_param)
    outputs = run(signal)

    spectrum = np.fft.rfftn(signal, axes=transform_axes)
    # The expected tensor carries (real, imag) in a trailing axis of size 2.
    reference = np.stack([spectrum.real, spectrum.imag], axis=-1)
    np.testing.assert_allclose(reference, outputs[0], atol=0.0007)
|
||||
|
||||
|
||||
def test_rdft_2d_signal_size():
    """Compare a 2-D RDFT with an explicit (smaller) signal size against ``np.fft.rfftn``."""
    backend = get_runtime()
    dims = [100, 128]
    signal = np.random.uniform(0, 1, dims).astype(np.float32)
    data_param = ov.parameter(Shape(dims), name="input", dtype=np.float32)
    transform_axes = [0, 1]
    sizes = [30, 40]
    axes_const = ov.constant(np.array(transform_axes, dtype=np.int64))
    sizes_const = ov.constant(np.array(sizes, dtype=np.int64))

    rdft_node = ov.rdft(data_param, axes_const, sizes_const)
    run = backend.computation(rdft_node, data_param)
    outputs = run(signal)

    spectrum = np.fft.rfftn(signal, s=sizes, axes=transform_axes)
    # The expected tensor carries (real, imag) in a trailing axis of size 2.
    reference = np.stack([spectrum.real, spectrum.imag], axis=-1)
    np.testing.assert_allclose(reference, outputs[0], atol=0.0007)
|
||||
|
||||
|
||||
def test_irdft_2d():
    """Compare a 2-D IRDFT with default signal sizes against ``np.fft.irfftn``."""
    backend = get_runtime()
    transform_axes = [0, 1]
    # Last axis of size 2 holds the (real, imag) pair.
    dims = [100, 65, 2]
    packed = np.random.uniform(0, 1, dims).astype(np.float32)
    data_param = ov.parameter(Shape(dims), name="input", dtype=np.float32)
    axes_const = ov.constant(np.array(transform_axes, dtype=np.int64))

    irdft_node = ov.irdft(data_param, axes_const)
    run = backend.computation(irdft_node, data_param)
    outputs = run(packed)

    as_complex = packed[..., 0] + 1j * packed[..., 1]
    reference = np.fft.irfftn(as_complex, axes=transform_axes)
    np.testing.assert_allclose(reference, outputs[0], atol=0.0001)
|
||||
|
||||
|
||||
def test_irdft_2d_signal_size():
    """Compare a 2-D IRDFT with an explicit signal size against ``np.fft.irfftn``."""
    backend = get_runtime()
    transform_axes = [0, 1]
    # Last axis of size 2 holds the (real, imag) pair.
    dims = [100, 65, 2]
    sizes = [100, 65]
    packed = np.random.uniform(0, 1, dims).astype(np.float32)
    data_param = ov.parameter(Shape(dims), name="input", dtype=np.float32)
    axes_const = ov.constant(np.array(transform_axes, dtype=np.int64))
    sizes_const = ov.constant(np.array(sizes, dtype=np.int64))

    irdft_node = ov.irdft(data_param, axes_const, sizes_const)
    run = backend.computation(irdft_node, data_param)
    outputs = run(packed)

    as_complex = packed[..., 0] + 1j * packed[..., 1]
    reference = np.fft.irfftn(as_complex, s=sizes, axes=transform_axes)
    np.testing.assert_allclose(reference, outputs[0], atol=0.0001)
|
||||
|
||||
|
||||
def test_rdft_4d():
    """Compare an RDFT over the last two axes of a 4-D tensor with ``np.fft.rfftn``."""
    backend = get_runtime()
    dims = [1, 192, 36, 64]
    signal = np.random.uniform(0, 1, dims).astype(np.float32)
    data_param = ov.parameter(Shape(dims), name="input", dtype=np.float32)
    # Negative axes address dimensions from the end.
    transform_axes = [-2, -1]
    axes_const = ov.constant(np.array(transform_axes, dtype=np.int64))

    rdft_node = ov.rdft(data_param, axes_const)
    run = backend.computation(rdft_node, data_param)
    outputs = run(signal)

    spectrum = np.fft.rfftn(signal, axes=transform_axes)
    # The expected tensor carries (real, imag) in a trailing axis of size 2.
    reference = np.stack([spectrum.real, spectrum.imag], axis=-1)
    np.testing.assert_allclose(reference, outputs[0], atol=0.0007)
|
||||
|
||||
|
||||
def test_rdft_4d_signal_size():
    """Compare a 4-D RDFT with an explicit signal size against ``np.fft.rfftn``."""
    backend = get_runtime()
    dims = [1, 192, 36, 64]
    sizes = [36, 64]
    signal = np.random.uniform(0, 1, dims).astype(np.float32)
    data_param = ov.parameter(Shape(dims), name="input", dtype=np.float32)
    # Negative axes address dimensions from the end.
    transform_axes = [-2, -1]
    axes_const = ov.constant(np.array(transform_axes, dtype=np.int64))
    sizes_const = ov.constant(np.array(sizes, dtype=np.int64))

    rdft_node = ov.rdft(data_param, axes_const, sizes_const)
    run = backend.computation(rdft_node, data_param)
    outputs = run(signal)

    spectrum = np.fft.rfftn(signal, s=sizes, axes=transform_axes)
    # The expected tensor carries (real, imag) in a trailing axis of size 2.
    reference = np.stack([spectrum.real, spectrum.imag], axis=-1)
    np.testing.assert_allclose(reference, outputs[0], atol=0.0007)
|
||||
|
||||
|
||||
def test_irdft_4d():
    """Compare an IRDFT over the last two signal axes (default sizes) with ``np.fft.irfftn``."""
    backend = get_runtime()
    # Last axis of size 2 holds the (real, imag) pair.
    dims = [1, 192, 36, 33, 2]
    packed = np.random.uniform(0, 1, dims).astype(np.float32)
    data_param = ov.parameter(Shape(dims), name="input", dtype=np.float32)
    transform_axes = [-2, -1]
    axes_const = ov.constant(np.array(transform_axes, dtype=np.int64))

    irdft_node = ov.irdft(data_param, axes_const)
    run = backend.computation(irdft_node, data_param)
    outputs = run(packed)

    as_complex = packed[..., 0] + 1j * packed[..., 1]
    reference = np.fft.irfftn(as_complex, axes=transform_axes)
    np.testing.assert_allclose(reference, outputs[0], atol=0.0001)
|
||||
|
||||
|
||||
def test_irdft_4d_signal_size():
    """Compare a 4-D IRDFT with an explicit signal size against ``np.fft.irfftn``."""
    backend = get_runtime()
    # Last axis of size 2 holds the (real, imag) pair.
    dims = [1, 192, 36, 33, 2]
    sizes = [36, 64]
    packed = np.random.uniform(0, 1, dims).astype(np.float32)
    data_param = ov.parameter(Shape(dims), name="input", dtype=np.float32)
    transform_axes = [-2, -1]
    axes_const = ov.constant(np.array(transform_axes, dtype=np.int64))
    sizes_const = ov.constant(np.array(sizes, dtype=np.int64))

    irdft_node = ov.irdft(data_param, axes_const, sizes_const)
    run = backend.computation(irdft_node, data_param)
    outputs = run(packed)

    as_complex = packed[..., 0] + 1j * packed[..., 1]
    reference = np.fft.irfftn(as_complex, s=sizes, axes=transform_axes)
    np.testing.assert_allclose(reference, outputs[0], atol=0.0001)
|
@ -141,6 +141,8 @@ const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_t
|
||||
{ "ShuffleChannels", Type::ShuffleChannels},
|
||||
{ "DFT", Type::DFT},
|
||||
{ "IDFT", Type::DFT},
|
||||
{ "RDFT", Type::RDFT},
|
||||
{ "IRDFT", Type::RDFT},
|
||||
{ "Abs", Type::Math},
|
||||
{ "Acos", Type::Math},
|
||||
{ "Acosh", Type::Math},
|
||||
@ -328,6 +330,8 @@ std::string NameFromType(const Type type) {
|
||||
return "ShuffleChannels";
|
||||
case Type::DFT:
|
||||
return "DFT";
|
||||
case Type::RDFT:
|
||||
return "RDFT";
|
||||
case Type::Math:
|
||||
return "Math";
|
||||
case Type::CTCLoss:
|
||||
|
@ -79,6 +79,7 @@ enum class Type {
|
||||
Reference,
|
||||
ShuffleChannels,
|
||||
DFT,
|
||||
RDFT,
|
||||
Math,
|
||||
CTCLoss,
|
||||
Bucketize,
|
||||
|
447
src/plugins/intel_cpu/src/nodes/kernels/rdft_kernel.cpp
Normal file
447
src/plugins/intel_cpu/src/nodes/kernels/rdft_kernel.cpp
Normal file
@ -0,0 +1,447 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "rdft_kernel.hpp"
|
||||
#include <ie_common.h>
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
#define GET_OFF(field) offsetof(jit_dft_args, field)
|
||||
|
||||
template <cpu_isa_t isa>
|
||||
void jit_dft_kernel_f32<isa>::generate() {
|
||||
using namespace Xbyak::util;
|
||||
using Xbyak::Label;
|
||||
using Xbyak::Xmm;
|
||||
using Vmm = typename conditional3<isa == cpu::x64::sse41, Xbyak::Xmm,
|
||||
isa == cpu::x64::avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
|
||||
|
||||
this->preamble();
|
||||
|
||||
int input_type_size = 0;
|
||||
int output_type_size = 0;
|
||||
|
||||
switch (kernel_type_) {
|
||||
case real_to_complex:
|
||||
input_type_size = type_size;
|
||||
output_type_size = complex_type_size<float>();
|
||||
break;
|
||||
case complex_to_complex:
|
||||
input_type_size = complex_type_size<float>();
|
||||
output_type_size = complex_type_size<float>();
|
||||
break;
|
||||
case complex_to_real:
|
||||
input_type_size = complex_type_size<float>();
|
||||
output_type_size = type_size;
|
||||
break;
|
||||
}
|
||||
int vlen = cpu_isa_traits<isa>::vlen;
|
||||
const int simd_size = vlen / output_type_size;
|
||||
|
||||
mov(input_ptr, ptr[param1 + GET_OFF(input)]);
|
||||
mov(input_size, ptr[param1 + GET_OFF(input_size)]);
|
||||
mov(twiddles_ptr, ptr[param1 + GET_OFF(twiddles)]);
|
||||
mov(output_start, ptr[param1 + GET_OFF(output_start)]);
|
||||
mov(output_end, ptr[param1 + GET_OFF(output_end)]);
|
||||
|
||||
// offset twiddles_ptr by input_size * complex_type_size<float>() * output_start bytes
|
||||
mov(signal_size, ptr[param1 + GET_OFF(signal_size)]);
|
||||
mov(rax, signal_size);
|
||||
lea(rax, ptr[rax * complex_type_size<float>()]);
|
||||
xor_(rdx, rdx);
|
||||
mul(output_start);
|
||||
add(twiddles_ptr, rax);
|
||||
|
||||
// offset output_ptr by output_start * output_type_size bytes
|
||||
mov(output_ptr, ptr[param1 + GET_OFF(output)]);
|
||||
lea(output_ptr, ptr[output_ptr + output_type_size * output_start]);
|
||||
|
||||
size_t reg_idx = 0;
|
||||
Xmm xmm_signal_size = Xmm(reg_idx);
|
||||
Vmm vmm_signal_size = Vmm(reg_idx);
|
||||
if (is_inverse_) {
|
||||
reg_idx++;
|
||||
uni_vbroadcastss(Vmm(reg_idx), ptr[param1 + GET_OFF(signal_size)]);
|
||||
uni_vcvtdq2ps(vmm_signal_size, Vmm(reg_idx));
|
||||
}
|
||||
|
||||
Vmm vmm_neg_mask = Vmm(reg_idx);
|
||||
Xmm xmm_neg_mask = Xmm(reg_idx);
|
||||
if (kernel_type_ == complex_to_complex) {
|
||||
reg_idx++;
|
||||
if (!is_inverse_) {
|
||||
mov(rax, 1ULL << 31);
|
||||
} else {
|
||||
mov(rax, 1ULL << 63);
|
||||
}
|
||||
uni_vmovq(xmm_neg_mask, rax);
|
||||
uni_vbroadcastsd(vmm_neg_mask, xmm_neg_mask);
|
||||
}
|
||||
|
||||
mov(rax, signal_size);
|
||||
and_(rax, 1);
|
||||
setz(is_signal_size_even);
|
||||
|
||||
Label loop_over_output;
|
||||
Label loop_over_output_continue;
|
||||
Label loop_simd;
|
||||
Label loop_nonsimd;
|
||||
|
||||
auto simd_loop = [this, vlen, simd_size,
|
||||
input_type_size, reg_idx,
|
||||
&vmm_signal_size,
|
||||
&xmm_neg_mask,
|
||||
&vmm_neg_mask] {
|
||||
size_t idx = reg_idx;
|
||||
Vmm result = Vmm(idx++);
|
||||
Vmm inp_real = Vmm(idx++);
|
||||
Vmm inp_imag = Vmm(idx++);
|
||||
const Vmm& input = inp_real;
|
||||
const Vmm& input_perm = inp_imag;
|
||||
Vmm twiddles = Vmm(idx++);
|
||||
const Vmm& cos = twiddles;
|
||||
Vmm sin = Vmm(idx++);
|
||||
Xmm tmp = Xmm(idx++);
|
||||
|
||||
uni_vpxor(result, result, result);
|
||||
|
||||
if (kernel_type_ == complex_to_complex && is_inverse_) {
|
||||
mov(rdx, 1ULL << 63);
|
||||
uni_vmovq(xmm_neg_mask, rdx);
|
||||
uni_vbroadcastsd(vmm_neg_mask, xmm_neg_mask);
|
||||
}
|
||||
|
||||
Label loop;
|
||||
L(loop);
|
||||
{
|
||||
if (kernel_type_ == real_to_complex) {
|
||||
uni_vbroadcastss(inp_real, ptr[input_ptr]);
|
||||
uni_vmovups(twiddles, ptr[twiddles_ptr]);
|
||||
uni_vfmadd231ps(result, inp_real, twiddles);
|
||||
|
||||
add(twiddles_ptr, vlen);
|
||||
} else if (kernel_type_ == complex_to_real) {
|
||||
uni_vbroadcastss(inp_real, ptr[input_ptr]);
|
||||
uni_vbroadcastss(inp_imag, ptr[input_ptr + type_size]);
|
||||
uni_vmovups(cos, ptr[twiddles_ptr]);
|
||||
uni_vmovups(sin, ptr[twiddles_ptr + vlen]);
|
||||
uni_vfmadd231ps(result, inp_real, cos);
|
||||
uni_vfmadd231ps(result, inp_imag, sin);
|
||||
|
||||
add(twiddles_ptr, 2 * vlen);
|
||||
} else if (kernel_type_ == complex_to_complex) {
|
||||
// output_real += input_real * cos(..) - input_imag * sin(..)
|
||||
// output_imag += input_imag * cos(..) + input_real * sin(..)
|
||||
uni_vbroadcastsd(input, ptr[input_ptr]);
|
||||
uni_vpermilps(input_perm, input, 0b10110001); // swap real with imag
|
||||
uni_vpxor(input_perm, input_perm, vmm_neg_mask); // negate imag part (or real part if is_inverse == true)
|
||||
load_and_broadcast_every_other_elem(cos, twiddles_ptr, tmp);
|
||||
load_and_broadcast_every_other_elem(sin, twiddles_ptr + vlen / 2, tmp);
|
||||
uni_vfmadd231ps(result, input, cos);
|
||||
uni_vfmadd231ps(result, input_perm, sin);
|
||||
|
||||
add(twiddles_ptr, vlen);
|
||||
}
|
||||
|
||||
add(input_ptr, input_type_size);
|
||||
|
||||
dec(input_size);
|
||||
cmp(input_size, 0);
|
||||
jne(loop, T_NEAR);
|
||||
}
|
||||
|
||||
if (is_inverse_) {
|
||||
Label loop_backwards;
|
||||
Label loop_backwards_exit;
|
||||
|
||||
mov(input_size, signal_size);
|
||||
sub(input_size, ptr[param1 + GET_OFF(input_size)]);
|
||||
|
||||
if (kernel_type_ == complex_to_complex) {
|
||||
mov(rdx, 1ULL << 31);
|
||||
vmovq(xmm_neg_mask, rdx);
|
||||
uni_vbroadcastsd(vmm_neg_mask, xmm_neg_mask);
|
||||
}
|
||||
|
||||
test(is_signal_size_even, 1);
|
||||
jz(loop_backwards);
|
||||
|
||||
sub(input_ptr, input_type_size);
|
||||
|
||||
L(loop_backwards);
|
||||
{
|
||||
cmp(input_size, 0);
|
||||
je(loop_backwards_exit, T_NEAR);
|
||||
|
||||
sub(input_ptr, input_type_size);
|
||||
if (kernel_type_ == complex_to_real) {
|
||||
uni_vbroadcastss(inp_real, ptr[input_ptr]);
|
||||
uni_vbroadcastss(inp_imag, ptr[input_ptr + type_size]);
|
||||
uni_vmovups(cos, ptr[twiddles_ptr]);
|
||||
uni_vmovups(sin, ptr[twiddles_ptr + vlen]);
|
||||
|
||||
uni_vfmadd231ps(result, inp_real, cos);
|
||||
uni_vfnmadd231ps(result, inp_imag, sin);
|
||||
add(twiddles_ptr, 2 * vlen);
|
||||
} else if (kernel_type_ == complex_to_complex) {
|
||||
// output_real += input_real * cos(..) - input_imag * sin(..)
|
||||
// output_imag += input_imag * cos(..) + input_real * sin(..)
|
||||
uni_vbroadcastsd(input, ptr[input_ptr]);
|
||||
uni_vpermilps(input_perm, input, 0b10110001); // swap real with imag
|
||||
uni_vpxor(input_perm, input_perm, vmm_neg_mask); // negate imag part
|
||||
load_and_broadcast_every_other_elem(cos, twiddles_ptr, tmp);
|
||||
load_and_broadcast_every_other_elem(sin, twiddles_ptr + vlen / 2, tmp);
|
||||
uni_vfmadd231ps(result, input, cos);
|
||||
uni_vfmadd231ps(result, input_perm, sin);
|
||||
add(twiddles_ptr, vlen);
|
||||
}
|
||||
|
||||
dec(input_size);
|
||||
jmp(loop_backwards, T_NEAR);
|
||||
}
|
||||
L(loop_backwards_exit);
|
||||
}
|
||||
|
||||
if (is_inverse_) {
|
||||
uni_vdivps(result, result, vmm_signal_size);
|
||||
}
|
||||
// store the results
|
||||
uni_vmovups(ptr[output_ptr], result);
|
||||
|
||||
add(output_ptr, vlen);
|
||||
sub(output_end, simd_size);
|
||||
};
|
||||
|
||||
auto nonsimd_loop = [this,
|
||||
input_type_size,
|
||||
output_type_size,
|
||||
&xmm_signal_size,
|
||||
reg_idx] {
|
||||
size_t idx = reg_idx;
|
||||
Xmm xmm_inp_real = Xbyak::Xmm(idx++);
|
||||
Xmm xmm_inp_imag = Xbyak::Xmm(idx++);
|
||||
Xmm xmm_real = Xbyak::Xmm(idx++);
|
||||
Xmm xmm_imag = Xbyak::Xmm(idx++);
|
||||
Xmm xmm_cos = Xbyak::Xmm(idx++);
|
||||
Xmm xmm_sin = Xbyak::Xmm(idx++);
|
||||
|
||||
if (kernel_type_ != complex_to_real) {
|
||||
xorps(xmm_real, xmm_real);
|
||||
xorps(xmm_imag, xmm_imag);
|
||||
} else {
|
||||
xorps(xmm_real, xmm_real);
|
||||
}
|
||||
|
||||
Label loop;
|
||||
L(loop);
|
||||
{
|
||||
movss(xmm_cos, ptr[twiddles_ptr]);
|
||||
movss(xmm_sin, ptr[twiddles_ptr + type_size]);
|
||||
if (kernel_type_ == real_to_complex) {
|
||||
movss(xmm_inp_real, ptr[input_ptr]);
|
||||
|
||||
// output_real += input_real * cos(..)
|
||||
mulss(xmm_cos, xmm_inp_real);
|
||||
addss(xmm_real, xmm_cos);
|
||||
|
||||
// output_imag += input_real * sin(..)
|
||||
mulss(xmm_sin, xmm_inp_real);
|
||||
addss(xmm_imag, xmm_sin);
|
||||
} else if (kernel_type_ == complex_to_real) {
|
||||
movss(xmm_inp_real, ptr[input_ptr]);
|
||||
movss(xmm_inp_imag, ptr[input_ptr + type_size]);
|
||||
|
||||
// output += real * cos(..) + imag * sin(..)
|
||||
mulss(xmm_cos, xmm_inp_real);
|
||||
mulss(xmm_sin, xmm_inp_imag);
|
||||
addss(xmm_cos, xmm_sin);
|
||||
addss(xmm_real, xmm_cos);
|
||||
} else if (kernel_type_ == complex_to_complex) {
|
||||
// output_real += input_real * cos(..) - input_imag * sin(..)
|
||||
movss(xmm_inp_real, ptr[input_ptr]);
|
||||
movss(xmm_inp_imag, ptr[input_ptr + type_size]);
|
||||
mulss(xmm_inp_real, xmm_cos);
|
||||
mulss(xmm_inp_imag, xmm_sin);
|
||||
if (!is_inverse_) {
|
||||
subss(xmm_inp_real, xmm_inp_imag);
|
||||
} else {
|
||||
addss(xmm_inp_real, xmm_inp_imag);
|
||||
}
|
||||
addss(xmm_real, xmm_inp_real);
|
||||
|
||||
// output_imag += input_imag * cos(..) + input_real * sin(..)
|
||||
movss(xmm_inp_real, ptr[input_ptr]);
|
||||
movss(xmm_inp_imag, ptr[input_ptr + type_size]);
|
||||
mulss(xmm_inp_imag, xmm_cos);
|
||||
mulss(xmm_inp_real, xmm_sin);
|
||||
if (!is_inverse_) {
|
||||
addss(xmm_inp_imag, xmm_inp_real);
|
||||
} else {
|
||||
subss(xmm_inp_imag, xmm_inp_real);
|
||||
}
|
||||
addss(xmm_imag, xmm_inp_imag);
|
||||
}
|
||||
|
||||
// increment indexes for next iteration
|
||||
add(twiddles_ptr, complex_type_size<float>());
|
||||
add(input_ptr, input_type_size);
|
||||
dec(input_size);
|
||||
|
||||
// continue if input_size > 0
|
||||
cmp(input_size, 0);
|
||||
jg(loop, T_NEAR);
|
||||
}
|
||||
if (is_inverse_) {
|
||||
Label loop_backwards;
|
||||
Label loop_backwards_exit;
|
||||
|
||||
mov(input_size, signal_size);
|
||||
sub(input_size, ptr[param1 + GET_OFF(input_size)]);
|
||||
|
||||
test(is_signal_size_even, 1);
|
||||
jz(loop_backwards);
|
||||
|
||||
sub(input_ptr, input_type_size);
|
||||
|
||||
L(loop_backwards);
|
||||
{
|
||||
cmp(input_size, 0);
|
||||
je(loop_backwards_exit);
|
||||
|
||||
sub(input_ptr, input_type_size);
|
||||
|
||||
movss(xmm_cos, ptr[twiddles_ptr]);
|
||||
movss(xmm_sin, ptr[twiddles_ptr + type_size]);
|
||||
movss(xmm_inp_real, ptr[input_ptr]);
|
||||
movss(xmm_inp_imag, ptr[input_ptr + type_size]);
|
||||
|
||||
if (kernel_type_ == complex_to_real) {
|
||||
// output += real * cos(..) - imag * sin(..)
|
||||
mulss(xmm_cos, xmm_inp_real);
|
||||
mulss(xmm_sin, xmm_inp_imag);
|
||||
subss(xmm_cos, xmm_sin);
|
||||
addss(xmm_real, xmm_cos);
|
||||
} else if (kernel_type_ == complex_to_complex) {
|
||||
// output_real += input_real * cos(..) - input_imag * sin(..)
|
||||
movss(xmm_inp_real, ptr[input_ptr]);
|
||||
movss(xmm_inp_imag, ptr[input_ptr + type_size]);
|
||||
mulss(xmm_inp_real, xmm_cos);
|
||||
mulss(xmm_inp_imag, xmm_sin);
|
||||
subss(xmm_inp_real, xmm_inp_imag);
|
||||
addss(xmm_real, xmm_inp_real);
|
||||
|
||||
// output_imag += input_imag * cos(..) + input_real * sin(..)
|
||||
movss(xmm_inp_real, ptr[input_ptr]);
|
||||
movss(xmm_inp_imag, ptr[input_ptr + type_size]);
|
||||
mulss(xmm_inp_imag, xmm_cos);
|
||||
mulss(xmm_inp_real, xmm_sin);
|
||||
addss(xmm_inp_imag, xmm_inp_real);
|
||||
addss(xmm_imag, xmm_inp_imag);
|
||||
}
|
||||
|
||||
add(twiddles_ptr, complex_type_size<float>());
|
||||
dec(input_size);
|
||||
jmp(loop_backwards);
|
||||
}
|
||||
L(loop_backwards_exit);
|
||||
}
|
||||
|
||||
if (kernel_type_ == complex_to_real) {
|
||||
if (is_inverse_) {
|
||||
divss(xmm_real, xmm_signal_size);
|
||||
}
|
||||
// store the result
|
||||
movss(ptr[output_ptr], xmm_real);
|
||||
} else {
|
||||
if (is_inverse_) {
|
||||
divss(xmm_real, xmm_signal_size);
|
||||
divss(xmm_imag, xmm_signal_size);
|
||||
}
|
||||
// store the results
|
||||
movss(ptr[output_ptr], xmm_real);
|
||||
movss(ptr[output_ptr + type_size], xmm_imag);
|
||||
}
|
||||
|
||||
add(output_ptr, output_type_size);
|
||||
dec(output_end);
|
||||
};
|
||||
|
||||
L(loop_over_output);
|
||||
{
|
||||
mov(input_ptr, ptr[param1 + GET_OFF(input)]);
|
||||
mov(input_size, ptr[param1 + GET_OFF(input_size)]);
|
||||
|
||||
cmp(output_end, simd_size);
|
||||
jae(loop_simd, T_NEAR);
|
||||
|
||||
jmp(loop_nonsimd, T_NEAR);
|
||||
|
||||
L(loop_simd);
|
||||
simd_loop();
|
||||
jmp(loop_over_output_continue, T_NEAR);
|
||||
|
||||
L(loop_nonsimd);
|
||||
nonsimd_loop();
|
||||
|
||||
L(loop_over_output_continue);
|
||||
cmp(output_end, 0);
|
||||
ja(loop_over_output, T_NEAR);
|
||||
}
|
||||
|
||||
this->postamble();
|
||||
}
|
||||
|
||||
template <cpu_isa_t isa>
|
||||
void jit_dft_kernel_f32<isa>::uni_vbroadcastsd(const Xbyak::Xmm& x, const Xbyak::Operand& op) {
|
||||
movsd(x, op);
|
||||
shufpd(x, x, 0x0);
|
||||
}
|
||||
|
||||
template <cpu_isa_t isa>
|
||||
void jit_dft_kernel_f32<isa>::uni_vbroadcastsd(const Xbyak::Ymm& x, const Xbyak::Operand& op) {
|
||||
vbroadcastsd(x, op);
|
||||
}
|
||||
|
||||
template <cpu_isa_t isa>
|
||||
void jit_dft_kernel_f32<isa>::uni_vpermilps(const Xbyak::Xmm& x, const Xbyak::Operand& op, int8_t control) {
|
||||
movups(x, op);
|
||||
shufps(x, x, control);
|
||||
}
|
||||
|
||||
template <cpu_isa_t isa>
|
||||
void jit_dft_kernel_f32<isa>::uni_vpermilps(const Xbyak::Ymm& x, const Xbyak::Operand& op, int8_t control) {
|
||||
vpermilps(x, op, control);
|
||||
}
|
||||
|
||||
template <cpu_isa_t isa>
|
||||
void jit_dft_kernel_f32<isa>::load_and_broadcast_every_other_elem(const Xbyak::Zmm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
movq(tmp, ptr[reg_exp + type_size * i * 2]);
|
||||
shufps(tmp, tmp, 0b01010000);
|
||||
vinsertf32x4(x, x, tmp, i);
|
||||
}
|
||||
}
|
||||
|
||||
template <cpu_isa_t isa>
|
||||
void jit_dft_kernel_f32<isa>::load_and_broadcast_every_other_elem(const Xbyak::Ymm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp) {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
movq(tmp, ptr[reg_exp + type_size * i * 2]);
|
||||
shufps(tmp, tmp, 0b01010000);
|
||||
vinsertf128(x, x, tmp, i);
|
||||
}
|
||||
}
|
||||
|
||||
template <cpu_isa_t isa>
|
||||
void jit_dft_kernel_f32<isa>::load_and_broadcast_every_other_elem(const Xbyak::Xmm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp) {
|
||||
movq(x, ptr[reg_exp]);
|
||||
shufps(x, x, 0b01010000);
|
||||
}
|
||||
|
||||
template struct jit_dft_kernel_f32<cpu::x64::sse41>;
|
||||
template struct jit_dft_kernel_f32<cpu::x64::avx2>;
|
||||
template struct jit_dft_kernel_f32<cpu::x64::avx512_core>;
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
96
src/plugins/intel_cpu/src/nodes/kernels/rdft_kernel.hpp
Normal file
96
src/plugins/intel_cpu/src/nodes/kernels/rdft_kernel.hpp
Normal file
@ -0,0 +1,96 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cpu/x64/jit_generator.hpp"
|
||||
#include <dnnl_types.h>
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
using namespace dnnl::impl;
|
||||
using namespace dnnl::impl::cpu::x64;
|
||||
using namespace dnnl::impl::utils;
|
||||
|
||||
// Selects which kind of values a DFT kernel reads and which kind it writes.
enum dft_type {
    real_to_complex = 0,     // real input, complex output
    complex_to_complex = 1,  // complex input, complex output
    complex_to_real = 2,     // complex input, real output
};
|
||||
|
||||
// Size in bytes of a complex number stored as two consecutive values of type T
// (real part followed by imaginary part).
// constexpr so that the result can also be used in constant expressions.
template <typename T>
constexpr size_t complex_type_size() {
    return sizeof(T) * 2;
}
|
||||
|
||||
// Argument block handed to a generated DFT kernel. The kernel addresses the
// fields by their byte offset inside this struct, so the layout is part of
// the contract between the caller and the generated code.
struct jit_dft_args {
    // buffers
    const void* input;     // first input element
    const void* twiddles;  // precomputed twiddle factors
    void* output;          // first element of the output buffer

    // sizes and range, counted in elements
    size_t input_size;     // input elements visited for every output element
    size_t signal_size;    // signal length; inverse kernels divide the result by it
    size_t output_start;   // index of the first output element to produce
    size_t output_end;     // NOTE(review): the kernel counts this value down to zero - confirm its meaning with the caller
};
|
||||
|
||||
|
||||
struct jit_dft_kernel {
|
||||
jit_dft_kernel(bool is_inverse, enum dft_type type) : is_inverse_(is_inverse), kernel_type_(type) {}
|
||||
|
||||
void (*ker_)(const jit_dft_args*);
|
||||
|
||||
void operator()(const jit_dft_args* args) {
|
||||
assert(ker_);
|
||||
ker_(args);
|
||||
}
|
||||
|
||||
jit_dft_kernel() : ker_(nullptr) {}
|
||||
virtual ~jit_dft_kernel() {}
|
||||
|
||||
virtual void create_ker() = 0;
|
||||
|
||||
bool is_inverse_;
|
||||
enum dft_type kernel_type_;
|
||||
};
|
||||
|
||||
template <cpu_isa_t isa>
|
||||
struct jit_dft_kernel_f32 : public jit_dft_kernel, public jit_generator {
|
||||
public:
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_dft_kernel_f32)
|
||||
|
||||
jit_dft_kernel_f32(bool is_inverse, enum dft_type type) : jit_dft_kernel(is_inverse, type), jit_generator() {}
|
||||
|
||||
void create_ker() override {
|
||||
jit_generator::create_kernel();
|
||||
ker_ = (decltype(ker_))jit_ker();
|
||||
}
|
||||
|
||||
void generate() override;
|
||||
|
||||
private:
|
||||
void uni_vbroadcastsd(const Xbyak::Xmm& x, const Xbyak::Operand& op);
|
||||
void uni_vbroadcastsd(const Xbyak::Ymm& x, const Xbyak::Operand& op);
|
||||
|
||||
void uni_vpermilps(const Xbyak::Xmm& x, const Xbyak::Operand& op, int8_t control);
|
||||
void uni_vpermilps(const Xbyak::Ymm& x, const Xbyak::Operand& op, int8_t control);
|
||||
|
||||
void load_and_broadcast_every_other_elem(const Xbyak::Zmm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp);
|
||||
void load_and_broadcast_every_other_elem(const Xbyak::Ymm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp);
|
||||
void load_and_broadcast_every_other_elem(const Xbyak::Xmm& x, const Xbyak::RegExp& reg_exp, const Xbyak::Xmm& tmp);
|
||||
|
||||
int type_size = sizeof(float);
|
||||
|
||||
Xbyak::Reg8 is_signal_size_even = al;
|
||||
Xbyak::Reg64 input_ptr = rbx;
|
||||
Xbyak::Reg64 input_size = r8;
|
||||
Xbyak::Reg64 output_ptr = r9;
|
||||
Xbyak::Reg64 twiddles_ptr = r10;
|
||||
Xbyak::Reg64 signal_size = r11;
|
||||
Xbyak::Reg64 output_start = r12;
|
||||
Xbyak::Reg64 output_end = r13;
|
||||
};
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
927
src/plugins/intel_cpu/src/nodes/rdft.cpp
Normal file
927
src/plugins/intel_cpu/src/nodes/rdft.cpp
Normal file
@ -0,0 +1,927 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
#include <dnnl_extension_utils.h>
|
||||
#include <onednn/dnnl.h>
|
||||
#include <cpu/x64/cpu_isa_traits.hpp>
|
||||
#include <cpu/x64/jit_generator.hpp>
|
||||
#include <common/primitive_hashing_utils.hpp>
|
||||
|
||||
#include "rdft.h"
|
||||
#include "ie_parallel.hpp"
|
||||
#include "ie_precision.hpp"
|
||||
|
||||
#include "utils/general_utils.h"
|
||||
#include "common/cpu_memcpy.h"
|
||||
#include <openvino/op/rdft.hpp>
|
||||
#include <openvino/op/irdft.hpp>
|
||||
#include <openvino/op/constant.hpp>
|
||||
|
||||
using namespace dnnl;
|
||||
using namespace InferenceEngine;
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
namespace node {
|
||||
|
||||
|
||||
static constexpr size_t DATA_INDEX = 0;
|
||||
static constexpr size_t AXES_INDEX = 1;
|
||||
static constexpr size_t SIGNAL_SIZE_INDEX = 2;
|
||||
static constexpr double PI = 3.14159265358979323846;
|
||||
|
||||
|
||||
// Reports whether `op` can be handled by this node: it must have static shapes
// and be an opset9 RDFT or IRDFT. On rejection for one of these reasons the
// cause is written to `errorMessage`; if the check itself throws, false is
// returned and `errorMessage` is left as it was.
bool RDFT::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (isDynamicNgraphNode(op)) {
            errorMessage = "Doesn't support op with dynamic shapes";
            return false;
        }

        const bool isSupportedType = is_type<const ov::op::v9::RDFT>(op) ||
                                     is_type<const ov::op::v9::IRDFT>(op);
        if (isSupportedType) {
            return true;
        }

        errorMessage = "Only opset9 RDFT/IRDFT operation is supported";
        return false;
    } catch (...) {
        return false;
    }
}
|
||||
|
||||
// Rewrites every negative axis in `axes` in place by adding `rank` to it;
// non-negative axes are left untouched.
static void normalizeAxes(std::vector<int>& axes, size_t rank) {
    for (size_t i = 0; i < axes.size(); ++i) {
        if (axes[i] >= 0) {
            continue;
        }
        axes[i] += rank;
    }
}
|
||||
|
||||
static std::vector<int> getDefaultSignalSizes(const VectorDims& inputShape, const std::vector<int>& axes, bool inverse) {
|
||||
std::vector<int> signalSizes;
|
||||
signalSizes.reserve(axes.size());
|
||||
|
||||
for (auto axis : axes) {
|
||||
signalSizes.push_back(inputShape[axis]);
|
||||
}
|
||||
if (inverse) {
|
||||
signalSizes[signalSizes.size() - 1] = 2 * (inputShape[axes.back()] - 1);
|
||||
}
|
||||
|
||||
return signalSizes;
|
||||
}
|
||||
|
||||
// Builds the node from an opset9 RDFT/IRDFT operation.
//
// Validates the number of inputs and the rank of the 'axes' and 'signalSize'
// inputs, records whether the transform is inverse, and caches `axes` and
// `signalSizes` when they are available as constants. If the signal size or
// the axes input is not a constant, construction stops early and the values
// that were not read stay empty.
//
// Throws NotImplemented if the operation is not supported, and a general
// exception if the number of inputs or an input rank is invalid.
RDFT::RDFT(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) :
               Node(op, eng, cache) {
    std::string errorMessage;
    if (!isSupportedOperation(op, errorMessage)) {
        IE_THROW(NotImplemented) << errorMessage;
    }

    // Assign the prefix instead of declaring a local with the same name: a
    // local would shadow the one read by initSupportedPrimitiveDescriptors()
    // and leave it empty.
    errorMsgPrefix = "RDFT layer with name '" + op->get_name() + "'";
    const size_t numInputs = getOriginalInputsNumber();
    if (numInputs != 2 && numInputs != 3) {
        IE_THROW() << errorMsgPrefix << " has invalid number of input/output edges: " << numInputs;
    }

    const auto axesRank = inputShapes[AXES_INDEX].getRank();
    if (axesRank != 1) {
        IE_THROW() << errorMsgPrefix << " has invalid 'axes' input tensor with rank: " << axesRank;
    }

    inverse = ov::is_type<ov::op::v9::IRDFT>(op);

    if (numInputs > 2) {
        const auto signalSizeRank = inputShapes[SIGNAL_SIZE_INDEX].getRank();
        if (signalSizeRank != 1) {
            IE_THROW() << errorMsgPrefix << " has invalid 'signalSize' input tensor with rank: " << signalSizeRank;
        }
        auto signalSizesNode = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(SIGNAL_SIZE_INDEX));
        // non-constant signal size: nothing more can be cached here
        if (!signalSizesNode)
            return;
        signalSizes = signalSizesNode->cast_vector<int>();
    }

    auto axesNode = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXES_INDEX));
    // non-constant axes: they are left empty here
    if (!axesNode)
        return;

    axes = axesNode->cast_vector<int>();
    // for the inverse transform the trailing (real, imag) dimension does not count as a signal axis
    auto rank = inputShapes[DATA_INDEX].getRank() - inverse;
    normalizeAxes(axes, rank);

    if (numInputs < 3) {
        const auto& inputShape = inputShapes[DATA_INDEX].getStaticDims();
        signalSizes = getDefaultSignalSizes(inputShape, axes, inverse);
    }
}
|
||||
|
||||
void RDFT::getSupportedDescriptors() {
    // Intentionally empty: this node has nothing to do in this step.
}
|
||||
|
||||
void RDFT::initSupportedPrimitiveDescriptors() {
|
||||
if (!supportedPrimitiveDescriptors.empty())
|
||||
return;
|
||||
|
||||
const auto& dataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX);
|
||||
if (!dataPrecision.is_float()) {
|
||||
IE_THROW() << errorMsgPrefix << " has unsupported 'data' input precision: " << dataPrecision.name();
|
||||
}
|
||||
|
||||
const auto& axesPrecision = getOriginalInputPrecisionAtPort(AXES_INDEX);
|
||||
if (axesPrecision != Precision::I32 && axesPrecision != Precision::I64) {
|
||||
IE_THROW() << errorMsgPrefix << " has unsupported 'axes' input precision: " << axesPrecision.name();
|
||||
}
|
||||
|
||||
if (inputShapes.size() > SIGNAL_SIZE_INDEX) {
|
||||
const auto& signalSizePrecision = getOriginalInputPrecisionAtPort(SIGNAL_SIZE_INDEX);
|
||||
if (signalSizePrecision != Precision::I32 && signalSizePrecision != Precision::I64) {
|
||||
IE_THROW() << errorMsgPrefix << " has unsupported 'signalSize' input precision: " << signalSizePrecision.name();
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<PortConfigurator> configurators({{LayoutType::ncsp, Precision::FP32},
|
||||
{LayoutType::ncsp, Precision::I32}});
|
||||
if (inputShapes.size() > SIGNAL_SIZE_INDEX)
|
||||
configurators.push_back({LayoutType::ncsp, Precision::I32});
|
||||
|
||||
addSupportedPrimDesc(configurators, {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any);
|
||||
}
|
||||
|
||||
/**
 * @brief Runs the transform on the current input and output memory.
 *
 * `axes`, `signalSizes` and `twiddles` are filled here only when they are still
 * empty: axes and signal sizes are read as 32-bit integers from the corresponding
 * input memory (or, without a 'signalSize' input, derived from the input shape),
 * and the twiddles are generated from them. The work itself is delegated to the
 * executor.
 */
void RDFT::execute(dnnl::stream strm) {
    const auto& srcMemory = getParentEdgeAt(DATA_INDEX)->getMemory();
    const auto& dstMemory = getChildEdgeAt(0)->getMemory();
    const auto& inputShape = srcMemory.getStaticDims();
    const auto& outputShape = dstMemory.getStaticDims();

    auto srcData = reinterpret_cast<float*>(srcMemory.GetPtr());
    auto dstData = reinterpret_cast<float*>(dstMemory.GetPtr());

    // For the inverse operation one dimension is subtracted from the input rank.
    auto rank = inputShape.size() - inverse;

    if (axes.empty()) {
        const auto axesMemory = getParentEdgeAt(AXES_INDEX)->getMemoryPtr();
        auto axesData = reinterpret_cast<const int32_t*>(axesMemory->GetPtr());
        axes = std::vector<int>(axesData, axesData + axesMemory->getStaticDims()[0]);
        normalizeAxes(axes, rank);
    }

    if (signalSizes.empty()) {
        if (SIGNAL_SIZE_INDEX >= getOriginalInputsNumber()) {
            signalSizes = getDefaultSignalSizes(inputShape, axes, inverse);
        } else {
            const auto signalSizeMemory = getParentEdgeAt(SIGNAL_SIZE_INDEX)->getMemoryPtr();
            auto signalData = reinterpret_cast<const int32_t*>(signalSizeMemory->GetPtr());
            signalSizes = std::vector<int>(signalData, signalData + signalSizeMemory->getStaticDims()[0]);
        }
    }

    const auto& inputStrides = srcMemory.GetDescWithType<BlockedMemoryDesc>()->getStrides();
    const auto& outputStrides = dstMemory.GetDescWithType<BlockedMemoryDesc>()->getStrides();

    if (twiddles.empty()) {
        twiddles = executor->generateTwiddles(signalSizes, outputShape, axes);
    }

    executor->execute(srcData, dstData,
                      twiddles, rank,
                      axes, signalSizes,
                      inputShape, outputShape,
                      inputStrides, outputStrides);
}
|
||||
|
||||
bool RDFT::created() const {
|
||||
return getType() == Type::RDFT;
|
||||
}
|
||||
|
||||
/**
 * Shrinks `inputShape` along the transformed axes to the requested signal sizes.
 *
 * For each axis whose signal size does not exceed the current input size the
 * input size is replaced by the signal size. A larger signal size is rejected
 * for the forward transform and left untouched for the inverse one. For the
 * inverse transform the size along the last axis is finally set to
 * `signalSizes.back() / 2 + 1`.
 *
 * `outputShape` is not used by this function.
 */
static void adjustInputSize(VectorDims& inputShape,
                            std::vector<int>& signalSizes,
                            const VectorDims& outputShape,
                            const std::vector<int>& axes,
                            bool isInverse) {
    for (size_t idx = 0; idx != axes.size(); ++idx) {
        auto& axisDim = inputShape[axes[idx]];
        // The int signal size is converted to size_t before the comparison.
        const size_t requested = signalSizes[idx];
        if (requested > axisDim) {
            if (!isInverse) {
                IE_THROW() << "Signal size greater than input size is not supported yet";
            }
            continue;
        }
        axisDim = requested;
    }

    if (isInverse) {
        inputShape[axes.back()] = signalSizes.back() / 2 + 1;
    }
}
|
||||
|
||||
void RDFTExecutor::execute(float* inputPtr, float* outputPtr,
|
||||
const std::vector<std::vector<float>>& twiddles,
|
||||
size_t rank, const std::vector<int>& axes,
|
||||
std::vector<int> signalSizes,
|
||||
VectorDims inputShape, const VectorDims& outputShape,
|
||||
const VectorDims& inputStrides, const VectorDims& outputStrides) {
|
||||
adjustInputSize(inputShape, signalSizes, outputShape, axes, isInverse);
|
||||
|
||||
if (rank == 1) {
|
||||
auto twiddlesPtr = twiddles[0].data();
|
||||
dftCommon(inputPtr, twiddlesPtr, outputPtr,
|
||||
inputShape[0], signalSizes[0], outputShape[0],
|
||||
isInverse ? complex_to_real : real_to_complex,
|
||||
canUseFFT(signalSizes[0]), false);
|
||||
} else {
|
||||
if (!isInverse)
|
||||
rdftNd(inputPtr, outputPtr, twiddles, axes, signalSizes, inputShape, inputStrides, outputShape, outputStrides);
|
||||
else
|
||||
irdftNd(inputPtr, outputPtr, twiddles, axes, signalSizes, inputShape, inputStrides, outputShape, outputStrides);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Decomposes a linear index into per-dimension coordinates, last dimension first.
 *
 * The dimension `excludeAxis` does not take part in the decomposition: its
 * coordinate is set to 0 and `index` is neither reduced nor divided by its size.
 *
 * @param index       linear index to decompose
 * @param coords      output coordinates; its size defines the number of dimensions
 * @param shape       size of every dimension
 * @param excludeAxis dimension to skip (compared as an unsigned value)
 */
static void coordsFromIndex(size_t index, std::vector<size_t>& coords, const std::vector<size_t>& shape, int excludeAxis) {
    const size_t skipped = static_cast<size_t>(excludeAxis);
    size_t remaining = index;
    size_t dim = coords.size();
    while (dim-- > 0) {
        if (dim == skipped) {
            coords[dim] = 0;
            continue;
        }
        coords[dim] = remaining % shape[dim];
        remaining /= shape[dim];
    }
}
|
||||
|
||||
/**
 * Returns the linear offset of `coords`, i.e. the sum of coords[i] * strides[i]
 * over all entries of `coords`.
 */
static size_t getOffset(const std::vector<size_t>& coords, const std::vector<size_t>& strides) {
    return std::inner_product(coords.begin(), coords.end(), strides.begin(), size_t{0});
}
|
||||
|
||||
static void gatherReal(float* output, const float* input, size_t axis,
|
||||
const std::vector<size_t>& coords,
|
||||
size_t size, const std::vector<size_t>& strides) {
|
||||
size_t inputOffset = getOffset(coords, strides);
|
||||
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
output[i] = input[inputOffset];
|
||||
inputOffset += strides[axis];
|
||||
}
|
||||
}
|
||||
|
||||
static void gatherComplex(float* output, const float* input, size_t axis,
|
||||
const std::vector<size_t>& coords,
|
||||
size_t size, const std::vector<size_t>& strides) {
|
||||
size_t inputOffset = getOffset(coords, strides);
|
||||
|
||||
for (size_t i = 0; i < 2 * size; i += 2) {
|
||||
output[i] = input[inputOffset];
|
||||
output[i + 1] = input[inputOffset + 1];
|
||||
inputOffset += strides[axis];
|
||||
}
|
||||
}
|
||||
|
||||
static void scatterReal(float* output, const float* input, size_t axis,
|
||||
const std::vector<size_t>& coords,
|
||||
size_t size, const std::vector<size_t>& strides) {
|
||||
size_t offset = getOffset(coords, strides);
|
||||
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
output[offset] = input[i];
|
||||
offset += strides[axis];
|
||||
}
|
||||
}
|
||||
|
||||
static void scatterComplex(float* output, const float* input, size_t axis,
|
||||
const std::vector<size_t>& coords,
|
||||
size_t size, const std::vector<size_t>& strides) {
|
||||
size_t offset = getOffset(coords, strides);
|
||||
|
||||
for (size_t i = 0; i < 2 * size; i += 2) {
|
||||
output[offset] = input[i];
|
||||
output[offset + 1] = input[i + 1];
|
||||
offset += strides[axis];
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true when `n` is non-zero and has exactly one bit set.
static bool isPowerOfTwo(size_t n) {
    if (n == 0) {
        return false;
    }
    // Clearing the lowest set bit leaves zero only for powers of two.
    const size_t withoutLowestBit = n & (n - 1);
    return withoutLowestBit == 0;
}
|
||||
|
||||
/// Returns `vlen` divided by the size in bytes of two floats.
static size_t dftSimdSize(int vlen) {
    constexpr size_t twoFloatsBytes = 2 * sizeof(float);
    return vlen / twoFloatsBytes;
}
|
||||
|
||||
bool RDFTExecutor::canUseFFT(size_t dim) {
|
||||
return isPowerOfTwo(dim) && dim > 1;
|
||||
}
|
||||
|
||||
static void fftCopyInverseInputData(float* dst, float* src, size_t inputSize, size_t signalSize, bool parallelize) {
|
||||
if (!parallelize) {
|
||||
cpu_memcpy(dst, src, inputSize * complex_type_size<float>());
|
||||
src = src + 2 * inputSize - 4;
|
||||
for (size_t i = inputSize; i < signalSize; i++, src -= 2) {
|
||||
dst[2 * i] = src[0];
|
||||
dst[2 * i + 1] = -src[1];
|
||||
}
|
||||
} else {
|
||||
parallel_for(signalSize, [&] (size_t i) {
|
||||
if (i < inputSize) {
|
||||
dst[2 * i] = src[2 * i];
|
||||
dst[2 * i + 1] = src[2 * i + 1];
|
||||
} else {
|
||||
size_t src_idx = 2 * inputSize - 2 - i;
|
||||
dst[2 * i] = src[2 * src_idx];
|
||||
dst[2 * i + 1] = -src[2 * src_idx + 1];
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Expands `inputSize` real values from `src` into interleaved pairs in `dst`:
 * the first float of each pair is the source value, the second is 0.
 *
 * @param parallelize when true the copy is done through parallel_for
 */
static void fftCopyRealInputData(float* dst, float* src, size_t inputSize, bool parallelize) {
    const auto widen = [&] (size_t pos) {
        dst[2 * pos] = src[pos];
        dst[2 * pos + 1] = 0;
    };

    if (parallelize) {
        parallel_for(inputSize, widen);
        return;
    }
    for (size_t pos = 0; pos < inputSize; ++pos) {
        widen(pos);
    }
}
|
||||
|
||||
/**
 * Copies the first float of each of `signalSize` interleaved pairs in `src`
 * into the contiguous `dst`.
 *
 * @param parallelize when true the copy is done through parallel_for
 */
static void fftCopyInverseRealOutput(float* dst, float* src, size_t signalSize, bool parallelize) {
    const auto takeFirst = [&] (size_t pos) {
        dst[pos] = src[2 * pos];
    };

    if (parallelize) {
        parallel_for(signalSize, takeFirst);
        return;
    }
    for (size_t pos = 0; pos < signalSize; ++pos) {
        takeFirst(pos);
    }
}
|
||||
|
||||
void RDFTExecutor::fft(float* input, const float* twiddlesPtr, float* output,
|
||||
size_t inputSize, size_t signalSize, size_t outputSize,
|
||||
enum dft_type type, bool parallelize) {
|
||||
std::vector<float> scratchSpace(4 * signalSize, 0);
|
||||
|
||||
float* inputPtr = input;
|
||||
float* outputPtr = &scratchSpace[2 * signalSize];
|
||||
|
||||
if (inputSize < signalSize || type == real_to_complex) {
|
||||
if (isInverse)
|
||||
fftCopyInverseInputData(&scratchSpace[0], input, inputSize, signalSize, parallelize);
|
||||
else if (type == real_to_complex)
|
||||
fftCopyRealInputData(&scratchSpace[0], input, inputSize, parallelize);
|
||||
inputPtr = &scratchSpace[0];
|
||||
}
|
||||
|
||||
size_t numBlocks = 0;
|
||||
size_t blockSize = 0;
|
||||
|
||||
auto blockIteration = [&] (size_t block) {
|
||||
size_t inputOffset = block * blockSize;
|
||||
size_t outputOffset = block * blockSize / 2;
|
||||
float cos = twiddlesPtr[2 * block];
|
||||
float sin = twiddlesPtr[2 * block + 1];
|
||||
if (isInverse)
|
||||
sin = -sin;
|
||||
for (size_t pair = 0; pair < blockSize / 2; pair++) {
|
||||
float evenReal = inputPtr[2 * (inputOffset + pair)];
|
||||
float evenImag = inputPtr[2 * (inputOffset + pair) + 1];
|
||||
float oddReal = inputPtr[2 * (inputOffset + blockSize / 2 + pair)];
|
||||
float oddImag = inputPtr[2 * (inputOffset + blockSize / 2 + pair) + 1];
|
||||
outputPtr[2 * (outputOffset + pair)] = evenReal + cos * oddReal - sin * oddImag;
|
||||
outputPtr[2 * (outputOffset + pair) + 1] = evenImag + cos * oddImag + sin * oddReal;
|
||||
outputPtr[2 * (outputOffset + signalSize / 2 + pair)] = evenReal - cos * oddReal + sin * oddImag;
|
||||
outputPtr[2 * (outputOffset + signalSize / 2 + pair) + 1] = evenImag - cos * oddImag - sin * oddReal;
|
||||
if (isInverse && numBlocks == signalSize / 2) {
|
||||
outputPtr[2 * (outputOffset + pair)] /= signalSize;
|
||||
outputPtr[2 * (outputOffset + pair) + 1] /= signalSize;
|
||||
outputPtr[2 * (outputOffset + signalSize / 2 + pair)] /= signalSize;
|
||||
outputPtr[2 * (outputOffset + signalSize / 2 + pair) + 1] /= signalSize;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
for (numBlocks = 1; numBlocks < signalSize; numBlocks *= 2) {
|
||||
blockSize = signalSize / numBlocks;
|
||||
if (numBlocks == signalSize / 2 && outputSize == signalSize && type != complex_to_real) {
|
||||
outputPtr = output;
|
||||
}
|
||||
if (parallelize) {
|
||||
parallel_for(numBlocks, blockIteration);
|
||||
} else {
|
||||
for (size_t block = 0; block < numBlocks; block++) {
|
||||
blockIteration(block);
|
||||
}
|
||||
}
|
||||
twiddlesPtr += numBlocks * 2;
|
||||
if (numBlocks == 1 && inputPtr == input)
|
||||
inputPtr = &scratchSpace[0];
|
||||
std::swap(inputPtr, outputPtr);
|
||||
}
|
||||
|
||||
if (type == complex_to_real) {
|
||||
fftCopyInverseRealOutput(output, inputPtr, signalSize, parallelize);
|
||||
} else if (outputSize != signalSize) {
|
||||
cpu_memcpy(output, inputPtr, outputSize * complex_type_size<float>());
|
||||
}
|
||||
}
|
||||
|
||||
void RDFTExecutor::dftCommon(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
|
||||
size_t inputSize, size_t signalSize, size_t outputSize,
|
||||
enum dft_type type, bool useFFT, bool parallelize) {
|
||||
if (useFFT) {
|
||||
fft(inputPtr, twiddlesPtr, outputPtr,
|
||||
inputSize, signalSize, outputSize,
|
||||
type, parallelize);
|
||||
} else {
|
||||
dft(inputPtr, twiddlesPtr, outputPtr,
|
||||
inputSize, signalSize, outputSize,
|
||||
type, parallelize);
|
||||
}
|
||||
}
|
||||
|
||||
void RDFTExecutor::dftOnAxis(enum dft_type type,
|
||||
float* inputPtr, float* outputPtr,
|
||||
const float* twiddlesPtr, int axis,
|
||||
size_t signalSize,
|
||||
const VectorDims& inputShape,
|
||||
const VectorDims& inputStrides,
|
||||
const VectorDims& outputShape,
|
||||
const VectorDims& outputStrides,
|
||||
const std::vector<size_t>& iterationRange) {
|
||||
size_t inputSize = inputShape[axis];
|
||||
size_t outputSize = outputShape[axis];
|
||||
|
||||
void (*gather)(float* output, const float* input,
|
||||
size_t axis, const std::vector<size_t>& coords,
|
||||
size_t size, const std::vector<size_t>& strides) = nullptr;
|
||||
void (*scatter)(float* output, const float* input,
|
||||
size_t axis, const std::vector<size_t>& coords,
|
||||
size_t size, const std::vector<size_t>& strides) = nullptr;
|
||||
|
||||
size_t gatherSize = 0;
|
||||
size_t scatterSize = 0;
|
||||
|
||||
switch (type) {
|
||||
case real_to_complex:
|
||||
scatter = scatterComplex;
|
||||
gather = gatherReal;
|
||||
gatherSize = inputSize;
|
||||
scatterSize = outputSize * 2;
|
||||
break;
|
||||
case complex_to_complex:
|
||||
gather = gatherComplex;
|
||||
scatter = scatterComplex;
|
||||
gatherSize = inputSize * 2;
|
||||
scatterSize = outputSize * 2;
|
||||
break;
|
||||
case complex_to_real:
|
||||
gather = gatherComplex;
|
||||
scatter = scatterReal;
|
||||
gatherSize = inputSize * 2;
|
||||
scatterSize = outputSize;
|
||||
break;
|
||||
}
|
||||
|
||||
bool useFFT = canUseFFT(signalSize);
|
||||
|
||||
size_t totalWorkSize = std::accumulate(iterationRange.begin(),
|
||||
iterationRange.end(),
|
||||
1, std::multiplies<size_t>()) / iterationRange[axis];
|
||||
bool parallelizeOuterAxes = totalWorkSize > signalSize;
|
||||
|
||||
if (parallelizeOuterAxes) {
|
||||
parallel_for(totalWorkSize, [&] (size_t i) {
|
||||
std::vector<size_t> coords(iterationRange.size(), 0);
|
||||
std::vector<float> gatherScatterBuffer(gatherSize + scatterSize);
|
||||
float* gatherBuffer = &gatherScatterBuffer[0];
|
||||
float* scatterBuffer = &gatherScatterBuffer[gatherSize];
|
||||
coordsFromIndex(i, coords, iterationRange, axis);
|
||||
gather(gatherBuffer, inputPtr,
|
||||
axis, coords,
|
||||
inputSize, inputStrides);
|
||||
dftCommon(gatherBuffer, twiddlesPtr, scatterBuffer,
|
||||
inputSize, signalSize, outputSize,
|
||||
type, useFFT, !parallelizeOuterAxes);
|
||||
scatter(outputPtr, scatterBuffer, axis, coords, outputSize, outputStrides);
|
||||
});
|
||||
} else {
|
||||
std::vector<size_t> coords(iterationRange.size(), 0);
|
||||
std::vector<float> gatherScatterBuffer(gatherSize + scatterSize);
|
||||
float* gatherBuffer = &gatherScatterBuffer[0];
|
||||
float* scatterBuffer = &gatherScatterBuffer[gatherSize];
|
||||
for (size_t i = 0; i < totalWorkSize; i++) {
|
||||
coordsFromIndex(i, coords, iterationRange, axis);
|
||||
gather(gatherBuffer, inputPtr,
|
||||
axis, coords,
|
||||
inputSize, inputStrides);
|
||||
dftCommon(gatherBuffer, twiddlesPtr, scatterBuffer,
|
||||
inputSize, signalSize, outputSize,
|
||||
type, useFFT, !parallelizeOuterAxes);
|
||||
scatter(outputPtr, scatterBuffer, axis, coords, outputSize, outputStrides);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// N-dimensional real DFT
|
||||
void RDFTExecutor::rdftNd(float* inputPtr, float* outputPtr,
|
||||
const std::vector<std::vector<float>>& twiddles,
|
||||
const std::vector<int>& axes,
|
||||
const std::vector<int>& signalSizes,
|
||||
const VectorDims& inputShape,
|
||||
const VectorDims& inputStrides,
|
||||
const VectorDims& outputShape,
|
||||
const VectorDims& outputStrides) {
|
||||
const std::vector<size_t> iterationRange(outputShape.begin(), outputShape.end() - 1);
|
||||
|
||||
dftOnAxis(real_to_complex, inputPtr, outputPtr,
|
||||
twiddles.back().data(), axes.back(),
|
||||
signalSizes.back(),
|
||||
inputShape, inputStrides,
|
||||
outputShape, outputStrides,
|
||||
iterationRange);
|
||||
inputPtr = outputPtr;
|
||||
|
||||
for (size_t i = 0; i < axes.size() - 1; i++) {
|
||||
auto axis = axes[i];
|
||||
dftOnAxis(complex_to_complex, inputPtr, outputPtr,
|
||||
twiddles[i].data(), axis,
|
||||
signalSizes[i],
|
||||
outputShape, outputStrides,
|
||||
outputShape, outputStrides,
|
||||
iterationRange);
|
||||
}
|
||||
}
|
||||
|
||||
// N-dimensional real inverse DFT
|
||||
void RDFTExecutor::irdftNd(float* inputPtr, float* outputPtr,
|
||||
const std::vector<std::vector<float>>& twiddles,
|
||||
const std::vector<int>& axes,
|
||||
const std::vector<int>& signalSizes,
|
||||
const VectorDims& inputShape,
|
||||
const VectorDims& originalInputStrides,
|
||||
const VectorDims& outputShape,
|
||||
const VectorDims& outputStrides) {
|
||||
const std::vector<size_t> iterationRange(inputShape.begin(), inputShape.end() - 1);
|
||||
|
||||
if (axes.size() == 1) {
|
||||
dftOnAxis(complex_to_real, inputPtr, outputPtr,
|
||||
twiddles[0].data(), axes[0],
|
||||
signalSizes[0],
|
||||
inputShape, originalInputStrides,
|
||||
outputShape, outputStrides,
|
||||
iterationRange);
|
||||
return;
|
||||
}
|
||||
|
||||
float* output = outputPtr;
|
||||
std::vector<float> tmp;
|
||||
size_t inputShapeSize = std::accumulate(inputShape.begin(), inputShape.end(), 1, std::multiplies<size_t>());
|
||||
size_t outputShapeSize = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<size_t>());
|
||||
if (inputShapeSize > outputShapeSize) {
|
||||
tmp.resize(inputShapeSize);
|
||||
output = &tmp[0];
|
||||
}
|
||||
|
||||
std::vector<size_t> inputStrides(originalInputStrides.size());
|
||||
inputStrides[originalInputStrides.size() - 1] = 1;
|
||||
for (size_t i = inputStrides.size() - 1; i > 0; i--) {
|
||||
inputStrides[i - 1] = inputStrides[i] * inputShape[i];
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < axes.size() - 1; i++) {
|
||||
auto axis = axes[i];
|
||||
dftOnAxis(complex_to_complex, inputPtr, output,
|
||||
twiddles[i].data(), axis,
|
||||
signalSizes[i],
|
||||
inputShape, originalInputStrides,
|
||||
inputShape, inputStrides,
|
||||
iterationRange);
|
||||
inputPtr = output;
|
||||
}
|
||||
dftOnAxis(complex_to_real, inputPtr, outputPtr,
|
||||
twiddles.back().data(), axes.back(),
|
||||
signalSizes.back(),
|
||||
inputShape, inputStrides,
|
||||
outputShape, outputStrides,
|
||||
iterationRange);
|
||||
}
|
||||
|
||||
std::vector<float> RDFTExecutor::generateTwiddlesFFT(size_t N) {
|
||||
std::vector<float> twiddles;
|
||||
for (size_t numBlocks = 1; numBlocks < N; numBlocks *= 2) {
|
||||
for (size_t block = 0; block < numBlocks; block++) {
|
||||
double angle = 2 * PI * block / (numBlocks * 2);
|
||||
twiddles.push_back(std::cos(angle));
|
||||
twiddles.push_back(-std::sin(angle));
|
||||
}
|
||||
}
|
||||
return twiddles;
|
||||
}
|
||||
|
||||
std::vector<float> RDFTExecutor::generateTwiddlesCommon(size_t signalSize, size_t outputSize,
|
||||
enum dft_type type, bool useFFT) {
|
||||
if (useFFT) {
|
||||
return generateTwiddlesFFT(signalSize);
|
||||
}
|
||||
return generateTwiddlesDFT(signalSize, outputSize, type);
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> RDFTExecutor::generateTwiddles(const std::vector<int>& signalSizes,
|
||||
const std::vector<size_t>& outputShape,
|
||||
const std::vector<int>& axes) {
|
||||
std::vector<std::vector<float>> twiddles;
|
||||
twiddles.reserve(axes.size());
|
||||
for (size_t i = 0; i < axes.size(); i++) {
|
||||
auto axis = axes[i];
|
||||
size_t N = signalSizes[i];
|
||||
size_t K = outputShape[axis];
|
||||
auto type = complex_to_complex;
|
||||
if (i == axes.size() - 1)
|
||||
type = isInverse ? complex_to_real : real_to_complex;
|
||||
twiddles.push_back(generateTwiddlesCommon(N, K, type, canUseFFT(N)));
|
||||
}
|
||||
return twiddles;
|
||||
}
|
||||
|
||||
struct RDFTJitExecutor : public RDFTExecutor {
|
||||
RDFTJitExecutor(bool inverse, NodeDesc* primDesc) : RDFTExecutor(inverse) {
|
||||
enum dft_type rdftType = isInverse ? complex_to_real : real_to_complex;
|
||||
if (mayiuse(cpu::x64::avx512_core)) {
|
||||
rdftKernel.reset(new jit_dft_kernel_f32<cpu::x64::avx512_core>(isInverse, rdftType));
|
||||
dftKernel.reset(new jit_dft_kernel_f32<cpu::x64::avx512_core>(isInverse, complex_to_complex));
|
||||
vlen = cpu_isa_traits<cpu::x64::avx512_core>::vlen;
|
||||
primDesc->setImplementationType(jit_avx512);
|
||||
} else if (mayiuse(cpu::x64::avx2)) {
|
||||
rdftKernel.reset(new jit_dft_kernel_f32<cpu::x64::avx2>(isInverse, rdftType));
|
||||
dftKernel.reset(new jit_dft_kernel_f32<cpu::x64::avx2>(isInverse, complex_to_complex));
|
||||
vlen = cpu_isa_traits<cpu::x64::avx2>::vlen;
|
||||
primDesc->setImplementationType(jit_avx2);
|
||||
} else if (mayiuse(cpu::x64::sse41)) {
|
||||
rdftKernel.reset(new jit_dft_kernel_f32<cpu::x64::sse41>(isInverse, rdftType));
|
||||
dftKernel.reset(new jit_dft_kernel_f32<cpu::x64::sse41>(isInverse, complex_to_complex));
|
||||
vlen = cpu_isa_traits<cpu::x64::sse41>::vlen;
|
||||
primDesc->setImplementationType(jit_sse42);
|
||||
} else {
|
||||
IE_THROW() << "Can't create RDFT kernel";
|
||||
}
|
||||
|
||||
if (rdftKernel)
|
||||
rdftKernel->create_ker();
|
||||
if (dftKernel)
|
||||
dftKernel->create_ker();
|
||||
}
|
||||
|
||||
std::vector<float> generateTwiddlesDFT(size_t inputSize, size_t outputSize, enum dft_type type) override {
|
||||
std::vector<float> twiddles(inputSize * outputSize * 2);
|
||||
int simdSize = vlen / sizeof(float);
|
||||
if (type == real_to_complex || type == complex_to_complex) {
|
||||
simdSize /= 2; // there are two floats per one complex element in the output
|
||||
}
|
||||
|
||||
parallel_for2d(outputSize / simdSize, inputSize, [&] (size_t K, size_t n) {
|
||||
if (type == real_to_complex) {
|
||||
for (size_t k = 0; k < simdSize; k++) {
|
||||
double angle = 2 * PI * (K * simdSize + k) * n / inputSize;
|
||||
twiddles[((K * inputSize + n) * simdSize + k) * 2] = std::cos(angle);
|
||||
twiddles[((K * inputSize + n) * simdSize + k) * 2 + 1] = -std::sin(angle);
|
||||
}
|
||||
} else if (type == complex_to_real || type == complex_to_complex) {
|
||||
for (size_t k = 0; k < simdSize; k++) {
|
||||
double angle = 2 * PI * (K * simdSize + k) * n / inputSize;
|
||||
twiddles[(K * inputSize + n) * 2 * simdSize + k] = std::cos(angle);
|
||||
}
|
||||
for (size_t k = 0; k < simdSize; k++) {
|
||||
double angle = 2 * PI * (K * simdSize + k) * n / inputSize;
|
||||
twiddles[((K * inputSize + n) * 2 + 1) * simdSize + k] = -std::sin(angle);
|
||||
}
|
||||
}
|
||||
});
|
||||
if ((outputSize % simdSize) != 0) {
|
||||
size_t start = (outputSize / simdSize) * simdSize;
|
||||
parallel_for2d(outputSize - start, inputSize, [&] (size_t k, size_t n) {
|
||||
k += start;
|
||||
double angle = 2 * PI * k * n / inputSize;
|
||||
twiddles[2 * (k * inputSize + n)] = std::cos(angle);
|
||||
twiddles[2 * (k * inputSize + n) + 1] = -std::sin(angle);
|
||||
});
|
||||
}
|
||||
return twiddles;
|
||||
}
|
||||
|
||||
void dft(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
|
||||
size_t inputSize, size_t signalSize, size_t outputSize,
|
||||
enum dft_type type, bool parallelize) override {
|
||||
jit_dft_kernel* kernel = type == complex_to_complex ? dftKernel.get() : rdftKernel.get();
|
||||
if (parallelize) {
|
||||
const int cachelineSize = 64;
|
||||
size_t blockSize = 4 * cachelineSize / sizeof(float);
|
||||
size_t numBlocks = (outputSize + blockSize - 1) / blockSize;
|
||||
parallel_nt(numBlocks, [&] (size_t i, size_t nthr) {
|
||||
if (numBlocks > nthr) {
|
||||
auto newBlockSize = (((outputSize / nthr) + blockSize - 1) / blockSize) * blockSize;
|
||||
blockSize = newBlockSize;
|
||||
numBlocks = nthr;
|
||||
}
|
||||
jit_dft_args args{};
|
||||
args.input = inputPtr,
|
||||
args.twiddles = twiddlesPtr,
|
||||
args.output = outputPtr,
|
||||
args.input_size = inputSize,
|
||||
args.signal_size = signalSize,
|
||||
args.output_start = i * blockSize,
|
||||
args.output_end = std::min(outputSize - i * blockSize, blockSize),
|
||||
(*kernel)(&args);
|
||||
});
|
||||
} else {
|
||||
jit_dft_args args{};
|
||||
args.input = inputPtr,
|
||||
args.twiddles = twiddlesPtr,
|
||||
args.output = outputPtr,
|
||||
args.input_size = inputSize,
|
||||
args.signal_size = signalSize,
|
||||
args.output_start = 0,
|
||||
args.output_end = outputSize,
|
||||
(*kernel)(&args);
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<jit_dft_kernel> rdftKernel = nullptr;
|
||||
std::unique_ptr<jit_dft_kernel> dftKernel = nullptr;
|
||||
|
||||
int vlen;
|
||||
};
|
||||
|
||||
|
||||
struct RDFTRefExecutor : public RDFTExecutor {
|
||||
RDFTRefExecutor(bool inverse) : RDFTExecutor(inverse) {}
|
||||
|
||||
private:
|
||||
std::vector<float> generateTwiddlesDFT(size_t inputSize, size_t outputSize, enum dft_type type) override {
|
||||
std::vector<float> twiddles(inputSize * outputSize * 2);
|
||||
parallel_for2d(outputSize, inputSize, [&] (size_t k, size_t n) {
|
||||
double angle = 2 * PI * k * n / inputSize;
|
||||
if (!isInverse)
|
||||
angle = -angle;
|
||||
twiddles[(k * inputSize + n) * 2] = std::cos(angle);
|
||||
twiddles[(k * inputSize + n) * 2 + 1] = std::sin(angle);
|
||||
});
|
||||
return twiddles;
|
||||
}
|
||||
|
||||
void dftRealToComplex(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
|
||||
size_t inputSize, size_t outputSize, bool parallelize) {
|
||||
auto dftIteration = [&] (size_t k) {
|
||||
float real = 0, imag = 0;
|
||||
for (size_t n = 0; n < inputSize; n++) {
|
||||
float cos = twiddlesPtr[2 * (k * inputSize + n)];
|
||||
float sin = twiddlesPtr[2 * (k * inputSize + n) + 1];
|
||||
real += inputPtr[n] * cos;
|
||||
imag += inputPtr[n] * sin;
|
||||
}
|
||||
outputPtr[2 * k] = real;
|
||||
outputPtr[2 * k + 1] = imag;
|
||||
};
|
||||
if (parallelize) {
|
||||
parallel_for(outputSize, dftIteration);
|
||||
} else {
|
||||
for (size_t k = 0; k < outputSize; k++) {
|
||||
dftIteration(k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dftComplexToComplex(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
|
||||
size_t inputSize, size_t signalSize, size_t outputSize, bool parallelize) {
|
||||
auto dftIteration = [&] (size_t k) {
|
||||
float real = 0, imag = 0;
|
||||
for (size_t n = 0; n < inputSize; n++) {
|
||||
float cos = twiddlesPtr[2 * (k * outputSize + n)];
|
||||
float sin = twiddlesPtr[2 * (k * outputSize + n) + 1];
|
||||
float inputReal = inputPtr[2 * n];
|
||||
float inputImag = inputPtr[2 * n + 1];
|
||||
real += inputReal * cos - inputImag * sin;
|
||||
imag += inputImag * cos + inputReal * sin;
|
||||
}
|
||||
if (isInverse) {
|
||||
float* inp = inputPtr + 2 * (inputSize - 2 + outputSize % 2);
|
||||
for (int n = inputSize; n < signalSize; n++, inp -= 2) {
|
||||
float cos = twiddlesPtr[2 * (k * outputSize + n)];
|
||||
float sin = twiddlesPtr[2 * (k * outputSize + n) + 1];
|
||||
float inputReal = inp[0];
|
||||
float inputImag = -inp[1];
|
||||
real += inputReal * cos - inputImag * sin;
|
||||
imag += inputImag * cos + inputReal * sin;
|
||||
}
|
||||
real /= outputSize;
|
||||
imag /= outputSize;
|
||||
}
|
||||
outputPtr[2 * k] = real;
|
||||
outputPtr[2 * k + 1] = imag;
|
||||
};
|
||||
if (parallelize) {
|
||||
parallel_for(outputSize, dftIteration);
|
||||
} else {
|
||||
for (size_t k = 0; k < outputSize; k++) {
|
||||
dftIteration(k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dftComplexToReal(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
|
||||
size_t inputSize, size_t signalSize, size_t outputSize, bool parallelize) {
|
||||
auto dftIteration = [&] (size_t k) {
|
||||
float real = 0;
|
||||
for (size_t n = 0; n < inputSize; n++) {
|
||||
float cos = twiddlesPtr[2 * (k * outputSize + n)];
|
||||
float sin = twiddlesPtr[2 * (k * outputSize + n) + 1];
|
||||
float inputReal = inputPtr[2 * n];
|
||||
float inputImag = inputPtr[2 * n + 1];
|
||||
real += inputReal * cos - inputImag * sin;
|
||||
}
|
||||
if (isInverse) {
|
||||
float* inp = inputPtr + 2 * (inputSize - 2 + outputSize % 2);
|
||||
for (size_t n = inputSize; n < signalSize; n++, inp -= 2) {
|
||||
float cos = twiddlesPtr[2 * (k * outputSize + n)];
|
||||
float sin = twiddlesPtr[2 * (k * outputSize + n) + 1];
|
||||
float inputReal = inp[0];
|
||||
float inputImag = inp[1];
|
||||
real += inputReal * cos + inputImag * sin;
|
||||
}
|
||||
real /= outputSize;
|
||||
}
|
||||
outputPtr[k] = real;
|
||||
};
|
||||
if (parallelize) {
|
||||
parallel_for(outputSize, dftIteration);
|
||||
} else {
|
||||
for (int k = 0; k < outputSize; k++) {
|
||||
dftIteration(k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dft(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
|
||||
size_t inputSize, size_t signalSize, size_t outputSize,
|
||||
enum dft_type type, bool parallelize) override {
|
||||
if (type == real_to_complex) {
|
||||
dftRealToComplex(inputPtr, twiddlesPtr, outputPtr, inputSize, outputSize, parallelize);
|
||||
} else if (type == complex_to_complex) {
|
||||
dftComplexToComplex(inputPtr, twiddlesPtr, outputPtr, inputSize, signalSize, outputSize, parallelize);
|
||||
} else if (type == complex_to_real) {
|
||||
dftComplexToReal(inputPtr, twiddlesPtr, outputPtr, inputSize, signalSize, outputSize, parallelize);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct RDFTKey {
|
||||
bool isInverse;
|
||||
|
||||
size_t hash() const {
|
||||
using namespace dnnl::impl::primitive_hashing;
|
||||
|
||||
size_t seed = 0;
|
||||
seed = hash_combine(seed, isInverse);
|
||||
return seed;
|
||||
}
|
||||
|
||||
bool operator==(const RDFTKey& rhs) const {
|
||||
return isInverse == rhs.isInverse;
|
||||
}
|
||||
};
|
||||
|
||||
void RDFT::prepareParams() {
|
||||
RDFTKey key{};
|
||||
key.isInverse = inverse;
|
||||
|
||||
auto buildExecutor = [&] (const RDFTKey& key) -> std::shared_ptr<RDFTExecutor> {
|
||||
std::shared_ptr<RDFTExecutor> executor;
|
||||
NodeDesc* primDesc = getSelectedPrimitiveDescriptor();
|
||||
if (mayiuse(cpu::x64::sse41)) {
|
||||
executor = std::make_shared<RDFTJitExecutor>(key.isInverse, primDesc);
|
||||
} else {
|
||||
executor = std::make_shared<RDFTRefExecutor>(key.isInverse);
|
||||
primDesc->setImplementationType(ref_any);
|
||||
}
|
||||
return executor;
|
||||
};
|
||||
|
||||
auto cache = getRuntimeCache();
|
||||
auto result = cache->getOrCreate(key, buildExecutor);
|
||||
executor = result.first;
|
||||
if (axes.size() > 0 && signalSizes.size() > 0 && outputShapes[0].isStatic()) {
|
||||
twiddles = executor->generateTwiddles(signalSizes, outputShapes[0].getStaticDims(), axes);
|
||||
}
|
||||
}
|
||||
} // namespace node
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
99
src/plugins/intel_cpu/src/nodes/rdft.h
Normal file
99
src/plugins/intel_cpu/src/nodes/rdft.h
Normal file
@ -0,0 +1,99 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ie_common.h>
|
||||
#include <node.h>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "kernels/rdft_kernel.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
namespace node {
|
||||
|
||||
struct RDFTExecutor {
|
||||
public:
|
||||
RDFTExecutor(bool inverse) : isInverse(inverse) {}
|
||||
void execute(float* inputPtr, float* outputPtr,
|
||||
const std::vector<std::vector<float>>& twiddles,
|
||||
size_t rank, const std::vector<int>& axes,
|
||||
std::vector<int> signalSizes,
|
||||
VectorDims inputShape, const VectorDims& outputShape,
|
||||
const VectorDims& inputStrides, const VectorDims& outputStrides);
|
||||
|
||||
std::vector<std::vector<float>> generateTwiddles(const std::vector<int>& signalSizes,
|
||||
const std::vector<size_t>& outputShape,
|
||||
const std::vector<int>& axes);
|
||||
|
||||
protected:
|
||||
bool isInverse;
|
||||
|
||||
private:
|
||||
virtual bool canUseFFT(size_t dim);
|
||||
virtual void dft(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
|
||||
size_t inputSize, size_t signalSize, size_t outputSize,
|
||||
enum dft_type type, bool parallelize) = 0;
|
||||
virtual void fft(float* input, const float* twiddlesPtr, float* output,
|
||||
size_t inputSize, size_t signalSize, size_t outputSize,
|
||||
enum dft_type type, bool parallelize);
|
||||
void dftCommon(float* inputPtr, const float* twiddlesPtr, float* outputPtr,
|
||||
size_t inputSize, size_t signalSize, size_t outputSize,
|
||||
enum dft_type type, bool useFFT, bool parallelize);
|
||||
void dftOnAxis(enum dft_type type,
|
||||
float* inputPtr, float* outputPtr,
|
||||
const float* twiddlesPtr, int axis,
|
||||
size_t signalSize,
|
||||
const VectorDims& inputShape,
|
||||
const VectorDims& inputStrides,
|
||||
const VectorDims& outputShape,
|
||||
const VectorDims& outputStrides,
|
||||
const std::vector<size_t>& iteration_range);
|
||||
void rdftNd(float* inputPtr, float* outputPtr,
|
||||
const std::vector<std::vector<float>>& twiddles,
|
||||
const std::vector<int>& axes,
|
||||
const std::vector<int>& signalSizes,
|
||||
const VectorDims& inputShape,
|
||||
const VectorDims& inputStrides,
|
||||
const VectorDims& outputShape,
|
||||
const VectorDims& outputStrides);
|
||||
void irdftNd(float* inputPtr, float* outputPtr,
|
||||
const std::vector<std::vector<float>>& twiddles,
|
||||
const std::vector<int>& axes,
|
||||
const std::vector<int>& signalSizes,
|
||||
const VectorDims& inputShape,
|
||||
const VectorDims& inputStrides,
|
||||
const VectorDims& outputShape,
|
||||
const VectorDims& outputStrides);
|
||||
virtual std::vector<float> generateTwiddlesDFT(size_t inputSize, size_t outputSize, enum dft_type type) = 0;
|
||||
std::vector<float> generateTwiddlesFFT(size_t N);
|
||||
std::vector<float> generateTwiddlesCommon(size_t inputSize, size_t outputSize,
|
||||
enum dft_type type, bool useFFT);
|
||||
};
|
||||
|
||||
class RDFT : public Node {
|
||||
public:
|
||||
RDFT(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
|
||||
|
||||
void getSupportedDescriptors() override;
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void prepareParams() override;
|
||||
void execute(dnnl::stream strm) override;
|
||||
bool created() const override;
|
||||
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
||||
private:
|
||||
std::string errorMsgPrefix;
|
||||
bool inverse;
|
||||
std::vector<int> axes;
|
||||
std::vector<int> signalSizes;
|
||||
std::vector<std::vector<float>> twiddles;
|
||||
std::shared_ptr<RDFTExecutor> executor;
|
||||
};
|
||||
|
||||
} // namespace node
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
@ -68,6 +68,7 @@
|
||||
#include "nodes/log_softmax.h"
|
||||
#include "nodes/strided_slice.h"
|
||||
#include "nodes/dft.h"
|
||||
#include "nodes/rdft.h"
|
||||
#include "nodes/non_max_suppression.h"
|
||||
#include "nodes/convert.h"
|
||||
#include "nodes/rnn.h"
|
||||
@ -123,6 +124,7 @@ Node::NodesFactory::NodesFactory()
|
||||
INTEL_CPU_NODE(MemoryOutput, Type::MemoryOutput);
|
||||
INTEL_CPU_NODE(Tile, Type::Tile);
|
||||
INTEL_CPU_NODE(DFT, Type::DFT);
|
||||
INTEL_CPU_NODE(RDFT, Type::RDFT);
|
||||
INTEL_CPU_NODE(GatherTree, Type::GatherTree);
|
||||
INTEL_CPU_NODE(SpaceToDepth, Type::SpaceToDepth);
|
||||
INTEL_CPU_NODE(FullyConnected, Type::FullyConnected);
|
||||
|
@ -0,0 +1,157 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "single_layer_tests/rdft.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
const std::vector<ngraph::helpers::DFTOpType> opTypes = {
|
||||
ngraph::helpers::DFTOpType::FORWARD,
|
||||
ngraph::helpers::DFTOpType::INVERSE
|
||||
};
|
||||
|
||||
static const std::vector<InferenceEngine::Precision> inputPrecision = {
|
||||
InferenceEngine::Precision::FP32,
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t>> shapesForward1d = {
|
||||
{10},
|
||||
{64},
|
||||
{100},
|
||||
};
|
||||
|
||||
|
||||
const std::vector<std::vector<int64_t>> signalSizes1d = {
|
||||
{}, {10},
|
||||
};
|
||||
|
||||
//1D case doesn't work yet on reference implementation
|
||||
INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_RDFT_1d, RDFTLayerTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(shapesForward1d),
|
||||
::testing::ValuesIn(inputPrecision),
|
||||
::testing::Values(std::vector<int64_t>{0}),
|
||||
::testing::ValuesIn(signalSizes1d),
|
||||
::testing::Values(ngraph::helpers::DFTOpType::FORWARD),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
|
||||
|
||||
const std::vector<std::vector<size_t>> shapesInverse1d = {
|
||||
{10, 2},
|
||||
{64, 2},
|
||||
{100, 2},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_IRDFT_1d, RDFTLayerTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(shapesInverse1d),
|
||||
::testing::ValuesIn(inputPrecision),
|
||||
::testing::Values(std::vector<int64_t>{0}),
|
||||
::testing::ValuesIn(signalSizes1d),
|
||||
::testing::Values(ngraph::helpers::DFTOpType::INVERSE),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
|
||||
|
||||
const std::vector<std::vector<size_t>> shapesForward2d = {
|
||||
{10, 15},
|
||||
{64, 32},
|
||||
{100, 16},
|
||||
};
|
||||
|
||||
const std::vector<std::vector<int64_t>> axes2d = {
|
||||
{0, 1}, {1, 0}, {-2, -1},
|
||||
};
|
||||
|
||||
|
||||
const std::vector<std::vector<int64_t>> signalSizes2d = {
|
||||
{}, {10, 10},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_2d, RDFTLayerTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(shapesForward2d),
|
||||
::testing::ValuesIn(inputPrecision),
|
||||
::testing::ValuesIn(axes2d),
|
||||
::testing::ValuesIn(signalSizes2d),
|
||||
::testing::Values(ngraph::helpers::DFTOpType::FORWARD),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
|
||||
|
||||
const std::vector<std::vector<size_t>> shapesInverse2d = {
|
||||
{10, 15, 2},
|
||||
{64, 32, 2},
|
||||
{100, 32, 2},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_IRDFT_2d, RDFTLayerTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(shapesInverse2d),
|
||||
::testing::ValuesIn(inputPrecision),
|
||||
::testing::ValuesIn(axes2d),
|
||||
::testing::ValuesIn(signalSizes2d),
|
||||
::testing::Values(ngraph::helpers::DFTOpType::INVERSE),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
|
||||
|
||||
const std::vector<std::vector<size_t>> shapesForward4d = {
|
||||
{1, 3, 10, 15},
|
||||
{1, 4, 64, 32},
|
||||
};
|
||||
|
||||
const std::vector<std::vector<int64_t>> axes4d = {
|
||||
{0, 1, 2, 3}, {1, 0, -2, -1}
|
||||
};
|
||||
|
||||
|
||||
const std::vector<std::vector<int64_t>> signalSizes4d = {
|
||||
{},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d, RDFTLayerTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(shapesForward4d),
|
||||
::testing::ValuesIn(inputPrecision),
|
||||
::testing::ValuesIn(axes4d),
|
||||
::testing::ValuesIn(signalSizes4d),
|
||||
::testing::Values(ngraph::helpers::DFTOpType::FORWARD),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
|
||||
|
||||
const std::vector<std::vector<int64_t>> axes4d_2d = {
|
||||
{2, 3}, {1, -1}
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d_axes_2d, RDFTLayerTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(shapesForward4d),
|
||||
::testing::ValuesIn(inputPrecision),
|
||||
::testing::ValuesIn(axes4d_2d),
|
||||
::testing::Values(std::vector<int64_t>{}),
|
||||
::testing::Values(ngraph::helpers::DFTOpType::FORWARD),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
|
||||
|
||||
|
||||
const std::vector<std::vector<size_t>> shapesInverse4d = {
|
||||
{1, 3, 10, 15, 2},
|
||||
{1, 4, 64, 32, 2},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_IRDFT_4d, RDFTLayerTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(shapesInverse4d),
|
||||
::testing::ValuesIn(inputPrecision),
|
||||
::testing::ValuesIn(axes4d),
|
||||
::testing::ValuesIn(signalSizes4d),
|
||||
::testing::Values(ngraph::helpers::DFTOpType::INVERSE),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_IRDFT_4d_axes_2d, RDFTLayerTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(shapesInverse4d),
|
||||
::testing::ValuesIn(inputPrecision),
|
||||
::testing::ValuesIn(axes4d_2d),
|
||||
::testing::Values(std::vector<int64_t>{}),
|
||||
::testing::Values(ngraph::helpers::DFTOpType::INVERSE),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName);
|
||||
|
||||
|
||||
|
456
src/tests/functional/plugin/cpu/single_layer_tests/rdft.cpp
Normal file
456
src/tests/functional/plugin/cpu/single_layer_tests/rdft.cpp
Normal file
@ -0,0 +1,456 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "test_utils/cpu_test_utils.hpp"
|
||||
#include <common_test_utils/ov_tensor_utils.hpp>
|
||||
#include <openvino/opsets/opset9.hpp>
|
||||
|
||||
using namespace CPUTestUtils;
|
||||
using namespace ov::test;
|
||||
using namespace ov;
|
||||
|
||||
namespace CPULayerTestsDefinitions {
|
||||
|
||||
using RDFTTestCPUParams = std::tuple<
|
||||
Shape,
|
||||
std::vector<int64_t>, // axes
|
||||
std::vector<int64_t>, // signal sizes
|
||||
bool, // inverse
|
||||
CPUSpecificParams>;
|
||||
|
||||
class RDFTTestCPU : public testing::WithParamInterface<RDFTTestCPUParams>,
|
||||
virtual public test::SubgraphBaseTest, public CPUTestsBase {
|
||||
public:
|
||||
static std::string getTestCaseName(testing::TestParamInfo<RDFTTestCPUParams> obj) {
|
||||
Shape shape;
|
||||
std::vector<int64_t> axes;
|
||||
std::vector<int64_t> signalSizes;
|
||||
bool inverse;
|
||||
CPUSpecificParams cpuParams;
|
||||
|
||||
std::tie(shape, axes, signalSizes, inverse, cpuParams) = obj.param;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "shape=" << shape
|
||||
<< "_axes=" << CommonTestUtils::vec2str(axes)
|
||||
<< "_signalSizes=" << CommonTestUtils::vec2str(signalSizes)
|
||||
<< "_isInverse=" << inverse
|
||||
<< CPUTestsBase::getTestCaseName(cpuParams);
|
||||
return result.str();
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp() override {
|
||||
Shape shape;
|
||||
std::vector<int64_t> axes;
|
||||
std::vector<int64_t> signalSizes;
|
||||
element::Type_t precision = element::f32;
|
||||
bool inverse;
|
||||
CPUSpecificParams cpuParams;
|
||||
|
||||
std::tie(shape, axes, signalSizes, inverse, cpuParams) = GetParam();
|
||||
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
|
||||
selectedType = makeSelectedTypeStr(selectedType, precision);
|
||||
targetDevice = CommonTestUtils::DEVICE_CPU;
|
||||
targetStaticShapes.push_back(std::vector<Shape>{shape});
|
||||
|
||||
auto param = std::make_shared<opset9::Parameter>(precision, shape);
|
||||
auto axesNode = opset9::Constant::create(element::i64, Shape{axes.size()}, axes);
|
||||
std::shared_ptr<Node> rdft;
|
||||
if (signalSizes.size() > 0) {
|
||||
auto signalSizesNode = opset9::Constant::create(element::i64, Shape{signalSizes.size()}, signalSizes);
|
||||
if (inverse) {
|
||||
rdft = std::make_shared<opset9::IRDFT>(param, axesNode, signalSizesNode);
|
||||
} else {
|
||||
rdft = std::make_shared<opset9::RDFT>(param, axesNode, signalSizesNode);
|
||||
}
|
||||
} else {
|
||||
if (inverse) {
|
||||
rdft = std::make_shared<opset9::IRDFT>(param, axesNode);
|
||||
} else {
|
||||
rdft = std::make_shared<opset9::RDFT>(param, axesNode);
|
||||
}
|
||||
}
|
||||
function = std::make_shared<Model>(rdft, ParameterVector{param});
|
||||
}
|
||||
|
||||
void generate_inputs(const std::vector<Shape>& targetInputStaticShapes) override {
|
||||
const auto& funcInputs = function->inputs();
|
||||
inputs.clear();
|
||||
|
||||
for (int i = 0; i < funcInputs.size(); ++i) {
|
||||
const auto& funcInput = funcInputs[i];
|
||||
runtime::Tensor tensor = test::utils::create_and_fill_tensor_normal_distribution(funcInput.get_element_type(), targetInputStaticShapes[0], 0, 1, 0);
|
||||
inputs.insert({funcInput.get_node_shared_ptr(), tensor});
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the parameterized subgraph test, then calls CheckPluginRelatedResults
// on the compiled model for the node type "RDFT".
TEST_P(RDFTTestCPU, CompareWithRefs) {
|
||||
// Returns early when the current test is disabled (macro-provided check).
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
|
||||
run();
|
||||
CheckPluginRelatedResults(compiledModel, "RDFT");
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
CPUSpecificParams getCPUSpecificParams() {
|
||||
if (InferenceEngine::with_cpu_x86_avx512_core()) {
|
||||
return CPUSpecificParams{{}, {}, {"jit_avx512"}, "jit_avx512"};
|
||||
} else if (InferenceEngine::with_cpu_x86_avx2()) {
|
||||
return CPUSpecificParams{{}, {}, {"jit_avx2"}, "jit_avx2"};
|
||||
} else if (InferenceEngine::with_cpu_x86_sse42()) {
|
||||
return CPUSpecificParams{{}, {}, {"jit_sse42"}, "jit_sse42"};
|
||||
} else {
|
||||
return CPUSpecificParams{{}, {}, {"ref"}, "ref"};
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
auto cpuParams = getCPUSpecificParams();
|
||||
|
||||
std::vector<RDFTTestCPUParams> getParams1D() {
|
||||
if (InferenceEngine::with_cpu_x86_avx512_core()) {
|
||||
return {
|
||||
{{14}, {0}, {}, false, cpuParams},
|
||||
{{13}, {0}, {}, false, cpuParams},
|
||||
{{15}, {0}, {}, false, cpuParams},
|
||||
|
||||
{{30}, {0}, {}, false, cpuParams},
|
||||
{{29}, {0}, {}, false, cpuParams},
|
||||
{{31}, {0}, {}, false, cpuParams},
|
||||
|
||||
{{46}, {0}, {}, false, cpuParams},
|
||||
{{45}, {0}, {}, false, cpuParams},
|
||||
{{47}, {0}, {}, false, cpuParams},
|
||||
|
||||
{{126}, {0}, {}, false, cpuParams},
|
||||
{{510}, {0}, {}, false, cpuParams},
|
||||
{{1022}, {0}, {}, false, cpuParams},
|
||||
|
||||
{{9, 2}, {0}, {}, true, cpuParams},
|
||||
{{8, 2}, {0}, {}, true, cpuParams},
|
||||
{{10, 2}, {0}, {}, true, cpuParams},
|
||||
|
||||
{{17, 2}, {0}, {}, true, cpuParams},
|
||||
{{16, 2}, {0}, {}, true, cpuParams},
|
||||
{{18, 2}, {0}, {}, true, cpuParams},
|
||||
|
||||
{{25, 2}, {0}, {}, true, cpuParams},
|
||||
{{24, 2}, {0}, {}, true, cpuParams},
|
||||
{{26, 2}, {0}, {}, true, cpuParams},
|
||||
|
||||
{{129, 2}, {0}, {}, true, cpuParams},
|
||||
{{513, 2}, {0}, {}, true, cpuParams},
|
||||
{{1025, 2}, {0}, {}, true, cpuParams},
|
||||
|
||||
{{25, 2}, {0}, {32}, true, cpuParams},
|
||||
{{24, 2}, {0}, {16}, true, cpuParams},
|
||||
};
|
||||
} else if (InferenceEngine::with_cpu_x86_avx2()) {
|
||||
return {
|
||||
{{6}, {0}, {}, false, cpuParams},
|
||||
{{5}, {0}, {}, false, cpuParams},
|
||||
{{7}, {0}, {}, false, cpuParams},
|
||||
|
||||
{{38}, {0}, {}, false, cpuParams},
|
||||
{{37}, {0}, {}, false, cpuParams},
|
||||
{{39}, {0}, {}, false, cpuParams},
|
||||
|
||||
{{106}, {0}, {}, false, cpuParams},
|
||||
{{246}, {0}, {}, false, cpuParams},
|
||||
{{245}, {0}, {118}, false, cpuParams},
|
||||
|
||||
{{126}, {0}, {}, false, cpuParams},
|
||||
{{510}, {0}, {}, false, cpuParams},
|
||||
{{1022}, {0}, {}, false, cpuParams},
|
||||
|
||||
{{5, 2}, {0}, {}, true, cpuParams},
|
||||
{{4, 2}, {0}, {}, true, cpuParams},
|
||||
{{6, 2}, {0}, {}, true, cpuParams},
|
||||
|
||||
{{9, 2}, {0}, {}, true, cpuParams},
|
||||
{{8, 2}, {0}, {}, true, cpuParams},
|
||||
{{10, 2}, {0}, {}, true, cpuParams},
|
||||
|
||||
{{17, 2}, {0}, {}, true, cpuParams},
|
||||
{{33, 2}, {0}, {}, true, cpuParams},
|
||||
{{129, 2}, {0}, {}, true, cpuParams},
|
||||
{{257, 2}, {0}, {}, true, cpuParams},
|
||||
{{513, 2}, {0}, {}, true, cpuParams},
|
||||
|
||||
{{129, 2}, {0}, {126}, true, cpuParams},
|
||||
{{257, 2}, {0}, {254}, true, cpuParams},
|
||||
{{513, 2}, {0}, {510}, true, cpuParams},
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
{{1}, {0}, {}, false, cpuParams},
|
||||
{{2}, {0}, {}, false, cpuParams},
|
||||
{{12}, {0}, {}, false, cpuParams},
|
||||
{{14}, {0}, {}, false, cpuParams},
|
||||
{{30}, {0}, {}, false, cpuParams},
|
||||
{{62}, {0}, {}, false, cpuParams},
|
||||
{{126}, {0}, {}, false, cpuParams},
|
||||
{{250}, {0}, {}, false, cpuParams},
|
||||
{{254}, {0}, {}, false, cpuParams},
|
||||
{{62}, {0}, {61}, false, cpuParams},
|
||||
{{126}, {0}, {40}, false, cpuParams},
|
||||
{{250}, {0}, {200}, false, cpuParams},
|
||||
{{254}, {0}, {10}, false, cpuParams},
|
||||
|
||||
{{2, 2}, {0}, {}, true, cpuParams},
|
||||
{{9, 2}, {0}, {}, true, cpuParams},
|
||||
{{10, 2}, {0}, {}, true, cpuParams},
|
||||
{{17, 2}, {0}, {}, true, cpuParams},
|
||||
{{33, 2}, {0}, {}, true, cpuParams},
|
||||
{{65, 2}, {0}, {}, true, cpuParams},
|
||||
{{129, 2}, {0}, {}, true, cpuParams},
|
||||
{{257, 2}, {0}, {}, true, cpuParams},
|
||||
{{33, 2}, {0}, {50}, true, cpuParams},
|
||||
{{65, 2}, {0}, {20}, true, cpuParams},
|
||||
{{129, 2}, {0}, {200}, true, cpuParams},
|
||||
{{257, 2}, {0}, {100}, true, cpuParams},
|
||||
};
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_CPU_1D, RDFTTestCPU, ::testing::ValuesIn(getParams1D()), RDFTTestCPU::getTestCaseName);
|
||||
|
||||
std::vector<RDFTTestCPUParams> getParams2D() {
|
||||
if (InferenceEngine::with_cpu_x86_avx512_core()) {
|
||||
return {
|
||||
{{46, 10}, {0}, {}, false, cpuParams},
|
||||
{{45, 10}, {0}, {}, false, cpuParams},
|
||||
{{47, 10}, {0}, {}, false, cpuParams},
|
||||
|
||||
{{20, 126}, {1}, {}, false, cpuParams},
|
||||
{{20, 510}, {1}, {}, false, cpuParams},
|
||||
{{20, 1022}, {1}, {}, false, cpuParams},
|
||||
|
||||
{{48, 46}, {0, 1}, {}, false, cpuParams},
|
||||
{{32, 45}, {0, 1}, {}, false, cpuParams},
|
||||
{{64, 47}, {0, 1}, {}, false, cpuParams},
|
||||
|
||||
{{72, 126}, {0, 1}, {}, false, cpuParams},
|
||||
{{32, 510}, {0, 1}, {}, false, cpuParams},
|
||||
{{16, 1022}, {0, 1}, {}, false, cpuParams},
|
||||
|
||||
{{9, 10, 2}, {0}, {}, true, cpuParams},
|
||||
{{8, 10, 2}, {0}, {}, true, cpuParams},
|
||||
{{10, 20, 2}, {0}, {}, true, cpuParams},
|
||||
|
||||
{{10, 9, 2}, {1}, {}, true, cpuParams},
|
||||
{{10, 8, 2}, {1}, {}, true, cpuParams},
|
||||
{{20, 10, 2}, {1}, {}, true, cpuParams},
|
||||
|
||||
{{129, 16, 2}, {0}, {}, true, cpuParams},
|
||||
{{513, 32, 2}, {0}, {}, true, cpuParams},
|
||||
{{1025, 72, 2}, {0}, {}, true, cpuParams},
|
||||
|
||||
{{16, 129, 2}, {1}, {}, true, cpuParams},
|
||||
{{32, 513, 2}, {1}, {}, true, cpuParams},
|
||||
{{72, 1025, 2}, {1}, {}, true, cpuParams},
|
||||
|
||||
{{16, 129, 2}, {0, 1}, {}, true, cpuParams},
|
||||
{{32, 513, 2}, {0, 1}, {}, true, cpuParams},
|
||||
{{72, 1025, 2}, {0, 1}, {}, true, cpuParams},
|
||||
|
||||
{{16, 129, 2}, {0, 1}, {16, 200}, true, cpuParams},
|
||||
{{32, 513, 2}, {0, 1}, {32, 600}, true, cpuParams},
|
||||
{{72, 1025, 2}, {0, 1}, {72, 100}, true, cpuParams},
|
||||
};
|
||||
} else if (InferenceEngine::with_cpu_x86_avx2()) {
|
||||
return {
|
||||
{{38, 16}, {0}, {}, false, cpuParams},
|
||||
{{37, 8}, {0}, {}, false, cpuParams},
|
||||
{{39, 24}, {0}, {}, false, cpuParams},
|
||||
|
||||
{{16, 38}, {1}, {}, false, cpuParams},
|
||||
{{8, 37}, {1}, {}, false, cpuParams},
|
||||
{{24, 39}, {1}, {}, false, cpuParams},
|
||||
|
||||
{{16, 38}, {0, 1}, {}, false, cpuParams},
|
||||
{{8, 37}, {0, 1}, {}, false, cpuParams},
|
||||
{{24, 39}, {0, 1}, {}, false, cpuParams},
|
||||
|
||||
{{126, 32}, {0}, {}, false, cpuParams},
|
||||
{{510, 64}, {0}, {}, false, cpuParams},
|
||||
{{1022, 64}, {0}, {}, false, cpuParams},
|
||||
|
||||
{{126, 32}, {0, 1}, {}, false, cpuParams},
|
||||
{{510, 64}, {0, 1}, {}, false, cpuParams},
|
||||
{{1022, 64}, {0, 1}, {}, false, cpuParams},
|
||||
|
||||
{{38, 16, 2}, {0}, {}, true, cpuParams},
|
||||
{{37, 8, 2}, {0}, {}, true, cpuParams},
|
||||
{{39, 24, 2}, {0}, {}, true, cpuParams},
|
||||
|
||||
{{16, 38, 2}, {1}, {}, true, cpuParams},
|
||||
{{8, 37, 2}, {1}, {}, true, cpuParams},
|
||||
{{24, 39, 2}, {1}, {}, true, cpuParams},
|
||||
|
||||
{{16, 38, 2}, {0, 1}, {}, true, cpuParams},
|
||||
{{8, 37, 2}, {0, 1}, {}, true, cpuParams},
|
||||
{{24, 39, 2}, {0, 1}, {}, true, cpuParams},
|
||||
|
||||
{{126, 32, 2}, {0}, {}, true, cpuParams},
|
||||
{{510, 64, 2}, {0}, {}, true, cpuParams},
|
||||
{{1022, 64, 2}, {0}, {}, true, cpuParams},
|
||||
|
||||
{{126, 32, 2}, {0, 1}, {}, true, cpuParams},
|
||||
{{510, 64, 2}, {0, 1}, {}, true, cpuParams},
|
||||
{{1022, 64, 2}, {0, 1}, {}, true, cpuParams},
|
||||
|
||||
{{129, 32, 2}, {0}, {126}, true, cpuParams},
|
||||
{{257, 16, 2}, {0}, {254}, true, cpuParams},
|
||||
{{513, 64, 2}, {0}, {510}, true, cpuParams},
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
{{1, 1}, {0}, {}, false, cpuParams},
|
||||
{{1, 1}, {1}, {}, false, cpuParams},
|
||||
{{1, 1}, {0, 1}, {}, false, cpuParams},
|
||||
{{2, 2}, {0}, {}, false, cpuParams},
|
||||
{{2, 2}, {1}, {}, false, cpuParams},
|
||||
{{2, 2}, {0, 1}, {}, false, cpuParams},
|
||||
{{13, 13}, {0}, {}, false, cpuParams},
|
||||
{{13, 13}, {1}, {}, false, cpuParams},
|
||||
{{13, 13}, {0, 1}, {}, false, cpuParams},
|
||||
{{29, 29}, {0}, {}, false, cpuParams},
|
||||
{{29, 29}, {1}, {}, false, cpuParams},
|
||||
{{29, 29}, {0, 1}, {}, false, cpuParams},
|
||||
{{30, 32}, {0}, {}, false, cpuParams},
|
||||
{{32, 30}, {1}, {}, false, cpuParams},
|
||||
{{32, 30}, {0, 1}, {}, false, cpuParams},
|
||||
{{62, 64}, {0}, {}, false, cpuParams},
|
||||
{{64, 62}, {1}, {}, false, cpuParams},
|
||||
{{64, 62}, {0, 1}, {}, false, cpuParams},
|
||||
{{254, 128}, {0}, {}, false, cpuParams},
|
||||
{{128, 254}, {1}, {}, false, cpuParams},
|
||||
{{128, 254}, {0, 1}, {}, false, cpuParams},
|
||||
{{128, 254}, {1}, {10}, false, cpuParams},
|
||||
{{128, 254}, {0, 1}, {128, 100}, false, cpuParams},
|
||||
|
||||
{{1, 1, 2}, {0}, {1}, true, cpuParams},
|
||||
{{1, 1, 2}, {1}, {1}, true, cpuParams},
|
||||
{{1, 1, 2}, {0, 1}, {1, 1}, true, cpuParams},
|
||||
{{2, 2, 2}, {0}, {}, true, cpuParams},
|
||||
{{2, 2, 2}, {1}, {}, true, cpuParams},
|
||||
{{2, 2, 2}, {0, 1}, {}, true, cpuParams},
|
||||
{{13, 13, 2}, {0}, {}, true, cpuParams},
|
||||
{{13, 13, 2}, {1}, {}, true, cpuParams},
|
||||
{{13, 13, 2}, {0, 1}, {}, true, cpuParams},
|
||||
{{29, 29, 2}, {0}, {}, true, cpuParams},
|
||||
{{29, 29, 2}, {1}, {}, true, cpuParams},
|
||||
{{29, 29, 2}, {0, 1}, {}, true, cpuParams},
|
||||
{{30, 32, 2}, {0}, {}, true, cpuParams},
|
||||
{{32, 30, 2}, {1}, {}, true, cpuParams},
|
||||
{{32, 30, 2}, {0, 1}, {}, true, cpuParams},
|
||||
{{62, 64, 2}, {0}, {}, true, cpuParams},
|
||||
{{64, 62, 2}, {1}, {}, true, cpuParams},
|
||||
{{64, 62, 2}, {0, 1}, {}, true, cpuParams},
|
||||
{{254, 128, 2}, {0}, {}, true, cpuParams},
|
||||
{{128, 254, 2}, {1}, {}, true, cpuParams},
|
||||
{{128, 254, 2}, {0, 1}, {}, true, cpuParams},
|
||||
{{128, 254, 2}, {1}, {10}, true, cpuParams},
|
||||
{{128, 254, 2}, {0, 1}, {128, 100}, true, cpuParams},
|
||||
};
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_CPU_2D, RDFTTestCPU, ::testing::ValuesIn(getParams2D()), RDFTTestCPU::getTestCaseName);
|
||||
|
||||
|
||||
std::vector<RDFTTestCPUParams> getParams4D() {
|
||||
std::vector<RDFTTestCPUParams> params;
|
||||
if (InferenceEngine::with_cpu_x86_avx512_core()) {
|
||||
params = {
|
||||
{{10, 46, 128, 65}, {1}, {}, false, cpuParams},
|
||||
{{10, 46, 128, 65}, {0, 1}, {}, false, cpuParams},
|
||||
{{46, 10, 128, 65}, {1, 0}, {}, false, cpuParams},
|
||||
{{10, 46, 128, 65}, {1, 2}, {}, false, cpuParams},
|
||||
{{46, 10, 128, 65}, {-2, -1}, {}, false, cpuParams},
|
||||
{{46, 10, 128, 65}, {3, 1}, {}, false, cpuParams},
|
||||
{{46, 10, 128, 65}, {0, 1, 2, 3}, {}, false, cpuParams},
|
||||
{{46, 10, 128, 65}, {0, 1, 2, 3}, {10, 10, 33, 50}, false, cpuParams},
|
||||
|
||||
{{10, 46, 128, 65, 2}, {1}, {}, true, cpuParams},
|
||||
{{10, 46, 128, 65, 2}, {0, 1}, {}, true, cpuParams},
|
||||
{{46, 10, 128, 65, 2}, {1, 0}, {}, true, cpuParams},
|
||||
{{10, 46, 128, 65, 2}, {1, 2}, {}, true, cpuParams},
|
||||
{{46, 10, 128, 65, 2}, {-2, -1}, {}, true, cpuParams},
|
||||
{{46, 10, 128, 65, 2}, {3, 1}, {}, true, cpuParams},
|
||||
{{46, 10, 128, 65, 2}, {0, 1, 2, 3}, {}, true, cpuParams},
|
||||
// TODO: FIXME
|
||||
//{{46, 10, 128, 65, 2}, {0, 1, 2, 3}, {12, 15, 130, 40}, true, cpuParams},
|
||||
};
|
||||
} else if (InferenceEngine::with_cpu_x86_avx2()) {
|
||||
params = {
|
||||
{{9, 16, 32, 126}, {1}, {}, false, cpuParams},
|
||||
{{9, 16, 32, 126}, {1, 0}, {}, false, cpuParams},
|
||||
{{9, 16, 32, 126}, {1, 2}, {}, false, cpuParams},
|
||||
{{9, 16, 32, 126}, {-2, -1}, {}, false, cpuParams},
|
||||
{{9, 16, 32, 126}, {3, 1}, {}, false, cpuParams},
|
||||
{{9, 16, 32, 126}, {0, 1, 2, 3}, {}, false, cpuParams},
|
||||
{{9, 16, 32, 126}, {0, 1, 2, 3}, {8, 10, 11, 12}, false, cpuParams},
|
||||
|
||||
{{9, 16, 32, 126, 2}, {1}, {}, true, cpuParams},
|
||||
{{9, 16, 32, 126, 2}, {1, 0}, {}, true, cpuParams},
|
||||
{{9, 16, 32, 126, 2}, {1, 2}, {}, true, cpuParams},
|
||||
{{9, 16, 32, 126, 2}, {-2, -1}, {}, true, cpuParams},
|
||||
{{9, 16, 32, 126, 2}, {3, 1}, {}, true, cpuParams},
|
||||
{{9, 16, 32, 126, 2}, {0, 1, 2, 3}, {}, true, cpuParams},
|
||||
// TODO: FIXME
|
||||
//{{9, 16, 32, 126, 2}, {0, 1, 2, 3}, {8, 10, 11, 12}, true, cpuParams},
|
||||
};
|
||||
} else {
|
||||
params = {
|
||||
{{1, 2, 13, 30}, {1}, {}, false, cpuParams},
|
||||
{{1, 2, 13, 30}, {1, 0}, {}, false, cpuParams},
|
||||
{{1, 2, 13, 30}, {1, 2}, {}, false, cpuParams},
|
||||
{{1, 2, 13, 30}, {-2, -1}, {}, false, cpuParams},
|
||||
{{1, 2, 13, 30}, {3, 2}, {}, false, cpuParams},
|
||||
{{1, 2, 13, 30}, {0, 1, 2, 3}, {}, false, cpuParams},
|
||||
{{1, 2, 13, 30}, {0, 1, 2, 3}, {1, 2, 3, 13}, false, cpuParams},
|
||||
|
||||
{{1, 2, 13, 30, 2}, {1}, {}, true, cpuParams},
|
||||
{{2, 2, 13, 30, 2}, {1, 0}, {}, true, cpuParams},
|
||||
{{1, 2, 13, 30, 2}, {1, 2}, {}, true, cpuParams},
|
||||
{{1, 2, 13, 30, 2}, {-2, -1}, {}, true, cpuParams},
|
||||
{{1, 2, 13, 30, 2}, {3, 2}, {}, true, cpuParams},
|
||||
{{1, 2, 13, 30, 2}, {0, 1, 2, 3}, {}, true, cpuParams},
|
||||
// TODO: FIXME
|
||||
//{{1, 2, 13, 30, 2}, {0, 1, 2, 3}, {1, 2, 3, 13}, true, cpuParams},
|
||||
};
|
||||
}
|
||||
params.push_back({{1, 192, 36, 64}, {0}, {}, false, cpuParams});
|
||||
params.push_back({{1, 192, 36, 64}, {1}, {}, false, cpuParams});
|
||||
params.push_back({{1, 192, 36, 64}, {2}, {}, false, cpuParams});
|
||||
params.push_back({{1, 192, 36, 64}, {3}, {}, false, cpuParams});
|
||||
params.push_back({{1, 192, 36, 64}, {0, 1}, {}, false, cpuParams});
|
||||
params.push_back({{1, 192, 36, 64}, {3, 2}, {}, false, cpuParams});
|
||||
params.push_back({{1, 192, 36, 64}, {-2, -1}, {36, 64}, false, cpuParams});
|
||||
params.push_back({{1, 192, 36, 64}, {0, 1, 2, 3}, {}, false, cpuParams});
|
||||
params.push_back({{2, 192, 36, 33, 2}, {0}, {}, true, cpuParams});
|
||||
params.push_back({{1, 192, 36, 33, 2}, {1}, {}, true, cpuParams});
|
||||
params.push_back({{1, 192, 36, 33, 2}, {2}, {}, true, cpuParams});
|
||||
params.push_back({{1, 192, 36, 33, 2}, {3}, {}, true, cpuParams});
|
||||
params.push_back({{1, 192, 36, 33, 2}, {0, 1}, {}, true, cpuParams});
|
||||
params.push_back({{1, 192, 36, 33, 2}, {3, 2}, {}, true, cpuParams});
|
||||
params.push_back({{1, 192, 36, 33, 2}, {-2, -1}, {36, 64}, true, cpuParams});
|
||||
params.push_back({{1, 192, 36, 33, 2}, {0, 1, 2, 3}, {}, true, cpuParams});
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_RDFT_CPU_4D, RDFTTestCPU, ::testing::ValuesIn(getParams4D()), RDFTTestCPU::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
} // namespace CPULayerTestsDefinitions
|
@ -0,0 +1,15 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared_test_classes/single_layer/rdft.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
// Runs the shared RDFT layer test for the current parameter set.
TEST_P(RDFTLayerTest, CompareWithRefs) {
    Run();
}
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
@ -0,0 +1,31 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
typedef std::tuple<
|
||||
InferenceEngine::SizeVector, // Input shapes
|
||||
InferenceEngine::Precision, // Input precision
|
||||
std::vector<int64_t>, // Axes
|
||||
std::vector<int64_t>, // Signal size
|
||||
ngraph::helpers::DFTOpType,
|
||||
std::string> RDFTParams; // Device name
|
||||
|
||||
// Shared single-layer test fixture parameterized by RDFTParams.
class RDFTLayerTest : public testing::WithParamInterface<RDFTParams>, virtual public LayerTestsUtils::LayerTestsCommon {
|
||||
public:
|
||||
// Builds a readable test name from the parameter tuple.
static std::string getTestCaseName(const testing::TestParamInfo<RDFTParams>& obj);
|
||||
|
||||
protected:
|
||||
// Creates the function under test from the current parameters.
void SetUp() override;
|
||||
};
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
@ -0,0 +1,47 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "shared_test_classes/single_layer/rdft.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
// Formats the parameter tuple as
// "IS=..._Precision=..._Axes=..._SignalSize=..._Inverse=..._TargetDevice=...".
std::string RDFTLayerTest::getTestCaseName(const testing::TestParamInfo<RDFTParams>& obj) {
    InferenceEngine::SizeVector shape;
    InferenceEngine::Precision precision;
    std::vector<int64_t> axesValues;
    std::vector<int64_t> signalSizeValues;
    ngraph::helpers::DFTOpType direction;
    std::string device;
    std::tie(shape, precision, axesValues, signalSizeValues, direction, device) = obj.param;

    const bool isInverse = (direction == ngraph::helpers::DFTOpType::INVERSE);

    std::ostringstream name;
    name << "IS=" << CommonTestUtils::vec2str(shape) << "_"
         << "Precision=" << precision.name() << "_"
         << "Axes=" << CommonTestUtils::vec2str(axesValues) << "_"
         << "SignalSize=" << CommonTestUtils::vec2str(signalSizeValues) << "_"
         << "Inverse=" << isInverse << "_"
         << "TargetDevice=" << device;
    return name.str();
}
|
||||
|
||||
void RDFTLayerTest::SetUp() {
|
||||
InferenceEngine::SizeVector inputShapes;
|
||||
InferenceEngine::Precision inputPrecision;
|
||||
std::vector<int64_t> axes;
|
||||
std::vector<int64_t> signalSize;
|
||||
ngraph::helpers::DFTOpType opType;
|
||||
std::tie(inputShapes, inputPrecision, axes, signalSize, opType, targetDevice) = this->GetParam();
|
||||
auto inType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision);
|
||||
ngraph::ParameterVector paramVector;
|
||||
auto paramData = std::make_shared<ngraph::opset1::Parameter>(inType, ngraph::Shape(inputShapes));
|
||||
paramVector.push_back(paramData);
|
||||
|
||||
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(paramVector));
|
||||
auto rdft = ngraph::builder::makeRDFT(paramOuts[0], axes, signalSize, opType);
|
||||
|
||||
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(rdft)};
|
||||
function = std::make_shared<ngraph::Function>(results, paramVector, "RDFT");
|
||||
}
|
||||
} // namespace LayerTestsDefinitions
|
@ -596,6 +596,11 @@ std::shared_ptr<ngraph::Node> makeDFT(const ngraph::Output<Node> &dataNode,
|
||||
const std::vector<int64_t> &signalSize,
|
||||
const ngraph::helpers::DFTOpType opType);
|
||||
|
||||
/// @brief Creates an RDFT (forward) or IRDFT (inverse) node for tests.
/// @param dataNode   Output providing the input data.
/// @param axes       Axes values, passed to the operation as an i64 constant.
/// @param signalSize Signal size values; when empty, the operation is created
///                   without the signal-size input.
/// @param opType     Selects the forward or the inverse operation.
/// @return The created node.
std::shared_ptr<ngraph::Node> makeRDFT(const ngraph::Output<Node> &dataNode,
                                       const std::vector<int64_t> &axes,
                                       const std::vector<int64_t> &signalSize,
                                       const ngraph::helpers::DFTOpType opType);
|
||||
|
||||
std::shared_ptr<ngraph::Node> makeEinsum(const OutputVector& inputs,
|
||||
const std::string& equation);
|
||||
} // namespace builder
|
||||
|
40
src/tests/ngraph_helpers/ngraph_functions/src/rdft.cpp
Normal file
40
src/tests/ngraph_helpers/ngraph_functions/src/rdft.cpp
Normal file
@ -0,0 +1,40 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace builder {
|
||||
|
||||
namespace {
|
||||
template <typename ...Args>
|
||||
std::shared_ptr<ngraph::Node> CallDftCtorWithArgs(const ngraph::helpers::DFTOpType opType, Args&&... args) {
|
||||
switch (opType) {
|
||||
case ngraph::helpers::DFTOpType::FORWARD:
|
||||
return std::make_shared<ngraph::op::v9::RDFT>(std::forward<Args>(args)...);
|
||||
case ngraph::helpers::DFTOpType::INVERSE:
|
||||
return std::make_shared<ngraph::op::v9::IRDFT>(std::forward<Args>(args)...);
|
||||
default:
|
||||
throw std::logic_error("Unsupported operation type");
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Creates an RDFT or IRDFT node (chosen by `opType`) fed by `dataNode`.
// `axes` always becomes a 1-D i64 constant input; `signalSize` becomes a
// second 1-D i64 constant input only when it is not empty.
std::shared_ptr<ngraph::Node> makeRDFT(const ngraph::Output<Node> &dataNode,
                                       const std::vector<int64_t> &axes,
                                       const std::vector<int64_t> &signalSize,
                                       const ngraph::helpers::DFTOpType opType) {
    // Wraps a vector into a 1-D i64 Constant and returns its first output.
    const auto asI64Constant = [](const std::vector<int64_t> &values) {
        return std::make_shared<ngraph::op::Constant>(ngraph::element::Type_t::i64,
                                                      ngraph::Shape{values.size()},
                                                      values)->output(0);
    };

    auto axesOutput = asI64Constant(axes);

    if (signalSize.empty()) {
        // No explicit signal size: use the two-input form of the operation.
        return CallDftCtorWithArgs(opType, dataNode, axesOutput);
    }

    auto signalSizeOutput = asI64Constant(signalSize);
    return CallDftCtorWithArgs(opType, dataNode, axesOutput, signalSizeOutput);
}
|
||||
} // namespace builder
|
||||
} // namespace ngraph
|
Loading…
Reference in New Issue
Block a user