[CPU] Gather JIT implementation + Gather8 support. (#10083)
This commit is contained in: parent 0b75589e27, commit b34cb55081
@@ -114,7 +114,6 @@ xfail_issue_52463 = xfail_test(reason="test_operator_add_size1_singleton_broadca
xfail_issue_58033 = xfail_test(reason="Einsum operation misses support for complex ellipsis equations")
xfail_issue_58676 = xfail_test(reason="AssertionError: Not equal to tolerance rtol=0.001, atol=1e-07")
xfail_issue_onnx_models_140 = xfail_test(reason="https://github.com/onnx/models/issues/140")
xfail_issue_54630 = xfail_test(reason="Gather with negative indices is not yet implemented on CPU")

xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported")
xfail_issue_63036 = xfail_test(reason="Changes in ConvTranspose padding")
@@ -128,3 +127,5 @@ xfail_issue_63137 = xfail_test(reason="Unsupported operations: OptionalHasElemen
xfail_issue_63138 = xfail_test(reason="Missing ONNX Shape-15 support")
xfail_issue_63643 = xfail_test(reason="RuntimeError: Unsupported operation of type: Convolution name")
xfail_issue_68212 = xfail_test(reason="Unsupported reading model with bytes streams")

xfail_issue_77668 = xfail_test(reason="Accuracy issue related to Gather-8.")
@@ -4,7 +4,6 @@
import openvino.runtime.opset8 as ov
import numpy as np

from tests import xfail_issue_54630
from tests.test_ngraph.util import run_op_node

@@ -55,7 +54,6 @@ def test_gather_batch_dims_1():
    assert np.allclose(result, expected)

@xfail_issue_54630
def test_gather_negative_indices():
    input_data = np.array(
        [1.0, 1.1, 1.2, 2.0, 2.1, 2.2, 3.0, 3.1, 3.2], np.float32
@@ -71,7 +69,6 @@ def test_gather_negative_indices():
    assert np.allclose(result, expected)

@xfail_issue_54630
def test_gather_batch_dims_1_negative_indices():
    input_data = np.array([[1, 2, 3, 4, 5],
@@ -114,11 +114,9 @@ tests_expected_to_fail = [
    (
        xfail_issue_39662,
        "OnnxBackendNodeModelTest.test_scatter_elements_with_negative_indices_cpu",
        "OnnxBackendNodeModelTest.test_gather_negative_indices_cpu",
    ),
    (
        xfail_issue_38091,
        "OnnxBackendNodeModelTest.test_gather_negative_indices_cpu",
        "OnnxBackendNodeModelTest.test_dynamicquantizelinear_cpu",
        "OnnxBackendNodeModelTest.test_dynamicquantizelinear_expanded_cpu",
    ),
@@ -22,7 +22,8 @@ from tests import (
    xfail_issue_48190,
    xfail_issue_58676,
    xfail_issue_63643,
    xfail_issue_onnx_models_140)
    xfail_issue_onnx_models_140,
    xfail_issue_77668)

MODELS_ROOT_DIR = tests.MODEL_ZOO_DIR

@@ -179,6 +180,8 @@ if len(zoo_models) > 0:
        (xfail_issue_48190, "test_onnx_model_zoo_text_machine_comprehension_roberta_model_roberta_base_11_roberta_base_11_roberta_base_11_cpu"),
        (xfail_issue_onnx_models_140, "test_onnx_model_zoo_vision_object_detection_segmentation_duc_model_ResNet101_DUC_7_ResNet101_DUC_HDC_ResNet101_DUC_HDC_cpu"),
        (xfail_issue_63643, "test_onnx_model_zoo_vision_object_detection_segmentation_ssd_mobilenetv1_model_ssd_mobilenet_v1_10_ssd_mobilenet_v1_ssd_mobilenet_v1_cpu"),
        (xfail_issue_77668, "test_onnx_model_zoo_vision_object_detection_segmentation_faster_rcnn_model_FasterRCNN_10_faster_rcnn_R_50_FPN_1x_cpu"),
        (xfail_issue_77668, "test_onnx_model_zoo_vision_object_detection_segmentation_mask_rcnn_model_MaskRCNN_10_mask_rcnn_R_50_FPN_1x_cpu"),

        # Model MSFT
        (xfail_issue_37973, "test_MSFT_opset7_tf_inception_v2_model_cpu"),
@@ -193,6 +196,9 @@ if len(zoo_models) > 0:
        (xfail_issue_39669, "test_MSFT_opset9_cgan_cgan_cpu"),
        (xfail_issue_47495, "test_MSFT_opset10_BERT_Squad_bertsquad10_cpu"),
        (xfail_issue_63643, "test_MSFT_opset10_mlperf_ssd_mobilenet_300_ssd_mobilenet_v1_coco_2018_01_28_cpu"),

        (xfail_issue_77668, "test_MSFT_opset10_faster_rcnn_faster_rcnn_R_50_FPN_1x_cpu"),
        (xfail_issue_77668, "test_MSFT_opset10_mask_rcnn_mask_rcnn_R_50_FPN_1x_cpu"),
    ]
    for test_case in import_xfail_list + execution_xfail_list:
        xfail, test_name = test_case
@@ -124,7 +124,6 @@ xfail_issue_52463 = xfail_test(reason="test_operator_add_size1_singleton_broadca
xfail_issue_58033 = xfail_test(reason="Einsum operation misses support for complex ellipsis equations")
xfail_issue_58676 = xfail_test(reason="AssertionError: Not equal to tolerance rtol=0.001, atol=1e-07")
xfail_issue_onnx_models_140 = xfail_test(reason="https://github.com/onnx/models/issues/140")
xfail_issue_54630 = xfail_test(reason="Gather with negative indices is not yet implemented on CPU")

xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported")
xfail_issue_63036 = xfail_test(reason="Changes in ConvTranspose padding")
@@ -137,3 +136,5 @@ xfail_issue_63136 = xfail_test(reason="Unsupported operation: CastLike")
xfail_issue_63137 = xfail_test(reason="Unsupported operations: OptionalHasElement, OptionalGetElement")
xfail_issue_63138 = xfail_test(reason="Missing ONNX Shape-15 support")
xfail_issue_63643 = xfail_test(reason="RuntimeError: Unsupported operation of type: Convolution name")

xfail_issue_77668 = xfail_test(reason="Accuracy issue related to Gather-8.")
@@ -4,7 +4,6 @@
import ngraph as ng
import numpy as np

from tests_compatibility import xfail_issue_54630
from tests_compatibility.test_ngraph.util import run_op_node

@@ -55,7 +54,6 @@ def test_gather_batch_dims_1():
    assert np.allclose(result, expected)

@xfail_issue_54630
def test_gather_negative_indices():
    input_data = np.array(
        [1.0, 1.1, 1.2, 2.0, 2.1, 2.2, 3.0, 3.1, 3.2], np.float32
@@ -71,7 +69,6 @@ def test_gather_negative_indices():
    assert np.allclose(result, expected)

@xfail_issue_54630
def test_gather_batch_dims_1_negative_indices():
    input_data = np.array([[1, 2, 3, 4, 5],
@@ -23,7 +23,8 @@ from tests_compatibility import (
    xfail_issue_48190,
    xfail_issue_58676,
    xfail_issue_63643,
    xfail_issue_onnx_models_140)
    xfail_issue_onnx_models_140,
    xfail_issue_77668)

MODELS_ROOT_DIR = tests_compatibility.MODEL_ZOO_DIR

@@ -167,6 +168,7 @@ if len(zoo_models) > 0:
        (xfail_issue_48190, "test_onnx_model_zoo_text_machine_comprehension_roberta_model_roberta_base_11_roberta_base_11_roberta_base_11_cpu"),
        (xfail_issue_onnx_models_140, "test_onnx_model_zoo_vision_object_detection_segmentation_duc_model_ResNet101_DUC_7_ResNet101_DUC_HDC_ResNet101_DUC_HDC_cpu"),
        (xfail_issue_63643, "test_onnx_model_zoo_vision_object_detection_segmentation_ssd_mobilenetv1_model_ssd_mobilenet_v1_10_ssd_mobilenet_v1_ssd_mobilenet_v1_cpu"),
        (xfail_issue_77668, "test_onnx_model_zoo_vision_object_detection_segmentation_faster_rcnn_model_FasterRCNN_10_faster_rcnn_R_50_FPN_1x_cpu"),

        # Model MSFT
        (xfail_issue_37973, "test_MSFT_opset7_tf_inception_v2_model_cpu"),
@@ -183,6 +185,8 @@ if len(zoo_models) > 0:
        (xfail_issue_39669, "test_MSFT_opset9_cgan_cgan_cpu"),
        (xfail_issue_47495, "test_MSFT_opset10_BERT_Squad_bertsquad10_cpu"),
        (xfail_issue_63643, "test_MSFT_opset10_mlperf_ssd_mobilenet_300_ssd_mobilenet_v1_coco_2018_01_28_cpu"),

        (xfail_issue_77668, "test_MSFT_opset10_faster_rcnn_faster_rcnn_R_50_FPN_1x_cpu"),
    ]
    for test_case in import_xfail_list + execution_xfail_list:
        xfail, test_name = test_case
@@ -50,6 +50,7 @@ class PropagateNMSPath: public pass::MatcherPass {
                op::util::BroadcastBase,
                opset8::StridedSlice,
                opset8::VariadicSplit,
                op::util::GatherBase,
                opset8::Concat,
                opset8::Convert>();
        matcher_pass_callback callback = [=](pattern::Matcher &m) {
@@ -60,7 +61,7 @@ class PropagateNMSPath: public pass::MatcherPass {
            })) {
                ov::set_nms_selected_indices(node.get());
            }
            return true;
            return false;
        };
        auto m = make_shared<pattern::Matcher>(node_pattern, matcher_name);
        register_matcher(m, callback);
@@ -77,6 +78,7 @@ class UpdateConvertGather: public pass::MatcherPass {
        auto indices = gather->input_value(1);
        if (!ov::has_nms_selected_indices(indices.get_node()))
            return false;
        gather->get_rt_info()["dontReverseIndices"] = true;
        auto out_type = (indices.get_element_type() == element::i64 ? element::u64 : element::u32);
        auto existing_convert = dynamic_pointer_cast<opset8::Convert>(indices.get_node_shared_ptr());
        if (existing_convert && indices.get_target_inputs().size() == 1) {
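
As an aside (an editorial sketch, not part of the patch): the transformation and the CPU Gather node communicate through node runtime info. Both sides of that contract, with the key name taken from this diff:

    // Producer side (UpdateConvertGather pass): mark the Gather that consumes
    // NMS-produced indices so the kernel keeps the -1 padding untouched.
    gather->get_rt_info()["dontReverseIndices"] = true;

    // Consumer side (CPU Gather node constructor): reverse indexing stays
    // enabled unless the marker is present.
    const auto& rti = op->get_rt_info();
    const bool reverseIndexing = (rti.find("dontReverseIndices") == rti.end());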
@@ -377,6 +377,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
    pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>();
    pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
    pass_config->disable<ngraph::pass::ConvertGather7ToGather1>();
    pass_config->disable<ngraph::pass::ConvertGather8ToGather7>();
    pass_config->disable<ngraph::pass::ConvertMinimum>();
    pass_config->disable<ngraph::pass::ConvertBroadcastToTiles>();
    pass_config->disable<ngraph::pass::ConvertReduceMeanToPooling>();
@@ -388,7 +389,6 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
    pass_config->enable<ngraph::pass::NormalizeL2Decomposition>();
    pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>();
    pass_config->enable<ngraph::pass::ConvertGather1ToGather7>();
    pass_config->enable<ngraph::pass::ConvertGather8ToGather7>();
    pass_config->enable<ngraph::pass::ConvertDetectionOutput1ToDetectionOutput8>();

    if (useLpt) {
1028  src/plugins/intel_cpu/src/nodes/kernels/gather_uni_kernel.cpp  (normal file; diff suppressed because it is too large)
209   src/plugins/intel_cpu/src/nodes/kernels/gather_uni_kernel.hpp  (normal file)
@@ -0,0 +1,209 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

// The Gather kernel implements two approaches for indices calculation: "Short" and "Long".
// 1. The Short approach is applicable when the number of elements is less than or equal to the vector register length.
//    It just uses a permutation of the current indices vector to retrieve the next one.
// 2. The Long approach is applicable when the number of elements is greater than the vector register length.
//    It increases the indices in the vector by the vector length and normalizes them against the upper bound.
//
// SUPPORTED CASES
//--------------------------------------------------------------
// After axis  |         AVX512         |          AVX2          |
// (block) size|  32bit |  16bit | 8bit |  32bit |  16bit | 8bit |
// STATIC SHAPES
//      1      |    X   |    X   |   X  |    X   |    X   |   X  |
// >1 & <=vlen |    X   |    X   |   X  |    X   |        |      |
// DYNAMIC SHAPES
//      1      |    X   |    X   |   X  |    X   |    X   |   X  |
//--------------------------------------------------------------
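
For illustration, a scalar sketch of the two index-update strategies described above (an editorial example, not part of the patch; it assumes <vector>, vlen is the number of index elements per vector register, and the mask construction mirrors initShortParams further down in this diff):

    // Short approach: specIdxSize <= vlen. The next indices vector is a fixed
    // permutation of the current one; lane i reads lane mask[i].
    std::vector<int> buildShortPermMask(int vlen, int specIdxSize) {
        std::vector<int> mask(vlen);
        mask[0] = vlen - specIdxSize;
        for (int i = 1; i < vlen; i++) {
            mask[i] = mask[i - 1] + 1;
            if (mask[i] == vlen)
                mask[i] = vlen - specIdxSize;   // wrap back to the last cycle
        }
        return mask;
    }

    // Long approach: specIdxSize > vlen. Every lane advances by the vector
    // length and is wrapped back into range against the upper bound.
    void nextLong(std::vector<int>& idx, int vlen, int specIdxSize) {
        for (auto& v : idx) {
            v += vlen;
            if (v >= specIdxSize)
                v -= specIdxSize;               // normalize with the upper bound
        }
    }

For example, with vlen = 8 and specIdxSize = 3 the short permutation mask is {5, 6, 7, 5, 6, 7, 5, 6}: the last three lanes always hold one full cycle of indices, so no per-iteration arithmetic is needed.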
#pragma once

#include "cpu/x64/jit_generator.hpp"
#include <mkldnn_types.h>

namespace MKLDNNPlugin {

struct jGatherConfParams {
    uint64_t dataTypeSize = 1lu;
    bool reverseIndexing = true;
    bool dynamicShapes = false;
    uint64_t batchDims = 0lu;
    uint64_t beforeAxisSize = 0lu;
    uint64_t specIdxSize = 0lu;
    uint64_t afterAxisSize = 0lu;
};

struct gatherJitExecArgs {
    const void* src;
    const void* indices;
    void* dst;
    const int* axisDim;
    const uint64_t* start;
    const uint64_t* specIndicesSize;
    const uint64_t* betweenBatchAndAxisSize;
    const uint64_t* axisAndAfterAxisSizeB;
    const uint64_t* srcAfterBatchSizeB;
    const int* permIdxMask;
    const int* beforeAxisDiff;

    const int* beforeAxisPermMask;
    const int* afterAxIdxB;
    const int* afterAxisPermMask;
    const uint64_t* afterAxisSize;
    const int* specIdxDiff;

    uint64_t workAmount = 0lu;
    uint64_t afterAxSize = 1lu;
    // Blocked short.
    uint64_t specIdxAndAfterAxIterB;
    uint64_t specIdxAndAfterAxSizeB;
    // Only static
    const int* specIdxB;
    const int* idxBatchSumB;
    const int* dataBeforeAxisSumB;
    uint64_t betweenBatchAndAxisIter;
};

struct jitGatherKernelBase {
    void (*ker_)(const gatherJitExecArgs *);
    void operator()(const gatherJitExecArgs *args) {
        assert(ker_);
        ker_(args);
    }
    explicit jitGatherKernelBase(const jGatherConfParams& jcp) : ker_(nullptr), jcp(jcp) {}
    virtual ~jitGatherKernelBase() {}

    virtual void create_ker() = 0;
    uint64_t getVecLen() {
        return vlen;
    }
    uint64_t getDataElPerVec() {
        return dataElPerVec;
    }
    uint64_t getIdxElPerVec() {
        return idxElPerVec;
    }
    virtual bool isSupportedConfiguration(uint64_t afterAxisSize) = 0;

protected:
    jGatherConfParams jcp;
    uint64_t vlen;
    uint64_t dataElPerVec;
    uint64_t idxElPerVec;
    static const unsigned shufMask8bitUni[16];
    static const unsigned permMask8bitA2[8];
    static const unsigned permMask8bitA5[16];
    static const unsigned shufMask16bitUni[16];
    static const unsigned permMask16bitA2[8];
    static const unsigned permMask16bitA5[16];
    static const unsigned incVec[16];

    int shortPermIdx[16];
    int shortBeforeAxisDiff[16];
};

template <dnnl::impl::cpu::x64::cpu_isa_t isa>
struct jitUniGatherKernel : public jitGatherKernelBase, public dnnl::impl::cpu::x64::jit_generator {
    DECLARE_CPU_JIT_AUX_FUNCTIONS(jitUniGatherKernel)

    explicit jitUniGatherKernel(const jGatherConfParams& jcp);

    void create_ker() override;
    void generate() override;

    bool isSupportedConfiguration(uint64_t afterAxisSize) override;

protected:
    using Vmm = typename dnnl::impl::utils::conditional<isa == dnnl::impl::cpu::x64::avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
    using Vmask = typename dnnl::impl::utils::conditional<isa == dnnl::impl::cpu::x64::avx2, Xbyak::Ymm, Xbyak::Opmask>::type;
    static const uint32_t vlenXmm = dnnl::impl::cpu::x64::cpu_isa_traits<dnnl::impl::cpu::x64::sse41>::vlen;
    static const uint32_t indicesTypeSize = sizeof(uint32_t);
    static const uint8_t idxTypeShift = 2;
    uint8_t dataTypeShift = 0;

    // Suffix B means "In Bytes".
    // 64b registers.
    const Xbyak::Reg64& regSrc = r8;
    const Xbyak::Reg64& regDst = r9;
    const Xbyak::Reg64& regIndices = r10;
    const Xbyak::Reg64& regIdxIter = r11;
    const Xbyak::Reg64& regWorkAmount = r12;
    const Xbyak::Reg64& regSpecIdxSizeB = r13;
    const Xbyak::Reg64& regAux1 = r14;
    const Xbyak::Reg64& regAux2 = rsi;
    const Xbyak::Reg64& regBetweenBatchAndAxisIter = r15;
    const Xbyak::Reg64& regBetweenBatchAndAxisSize = rbx;
    const Xbyak::Reg64& rSpecIdxAndAfterAxIterB = regIdxIter;
    const Xbyak::Reg64& rSpecIdxAndAfterAxSizeB = regSpecIdxSizeB;

    const Xbyak::Reg64& regParams = dnnl::impl::cpu::x64::abi_param1;

    // 32b registers.
    Xbyak::Reg32 reg32IdxIter = Xbyak::Reg32(regIdxIter.getIdx());
    Xbyak::Reg32 reg32SpecIdxSizeB = Xbyak::Reg32(regSpecIdxSizeB.getIdx());
    Xbyak::Reg32 reg32BetweenBatchAndAxisSize = Xbyak::Reg32(regBetweenBatchAndAxisSize.getIdx());
    Xbyak::Reg32 reg32BetweenBatchAndAxisIter = Xbyak::Reg32(regBetweenBatchAndAxisIter.getIdx());
    Xbyak::Reg32 reg32Aux1 = Xbyak::Reg32(regAux1.getIdx());
    Xbyak::Reg32 reg32Aux2 = Xbyak::Reg32(regAux2.getIdx());

    // Masks pool. Do not use k0 with gather instruction!
    Vmask masksContainer[8] = {Vmask(0), Vmask(1), Vmask(2), Vmask(3), Vmask(4), Vmask(5), Vmask(6), Vmask(7)};
    // Auxiliary pool.
    Vmm vmmAuxContainer[12] = {Vmm(0), Vmm(1), Vmm(2), Vmm(3), Vmm(4), Vmm(5), Vmm(6), /*AVX5*/ Vmm(16), Vmm(17), Vmm(18), Vmm(19), Vmm(20)};
    // Common.
    Vmm vmmZeros = Vmm(7);
    Vmm vmmSrcBeforeAxisSumB = Vmm(8);
    Vmm vmmSpecIdxB = Vmm(9);
    Vmm vmmSpecIdxSizeB = Vmm(10);
    Vmm vmmAxisDim = Vmm(11);
    Vmm vmmAxisAndAfterAxisSizeB = Vmm(12);

    // Only short.
    Vmm vmmSrcAfterBatchSizeB = Vmm(13);
    Vmm vmmPermIdxMask = Vmm(14);
    Vmm& vmmBeforeAxDiffB = vmmAxisAndAfterAxisSizeB;
    // Blocked short.
    Vmm& vmmSpecIdxDiff = vmmAuxContainer[4];
    Vmm& vmmAfterAxisSize = vmmAuxContainer[5];
    Vmm vmmAfterAxisIdxB = Vmm(15);
    Vmm& vmmAfterAxisPermMask = vmmPermIdxMask;
    Vmm& vmmBeforeAxPermMask = vmmAuxContainer[6];
    // Only long.
    Vmm vmmVecLenB = Vmm(13);
    Vmm vmmIdxBatchSumB = Vmm(14);

    // XMM
    Xbyak::Xmm xmmAuxContainer[6] = {Xbyak::Xmm(0), Xbyak::Xmm(1), Xbyak::Xmm(2), Xbyak::Xmm(3), Xbyak::Xmm(4), Xbyak::Xmm(16)};
    Xbyak::Xmm xmmZeros = Xbyak::Xmm(vmmZeros.getIdx());
    Xbyak::Xmm xmmSrcBeforeAxisSum = Xbyak::Xmm(vmmSrcBeforeAxisSumB.getIdx());
    Xbyak::Xmm xmmSpecIdxSizeB = Xbyak::Xmm(vmmSpecIdxSizeB.getIdx());
    Xbyak::Xmm xmmSpecIdxB = Xbyak::Xmm(vmmSpecIdxB.getIdx());

    void calcSrcShiftLong(Vmm* vAuxPool, bool shiftFirst = true);
    void calcSrcShiftLongBlock(Vmm* vAuxPool, bool shiftFirst = true);
    void calcSrcShiftShort(Vmm* vAuxPool, bool shiftFirst = true);
    void calcSrcShiftShortBlock(Vmm* vAuxPool, bool shiftFirst);
    void process(bool isShortIdx, bool blocked);
    void process32b(bool isShortIdx, bool blocked);
    void process16b(bool isShortIdx, bool blocked);
    void process8b(bool isShortIdx, bool blocked);
    void shiftIdxAndGather(Vmm* vAuxPool, bool isShortIdx, bool shiftFirst, bool blocked);
    void tail(bool isShortIdx, bool shiftFirst = true, bool blocked = false);
    // Aux functions.
    void normalizeRawIndices(Vmm& rawIndices, Vmask& dstMask, Vmask& aux);
    void normWithUpperBound(Vmm& vTarget, Vmm& vMax, Vmask& kAuxMask);
    void fillRestWorkMask(Vmask& kMask, Vmm& vAux, const Xbyak::Reg64& rWorkRest, const Xbyak::Reg64& rAux0, const Xbyak::Reg64& rAux1);
    void storeVectorPart(const Xbyak::Reg64& rDst, const Xbyak::Reg64& rToStoreCounter, Vmm& vmmSrc, Vmm& vAux);
    void uniVpGatherDd(Vmm& vDst, const Xbyak::Address& srcAddr, Vmask& vMask);
    void fillVlenVector();

    const unsigned* permMask8bitUni;
    const unsigned* permMask16bitUni;
};

} // namespace MKLDNNPlugin
@@ -9,21 +9,26 @@
#include "mkldnn_gather_node.h"
#include <ngraph/opsets/opset1.hpp>
#include "common/cpu_memcpy.h"
#include <utils/general_utils.h>
#include "kernels/gather_uni_kernel.hpp"

using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using namespace mkldnn::impl::cpu;

#define THROW_ERROR IE_THROW() << getTypeStr() << " node with name '" << getName() << "' "

bool MKLDNNGatherNode::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (!one_of(op->get_type_info(),
                ov::op::v7::Gather::get_type_info_static())) {
            errorMessage = "Not supported Gather operation version. CPU plug-in supports only 7 version.";
                ov::op::v7::Gather::get_type_info_static(),
                ov::op::v8::Gather::get_type_info_static())) {
            errorMessage = "Not supported Gather operation version. CPU plug-in supports only 7 and 8 versions.";
            return false;
        }

        if (op->get_input_node_shared_ptr(GATHER_AXIS)->get_type_info() != ov::op::v0::Constant::get_type_info_static()) {
            // TODO: Support parameterized Axis input for dynamic shapes.
            errorMessage = "Only Constant operation on 'axis' input is supported.";
        if (!isDynamicNgraphNode(op) && !ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(GATHER_AXIS))) {
            errorMessage = "Only Constant operation on 'axis' input is supported for static node.";
            return false;
        }
    } catch (...) {
@@ -34,79 +39,163 @@ bool MKLDNNGatherNode::isSupportedOperation(const std::shared_ptr<const ov::Node
}

MKLDNNGatherNode::MKLDNNGatherNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng,
        MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
        MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache), batchDims(0) {
    std::string errorMessage;
    if (!isSupportedOperation(op, errorMessage)) {
        IE_THROW(NotImplemented) << errorMessage;
    }
    errorPrefix = std::string("Layer Gather with name '") + op->get_friendly_name() + "' ";

    if (op->get_input_size() != 3 || op->get_output_size() != 1)
        IE_THROW() << errorPrefix << "has incorrect number of input/output edges!";
        THROW_ERROR << "has incorrect number of input/output edges!";

    dataSrcRank = inputShapes[GATHER_DATA].getRank();
    const auto idxRank = inputShapes[GATHER_INDEXES].getRank();
    if (dataSrcRank == 0 || idxRank == 0)
        IE_THROW() << errorPrefix << "has incorrect input parameters ranks.";
    const auto& dataShape = getInputShapeAtPort(GATHER_DATA);
    isDataShapeStat = dataShape.isStatic();
    dataSrcRank = dataShape.getRank();

    const auto& idxShape = getInputShapeAtPort(GATHER_INDICES);
    isIdxShapeStat = idxShape.isStatic();
    const auto indicesRank = idxShape.getRank();
    if (dataSrcRank == 0lu || indicesRank == 0lu)
        THROW_ERROR << "has incorrect input parameters ranks.";

    if (ov::is_type<ov::op::v8::Gather>(op)) {
        batchDims = static_cast<int>(ov::as_type_ptr<ov::op::v8::Gather>(op)->get_batch_dims());
        // WA for the NMS->Gather construction. NMS fills part of the output blob with -1 values when these
        // values must not be taken into account. There is an appropriate pass that looks for such subgraphs
        // and sets the dontReverseIndices flag.
        const auto& rti = op->get_rt_info();
        const auto& reverse = rti.find("dontReverseIndices");
        if (reverse == rti.end())
            reverseIndexing = true;
        else
            reverseIndexing = false;
    } else if (ov::is_type<ov::op::v7::Gather>(op)) {
        batchDims = static_cast<int>(ov::as_type_ptr<ov::op::v7::Gather>(op)->get_batch_dims());
        reverseIndexing = false;
    }

    batchDims = static_cast<int>(ov::as_type_ptr<ov::op::v7::Gather>(op)->get_batch_dims());
    if (batchDims < 0)
        batchDims += idxRank;
    if (batchDims < 0 || batchDims >= std::min(static_cast<int>(dataSrcRank), static_cast<int>(idxRank)))
        IE_THROW() << errorPrefix << "has incorrect batch_dims " << batchDims << "!";
        batchDims += indicesRank;
    if (batchDims < 0 || batchDims >= std::min(static_cast<int>(dataSrcRank), static_cast<int>(indicesRank)))
        THROW_ERROR << "has incorrect batch_dims " << batchDims << "!";

    if (op->get_input_node_shared_ptr(GATHER_AXIS)->get_type_info() == ov::op::v0::Constant::get_type_info_static()) {
    if (ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(GATHER_AXIS))) {
        isAxisInputConst = true;
        axis = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(GATHER_AXIS))->cast_vector<int>()[0];
        if (axis < 0)
            axis += dataSrcRank;
        if (axis < 0 || axis >= dataSrcRank || batchDims > axis)
            IE_THROW() << errorPrefix << "has incorrect input parameter axis value: " << axis;
            THROW_ERROR << "has incorrect input parameter axis value: " << axis;
    }
    dataSize = getOriginalInputPrecisionAtPort(GATHER_DATA).size();
}
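
As an editorial aside (not part of the patch), the normalization the constructor applies to negative axis and batch_dims values can be written in a standalone form; the helper below is hypothetical and only mirrors the checks shown above (it assumes <algorithm> and <stdexcept>):

    // Negative values count from the back; batch_dims must not exceed axis.
    void normalizeGatherAttrs(int& axis, int& batchDims, int dataRank, int indicesRank) {
        if (batchDims < 0)
            batchDims += indicesRank;          // e.g. -1 with indices rank 3 -> 2
        if (batchDims < 0 || batchDims >= std::min(dataRank, indicesRank))
            throw std::runtime_error("incorrect batch_dims");
        if (axis < 0)
            axis += dataRank;                  // e.g. -1 with data rank 4 -> 3
        if (axis < 0 || axis >= dataRank || batchDims > axis)
            throw std::runtime_error("incorrect axis");
    }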
void MKLDNNGatherNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    dataTypeSize = getOriginalInputPrecisionAtPort(GATHER_DATA).size();

    const auto& dataDims = getInputShapeAtPort(GATHER_DATA).getDims();
    if (isAxisInputConst && isDataShapeStat) {
        axisDim = dataDims[axis];
        beforeAxisSize = std::accumulate(dataDims.begin(), dataDims.begin() + axis, 1lu, std::multiplies<Dim>());
        betweenBatchAndAxisSize = std::accumulate(dataDims.begin() + batchDims, dataDims.begin() + axis, 1lu, std::multiplies<Dim>());
        afterAxisSize = std::accumulate(dataDims.begin() + axis + 1, dataDims.end(), 1lu, std::multiplies<Dim>());

        afterAxisSizeInBytes = afterAxisSize * dataTypeSize;
        axisAndAfterAxisSizeInBytes = axisDim * afterAxisSizeInBytes;
        srcAfterBatchSizeInBytes = betweenBatchAndAxisSize * axisAndAfterAxisSizeInBytes;
    }
    if (isDataShapeStat) {
        beforeBatchSize = std::accumulate(dataDims.begin(), dataDims.begin() + batchDims, 1lu, std::multiplies<Dim>());
    }
    if (isIdxShapeStat) {
        const auto& idxDims = getInputShapeAtPort(GATHER_INDICES).getDims();
        specIndicesSize = std::accumulate(idxDims.begin() + batchDims, idxDims.end(), 1lu, std::multiplies<Dim>());

        if (isDataShapeStat) {
            specIdxAndAfterAxSizeB = specIndicesSize * afterAxisSizeInBytes;
            totalWork = beforeBatchSize * betweenBatchAndAxisSize * specIndicesSize * afterAxisSize;
        }
    }

    // The implementation desc type will be redefined in prepareParams if a kernel is created.
    Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DATA);
    addSupportedPrimDesc({{LayoutType::ncsp, dataPrecision},
                          {LayoutType::ncsp, Precision::I32},
                          {LayoutType::ncsp, Precision::I32, isAxisInputConst}},
                         {{LayoutType::ncsp, dataPrecision}},
                         impl_desc_type::ref_any);
                         ref_any,
                         isDynamicNode());
}

void MKLDNNGatherNode::prepareParams() {
    auto& srcMemPtr = getParentEdgeAt(GATHER_DATA)->getMemoryPtr();
    if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
        IE_THROW() << errorPrefix << " has not allocated input memory.";
    if (getSelectedPrimitiveDescriptor() == nullptr)
        IE_THROW() << errorPrefix << " has unidentified preferable primitive descriptor.";
void MKLDNNGatherNode::createPrimitive() {
    uint64_t idxElPerVec = 1;
    if (!isDynamicNode()) {
        idxElPerVec = x64::mayiuse(x64::avx512_common) ? x64::cpu_isa_traits<x64::avx512_common>::vlen / idxTypeSize :
                x64::mayiuse(x64::avx2) ? x64::cpu_isa_traits<x64::avx2>::vlen / idxTypeSize : 1;
    }
    // The gather instruction is not supported by SSE.
    if ((x64::mayiuse(x64::avx512_common) || x64::mayiuse(x64::avx2)) &&
            (isDynamicNode() || afterAxisSize == 1 || (afterAxisSize <= idxElPerVec &&
            (x64::mayiuse(x64::avx512_common) || (x64::mayiuse(x64::avx2) && dataTypeSize == 4))))) {
        jGatherConfParams jcp;
        jcp.dataTypeSize = dataTypeSize;
        jcp.reverseIndexing = reverseIndexing;
        jcp.dynamicShapes = isDynamicNode();
        jcp.batchDims = batchDims;
        if (!jcp.dynamicShapes) {
            jcp.beforeAxisSize = beforeAxisSize;
            jcp.specIdxSize = specIndicesSize;
            jcp.afterAxisSize = afterAxisSize;
        } else {
            if (isDataShapeStat && isAxisInputConst) {
                jcp.beforeAxisSize = beforeAxisSize;
                jcp.afterAxisSize = afterAxisSize;
            }
            if (isIdxShapeStat) {
                jcp.specIdxSize = specIndicesSize;
            }
        }

    const auto& srcDims = srcMemPtr->getStaticDims();
    const auto& idxDims = getParentEdgeAt(GATHER_INDEXES)->getMemory().getStaticDims();
    const auto& dstDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims();
        if (x64::mayiuse(x64::avx512_common)) {
            jitKernel.reset(new jitUniGatherKernel<x64::avx512_common>(jcp));
        } else if (x64::mayiuse(x64::avx2)) {
            jitKernel.reset(new jitUniGatherKernel<x64::avx2>(jcp));
        }
        if (jitKernel) {
            jitKernel->create_ker();

    if (!isAxisInputConst) {
        axis = (reinterpret_cast<const int32_t*>(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->GetPtr()))[0];
        if (axis < 0)
            axis += dataSrcRank;
        if (axis < 0 || axis >= dataSrcRank || batchDims > axis)
            IE_THROW() << errorPrefix << "has incorrect input parameter axis value: " << axis;
            if (!isDynamicNode()) {
                const uint64_t dataElPerVec = jitKernel->getDataElPerVec();
                const uint64_t nthr = parallel_get_max_threads();
                const uint64_t wpt = ((totalWork / dataElPerVec) / nthr + 1) * dataElPerVec;
                execParamsPerThread.resize(nthr);

                parallel_nt(nthr, [&](const int ithr, const int nthr) {
                    const uint64_t dstStart = std::min(wpt * ithr, totalWork);
                    const uint64_t dstEnd = std::min(wpt * (ithr + 1), totalWork);

                    auto& p = execParamsPerThread[ithr];
                    p.workAmount = dstEnd - dstStart;
                    p.dstStart = dstStart;
                    p.specIdxInBytes.resize(dataElPerVec);
                    p.idxBatchSumInBytes.resize(dataElPerVec);
                    p.dataBeforeAxisSumInBytes.resize(dataElPerVec);
                    p.betweenBatchAndAxisIter = (dstStart / specIndicesSize) % betweenBatchAndAxisSize;
                    for (uint64_t j = 0lu; j < dataElPerVec; j++) {
                        p.specIdxInBytes[j] = (((dstStart + j) / afterAxisSize) % specIndicesSize) * idxTypeSize;
                        p.idxBatchSumInBytes[j] = ((dstStart + j) / (betweenBatchAndAxisSize * specIndicesSize * afterAxisSize)) *
                                specIndicesSize * idxTypeSize;
                        p.dataBeforeAxisSumInBytes[j] = ((dstStart + j) / (specIndicesSize * afterAxisSize)) * axisAndAfterAxisSizeInBytes;
                    }
                    initShortParams(p, dstStart);
                });
            }
        }
    }

    indexRange = srcDims[axis];
    batchSize = std::accumulate(srcDims.begin(), srcDims.begin() + batchDims, 1, std::multiplies<size_t>());
    outerSize = std::accumulate(srcDims.begin() + batchDims, srcDims.begin() + axis, 1, std::multiplies<size_t>());
    dataLength = std::accumulate(srcDims.begin() + axis + 1, srcDims.end(), 1, std::multiplies<size_t>());
    srcBatchStride = std::accumulate(srcDims.begin() + batchDims, srcDims.end(), 1, std::multiplies<size_t>());
    idxBatchStride = std::accumulate(idxDims.begin() + batchDims, idxDims.end(), 1, std::multiplies<size_t>());
    dstBatchStride = std::accumulate(dstDims.begin() + batchDims, dstDims.end(), 1, std::multiplies<size_t>());
    len = dataLength * dataSize;
    if (dataLength == 0)
        IE_THROW() << errorPrefix << "had incorrect input parameters dimension!";
    MKLDNNNode::createPrimitive();
}

bool MKLDNNGatherNode::needPrepareParams() const {
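
The per-thread partitioning in createPrimitive above rounds the work per thread up to a whole number of vector registers. A small editorial sketch of the same arithmetic (names hypothetical; assumes <algorithm> and <cstdint>):

    // Each thread gets wpt elements, a multiple of dataElPerVec, so every
    // thread except possibly the last processes only whole vectors.
    uint64_t threadChunkStart(uint64_t totalWork, uint64_t dataElPerVec,
                              uint64_t nthr, uint64_t ithr) {
        const uint64_t wpt = ((totalWork / dataElPerVec) / nthr + 1) * dataElPerVec;
        return std::min(wpt * ithr, totalWork);   // dstStart for thread ithr
    }
    // Thread ithr handles [threadChunkStart(.., ithr), threadChunkStart(.., ithr + 1)).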
@@ -116,32 +205,275 @@ bool MKLDNNGatherNode::needPrepareParams() const {
    return result;
}

void MKLDNNGatherNode::prepareParams() {
    auto& dataMemPtr = getParentEdgeAt(GATHER_DATA)->getMemoryPtr();
    if (!dataMemPtr || !dataMemPtr->GetPrimitivePtr())
        THROW_ERROR << " has not allocated input data memory.";
    auto& idxMemPtr = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr();
    if (!idxMemPtr || !idxMemPtr->GetPrimitivePtr())
        THROW_ERROR << " has not allocated input indices memory.";
    if (getSelectedPrimitiveDescriptor() == nullptr)
        THROW_ERROR << " has unidentified preferable primitive descriptor.";

    if (!isAxisInputConst) {
        axis = (reinterpret_cast<const int32_t*>(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->GetPtr()))[0];
        if (axis < 0)
            axis += dataSrcRank;
        if (axis < 0 || axis >= dataSrcRank || batchDims > axis)
            THROW_ERROR << "has incorrect input parameter axis value: " << axis;
    }

    if (!isDataShapeStat || !isAxisInputConst) {
        const auto& dataDims = dataMemPtr->getStaticDims();
        axisDim = dataDims[axis];
        beforeBatchSize = std::accumulate(dataDims.begin(), dataDims.begin() + batchDims, 1lu, std::multiplies<uint64_t>());
        betweenBatchAndAxisSize = std::accumulate(dataDims.begin() + batchDims, dataDims.begin() + axis, 1lu, std::multiplies<uint64_t>());
        afterAxisSize = std::accumulate(dataDims.begin() + axis + 1, dataDims.end(), 1lu, std::multiplies<uint64_t>());

        afterAxisSizeInBytes = afterAxisSize * dataTypeSize;
        axisAndAfterAxisSizeInBytes = axisDim * afterAxisSizeInBytes;
        srcAfterBatchSizeInBytes = betweenBatchAndAxisSize * axisAndAfterAxisSizeInBytes;

        if (isIdxShapeStat) {
            specIdxAndAfterAxSizeB = specIndicesSize * afterAxisSizeInBytes;
            totalWork = beforeBatchSize * betweenBatchAndAxisSize * specIndicesSize * afterAxisSize;
        }
    }

    if (!isIdxShapeStat) {
        const auto& idxDims = idxMemPtr->getStaticDims();
        specIndicesSize = std::accumulate(idxDims.begin() + batchDims, idxDims.end(), 1lu, std::multiplies<uint64_t>());

        specIdxAndAfterAxSizeB = specIndicesSize * afterAxisSizeInBytes;
        totalWork = beforeBatchSize * betweenBatchAndAxisSize * specIndicesSize * afterAxisSize;
    }

    const auto& selectedPD = getSelectedPrimitiveDescriptor();
    if (jitKernel && jitKernel->isSupportedConfiguration(afterAxisSize)) {
        if (x64::mayiuse(x64::avx512_common)) {
            selectedPD->setImplementationType(jit_avx512);
        } else if (x64::mayiuse(x64::avx2)) {
            selectedPD->setImplementationType(jit_avx2);
        }
    } else {
        selectedPD->setImplementationType(ref_any);
    }
}

void MKLDNNGatherNode::execute(mkldnn::stream strm) {
    const int32_t* srcIndexes = reinterpret_cast<const int32_t*>(getParentEdgeAt(GATHER_INDEXES)->getMemoryPtr()->GetPtr());
    if (jitKernel && jitKernel->isSupportedConfiguration(afterAxisSize)) {
        const void* srcIndices = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetPtr();
        const void* srcData = getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr();
        uint8_t* dstData = reinterpret_cast<uint8_t*>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());

        const uint64_t dataElPerVec = jitKernel->getDataElPerVec();

        auto threadBody = [&](const int ithr, const int nthr) {
            auto& p = execParamsPerThread[ithr];
            auto arg = gatherJitExecArgs();

            arg.src = srcData;
            arg.dst = dstData + p.dstStart * dataTypeSize;
            arg.indices = srcIndices;
            arg.start = &p.dstStart;
            arg.axisDim = &axisDim;
            arg.afterAxSize = afterAxisSize;
            arg.axisAndAfterAxisSizeB = &axisAndAfterAxisSizeInBytes;
            arg.srcAfterBatchSizeB = &srcAfterBatchSizeInBytes;
            arg.betweenBatchAndAxisSize = &betweenBatchAndAxisSize;
            arg.specIndicesSize = &specIndicesSize;
            arg.workAmount = p.workAmount;
            arg.specIdxB = p.specIdxInBytes.data();
            arg.idxBatchSumB = p.idxBatchSumInBytes.data();
            arg.dataBeforeAxisSumB = p.dataBeforeAxisSumInBytes.data();
            arg.betweenBatchAndAxisIter = p.betweenBatchAndAxisIter;

            const uint64_t idxElPerVec = jitKernel->getIdxElPerVec();

            if (afterAxisSize == 1 && specIndicesSize < idxElPerVec) { // Elementwise short case.
                arg.permIdxMask = p.permIdxMask.data();
                arg.beforeAxisDiff = p.srcBeforeAxisDiff.data();
            } else if (afterAxisSize > 1 && afterAxisSize <= dataElPerVec) { // Blocked short case.
                arg.afterAxIdxB = p.afterAxIdxInBytes.data();
                arg.specIdxDiff = p.specIdxDiff.data();
                arg.beforeAxisDiff = p.srcBeforeAxisDiff.data();
                arg.beforeAxisPermMask = p.beforeAxPermMask.data();
                arg.afterAxisPermMask = p.afterAxPermMask.data();
                arg.afterAxisSize = &afterAxisSize;
                arg.specIdxAndAfterAxIterB = p.specIdxAndAfterAxIterB;
                arg.specIdxAndAfterAxSizeB = specIdxAndAfterAxSizeB;
            }

            (*jitKernel)(&arg);
        };

        parallel_nt(0, threadBody);
    } else {
        execReference();
    }
}
void MKLDNNGatherNode::executeDynamicImpl(mkldnn::stream strm) {
    if (jitKernel && jitKernel->isSupportedConfiguration(afterAxisSize)) {
        const void* srcIndices = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetPtr();
        const void* srcData = getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr();
        uint8_t* dstData = reinterpret_cast<uint8_t*>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());

        const uint64_t dataElPerVec = jitKernel->getDataElPerVec();

        auto threadBody = [&](const int ithr, const int nthr) {
            const uint64_t wpt = ((totalWork / dataElPerVec) / nthr + 1) * dataElPerVec;
            const uint64_t start = std::min(wpt * ithr, totalWork);
            const uint64_t end = std::min(wpt * (ithr + 1), totalWork);
            const uint64_t workAmount = end - start;

            auto arg = gatherJitExecArgs();

            arg.src = srcData;
            arg.dst = dstData + afterAxisSizeInBytes * start;
            arg.indices = srcIndices;
            arg.start = &start;
            arg.axisDim = &axisDim;
            arg.afterAxSize = afterAxisSize;
            arg.axisAndAfterAxisSizeB = &axisAndAfterAxisSizeInBytes;
            arg.srcAfterBatchSizeB = &srcAfterBatchSizeInBytes;
            arg.betweenBatchAndAxisSize = &betweenBatchAndAxisSize;
            arg.specIndicesSize = &specIndicesSize;
            arg.workAmount = workAmount;

            const uint64_t idxElPerVec = jitKernel->getIdxElPerVec();
            int permIdxMask[16];
            int beforeAxisDiff[16];
            if (afterAxisSize == 1 && specIndicesSize < idxElPerVec) {
                permIdxMask[0] = idxElPerVec - specIndicesSize;
                int div = idxElPerVec / specIndicesSize;
                int remainder = idxElPerVec % specIndicesSize;
                for (int i = 1; i < idxElPerVec; i++) {
                    permIdxMask[i] = permIdxMask[i - 1] + 1;
                    if (permIdxMask[i] == idxElPerVec)
                        permIdxMask[i] = idxElPerVec - specIndicesSize;
                }
                for (int i = 0; i < idxElPerVec; i++) {
                    if (((start + i) % specIndicesSize) < (specIndicesSize - remainder))
                        beforeAxisDiff[i] = axisDim * div;
                    else
                        beforeAxisDiff[i] = axisDim * (div + 1);
                }
                arg.permIdxMask = permIdxMask;
                arg.beforeAxisDiff = beforeAxisDiff;
            }

            (*jitKernel)(&arg);
        };

        parallel_nt(0, threadBody);
    } else {
        execReference();
    }
}
void MKLDNNGatherNode::initShortParams(threadExecParams& p, const uint64_t start) {
    if (!jitKernel)
        THROW_ERROR << "has uninitialized kernel in function initShortParams.";
    const uint64_t idxElPerVec = jitKernel->getIdxElPerVec();

    if (afterAxisSize == 1) { // Elementwise gather.
        if (specIndicesSize >= idxElPerVec)
            return; // Not a short case.

        p.permIdxMask.resize(idxElPerVec);
        p.srcBeforeAxisDiff.resize(idxElPerVec);

        p.permIdxMask[0] = idxElPerVec - specIndicesSize;
        for (int i = 1; i < idxElPerVec; i++) {
            p.permIdxMask[i] = p.permIdxMask[i - 1] + 1;
            if (p.permIdxMask[i] == idxElPerVec)
                p.permIdxMask[i] = idxElPerVec - specIndicesSize;
        }

        const int div = idxElPerVec / specIndicesSize;
        const int remainder = idxElPerVec % specIndicesSize;
        for (uint64_t i = 0; i < idxElPerVec; i++) {
            if (((start + i) % specIndicesSize) < (specIndicesSize - remainder)) {
                p.srcBeforeAxisDiff[i] = axisDim * div;
            } else {
                p.srcBeforeAxisDiff[i] = axisDim * (div + 1);
            }
        }
    } else { // Blocked gather.
        if (afterAxisSize > idxElPerVec)
            return; // Not a short case.

        p.afterAxIdxInBytes.resize(idxElPerVec);
        p.afterAxPermMask.resize(idxElPerVec);
        p.beforeAxPermMask.resize(idxElPerVec);
        p.specIdxDiff.resize(idxElPerVec);
        p.srcBeforeAxisDiff.resize(idxElPerVec);

        int secondStart = start + idxElPerVec;
        for (int i = 0; i < idxElPerVec; i++) {
            p.afterAxIdxInBytes[i] = (start + i) % afterAxisSize;
            p.specIdxDiff[i] = (((secondStart + i) / afterAxisSize) % specIndicesSize) * idxTypeSize - p.specIdxInBytes[i];
            if (p.specIdxDiff[i] < 0)
                p.specIdxDiff[i] += specIndicesSize * idxTypeSize;
            p.srcBeforeAxisDiff[i] = ((start + i + idxElPerVec) / (specIndicesSize * afterAxisSize)) * axisAndAfterAxisSizeInBytes -
                    ((start + i) / (specIndicesSize * afterAxisSize)) * axisAndAfterAxisSizeInBytes;

            p.afterAxIdxInBytes[i] *= dataTypeSize;
            p.afterAxPermMask[i] = idxElPerVec - afterAxisSize + i;
            for (size_t j = 0lu; j < 6lu; j++) {
                if (p.afterAxPermMask[i] >= idxElPerVec)
                    p.afterAxPermMask[i] -= afterAxisSize;
            }
        }
        if (specIndicesSize * afterAxisSize < idxElPerVec) {
            p.beforeAxPermMask[0] = idxElPerVec - specIndicesSize * afterAxisSize;
            for (int i = 1; i < idxElPerVec; i++) {
                p.beforeAxPermMask[i] = p.beforeAxPermMask[i - 1] + 1;
                if (p.beforeAxPermMask[i] == idxElPerVec)
                    p.beforeAxPermMask[i] = idxElPerVec - specIndicesSize * afterAxisSize;
            }
        }

        p.specIdxAndAfterAxIterB = (start * dataTypeSize) % specIdxAndAfterAxSizeB;
    }
}
void MKLDNNGatherNode::execReference() {
    const int32_t* srcIndices = reinterpret_cast<const int32_t*>(getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetPtr());
    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr());
    uint8_t* dstData = reinterpret_cast<uint8_t*>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());

    parallel_for2d(batchSize, idxBatchStride, [&](const size_t i, const size_t j) {
        const unsigned int idx = static_cast<uint32_t>(srcIndexes[i * idxBatchStride + j]);
    const size_t dstIdxAndAfterAxisSize = afterAxisSizeInBytes * specIndicesSize;
    const size_t dstAfterBatchSize = betweenBatchAndAxisSize * dstIdxAndAfterAxisSize;
    parallel_for2d(beforeBatchSize, specIndicesSize, [&](const size_t b, const size_t j) {
        int ii = srcIndices[b * specIndicesSize + j];
        if (ii < 0) {
            if (reverseIndexing)
                ii += axisDim;
            else
                ii = axisDim;
        }
        const size_t idx = ii;
        const size_t c2 = dstAfterBatchSize * b + afterAxisSizeInBytes * j;
        if (idx < axisDim) {
            size_t c1 = srcAfterBatchSizeInBytes * b + afterAxisSizeInBytes * idx;
            for (size_t i = 0; i < betweenBatchAndAxisSize; i++) {
                size_t srcIdx = c1 + axisAndAfterAxisSizeInBytes * i;
                size_t dstIdx = c2 + dstIdxAndAfterAxisSize * i;

        // While negative indices are not supported, zeros should be set.
        if (idx < indexRange) {
            for (size_t k = 0; k < outerSize; ++k) {
                const size_t srcStride = (i * srcBatchStride + k * dataLength * indexRange) * dataSize;
                const size_t dstStride = (i * dstBatchStride + k * dataLength * idxBatchStride) * dataSize;

                cpu_memcpy(&dstData[dstStride + j * len], &srcData[srcStride + idx * len], len);
                cpu_memcpy(&dstData[dstIdx], &srcData[srcIdx], afterAxisSizeInBytes);
            }
        } else {
            for (size_t k = 0; k < outerSize; ++k) {
                memset(&dstData[(i * dstBatchStride + k * dataLength * idxBatchStride) * dataSize + j * len], 0, len);
            for (size_t i = 0; i < betweenBatchAndAxisSize; i++) {
                memset(&dstData[c2 + dstIdxAndAfterAxisSize * i], 0, afterAxisSizeInBytes);
            }
        }
    });
}

void MKLDNNGatherNode::executeDynamicImpl(mkldnn::stream strm) {
    execute(strm);
std::vector<VectorDims> MKLDNNGatherNode::shapeInfer() const {
    return MKLDNNNode::shapeInferGeneric(PortMask(1, 2, 3));
}

bool MKLDNNGatherNode::created() const {
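
For clarity, an editorial sketch (not part of the patch) of the per-index normalization the reference path above performs: Gather-8 treats negative indices as counting from the end of the axis, while indices that remain out of range zero-fill the destination.

    // Hypothetical helper mirroring execReference: returns the normalized
    // index; a result >= axisDim tells the caller to memset zeros instead
    // of copying data.
    int normalizeGatherIndex(int ii, int axisDim, bool reverseIndexing) {
        if (ii < 0)
            ii = reverseIndexing ? ii + axisDim : axisDim;  // axisDim => zero-fill
        return ii;
    }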
@@ -5,6 +5,7 @@
#pragma once

#include <mkldnn_node.h>
#include "kernels/gather_uni_kernel.hpp"

#include <memory>
#include <string>
@@ -18,37 +19,71 @@ public:

    void getSupportedDescriptors() override {};
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;

    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

    struct threadExecParams {
        std::vector<int> specIdxInBytes;
        std::vector<int> permIdxMask;
        std::vector<int> srcBeforeAxisDiff;
        std::vector<int> idxBatchSumInBytes;
        std::vector<int> dataBeforeAxisSumInBytes;

        std::vector<int> afterAxIdxInBytes;
        std::vector<int> specIdxDiff;
        std::vector<int> beforeAxPermMask;
        std::vector<int> afterAxPermMask;
        int betweenBatchAndAxisIter = 0;
        int specIdxAndAfterAxIterB = 0;

        uint64_t workAmount = 0;
        uint64_t dstStart = 0;
    };

protected:
    void executeDynamicImpl(mkldnn::stream strm) override;
    bool needPrepareParams() const override;
    void prepareParams() override;
    std::vector<VectorDims> shapeInfer() const override;

private:
    int axis = 0;
    int batchDims = 0;
    void initShortParams(threadExecParams& p, uint64_t start);
    void execReference();

    size_t indexRange = 0;
    size_t batchSize = 1;
    size_t outerSize = 1;
    size_t dataLength = 1;
    size_t srcBatchStride = 1;
    size_t idxBatchStride = 1;
    size_t dstBatchStride = 1;
    size_t dataSize = 1;
    size_t len = 1;
    int dataSrcRank = 1;
    bool isDataShapeStat = false;
    bool isIdxShapeStat = false;
    bool isAxisInputConst = false;

    bool reverseIndexing = false;

    uint64_t dataTypeSize = 1lu;
    static constexpr uint64_t idxTypeSize = sizeof(int);

    int axis = 0;
    int axisDim;
    int batchDims = 0;
    int dataSrcRank = 1;
    uint64_t specIndicesSize;
    uint64_t beforeBatchSize;
    uint64_t beforeAxisSize;
    uint64_t betweenBatchAndAxisSize;
    uint64_t afterAxisSize = 0lu;
    uint64_t afterAxisSizeInBytes = 0lu;
    uint64_t axisAndAfterAxisSizeInBytes = 0lu;
    uint64_t srcAfterBatchSizeInBytes = 0lu;
    uint64_t specIdxAndAfterAxSizeB = 0lu;
    uint64_t totalWork;

    std::vector<threadExecParams> execParamsPerThread;

    static constexpr size_t GATHER_DATA = 0;
    static constexpr size_t GATHER_INDEXES = 1;
    static constexpr size_t GATHER_INDICES = 1;
    static constexpr size_t GATHER_AXIS = 2;

    std::string errorPrefix;
    std::shared_ptr<jitGatherKernelBase> jitKernel;
};

} // namespace MKLDNNPlugin
@@ -100,6 +100,8 @@ const std::vector<std::vector<size_t>> inputShapes4D = {
const std::vector<std::vector<size_t>> indicesShapes_BD0 = {
        std::vector<size_t>{4},
        std::vector<size_t>{2, 2},
        std::vector<size_t>{3, 3},
        std::vector<size_t>{5, 2},
        std::vector<size_t>{3, 2, 4},
};

@@ -122,7 +124,8 @@ const auto gather7ParamsSubset_BD0 = testing::Combine(
        testing::Values(CommonTestUtils::DEVICE_CPU)
);

INSTANTIATE_TEST_SUITE_P(smoke_Gather7_BD0, Gather7LayerTest, gather7ParamsSubset_BD0, Gather7LayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_BD0, Gather7LayerTest, gather7ParamsSubset_BD0, Gather7LayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_BD0, Gather8LayerTest, gather7ParamsSubset_BD0, Gather8LayerTest::getTestCaseName);

const std::vector<std::vector<size_t>> indicesShapes_BD1 = {
        std::vector<size_t>{4, 2},
@@ -205,4 +208,78 @@ const auto gather7ParamsSubset_NegativeBD = testing::Combine(

INSTANTIATE_TEST_SUITE_P(smoke_Gather7_NegativeBD, Gather7LayerTest, gather7ParamsSubset_NegativeBD, Gather7LayerTest::getTestCaseName);


///// GATHER-8 /////

const std::vector<std::vector<size_t>> dataShapes4DGather8 = {
        {10, 3, 1, 2},
        {10, 3, 3, 1},
        {10, 2, 2, 7},
        {10, 2, 2, 2},
        {10, 3, 4, 4},
        {10, 2, 3, 17}
};
const std::vector<std::vector<size_t>> idxShapes4DGather8 = {
        {10, 1, 1},
        {10, 1, 2},
        {10, 1, 3},
        {10, 2, 2},
        {10, 1, 7},
        {10, 2, 4},
        {10, 3, 3},
        {10, 3, 5},
        {10, 7, 3},
        {10, 8, 7}
};
const std::vector<std::tuple<int, int>> axesBatches4DGather8 = {
        {3, 0},
        {-1, -2},
        {2, -3},
        {2, 1},
        {1, 0},
        {1, 1},
        {0, 0}
};

INSTANTIATE_TEST_CASE_P(smoke_static_4D, Gather8LayerTest,
        testing::Combine(
                testing::ValuesIn(dataShapes4DGather8),
                testing::ValuesIn(idxShapes4DGather8),
                testing::ValuesIn(axesBatches4DGather8),
                testing::ValuesIn(netPrecisions),
                testing::Values(InferenceEngine::Precision::UNSPECIFIED),
                testing::Values(InferenceEngine::Precision::UNSPECIFIED),
                testing::Values(InferenceEngine::Layout::ANY),
                testing::Values(InferenceEngine::Layout::ANY),
                testing::Values(CommonTestUtils::DEVICE_CPU)),
        Gather8LayerTest::getTestCaseName);

const auto gatherParamsVec2 = testing::Combine(
        testing::ValuesIn(std::vector<std::vector<size_t>>({{5, 4}, {11, 4}, {23, 4}, {35, 4}, {51, 4}, {71, 4}})),
        testing::ValuesIn(std::vector<std::vector<size_t>>({{1}})),
        testing::ValuesIn(std::vector<std::tuple<int, int>>{std::tuple<int, int>{1, 0}}),
        testing::ValuesIn(netPrecisions),
        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
        testing::Values(InferenceEngine::Layout::ANY),
        testing::Values(InferenceEngine::Layout::ANY),
        testing::Values(CommonTestUtils::DEVICE_CPU)
);

INSTANTIATE_TEST_CASE_P(smoke_Vec2, Gather8LayerTest, gatherParamsVec2, Gather8LayerTest::getTestCaseName);

const auto gatherParamsVec3 = testing::Combine(
        testing::ValuesIn(std::vector<std::vector<size_t>>({{4, 4}})),
        testing::ValuesIn(std::vector<std::vector<size_t>>({{5}, {11}, {21}, {35}, {55}, {70}})),
        testing::ValuesIn(std::vector<std::tuple<int, int>>{std::tuple<int, int>{1, 0}}),
        testing::ValuesIn(netPrecisions),
        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
        testing::Values(InferenceEngine::Layout::ANY),
        testing::Values(InferenceEngine::Layout::ANY),
        testing::Values(CommonTestUtils::DEVICE_CPU)
);

INSTANTIATE_TEST_CASE_P(smoke_Vec3, Gather8LayerTest, gatherParamsVec3, Gather8LayerTest::getTestCaseName);

} // namespace
@@ -69,8 +69,6 @@ std::vector<std::string> disabledTestPatterns() {

        // TODO: 57562 No dynamic output shape support
        R"(.*NonZeroLayerTest.*)",
        // TODO: 69084 Not constant Axis input produces dynamic output shape.
        R"(.*GatherLayerTestCPU.*constAx=False.*)",
        // TODO: 74961. Enforce precision via inType and outType does not work properly.
        R"(.*(RNN|GRU|LSTM).*ENFORCE_BF16=YES.*)",
        // Not expected behavior
@@ -1,345 +1,548 @@
//// Copyright (C) 2018-2022 Intel Corporation
//// SPDX-License-Identifier: Apache-2.0
////
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
//#include <shared_test_classes/single_layer/gather.hpp>
//#include "ngraph_functions/builders.hpp"
//#include "test_utils/cpu_test_utils.hpp"
//
//using namespace InferenceEngine;
//using namespace CPUTestUtils;
//
//namespace CPULayerTestsDefinitions {
//
//using inputShapesPair = std::pair<std::vector<ov::PartialShape>, std::vector<std::vector<ov::Shape>>>;
//
//typedef std::tuple<
//        inputShapesPair,             // Input shapes
//        int64_t,                     // Axis
//        int64_t,                     // Batch dims
//        InferenceEngine::Precision,  // Network precision
//        bool,                        // Is axis input constant
//        std::string,                 // Device name
//        CPUSpecificParams            // CPU specific params
//> GatherLayerTestCPUParams;
//
//class GatherLayerTestCPU : public testing::WithParamInterface<GatherLayerTestCPUParams>,
//                           virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
//public:
//    static std::string getTestCaseName(testing::TestParamInfo<GatherLayerTestCPUParams> obj) {
//        inputShapesPair inputShapes;
//        int axis, batchDims;
//        Precision netPrecision;
//        std::string targetDevice;
//        bool isAxisConstant;
//        CPUSpecificParams cpuParams;
//        std::tie(inputShapes, axis, batchDims, netPrecision, isAxisConstant, targetDevice, cpuParams) = obj.param;
//
//        std::ostringstream result;
//        result << "DynShapes=" << CommonTestUtils::partialShape2str(inputShapes.first) << "_";
//        result << "StatShapes=" << CommonTestUtils::vec2str(inputShapes.second) << "_";
//        result << "axis=" << axis << "_";
//        result << "batchDims=" << batchDims << "_";
//        result << "netPrc=" << netPrecision.name() << "_";
//        result << "constAx=" << (isAxisConstant ? "True" : "False") << "_";
//        result << "trgDev=" << targetDevice;
//        result << CPUTestsBase::getTestCaseName(cpuParams);
//
//        return result.str();
//    }
//
//protected:
//    void SetUp() override {
//        inputShapesPair inputShapes;
//        int64_t batchDims;
//        Precision netPrecision;
//        CPUSpecificParams cpuParams;
//        bool isAxisConstant = true;
//        std::tie(inputShapes, axis, batchDims, netPrecision, isAxisConstant, targetDevice, cpuParams) = this->GetParam();
//
//        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
//
//        selectedType = std::string("ref_any_") + netPrecision.name();
//
//        targetStaticShapes.reserve(inputShapes.second.size());
//        inputDynamicShapes.reserve(inputShapes.first.size());
//        for (int i = 0; i < (isAxisConstant ? 2 : 3); i++) {
//            if (inputShapes.second.size() > i)
//                targetStaticShapes.push_back({inputShapes.second[i]});
//            if (inputShapes.first.size() > i)
//                inputDynamicShapes.push_back(inputShapes.first[i]);
//        }
//        const ov::Shape& inputDataShape = targetStaticShapes.front().front(), indicesShape = targetStaticShapes.front()[1];
//        dataSrcRank = inputDataShape.size();
//
//        const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
//        ov::ParameterVector functionParams {
//            ngraph::builder::makeParams(ngPrc, { {"data", inputDataShape} })[0],
//            ngraph::builder::makeParams(ov::element::i32, { {"indices", indicesShape} })[0]
//        };
//        if (!isAxisConstant) {
//            functionParams.push_back(ngraph::builder::makeParams(ov::element::i32, { {"axis", {1}} })[0]);
//        }
//        auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(functionParams));
//        std::shared_ptr<ov::Node> gatherNode;
//        if (isAxisConstant) {
//            gatherNode = std::make_shared<ov::op::v8::Gather>(paramOuts[0], paramOuts[1],
//                ov::op::v0::Constant::create(ov::element::i64, ov::Shape({}), { axis }), batchDims);
//        } else {
//            gatherNode = std::make_shared<ov::op::v8::Gather>(paramOuts[0], paramOuts[1], paramOuts[2], batchDims);
//        }
//
//        ov::ResultVector results{ std::make_shared<ov::op::v0::Result>(gatherNode) };
//        function = std::make_shared<ov::Model>(results, functionParams, "Gather");
//    }
//
//    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &inputInfo) const override {
//        if (inputInfo.name() == "indices") {
//            const auto& td = inputInfo.getTensorDesc();
//            size_t normAxis = axis < 0 ? axis + dataSrcRank : axis;
//            const auto axDim = targetStaticShapes[index][0][normAxis];
//            if (axDim == 1) {
//                // Random generator cannot generate values in range [0; 0]
//                int values[1] = { 0 };
//                return FuncTestUtils::createAndFillBlobWithFloatArray<int32_t>(td, values, 1);
//            } else {
//                return FuncTestUtils::createAndFillBlob(td, axDim - 1, 0);
//            }
//        } else if (inputInfo.name() == "axis") {
//            int values[1] = { static_cast<int32_t>(axis) };
//            return FuncTestUtils::createAndFillBlobWithFloatArray<int32_t>(inputInfo.getTensorDesc(), values, 1);
//        } else {
//            return LayerTestsCommon::GenerateInput(inputInfo);
//        }
//    }
//
//    int64_t axis = 0;
//    int64_t dataSrcRank = 0;
//};
//
//TEST_P(GatherLayerTestCPU, CompareWithRefs) {
//    SKIP_IF_CURRENT_TEST_IS_DISABLED()
//
//    Run();
//    CheckPluginRelatedResults(executableNetwork, "Gather");
//}
//
//namespace {
//const std::vector<InferenceEngine::Precision> netPrecisions = {
//        InferenceEngine::Precision::FP32,
//        InferenceEngine::Precision::BF16,
//        InferenceEngine::Precision::I8
//};
//
//// 1D
//const std::vector<inputShapesPair> staticInputShapes1D = {
//    {
//        {},
//        { // Static shapes
//            {{4}, {2, 3, 4}}
//        }
//    },
//    {
//        {},
//        { // Static shapes
//            {{4}, {1}}
//        }
//    },
//    {
//        {},
//        { // Static shapes
//            {{4}, {9}}
//        }
//    },
//    {
//        {},
//        { // Static shapes
//            {{5}, {5}}
//        }
//    }
//};
//const std::vector<inputShapesPair> dynamicInputShapes1D = {
//    {
//        { // Origin dynamic shapes
//            {ov::Dimension(4, 6)}, {ov::Dimension(1, 10)}, {ov::Dimension(1, 2)}
//        },
//        { // Dynamic shapes instances
//            {{4}, {1}, {1}},
//            {{4}, {9}, {1}},
//            {{5}, {5}, {1}}
//        }
//    }
//};
//
//INSTANTIATE_TEST_SUITE_P(smoke_StaticShape1D, GatherLayerTestCPU,
//                ::testing::Combine(
//                    ::testing::ValuesIn(staticInputShapes1D),
//                    ::testing::Values(0),
//                    ::testing::Values(0),
//                    ::testing::ValuesIn(netPrecisions),
//                    ::testing::Values(true),
//                    ::testing::Values(CommonTestUtils::DEVICE_CPU),
//                    ::testing::Values(CPUSpecificParams{})),
//                GatherLayerTestCPU::getTestCaseName);
//
//INSTANTIATE_TEST_SUITE_P(smoke_DynamicShape1D, GatherLayerTestCPU,
//                ::testing::Combine(
//                    ::testing::ValuesIn(dynamicInputShapes1D),
//                    ::testing::Values(0),
//                    ::testing::Values(0),
//                    ::testing::ValuesIn(netPrecisions),
//                    ::testing::Values(true, false),
//                    ::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
// ::testing::Values(CPUSpecificParams{})),
|
||||
// GatherLayerTestCPU::getTestCaseName);
|
||||
//
|
||||
//// 2D
|
||||
//const std::vector<inputShapesPair> staticInputShapes2D = {
|
||||
// {
|
||||
// {},
|
||||
// { // Static shapes
|
||||
// {{4, 7}, {4, 55}}
|
||||
// }
|
||||
// },
|
||||
// {
|
||||
// {},
|
||||
// { // Static shapes
|
||||
// {{4, 17}, {4, 17}}
|
||||
// }
|
||||
// },
|
||||
// {
|
||||
// {},
|
||||
// { // Static shapes
|
||||
// {{4, 55}, {4, 7}}
|
||||
// }
|
||||
// }
|
||||
//};
|
||||
//const std::vector<inputShapesPair> dynamicInputShapes2D = {
|
||||
// {
|
||||
// { // Origin dynamic shapes
|
||||
// {4, ov::Dimension(3, 99)},
|
||||
// {4, ov::Dimension(3, 99)},
|
||||
// {1}
|
||||
// },
|
||||
// { // Dynamic shapes instances
|
||||
// {{4, 7}, {4, 55}, {1}},
|
||||
// {{4, 55}, {4, 7}, {1}},
|
||||
// {{4, 17}, {4, 17}, {1}}
|
||||
// }
|
||||
// }
|
||||
//};
|
||||
//const std::vector<inputShapesPair> dynamicInputShapes2Dv2 = {
|
||||
// {
|
||||
// { // Origin dynamic shapes
|
||||
// {ov::Dimension(3, 99), ov::Dimension(3, 99)},
|
||||
// {-1, ov::Dimension(3, 99)},
|
||||
// {1}
|
||||
// },
|
||||
// { // Dynamic shapes instances
|
||||
// {{4, 7}, {4, 55}, {1}},
|
||||
// {{8, 55}, {5, 7}, {1}}
|
||||
// }
|
||||
// }
|
||||
//};
|
||||
//
|
||||
//INSTANTIATE_TEST_SUITE_P(smoke_StaticShape2D, GatherLayerTestCPU,
|
||||
// ::testing::Combine(
|
||||
// ::testing::ValuesIn(staticInputShapes2D),
|
||||
// ::testing::Values(1),
|
||||
// ::testing::ValuesIn(std::vector<int64_t>{0, 1}),
|
||||
// ::testing::ValuesIn(netPrecisions),
|
||||
// ::testing::Values(true),
|
||||
// ::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
// ::testing::Values(CPUSpecificParams{})),
|
||||
// GatherLayerTestCPU::getTestCaseName);
|
||||
//
|
||||
//INSTANTIATE_TEST_SUITE_P(smoke_DynamicShape2D, GatherLayerTestCPU,
|
||||
// ::testing::Combine(
|
||||
// ::testing::ValuesIn(dynamicInputShapes2D),
|
||||
// ::testing::Values(1),
|
||||
// ::testing::ValuesIn(std::vector<int64_t>{0, 1}),
|
||||
// ::testing::ValuesIn(netPrecisions),
|
||||
// ::testing::Values(true, false),
|
||||
// ::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
// ::testing::Values(CPUSpecificParams{})),
|
||||
// GatherLayerTestCPU::getTestCaseName);
|
||||
//
|
||||
//INSTANTIATE_TEST_SUITE_P(smoke_DynamicShape2Dv2, GatherLayerTestCPU,
|
||||
// ::testing::Combine(
|
||||
// ::testing::ValuesIn(dynamicInputShapes2Dv2),
|
||||
// ::testing::Values(0),
|
||||
// ::testing::Values(0),
|
||||
// ::testing::ValuesIn(netPrecisions),
|
||||
// ::testing::Values(true, false),
|
||||
// ::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
// ::testing::Values(CPUSpecificParams{})),
|
||||
// GatherLayerTestCPU::getTestCaseName);
|
||||
//
|
||||
//// 4D
|
||||
//const std::vector<inputShapesPair> staticInputShapes4D = {
|
||||
// {
|
||||
// {},
|
||||
// { // Static shapes
|
||||
// {{4, 5, 6, 7}, {2, 5, 1}}
|
||||
// }
|
||||
// },
|
||||
// {
|
||||
// {},
|
||||
// { // Static shapes
|
||||
// {{10, 5, 6, 7}, {2, 5, 2}}
|
||||
// }
|
||||
// },
|
||||
// {
|
||||
// {},
|
||||
// { // Static shapes
|
||||
// {{16, 5, 6, 7}, {3, 5, 3}}
|
||||
// }
|
||||
// }
|
||||
//};
|
||||
//const std::vector<inputShapesPair> dynamicInputShapes4D = {
|
||||
// {
|
||||
// { // Origin dynamic shapes
|
||||
// {ov::Dimension(4, 20), 5, 6, 7},
|
||||
// {ov::Dimension(2, 4), 5, ov::Dimension(1, 4)},
|
||||
// {1}
|
||||
// },
|
||||
// { // Dynamic shapes instances
|
||||
// {{4, 5, 6, 7}, {2, 5, 1}, {1}},
|
||||
// {{10, 5, 6, 7}, {2, 5, 2}, {1}},
|
||||
// {{16, 5, 6, 7}, {3, 5, 3}, {1}}
|
||||
// }
|
||||
// },
|
||||
// {
|
||||
// { // Origin dynamic shapes
|
||||
// {-1, -1, -1, -1}, {-1, -1, -1}, {1}
|
||||
// },
|
||||
// { // Dynamic shapes instances
|
||||
// {{4, 5, 6, 4}, {2, 5, 16}, {1}},
|
||||
// {{10, 5, 6, 8}, {2, 5, 24}, {1}}
|
||||
// }
|
||||
// }
|
||||
//};
|
||||
//
|
||||
//INSTANTIATE_TEST_SUITE_P(smoke_StaticShape4D, GatherLayerTestCPU,
|
||||
// ::testing::Combine(
|
||||
// ::testing::ValuesIn(staticInputShapes4D),
|
||||
// ::testing::ValuesIn(std::vector<int64_t>{0, 1, 2, -1}),
|
||||
// ::testing::Values(0),
|
||||
// ::testing::ValuesIn(netPrecisions),
|
||||
// ::testing::Values(true),
|
||||
// ::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
// ::testing::Values(CPUSpecificParams{})),
|
||||
// GatherLayerTestCPU::getTestCaseName);
|
||||
//
|
||||
//INSTANTIATE_TEST_SUITE_P(smoke_DynamicShape4D, GatherLayerTestCPU,
|
||||
// ::testing::Combine(
|
||||
// ::testing::ValuesIn(dynamicInputShapes4D),
|
||||
// ::testing::ValuesIn(std::vector<int64_t>{0, 1, 2, -1}),
|
||||
// ::testing::Values(0),
|
||||
// ::testing::ValuesIn(netPrecisions),
|
||||
// ::testing::Values(true, false),
|
||||
// ::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
// ::testing::Values(CPUSpecificParams{})),
|
||||
// GatherLayerTestCPU::getTestCaseName);
|
||||
//} // namespace
|
||||
//} // namespace CPULayerTestsDefinitions
|
||||
|
||||
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "test_utils/cpu_test_utils.hpp"
|
||||
#include "functional_test_utils/ov_tensor_utils.hpp"
|
||||
|
||||
using namespace CPUTestUtils;
|
||||
using namespace ov::test;
|
||||
|
||||
namespace CPULayerTestsDefinitions {
|
||||
|
||||
typedef std::tuple<
|
||||
std::vector<InputShape>, // Input shapes
|
||||
std::tuple<int, int>, // Axis and Batch dim
|
||||
ElementType, // Network precision
|
||||
bool, // Is const Axis
|
||||
CPUSpecificParams, // CPU specific params
|
||||
std::map<std::string, std::string> // Additional config
|
||||
> GatherLayerTestCPUParams;
|
||||
|
||||
class GatherLayerTestCPU : public testing::WithParamInterface<GatherLayerTestCPUParams>,
|
||||
virtual public ov::test::SubgraphBaseTest, public CPUTestsBase {
|
||||
public:
|
||||
static std::string getTestCaseName(testing::TestParamInfo<GatherLayerTestCPUParams> obj) {
|
||||
std::vector<InputShape> inputShapes;
|
||||
std::tuple<int, int> axisAndBatchDims;
|
||||
ElementType netPrecision;
|
||||
bool isAxisConstant;
|
||||
CPUSpecificParams cpuParams;
|
||||
std::map<std::string, std::string> additionalConfig;
|
||||
|
||||
std::tie(inputShapes, axisAndBatchDims, netPrecision, isAxisConstant, cpuParams, additionalConfig) = obj.param;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "IS=(";
|
||||
for (size_t i = 0lu; i < inputShapes.size(); i++) {
|
||||
result << CommonTestUtils::partialShape2str({inputShapes[i].first}) << (i < inputShapes.size() - 1lu ? "_" : "");
|
||||
}
|
||||
result << ")_TS=";
|
||||
for (size_t i = 0lu; i < inputShapes.front().second.size(); i++) {
|
||||
result << "{";
|
||||
for (size_t j = 0lu; j < inputShapes.size(); j++) {
|
||||
result << CommonTestUtils::vec2str(inputShapes[j].second[i]) << (j < inputShapes.size() - 1lu ? "_" : "");
|
||||
}
|
||||
result << "}_";
|
||||
}
|
||||
result << "axis=" << std::get<0>(axisAndBatchDims) << "_";
|
||||
result << "batchDims=" << std::get<1>(axisAndBatchDims) << "_";
|
||||
result << "netPrc=" << netPrecision << "_";
|
||||
result << "constAx=" << (isAxisConstant ? "True" : "False") << "_";
|
||||
result << CPUTestsBase::getTestCaseName(cpuParams);
|
||||
|
||||
if (!additionalConfig.empty()) {
|
||||
result << "_PluginConf";
|
||||
for (auto &item : additionalConfig) {
|
||||
if (item.second == InferenceEngine::PluginConfigParams::YES)
|
||||
result << "_" << item.first << "=" << item.second;
|
||||
}
|
||||
}
|
||||
|
||||
return result.str();
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp() override {
|
||||
std::vector<InputShape> inputShapes;
|
||||
std::tuple<int, int> axisAndBatchDims;
|
||||
ElementType netPrecision;
|
||||
bool isAxisConstant;
|
||||
CPUSpecificParams cpuParams;
|
||||
std::map<std::string, std::string> additionalConfig;
|
||||
const ElementType intInputsPrecision = ElementType::i64;
|
||||
|
||||
std::tie(inputShapes, axisAndBatchDims, netPrecision, isAxisConstant, cpuParams, additionalConfig) = this->GetParam();
|
||||
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
|
||||
axis = std::get<0>(axisAndBatchDims);
|
||||
const int batchDims = std::get<1>(axisAndBatchDims);
|
||||
targetDevice = CommonTestUtils::DEVICE_CPU;
|
||||
init_input_shapes(inputShapes);
|
||||
configuration.insert(additionalConfig.begin(), additionalConfig.end());
|
||||
|
||||
if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) {
|
||||
selectedType = makeSelectedTypeStr(selectedType, ElementType::bf16);
|
||||
} else {
|
||||
selectedType = makeSelectedTypeStr(selectedType, netPrecision);
|
||||
}
|
||||
|
||||
if (!isAxisConstant) {
|
||||
inputDynamicShapes.push_back({1});
|
||||
for (size_t i = 0lu; i < targetStaticShapes.size(); i++) {
|
||||
targetStaticShapes[i].push_back({1});
|
||||
}
|
||||
}
|
||||
|
||||
ngraph::ParameterVector params {
|
||||
std::make_shared<ov::op::v0::Parameter>(netPrecision, inputDynamicShapes[0]),
|
||||
std::make_shared<ov::op::v0::Parameter>(intInputsPrecision, inputDynamicShapes[1])
|
||||
};
|
||||
params[0]->set_friendly_name("data");
|
||||
params[1]->set_friendly_name("indices");
|
||||
if (!isAxisConstant) {
|
||||
params.push_back(std::make_shared<ov::op::v0::Parameter>(intInputsPrecision, inputDynamicShapes[2]));
|
||||
params[2]->set_friendly_name("axis");
|
||||
}
|
||||
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));
|
||||
std::shared_ptr<ov::Node> gatherNode;
|
||||
if (isAxisConstant) {
|
||||
gatherNode = std::make_shared<ov::op::v8::Gather>(paramOuts[0], paramOuts[1],
|
||||
ov::op::v0::Constant::create(intInputsPrecision, ov::Shape({1}), { axis }), batchDims);
|
||||
} else {
|
||||
gatherNode = std::make_shared<ov::op::v8::Gather>(paramOuts[0], paramOuts[1], paramOuts[2], batchDims);
|
||||
}
|
||||
|
||||
function = makeNgraphFunction(netPrecision, params, gatherNode, "GatherCPU");
|
||||
}
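
    // Editorial note: when the axis is supplied at runtime rather than as a
    // constant, SetUp() above registers an extra {1}-shaped input for it;
    // generate_inputs() below fills that tensor with the requested axis value.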

    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
        const auto& funcInputs = function->inputs();
        inputs.clear();

        const size_t normAxis = axis < 0 ? axis + targetInputStaticShapes[0].size() : axis;
        const int32_t axisDim = targetInputStaticShapes[0][normAxis];

        for (size_t i = 0lu; i < funcInputs.size(); ++i) {
            const auto& funcInput = funcInputs[i];
            ov::runtime::Tensor tensor;

            if (funcInput.get_node()->get_friendly_name() == "data") {
                const auto dataTypeSize = funcInput.get_element_type().size();
                const uint32_t range = dataTypeSize == 4 ? 0x7FFFFFFF : dataTypeSize == 2 ? 0xFFFF : 0xFF;
                tensor = ov::test::utils::create_and_fill_tensor(
                        funcInput.get_element_type(), targetInputStaticShapes[0], range, 0, 1);
            } else if (funcInput.get_node()->get_friendly_name() == "indices") {
                tensor = ov::test::utils::create_and_fill_tensor(
                        funcInput.get_element_type(), targetInputStaticShapes[1], axisDim * 2, -axisDim, 1);
            } else if (funcInput.get_node()->get_friendly_name() == "axis") {
                tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), {1}, 1, axis, 1);
            }
            inputs.insert({funcInput.get_node_shared_ptr(), tensor});
        }
    }

    int64_t axis = 0;
};
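
// Illustrative sketch (editorial; not part of the test suite): how Gather-8
// resolves the negative indices produced by generate_inputs() above. For an
// axis of size axisDim, any index in [-axisDim, axisDim - 1] is valid, with
// negatives counted from the end. The helper name below is hypothetical:
//
//     int64_t normalizeGatherIndex(int64_t idx, int64_t axisDim) {
//         return idx < 0 ? idx + axisDim : idx;  // e.g. idx = -1, axisDim = 5 -> 4
//     }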

TEST_P(GatherLayerTestCPU, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    run();
    CheckPluginRelatedResults(executableNetwork, "Gather");
}

namespace {
const std::vector<ElementType> netPrecisions = {
    ElementType::f32,
    ElementType::bf16,
    ElementType::i8
};

std::vector<std::map<std::string, std::string>> additionalConfig
    = {{{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO}},
       {{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::YES}}};

std::vector<bool> isAxisConst{true, false};
const CPUSpecificParams cpuParamsRef{{}, {}, {"ref_any"}, "ref_any"};

std::vector<CPUSpecificParams> getCPUInfo() {
    std::vector<CPUSpecificParams> resCPUParams;
    if (InferenceEngine::with_cpu_x86_avx512f()) {
        resCPUParams.push_back(CPUSpecificParams{{}, {}, {"jit_avx512"}, "jit_avx512"});
    } else if (InferenceEngine::with_cpu_x86_avx2()) {
        resCPUParams.push_back(CPUSpecificParams{{}, {}, {"jit_avx2"}, "jit_avx2"});
    } else {
        resCPUParams.push_back(CPUSpecificParams{{}, {}, {"ref"}, "ref"});
    }
    return resCPUParams;
}
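
// Editorial note: the primitive names returned above presumably mirror the
// plugin's ISA dispatch for the new Gather kernel: a JIT implementation is
// emitted on AVX-512 and AVX2 machines, while older ISAs fall back to the
// reference implementation.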

///// 1D /////
const std::vector<std::vector<ov::test::InputShape>> staticInputShapes1D = {
    { { {}, { {1} } }, { {}, { {1} } } },
    { { {}, { {2} } }, { {}, { {2} } } },
    { { {}, { {3} } }, { {}, { {3} } } },
    { { {}, { {4} } }, { {}, { {4} } } },
    { { {}, { {5} } }, { {}, { {5} } } },
    { { {}, { {6} } }, { {}, { {6} } } },
    { { {}, { {7} } }, { {}, { {7} } } },
    { { {}, { {8} } }, { {}, { {8} } } },
    { { {}, { {9} } }, { {}, { {9} } } },
    { { {}, { {11} } }, { {}, { {11} } } },
    { { {}, { {13} } }, { {}, { {13} } } },
    { { {}, { {15} } }, { {}, { {15} } } },
    { { {}, { {16} } }, { {}, { {16} } } },
    { { {}, { {17} } }, { {}, { {17} } } },
    { { {}, { {19} } }, { {}, { {19} } } },
    { { {}, { {23} } }, { {}, { {23} } } },
    { { {}, { {24} } }, { {}, { {24} } } },
    { { {}, { {32} } }, { {}, { {32} } } },
    { { {}, { {33} } }, { {}, { {33} } } },
    { { {}, { {37} } }, { {}, { {37} } } },
    { { {}, { {41} } }, { {}, { {41} } } },
    { { {}, { {48} } }, { {}, { {48} } } },
    { { {}, { {51} } }, { {}, { {51} } } },
    { { {}, { {63} } }, { {}, { {63} } } },
    { { {}, { {64} } }, { {}, { {64} } } },
    { { {}, { {65} } }, { {}, { {65} } } }
};

INSTANTIATE_TEST_SUITE_P(smoke_static_1D, GatherLayerTestCPU,
        ::testing::Combine(
            ::testing::ValuesIn(staticInputShapes1D),
            ::testing::Values(std::tuple<int, int>{0, 0}),
            ::testing::ValuesIn(netPrecisions),
            ::testing::Values(true),
            ::testing::ValuesIn(getCPUInfo()),
            ::testing::Values(additionalConfig[0])),
        GatherLayerTestCPU::getTestCaseName);

const std::vector<std::vector<ov::test::InputShape>> dynamicInputShapes1D = {
    { { { ov::Dimension{1, 70} },  // Dynamic shape 0
        { {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {11}, {13}, {15}, {16}, {17}, {19}, {23}, {24}, {32}, {55}, {63}, {64}, {65} } },  // Target shapes
      { { -1 },                    // Dynamic shape 1
        { {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {11}, {13}, {15}, {16}, {17}, {19}, {23}, {24}, {32}, {55}, {63}, {64}, {65} } } }  // Target shapes
};

INSTANTIATE_TEST_SUITE_P(smoke_dynamic_1D, GatherLayerTestCPU,
        ::testing::Combine(
            ::testing::ValuesIn(dynamicInputShapes1D),
            ::testing::Values(std::tuple<int, int>{0, 0}),
            ::testing::ValuesIn(netPrecisions),
            ::testing::Values(true, false),
            ::testing::ValuesIn(getCPUInfo()),
            ::testing::Values(additionalConfig[0])),
        GatherLayerTestCPU::getTestCaseName);

///// 4D JIT /////
std::vector<std::vector<ov::test::InputShape>> get4DShapesJitStat() {
    std::vector<std::vector<ov::test::InputShape>> result = {};
    if (InferenceEngine::with_cpu_x86_avx2()) {
        result = {
            { { {}, { {18, 2, 2, 1} } },  // Static shapes
              { {}, { {18, 2, 8} } } },
            { { {}, { {17, 2, 2, 2} } },  // Static shapes
              { {}, { {17, 2, 7} } } },
            { { {}, { {16, 2, 2, 3} } },  // Static shapes
              { {}, { {16, 2, 6} } } },
            { { {}, { {15, 2, 2, 4} } },  // Static shapes
              { {}, { {15, 2, 5} } } },
            { { {}, { {14, 2, 2, 5} } },  // Static shapes
              { {}, { {14, 2, 4} } } },
            { { {}, { {13, 2, 2, 6} } },  // Static shapes
              { {}, { {13, 2, 3} } } },
            { { {}, { {12, 2, 2, 7} } },  // Static shapes
              { {}, { {12, 2, 2} } } },
            { { {}, { {11, 2, 2, 8} } },  // Static shapes
              { {}, { {11, 2, 1} } } }
        };
    }
    if (InferenceEngine::with_cpu_x86_avx512f()) {
        std::vector<std::vector<ov::test::InputShape>> tmp = {
            { { {}, { {19, 4, 2, 9} } },   // Static shapes
              { {}, { {19, 4, 16} } } },
            { { {}, { {20, 4, 2, 10} } },  // Static shapes
              { {}, { {20, 4, 15} } } },
            { { {}, { {21, 4, 2, 11} } },  // Static shapes
              { {}, { {21, 4, 14} } } },
            { { {}, { {22, 4, 2, 12} } },  // Static shapes
              { {}, { {22, 4, 13} } } },
            { { {}, { {23, 4, 2, 13} } },  // Static shapes
              { {}, { {23, 4, 12} } } },
            { { {}, { {24, 4, 2, 14} } },  // Static shapes
              { {}, { {24, 4, 11} } } },
            { { {}, { {25, 4, 2, 15} } },  // Static shapes
              { {}, { {25, 4, 10} } } },
            { { {}, { {26, 4, 2, 16} } },  // Static shapes
              { {}, { {26, 4, 9} } } }
        };
        result.insert(result.end(), tmp.begin(), tmp.end());
    }

    return result;
}

std::vector<std::tuple<int, int>> get4DAxisBatchJitStat(ov::element::Type type) {
    if (InferenceEngine::with_cpu_x86_avx512f()) {
        if (type.size() == 4 || type.size() == 2 || type.size() == 1)
            return std::vector<std::tuple<int, int>>{{3, 0}, {3, 1}, {3, 2}, {2, 0}, {2, 1}, {2, 2}};
    } else if (InferenceEngine::with_cpu_x86_avx2()) {
        if (type.size() == 4)
            return std::vector<std::tuple<int, int>>{{3, 0}, {3, 1}, {3, 2}, {2, 0}, {2, 1}, {2, 2}};
        else if (type.size() == 2 || type.size() == 1)
            return std::vector<std::tuple<int, int>>{{3, 0}, {3, 1}, {3, 2}};
    }
    return {};
}
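
// Editorial note: each tuple above is {axis, batchDims}; e.g. {3, 2} gathers
// along the innermost of the four data dimensions while treating the first two
// dimensions as batch (those dimensions match between the data and indices
// shapes in get4DShapesJitStat()).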

INSTANTIATE_TEST_SUITE_P(smoke_static_4D_jit32, GatherLayerTestCPU,
        ::testing::Combine(
            ::testing::ValuesIn(get4DShapesJitStat()),
            ::testing::ValuesIn(get4DAxisBatchJitStat(ElementType::f32)),
            ::testing::Values(ElementType::f32),
            ::testing::Values(true),
            ::testing::ValuesIn(getCPUInfo()),
            ::testing::ValuesIn(additionalConfig)),
        GatherLayerTestCPU::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_static_4D_jit16, GatherLayerTestCPU,
        ::testing::Combine(
            ::testing::ValuesIn(get4DShapesJitStat()),
            ::testing::ValuesIn(get4DAxisBatchJitStat(ElementType::bf16)),
            ::testing::Values(ElementType::bf16),
            ::testing::Values(true),
            ::testing::ValuesIn(getCPUInfo()),
            ::testing::Values(additionalConfig[0])),
        GatherLayerTestCPU::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_static_4D_jit8, GatherLayerTestCPU,
        ::testing::Combine(
            ::testing::ValuesIn(get4DShapesJitStat()),
            ::testing::ValuesIn(get4DAxisBatchJitStat(ElementType::i8)),
            ::testing::Values(ElementType::i8),
            ::testing::Values(true),
            ::testing::ValuesIn(getCPUInfo()),
            ::testing::Values(additionalConfig[0])),
        GatherLayerTestCPU::getTestCaseName);


std::vector<std::vector<ov::test::InputShape>> get4DShapesJitDyn() {
    std::vector<std::vector<ov::test::InputShape>> result = {};
    if (InferenceEngine::with_cpu_x86_avx2()) {
        result = {
            { { { ov::Dimension(5, 15), -1, -1, -1 },                              // Dynamic shape 0
                { {8, 2, 2, 1}, {10, 2, 2, 2}, {8, 2, 2, 3}, {10, 2, 2, 4} } },    // Target shapes
              { { ov::Dimension(4, 16), -1, -1 },                                  // Dynamic shape 1
                { {8, 2, 8}, {10, 2, 7}, {8, 2, 6}, {10, 2, 5} } } },              // Target shapes
            { { { -1, -1, -1, -1 },                                                // Dynamic shape 0
                { {8, 2, 2, 5}, {10, 2, 2, 6}, {8, 2, 2, 7}, {10, 2, 2, 8} } },    // Target shapes
              { { -1, -1, -1 },                                                    // Dynamic shape 1
                { {8, 2, 4}, {10, 2, 3}, {8, 2, 2}, {10, 2, 1} } } },              // Target shapes
            { { { ov::Dimension(5, 15), -1, -1, -1 },                              // Dynamic shape 0
                { {10, 2, 2, 1}, {10, 2, 2, 2}, {10, 2, 2, 3}, {10, 2, 2, 4} } },  // Target shapes
              { { 10, 2, 5 },                                                      // Dynamic shape 1
                { {10, 2, 5}, {10, 2, 5}, {10, 2, 5}, {10, 2, 5} } } },            // Target shapes
            { { { 8, 2, 2, 5 },                                                    // Dynamic shape 0
                { {8, 2, 2, 5}, {8, 2, 2, 5}, {8, 2, 2, 5}, {8, 2, 2, 5} } },      // Target shapes
              { { -1, -1, -1 },                                                    // Dynamic shape 1
                { {8, 2, 4}, {8, 2, 3}, {8, 2, 2}, {8, 2, 1} } } }                 // Target shapes
        };
    }
    if (InferenceEngine::with_cpu_x86_avx512f()) {
        std::vector<std::vector<ov::test::InputShape>> tmp = {
            { { { ov::Dimension(5, 15), -1, -1, -1 },                                 // Dynamic shape 0
                { {8, 2, 2, 9}, {10, 2, 2, 10}, {8, 2, 2, 11}, {10, 2, 2, 12} } },    // Target shapes
              { { ov::Dimension(4, 16), -1, -1 },                                     // Dynamic shape 1
                { {8, 2, 16}, {10, 2, 15}, {8, 2, 14}, {10, 2, 13} } } },             // Target shapes
            { { { -1, -1, -1, -1 },                                                   // Dynamic shape 0
                { {8, 2, 2, 13}, {10, 2, 2, 14}, {8, 2, 2, 15}, {10, 2, 2, 16} } },   // Target shapes
              { { -1, -1, -1 },                                                       // Dynamic shape 1
                { {8, 2, 12}, {10, 2, 11}, {8, 2, 10}, {10, 2, 9} } } },              // Target shapes
            { { { ov::Dimension(5, 15), -1, -1, -1 },                                 // Dynamic shape 0
                { {10, 2, 2, 9}, {10, 2, 2, 10}, {10, 2, 2, 11}, {10, 2, 2, 12} } },  // Target shapes
              { { 10, 2, 16 },                                                        // Dynamic shape 1
                { {10, 2, 16}, {10, 2, 16}, {10, 2, 16}, {10, 2, 16} } } },           // Target shapes
            { { { 8, 2, 2, 15 },                                                      // Dynamic shape 0
                { {8, 2, 2, 15}, {8, 2, 2, 15}, {8, 2, 2, 15}, {8, 2, 2, 15} } },     // Target shapes
              { { -1, -1, -1 },                                                       // Dynamic shape 1
                { {8, 2, 12}, {8, 2, 11}, {8, 2, 10}, {8, 2, 9} } } }                 // Target shapes
        };
        result.insert(result.end(), tmp.begin(), tmp.end());
    }

    return result;
}

std::vector<std::tuple<int, int>> get4DAxisBatchJitDyn(ov::element::Type type) {
    if (InferenceEngine::with_cpu_x86_avx512f() || InferenceEngine::with_cpu_x86_avx2()) {
        if (type.size() == 4 || type.size() == 2 || type.size() == 1)
            return std::vector<std::tuple<int, int>>{{3, 0}, {3, 1}, {3, 2}};
    }
    return {};
}

INSTANTIATE_TEST_SUITE_P(smoke_dynamic_4D_jit32, GatherLayerTestCPU,
        ::testing::Combine(
            ::testing::ValuesIn(get4DShapesJitDyn()),
            ::testing::ValuesIn(get4DAxisBatchJitDyn(ElementType::f32)),
            ::testing::Values(ElementType::f32),
            ::testing::ValuesIn(isAxisConst),
            ::testing::ValuesIn(getCPUInfo()),
            ::testing::ValuesIn(additionalConfig)),
        GatherLayerTestCPU::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_dynamic_4D_jit16, GatherLayerTestCPU,
        ::testing::Combine(
            ::testing::ValuesIn(get4DShapesJitDyn()),
            ::testing::ValuesIn(get4DAxisBatchJitDyn(ElementType::bf16)),
            ::testing::Values(ElementType::bf16),
            ::testing::ValuesIn(isAxisConst),
            ::testing::ValuesIn(getCPUInfo()),
            ::testing::Values(additionalConfig[0])),
        GatherLayerTestCPU::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_dynamic_4D_jit8, GatherLayerTestCPU,
        ::testing::Combine(
            ::testing::ValuesIn(get4DShapesJitDyn()),
            ::testing::ValuesIn(get4DAxisBatchJitDyn(ElementType::i8)),
            ::testing::Values(ElementType::i8),
            ::testing::ValuesIn(isAxisConst),
            ::testing::ValuesIn(getCPUInfo()),
            ::testing::Values(additionalConfig[0])),
        GatherLayerTestCPU::getTestCaseName);


///// 4D REFERENCE /////
std::vector<std::vector<ov::test::InputShape>> get4DShapesRefStat() {
    std::vector<std::vector<ov::test::InputShape>> result = {};
    if (InferenceEngine::with_cpu_x86_avx2()) {
        result = {
            { { {}, { {10, 2, 9, 9} } },  // Static shapes
              { {}, { {10, 2, 8} } } },
            { { {}, { {11, 2, 9, 2} } },  // Static shapes
              { {}, { {11, 2, 7} } } },
            { { {}, { {12, 2, 9, 3} } },  // Static shapes
              { {}, { {12, 2, 6} } } },
            { { {}, { {13, 2, 9, 4} } },  // Static shapes
              { {}, { {13, 2, 5} } } },
            { { {}, { {14, 2, 9, 5} } },  // Static shapes
              { {}, { {14, 2, 4} } } },
            { { {}, { {15, 2, 9, 6} } },  // Static shapes
              { {}, { {15, 2, 3} } } },
            { { {}, { {16, 2, 9, 7} } },  // Static shapes
              { {}, { {16, 2, 2} } } },
            { { {}, { {17, 2, 9, 8} } },  // Static shapes
              { {}, { {17, 2, 1} } } }
        };
    }
    if (InferenceEngine::with_cpu_x86_avx512f()) {
        std::vector<std::vector<ov::test::InputShape>> tmp = {
            { { {}, { {25, 4, 4, 17} } },  // Static shapes
              { {}, { {25, 4, 16} } } },
            { { {}, { {24, 4, 4, 18} } },  // Static shapes
              { {}, { {24, 4, 15} } } },
            { { {}, { {23, 4, 4, 19} } },  // Static shapes
              { {}, { {23, 4, 14} } } },
            { { {}, { {22, 4, 4, 20} } },  // Static shapes
              { {}, { {22, 4, 13} } } },
            { { {}, { {21, 4, 4, 21} } },  // Static shapes
              { {}, { {21, 4, 12} } } },
            { { {}, { {20, 4, 4, 22} } },  // Static shapes
              { {}, { {20, 4, 11} } } },
            { { {}, { {19, 4, 4, 23} } },  // Static shapes
              { {}, { {19, 4, 10} } } },
            { { {}, { {18, 4, 4, 24} } },  // Static shapes
              { {}, { {18, 4, 9} } } }
        };
        result.insert(result.end(), tmp.begin(), tmp.end());
    }

    return result;
}

std::vector<std::tuple<int, int>> get4DAxisBatchRefStat(ov::element::Type type) {
    if (InferenceEngine::with_cpu_x86_avx512f()) {
        if (type.size() == 4)
            return std::vector<std::tuple<int, int>>{{1, 0}, {1, 1}, {0, 0}};
        else if (type.size() == 2 || type.size() == 1)
            return std::vector<std::tuple<int, int>>{{0, 0}};
    } else if (InferenceEngine::with_cpu_x86_avx2()) {
        if (type.size() == 4)
            return std::vector<std::tuple<int, int>>{{1, 0}, {1, 1}, {0, 0}};
        else if (type.size() == 2 || type.size() == 1)
            return std::vector<std::tuple<int, int>>{{2, 0}, {2, 1}, {2, 2}, {1, 0}, {1, 1}, {0, 0}};
    }
    return {};
}

INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref32, GatherLayerTestCPU,
        ::testing::Combine(
            ::testing::ValuesIn(get4DShapesRefStat()),
            ::testing::ValuesIn(get4DAxisBatchRefStat(ElementType::f32)),
            ::testing::Values(ElementType::f32),
            ::testing::Values(true),
            ::testing::Values(cpuParamsRef),
            ::testing::ValuesIn(additionalConfig)),
        GatherLayerTestCPU::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref16, GatherLayerTestCPU,
        ::testing::Combine(
            ::testing::ValuesIn(get4DShapesRefStat()),
            ::testing::ValuesIn(get4DAxisBatchRefStat(ElementType::bf16)),
            ::testing::Values(ElementType::bf16),
            ::testing::Values(true),
            ::testing::Values(cpuParamsRef),
            ::testing::Values(additionalConfig[0])),
        GatherLayerTestCPU::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_static_4D_ref8, GatherLayerTestCPU,
        ::testing::Combine(
            ::testing::ValuesIn(get4DShapesRefStat()),
            ::testing::ValuesIn(get4DAxisBatchRefStat(ElementType::i8)),
            ::testing::Values(ElementType::i8),
            ::testing::Values(true),
            ::testing::Values(cpuParamsRef),
            ::testing::Values(additionalConfig[0])),
        GatherLayerTestCPU::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions

@ -104,9 +104,9 @@ std::string Gather8LayerTest::getTestCaseName(const testing::TestParamInfo<gathe
    std::tie(inputShape, indicesShape, axis_batchIdx, netPrecision, inPrc, outPrc, inLayout, outLayout, targetName) = obj.param;
    std::ostringstream result;
    result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
    result << "indicesShape=" << CommonTestUtils::vec2str(indicesShape) << "_";
    result << "axis=" << std::get<0>(axis_batchIdx) << "_";
    result << "batchIdx=" << std::get<1>(axis_batchIdx) << "_";
    result << "indicesShape=" << CommonTestUtils::vec2str(indicesShape) << "_";
    result << "netPRC=" << netPrecision.name() << "_";
    result << "inPRC=" << inPrc.name() << "_";
    result << "outPRC=" << outPrc.name() << "_";
@ -129,7 +129,7 @@ void Gather8LayerTest::SetUp() {
    auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(functionParams));
    auto indicesNode = ngraph::builder::makeConstant<int>(ngraph::element::i64, indicesShape, {}, true,
                                                          inputShape[axis < 0 ? axis + inputShape.size() : axis] - 1,
                                                          1 - static_cast<int>(inputShape[axis < 0 ? axis + inputShape.size() : axis]));
                                                          -static_cast<int>(inputShape[axis < 0 ? axis + inputShape.size() : axis]));
    auto axisNode = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape({}), { axis });
    auto gather = std::make_shared<ngraph::opset8::Gather>(paramOuts[0], indicesNode, axisNode, batchIdx);
    ngraph::ResultVector results{ std::make_shared<ngraph::opset8::Result>(gather) };
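
// Editorial note on the one-line change above: the lower bound of the random
// indices drops from 1 - dim to -dim, so the generated Gather-8 indices now
// span the full valid range [-dim, dim - 1] (e.g. for dim = 4, index -4
// addresses element 0).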

@ -275,7 +275,6 @@ class TestGather(OnnxRuntimeLayerTest):
                dict(shape=[6, 8, 10, 12], axis=-1, indices=[[[2, -1], [3, 2]], [[5, -1], [3, -2]]],
                     output_shape=[6, 8, 10, 2, 2, 2])]

    @pytest.mark.xfail(reason='negative indices are not yet implemented on CPU: xxx-54630')
    @pytest.mark.parametrize("params", test_data_negative_indices)
    @pytest.mark.nightly
    def test_gather_nightly_negative_indices(self, params, ie_device, precision, ir_version,