From dcc288680b5282401a748eaf8748686de572e527 Mon Sep 17 00:00:00 2001
From: Alexandra Sidorova <alexandra.sidorova@intel.com>
Date: Thu, 20 May 2021 09:09:28 +0300
Subject: [PATCH] [CPU] Added Gather-7 support (#5552)

---
 .../src/mkldnn_plugin/mkldnn_plugin.cpp       |   4 +
 .../nodes/mkldnn_gather_node.cpp              | 124 ++++++------
 .../mkldnn_plugin/nodes/mkldnn_gather_node.h  |  30 ++-
 .../single_layer_tests/gather.cpp             | 178 ++++++++++++++++--
 .../skip_tests_config.cpp                     |   2 +
 .../src/add_conver_to_reorder.cpp             |  21 ---
 6 files changed, 240 insertions(+), 119 deletions(-)
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
index 3ab7622ac91..acc93f72ebd 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
@@ -30,6 +30,8 @@
 #include <transformations/op_conversions/convert_shuffle_channels3.hpp>
 #include <transformations/op_conversions/convert_space_to_depth.hpp>
 #include <transformations/op_conversions/convert_gelu.hpp>
+#include <transformations/op_conversions/convert_gather_v7_to_gather_v1.hpp>
+#include <transformations/op_conversions/convert_gather_v1_to_gather_v7.hpp>
 #include <transformations/op_conversions/gelu7_downgrade.hpp>
 #include <transformations/op_conversions/hswish_decomposition.hpp>
 #include <transformations/op_conversions/hsigmoid_decomposition.hpp>
@@ -291,8 +293,10 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
     pass_config->disable<ngraph::pass::ConvertShuffleChannels3>();
     pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>();
     pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
+    pass_config->disable<ngraph::pass::ConvertGather7ToGather1>();
 
     pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>();
+    pass_config->enable<ngraph::pass::ConvertGather1ToGather7>();
 
     if (useLpt) {
         pass_config->set_callback<ngraph::pass::ConvertQuantizeDequantize>([](const_node_ptr &node) -> bool {
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp
index 966fc4003c3..3bd50aadf33 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp
@@ -2,14 +2,12 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <cmath>
 #include <vector>
 #include <string>
 #include <mkldnn_types.h>
 #include "ie_parallel.hpp"
 #include "mkldnn_gather_node.h"
 #include <ngraph/opsets/opset1.hpp>
-#include <precision_utils.h>
 #include "common/cpu_memcpy.h"
 
 using namespace MKLDNNPlugin;
@@ -17,9 +15,9 @@ using namespace InferenceEngine;
 
 bool MKLDNNGatherNode::isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
-        auto gatherOp = ngraph::as_type_ptr<const ngraph::op::v1::Gather>(op);
+        auto gatherOp = ngraph::as_type_ptr<const ngraph::op::v7::Gather>(op);
         if (!gatherOp) {
-            errorMessage = "Only opset1 Gather operation is supported";
+            errorMessage = "Only opset7 Gather operation is supported";
             return false;
         }
 
@@ -44,90 +42,96 @@ MKLDNNGatherNode::MKLDNNGatherNode(const std::shared_ptr<ngraph::Node>& op, cons
         IE_THROW(NotImplemented) << errorMessage;
     }
 
-    auto gatherOp = ngraph::as_type_ptr<ngraph::op::v1::Gather>(op);
+    auto gatherOp = ngraph::as_type_ptr<ngraph::op::v7::Gather>(op);
     if (gatherOp->get_input_size() != 3 || gatherOp->get_output_size() != 1)
         IE_THROW() << errorPrefix_ << "has incorrect number of input/output edges!";
 
-    const SizeVector& dictionary_dims = gatherOp->get_input_shape(GATHER_DICTIONARY);
-    if (dictionary_dims.size() == 0)
+    const SizeVector& srcDims = gatherOp->get_input_shape(GATHER_DATA);
+    const SizeVector& idxDims = gatherOp->get_input_shape(GATHER_INDEXES);
+    if (srcDims.size() == 0)
         IE_THROW() << errorPrefix_ << "has incorrect input parameters dimension!";
 
     axis = static_cast<int>(gatherOp->get_axis());
     if (axis < 0)
-        axis += dictionary_dims.size();
-    // Dictionary must be at least rank axis + 1
-    if (!(-static_cast<int>(dictionary_dims.size()) <= axis && axis < static_cast<int>(dictionary_dims.size())))
+        axis += srcDims.size();
+    if (!(0 <= axis && axis < static_cast<int>(srcDims.size())))
         IE_THROW() << errorPrefix_ << "has incorrect input parameters dimensions and axis number!";
 
-    //  Find number of dictionaries, index range and data length
-    for (int i = 0; i < axis; i++)
-        numDictionaries *= dictionary_dims[i];
-    indexRange = dictionary_dims[axis];
-    for (size_t i = axis + 1; i < dictionary_dims.size(); i++)
-        dataLength *= dictionary_dims[i];
+    batchDims = static_cast<int>(gatherOp->get_batch_dims());
+    if (batchDims < 0)
+        batchDims += idxDims.size();
+    if (!(0 <= batchDims && batchDims <= std::min(static_cast<int>(srcDims.size()), static_cast<int>(idxDims.size()))) ||
+        batchDims > axis)
+        IE_THROW() << errorPrefix_ << "has incorrect batch_dims " << batchDims << "!";
 
-    if (dataLength == 0)
-        IE_THROW() << errorPrefix_ << "had incorrect input parameters dimension!";
+    for (int i = 0; i < batchDims; i++) {
+        if (srcDims[i] != idxDims[i])
+            IE_THROW() << errorPrefix_ << "has incorrect first " << batchDims << " data and indices dimensions!";
+    }
 }
 
 void MKLDNNGatherNode::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    Precision inIdxPrecision = getOriginalInputPrecisionAtPort(GATHER_INDEXES);
-    if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32 && inIdxPrecision != Precision::FP16)
-        inIdxPrecision = Precision::I32;
-
-    Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DICTIONARY);
-
+    Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DATA);
     addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision},
-                          {TensorDescCreatorTypes::ncsp, inIdxPrecision},
+                          {TensorDescCreatorTypes::ncsp, Precision::I32},
                           {TensorDescCreatorTypes::ncsp, Precision::I32}},
                          {{TensorDescCreatorTypes::ncsp, dataPrecision}},
                          impl_desc_type::ref_any);
 }
 
-template <typename index_t, class Conversion>
-void MKLDNNGatherNode::gather() {
-    size_t src_indexSize = getParentEdgeAt(GATHER_INDEXES)->getBlob()->size();
-    size_t outputSize = getChildEdgeAt(0)->getBlob()->byteSize();
-    const auto *src_index = reinterpret_cast<const index_t *>(getParentEdgeAt(GATHER_INDEXES)->getMemoryPtr()->GetPtr());
-    const auto *src_dataDict = reinterpret_cast<const uint8_t *>(getParentEdgeAt(GATHER_DICTIONARY)->getMemoryPtr()->GetPtr());
-    auto *dst_data = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
+void MKLDNNGatherNode::createPrimitive() {
+    auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
+    auto& srcMemPtr = getParentEdgeAt(GATHER_DATA)->getMemoryPtr();
+    if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
+        IE_THROW() << errorPrefix_ << " has not allocated destination memory.";
+    if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
+        IE_THROW() << errorPrefix_ << " has not allocated input memory.";
+    if (getSelectedPrimitiveDescriptor() == nullptr)
+        IE_THROW() << errorPrefix_ << " has unidentified preferable primitive descriptor.";
 
-    size_t len = dataLength * getParentEdgeAt(GATHER_DICTIONARY)->getDesc().getPrecision().size();
+    const SizeVector srcDims = getParentEdgeAt(GATHER_DATA)->getDims().ToSizeVector();
+    const SizeVector idxDims = getParentEdgeAt(GATHER_INDEXES)->getDims().ToSizeVector();
+    const SizeVector dstDims = getChildEdgeAt(0)->getDims().ToSizeVector();
+    dataSize = getParentEdgeAt(GATHER_DATA)->getDesc().getPrecision().size();  // size of one data element
 
-    parallel_for(src_indexSize, [&](size_t i) {
-        unsigned int idx = Conversion()(src_index[i]);
+    indexRange = srcDims[axis];  // extent of the gathered axis; indices must be below it
+    batchSize = std::accumulate(srcDims.begin(), srcDims.begin() + batchDims, size_t{1}, std::multiplies<size_t>());
+    outerSize = std::accumulate(srcDims.begin() + batchDims, srcDims.begin() + axis, size_t{1}, std::multiplies<size_t>());
+    dataLength = std::accumulate(srcDims.begin() + axis + 1, srcDims.end(), size_t{1}, std::multiplies<size_t>());
+    srcBatchStride = std::accumulate(srcDims.begin() + batchDims, srcDims.end(), size_t{1}, std::multiplies<size_t>());
+    idxBatchStride = std::accumulate(idxDims.begin() + batchDims, idxDims.end(), size_t{1}, std::multiplies<size_t>());
+    dstBatchStride = std::accumulate(dstDims.begin() + batchDims, dstDims.end(), size_t{1}, std::multiplies<size_t>());
+    len = dataLength * dataSize;  // size of one contiguous slice copied per index in execute()
 
-        //  Index clipping
-        if (idx < indexRange) {
-            //  Copying data to destination from Dictionary
-            for (size_t j = 0; j < numDictionaries; j++) {
-                cpu_memcpy_s(&dst_data[len * (i + j * src_indexSize)],
-                            outputSize - (len * (i + j * src_indexSize)),
-                            &src_dataDict[len * (idx + j * indexRange)],
-                            len);
-            }
-        } else {
-            for (size_t j = 0; j < numDictionaries; j++) {
-                memset(&dst_data[len * (i + j * src_indexSize)], 0, len);
-            }
-        }
-    });
+    if (dataLength == 0)
+        IE_THROW() << errorPrefix_ << "had incorrect input parameters dimension!";
 }
 
 void MKLDNNGatherNode::execute(mkldnn::stream strm) {
-    switch (getParentEdgeAt(GATHER_INDEXES)->getDesc().getPrecision()) {
-        case Precision::FP32:
-            gather<float, f32toUi32>();
-            break;
-        case Precision::I32:
-            gather<int32_t, i32toUi32>();
-            break;
-        default:
-            return IE_THROW() << "Unsupported indices input precision";
-    }
+    const int32_t* srcIndexes = reinterpret_cast<const int32_t*>(getParentEdgeAt(GATHER_INDEXES)->getMemoryPtr()->GetPtr());
+    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr());
+    uint8_t* dstData = reinterpret_cast<uint8_t*>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
+
+    parallel_for2d(batchSize, idxBatchStride, [&](const size_t i, const size_t j) {
+        const unsigned int idx = static_cast<uint32_t>(srcIndexes[i * idxBatchStride + j]);
+
+        // Negative indices are not supported: cast to unsigned they fail the range check, so (like any out-of-range index) they give zeros
+        if (idx < indexRange) {
+            for (size_t k = 0; k < outerSize; ++k) {
+                const size_t srcStride = (i * srcBatchStride + k * dataLength * indexRange) * dataSize;
+                const size_t dstStride = (i * dstBatchStride + k * dataLength * idxBatchStride) * dataSize;
+
+                cpu_memcpy(&dstData[dstStride + j * len], &srcData[srcStride + idx * len], len);
+            }
+        } else {
+            for (size_t k = 0; k < outerSize; ++k) {
+                memset(&dstData[(i * dstBatchStride + k * dataLength * idxBatchStride) * dataSize + j * len], 0, len);
+            }
+        }
+    });
 }
 
 bool MKLDNNGatherNode::created() const {
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h
index 4631436f3c6..6c7663bd95f 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h
@@ -18,37 +18,31 @@ public:
 
     void getSupportedDescriptors() override {};
     void initSupportedPrimitiveDescriptors() override;
-    void createPrimitive() override {};
+    void createPrimitive() override;
     void execute(mkldnn::stream strm) override;
     bool created() const override;
 
     static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
 
 private:
-    struct f32toUi32 {
-        inline unsigned int operator()(const float value) {
-            return static_cast<unsigned int>(value);
-        }
-    };
-
-    struct i32toUi32 {
-        inline unsigned int operator()(const int32_t value) {
-            return static_cast<unsigned int>(value);
-        }
-    };
-
     int axis = 0;
-    size_t numDictionaries = 1;
+    int batchDims = 0;
+
     size_t indexRange = 0;
+    size_t batchSize = 1;
+    size_t outerSize = 1;
     size_t dataLength = 1;
-    static const size_t GATHER_DICTIONARY = 0;
+    size_t srcBatchStride = 1;
+    size_t idxBatchStride = 1;
+    size_t dstBatchStride = 1;
+    size_t dataSize = 1;
+    size_t len = 1;
+
+    static const size_t GATHER_DATA = 0;
     static const size_t GATHER_INDEXES = 1;
     static const size_t GATHER_AXIS = 2;
 
     std::string errorPrefix_;
-
-    template <typename index_t, class Conversion>
-    void gather();
 };
 
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/gather.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/gather.cpp
index 483979f1b7e..32038696be6 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/gather.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/gather.cpp
@@ -12,29 +12,30 @@ using namespace LayerTestsDefinitions;
 namespace {
 
 const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::I64,
         InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16,
+        InferenceEngine::Precision::BF16,
+        InferenceEngine::Precision::I8
 };
 
-const std::vector<std::vector<size_t>> inputShapes = {
-        std::vector<size_t>{10, 20, 30, 40},
-};
-
-const std::vector<std::vector<int>> indices = {
-        std::vector<int>{0, 3, 2, 1},
-};
-const std::vector<std::vector<size_t>> indicesShapes = {
+const std::vector<std::vector<size_t>> inputShapes_1D = {
         std::vector<size_t>{4},
-        std::vector<size_t>{2, 2}
 };
 
-const std::vector<int> axes = {0, 1, 2, 3, -1};
+const std::vector<std::vector<size_t>> indicesShapes_1D = {
+        std::vector<size_t>{1},
+        std::vector<size_t>{3},
+};
 
+const std::vector<std::tuple<int, int>> axes_batchdims_1D = {
+        std::tuple<int, int>{0, 0}
+};
 
-const auto params = testing::Combine(
-        testing::ValuesIn(indices),
-        testing::ValuesIn(indicesShapes),
-        testing::ValuesIn(axes),
-        testing::ValuesIn(inputShapes),
+const auto gather7Params_1D = testing::Combine(
+        testing::ValuesIn(inputShapes_1D),
+        testing::ValuesIn(indicesShapes_1D),
+        testing::ValuesIn(axes_batchdims_1D),
         testing::ValuesIn(netPrecisions),
         testing::Values(InferenceEngine::Precision::UNSPECIFIED),
         testing::Values(InferenceEngine::Precision::UNSPECIFIED),
@@ -43,11 +44,148 @@ const auto params = testing::Combine(
         testing::Values(CommonTestUtils::DEVICE_CPU)
 );
 
-INSTANTIATE_TEST_CASE_P(
-        smoke_Gather,
-        GatherLayerTest,
-        params,
-        GatherLayerTest::getTestCaseName
+INSTANTIATE_TEST_CASE_P(smoke_Gather7_1D, Gather7LayerTest, gather7Params_1D, Gather7LayerTest::getTestCaseName);
+
+const std::vector<std::vector<size_t>> inputShapes_2D = {
+        std::vector<size_t>{4, 19},
+};
+
+const std::vector<std::vector<size_t>> indicesShapes_2D = {
+        std::vector<size_t>{4},
+        std::vector<size_t>{4, 2},
+};
+
+const std::vector<std::tuple<int, int>> axes_batchdims_2D = {
+        std::tuple<int, int>{0, 0},
+        std::tuple<int, int>{1, 0},
+        std::tuple<int, int>{1, 1},
+        std::tuple<int, int>{-1, -1},
+};
+
+const auto gather7Params_2D = testing::Combine(
+        testing::ValuesIn(inputShapes_2D),
+        testing::ValuesIn(indicesShapes_2D),
+        testing::ValuesIn(axes_batchdims_2D),
+        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(CommonTestUtils::DEVICE_CPU)
 );
 
+INSTANTIATE_TEST_CASE_P(smoke_Gather7_2D, Gather7LayerTest, gather7Params_2D, Gather7LayerTest::getTestCaseName);
+
+const std::vector<std::vector<size_t>> inputShapes4D = {
+        std::vector<size_t>{4, 5, 6, 7},
+};
+
+const std::vector<std::vector<size_t>> indicesShapes_BD0 = {
+        std::vector<size_t>{4},
+        std::vector<size_t>{2, 2},
+        std::vector<size_t>{3, 2, 4},
+};
+
+const std::vector<std::tuple<int, int>> axes_BD0 = {
+        std::tuple<int, int>{0, 0},
+        std::tuple<int, int>{1, 0},
+        std::tuple<int, int>{2, 0},
+        std::tuple<int, int>{-1, 0},
+};
+
+const auto gather7ParamsSubset_BD0 = testing::Combine(
+        testing::ValuesIn(inputShapes4D),
+        testing::ValuesIn(indicesShapes_BD0),
+        testing::ValuesIn(axes_BD0),
+        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(CommonTestUtils::DEVICE_CPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_Gather7_BD0, Gather7LayerTest, gather7ParamsSubset_BD0, Gather7LayerTest::getTestCaseName);
+
+const std::vector<std::vector<size_t>> indicesShapes_BD1 = {
+        std::vector<size_t>{4, 2},
+        std::vector<size_t>{4, 5, 3},
+        std::vector<size_t>{4, 1, 2, 3},
+};
+
+const std::vector<std::tuple<int, int>> axes_BD1 = {
+        std::tuple<int, int>{1, 1},
+        std::tuple<int, int>{2, 1},
+        std::tuple<int, int>{-1, 1},
+        std::tuple<int, int>{-2, 1},
+};
+
+const auto gather7ParamsSubset_BD1 = testing::Combine(
+        testing::ValuesIn(inputShapes4D),
+        testing::ValuesIn(indicesShapes_BD1),
+        testing::ValuesIn(axes_BD1),
+        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(CommonTestUtils::DEVICE_CPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_Gather7_BD1, Gather7LayerTest, gather7ParamsSubset_BD1, Gather7LayerTest::getTestCaseName);
+
+const std::vector<std::vector<size_t>> indicesShapes_BD2 = {
+        std::vector<size_t>{4, 5, 4, 3},
+        std::vector<size_t>{4, 5, 3, 2}
+};
+
+const std::vector<std::tuple<int, int>> axes_BD2 = {
+        std::tuple<int, int>{2, 2},
+        std::tuple<int, int>{3, -2},
+        std::tuple<int, int>{-1, 2},
+        std::tuple<int, int>{-1, -2},
+};
+
+const auto gather7ParamsSubset_BD2 = testing::Combine(
+        testing::ValuesIn(inputShapes4D),
+        testing::ValuesIn(indicesShapes_BD2),
+        testing::ValuesIn(axes_BD2),
+        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(CommonTestUtils::DEVICE_CPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_Gather7_BD2, Gather7LayerTest, gather7ParamsSubset_BD2, Gather7LayerTest::getTestCaseName);
+
+const std::vector<std::vector<size_t>> indicesShapes_NegativeBD = {
+        std::vector<size_t>{4, 5, 4},
+        std::vector<size_t>{4, 5, 3}
+};
+
+const std::vector<std::tuple<int, int>> axes_NegativeBD = {
+        std::tuple<int, int>{0, -3},
+        std::tuple<int, int>{1, -2},
+        std::tuple<int, int>{2, -2},
+        std::tuple<int, int>{-2, -2},
+        std::tuple<int, int>{-1, -1},
+        std::tuple<int, int>{-2, -1},
+};
+
+const auto gather7ParamsSubset_NegativeBD = testing::Combine(
+        testing::ValuesIn(inputShapes4D),
+        testing::ValuesIn(indicesShapes_NegativeBD),
+        testing::ValuesIn(axes_NegativeBD),
+        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(CommonTestUtils::DEVICE_CPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_Gather7_NegativeBD, Gather7LayerTest, gather7ParamsSubset_NegativeBD, Gather7LayerTest::getTestCaseName);
+
 }  // namespace
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
index cee89ff95f6..869f0de1a2d 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
@@ -79,6 +79,8 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*smoke_PSROIPoolingAverageLayoutTest.*BF16.*)",
         R"(.*smoke_PSROIPoolingBilinearLayoutTest.*BF16.*)",
         R"(.*smoke_ROIAlignLayoutTest.*BF16.*)",
+        // reference doesn't cover I8, U8 cases. Issue: 55842
+        R"(.*Gather7LayerTest.*netPRC=I8.*)",
     };
         // TODO: 54718 Accuracy mismatch
 #ifdef _WIN32
diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/add_conver_to_reorder.cpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/add_conver_to_reorder.cpp
index cb23b7d784e..1b2265b1be5 100644
--- a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/add_conver_to_reorder.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/add_conver_to_reorder.cpp
@@ -51,27 +51,6 @@ private:
 };
 
 namespace  {
-/* Test insertion of the Convert layer if there is no suitable reorder.
-
-    Parameter[FP32]     Constant[BF16]
-          \                 /
-           \               /
-            \       Convert[I32] (Is inserted by the MKLDNNGraph)
-             \           /
-             Gather[FP32]
-                  |
-                  |
-             Output[FP32]
-*/
-
-TEST_F(AddConvertToReorderTest, smoke_TestAddConvert_CPU) {
-    SKIP_IF_CURRENT_TEST_IS_DISABLED()
-
-    BuildGraph(ngraph::element::bf16);
-    Run();
-    CheckNodeOfTypeCount(executableNetwork, "Convert", with_cpu_x86_avx512_core() ? 1 : 0);
-    CheckNodeOfTypeCount(executableNetwork, "Reorder", 0);
-}
 
 /* Test insertion of the Reorder layer if there is one.