From dcc288680b5282401a748eaf8748686de572e527 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Thu, 20 May 2021 09:09:28 +0300 Subject: [PATCH] [CPU] Added Gather-7 support (#5552) --- .../src/mkldnn_plugin/mkldnn_plugin.cpp | 4 + .../nodes/mkldnn_gather_node.cpp | 124 ++++++------ .../mkldnn_plugin/nodes/mkldnn_gather_node.h | 30 ++- .../single_layer_tests/gather.cpp | 178 ++++++++++++++++-- .../skip_tests_config.cpp | 2 + .../src/add_conver_to_reorder.cpp | 21 --- 6 files changed, 240 insertions(+), 119 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 3ab7622ac91..acc93f72ebd 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include #include #include @@ -291,8 +293,10 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { pass_config->disable(); pass_config->disable(); pass_config->disable(); + pass_config->disable(); pass_config->enable(); + pass_config->enable(); if (useLpt) { pass_config->set_callback([](const_node_ptr &node) -> bool { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp index 966fc4003c3..3bd50aadf33 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp @@ -2,14 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include #include #include #include "ie_parallel.hpp" #include "mkldnn_gather_node.h" #include -#include #include "common/cpu_memcpy.h" using namespace MKLDNNPlugin; @@ -17,9 +15,9 @@ using namespace InferenceEngine; bool MKLDNNGatherNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - auto gatherOp = ngraph::as_type_ptr(op); + auto gatherOp = ngraph::as_type_ptr(op); if (!gatherOp) { - errorMessage = "Only opset1 Gather operation is supported"; + errorMessage = "Only opset7 Gather operation is supported"; return false; } @@ -44,90 +42,96 @@ MKLDNNGatherNode::MKLDNNGatherNode(const std::shared_ptr& op, cons IE_THROW(NotImplemented) << errorMessage; } - auto gatherOp = ngraph::as_type_ptr(op); + auto gatherOp = ngraph::as_type_ptr(op); if (gatherOp->get_input_size() != 3 || gatherOp->get_output_size() != 1) IE_THROW() << errorPrefix_ << "has incorrect number of input/output edges!"; - const SizeVector& dictionary_dims = gatherOp->get_input_shape(GATHER_DICTIONARY); - if (dictionary_dims.size() == 0) + const SizeVector& srcDims = gatherOp->get_input_shape(GATHER_DATA); + const SizeVector& idxDims = gatherOp->get_input_shape(GATHER_INDEXES); + if (srcDims.size() == 0) IE_THROW() << errorPrefix_ << "has incorrect input parameters dimension!"; axis = static_cast(gatherOp->get_axis()); if (axis < 0) - axis += dictionary_dims.size(); - // Dictionary must be at least rank axis + 1 - if (!(-static_cast(dictionary_dims.size()) <= axis && axis < static_cast(dictionary_dims.size()))) + axis += srcDims.size(); + if (!(0 <= axis && axis < static_cast(srcDims.size()))) IE_THROW() << errorPrefix_ << "has incorrect input parameters dimensions and axis number!"; - // Find number of dictionaries, index range and data length - for (int i = 0; i < axis; i++) - numDictionaries *= dictionary_dims[i]; - indexRange = dictionary_dims[axis]; - for (size_t i = axis + 1; i < dictionary_dims.size(); i++) - dataLength *= dictionary_dims[i]; + batchDims = static_cast(gatherOp->get_batch_dims()); + if (batchDims < 0) + batchDims += idxDims.size(); + if (!(0 <= batchDims && batchDims <= std::min(static_cast(srcDims.size()), static_cast(idxDims.size()))) || + batchDims > axis) + IE_THROW() << errorPrefix_ << "has incorrect batch_dims " << batchDims << "!"; - if (dataLength == 0) - IE_THROW() << errorPrefix_ << "had incorrect input parameters dimension!"; + for (int i = 0; i < batchDims; i++) { + if (srcDims[i] != idxDims[i]) + IE_THROW() << errorPrefix_ << "has incorrect first " << batchDims << " data and indices dimensions!"; + } } void MKLDNNGatherNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - Precision inIdxPrecision = getOriginalInputPrecisionAtPort(GATHER_INDEXES); - if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32 && inIdxPrecision != Precision::FP16) - inIdxPrecision = Precision::I32; - - Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DICTIONARY); - + Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DATA); addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision}, - {TensorDescCreatorTypes::ncsp, inIdxPrecision}, + {TensorDescCreatorTypes::ncsp, Precision::I32}, {TensorDescCreatorTypes::ncsp, Precision::I32}}, {{TensorDescCreatorTypes::ncsp, dataPrecision}}, impl_desc_type::ref_any); } -template -void MKLDNNGatherNode::gather() { - size_t src_indexSize = getParentEdgeAt(GATHER_INDEXES)->getBlob()->size(); - size_t outputSize = getChildEdgeAt(0)->getBlob()->byteSize(); - const auto *src_index = reinterpret_cast(getParentEdgeAt(GATHER_INDEXES)->getMemoryPtr()->GetPtr()); - const auto *src_dataDict = reinterpret_cast(getParentEdgeAt(GATHER_DICTIONARY)->getMemoryPtr()->GetPtr()); - auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); +void MKLDNNGatherNode::createPrimitive() { + auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) + IE_THROW() << errorPrefix_ << " has not allocated destination memory."; + if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) + IE_THROW() << errorPrefix_ << " has not allocated input memory."; + if (getSelectedPrimitiveDescriptor() == nullptr) + IE_THROW() << errorPrefix_ << " has unidentified preferable primitive descriptor."; - size_t len = dataLength * getParentEdgeAt(GATHER_DICTIONARY)->getDesc().getPrecision().size(); + const SizeVector srcDims = getParentEdgeAt(GATHER_DATA)->getDims().ToSizeVector(); + const SizeVector idxDims = getParentEdgeAt(GATHER_INDEXES)->getDims().ToSizeVector(); + const SizeVector dstDims = getChildEdgeAt(0)->getDims().ToSizeVector(); + dataSize = getParentEdgeAt(GATHER_DATA)->getDesc().getPrecision().size(); - parallel_for(src_indexSize, [&](size_t i) { - unsigned int idx = Conversion()(src_index[i]); + indexRange = srcDims[axis]; + batchSize = std::accumulate(srcDims.begin(), srcDims.begin() + batchDims, 1, std::multiplies()); + outerSize = std::accumulate(srcDims.begin() + batchDims, srcDims.begin() + axis, 1, std::multiplies()); + dataLength = std::accumulate(srcDims.begin() + axis + 1, srcDims.end(), 1, std::multiplies()); + srcBatchStride = std::accumulate(srcDims.begin() + batchDims, srcDims.end(), 1, std::multiplies()); + idxBatchStride = std::accumulate(idxDims.begin() + batchDims, idxDims.end(), 1, std::multiplies()); + dstBatchStride = std::accumulate(dstDims.begin() + batchDims, dstDims.end(), 1, std::multiplies()); + len = dataLength * dataSize; - // Index clipping - if (idx < indexRange) { - // Copying data to destination from Dictionary - for (size_t j = 0; j < numDictionaries; j++) { - cpu_memcpy_s(&dst_data[len * (i + j * src_indexSize)], - outputSize - (len * (i + j * src_indexSize)), - &src_dataDict[len * (idx + j * indexRange)], - len); - } - } else { - for (size_t j = 0; j < numDictionaries; j++) { - memset(&dst_data[len * (i + j * src_indexSize)], 0, len); - } - } - }); + if (dataLength == 0) + IE_THROW() << errorPrefix_ << "had incorrect input parameters dimension!"; } void MKLDNNGatherNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(GATHER_INDEXES)->getDesc().getPrecision()) { - case Precision::FP32: - gather(); - break; - case Precision::I32: - gather(); - break; - default: - return IE_THROW() << "Unsupported indices input precision"; - } + const int32_t* srcIndexes = reinterpret_cast(getParentEdgeAt(GATHER_INDEXES)->getMemoryPtr()->GetPtr()); + const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr()); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + parallel_for2d(batchSize, idxBatchStride, [&](const size_t i, const size_t j) { + const unsigned int idx = static_cast(srcIndexes[i * idxBatchStride + j]); + + // while negative indices are not supported, should set zero + if (idx < indexRange) { + for (size_t k = 0; k < outerSize; ++k) { + const size_t srcStride = (i * srcBatchStride + k * dataLength * indexRange) * dataSize; + const size_t dstStride = (i * dstBatchStride + k * dataLength * idxBatchStride) * dataSize; + + cpu_memcpy(&dstData[dstStride + j * len], &srcData[srcStride + idx * len], len); + } + } else { + for (size_t k = 0; k < outerSize; ++k) { + memset(&dstData[(i * dstBatchStride + k * dataLength * idxBatchStride) * dataSize + j * len], 0, len); + } + } + }); } bool MKLDNNGatherNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h index 4631436f3c6..6c7663bd95f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h @@ -18,37 +18,31 @@ public: void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; - void createPrimitive() override {}; + void createPrimitive() override; void execute(mkldnn::stream strm) override; bool created() const override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: - struct f32toUi32 { - inline unsigned int operator()(const float value) { - return static_cast(value); - } - }; - - struct i32toUi32 { - inline unsigned int operator()(const int32_t value) { - return static_cast(value); - } - }; - int axis = 0; - size_t numDictionaries = 1; + int batchDims = 0; + size_t indexRange = 0; + size_t batchSize = 1; + size_t outerSize = 1; size_t dataLength = 1; - static const size_t GATHER_DICTIONARY = 0; + size_t srcBatchStride = 1; + size_t idxBatchStride = 1; + size_t dstBatchStride = 1; + size_t dataSize = 1; + size_t len = 1; + + static const size_t GATHER_DATA = 0; static const size_t GATHER_INDEXES = 1; static const size_t GATHER_AXIS = 2; std::string errorPrefix_; - - template - void gather(); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/gather.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/gather.cpp index 483979f1b7e..32038696be6 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/gather.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/gather.cpp @@ -12,29 +12,30 @@ using namespace LayerTestsDefinitions; namespace { const std::vector netPrecisions = { + InferenceEngine::Precision::I64, InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16, + InferenceEngine::Precision::BF16, + InferenceEngine::Precision::I8 }; -const std::vector> inputShapes = { - std::vector{10, 20, 30, 40}, -}; - -const std::vector> indices = { - std::vector{0, 3, 2, 1}, -}; -const std::vector> indicesShapes = { +const std::vector> inputShapes_1D = { std::vector{4}, - std::vector{2, 2} }; -const std::vector axes = {0, 1, 2, 3, -1}; +const std::vector> indicesShapes_1D = { + std::vector{1}, + std::vector{3}, +}; +const std::vector> axes_batchdims_1D = { + std::tuple{0, 0} +}; -const auto params = testing::Combine( - testing::ValuesIn(indices), - testing::ValuesIn(indicesShapes), - testing::ValuesIn(axes), - testing::ValuesIn(inputShapes), +const auto gather7Params_1D = testing::Combine( + testing::ValuesIn(inputShapes_1D), + testing::ValuesIn(indicesShapes_1D), + testing::ValuesIn(axes_batchdims_1D), testing::ValuesIn(netPrecisions), testing::Values(InferenceEngine::Precision::UNSPECIFIED), testing::Values(InferenceEngine::Precision::UNSPECIFIED), @@ -43,11 +44,148 @@ const auto params = testing::Combine( testing::Values(CommonTestUtils::DEVICE_CPU) ); -INSTANTIATE_TEST_CASE_P( - smoke_Gather, - GatherLayerTest, - params, - GatherLayerTest::getTestCaseName +INSTANTIATE_TEST_CASE_P(smoke_Gather7_1D, Gather7LayerTest, gather7Params_1D, Gather7LayerTest::getTestCaseName); + +const std::vector> inputShapes_2D = { + std::vector{4, 19}, +}; + +const std::vector> indicesShapes_2D = { + std::vector{4}, + std::vector{4, 2}, +}; + +const std::vector> axes_batchdims_2D = { + std::tuple{0, 0}, + std::tuple{1, 0}, + std::tuple{1, 1}, + std::tuple{-1, -1}, +}; + +const auto gather7Params_2D = testing::Combine( + testing::ValuesIn(inputShapes_2D), + testing::ValuesIn(indicesShapes_2D), + testing::ValuesIn(axes_batchdims_2D), + testing::ValuesIn(netPrecisions), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(CommonTestUtils::DEVICE_CPU) ); +INSTANTIATE_TEST_CASE_P(smoke_Gather7_2D, Gather7LayerTest, gather7Params_2D, Gather7LayerTest::getTestCaseName); + +const std::vector> inputShapes4D = { + std::vector{4, 5, 6, 7}, +}; + +const std::vector> indicesShapes_BD0 = { + std::vector{4}, + std::vector{2, 2}, + std::vector{3, 2, 4}, +}; + +const std::vector> axes_BD0 = { + std::tuple{0, 0}, + std::tuple{1, 0}, + std::tuple{2, 0}, + std::tuple{-1, 0}, +}; + +const auto gather7ParamsSubset_BD0 = testing::Combine( + testing::ValuesIn(inputShapes4D), + testing::ValuesIn(indicesShapes_BD0), + testing::ValuesIn(axes_BD0), + testing::ValuesIn(netPrecisions), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(CommonTestUtils::DEVICE_CPU) +); + +INSTANTIATE_TEST_CASE_P(smoke_Gather7_BD0, Gather7LayerTest, gather7ParamsSubset_BD0, Gather7LayerTest::getTestCaseName); + +const std::vector> indicesShapes_BD1 = { + std::vector{4, 2}, + std::vector{4, 5, 3}, + std::vector{4, 1, 2, 3}, +}; + +const std::vector> axes_BD1 = { + std::tuple{1, 1}, + std::tuple{2, 1}, + std::tuple{-1, 1}, + std::tuple{-2, 1}, +}; + +const auto gather7ParamsSubset_BD1 = testing::Combine( + testing::ValuesIn(inputShapes4D), + testing::ValuesIn(indicesShapes_BD1), + testing::ValuesIn(axes_BD1), + testing::ValuesIn(netPrecisions), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(CommonTestUtils::DEVICE_CPU) +); + +INSTANTIATE_TEST_CASE_P(smoke_Gather7_BD1, Gather7LayerTest, gather7ParamsSubset_BD1, Gather7LayerTest::getTestCaseName); + +const std::vector> indicesShapes_BD2 = { + std::vector{4, 5, 4, 3}, + std::vector{4, 5, 3, 2} +}; + +const std::vector> axes_BD2 = { + std::tuple{2, 2}, + std::tuple{3, -2}, + std::tuple{-1, 2}, + std::tuple{-1, -2}, +}; + +const auto gather7ParamsSubset_BD2 = testing::Combine( + testing::ValuesIn(inputShapes4D), + testing::ValuesIn(indicesShapes_BD2), + testing::ValuesIn(axes_BD2), + testing::ValuesIn(netPrecisions), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(CommonTestUtils::DEVICE_CPU) +); + +INSTANTIATE_TEST_CASE_P(smoke_Gather7_BD2, Gather7LayerTest, gather7ParamsSubset_BD2, Gather7LayerTest::getTestCaseName); + +const std::vector> indicesShapes_NegativeBD = { + std::vector{4, 5, 4}, + std::vector{4, 5, 3} +}; + +const std::vector> axes_NegativeBD = { + std::tuple{0, -3}, + std::tuple{1, -2}, + std::tuple{2, -2}, + std::tuple{-2, -2}, + std::tuple{-1, -1}, + std::tuple{-2, -1}, +}; + +const auto gather7ParamsSubset_NegativeBD = testing::Combine( + testing::ValuesIn(inputShapes4D), + testing::ValuesIn(indicesShapes_NegativeBD), + testing::ValuesIn(axes_NegativeBD), + testing::ValuesIn(netPrecisions), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(CommonTestUtils::DEVICE_CPU) +); + +INSTANTIATE_TEST_CASE_P(smoke_Gather7_NegativeBD, Gather7LayerTest, gather7ParamsSubset_NegativeBD, Gather7LayerTest::getTestCaseName); + } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index cee89ff95f6..869f0de1a2d 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -79,6 +79,8 @@ std::vector disabledTestPatterns() { R"(.*smoke_PSROIPoolingAverageLayoutTest.*BF16.*)", R"(.*smoke_PSROIPoolingBilinearLayoutTest.*BF16.*)", R"(.*smoke_ROIAlignLayoutTest.*BF16.*)", + // reference doesn't cover I8, U8 cases. Issue: 55842 + R"(.*Gather7LayerTest.*netPRC=I8.*)", }; // TODO: 54718 Accuracy mismatch #ifdef _WIN32 diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/add_conver_to_reorder.cpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/add_conver_to_reorder.cpp index cb23b7d784e..1b2265b1be5 100644 --- a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/add_conver_to_reorder.cpp +++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/add_conver_to_reorder.cpp @@ -51,27 +51,6 @@ private: }; namespace { -/* Test insertion of the Convert layer if there is no suitable reorder. - - Parameter[FP32] Constant[BF16] - \ / - \ / - \ Convert[I32] (Is inserted by the MKLDNNGraph) - \ / - Gather[FP32] - | - | - Output[FP32] -*/ - -TEST_F(AddConvertToReorderTest, smoke_TestAddConvert_CPU) { - SKIP_IF_CURRENT_TEST_IS_DISABLED() - - BuildGraph(ngraph::element::bf16); - Run(); - CheckNodeOfTypeCount(executableNetwork, "Convert", with_cpu_x86_avx512_core() ? 1 : 0); - CheckNodeOfTypeCount(executableNetwork, "Reorder", 0); -} /* Test insertion of the Reorder layer if there is one.