From 87b9a5b6e94378d1e659f08335fb922bae798efc Mon Sep 17 00:00:00 2001 From: Yury Gaydaychuk Date: Tue, 26 Oct 2021 12:55:08 +0300 Subject: [PATCH 1/5] [CPU] Dynamic support for ShapeOf (#7875) --- .../src/mkldnn_plugin/cpu_types.cpp | 3 + .../src/mkldnn_plugin/cpu_types.h | 1 + .../mkldnn_plugin/nodes/mkldnn_shapeof.cpp | 79 ++++++++ .../src/mkldnn_plugin/nodes/mkldnn_shapeof.h | 39 ++++ .../plugin/cpu/single_layer_tests/shapeof.cpp | 175 ++++++++++++++++++ 5 files changed, 297 insertions(+) create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_shapeof.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_shapeof.h create mode 100644 inference-engine/tests/functional/plugin/cpu/single_layer_tests/shapeof.cpp diff --git a/inference-engine/src/mkldnn_plugin/cpu_types.cpp b/inference-engine/src/mkldnn_plugin/cpu_types.cpp index 4af6683bf78..00b22d90937 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_types.cpp +++ b/inference-engine/src/mkldnn_plugin/cpu_types.cpp @@ -65,6 +65,7 @@ const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_t { "Reshape", Reshape }, { "Squeeze", Reshape }, { "Unsqueeze", Reshape }, + { "ShapeOf", ShapeOf }, { "Softmax", Softmax }, { "Reorder", Reorder }, { "BatchToSpace", BatchToSpace }, @@ -225,6 +226,8 @@ std::string NameFromType(const Type type) { return "StridedSlice"; case Reshape: return "Reshape"; + case ShapeOf: + return "ShapeOf"; case Tile: return "Tile"; case ROIAlign: diff --git a/inference-engine/src/mkldnn_plugin/cpu_types.h b/inference-engine/src/mkldnn_plugin/cpu_types.h index 95371b6c847..0062c034c5b 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_types.h +++ b/inference-engine/src/mkldnn_plugin/cpu_types.h @@ -33,6 +33,7 @@ enum Type { Eltwise, MatMul, Reshape, + ShapeOf, Tile, ROIAlign, ROIPooling, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shapeof.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shapeof.cpp new file mode 100644 index 00000000000..28097fcaf58 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shapeof.cpp @@ -0,0 +1,79 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_shapeof.h" +#include + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +bool MKLDNNShapeOfNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept { + try { + if (!one_of(op->get_type_info(), + ngraph::op::v0::ShapeOf::get_type_info_static(), + ngraph::op::v3::ShapeOf::get_type_info_static())) { + errorMessage = "Node is not an instance of ShapeOf from the operation set v1 or v3."; + return false; + } + } catch (...) 
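+ // editor's note: the catch-all below deliberately maps any exception thrown while
+ // querying the type info to "unsupported operation", since this helper is noexcept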
{ + return false; + } + return true; +} + +MKLDNNShapeOfNode::MKLDNNShapeOfNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "ShapeOf layer with name '" + getName() + "' "; + if (op->get_input_partial_shape(0).size() == 0) + IE_THROW() << errorPrefix << "gets unsupported input 0D tensor (scalar)"; + } else { + IE_THROW(NotImplemented) << errorMessage; + } +} + +void MKLDNNShapeOfNode::getSupportedDescriptors() { + if (!descs.empty()) + return; + if (getParentEdges().size() != 1) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getParentEdges().size(); + if (getChildEdges().empty()) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size(); +} + +void MKLDNNShapeOfNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + Precision precision = getOriginalInputPrecisionAtPort(0); + + const LayoutType dataFormats[4] = { LayoutType::ncsp, LayoutType::nspc, LayoutType::nCsp16c, LayoutType::nCsp8c }; + for (const auto &df : dataFormats) { + addSupportedPrimDesc({{df, precision}}, + {{LayoutType::ncsp, Precision::I32}}, + impl_desc_type::ref); + } +} + +void MKLDNNShapeOfNode::execute(mkldnn::stream strm) { + auto inPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto outPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto inDims = inPtr->getStaticDims(); + size_t dimsCount = inDims.size(); + if (outPtr->getStaticDims().size() != 1 || dimsCount != outPtr->getStaticDims()[0]) + IE_THROW() << errorPrefix << "has inconsistent input shape and output size"; + + auto *dst = reinterpret_cast<int *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + for (size_t i = 0; i < dimsCount; i++) { + dst[i] = inDims[i]; + } +} + +bool MKLDNNShapeOfNode::created() const { + return getType() == ShapeOf; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNShapeOfNode, ShapeOf) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shapeof.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shapeof.h new file mode 100644 index 00000000000..2a7eb9560e6 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shapeof.h @@ -0,0 +1,39 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNShapeOfNode : public MKLDNNNode { +public: + MKLDNNShapeOfNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + + void getSupportedDescriptors() override; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override { + if (inputShapesDefined()) + updateLastInputDims(); + }; + void execute(mkldnn::stream strm) override; + bool created() const override; + bool needPrepareParams() const override {return false;}; + void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); } + std::vector<VectorDims> shapeInfer() const override { + return {VectorDims{getParentEdgesAtPort(0)[0]->getMemory().getStaticDims().size()}}; + } + + static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept; + +private: + std::string errorPrefix; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/shapeof.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/shapeof.cpp new file mode 100644 
index 00000000000..204f50453f3 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/shapeof.cpp @@ -0,0 +1,175 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils/cpu_test_utils.hpp" + +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { +typedef std::tuple< + std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>> // input shape +> ShapeOfSpecificParams; + +typedef std::tuple< + ShapeOfSpecificParams, + InferenceEngine::Precision, // Net precision + LayerTestsUtils::TargetDevice // Device name > ShapeOfLayerTestParams; + +typedef std::tuple< + CPULayerTestsDefinitions::ShapeOfLayerTestParams, + CPUSpecificParams> ShapeOfLayerCPUTestParamsSet; + +class ShapeOfLayerCPUTest : public testing::WithParamInterface<ShapeOfLayerCPUTestParamsSet>, + virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase { +public: + static std::string getTestCaseName(testing::TestParamInfo<ShapeOfLayerCPUTestParamsSet> obj) { + CPULayerTestsDefinitions::ShapeOfLayerTestParams basicParamsSet; + CPUSpecificParams cpuParams; + std::tie(basicParamsSet, cpuParams) = obj.param; + std::string td; + Precision netPr; + std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>> shapes; + + ShapeOfSpecificParams shapeOfPar; + std::tie(shapeOfPar, netPr, td) = basicParamsSet; + std::tie(shapes) = shapeOfPar; + std::ostringstream result; + result << "ShapeOfTest_"; + result << std::to_string(obj.index) << "_"; + result << "Prec=" << netPr.name() << "_"; + result << CPUTestsBase::getTestCaseName(cpuParams) << "_"; + result << "IS="; + for (const auto& shape : shapes.second) { + result << "("; + for (const auto& item : shape) { + result << CommonTestUtils::vec2str(item); + } + result << ")_"; + } + return result.str(); + } +protected: + void SetUp() override { + CPULayerTestsDefinitions::ShapeOfLayerTestParams basicParamsSet; + CPUSpecificParams cpuParams; + std::tie(basicParamsSet, cpuParams) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + + CPULayerTestsDefinitions::ShapeOfSpecificParams shapeOfParams; + auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; + std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>> shapes; + std::tie(shapeOfParams, netPrecision, targetDevice) = basicParamsSet; + inPrc = netPrecision; + outPrc = Precision::I32; + std::tie(shapes) = shapeOfParams; + targetStaticShapes = shapes.second; + inputDynamicShapes = shapes.first; + + auto inType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto param = ngraph::builder::makeParams(inType, {targetStaticShapes.front().front()}); + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(param)); + auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(paramOuts[0], ngraph::element::i32); + shapeOf->get_rt_info() = getCPUInfo(); + selectedType = std::string("ref_") + inPrc.name(); + + const ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(shapeOf)}; + function = std::make_shared<ngraph::Function>(results, param, "ShapeOf"); + functionRefs = ngraph::clone_function(*function); + } +}; + +TEST_P(ShapeOfLayerCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + Run(); + CheckPluginRelatedResults(executableNetwork, "ShapeOf"); +} + +namespace { + +/* CPU PARAMS */ +std::vector<CPUSpecificParams> filterCPUInfoForDevice(const size_t dimsCount = 3) { + std::vector<CPUSpecificParams> resCPUParams; + if (dimsCount == 5) { + resCPUParams.push_back(CPUSpecificParams{{nCdhw16c}, {x}, {}, {}}); + 
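+ // the remaining 5D entries below add the blocked 8-channel, planar and
+ // channels-last layouts; each variant still yields the same planar 1D I32 output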
resCPUParams.push_back(CPUSpecificParams{{nCdhw8c}, {x}, {}, {}}); + resCPUParams.push_back(CPUSpecificParams{{ncdhw}, {x}, {}, {}}); + resCPUParams.push_back(CPUSpecificParams{{ndhwc}, {x}, {}, {}}); + } else if (dimsCount == 4) { + resCPUParams.push_back(CPUSpecificParams{{nChw16c}, {x}, {}, {}}); + resCPUParams.push_back(CPUSpecificParams{{nChw8c}, {x}, {}, {}}); + resCPUParams.push_back(CPUSpecificParams{{nchw}, {x}, {}, {}}); + resCPUParams.push_back(CPUSpecificParams{{nhwc}, {x}, {}, {}}); + } else { + resCPUParams.push_back(CPUSpecificParams{{nCw16c}, {x}, {}, {}}); + resCPUParams.push_back(CPUSpecificParams{{nCw8c}, {x}, {}, {}}); + resCPUParams.push_back(CPUSpecificParams{{abc}, {x}, {}, {}}); + resCPUParams.push_back(CPUSpecificParams{{acb}, {x}, {}, {}}); + } + + return resCPUParams; +} + +const std::vector<InferenceEngine::Precision> netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::BF16, + InferenceEngine::Precision::I32, + InferenceEngine::Precision::I8 +}; + +std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>>> inShapesDynamic3d = { + {{ngraph::PartialShape{-1, -1, -1}}, + {{{ 8, 5, 4 }, { 8, 5, 3 }, { 8, 5, 2 }}}}, + {{ngraph::PartialShape{-1, -1, -1}}, + {{{ 1, 2, 4 }, { 1, 2, 3 }, { 1, 2, 2 }}}} +}; +std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>>> inShapesDynamic4d = { + {{ngraph::PartialShape{-1, -1, -1, -1}}, + {{{ 8, 5, 3, 4 }, { 8, 5, 3, 3 }, { 8, 5, 3, 2 }}}}, + {{ngraph::PartialShape{-1, -1, -1, -1}}, + {{{ 1, 2, 3, 4 }, { 1, 2, 3, 3 }, { 1, 2, 3, 2 }}}} +}; +std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>>> inShapesDynamic5d = { + {{ngraph::PartialShape{-1, -1, -1, -1, -1}}, + {{{ 8, 5, 3, 2, 4 }, { 8, 5, 3, 2, 3 }, { 8, 5, 3, 2, 2 }}}}, + {{ngraph::PartialShape{-1, -1, -1, -1, -1}}, + {{{ 1, 2, 3, 4, 4 }, { 1, 2, 3, 4, 3 }, { 1, 2, 3, 4, 2 }}}} +}; +const auto params5dDynamic = ::testing::Combine( + ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inShapesDynamic5d)), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice(5))); + +const auto params4dDynamic = ::testing::Combine( + ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inShapesDynamic4d)), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice(4))); + +const auto params3dDynamic = ::testing::Combine( + ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(inShapesDynamic3d)), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice(3))); + +// We don't check static case, because of constant folding +INSTANTIATE_TEST_SUITE_P(smoke_ShapeOf3dDynamicLayoutTest, ShapeOfLayerCPUTest, + params3dDynamic, ShapeOfLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_ShapeOf4dDynamicLayoutTest, ShapeOfLayerCPUTest, + params4dDynamic, ShapeOfLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_ShapeOf5dDynamicLayoutTest, ShapeOfLayerCPUTest, + params5dDynamic, ShapeOfLayerCPUTest::getTestCaseName); +} // namespace +} // namespace CPULayerTestsDefinitions From bd2fdca9de8eb59ea06ae62cc5a9d2789936bf43 Mon Sep 17 00:00:00 2001 From: Bartek Szmelczynski Date: Tue, 26 Oct 2021 12:54:47 +0200 Subject: [PATCH 2/5] remove CT from ref impl (#7959) --- .../reference/scatter_elements_update.hpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/scatter_elements_update.hpp 
b/ngraph/core/reference/include/ngraph/runtime/reference/scatter_elements_update.hpp index d907201defe..6449e115d98 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/scatter_elements_update.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/scatter_elements_update.hpp @@ -29,21 +29,19 @@ void scatter_elem_update(const DataType* input_data, // output[i][indices[i][j][k]][k] = updates[i][j][k] if axis = 1, // output[i][j][indices[i][j][k]] = updates[i][j][k] if axis = 2 - NGRAPH_SUPPRESS_DEPRECATED_START - CoordinateTransform indices_transform{indices_shape}; - CoordinateTransform data_transform{data_shape}; + CoordinateTransformBasic indices_transform{indices_shape}; + CoordinateTransformBasic data_transform{data_shape}; + const auto indices_strides = row_major_strides(indices_shape); + const auto data_strides = row_major_strides(data_shape); for (const Coordinate& indices_cord : indices_transform) { - const size_t indices_idx = indices_transform.index(indices_cord); + const size_t indices_idx = + std::inner_product(indices_cord.begin(), indices_cord.end(), indices_strides.begin(), 0); Coordinate out_cord(indices_cord); out_cord.at(axis) = indices[indices_idx]; - NGRAPH_CHECK(data_transform.has_source_coordinate(out_cord), - "Provided index coordinates are out of input data bounds: ", - out_cord, - "."); - out_buf[data_transform.index(out_cord)] = updates[indices_idx]; + const auto out_idx = std::inner_product(out_cord.begin(), out_cord.end(), data_strides.begin(), 0); + out_buf[out_idx] = updates[indices_idx]; } - NGRAPH_SUPPRESS_DEPRECATED_END } } // namespace reference } // namespace runtime From 4a96d14adce5a0152d11b04e6aa601060850c530 Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Tue, 26 Oct 2021 14:22:29 +0300 Subject: [PATCH 3/5] [nGraph] Reshape: upper_bound propagation fix (#8177) --- ngraph/core/src/op/reshape.cpp | 9 ++++++++- ngraph/test/type_prop/reshape.cpp | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/ngraph/core/src/op/reshape.cpp b/ngraph/core/src/op/reshape.cpp index 257cc6bce4a..bd0f2fa5f5a 100644 --- a/ngraph/core/src/op/reshape.cpp +++ b/ngraph/core/src/op/reshape.cpp @@ -81,7 +81,7 @@ void op::v1::Reshape::validate_and_infer_types() { std::tie(lb, ub) = evaluate_both_bounds(get_input_source_output(1)); if (lb && ub) { const auto lower_bound = std::make_shared<op::v0::Constant>(lb)->cast_vector<int64_t>(); - const auto upper_bound = std::make_shared<op::v0::Constant>(ub)->cast_vector<int64_t>(); + auto upper_bound = std::make_shared<op::v0::Constant>(ub)->cast_vector<int64_t>(); shape_can_be_calculated = true; NGRAPH_CHECK(lower_bound.size() == upper_bound.size()); for (size_t i = 0; i < lower_bound.size(); ++i) { @@ -94,6 +94,13 @@ void op::v1::Reshape::validate_and_infer_types() { NODE_VALIDATION_CHECK(this, minus_one_idx == -1, "More than one dimension has size of -1"); minus_one_idx = static_cast<int64_t>(i); } + + // We must handle i32 fully dynamic dimension in a special way + if (get_input_element_type(1) == element::i32 && + upper_bound[i] == std::numeric_limits<std::int32_t>::max()) { + upper_bound[i] = std::numeric_limits<std::int64_t>::max(); + } + + reshape_pattern.emplace_back(lower_bound[i], upper_bound[i]); } // For scalar case reshape_pattern should be empty but scalar reshape pattern should be empty diff --git a/ngraph/test/type_prop/reshape.cpp b/ngraph/test/type_prop/reshape.cpp index 083dddc5e5e..0d74ac2e4db 100644 --- a/ngraph/test/type_prop/reshape.cpp +++ b/ngraph/test/type_prop/reshape.cpp @@ -561,3 +561,17 @@ TEST(type_prop, reshape_to_scalar_3) { make_shared<op::v1::Reshape>(param, 
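 // tail of the pre-existing reshape_to_scalar_3 test: a rank-0 pattern constant
 // is invalid, so the enclosing assertion expects construction to throw std::exception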
op::Constant::create(element::i64, {}, std::vector<int64_t>{100}), false), std::exception); } + +TEST(type_prop, dynamic_shape_propagation_with_i32_precision) { + auto param = make_shared<op::Parameter>(element::f32, PartialShape{1, -1, -1}); + auto shape_of = std::make_shared<op::v3::ShapeOf>(param, element::i32); + + auto indices = op::Constant::create(element::i32, {3}, {1, 2, 0}); + auto axis = op::Constant::create(element::i32, {1}, {0}); + auto gather = std::make_shared<op::v1::Gather>(shape_of, indices, axis); + + auto reshape = std::make_shared<op::v1::Reshape>(param, gather, true); + + ASSERT_EQ(reshape->get_element_type(), element::f32); + ASSERT_EQ(reshape->get_output_partial_shape(0), (PartialShape{-1, -1, 1})); +} From a02eafb397b23416078afdd1f3eabb84a0fb77a1 Mon Sep 17 00:00:00 2001 From: Egor Duplensky Date: Tue, 26 Oct 2021 14:49:36 +0300 Subject: [PATCH 4/5] [CPU] [BF16] Do not enforce BF16 for graph tail (#6114) --- .../src/mkldnn_plugin/mkldnn_graph.cpp | 66 +++++++++++++++---- .../src/mkldnn_plugin/mkldnn_node.cpp | 6 ++ .../src/mkldnn_plugin/mkldnn_node.h | 1 + .../plugin/cpu/bfloat16/gather_multiply.cpp | 2 +- .../plugin/cpu/test_utils/cpu_test_utils.cpp | 3 + 5 files changed, 63 insertions(+), 15 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index e962d362293..61928e183f5 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -1209,21 +1209,59 @@ bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNo void MKLDNNGraph::EnforceBF16() { // Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision // only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default - if (implication(isQuantized(), config.manualEnforceBF16)) { - for (auto &node : graphNodes) { - if (node->getType() != Input && node->getType() != Output) { - for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) { - auto &parent = node->getParentEdgesAtPort(i)[0]->getParent(); - if (!(parent->getType() == Input && parent->isConstant()) && // exclude nodes after Constant Inputs - !(parent->getType() == Input && node->getType() == Eltwise) && // exclude Eltwise after Input since it supports conversion to BF16 - node->getOriginalInputPrecisionAtPort(i) == Precision::FP32) - node->setOriginalInputPrecisionAtPort(i, Precision::BF16); - } + if (!implication(isQuantized(), config.manualEnforceBF16)) + return; + /* list of node types that must be forced to be executed in BF16 precision + * because of performance gains */ + static const std::unordered_set<Type, std::hash<int>> significantNodes { // std::hash<int> is necessary for old compilers (defect in the C++11 standard) + Convolution, // conv nets + FullyConnected, // conv / bert nets + RNNCell, // recurrent nets + RNNSeq, // recurrent nets + MatMul, // bert nets + ROIPooling, // object detection nets + Interpolate, // super resolution nets + }; 

- for (size_t i = 0; i < node->getOriginalOutputsNumber(); i++) { - if (node->getOriginalOutputPrecisionAtPort(i) == Precision::FP32) - node->setOriginalOutputPrecisionAtPort(i, Precision::BF16); - } + std::function<void(const MKLDNNNodePtr&, std::unordered_set<MKLDNNNodePtr>& skipNodes)> searchForNodesToSkip; + searchForNodesToSkip = [&](const MKLDNNNodePtr& node, std::unordered_set<MKLDNNNodePtr>& skipNodes) -> void { + for (size_t i = 0; i < node->getParentEdges().size(); i++) { + const auto& parent = node->getParentEdgeAt(i)->getParent(); + if (significantNodes.count(parent->getType())) // stop at significant nodes + 
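+ // a parent listed in significantNodes keeps BF16 anyway, so the backward walk
+ // stops here: neither that parent nor anything it consumes counts as graph tail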
continue; + + const auto res = skipNodes.insert(parent); + if (res.second) // node not visited yet + searchForNodesToSkip(parent, skipNodes); + } + }; + + /* Skip BF16 enforcement for tail of the graph by forming set of nodes to skip. + * Necessary to maintain accuracy. + * Experiments show zero performance impact on average */ + std::unordered_set<MKLDNNNodePtr> nodesToSkip; + // starting from output nodes + for (const auto& entry : outputNodesMap) { + const auto& node = entry.second; + searchForNodesToSkip(node, nodesToSkip); + } + + for (const auto& node : graphNodes) { + if (nodesToSkip.count(node) && !node->enforceBF16evenForGraphTail) + continue; + + if (node->getType() != Input && node->getType() != Output) { + for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) { + const auto &parent = node->getParentEdgesAtPort(i)[0]->getParent(); + if (!(parent->getType() == Input && parent->isConstant()) && // exclude nodes after Constant Inputs + !(parent->getType() == Input && node->getType() == Eltwise) && // exclude Eltwise after Input since it supports conversion to BF16 + node->getOriginalInputPrecisionAtPort(i) == Precision::FP32) + node->setOriginalInputPrecisionAtPort(i, Precision::BF16); + } + + for (size_t i = 0; i < node->getOriginalOutputsNumber(); i++) { + if (node->getOriginalOutputPrecisionAtPort(i) == Precision::FP32) + node->setOriginalOutputPrecisionAtPort(i, Precision::BF16); } } } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index d095b02d1f2..31d36aece4a 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -159,6 +159,12 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en } } } + + const auto it = rtInfo.find("enforceBF16evenForGraphTail"); + if (it != rtInfo.end()) { + if (const auto value = std::dynamic_pointer_cast<ngraph::VariantImpl<bool>>(it->second)) + enforceBF16evenForGraphTail = value->get(); + } } MKLDNNNode::MKLDNNNode(const std::string& type, const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index b7a3622cb77..7e089789720 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -593,6 +593,7 @@ protected: std::vector implPriorities; std::vector inputMemoryFormatsFilter; std::vector outputMemoryFormatsFilter; + bool enforceBF16evenForGraphTail = false; std::string originalLayers; // contains names of the original layers separated by comma diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/gather_multiply.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/gather_multiply.cpp index e4283a18931..84c9824f22b 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/gather_multiply.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/gather_multiply.cpp @@ -100,7 +100,7 @@ protected: // performance counters expectedPrecisions["Matmul_0"] = "BF16"; - expectedPrecisions["Mul_1"] = "BF16"; + expectedPrecisions["Mul_1"] = netPrecision.name(); // tail kept in FP32 precision } }; diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp index f01bd40b96b..4b515dcd144 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp +++ 
b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp @@ -4,6 +4,7 @@ #include "cpu_test_utils.hpp" #include "utils/rt_info/memory_formats_attribute.hpp" +#include namespace CPUTestUtils { @@ -257,6 +258,8 @@ CPUTestsBase::makeCPUInfo(std::vector inFmts, std::vector>(impls2str(priority))}); } + cpuInfo.insert({"enforceBF16evenForGraphTail", ov::make_variant(true)}); + return cpuInfo; } From ac5b0e881a08cb01b160fec983710e0bbb37d94c Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Tue, 26 Oct 2021 15:24:37 +0300 Subject: [PATCH 5/5] [IE GPU] Skip sporadic failure in CI (#8109) --- .../plugin/gpu/shared_tests_instances/skip_tests_config.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp index 5f1ff96d654..23a373f94ff 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp @@ -88,5 +88,7 @@ std::vector disabledTestPatterns() { R"(.*CanSetInBlobWithDifferentPrecision/netPRC=BIN.*)", R"(.*CanSetOutBlobWithDifferentPrecision/netPRC=(I4|U4).*)", R"(.*CanSetOutBlobWithDifferentPrecision/netPRC=BIN.*)", + // TODO: Issue: 68712 + R"(.*.MatMul.*CompareWithRefs.*IS0=\(1.5\)_IS1=\(1.5\).*transpose_a=0.*transpose_b=1.*CONSTANT.*FP16.*UNSPECIFIED.*UNSPECIFIED.*ANY.*)", }; }
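Editor's note on the skip list above: the harness code that consumes disabledTestPatterns() is not part of this patch. As a rough, hypothetical sketch (the function name and matching semantics are assumptions; the real FuncTestUtils logic may differ), a runner could apply the regular expressions to the full test name like this:

#include <regex>
#include <string>
#include <vector>

// Returns true when the currently running test matches any disabled pattern.
// Assumed behaviour: each entry is treated as an ECMAScript regex and tested
// against the fully qualified gtest name, as the ".*...*" patterns above suggest.
bool currentTestIsDisabled(const std::string& fullTestName,
                           const std::vector<std::string>& disabledPatterns) {
    for (const auto& pattern : disabledPatterns) {
        if (std::regex_search(fullTestName, std::regex(pattern)))
            return true;  // skip this test
    }
    return false;
}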