From ba736e2bcd09fc6b855785432e57e4f2ef9cebc3 Mon Sep 17 00:00:00 2001 From: Maxim Andronov Date: Tue, 1 Feb 2022 18:52:57 +0300 Subject: [PATCH] [CPU] Fix dynamic RNNSeq with native order (#9932) --- .../intel_cpu/src/mkldnn_graph_optimizer.cpp | 3 +- .../intel_cpu/src/nodes/mkldnn_rnn.cpp | 5 +- .../convert_to_plugin_specific_node.cpp | 2 +- .../src/add_convert_to_reorder.cpp | 4 +- .../src/align_matmul_input_ranks.cpp | 2 +- .../src/concat_const_inplace.cpp | 4 +- .../src/input_noreorder_eltwise_bf16.cpp | 6 +- .../subgraph_tests/src/seq_native_order.cpp | 299 ++++++++++++++++++ .../plugin/cpu/test_utils/cpu_test_utils.cpp | 16 +- .../plugin/cpu/test_utils/cpu_test_utils.hpp | 3 +- 10 files changed, 328 insertions(+), 16 deletions(-) create mode 100644 src/tests/functional/plugin/cpu/subgraph_tests/src/seq_native_order.cpp diff --git a/src/plugins/intel_cpu/src/mkldnn_graph_optimizer.cpp b/src/plugins/intel_cpu/src/mkldnn_graph_optimizer.cpp index b1bff9c8d9b..2cc8118a76b 100644 --- a/src/plugins/intel_cpu/src/mkldnn_graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/mkldnn_graph_optimizer.cpp @@ -2031,7 +2031,8 @@ void MKLDNNGraphOptimizer::reshapeRnnSeq(MKLDNNGraph &graph) { if (node->type != RNNSeq) return false; auto rnnNode = std::dynamic_pointer_cast(node); - return rnnNode && !rnnNode->hasNativeOrder() && node->outputShapes[0].getRank() == 4 && node->outputShapes[0].getDims()[1] == 1; + return rnnNode && (!rnnNode->hasNativeOrder() || node->isDynamicNode()) && node->outputShapes[0].getRank() == 4 && + node->outputShapes[0].getDims()[1] == 1; }; for (size_t i = 0; i < graphNodes.size(); i++) { diff --git a/src/plugins/intel_cpu/src/nodes/mkldnn_rnn.cpp b/src/plugins/intel_cpu/src/nodes/mkldnn_rnn.cpp index 50b0d9e1d11..2bd841e4567 100644 --- a/src/plugins/intel_cpu/src/nodes/mkldnn_rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/mkldnn_rnn.cpp @@ -479,7 +479,8 @@ void MKLDNNRNN::fillSequenceDesc() { outCandidate.reserve(3); if (nativeOrder) { - 
outCandidate.emplace_back(outDataDescs[RNNInOutKind::Layer]); + outCandidate.emplace_back(std::make_shared(Shape{{T.minVal, N.minVal, SC}, {T.maxVal, N.maxVal, SC}}, + dataType, memory::format_tag::tnc)); } else if (N.isStatic() && N.maxVal == 1) { // WA to avoid reorder after sequence for some models outCandidate.emplace_back(std::make_shared(shapeNTSC, dataType, memory::format_tag::tnc)); @@ -927,7 +928,7 @@ std::vector MKLDNNRNN::shapeInfer() const { auto originOutputShapes = MKLDNNNode::shapeInfer(); // Graph optimizer makes the same optimization. So this is required to make shapes compatible. - if (!hasNativeOrder() && originOutputShapes[0].size() == 4lu && originOutputShapes[0][1] == 1lu) { + if (getType() == RNNSeq && originOutputShapes[0].size() == 4lu && originOutputShapes[0][1] == 1lu) { originOutputShapes[0].erase(originOutputShapes[0].begin() + 1); } return originOutputShapes; diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/convert_to_plugin_specific_node.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/convert_to_plugin_specific_node.cpp index 482d79b60b7..d83f70931d0 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/convert_to_plugin_specific_node.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/convert_to_plugin_specific_node.cpp @@ -70,7 +70,7 @@ TEST_P(ConvertToPluginSpecificNode, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Run(); - CheckNodeOfTypeCount(executableNetwork, "Const", constNodeNum); + CheckNumberOfNodesWithType(executableNetwork, "Const", constNodeNum); } namespace { diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/add_convert_to_reorder.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/add_convert_to_reorder.cpp index 259ba4864df..f2a0ab6611a 100644 --- a/src/tests/functional/plugin/cpu/subgraph_tests/src/add_convert_to_reorder.cpp +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/add_convert_to_reorder.cpp @@ -69,8 +69,8 @@ 
TEST_F(AddConvertToReorderTest, smoke_TestAddReorder_CPU) { BuildGraph(ngraph::element::i8); Run(); - CheckNodeOfTypeCount(executableNetwork, "Convert", 0); - CheckNodeOfTypeCount(executableNetwork, "Reorder", 1); + CheckNumberOfNodesWithType(executableNetwork, "Convert", 0); + CheckNumberOfNodesWithType(executableNetwork, "Reorder", 1); } } // namespace } // namespace LayerTestsDefinitions \ No newline at end of file diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/align_matmul_input_ranks.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/align_matmul_input_ranks.cpp index 7fa6fc4a86d..bf2e33c4392 100644 --- a/src/tests/functional/plugin/cpu/subgraph_tests/src/align_matmul_input_ranks.cpp +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/align_matmul_input_ranks.cpp @@ -70,7 +70,7 @@ TEST_P(AlignMatMulInputRanksTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Run(); - CheckNodeOfTypeCount(executableNetwork, "Reshape", expectedNumOfReshapes); // Squeeze / Unsqueeze turns into Reshape + CheckNumberOfNodesWithType(executableNetwork, "Reshape", expectedNumOfReshapes); // Squeeze / Unsqueeze turns into Reshape CheckPluginRelatedResults(executableNetwork, "MatMul"); } diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/concat_const_inplace.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/concat_const_inplace.cpp index 4145953deb0..3f07b59f8e0 100644 --- a/src/tests/functional/plugin/cpu/subgraph_tests/src/concat_const_inplace.cpp +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/concat_const_inplace.cpp @@ -73,9 +73,9 @@ namespace { Run(); if (this->GetParam() == Precision::BF16) - CheckNodeOfTypeCount(executableNetwork, "Reorder", 4); + CheckNumberOfNodesWithType(executableNetwork, "Reorder", 4); else - CheckNodeOfTypeCount(executableNetwork, "Reorder", 3); + CheckNumberOfNodesWithType(executableNetwork, "Reorder", 3); } INSTANTIATE_TEST_SUITE_P(smoke_ConcatConstantInPlaceTest_CPU, 
ConcatConstantInPlaceTest, diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/input_noreorder_eltwise_bf16.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/input_noreorder_eltwise_bf16.cpp index 7a1510fd611..fd1b1d788a1 100644 --- a/src/tests/functional/plugin/cpu/subgraph_tests/src/input_noreorder_eltwise_bf16.cpp +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/input_noreorder_eltwise_bf16.cpp @@ -55,8 +55,8 @@ TEST_F(InputNoReorderEltwiseBF16, CompareWithRefs) { Run(); - CheckNodeOfTypeCount(executableNetwork, "Reorder", 0); - CheckNodeOfTypeCount(executableNetwork, "Convert", 0); - CheckNodeOfTypeCount(executableNetwork, "Eltwise", 1); + CheckNumberOfNodesWithType(executableNetwork, "Reorder", 0); + CheckNumberOfNodesWithType(executableNetwork, "Convert", 0); + CheckNumberOfNodesWithType(executableNetwork, "Eltwise", 1); } } // namespace CPULayerTestsDefinitions diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/seq_native_order.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/seq_native_order.cpp new file mode 100644 index 00000000000..7eb9ce2ce70 --- /dev/null +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/seq_native_order.cpp @@ -0,0 +1,299 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ngraph_functions/builders.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" +#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" + +using namespace CPUTestUtils; +using namespace ov::test; + +namespace SubgraphTestsDefinitions { + +enum class SEQ_TYPE { + GRU, + LSTM, + RNN +}; + +using TargetShapeParams = std::tuple; // seq_length + +using InputShapeParams = std::tuple, // bounds for batch_size and seq_length + std::vector>; // target batch_size and seq_length + +using SeqParams = std::tuple, // 
Activations + float, // Clip + bool, // Linear_before_reset + ov::op::RecurrentSequenceDirection, // Direction + ElementType>; // Network precision + +class SequenceCPUTest : public testing::WithParamInterface, virtual public ov::test::SubgraphBaseTest, public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + SEQ_TYPE seqType; + size_t hidden_size, input_size; + InputShapeParams inShapeParams; + std::vector activations; + float clip; + bool linearBeforeReset; + ov::op::RecurrentSequenceDirection direction; + ElementType netPrecision; + + std::tie(seqType, hidden_size, input_size, inShapeParams, activations, clip, linearBeforeReset, direction, netPrecision) = obj.param; + + std::vector bounds; + std::vector targetShapes; + std::tie(bounds, targetShapes) = inShapeParams; + + std::ostringstream result; + + if (seqType == SEQ_TYPE::GRU) { + result << "GRU_"; + } else if (seqType == SEQ_TYPE::LSTM) { + result << "LSTM_"; + } else if (seqType == SEQ_TYPE::RNN) { + result << "RNN_"; + } else { + IE_THROW() << "Unsupported seq type"; + } + result << "hidden_size=" << hidden_size << "_input_size=" << input_size << "_"; + result << "batch_size_dyn=" << bounds[0] << "_seq_length_dyn=" << bounds[1] << "_"; + for (const auto &ts : targetShapes) { + size_t bs, sl; + std::tie(bs, sl) = ts; + result << "(bs=" << bs << "_sl=" << sl << ")_"; + } + + result << "activations=" << CommonTestUtils::vec2str(activations) << "_"; + result << "clip=" << clip << "_"; + result << "linear=" << linearBeforeReset << "_"; + result << "direction=" << direction << "_"; + result << "netPrec=" << netPrecision; + + return result.str(); + } + +protected: + void SetUp() override { + const size_t batch_size_pos = 0; + const size_t seq_length_pos = 1; + + SEQ_TYPE seqType; + size_t hidden_size, input_size; + InputShapeParams inShapeParams; + std::vector activations; + float clip; + bool linearBeforeReset; + ov::op::RecurrentSequenceDirection direction; + 
ElementType netPrecision; + + std::tie(seqType, hidden_size, input_size, inShapeParams, activations, clip, linearBeforeReset, direction, netPrecision) = this->GetParam(); + + std::vector bounds; + std::vector targetShapes; + std::tie(bounds, targetShapes) = inShapeParams; + + targetDevice = CommonTestUtils::DEVICE_CPU; + + seqLengthInIdx = (seqType == SEQ_TYPE::LSTM ? 3 : 2); + + const size_t numDirections = direction == ov::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 2 : 1; + + // dynamic shapes + ov::PartialShape X_shape(std::vector{bounds[seq_length_pos], bounds[batch_size_pos], ov::Dimension(input_size)}); + inputDynamicShapes.push_back(X_shape); + ov::PartialShape second_in_shape(std::vector{bounds[batch_size_pos], ov::Dimension(numDirections), + ov::Dimension(hidden_size)}); + inputDynamicShapes.push_back(second_in_shape); + if (seqType == SEQ_TYPE::LSTM) { + inputDynamicShapes.push_back(second_in_shape); + } + ov::PartialShape seq_len_shape(std::vector{bounds[batch_size_pos]}); + inputDynamicShapes.push_back(seq_len_shape); + + auto hidden_size_weight = hidden_size; + if (seqType == SEQ_TYPE::GRU) { + hidden_size_weight *= 3; + } else if (seqType == SEQ_TYPE::LSTM) { + hidden_size_weight *= 4; + } + + std::vector weightShape; + ov::Shape W_shape(std::vector{numDirections, hidden_size_weight, input_size}); + weightShape.push_back(W_shape); + ov::Shape R_shape(std::vector{numDirections, hidden_size_weight, hidden_size}); + weightShape.push_back(R_shape); + ov::Shape B_shape; + if (seqType == SEQ_TYPE::GRU) { + B_shape = std::vector{numDirections, (linearBeforeReset ? 
(4 * hidden_size) : (3 * hidden_size))}; + } else { + B_shape = std::vector{numDirections, hidden_size_weight}; + } + weightShape.push_back(B_shape); + + // target shape + for (const auto &ts : targetShapes) { + std::vector currTS; + + size_t bs, sl; + std::tie(bs, sl) = ts; + + currTS.emplace_back(std::vector{sl, bs, input_size}); + currTS.emplace_back(std::vector{bs, numDirections, hidden_size}); + if (seqType == SEQ_TYPE::LSTM) { + currTS.emplace_back(std::vector{bs, numDirections, hidden_size}); + } + currTS.emplace_back(std::vector{bs}); + targetStaticShapes.push_back(currTS); + } + + // function creation + std::vector types(inputDynamicShapes.size(), netPrecision); + types.back() = ElementType::i64; + auto params = ngraph::builder::makeDynamicParams(types, inputDynamicShapes); + + std::vector order_ref_before = {1, 0, 2}; + const auto order_before = std::make_shared(ov::element::i64, + ov::Shape({order_ref_before.size()}), + order_ref_before); + const auto transpose_before = std::make_shared(params[0], order_before); + + ov::OutputVector inputs; + inputs.push_back(transpose_before); + for (size_t i = 1; i < params.size(); i++) { + inputs.push_back(params[i]); + } + + std::shared_ptr seq_node; + if (seqType == SEQ_TYPE::GRU) { + seq_node = ngraph::builder::makeGRU(inputs, + weightShape, + hidden_size, + activations, + {}, + {}, + clip, + linearBeforeReset, + true, + direction, + ngraph::helpers::SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_PARAM); + } else if (seqType == SEQ_TYPE::LSTM) { + seq_node = ngraph::builder::makeLSTM(inputs, + weightShape, + hidden_size, + activations, + {}, + {}, + clip, + true, + direction, + ngraph::helpers::SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_PARAM); + } else if (seqType == SEQ_TYPE::RNN) { + seq_node = ngraph::builder::makeRNN(inputs, + weightShape, + hidden_size, + activations, + {}, + {}, + clip, + true, + direction, + ngraph::helpers::SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_PARAM); + } else { + IE_THROW() << "Unsupported 
seq type"; + } + + std::vector order_ref_after = {2, 1, 0, 3}; + const auto order_after = std::make_shared(ov::element::i64, + ov::Shape({order_ref_after.size()}), + order_ref_after); + const auto transpose_after = std::make_shared(seq_node->output(0), order_after); + + ov::OutputVector results; + results.push_back(transpose_after->output(0)); + + for (size_t i = 1; i < seq_node->get_output_size(); i++) { + results.push_back(seq_node->output(i)); + } + function = std::make_shared(results, params, "SequenceCPUTest"); + } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + SubgraphBaseTest::generate_inputs(targetInputStaticShapes); + + const size_t batchSize = targetInputStaticShapes[0][1]; + const int64_t maxSeqLen = targetInputStaticShapes[0][0]; + + const auto& funcInputs = function->inputs(); + const auto& seqLenInput = inputs.find(funcInputs[seqLengthInIdx].get_node_shared_ptr()); + if (seqLenInput == inputs.end()) + throw std::runtime_error("Could not find Sequence length input."); + + auto lenData = seqLenInput->second.data::value_type>(); + std::fill(lenData, lenData + batchSize, maxSeqLen); + } + +private: + size_t seqLengthInIdx = 2; +}; + +TEST_P(SequenceCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + run(); + CheckNumberOfNodesWithType(executableNetwork, "RNNSeq", 1); + CheckNumberOfNodesWithType(executableNetwork, "Transpose", 0); +} + +const std::vector nodeType = { + SEQ_TYPE::GRU, SEQ_TYPE::LSTM, SEQ_TYPE::RNN +}; + +const std::vector hiddenSizes = { + 1, 10 +}; + +const std::vector inputSizes = { + 1, 10 +}; + +const std::vector inShapeParams = { + InputShapeParams{std::vector{-1, -1}, std::vector{TargetShapeParams{3, 8}, + TargetShapeParams{10, 2}}}, + InputShapeParams{std::vector{{1, 15}, {1, 15}}, std::vector{TargetShapeParams{3, 8}, + TargetShapeParams{10, 2}}} +}; + +std::vector> activations = { + {"sigmoid", "tanh", "tanh"} +}; + +std::vector clip{0.f}; + +std::vector direction = 
{ov::op::RecurrentSequenceDirection::FORWARD}; + +std::vector linearBeforeReset = {true, false}; + +std::vector netPrecisions = { ElementType::f32 }; + +INSTANTIATE_TEST_SUITE_P(smoke_SequenceCPUTest, SequenceCPUTest, + ::testing::Combine(::testing::ValuesIn(nodeType), + ::testing::ValuesIn(hiddenSizes), + ::testing::ValuesIn(inputSizes), + ::testing::ValuesIn(inShapeParams), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(linearBeforeReset), + ::testing::ValuesIn(direction), + ::testing::ValuesIn(netPrecisions)), + SequenceCPUTest::getTestCaseName); + +} // namespace SubgraphTestsDefinitions diff --git a/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp b/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp index 599de747c99..38d6b430200 100644 --- a/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp +++ b/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp @@ -327,9 +327,7 @@ auto adjustBlockedFormatByIsa = [](std::vector& formats) { return paramsVector; } -void CheckNodeOfTypeCount(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType, size_t expectedCount) { - InferenceEngine::CNNNetwork execGraphInfo = execNet.GetExecGraphInfo(); - auto function = execGraphInfo.getFunction(); +void CheckNumberOfNodesWithTypeImpl(std::shared_ptr function, std::string nodeType, size_t expectedCount) { ASSERT_NE(nullptr, function); size_t actualNodeCount = 0; for (const auto &node : function->get_ops()) { @@ -346,6 +344,18 @@ void CheckNodeOfTypeCount(InferenceEngine::ExecutableNetwork &execNet, std::stri ASSERT_EQ(expectedCount, actualNodeCount) << "Unexpected count of the node type '" << nodeType << "' "; } + +void CheckNumberOfNodesWithType(ov::runtime::CompiledModel &execNet, std::string nodeType, size_t expectedCount) { + std::shared_ptr function = execNet.get_runtime_model(); + CheckNumberOfNodesWithTypeImpl(function, nodeType, expectedCount); +} + +void 
CheckNumberOfNodesWithType(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType, size_t expectedCount) { + InferenceEngine::CNNNetwork execGraphInfo = execNet.GetExecGraphInfo(); + std::shared_ptr function = execGraphInfo.getFunction(); + CheckNumberOfNodesWithTypeImpl(function, nodeType, expectedCount); +} + std::vector filterCPUInfoForDevice(std::vector CPUParams) { std::vector resCPUParams; const int selectedTypeIndex = 3; diff --git a/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp b/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp index 06cc8882afc..a96d04e8560 100644 --- a/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp +++ b/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp @@ -169,5 +169,6 @@ const std::map cpuBF16PluginConfig = // utility functions std::vector filterCPUSpecificParams(std::vector& paramsVector); std::vector filterCPUInfoForDevice(std::vector CPUParams); -void CheckNodeOfTypeCount(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType, size_t expectedCount); +void CheckNumberOfNodesWithType(ov::runtime::CompiledModel &execNet, std::string nodeType, size_t expectedCount); +void CheckNumberOfNodesWithType(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType, size_t expectedCount); } // namespace CPUTestUtils