diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/batch_extraction_configuration.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/batch_extraction_configuration.hpp
new file mode 100644
index 00000000000..710fbcfd9e9
--- /dev/null
+++ b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/batch_extraction_configuration.hpp
@@ -0,0 +1,31 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+
+namespace vpu {
+// Per-port decision taken by a slicer for one input or output of an operation.
+enum class SliceMode {
+    Slice,      // the port takes part in slicing
+    Unchanged   // the port is left as it is
+};
+// Outcome of a slicer: whether slicing is supported and, if it is, the modes of the inputs and outputs.
+class SliceConfiguration {
+public:
+    SliceConfiguration() = default;  // "slice is not supported" state: m_isSliceSupported keeps its default (false)
+    SliceConfiguration(std::vector inputs, std::vector outputs);  // configuration for a supported slice
+
+    bool isSliceSupported() const;
+    const std::vector& inputs() const;   // modes of the inputs
+    const std::vector& outputs() const;  // modes of the outputs
+
+private:
+    bool m_isSliceSupported = false;
+    std::vector m_inputs;
+    std::vector m_outputs;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/extract_dynamic_batch.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/extract_dynamic_batch.hpp
new file mode 100644
index 00000000000..6fbc669714d
--- /dev/null
+++ b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/extract_dynamic_batch.hpp
@@ -0,0 +1,24 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/pass/graph_rewrite.hpp"
+
+#include
+
+namespace vpu {
+// Function-level pass parameterised by a set of target entries (element type is not visible in this view).
+class ExtractBatch: public ngraph::pass::FunctionPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+
+    explicit ExtractBatch(std::unordered_set targets);  // taken by value
+    bool run_on_function(std::shared_ptr function) override;
+
+private:
+    std::unordered_set targets;
+};
+
+} // namespace vpu
diff --git
a/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/slice_binary_eltwise.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/slice_binary_eltwise.hpp
new file mode 100644
index 00000000000..bf9ae6f2d06
--- /dev/null
+++ b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/slice_binary_eltwise.hpp
@@ -0,0 +1,14 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/ngraph.hpp"
+#include "batch_extraction_configuration.hpp"
+
+namespace vpu {
+// Slicer for binary element-wise nodes; a default-constructed result means "slice is not supported".
+SliceConfiguration sliceBinaryEltwise(const ngraph::Node& node);
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/slice_convolution.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/slice_convolution.hpp
new file mode 100644
index 00000000000..c1feef7db1f
--- /dev/null
+++ b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/slice_convolution.hpp
@@ -0,0 +1,14 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/ngraph.hpp"
+#include "batch_extraction_configuration.hpp"
+
+namespace vpu {
+// Slicer for convolution-like nodes; a default-constructed result means "slice is not supported".
+SliceConfiguration sliceConvolution(const ngraph::Node& node);
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/slice_mat_mul.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/slice_mat_mul.hpp
new file mode 100644
index 00000000000..032da2bd340
--- /dev/null
+++ b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/slice_mat_mul.hpp
@@ -0,0 +1,14 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/ngraph.hpp"
+#include "batch_extraction_configuration.hpp"
+
+namespace vpu {
+// Slicer for MatMul nodes; a default-constructed result means "slice is not supported".
+SliceConfiguration sliceMatMul(const ngraph::Node& node);
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/slice_unary_eltwise.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/slice_unary_eltwise.hpp
new file mode 100644
index 00000000000..d008aaf7a3f
--- /dev/null
+++ b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/extract_dynamic_batch/slice_unary_eltwise.hpp
@@ -0,0 +1,14 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/ngraph.hpp"
+#include "batch_extraction_configuration.hpp"
+
+namespace vpu {
+// Slicer for unary element-wise nodes (its definition is not part of this view).
+SliceConfiguration sliceUnaryEltwise(const ngraph::Node& node);
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/utilities.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/utilities.hpp
index dd1fd1415be..efb43c0a6b6 100644
--- a/inference-engine/src/vpu/common/include/vpu/ngraph/utilities.hpp
+++ b/inference-engine/src/vpu/common/include/vpu/ngraph/utilities.hpp
@@ -7,6 +7,11 @@
 #include "ngraph/node.hpp"
 #include "ngraph/type/element_type.hpp"
 
+#include "vpu/utils/error.hpp"
+
+#include
+#include
+
 namespace vpu {
 
 std::vector evaluateTargetShape(const ngraph::Output& value);
@@ -15,6 +20,60 @@ std::shared_ptr shapeToConstant(const ngraph::element::Type& type,
 
 std::shared_ptr gatherShapeElements(const ngraph::Output&, int startIndex, size_t elemCount);
 
-void printTo(std::ostream& stream, const ngraph::NodeTypeInfo& object);
+template<>
+inline void printTo(std::ostream& stream, const ngraph::NodeTypeInfo& object) {
+    stream << object.name << " ver. " << object.version;  // prints "<name> ver. <version>"
+}
+// Set of nodes shared by the traversal helpers below (dfs stores the raw ngraph::Node* it pops).
+using Nodes = std::unordered_set;
+// Iterative depth-first walk from root: a node is recorded before visit() runs, visit() == false stops expansion below it; returns all recorded nodes.
+template
+Nodes dfs(ngraph::Node* root, GetNext&& getNext, Visit&& visit) {
+    Nodes visited;
+    std::stack stack{{root}};
+    while (!stack.empty()) {
+        const auto current = stack.top();
+        stack.pop();
+
+        if (!visited.emplace(current).second) {
+            continue;
+        }
+
+        if (!visit(current)) {
+            continue;
+        }
+
+        for (const auto& next : getNext(current)) {
+            stack.push(next);
+        }
+    }
+    return visited;
+}
+// Queue-driven walk: a node is visited on its getNumEntries(node)-th arrival (root: first); throws on extra arrivals or when the queue drains while arrivals are pending.
+template
+void bfs(ngraph::Node* root, NumEntries&& getNumEntries, Visit&& visit, MoveForward&& moveForward) {
+    std::deque deque{root};
+    std::unordered_map visits;
+    while (!deque.empty()) {
+        const auto current = deque.front();
+        deque.pop_front();
+
+        const auto numEntries = current == root ? 1 : getNumEntries(current);
+
+        const auto visitsCount = ++visits[current];
+        VPU_THROW_UNLESS(visitsCount <= numEntries, "Encountered loop at {}", current);
+
+        if (visitsCount < numEntries) {
+            VPU_THROW_UNLESS(!deque.empty(), "Node {} should be visited only after all predecessors, but it is not available through all of them", current);
+            continue;
+        }
+
+        if (!visit(current)) {
+            continue;
+        }
+
+        moveForward(deque, current);  // the callback decides which successors get enqueued
+    }
+}
 
 } // namespace vpu
diff --git a/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/batch_extraction_configuration.cpp b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/batch_extraction_configuration.cpp
new file mode 100644
index 00000000000..0a21d0ba95f
--- /dev/null
+++ b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/batch_extraction_configuration.cpp
@@ -0,0 +1,30 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/utils/error.hpp"
+#include "vpu/ngraph/transformations/extract_dynamic_batch/batch_extraction_configuration.hpp"
+
+namespace vpu {
+// Constructing with explicit input/output modes marks the configuration as "slice supported".
+SliceConfiguration::SliceConfiguration(std::vector inputs, std::vector outputs)
+    : m_isSliceSupported(true)
+    , m_inputs(std::move(inputs))
+    , m_outputs(std::move(outputs)) {}
+
+bool SliceConfiguration::isSliceSupported() const {
+    return m_isSliceSupported;
+}
+// Both accessors below throw (VPU_THROW_UNLESS) when the configuration is in the "unsupported" state.
+const std::vector& SliceConfiguration::inputs() const {
+    VPU_THROW_UNLESS(m_isSliceSupported, "Encountered an attempt to access inputs slice configuration for a case when slice is unsupported");
+    return m_inputs;
+}
+
+const std::vector& SliceConfiguration::outputs() const {
+    VPU_THROW_UNLESS(m_isSliceSupported, "Encountered an attempt to access outputs slice configuration for a case when slice is unsupported");
+    return m_outputs;
+}
+
+} // namespace vpu
+
diff --git a/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/extract_dynamic_batch.cpp b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/extract_dynamic_batch.cpp
new file mode 100644
index 00000000000..d55fe5629d4
--- /dev/null
+++ b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/extract_dynamic_batch.cpp
@@ -0,0 +1,487 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ngraph/opsets/opset5.hpp"
+
+#include "vpu/utils/optional.hpp"
+#include "vpu/ngraph/utilities.hpp"
+
+#include "vpu/ngraph/transformations/extract_dynamic_batch/extract_dynamic_batch.hpp"
+#include "vpu/ngraph/transformations/extract_dynamic_batch/slice_mat_mul.hpp"
+#include "vpu/ngraph/transformations/extract_dynamic_batch/slice_convolution.hpp"
+#include "vpu/ngraph/transformations/extract_dynamic_batch/slice_binary_eltwise.hpp"
+#include "vpu/ngraph/transformations/extract_dynamic_batch/slice_unary_eltwise.hpp"
+
+#include
+
+namespace vpu {
+
+NGRAPH_RTTI_DEFINITION(vpu::ExtractBatch, "ExtractBatch", 0);
+
+ExtractBatch::ExtractBatch(std::unordered_set targets) : targets(std::move(targets)) {}
+
+namespace {
+// Dispatches a node to the slicer registered for its type info; node types without an entry yield a default ("unsupported") configuration.
+class Slicers {
+public:
+    static bool isSupported(const ngraph::Node& node) {
+        return getSlicers().count(node.get_type_info());
+    }
+
+    static SliceConfiguration slice(const ngraph::Node& node) {
+        const auto& slicers = getSlicers();
+        const auto& type = node.get_type_info();
+        return slicers.count(type) ? slicers.at(type)(node) : SliceConfiguration{};
+    }
+
+private:
+    using Functor = std::function;
+    static const std::unordered_map& getSlicers() {
+        static const std::unordered_map& slicers = {  // const reference bound to a temporary: its lifetime is extended to that of the static reference
+            {ngraph::opset5::MatMul::type_info, sliceMatMul},
+            {ngraph::opset5::Convolution::type_info, sliceConvolution},
+            {ngraph::opset5::GroupConvolution::type_info, sliceConvolution},
+            {ngraph::opset5::ConvolutionBackpropData::type_info, sliceConvolution},
+
+            {ngraph::opset5::Add::type_info, sliceBinaryEltwise},
+            {ngraph::opset5::Multiply::type_info, sliceBinaryEltwise},
+            {ngraph::opset5::Minimum::type_info, sliceBinaryEltwise},
+            {ngraph::opset5::Maximum::type_info, sliceBinaryEltwise},
+
+            {ngraph::opset5::Relu::type_info, sliceUnaryEltwise},
+        };
+        return slicers;
+    }
+};
+// Result of getLeaves: `all` = nodes recorded by the walk (source removed), `leaves` = nodes at which the walk stopped.
+struct SubGraph {
+    Nodes leaves;
+    Nodes all;
+};
+// Nodes recorded by dfs from `from` that does not expand past `to`; `from` itself is removed from the result.
+template
+Nodes getNodes(ngraph::Node* from, ngraph::Node* to, Functor&& getNext) {
+    auto visited = dfs(from, std::forward(getNext), [to](ngraph::Node* node) { return node != to; });
+    visited.erase(from);
+    return visited;
+}
+// Walks from source via getNext; a node becomes a leaf when it has no next nodes or at least one next node is black-listed or not sliceable.
+template
+SubGraph getLeaves(ngraph::Node* source, const Nodes& blackList, Functor&& getNext) {
+    const auto isOk = [&blackList](ngraph::Node* node) { return Slicers::slice(*node).isSliceSupported() && !blackList.count(node); };
+    Nodes leaves;
+    auto visited = dfs(source, std::forward(getNext), [isOk, getNext, &leaves](ngraph::Node* node) {
+        const auto& nextNodes = getNext(node);
+        const auto exit = nextNodes.empty() || std::any_of(nextNodes.cbegin(), nextNodes.cend(), [isOk](ngraph::Node* node) { return !isOk(node); });
+        if (exit) {
+            leaves.emplace(node);
+            return false;
+        }
+        return true;
+    });
+    visited.erase(source);
+    return {leaves, visited};
+}
+// Collapses `leaves` to one node `lca` by stepping candidates back (getNextBackward) to their deepest predecessor, using BFS depths from source; `nodes` = getNodes(source, lca).
+template
+void getLeavesLCA(ngraph::Node* source, ngraph::Node*& lca, Nodes& nodes, const Nodes& leaves, const Nodes& allBackward,
+                  NextForward&& getNextForward, NextBackward&& getNextBackward) {
+    std::unordered_map depths{{ source, 0}}, leavesDepths;
+    const auto less = [&depths](ngraph::Node* lhs, ngraph::Node* rhs) {
+        VPU_THROW_UNLESS(depths.count(lhs), "There is no {} in all depth", lhs);
+        VPU_THROW_UNLESS(depths.count(rhs), "There is no {} in all depth", rhs);
+        return depths.at(lhs) < depths.at(rhs);
+    };
+
+    const auto equal = [&depths](ngraph::Node* lhs, ngraph::Node* rhs) {
+        VPU_THROW_UNLESS(depths.count(lhs), "There is no {} in all depth", lhs);
+        VPU_THROW_UNLESS(depths.count(rhs), "There is no {} in all depth", rhs);
+        return depths.at(lhs) == depths.at(rhs);
+    };
+
+    Nodes visited;
+    if (leaves.size() == 1 && leaves.count(source)) {  // the source is the only leaf: it is the answer and no nodes lie in between
+        lca = source;
+        nodes = visited;
+        return;
+    }
+
+    Nodes prevNodes;  // filled by the entry-count callback and read by the visit callback for the same node
+    bfs(
+        source,
+        [getNextBackward, &allBackward, &prevNodes](const ngraph::Node* current) {
+            prevNodes = getNextBackward(current);
+            for (auto it = prevNodes.begin(); it != prevNodes.end();) {
+                it = allBackward.count(*it) ? prevNodes.erase(it) : std::next(it);
+            }
+            return prevNodes.size();
+        },
+        [&](ngraph::Node* current) {
+            if (current == source) {
+                return true;
+            }
+            const auto depth = depths.at(*std::max_element(prevNodes.cbegin(), prevNodes.cend(), less)) + 1;
+            depths[current] = depth;
+
+            if (leaves.count(current)) {
+                leavesDepths[current] = depth;
+                return false;
+            }
+            return true;
+        },
+        [getNextForward](std::deque& deque, const ngraph::Node* current) {
+            const auto& nextNodes = getNextForward(current);
+            std::copy(nextNodes.cbegin(), nextNodes.cend(), std::back_inserter(deque));
+        });
+
+    VPU_THROW_UNLESS(leavesDepths.size() == leaves.size(), "leavesDepths and leaves have different sizes: {} vs {}", leavesDepths.size(), leaves.size());
+    VPU_THROW_UNLESS(!leaves.empty(), "Expecting at least one leaf to look for LCA from {}", source);  // an empty set would never reach size 1 below
+    auto lcaCandidates = leaves;
+    const auto minDepthNode = *std::min_element(lcaCandidates.cbegin(), lcaCandidates.cend(), less);  // keep the node, not an iterator: the set is erased from / inserted into below
+    while (!std::all_of(lcaCandidates.cbegin(), lcaCandidates.cend(), [equal, minDepthNode](ngraph::Node* end) { return equal(end, minDepthNode); })) {
+        std::unordered_map updates;
+        for (const auto& end : lcaCandidates) {
+            auto current = end;
+            while (!equal(current, minDepthNode)) {
+                const auto& nextNodes = getNextBackward(current);
+                current = *std::max_element(nextNodes.cbegin(), nextNodes.cend(), less);
+            }
+
+            updates[end] = current;
+        }
+
+        for (const auto& update : updates) {
+            lcaCandidates.erase(update.first);
+            lcaCandidates.emplace(update.second);
+        }
+    }
+
+    while (lcaCandidates.size() != 1) {
+        std::unordered_map updates;
+        for (const auto& end : lcaCandidates) {
+            const auto& nextNodes = getNextBackward(end);
+            const auto next = *std::max_element(nextNodes.cbegin(), nextNodes.cend(), less);
+
+            updates[end] = next;
+        }
+
+        for (const auto& update : updates) {
+            lcaCandidates.erase(update.first);
+            lcaCandidates.emplace(update.second);
+        }
+    }
+
+    lca = *lcaCandidates.begin();
+    nodes = getNodes(source, lca, getNextForward);
+}
+// Clones the nodes walked from root to leaf into a body and returns the node built as `loop`; inputs marked Slice get dimension 0 of their shape set to 1.
+template
+std::shared_ptr makeLoop(ngraph::Node* root, ngraph::Node* leaf, Functor&& getNextTop) {
+    ngraph::ParameterVector parameters;
+    ngraph::ResultVector results;
+    std::unordered_map, ngraph::Output> slicedInputs, invariantInputs;
+    std::set> concatenatedResults;
+    std::set> iterValueResults;
+
+    std::map, ngraph::Output> nodes;
+    const auto getInput = [&nodes, &parameters, &slicedInputs, &invariantInputs](const ngraph::Output& currentInput) {
+        if (nodes.count(currentInput)) {
+            return nodes.at(currentInput);
+        } else {
+            const auto& currentInputNode = currentInput.get_node();
+            VPU_THROW_UNLESS(ngraph::op::is_constant(currentInputNode) || ngraph::op::is_parameter(currentInputNode),
+                "Encountered intermediate node {} which is not cloned yet", currentInputNode);
+
+            // assume if constant has several consumers all of them requires either Slice or Unchanged
+            const auto& targetInputs = currentInput.get_target_inputs();
+            const auto adjacentDiff = std::adjacent_find(targetInputs.cbegin(), targetInputs.cend(),
+                [](const ngraph::Input& lhs, const ngraph::Input& rhs) {
+                    const auto& lhsNode = lhs.get_node();
+                    const auto& rhsNode = rhs.get_node();
+
+                    const auto& lhsSplitConfig = Slicers::slice(*lhsNode);
+                    const auto& rhsSplitConfig = Slicers::slice(*rhsNode);
+                    if (!lhsSplitConfig.isSliceSupported() || !rhsSplitConfig.isSliceSupported()) {
+                        return true;
+                    }
+
+                    const auto& lhsInputSplitConfig = lhsSplitConfig.inputs();
+                    const auto& rhsInputSplitConfig = rhsSplitConfig.inputs();
+                    return lhsInputSplitConfig[lhs.get_index()] != rhsInputSplitConfig[rhs.get_index()];
+                });
+
+            VPU_THROW_UNLESS(adjacentDiff == targetInputs.cend(),
+                "Encountered constant {} that has 2 consumers ({} and {}) with different split configurations",
+                currentInput, adjacentDiff->get_node(), std::next(adjacentDiff)->get_node());
+
+            const auto& targetInput = targetInputs.begin();
+            const auto& node = targetInput->get_node();
+            const auto& index = targetInput->get_index();
+            const auto splitInputConfiguration = Slicers::slice(*node).inputs();
+
+            if (splitInputConfiguration[index] == SliceMode::Slice) {
+                auto partialShape = currentInput.get_partial_shape();
+                partialShape[0] = 1;
+                auto parameter = std::make_shared(currentInput.get_element_type(), partialShape);
+                parameters.emplace_back(parameter);
+                slicedInputs[parameter] = currentInput;
+
+                nodes[currentInput] = parameter;
+                return static_cast>(parameter);
+            } else {
+                auto argument = currentInput;
+                if (ngraph::op::is_parameter(currentInputNode)) {
+                    auto parameter = std::make_shared(currentInput.get_element_type(), currentInput.get_partial_shape());
+                    parameters.emplace_back(parameter);
+                    invariantInputs[parameter] = currentInput;
+
+                    argument = parameter;
+                }
+
+                nodes[currentInput] = argument;
+                return argument;
+            }
+        }
+    };
+    const auto clone = [getInput](const ngraph::Node* source) {
+        std::vector> newInputs;
+        newInputs.reserve(source->get_input_size());
+        const auto& currentInputs = source->input_values();
+        std::transform(currentInputs.cbegin(), currentInputs.cend(), std::back_inserter(newInputs), getInput);
+
+        auto cloned = source->copy_with_new_inputs(newInputs);
+        cloned->set_friendly_name(source->get_friendly_name());
+        VPU_THROW_UNLESS(cloned->get_output_size() == source->get_output_size(),
+            "Encountered mismatch in output count between original node {} and copy without batch {}", source, cloned);
+        return cloned;
+    };
+
+    const auto splitInputConfiguration = Slicers::slice(*root).inputs();
+    for (std::size_t i = 0; i < root->get_input_size(); ++i) {
+        const auto& input = root->input_value(i);
+        ngraph::Output argument;
+        if (splitInputConfiguration[i] == SliceMode::Slice) {
+            auto partialShape = input.get_partial_shape();
+            partialShape[0] = 1;
+
+            auto parameter = std::make_shared(input.get_element_type(), partialShape);
+            parameters.emplace_back(parameter);
+            slicedInputs[parameter] = input;
+
+            argument = parameter;
+        } else if (!ngraph::op::is_constant(input.get_node())) {
+            auto parameter = std::make_shared(input.get_element_type(), input.get_partial_shape());
+            parameters.emplace_back(parameter);
+            invariantInputs[parameter] = input;
+
+            argument = parameter;
+        } else {
+            argument = input;
+        }
+
+        nodes[input] = argument;
+    }
+
+    std::shared_ptr bodyNode;  // clone of the node visited last; after bfs it is the clone of `leaf`
+    bfs(
+        root,
+        [getNextTop](const ngraph::Node* current) {
+            return getNextTop(current).size();
+        },
+        [leaf, clone, &bodyNode](const ngraph::Node* current) {
+            bodyNode = clone(current);
+            return current != leaf;
+        },
+        [&](std::deque& deque, ngraph::Node* current) {
+            for (std::size_t i = 0; i < current->get_output_size(); ++i) {
+                const auto& currentOutput = current->output(i);
+                const auto& bodyOutput = bodyNode->output(i);
+                const auto& currentOutputNode = currentOutput.get_node();
+                if (ngraph::op::is_output(currentOutputNode)) {
+                    const auto splitOutputConfiguration = Slicers::slice(*current).outputs();
+                    auto& outputCategory = splitOutputConfiguration[i] == SliceMode::Slice ? concatenatedResults : iterValueResults;
+                    outputCategory.emplace(bodyOutput);
+                    results.emplace_back(std::make_shared(bodyOutput));
+                } else {
+                    nodes[currentOutput] = bodyOutput;
+                    const auto& consumers = current->get_output_target_inputs(i);
+                    std::transform(consumers.cbegin(), consumers.cend(), std::back_inserter(deque),
+                        [](const ngraph::Input& consumer) { return consumer.get_node(); });
+                }
+            }
+        });
+
+    const auto splitOutputConfiguration = Slicers::slice(*leaf).outputs();
+    for (std::size_t i = 0; i < bodyNode->get_output_size(); ++i) {
+        const auto& output = bodyNode->output(i);
+        auto result = std::make_shared(output);
+        auto& outputCategory = splitOutputConfiguration[i] == SliceMode::Slice ? concatenatedResults : iterValueResults;
+        outputCategory.emplace(output);
+        results.emplace_back(result);
+    }
+
+    VPU_THROW_UNLESS(!slicedInputs.empty(), "Failed to find sliced inputs for loop in extract batch");
+    const auto& slicedInput = slicedInputs.begin()->second;
+    const auto shapeOf = std::make_shared(slicedInput);
+
+    // constant's shape has to be scalar (not empty) since if this constant has empty shape, so Gather will
+    // have empty shape as well (Gather produces scalar). When this Gather will become ScatterElementsUpdate
+    // argument ScatterElementsUpdate shape inference function will fail, since it requires indices and updates
+    // to have exactly the same shape (indices rank must be the same as rank of data input which is 1D vector,
+    // so its rank = 1 != 0)
+    const auto constant = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, 0);
+
+    // TODO: check all other sliced inputs have the same batch?
+    const auto batchSize = std::make_shared(shapeOf, constant, constant);
+
+    const auto executionCondition = std::make_shared(ngraph::element::boolean, ngraph::Shape{}, true);
+    auto loop = std::make_shared(batchSize, executionCondition);
+
+    const auto iterationCondition = std::make_shared(ngraph::element::boolean, ngraph::Shape{}, true);
+    results.emplace_back(std::make_shared(iterationCondition));  // appended last: its index is what set_special_body_ports receives below
+    auto body = std::make_shared(results, parameters, "body");
+    loop->set_function(body);
+    for (const auto& entry : slicedInputs) {
+        loop->set_sliced_input(entry.first, entry.second, 0, 1, 1, -1, 0);
+    }
+
+    for (const auto& entry : invariantInputs) {
+        loop->set_invariant_input(entry.first, entry.second);
+    }
+
+    for (const auto& entry : iterValueResults) {
+        loop->get_iter_value(entry, -1);
+    }
+
+    for (const auto& entry : concatenatedResults) {
+        loop->get_concatenated_slices(entry, 0, 1, 1, -1, 0);
+    }
+
+    loop->set_special_body_ports({-1, static_cast(results.size()) - 1});
+    loop->validate_and_infer_types();
+    return loop;
+}
+// Adds to `externals` each node of allForward that has a getNextBackward neighbour outside allForward, allBackward and source; returns true if any such node was seen.
+template
+bool updateExternals(const ngraph::Node* source, const Nodes& allForward, const Nodes& allBackward, Nodes& externals, Functor&& getNextBackward) {
+    bool updated = false;
+    for (const auto& node : allForward) {
+        const auto& nextNodes = getNextBackward(node);
+        const auto hasExternalConnection = std::any_of(nextNodes.cbegin(), nextNodes.cend(), [source, &allForward, &allBackward](ngraph::Node* next) {
+            return !allForward.count(next) && !allBackward.count(next) && next != source;
+        });
+        if (hasExternalConnection) {
+            externals.emplace(node);
+            updated = true;  // NOTE(review): also set when `node` was already in `externals` - confirm the caller's do/while cannot spin on that
+        }
+    }
+    return updated;
+}
+
+} // namespace
+// For each node whose type is in `targets`: grow top/bottom sub-graphs of sliceable nodes, pick their end points and replace `bottom` with the node built by makeLoop.
+bool ExtractBatch::run_on_function(std::shared_ptr functionPointer) {
+    auto& function = *functionPointer;
+    bool changed = false;
+
+    Nodes sources;
+    for (const auto& operation : function.get_ordered_ops()) {
+        if (targets.count(operation->get_type_info())) {
+            sources.emplace(operation.get());
+        }
+    }
+
+    auto getNextTop = [](const ngraph::Node* node) {  // producers of the node's inputs, constants and parameters skipped
+        Nodes nextNodes;
+        for (std::size_t i = 0; i < node->get_input_size(); ++i) {
+            const auto next = node->get_input_source_output(i).get_node();
+            if (ngraph::op::is_constant(next) || ngraph::op::is_parameter(next)) {
+                continue;
+            }
+            nextNodes.emplace(next);
+        }
+        return nextNodes;
+    };
+
+    auto getNextBottom = [](const ngraph::Node* node) {  // consumers of the node's outputs, output nodes skipped
+        Nodes nextNodes;
+        for (std::size_t i = 0; i < node->get_output_size(); ++i) {
+            const auto consumers = node->get_output_target_inputs(i);
+            for (const auto consumer : consumers) {
+                const auto next = consumer.get_node();
+                if (ngraph::op::is_output(next)) {
+                    continue;
+                }
+                nextNodes.insert(next);
+            }
+        }
+        return nextNodes;
+    };
+
+    for (auto currentSource = sources.begin(); currentSource != sources.end(); currentSource = sources.erase(currentSource)) {
+        const auto& source = *currentSource;
+
+        VPU_THROW_UNLESS(Slicers::isSupported(*source),
+            "{} was requested as target operation type for batch extraction, but functor for this type is not provided.", source->get_type_info());
+
+        if (!Slicers::slice(*source).isSliceSupported()) {
+            continue;
+        }
+
+        Nodes topExternals, bottomExternals;
+
+        auto topSubGraph = getLeaves(source, topExternals, getNextTop);
+        auto bottomSubGraph = getLeaves(source, bottomExternals, getNextBottom);
+
+        auto hasNewTopExternals = updateExternals(source, topSubGraph.all, bottomSubGraph.all, topExternals, getNextBottom);
+        if (hasNewTopExternals) {
+            topSubGraph = getLeaves(source, topExternals, getNextTop);
+        }
+
+        bool hasNewBottomExternals = updateExternals(source, bottomSubGraph.all, topSubGraph.all, bottomExternals, getNextTop);
+        if (hasNewBottomExternals) {
+            bottomSubGraph = getLeaves(source, bottomExternals, getNextBottom);
+        }
+
+        ngraph::Node* top = nullptr;
+        ngraph::Node* bottom = nullptr;
+        do {
+            getLeavesLCA(source, top, topSubGraph.all, topSubGraph.leaves, bottomSubGraph.all, getNextTop, getNextBottom);
+            getLeavesLCA(source, bottom, bottomSubGraph.all, bottomSubGraph.leaves, topSubGraph.all, getNextBottom, getNextTop);
+
+            hasNewTopExternals = updateExternals(source, topSubGraph.all, bottomSubGraph.all, topExternals, getNextBottom);
+            if (hasNewTopExternals) {
+                topSubGraph = getLeaves(source, topExternals, getNextTop);
+            }
+
+            hasNewBottomExternals = updateExternals(source, bottomSubGraph.all, topSubGraph.all, bottomExternals, getNextTop);
+            if (hasNewBottomExternals) {
+                bottomSubGraph = getLeaves(source, bottomExternals, getNextBottom);
+            }
+        } while (hasNewTopExternals || hasNewBottomExternals);
+
+        for (const auto& node : topSubGraph.all) {
+            if (sources.count(node)) {
+                sources.erase(node);
+            }
+        }
+
+        for (const auto& node : bottomSubGraph.all) {
+            if (sources.count(node)) {
+                sources.erase(node);
+            }
+        }
+
+        auto loop = makeLoop(top, bottom, getNextTop);
+        auto bottomNode = bottom->shared_from_this();
+        loop->set_friendly_name(bottomNode->get_friendly_name());
+        ngraph::replace_node(bottomNode, loop);
+        function.validate_nodes_and_infer_types();
+        changed = true;
+    }
+
+    return changed;
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/slice_binary_eltwise.cpp b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/slice_binary_eltwise.cpp
new file mode 100644
index 00000000000..a5e87ded47b
--- /dev/null
+++ b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/slice_binary_eltwise.cpp
@@ -0,0 +1,79 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/utils/error.hpp"
+#include "vpu/ngraph/transformations/extract_dynamic_batch/slice_binary_eltwise.hpp"
+
+namespace vpu {
+// Unsupported for a static batch of 1 or when dynamic dimensions other than dim 0 are present; an input is sliced only if it has the merged rank and the merged batch.
+SliceConfiguration sliceBinaryEltwise(const ngraph::Node& node) {
+    const auto& eltwise = dynamic_cast(node);
+    VPU_THROW_UNLESS(eltwise.get_input_size() == 2, "Expecting operation {} to have {} inputs, got {}", node, 2, eltwise.get_input_size());
+    VPU_THROW_UNLESS(eltwise.get_output_size() == 1, "Expecting operation {} to have {} outputs, got {}", node, 1, eltwise.get_output_size());
+
+    const auto& lhs = eltwise.input_value(0);
+    const auto& rhs = eltwise.input_value(1);
+    const auto& out = eltwise.output(0);
+
+    const auto& lhsPartialShape = lhs.get_partial_shape();
+    const auto& rhsPartialShape = rhs.get_partial_shape();
+    const auto& outPartialShape = out.get_partial_shape();
+
+    const auto& broadcastSpec = eltwise.get_autob();
+    auto inputPartialShape = lhsPartialShape;
+    if (broadcastSpec == ngraph::op::AutoBroadcastSpec::NONE) {
+        ngraph::PartialShape::merge_into(inputPartialShape, rhsPartialShape);
+    } else {
+        ngraph::PartialShape::broadcast_merge_into(inputPartialShape, rhsPartialShape, broadcastSpec);
+    }
+
+    const auto& inputRank = inputPartialShape.rank();
+    const auto& lhsRank = lhsPartialShape.rank();
+    const auto& rhsRank = rhsPartialShape.rank();
+    const auto& outRank = outPartialShape.rank();
+
+    VPU_THROW_UNLESS(inputRank == outRank && inputRank.is_static(),
+        "Expecting operation {} to have the same static rank for inputs and output, got merged inputs rank = {}, output rank = {}",
+        node, inputRank, outRank);
+
+    const auto& inputRankLength = inputRank.get_length();
+    const auto& lhsRankLength = lhsRank.get_length();  // NOTE(review): only the merged rank is checked to be static above - confirm lhs/rhs ranks cannot be dynamic here
+    const auto& rhsRankLength = rhsRank.get_length();
+    const auto& outRankLength = outRank.get_length();
+
+    const auto& inputsBatch = inputRankLength > 0 ? inputPartialShape[0] : 0;
+    const auto& outBatch = outRankLength > 0 ? outPartialShape[0] : 0;
+    VPU_THROW_UNLESS(inputsBatch == outBatch,
+        "Expecting operation {} to have the same batch on both inputs and output, got input batch = {}, output batch = {}",
+        node, inputsBatch, outBatch);
+
+
+    if (inputsBatch.is_static() && inputsBatch.get_length() == 1) {
+        return {};
+    }
+
+    const auto& maxRankInputPartialShape = lhsRankLength == inputRankLength ? lhsPartialShape : rhsPartialShape;
+    const auto& minRankInputPartialShape = lhsRankLength == inputRankLength ? rhsPartialShape : lhsPartialShape;
+
+    const auto checkPartialShape = [](const ngraph::PartialShape& partialShape) {  // true when fully static or when dim 0 is the only dynamic dimension
+        const auto dynamicDimensionsCount = std::count_if(partialShape.cbegin(), partialShape.cend(),
+            [](const ngraph::Dimension& dimension) { return dimension.is_dynamic(); });
+        return dynamicDimensionsCount == 0 || (dynamicDimensionsCount == 1 && partialShape[0].is_dynamic());
+    };
+
+    const auto isMaxRankInputOk = checkPartialShape(maxRankInputPartialShape);
+    const auto isMinRankInputOk = minRankInputPartialShape.rank().get_length() == maxRankInputPartialShape.rank().get_length()
+        ? checkPartialShape(minRankInputPartialShape)
+        : minRankInputPartialShape.is_static();
+    if (!isMaxRankInputOk || !isMinRankInputOk) {
+        return {};
+    }
+
+    const auto lhsSplitMode = lhsRankLength < inputRankLength || lhsPartialShape[0] != inputPartialShape[0] ? SliceMode::Unchanged : SliceMode::Slice;
+    const auto rhsSplitMode = rhsRankLength < inputRankLength || rhsPartialShape[0] != inputPartialShape[0] ? SliceMode::Unchanged : SliceMode::Slice;
+
+    return {{lhsSplitMode, rhsSplitMode}, {SliceMode::Slice}};
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/slice_convolution.cpp b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/slice_convolution.cpp
new file mode 100644
index 00000000000..a2ab099f2cf
--- /dev/null
+++ b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/slice_convolution.cpp
@@ -0,0 +1,37 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/utils/error.hpp"
+#include "ngraph/opsets/opset5.hpp"
+#include "vpu/ngraph/transformations/extract_dynamic_batch/slice_convolution.hpp"
+
+namespace vpu {
+// Requires a constant kernel (input 1); supported only when dim 0 of the data input is its single dynamic dimension.
+SliceConfiguration sliceConvolution(const ngraph::Node& node) {
+    VPU_THROW_UNLESS(node.get_input_size() == 2, "Expecting operation {} to have {} inputs, got {}", node, 2, node.get_input_size());
+    VPU_THROW_UNLESS(node.get_output_size() == 1, "Expecting operation {} to have {} outputs, got {}", node, 1, node.get_output_size());
+    VPU_THROW_UNLESS(ngraph::op::is_constant(node.input_value(1).get_node_shared_ptr()), "Expecting operation {} to have constant kernel, got {}",
+        node, node.input_value(1));
+
+    const auto& data = node.input_value(0);
+    const auto& dataPartialShape = data.get_partial_shape();
+    const auto& dataRank = dataPartialShape.rank();
+    VPU_THROW_UNLESS(dataRank.is_static(), "Expecting operation {} to have static rank for input {}, got {}", node, data, dataPartialShape);
+    const auto& dataRankLength = dataRank.get_length();
+    VPU_THROW_UNLESS(dataRankLength >= 3 && dataRankLength <= 5, "Expecting operation {} to have rank of input {} in [{}, {}], got {}",
+        node, data, 3, 5, dataRankLength);
+
+    const auto& batch = dataPartialShape[0];
+    if (batch.is_static()) {
+        return {};
+    }
+
+    if (std::count_if(dataPartialShape.cbegin(), dataPartialShape.cend(), [](const ngraph::Dimension& dimension) { return dimension.is_dynamic(); }) > 1) {
+        return {};
+    }
+
+    return {{SliceMode::Slice, SliceMode::Unchanged}, {SliceMode::Slice}};
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/slice_mat_mul.cpp b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/slice_mat_mul.cpp
new file mode 100644
index 00000000000..0ef91da9460
--- /dev/null
+++ b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/slice_mat_mul.cpp
@@ -0,0 +1,74 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/utils/error.hpp"
+#include "ngraph/opsets/opset5.hpp"
+#include "vpu/ngraph/transformations/extract_dynamic_batch/slice_mat_mul.hpp"
+
+namespace vpu {
+
+SliceConfiguration sliceMatMul(const ngraph::Node& node) {
+    VPU_THROW_UNLESS(node.get_input_size() == 2, "Expecting operation {} to have {} inputs, got {}", node, 2, node.get_input_size());
+    VPU_THROW_UNLESS(node.get_output_size() == 1, "Expecting operation {} to have {} outputs, got {}", node, 1, node.get_output_size());
+
+    // target networks have MatMul only with constant second input
+    // there are tests on dynamic MatMul with non-constant second input
+    // if try to process MatMul with non-constant second input it will
+    // affect tests and they will fail, since Loop support is not ready yet
+    if (!ngraph::op::is_constant(node.input_value(1).get_node_shared_ptr())) {
+        return {};
+    }
+
+    const auto& lhs = node.input_value(0);
+    const auto& lhsPartialShape = lhs.get_partial_shape();
+    const auto& lhsRank = lhsPartialShape.rank();
+    VPU_THROW_UNLESS(lhsRank.is_static(), "Expecting operation {} to have static rank for input {}, got {}", node, lhs, lhsPartialShape);
+
+    const auto& rhs
= node.input_value(1); // rhs is the second input (index 1), the one required to be constant above; index 0 is lhs + const auto& rhsPartialShape = rhs.get_partial_shape(); + const auto& rhsRank = rhsPartialShape.rank(); + VPU_THROW_UNLESS(rhsRank.is_static(), "Expecting operation {} to have static rank for input {}, got {}", node, rhs, rhsPartialShape); + + const auto& lhsRankLength = lhsRank.get_length(); + const auto& rhsRankLength = rhsRank.get_length(); + + const auto maxRankLength = std::max(lhsRankLength, rhsRankLength); + if (maxRankLength < 3) { // neither operand has dimensions ahead of its last two + return {}; + } + + const auto isBatchStatic = [](const ngraph::PartialShape& shape) { // true when every dimension ahead of the last two is static (trivially true for rank below 3, false for dynamic rank) + const auto& rank = shape.rank(); + if (rank.is_dynamic()) { + return false; + } + const auto rankLength = rank.get_length(); + if (rankLength < 3) { + return true; + } + return std::all_of(shape.rbegin() + 2, shape.rend(), [](const ngraph::Dimension& dimension) { return dimension.is_static(); }); + }; + + if (maxRankLength > 3) { // more than one leading dimension: never sliced, and a dynamic leading dimension is an error + VPU_THROW_UNLESS(isBatchStatic(lhsPartialShape), "Encountered multi-dimensional dynamic batch for operation {}, but it's unsupported", node); + VPU_THROW_UNLESS(isBatchStatic(rhsPartialShape), "Encountered multi-dimensional dynamic batch for operation {}, but it's unsupported", node); + return {}; + } + + if (isBatchStatic(lhsPartialShape) && isBatchStatic(rhsPartialShape)) { // nothing dynamic ahead of the last two dimensions on either operand + return {}; + } + + if (std::count_if(lhsPartialShape.cbegin(), lhsPartialShape.cend(), [](const ngraph::Dimension& dimension) { return dimension.is_dynamic(); }) > 1 || + std::count_if(rhsPartialShape.cbegin(), rhsPartialShape.cend(), [](const ngraph::Dimension& dimension) { return dimension.is_dynamic(); }) > 1) { // either operand has more than one dynamic dimension + return {}; + } + + const auto& lhsSliceMode = lhsRankLength < 3 ? SliceMode::Unchanged : SliceMode::Slice; + const auto& rhsSliceMode = rhsRankLength < 3 ?
SliceMode::Unchanged : SliceMode::Slice; + + return {{lhsSliceMode, rhsSliceMode}, {SliceMode::Slice}}; +} + +} // namespace vpu diff --git a/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/slice_unary_eltwise.cpp b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/slice_unary_eltwise.cpp new file mode 100644 index 00000000000..9b1e881a53f --- /dev/null +++ b/inference-engine/src/vpu/common/src/ngraph/transformations/extract_dynamic_batch/slice_unary_eltwise.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include "vpu/utils/error.hpp" +#include "vpu/ngraph/transformations/extract_dynamic_batch/slice_unary_eltwise.hpp" + +namespace vpu { + +SliceConfiguration sliceUnaryEltwise(const ngraph::Node& node) { // Chooses slice modes for a one-input, one-output node whose input and output agree in rank and in dimension 0 + VPU_THROW_UNLESS(node.get_input_size() == 1, "Expecting unary eltwise operation {} to have {} inputs, got {}", node, 1, node.get_input_size()); + VPU_THROW_UNLESS(node.get_output_size() == 1, "Expecting unary eltwise operation {} to have {} outputs, got {}", node, 1, node.get_output_size()); + + const auto& inp = node.input_value(0); + const auto& out = node.output(0); + + const auto& inpPartialShape = inp.get_partial_shape(); + const auto& outPartialShape = out.get_partial_shape(); + + const auto& inpRank = inpPartialShape.rank(); + const auto& outRank = outPartialShape.rank(); + + VPU_THROW_UNLESS(inpRank.is_static() && inpRank == outRank, // a dynamic rank is rejected here so the get_length() calls below are only made on static ranks + "Expecting unary eltwise operation {} to have the same static rank for input and output, got input rank = {}, output rank = {}", + node, inpRank, outRank); + + const auto& inpRankLength = inpRank.get_length(); + const auto& outRankLength = outRank.get_length(); + + const auto& inpBatch = inpRankLength > 0 ? inpPartialShape[0] : 0; // rank-0 shapes get a placeholder batch of 0 + const auto& outBatch = outRankLength > 0 ?
outPartialShape[0] : 0; + VPU_THROW_UNLESS(inpBatch == outBatch, + "Expecting unary eltwise operation {} to have the same batch on input and output, got input batch = {}, output batch = {}", + node, inpBatch, outBatch); + + if (inpBatch.is_static() && inpBatch.get_length() == 1) { // batch of exactly 1: default-constructed configuration + return {}; + } + + const auto dynamicDimensionsCount = std::count_if(inpPartialShape.cbegin(), inpPartialShape.cend(), + [](const ngraph::Dimension& dimension) { return dimension.is_dynamic(); }); + if (dynamicDimensionsCount > 1 || (dynamicDimensionsCount == 1 && inpPartialShape[0].is_static())) { // several dynamic dimensions, or the single dynamic one is not dimension 0 + return {}; + } + + return {{SliceMode::Slice}, {SliceMode::Slice}}; +} + +} // namespace vpu diff --git a/inference-engine/src/vpu/common/src/ngraph/utilities.cpp b/inference-engine/src/vpu/common/src/ngraph/utilities.cpp index 1ef247aa2ee..cc4073b0c0f 100644 --- a/inference-engine/src/vpu/common/src/ngraph/utilities.cpp +++ b/inference-engine/src/vpu/common/src/ngraph/utilities.cpp @@ -86,8 +86,4 @@ std::shared_ptr gatherShapeElements(const ngraph::Output #include #include +#include namespace vpu { @@ -181,6 +182,13 @@ ie::ICNNNetwork::Ptr FrontEnd::convertNetwork(ie::ICNNNetwork& network) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + + manager.register_pass(std::unordered_set{ + ngraph::opset5::MatMul::type_info, + ngraph::opset5::Convolution::type_info, + ngraph::opset5::GroupConvolution::type_info + }); + manager.register_pass(); manager.register_pass(); manager.register_pass(); diff --git a/ngraph/core/include/ngraph/partial_shape.hpp b/ngraph/core/include/ngraph/partial_shape.hpp index 2cef5324e5a..774c84bfa61 100644 --- a/ngraph/core/include/ngraph/partial_shape.hpp +++ b/ngraph/core/include/ngraph/partial_shape.hpp @@ -44,7 +44,14 @@ namespace ngraph /// (Informal notation examples: `{1,2,3,4}`, `{6}`, `{}`) class NGRAPH_API PartialShape { + using Dimensions = std::vector; + public: + using iterator = Dimensions::iterator; + using const_iterator =
Dimensions::const_iterator; + using reverse_iterator = Dimensions::reverse_iterator; + using const_reverse_iterator = Dimensions::const_reverse_iterator; + /// \brief Constructs a shape with static rank from an initializer list of Dimension. /// \param init The Dimension values for the constructed shape. /// @@ -223,6 +230,54 @@ namespace ngraph const PartialShape& src, const op::AutoBroadcastSpec& autob); + /// \brief Returns a read/write iterator that points to the first + /// element in the shape. Iteration is done in ordinary + /// element order. The iterated storage (m_dimensions) has no meaning when the rank is not static. + iterator begin() noexcept { return m_dimensions.begin(); } + /// \brief Returns a read-only (constant) iterator that points to the + /// first element in the shape. Iteration is done in ordinary + /// element order. + const_iterator begin() const noexcept { return cbegin(); } + /// \brief Returns a read/write iterator that points one past the last + /// element in the shape. Iteration is done in ordinary + /// element order. + iterator end() noexcept { return m_dimensions.end(); } + /// \brief Returns a read-only (constant) iterator that points one past + /// the last element in the shape. Iteration is done in ordinary + /// element order. + const_iterator end() const noexcept { return cend(); } + /// \brief Returns a read/write reverse iterator that points to the + /// last element in the shape. Iteration is done in reverse + /// element order. + reverse_iterator rbegin() noexcept { return m_dimensions.rbegin(); } + /// \brief Returns a read-only (constant) reverse iterator that points + /// to the last element in the shape. Iteration is done in + /// reverse element order. + const_reverse_iterator rbegin() const noexcept { return crbegin(); } + /// \brief Returns a read/write reverse iterator that points to one + /// before the first element in the shape. Iteration is done + /// in reverse element order.
+ reverse_iterator rend() noexcept { return m_dimensions.rend(); } + /// \brief Returns a read-only (constant) reverse iterator that points + /// to one before the first element in the shape. Iteration + /// is done in reverse element order. + const_reverse_iterator rend() const noexcept { return crend(); } + /// \brief Returns a read-only (constant) iterator that points to the + /// first element in the shape. Iteration is done in ordinary + /// element order. The iterated storage (m_dimensions) has no meaning when the rank is not static. + const_iterator cbegin() const noexcept { return m_dimensions.cbegin(); } + /// \brief Returns a read-only (constant) iterator that points one past + /// the last element in the shape. Iteration is done in ordinary + /// element order. + const_iterator cend() const noexcept { return m_dimensions.cend(); } + /// \brief Returns a read-only (constant) reverse iterator that points + /// to the last element in the shape. Iteration is done in + /// reverse element order. + const_reverse_iterator crbegin() const noexcept { return m_dimensions.crbegin(); } + /// \brief Returns a read-only (constant) reverse iterator that points + /// to one before the first element in the shape. Iteration + /// is done in reverse element order. + const_reverse_iterator crend() const noexcept { return m_dimensions.crend(); } private: // Private constructor for PartialShape::dynamic(). PartialShape(bool rank_is_static, const std::vector& dimensions); @@ -250,7 +305,7 @@ namespace ngraph } m_shape_type{ShapeType::SHAPE_IS_UNKNOWN}; // Shape dimensions. This has no meaning if m_rank_is_static is false. - std::vector m_dimensions; + Dimensions m_dimensions; }; /// \brief Elementwise addition of two PartialShape objects.