[GNA] Custom ops: GNAConvolution and GNAMaxPool (#18102)

This commit is contained in:
Evgeny Kotov 2023-06-24 00:27:34 +02:00 committed by GitHub
parent 31b07c40d9
commit 70e52cafd9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 1726 additions and 11 deletions

View File

@ -36,7 +36,8 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src
$<TARGET_PROPERTY:inference_engine_obj,SOURCE_DIR>/src # For CNNNetworkNGraphImpl
$<TARGET_PROPERTY:openvino::runtime::dev,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:openvino::pugixml,INTERFACE_INCLUDE_DIRECTORIES>)
$<TARGET_PROPERTY:openvino::pugixml,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:openvino_intel_gna_plugin,SOURCE_DIR>/src/ops)
target_compile_definitions(${TARGET_NAME}_obj PRIVATE $<TARGET_PROPERTY:ngraph,INTERFACE_COMPILE_DEFINITIONS>)
@ -70,7 +71,8 @@ target_include_directories(${TARGET_NAME}_s
${CMAKE_CURRENT_SOURCE_DIR}/src
$<TARGET_PROPERTY:inference_engine_obj,SOURCE_DIR>/src # For CNNNetworkNGraphImpl
$<TARGET_PROPERTY:openvino::runtime::dev,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:openvino::pugixml,INTERFACE_INCLUDE_DIRECTORIES>)
$<TARGET_PROPERTY:openvino::pugixml,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:openvino_intel_gna_plugin,SOURCE_DIR>/src/ops)
set_target_properties(${TARGET_NAME}_s PROPERTIES
EXCLUDE_FROM_ALL ON

View File

@ -19,6 +19,8 @@
#include "caseless.hpp"
#include "cpp/ie_cnn_network.h"
#include "exec_graph_info.hpp"
#include "gna_convolution.hpp"
#include "gna_max_pool.hpp"
#include "ie_legacy_itt.hpp"
#include "legacy/graph_tools.hpp"
#include "legacy/net_pass.h"
@ -592,7 +594,7 @@ CNNLayerCreator::CNNLayerCreator() {
Builder::asString(axis < 0 ? axis + node->get_input_shape(0).size() : axis);
return res;
});
addSpecificCreator({"AvgPool", "MaxPool"},
addSpecificCreator({"AvgPool", "MaxPool", "GNAMaxPool"},
[](const std::shared_ptr<::ngraph::Node>& node,
const std::map<std::string, std::string>& params) -> CNNLayerPtr {
LayerParams attrs = {node->get_friendly_name(),
@ -609,7 +611,7 @@ CNNLayerCreator::CNNLayerCreator() {
res->params.erase("exclude_pad");
}
if (node->description() == "MaxPool") {
if (node->description() == "MaxPool" || node->description() == "GNAMaxPool") {
res->params["pool-method"] = "max";
} else if (node->description() == "AvgPool") {
res->params["pool-method"] = "avg";
@ -1677,7 +1679,7 @@ CNNLayerCreator::CNNLayerCreator() {
return res;
});
addSpecificCreator({"ConvolutionIE"},
addSpecificCreator({"ConvolutionIE", "GNAConvolution"},
[](const std::shared_ptr<::ngraph::Node>& node,
const std::map<std::string, std::string>& params) -> CNNLayerPtr {
LayerParams attrs = {node->get_friendly_name(),
@ -1691,10 +1693,19 @@ CNNLayerCreator::CNNLayerCreator() {
// Restore output and kernel size
auto shape = node->get_input_shape(1);
shape.erase(shape.begin(), shape.begin() + 2);
// extract HW
if (node->description() == "GNAConvolution") {
// NHWC
shape.erase(shape.begin());
shape.erase(shape.end() - 1);
res->params["output"] = Builder::asString(*(node->get_shape().rbegin()));
} else {
// NCHW
shape.erase(shape.begin(), shape.begin() + 2);
res->params["output"] = Builder::asString(node->get_shape()[1]);
}
res->params["kernel"] = Builder::asString(static_cast<std::vector<size_t>&>(shape));
res->params["output"] = Builder::asString(node->get_shape()[1]);
// forward auto_pad only when its value is different than explicit
if (params.at("auto_pad") == "explicit") {
@ -2027,7 +2038,9 @@ void convertFunctionToICNNNetwork(const std::shared_ptr<const ::ngraph::Function
const std::shared_ptr<::ngraph::Node>& consumerLayer,
bool keep_constants) -> bool {
if (((::ngraph::as_type_ptr<::ngraph::op::ConvolutionIE>(consumerLayer) ||
::ngraph::as_type_ptr<::ngraph::op::FullyConnected>(consumerLayer)) &&
::ngraph::as_type_ptr<::ngraph::op::FullyConnected>(consumerLayer) ||
::ngraph::as_type_ptr<ov::intel_gna::op::GNAConvolution>(consumerLayer) ||
::ngraph::as_type_ptr<ov::intel_gna::op::GNAMaxPool>(consumerLayer)) &&
!keep_constants) ||
::ngraph::as_type_ptr<::ngraph::op::v1::BinaryConvolution>(consumerLayer) ||
::ngraph::as_type_ptr<::ngraph::op::DeconvolutionIE>(consumerLayer) ||

View File

@ -452,7 +452,7 @@ inline Shape::value_type get_dim_by_axis(const Shape& shape, int64_t axis) {
* @brief unsqueezes shape to rank
*/
inline Shape unsqueeze_shape(const Shape& shape, ov::Rank::value_type rank) {
const int rank_delta = static_cast<int>(rank) - static_cast<int>(shape.size());
const ov::Rank::value_type rank_delta = rank - static_cast<ov::Rank::value_type>(shape.size());
if (rank_delta <= 0)
return shape;
@ -599,6 +599,32 @@ inline bool is_constant_1d(const Output<Node>& output) {
return ov::pass::pattern::rank_equals(0)(output) || ov::pass::pattern::rank_equals(1)(output);
}
/**
* @brief Checks if node has parent node with type T
*/
template <typename T>
bool has_parent_node(std::shared_ptr<ov::Node> node) {
for (const auto& parent : node->input_values()) {
if (dynamic_cast<const T*>(parent.get_node()))
return true;
}
return false;
}
/**
* @brief Checks if node has child node with type T
*/
template <typename T>
bool has_child_node(std::shared_ptr<ov::Node> node) {
for (size_t output_idx = 0; output_idx < node->get_output_size(); ++output_idx) {
for (auto& input : node->get_output_target_inputs(output_idx)) {
if (dynamic_cast<const T*>(input.get_node()))
return true;
}
}
return false;
}
} // namespace graph_utils
} // namespace intel_gna
} // namespace ov

View File

@ -1,4 +1,4 @@
// Copyright (C) 2018-2023 Intel Corporation
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -1,4 +1,4 @@
// Copyright (C) 2018-2023 Intel Corporation
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -52,6 +52,8 @@ enum class LayerType {
Gemm,
Pwl,
Identity,
GNAConvolution,
GNAMaxPool,
NO_TYPE
};
@ -93,6 +95,8 @@ static const InferenceEngine::details::caseless_map<std::string, LayerType> Laye
{"Pwl", LayerType::Pwl},
{"Identity", LayerType::Identity},
{"Gemm", LayerType::Gemm},
{"GNAConvolution", LayerType::GNAConvolution},
{"GNAMaxPool", LayerType::GNAMaxPool},
};
LayerType LayerTypeFromStr(const std::string& str);

View File

@ -0,0 +1,359 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "gna_convolution.hpp"
#include <cmath>
#include <cstddef>
#include <ngraph/validation_util.hpp>
#include "ngraph/attribute_visitor.hpp"
#include "ngraph/runtime/host_tensor.hpp"
namespace ov {
namespace intel_gna {
namespace op {
namespace internal {
// Determines the number of spatial dimensions of the convolution.
//
// A value already stored on the op (anything other than -1) is returned unchanged. Otherwise the
// candidates are consulted in a fixed order and every available candidate overrides the previous
// one: dilations size, strides size, pads_begin size, pads_end size, data rank minus
// num_non_spatial_data_dims, filters rank minus num_non_spatial_filter_dims.
// Returns -1 when none of the candidates is available.
int64_t calculate_num_spatial(const GNAConvolution* op,
                              const ov::PartialShape& input_shape,
                              const ov::PartialShape& filters_shape,
                              const int64_t& num_non_spatial_data_dims,
                              const int64_t& num_non_spatial_filter_dims) {
    if (op->m_num_spatial != -1) {
        return op->m_num_spatial;
    }

    int64_t spatial_count = -1;

    // Empty attributes carry no information and leave the current candidate untouched.
    const auto adopt_if_set = [&spatial_count](size_t attribute_size) {
        if (attribute_size != 0) {
            spatial_count = static_cast<int64_t>(attribute_size);
        }
    };
    adopt_if_set(op->m_dilations.size());
    adopt_if_set(op->m_strides.size());
    adopt_if_set(op->m_pads_begin.size());
    adopt_if_set(op->m_pads_end.size());

    // Static ranks are consulted last, so they win over the attribute sizes.
    const auto data_rank = input_shape.rank();
    if (data_rank.is_static()) {
        spatial_count = data_rank.get_length() - num_non_spatial_data_dims;
    }
    const auto kernel_rank = filters_shape.rank();
    if (kernel_rank.is_static()) {
        spatial_count = kernel_rank.get_length() - num_non_spatial_filter_dims;
    }
    return spatial_count;
}
// Fills in defaults for empty attributes and validates them against the op's spatial rank.
//
// Does nothing while the spatial rank is unknown (-1). Otherwise empty strides/dilations become
// all ones, empty pads become all zeros, and PadType::VALID forces both pads to zeros. Afterwards
// every attribute must have exactly one entry per spatial axis, and strides/dilations must not
// contain zeros; a violated condition is reported through NODE_VALIDATION_CHECK.
void update_and_validate_attributes(GNAConvolution* op) {
    const auto& num_spatial = op->m_num_spatial;
    if (num_spatial == -1) {
        return;
    }

    auto& strides = op->m_strides;
    auto& dilations = op->m_dilations;
    auto& pad_begin = op->m_pads_begin;
    auto& pad_end = op->m_pads_end;
    const bool padding_is_valid_mode = (op->m_auto_pad == ov::op::PadType::VALID);

    if (strides.empty()) {
        strides = ov::Strides(num_spatial, 1);
    }
    if (dilations.empty()) {
        dilations = ov::Strides(num_spatial, 1);
    }
    if (padding_is_valid_mode || pad_begin.empty()) {
        pad_begin = ov::CoordinateDiff(num_spatial, 0);
    }
    if (padding_is_valid_mode || pad_end.empty()) {
        pad_end = ov::CoordinateDiff(num_spatial, 0);
    }

    // The condition expressions below are kept as written: the check macro receives them as-is.
    NODE_VALIDATION_CHECK(op,
                          static_cast<int64_t>(strides.size()) == num_spatial,
                          "Strides should be defined for all and only spatial features.");
    NODE_VALIDATION_CHECK(op,
                          static_cast<int64_t>(dilations.size()) == num_spatial,
                          "Dilations should be defined for all and only spatial features.");
    NODE_VALIDATION_CHECK(op,
                          static_cast<int64_t>(pad_begin.size()) == num_spatial &&
                              static_cast<int64_t>(pad_end.size()) == num_spatial,
                          "Pads should be defined for all and only spatial features.");
    NODE_VALIDATION_CHECK(op,
                          std::all_of(dilations.begin(),
                                      dilations.end(),
                                      [](const size_t& i) {
                                          return i > 0;
                                      }),
                          "Filter dilation (",
                          dilations,
                          ") has zero dimension.");
    NODE_VALIDATION_CHECK(op,
                          std::all_of(strides.begin(),
                                      strides.end(),
                                      [](const size_t& i) {
                                          return i > 0;
                                      }),
                          "Filter strides (",
                          strides,
                          ") has zero dimension.");
}
// code is based on ngraph/core/shape_inference/include/convolution_shape_inference.hpp
// but instead of NCHW uses NHWC layout
// Generic variant, used for every shape type T without a dedicated specialization:
// an unknown spatial rank (-1) is treated as an error and reported through OPENVINO_ASSERT.
// Returns true whenever the assertion holds.
template <class T>
inline bool dynamic_check(const int64_t& num_spatial) {
OPENVINO_ASSERT(num_spatial != -1,
"Convolution shape inference doesn't have enough information for static shape calculation");
return true;
}
// Specialization for ov::PartialShape: an unknown spatial rank is not an error here,
// the function only reports whether the rank is known (num_spatial != -1).
// FIXME: do we need that function as a template ?
template <>
inline bool dynamic_check<ov::PartialShape>(const int64_t& num_spatial) {
return num_spatial != -1;
}
// FIXME: do we need that function as a template ?
// TODO: search where that function is used in openvino
template <class T>
bool resolve_auto_pad_for_shape(const GNAConvolution* op,
ov::CoordinateDiff& pads_begin,
ov::CoordinateDiff& pads_end,
const std::vector<T>& input_shapes,
const int64_t& num_non_spatial_data_dims,
const int64_t& num_non_spatial_filter_dims) {
const auto& auto_pad = op->get_auto_pad();
if (auto_pad != ov::op::PadType::SAME_UPPER && auto_pad != ov::op::PadType::SAME_LOWER) {
pads_begin = op->m_pads_begin;
pads_end = op->m_pads_end;
return true;
}
auto& num_spatial = op->m_num_spatial;
if (!dynamic_check<T>(num_spatial))
return false;
auto input_shape = input_shapes[0];
auto filters_shape = input_shapes[1];
if (input_shape.rank().is_dynamic())
input_shape.resize(num_spatial + num_non_spatial_data_dims);
if (filters_shape.rank().is_dynamic())
filters_shape.resize(num_spatial + num_non_spatial_filter_dims);
const auto& strides = op->m_strides;
const auto& dilations = op->m_dilations;
pads_begin.resize(num_spatial);
pads_end.resize(num_spatial);
bool status = true;
for (int64_t i = 0; i < num_spatial; ++i) {
const auto& input_dim = input_shape[i + 1];
const auto& filters_dim = filters_shape[i + 1];
if (input_dim.is_static() && filters_dim.is_static()) {
const int64_t& window_dilated_dim = (filters_dim.get_length() - 1) * dilations[i] + 1;
NODE_VALIDATION_CHECK(op,
window_dilated_dim > 0,
"Window after dilation has dimension less than 1 (dim: ",
window_dilated_dim,
") at axis ",
i,
".");
const int64_t& image_size = input_dim.get_length();
const int64_t& filter_stride = strides[i];
const int64_t& output_size = (image_size + filter_stride - 1) / filter_stride;
const int64_t& tmp = (output_size - 1) * filter_stride + window_dilated_dim;
const int64_t& padding_needed = tmp > image_size ? tmp - image_size : 0;
const size_t& padding_lhs = static_cast<size_t>(padding_needed / 2);
const size_t& padding_rhs = static_cast<size_t>(padding_needed - padding_lhs);
pads_begin[i] = auto_pad == ov::op::PadType::SAME_UPPER ? padding_lhs : padding_rhs;
pads_end[i] = auto_pad == ov::op::PadType::SAME_UPPER ? padding_rhs : padding_lhs;
} else {
status = false;
}
}
return status;
}
// Computes the output shape of a GNAConvolution from already resolved paddings.
//
// Expects exactly two input shapes (data, filters) and one output shape slot, and a known
// spatial rank on the op. The output batch is taken from data axis 0, the output channel (last
// axis) from filters axis 0, and every spatial axis i is read from index i + 1 of both inputs.
// Spatial axes whose input or filter dimension is not static are left as produced by resize().
// Violated preconditions are reported through NODE_VALIDATION_CHECK.
// FIXME: do we need that function as a template ?
// TODO: search where that function is used in openvino
template <class T>
void shape_infer(const GNAConvolution* op,
const ov::CoordinateDiff& pads_begin,
const ov::CoordinateDiff& pads_end,
const std::vector<T>& input_shapes,
std::vector<T>& output_shapes) {
NODE_VALIDATION_CHECK(op, input_shapes.size() == 2 && output_shapes.size() == 1);
// Copies: shapes of unknown rank are expanded below without touching the caller's data.
auto input_shape = input_shapes[0], filters_shape = input_shapes[1];
const auto& num_spatial = op->m_num_spatial;
NODE_VALIDATION_CHECK(op,
num_spatial != -1,
"Convolution shape_infer should be provided with correct num_spatial attribute");
// Two non-spatial axes per tensor: batch + channel for data, C_OUT + C_IN for filters.
if (input_shape.rank().is_dynamic())
input_shape.resize(num_spatial + 2);
if (filters_shape.rank().is_dynamic())
filters_shape.resize(num_spatial + 2);
NODE_VALIDATION_CHECK(op,
(static_cast<int64_t>(input_shape.size()) == (num_spatial + 2)) &&
(static_cast<int64_t>(filters_shape.size()) == (num_spatial + 2)),
"Data batch and filters rank do not match (data batch shape: ",
input_shape,
", filters shape: ",
filters_shape,
").");
// ranks are originally static or aligned with num_spatial, attributes assumed to be valid
auto& output_shape = output_shapes[0];
output_shape.resize(num_spatial + 2);
output_shape[0] = input_shape[0];
// Channel is the last in NHWC layout
*(output_shape.rbegin()) = filters_shape[0]; // NHWC C is last instead of filters_shape[0] for NCHW layout
// Input channels are the last axis of both the data and the filters tensor.
const auto n_data_channel = *(input_shape.rbegin());
const auto n_filter_channel = *(filters_shape.rbegin());
NODE_VALIDATION_CHECK(
op,
n_data_channel.compatible(n_filter_channel), // instead of input_shape[1].compatible(filters_shape[1]),
"Data batch channel count (",
n_data_channel, // instead of input_shape[1],
") does not match filter input ",
"channel count (",
n_filter_channel, // instead of filters_shape[1],
").");
const auto& dilations = op->m_dilations;
const auto& strides = op->m_strides;
for (int64_t i = 0; i < num_spatial; ++i) {
// Spatial axes start at index 1 in both tensors.
const auto& input_dim = input_shape[i + 1];
const auto& filters_dim = filters_shape[i + 1];
if (input_dim.is_static() && filters_dim.is_static()) {
// Extent covered by the filter once the gaps introduced by dilation are included.
const int64_t& window_dilated_dim = (filters_dim.get_length() - 1) * dilations[i] + 1;
NODE_VALIDATION_CHECK(op,
window_dilated_dim > 0,
"Window after dilation has dimension less than 1 (dim: ",
window_dilated_dim,
") at axis ",
i,
".");
// Input extent including the padding on both sides.
const int64_t& data_padded_dilated_dim = input_dim.get_length() + pads_begin[i] + pads_end[i];
NODE_VALIDATION_CHECK(op,
window_dilated_dim <= data_padded_dilated_dim,
"Window after dilation has dimension (dim: ",
window_dilated_dim,
") larger than the data shape after padding (dim: ",
data_padded_dilated_dim,
") at axis ",
i,
".");
// Number of window positions that fit into the padded input for the given stride.
output_shape[i + 1] = (data_padded_dilated_dim - window_dilated_dim) / strides[i] + 1;
}
}
}
} // namespace internal
// Three-input form: data, filters and bias become the node's inputs, the attributes are stored
// as given, then types and shapes are validated and inferred.
GNAConvolution::GNAConvolution(const ov::Output<ov::Node>& data_batch,
                               const ov::Output<ov::Node>& filters,
                               const ov::Output<ov::Node>& bias,
                               const ov::Strides& strides,
                               const ov::CoordinateDiff& pads_begin,
                               const ov::CoordinateDiff& pads_end,
                               const ov::Strides& dilations,
                               const ov::op::PadType& auto_pad)
    : ov::op::Op(ov::OutputVector{data_batch, filters, bias}),
      m_strides(strides),
      m_dilations(dilations),
      m_pads_begin(pads_begin),
      m_pads_end(pads_end),
      m_auto_pad(auto_pad) {
    constructor_validate_and_infer_types();
}
// Two-input form: data and filters become the node's inputs, the attributes are stored as given,
// then types and shapes are validated and inferred.
GNAConvolution::GNAConvolution(const ov::Output<ov::Node>& data_batch,
                               const ov::Output<ov::Node>& filters,
                               const ov::Strides& strides,
                               const ov::CoordinateDiff& pads_begin,
                               const ov::CoordinateDiff& pads_end,
                               const ov::Strides& dilations,
                               const ov::op::PadType& auto_pad)
    : ov::op::Op(ov::OutputVector{data_batch, filters}),
      m_strides(strides),
      m_dilations(dilations),
      m_pads_begin(pads_begin),
      m_pads_end(pads_end),
      m_auto_pad(auto_pad) {
    constructor_validate_and_infer_types();
}
// Exposes the convolution attributes to the visitor under fixed names, in a fixed order:
// strides, dilations, pads_begin, pads_end, auto_pad. Always returns true.
bool GNAConvolution::visit_attributes(ov::AttributeVisitor& visitor) {
visitor.on_attribute("strides", m_strides);
visitor.on_attribute("dilations", m_dilations);
visitor.on_attribute("pads_begin", m_pads_begin);
visitor.on_attribute("pads_end", m_pads_end);
visitor.on_attribute("auto_pad", m_auto_pad);
return true;
}
void GNAConvolution::validate_and_infer_types() {
ov::element::Type data_batch_et = get_input_element_type(0);
ov::element::Type filters_et = get_input_element_type(1);
ov::element::Type result_et;
NODE_VALIDATION_CHECK(this,
ov::element::Type::merge(result_et, data_batch_et, filters_et),
"Element types for data batch and filters do not match (data batch element type: ",
data_batch_et,
", filters element type: ",
filters_et,
").");
NODE_VALIDATION_CHECK(this,
result_et.is_real() || result_et.is_integral_number(),
"Element types must be numeric. Got: ",
result_et);
auto& data_shape = get_input_partial_shape(0);
auto& filter_shape = get_input_partial_shape(1);
m_num_spatial = internal::calculate_num_spatial(this, data_shape, filter_shape, 2, 2);
internal::update_and_validate_attributes(this);
std::vector<ov::PartialShape> input_shapes = {data_shape, filter_shape};
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
if (m_num_spatial != -1) {
internal::resolve_auto_pad_for_shape(this, m_pads_begin, m_pads_end, input_shapes, 2, 2);
internal::shape_infer(this, m_pads_begin, m_pads_end, input_shapes, output_shapes);
}
set_output_type(0, result_et, output_shapes[0]);
}
// Creates a new GNAConvolution with the same attributes on top of the given inputs.
// Two inputs select the (data, filters) constructor, three inputs the (data, filters, bias) one;
// any other count throws.
std::shared_ptr<ov::Node> GNAConvolution::clone_with_new_inputs(const ov::OutputVector& new_args) const {
    const auto input_count = new_args.size();
    if (input_count == 2) {
        const auto& data = new_args.at(0);
        const auto& filters = new_args.at(1);
        return std::make_shared<GNAConvolution>(data,
                                                filters,
                                                m_strides,
                                                m_pads_begin,
                                                m_pads_end,
                                                m_dilations,
                                                m_auto_pad);
    }
    if (input_count == 3) {
        const auto& data = new_args.at(0);
        const auto& filters = new_args.at(1);
        const auto& bias = new_args.at(2);
        return std::make_shared<GNAConvolution>(data,
                                                filters,
                                                bias,
                                                m_strides,
                                                m_pads_begin,
                                                m_pads_end,
                                                m_dilations,
                                                m_auto_pad);
    }
    OPENVINO_THROW("Unsupported number of arguments for GNAConvolution operation");
}
} // namespace op
} // namespace intel_gna
} // namespace ov

View File

@ -0,0 +1,181 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/op.hpp"
#include "openvino/op/op.hpp"
namespace ov {
namespace intel_gna {
namespace op {
class GNAConvolution;
// Shape-inference helpers for GNAConvolution. They are declared ahead of the class so that the
// class can name them as friends.
namespace internal {
// Returns the number of spatial dimensions: the value stored on the op when it is set, otherwise
// a value derived from the attribute sizes and the static ranks of the given shapes (-1 if unknown).
int64_t calculate_num_spatial(const ov::intel_gna::op::GNAConvolution* op,
const ov::PartialShape& input_shape,
const ov::PartialShape& filters_shape,
const int64_t& num_non_spatial_data_dims,
const int64_t& num_non_spatial_filter_dims);
// Fills in defaults for empty attributes and validates them once the spatial rank is known.
void update_and_validate_attributes(ov::intel_gna::op::GNAConvolution* op);
// Writes the effective paddings to pads_begin / pads_end; returns false when they could not be
// fully resolved.
template <class T>
bool resolve_auto_pad_for_shape(const ov::intel_gna::op::GNAConvolution* op,
ov::CoordinateDiff& pads_begin,
ov::CoordinateDiff& pads_end,
const std::vector<T>& input_shapes,
const int64_t& num_non_spatial_data_dims,
const int64_t& num_non_spatial_filter_dims);
// Computes output_shapes[0] from the data and filters shapes and the resolved paddings.
template <class T>
void shape_infer(const ov::intel_gna::op::GNAConvolution* op,
const ov::CoordinateDiff& pads_begin,
const ov::CoordinateDiff& pads_end,
const std::vector<T>& input_shapes,
std::vector<T>& output_shapes);
} // namespace internal
/**
 * @brief Activation modes for fused convolutions.
 *
 * Enumerators keep their declaration order; NO_ACTIVATION is the last one.
 */
enum class ActivationType {
    SIGMOID,
    RELU,
    TANH,
    ABS,
    LOG,
    EXP,
    SIGN,
    CLAMP,
    NO_ACTIVATION
};
/// \brief Convolution with NHWC layout
///
/// Shape inference for this op reads the batch from axis 0, the spatial axes from axis 1 onwards
/// and the channels from the last axis of the data tensor. The filters tensor carries C_OUT on
/// axis 0, the spatial axes from axis 1 onwards and C_IN on its last axis.
class GNAConvolution : public ov::op::Op {
public:
    OPENVINO_OP("GNAConvolution", "intel_gna", ov::op::Op);

    /// \brief Constructs a convolution operation.
    GNAConvolution() = default;

    /// \brief Constructs a convolution operation with a bias input.
    ///
    /// \param data_batch The node producing the input data batch tensor.<br>
    /// `[N, D1, ... Df, C_IN]`
    /// \param filters The node producing the filters tensor.<br>
    /// `[C_OUT, F1, ... Ff, C_IN]`
    /// \param bias The node producing the bias tensor (third input of the operation).
    /// \param strides The strides.<br>
    /// `[f]`
    /// \param pads_begin The beginning of padding shape.<br>
    /// `[f]`
    /// \param pads_end The end of padding shape.<br>
    /// `[f]`
    /// \param dilations The dilations.<br>
    /// `[f]`
    /// \param auto_pad The pad type for automatically computing padding sizes.
    ///
    /// Output `[N, R1, ... Rf, C_OUT]`
    ///
    GNAConvolution(const ov::Output<ov::Node>& data_batch,
                   const ov::Output<ov::Node>& filters,
                   const ov::Output<ov::Node>& bias,
                   const ov::Strides& strides,
                   const ov::CoordinateDiff& pads_begin,
                   const ov::CoordinateDiff& pads_end,
                   const ov::Strides& dilations,
                   const ov::op::PadType& auto_pad = ov::op::PadType::EXPLICIT);

    /// \brief Constructs a convolution operation without a bias input.
    ///
    /// Parameters and output have the same meaning and layout as in the three-input constructor.
    GNAConvolution(const ov::Output<ov::Node>& data_batch,
                   const ov::Output<ov::Node>& filters,
                   const ov::Strides& strides,
                   const ov::CoordinateDiff& pads_begin,
                   const ov::CoordinateDiff& pads_end,
                   const ov::Strides& dilations,
                   const ov::op::PadType& auto_pad = ov::op::PadType::EXPLICIT);

    void validate_and_infer_types() override;
    bool visit_attributes(ov::AttributeVisitor& visitor) override;
    std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override;

    /// \return The strides.
    const ov::Strides& get_strides() const {
        return m_strides;
    }
    void set_strides(const ov::Strides& strides) {
        m_strides = strides;
    }

    /// \return The dilations.
    const ov::Strides& get_dilations() const {
        return m_dilations;
    }
    void set_dilations(const ov::Strides& dilations) {
        m_dilations = dilations;
    }

    /// \return The padding-below sizes (possibly negative).
    const ov::CoordinateDiff& get_pads_begin() const {
        return m_pads_begin;
    }
    void set_pads_begin(const ov::CoordinateDiff& pads_begin) {
        m_pads_begin = pads_begin;
    }

    /// \return The padding-above sizes (possibly negative).
    const ov::CoordinateDiff& get_pads_end() const {
        return m_pads_end;
    }
    /// \brief Sets the padding-above sizes.
    void set_pads_end(const ov::CoordinateDiff& pads_end) {
        m_pads_end = pads_end;
    }
    /// \brief Sets the padding-above sizes. Same effect as set_pads_end(); the name is kept so that
    /// existing callers continue to compile.
    void set_adding_above(const ov::CoordinateDiff& pads_end) {
        set_pads_end(pads_end);
    }

    /// \return The pad type for convolution.
    ov::op::PadType get_auto_pad() const {
        return m_auto_pad;
    }
    void set_auto_pad(const ov::op::PadType& auto_pad) {
        m_auto_pad = auto_pad;
    }

    /// \return The stored bias flag.
    /// NOTE(review): nothing in this class sets the flag, so it keeps its initial value (false);
    /// confirm whether the three-input constructor is expected to set it.
    bool has_bias() const {
        return m_has_add_node;
    }

    /// \return The activation attached to the convolution (NO_ACTIVATION until set_activation() is called).
    ActivationType get_activation() const {
        return m_activation_type;
    }
    void set_activation(ActivationType activation_type) {
        m_activation_type = activation_type;
    }

protected:
    ov::Strides m_strides;
    ov::Strides m_dilations;
    ov::CoordinateDiff m_pads_begin;
    ov::CoordinateDiff m_pads_end;
    // Value-initialized here because the default constructor does not set it.
    ov::op::PadType m_auto_pad{};
    // Number of spatial dimensions; -1 means "not determined yet".
    int64_t m_num_spatial = -1;

private:
    friend int64_t internal::calculate_num_spatial(const ov::intel_gna::op::GNAConvolution* op,
                                                   const ov::PartialShape& input_shape,
                                                   const ov::PartialShape& filters_shape,
                                                   const int64_t& num_non_spatial_data_dims,
                                                   const int64_t& num_non_spatial_filter_dims);

    friend void internal::update_and_validate_attributes(ov::intel_gna::op::GNAConvolution* op);

    template <class T>
    friend bool internal::resolve_auto_pad_for_shape(const ov::intel_gna::op::GNAConvolution* op,
                                                     ov::CoordinateDiff& pads_begin,
                                                     ov::CoordinateDiff& pads_end,
                                                     const std::vector<T>& input_shapes,
                                                     const int64_t& num_non_spatial_data_dims,
                                                     const int64_t& num_non_spatial_filter_dims);

    template <class T>
    friend void internal::shape_infer(const ov::intel_gna::op::GNAConvolution* op,
                                      const ov::CoordinateDiff& pads_begin,
                                      const ov::CoordinateDiff& pads_end,
                                      const std::vector<T>& input_shapes,
                                      std::vector<T>& output_shapes);

    // Both members are initialized in-class: no constructor sets them, and the getters above
    // would otherwise read indeterminate values.
    bool m_has_add_node = false;
    ActivationType m_activation_type = ActivationType::NO_ACTIVATION;
};
} // namespace op
} // namespace intel_gna
} // namespace ov

View File

@ -0,0 +1,277 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "gna_max_pool.hpp"
#include <assert.h>
#include "ngraph/attribute_visitor.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/validation_util.hpp"
namespace ov {
namespace intel_gna {
namespace op {
//
// Forward declaration of the pooling shape-inference helper defined directly below.
//
ov::PartialShape infer_batched_pooling_forward(const ov::Node* node,
                                               const ov::PartialShape& data_batch_shape,
                                               const ov::CoordinateDiff& data_padding_below,
                                               const ov::CoordinateDiff& data_padding_above,
                                               const ov::PartialShape& window_shape,
                                               const ov::Strides& window_strides,
                                               bool is_window_all_in_padding_allowed,
                                               bool ceil_mode,
                                               const ov::Strides& window_dilation);

//
// Infers the output batch shape for batched pooling fprop over a channel-last (NHWC-style) layout.
//
// The batch is read from axis 0, the channel count from the last axis and the spatial axes from
// axis 1 onwards. The spatial output extents come from ngraph::infer_windowed_reduction_output_shape,
// to which is_window_all_in_padding_allowed and ceil_mode are forwarded unchanged; an empty
// window_dilation is replaced by all ones. When the rank of data_batch_shape is dynamic, batch,
// channel and all spatial output dimensions stay dynamic.
//
// A data rank outside 3..5, mismatching attribute ranks, a zero batch size or a zero channel
// count are reported through NODE_VALIDATION_CHECK on `node`.
//
ov::PartialShape infer_batched_pooling_forward(const ov::Node* node,
                                               const ov::PartialShape& data_batch_shape,
                                               const ov::CoordinateDiff& data_padding_below,
                                               const ov::CoordinateDiff& data_padding_above,
                                               const ov::PartialShape& window_shape,
                                               const ov::Strides& window_strides,
                                               bool is_window_all_in_padding_allowed,
                                               bool ceil_mode,
                                               const ov::Strides& window_dilation) {
    // The message states the accepted range (3..5), in line with the condition.
    NODE_VALIDATION_CHECK(node,
                          data_batch_shape.rank().is_dynamic() ||
                              (data_batch_shape.rank().get_length() >= 3 && data_batch_shape.rank().get_length() <= 5),
                          "Data batch must have rank 3, 4 or 5 (one batch axis, ",
                          "one channel axis, and one to three spatial dimensions) ",
                          "(data batch shape: ",
                          data_batch_shape,
                          ").");

    // Merging with the padding sizes always yields a static spatial rank.
    ov::PartialShape data_spatial_shape{ov::PartialShape::dynamic()};
    NODE_VALIDATION_CHECK(node,
                          data_spatial_shape.merge_rank(data_batch_shape.rank() - 2) &&
                              data_spatial_shape.merge_rank(data_padding_below.size()) &&
                              data_spatial_shape.merge_rank(data_padding_above.size()) &&
                              data_spatial_shape.merge_rank(window_shape.rank()) &&
                              data_spatial_shape.merge_rank(window_strides.size()),
                          "Ranks for data item shape (data batch has shape ",
                          data_batch_shape,
                          ", so data item rank is ",
                          (data_batch_shape.rank() - 2),
                          "), padding below (",
                          data_padding_below,
                          "), padding above (",
                          data_padding_above,
                          "), window shape (",
                          window_shape,
                          "), and window strides (",
                          window_strides,
                          ") do not match.");

    ov::Dimension batch_size{ov::Dimension::dynamic()};
    ov::Dimension channel_count{ov::Dimension::dynamic()};
    ov::PartialShape data_output_spatial_shape{ov::PartialShape::dynamic(data_spatial_shape.rank())};

    if (data_batch_shape.rank().is_static()) {
        batch_size = data_batch_shape[0];
        // Channel-last layout: the channel count is the last axis (it would be axis 1 for NCHW).
        channel_count = *(data_batch_shape.end() - 1);
        // Channel-last layout: spatial axes start at axis 1 (they would start at axis 2 for NCHW).
        for (int64_t i = 0; i < data_spatial_shape.rank().get_length(); i++) {
            data_spatial_shape[i] = data_batch_shape[i + 1];
        }

        NODE_VALIDATION_CHECK(node, batch_size.is_dynamic() || batch_size.get_length() > 0, "Batch size is zero.");
        NODE_VALIDATION_CHECK(node,
                              channel_count.is_dynamic() || channel_count.get_length() > 0,
                              "Channel count is zero.");

        // For pooling ops we don't need dilation, so we fill in the identity value (all 1).
        ov::Strides data_dilation(data_spatial_shape.rank().get_length(), 1);
        ov::Strides dilations = window_dilation;
        // if the window_dilation was not specified, generate the default value (no dilations)
        if (window_dilation.empty()) {
            // dilations equal to 1 for each spatial axis mean that the window is not dilated
            dilations = ov::Strides(data_spatial_shape.rank().get_length(), 1);
        }

        data_output_spatial_shape = ngraph::infer_windowed_reduction_output_shape(node,
                                                                                  data_spatial_shape,
                                                                                  data_dilation,
                                                                                  data_padding_below,
                                                                                  data_padding_above,
                                                                                  window_shape,
                                                                                  window_strides,
                                                                                  dilations,
                                                                                  is_window_all_in_padding_allowed,
                                                                                  ceil_mode);
    }

    // Assemble [batch, spatial..., channel].
    ov::PartialShape data_batch_output_shape{ov::PartialShape::dynamic(data_output_spatial_shape.rank() + 2)};
    data_batch_output_shape[0] = batch_size;
    *(data_batch_output_shape.end() - 1) = channel_count;
    for (int64_t i = 0; i < data_spatial_shape.rank().get_length(); i++) {
        data_batch_output_shape[i + 1] = data_output_spatial_shape[i];
    }
    return data_batch_output_shape;
}
// Registers `arg` as the single input, stores the pooling attributes as given, then validates
// the node and infers its output type and shape.
GNAMaxPool::GNAMaxPool(const ov::Output<ov::Node>& arg,
                       const ov::Strides& strides,
                       const ov::Shape& pads_begin,
                       const ov::Shape& pads_end,
                       const ov::Shape& kernel,
                       const ov::op::RoundingType rounding_type,
                       const ov::op::PadType auto_pad)
    : ov::op::Op(ov::OutputVector{arg}),
      m_kernel(kernel),
      m_strides(strides),
      m_pads_begin(pads_begin),
      m_pads_end(pads_end),
      m_auto_pad(auto_pad),
      m_rounding_type(rounding_type) {
    constructor_validate_and_infer_types();
}
// Exposes the pooling attributes to the visitor under fixed names, in a fixed order:
// strides, pads_begin, pads_end, kernel, rounding_type, auto_pad. Always returns true.
bool GNAMaxPool::visit_attributes(ov::AttributeVisitor& visitor) {
visitor.on_attribute("strides", m_strides);
visitor.on_attribute("pads_begin", m_pads_begin);
visitor.on_attribute("pads_end", m_pads_end);
visitor.on_attribute("kernel", m_kernel);
visitor.on_attribute("rounding_type", m_rounding_type);
visitor.on_attribute("auto_pad", m_auto_pad);
return true;
}
void GNAMaxPool::validate_and_infer_types() {
if (0 == m_strides.size()) {
m_strides = ov::Strides(m_kernel.size(), 1);
}
if (0 == m_pads_begin.size()) {
m_pads_begin = ov::Shape(m_kernel.size(), 0);
}
if (0 == m_pads_end.size()) {
m_pads_end = ov::Shape(m_kernel.size(), 0);
}
const ov::PartialShape& arg_shape = get_input_partial_shape(0);
NODE_VALIDATION_CHECK(
this,
arg_shape.rank().compatible(3) || arg_shape.rank().compatible(4) || arg_shape.rank().compatible(5),
"Expected a 3D, 4D or 5D tensor for the input. Got: ",
arg_shape);
if (arg_shape.rank().is_static()) {
NODE_VALIDATION_CHECK(this,
static_cast<int64_t>(m_pads_end.size()) == arg_shape.rank().get_max_length() - 2,
"Expected pads_end size to be equal to input size - 2. Got: ",
m_pads_end.size());
NODE_VALIDATION_CHECK(this,
static_cast<int64_t>(m_pads_begin.size()) == arg_shape.rank().get_max_length() - 2,
"Expected pads_begin size to be equal to input size - 2. Got: ",
m_pads_begin.size());
NODE_VALIDATION_CHECK(this,
static_cast<int64_t>(m_kernel.size()) == arg_shape.rank().get_max_length() - 2,
"Expected kernel size to be equal to input size - 2. Got: ",
m_kernel.size());
NODE_VALIDATION_CHECK(this,
static_cast<int64_t>(m_strides.size()) == arg_shape.rank().get_max_length() - 2,
"Expected strides size to be equal to input size - 2. Got: ",
m_strides.size());
}
const ov::PartialShape output_shape = infer_output_shape(ov::Strides{}); // no dilations of the filter window
set_output_type(0, get_input_element_type(0), output_shape);
}
// Computes the output shape for the current attributes and input shape.
//
// SAME_UPPER / SAME_LOWER recompute the stored paddings via update_auto_padding(); VALID resets the
// stored paddings to zeros. When the paddings are usable the shape comes from
// infer_batched_pooling_forward(). Otherwise the result is fully dynamic, or - for a static input
// rank - dynamic except for the batch (axis 0) and channel (last axis) dimensions that are static
// in the input.
ov::PartialShape GNAMaxPool::infer_output_shape(const ov::Strides& dilations) {
    const auto& input_pshape = get_input_partial_shape(0);

    bool paddings_resolved = true;
    switch (m_auto_pad) {
    case ov::op::PadType::SAME_UPPER:
    case ov::op::PadType::SAME_LOWER: {
        // Without explicit dilations the window is treated as not dilated.
        ov::Strides filter_dilations = dilations;
        if (filter_dilations.empty()) {
            filter_dilations = ov::Strides(m_kernel.size(), 1);
        }
        paddings_resolved = update_auto_padding(input_pshape, filter_dilations, m_pads_end, m_pads_begin);
        break;
    }
    case ov::op::PadType::VALID:
        m_pads_end = ov::Shape(m_pads_end.size(), 0);
        m_pads_begin = ov::Shape(m_pads_begin.size(), 0);
        break;
    default:
        break;
    }

    if (paddings_resolved) {
        const ov::CoordinateDiff signed_pads_begin(m_pads_begin.begin(), m_pads_begin.end());
        const ov::CoordinateDiff signed_pads_end(m_pads_end.begin(), m_pads_end.end());
        const bool use_ceil_rounding = (m_rounding_type == ov::op::RoundingType::CEIL);
        return ov::intel_gna::op::infer_batched_pooling_forward(this,
                                                                input_pshape,
                                                                signed_pads_begin,
                                                                signed_pads_end,
                                                                m_kernel,
                                                                m_strides,
                                                                true,
                                                                use_ceil_rounding,
                                                                dilations);
    }

    // Auto padding could not be resolved: keep only what is known about batch and channel.
    ov::PartialShape partially_known = ov::PartialShape::dynamic();
    if (input_pshape.rank().is_static()) {
        partially_known = std::vector<ov::Dimension>(input_pshape.rank().get_max_length(), ov::Dimension::dynamic());
        if (input_pshape[0].is_static()) {
            partially_known[0] = input_pshape[0];  // batch size
        }
        const auto& input_channels = *(input_pshape.end() - 1);  // channels are the last axis
        if (input_channels.is_static()) {
            *(partially_known.end() - 1) = input_channels;  // channel size
        }
    }
    return partially_known;
}
// Recomputes the paddings for the SAME_UPPER / SAME_LOWER modes.
//
// For any other auto_pad mode nothing is written and true is returned. Otherwise the outputs are
// overwritten with whatever ngraph::try_apply_auto_padding produced, and its result is returned.
// NOTE(review): confirm that ngraph::try_apply_auto_padding reads the spatial axes of in_shape
// according to the channel-last layout used by this op.
bool GNAMaxPool::update_auto_padding(const ov::PartialShape& in_shape,
                                     const ov::Strides& filter_dilations,
                                     ov::Shape& new_pads_end,
                                     ov::Shape& new_pads_begin) const {
    const bool is_same_mode =
        (m_auto_pad == ov::op::PadType::SAME_UPPER) || (m_auto_pad == ov::op::PadType::SAME_LOWER);
    if (!is_same_mode) {
        return true;
    }

    ov::CoordinateDiff computed_end;
    ov::CoordinateDiff computed_begin;
    const bool resolved = ngraph::try_apply_auto_padding(in_shape,
                                                         m_kernel,
                                                         m_strides,
                                                         filter_dilations,
                                                         m_auto_pad,
                                                         computed_end,
                                                         computed_begin);
    new_pads_end = ov::Shape(computed_end.begin(), computed_end.end());
    new_pads_begin = ov::Shape(computed_begin.begin(), computed_begin.end());
    return resolved;
}
// Creates a new GNAMaxPool with the same attributes on top of the given input.
// The argument count is verified by check_new_args_count() first.
std::shared_ptr<ov::Node> GNAMaxPool::clone_with_new_inputs(const ov::OutputVector& new_args) const {
    check_new_args_count(this, new_args);
    const auto& pooled_input = new_args.at(0);
    auto cloned_pool = std::make_shared<GNAMaxPool>(pooled_input,
                                                    m_strides,
                                                    m_pads_begin,
                                                    m_pads_end,
                                                    m_kernel,
                                                    m_rounding_type,
                                                    m_auto_pad);
    return cloned_pool;
}
} // namespace op
} // namespace intel_gna
} // namespace ov

View File

@ -0,0 +1,108 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <limits>
#include "ngraph/node.hpp"
#include "openvino/op/op.hpp"
#include "openvino/op/util/attr_types.hpp"
#include "openvino/op/util/max_pool_base.hpp"
namespace ov {
namespace intel_gna {
namespace op {
/// \brief Batched max pooling operation (GNA plugin custom op).
///
/// Holds the usual max-pool attributes (kernel, strides, begin/end paddings,
/// rounding type and auto-pad mode) and exposes them through getters/setters.
class GNAMaxPool : public ov::op::Op {
public:
    OPENVINO_OP("GNAMaxPool", "intel_gna", ov::op::Op);

    /// \brief Constructs a batched max pooling operation.
    ///
    /// The enum attributes fall back to their in-class defaults (EXPLICIT / FLOOR),
    /// so a default-constructed op never exposes indeterminate values.
    GNAMaxPool() = default;

    /// \brief Constructs a batched max pooling operation.
    ///
    /// \param arg The node producing the input data batch tensor.
    /// \param strides The strides.
    /// \param pads_begin The beginning of padding shape.
    /// \param pads_end The end of padding shape.
    /// \param kernel The kernel shape.
    /// \param rounding_type Whether to use ceiling or floor rounding type while
    /// computing output shape.
    /// \param auto_pad The pad type for automatically computing padding sizes.
    GNAMaxPool(const ov::Output<ov::Node>& arg,
               const ov::Strides& strides,
               const ov::Shape& pads_begin,
               const ov::Shape& pads_end,
               const ov::Shape& kernel,
               const ov::op::RoundingType rounding_type = ov::op::RoundingType::FLOOR,
               const ov::op::PadType auto_pad = ov::op::PadType::EXPLICIT);

    void validate_and_infer_types() override;
    bool visit_attributes(ov::AttributeVisitor& visitor) override;

    /// \return The kernel shape.
    const ov::Shape& get_kernel() const {
        return m_kernel;
    }
    /// \brief Sets the kernel shape.
    void set_kernel(const ov::Shape& kernel) {
        m_kernel = kernel;
    }

    /// \return The strides.
    const ov::Strides& get_strides() const {
        return m_strides;
    }
    /// \brief Sets the strides.
    void set_strides(const ov::Strides& strides) {
        m_strides = strides;
    }

    /// \return The beginning of padding shape.
    const ov::Shape& get_pads_begin() const {
        return m_pads_begin;
    }
    /// \brief Sets the beginning of padding shape.
    void set_pads_begin(const ov::Shape& pads_begin) {
        m_pads_begin = pads_begin;
    }

    /// \return The end of padding shape.
    const ov::Shape& get_pads_end() const {
        return m_pads_end;
    }
    /// \brief Sets the end of padding shape.
    void set_pads_end(const ov::Shape& pads_end) {
        m_pads_end = pads_end;
    }
    /// \brief Legacy-named alias of set_pads_end(), kept so existing callers still compile.
    void set_adding_above(const ov::Shape& pads_end) {
        set_pads_end(pads_end);
    }

    /// \return The pad type for pooling.
    ov::op::PadType get_auto_pad() const {
        return m_auto_pad;
    }
    /// \brief Sets the pad type for pooling.
    void set_auto_pad(const ov::op::PadType auto_pad) {
        m_auto_pad = auto_pad;
    }

    /// \return The ceiling mode being used for output shape computations
    ov::op::RoundingType get_rounding_type() const {
        return m_rounding_type;
    }
    /// \brief Sets the rounding type used for output shape computations.
    void set_rounding_type(ov::op::RoundingType rounding_type) {
        m_rounding_type = rounding_type;
    }

    std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override;

protected:
    /// \brief Recomputes paddings for SAME_UPPER / SAME_LOWER auto padding.
    /// \return false only when the auto-padding computation was attempted and failed.
    bool update_auto_padding(const ov::PartialShape& in_shape,
                             const ov::Strides& filter_dilations,
                             ov::Shape& new_pads_end,
                             ov::Shape& new_pads_begin) const;

    /// \brief Computes the output shape for the given dilations.
    ov::PartialShape infer_output_shape(const ov::Strides& dilations);

    ov::Shape m_kernel;
    ov::Strides m_strides;
    ov::Shape m_pads_begin;
    ov::Shape m_pads_end;
    // Defaults mirror the default arguments of the attribute-taking constructor.
    ov::op::PadType m_auto_pad{ov::op::PadType::EXPLICIT};
    ov::op::RoundingType m_rounding_type{ov::op::RoundingType::FLOOR};
};
} // namespace op
} // namespace intel_gna
} // namespace ov

View File

@ -0,0 +1,367 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "fuse_conv_bias_activation.hpp"
#include <memory>
#include <type_traits>
#include <utility>
#include "exec_graph_info.hpp"
#include "openvino/cc/ngraph/itt.hpp"
#include "openvino/core/graph_util.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/core/rt_info.hpp"
#include "openvino/core/shape.hpp"
#include "openvino/core/type/element_type.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/opsets/opset10.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pattern/matcher.hpp"
#include "openvino/pass/pattern/op/label.hpp"
#include "openvino/pass/pattern/op/or.hpp"
#include "openvino/pass/pattern/op/pattern.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "ops/gna_convolution.hpp"
#include "rt_info/gna_node_id.hpp"
using namespace ov::pass::pattern;
using namespace ov::intel_gna::op;
using namespace ov::intel_gna::rt_info;
using namespace ov::opset10;
namespace {
/// Splits the two producers of a binary node into an (A, B) pair, trying both
/// input orders.
///
/// Input 0 is tried as A with input 1 as B first; if input 0 is not an A the
/// roles are swapped. A pair of null pointers is returned unless both casts
/// succeed.
template <class A, class B>
std::pair<std::shared_ptr<A>, std::shared_ptr<B>> parse_eltwise_inputs(std::shared_ptr<ov::Node> node) {
    const auto producer0 = node->input(0).get_source_output().get_node_shared_ptr();
    const auto producer1 = node->input(1).get_source_output().get_node_shared_ptr();

    auto first = std::dynamic_pointer_cast<A>(producer0);
    auto second = std::dynamic_pointer_cast<B>(producer1);
    if (!first) {
        // Retry with the operands in the opposite order.
        first = std::dynamic_pointer_cast<A>(producer1);
        second = std::dynamic_pointer_cast<B>(producer0);
    }

    if (first && second) {
        return {first, second};
    }
    return {nullptr, nullptr};
}
/// Match callbacks used by the GNAConvolution fusion matcher passes defined in this file.
struct GnaConvCallbacks {
    /// Fuses `Add(GNAConvolution, Constant)` into one GNAConvolution with a bias input.
    ///
    /// The new node is built from the convolution's data and filters inputs plus the
    /// constant as bias; it takes over the Add's friendly name and node id and records
    /// both original friendly names under ORIGINAL_NAMES.
    ///
    /// @return true if the matched Add was replaced, false if the match was rejected.
    static bool gna_convolution_with_biasadd(Matcher& m) {
        auto eltwise = m.get_match_root();
        auto m_conv_const_pair = parse_eltwise_inputs<GNAConvolution, Constant>(eltwise);
        auto m_conv = m_conv_const_pair.first;
        auto m_const = m_conv_const_pair.second;

        if (!m_conv || !m_const) {
            return false;
        }

        // Only a convolution with exactly two inputs (data, filters) is accepted.
        if (m_conv->inputs().size() != 2) {
            return false;
        }

        if (std::dynamic_pointer_cast<Add>(eltwise) == nullptr) {
            return false;
        }

        const ov::Output<ov::Node>& data = m_conv->input(0).get_source_output();
        const ov::Output<ov::Node>& filters = m_conv->input(1).get_source_output();
        const ov::Output<ov::Node>& bias = m_const->output(0);

        auto gna_conv = std::make_shared<GNAConvolution>(data,
                                                         filters,
                                                         bias,
                                                         m_conv->get_strides(),
                                                         m_conv->get_pads_begin(),
                                                         m_conv->get_pads_end(),
                                                         m_conv->get_dilations(),
                                                         m_conv->get_auto_pad());
        ov::Output<ov::Node> new_conv(gna_conv);

        gna_conv->set_friendly_name(eltwise->get_friendly_name());

        ov::copy_runtime_info({m_conv, eltwise}, new_conv.get_node_shared_ptr());
        ov::intel_gna::rt_info::set_node_id(new_conv.get_node_shared_ptr(),
                                            ov::intel_gna::rt_info::get_node_id(eltwise));

        const std::string originalLayers = eltwise->get_friendly_name() + "," + m_conv->get_friendly_name();
        gna_conv->get_rt_info()[ExecGraphInfoSerialization::ORIGINAL_NAMES] = originalLayers;

        ov::replace_node(m.get_match_root(), new_conv.get_node_shared_ptr());
        return true;
    }

    /// Picks which input of a binary node is the GNAConvolution to fuse into and
    /// which is the other operand.
    ///
    /// An operand qualifies when it is a constant or when its node id is smaller than
    /// the convolution's id. Input 0 is preferred as the convolution when both
    /// orderings qualify.
    ///
    /// @return {convolution, other operand}, or {nullptr, nullptr} if nothing qualifies.
    static std::pair<std::shared_ptr<GNAConvolution>, std::shared_ptr<ov::Node>> parse_gna_conv_inputs(
        std::shared_ptr<ov::Node> add) {
        auto input0 = add->input(0).get_source_output().get_node_shared_ptr();
        auto input1 = add->input(1).get_source_output().get_node_shared_ptr();

        auto gna_conv0 = std::dynamic_pointer_cast<GNAConvolution>(input0);
        auto gna_conv1 = std::dynamic_pointer_cast<GNAConvolution>(input1);

        // Rejects null operands itself, so the callers below need no extra null checks.
        auto can_be_fused = [](const std::shared_ptr<ov::Node>& target, const std::shared_ptr<ov::Node>& fused_input) {
            return (target && fused_input &&
                    (get_node_id(target) > get_node_id(fused_input) || ov::op::util::is_constant(fused_input)));
        };

        if (can_be_fused(gna_conv0, input1)) {
            return {gna_conv0, input1};
        }
        if (can_be_fused(gna_conv1, input0)) {
            return {gna_conv1, input0};
        }
        return {nullptr, nullptr};
    }

    /// Folds a matched Add that consumes a GNAConvolution into a new GNAConvolution.
    ///
    /// @return true if the graph was rewritten, false if the match was rejected.
    static bool sink_add_to_gna_convolution(Matcher& m) {
        auto add = std::dynamic_pointer_cast<Add>(m.get_match_root());
        if (!add) {
            // Guard the dereferences of `add` further down.
            return false;
        }
        auto gna_conv_node_pair = parse_gna_conv_inputs(m.get_match_root());
        auto gna_conv = gna_conv_node_pair.first;
        auto node = gna_conv_node_pair.second;

        if (!gna_conv || !node) {
            return false;
        }

        if (gna_conv->has_bias() || gna_conv->get_activation() != ActivationType::NO_ACTIVATION) {
            return false;
        }

        // NOTE(review): this point is only reached when has_bias() returned false, yet the
        // bias is read from input(2), and `node` (the Add's other operand) is not passed to
        // the new convolution. Confirm against GNAConvolution that this wiring is intended.
        const ov::Output<ov::Node>& data = gna_conv->input(0).get_source_output();
        const ov::Output<ov::Node>& filters = gna_conv->input(1).get_source_output();
        const ov::Output<ov::Node>& bias = gna_conv->input(2).get_source_output();

        auto gna_conv_add = std::make_shared<GNAConvolution>(data,
                                                             filters,
                                                             bias,
                                                             gna_conv->get_strides(),
                                                             gna_conv->get_pads_begin(),
                                                             gna_conv->get_pads_end(),
                                                             gna_conv->get_dilations(),
                                                             gna_conv->get_auto_pad());
        ov::Output<ov::Node> gna_conv_add_output{gna_conv_add};

        gna_conv_add->set_friendly_name(add->get_friendly_name());
        ov::copy_runtime_info({node, gna_conv}, gna_conv_add);
        set_node_id(gna_conv_add, get_node_id(add));

        // NOTE(review): the names are updated on the convolution that is about to be
        // replaced, after its rt_info was already copied - confirm this is intended.
        auto& rt_info = gna_conv->get_rt_info();
        if (rt_info.count(ExecGraphInfoSerialization::ORIGINAL_NAMES) > 0) {
            auto& rt_info_layer_names = rt_info[ExecGraphInfoSerialization::ORIGINAL_NAMES];
            const auto original_names = rt_info_layer_names.template as<std::string>();
            const std::string original_names_with_activation = add->get_friendly_name() + "," + original_names;
            rt_info_layer_names = original_names_with_activation;
        }

        ov::replace_node(gna_conv, gna_conv_add);
        ov::replace_node(m.get_match_root(), gna_conv_add);

        return true;
    }

    /// Records a matched activation on the GNAConvolution feeding it and removes the
    /// activation node from the graph.
    ///
    /// The convolution is modified in place: it receives the activation type, the
    /// activation's friendly name and node id, and (when ORIGINAL_NAMES is present)
    /// the activation name prepended to the recorded original names.
    ///
    /// @return true if the activation was sunk, false if the match was rejected.
    static bool sink_activation_to_gna_convolution(Matcher& m) {
        auto activation_node = m.get_match_root();
        auto gna_conv = std::dynamic_pointer_cast<GNAConvolution>(
            activation_node->input(0).get_source_output().get_node_shared_ptr());
        if (!gna_conv) {
            // Guard the dereferences of `gna_conv` further down.
            return false;
        }

        if (gna_conv->get_activation() != ActivationType::NO_ACTIVATION) {
            return false;
        }

        ActivationType activation = ActivationType::NO_ACTIVATION;
        if (ov::is_type<Relu>(activation_node)) {
            activation = ActivationType::RELU;
        } else if (ov::is_type<Sigmoid>(activation_node)) {
            activation = ActivationType::SIGMOID;
        } else if (ov::is_type<Tanh>(activation_node)) {
            activation = ActivationType::TANH;
        } else if (ov::is_type<Log>(activation_node)) {
            activation = ActivationType::LOG;
        } else if (ov::is_type<Abs>(activation_node)) {
            activation = ActivationType::ABS;
        } else if (ov::is_type<Sign>(activation_node)) {
            activation = ActivationType::SIGN;
        } else if (ov::is_type<Clamp>(activation_node)) {
            activation = ActivationType::CLAMP;
        } else {
            return false;
        }

        gna_conv->set_activation(activation);
        gna_conv->set_friendly_name(activation_node->get_friendly_name());
        set_node_id(gna_conv, get_node_id(activation_node));

        auto& rt_info = gna_conv->get_rt_info();
        if (rt_info.count(ExecGraphInfoSerialization::ORIGINAL_NAMES) > 0) {
            auto& rt_info_layer_names = rt_info[ExecGraphInfoSerialization::ORIGINAL_NAMES];
            const auto original_names = rt_info_layer_names.template as<std::string>();
            const std::string original_names_with_activation =
                activation_node->get_friendly_name() + "," + original_names;
            rt_info_layer_names = original_names_with_activation;
        }

        ov::replace_node(m.get_match_root(), gna_conv);

        return true;
    }
};  // struct GnaConvCallbacks
/// Pattern predicate: decides whether an Add can be folded into a GNAConvolution as a bias.
///
/// Input 0 of the Add is treated as the convolution output and input 1 as the bias.
/// The Add is accepted when:
///  - both input shapes are static and the element types match,
///  - NUMPY auto-broadcast is used,
///  - the bias rank is between 3 and 5,
///  - the last (channel) dimension of the bias is 1 or equals the last dimension
///    of input 0, and every other bias dimension is 1.
///
/// @param output Output of the candidate node.
/// @return true if the node is an Add that satisfies all conditions above.
bool is_bias_to_be_fused(const ov::Output<ov::Node>& output) {
    constexpr size_t conv_bias_rank_min{3};
    constexpr size_t conv_bias_rank_max{5};

    auto node = std::dynamic_pointer_cast<Add>(output.get_node_shared_ptr());
    if (!node) {
        return false;
    }

    auto input0 = node->input(0);
    auto input1 = node->input(1);
    const auto partial_shape0 = input0.get_partial_shape();
    const auto partial_shape1 = input1.get_partial_shape();

    if (partial_shape0.is_dynamic() || partial_shape1.is_dynamic()) {
        return false;
    }
    if (node->get_autob() != ov::op::AutoBroadcastType::NUMPY) {
        return false;
    }
    if (input0.get_element_type() != input1.get_element_type()) {
        return false;
    }

    const auto conv_shape = partial_shape0.to_shape();
    const auto bias_shape = partial_shape1.to_shape();
    const size_t bias_rank = bias_shape.size();
    if (bias_rank < conv_bias_rank_min || bias_rank > conv_bias_rank_max) {
        return false;
    }
    // A rank-0 first input has no channel dimension; without this guard
    // `size() - 1` would wrap around and the lookup below would throw.
    if (conv_shape.empty()) {
        return false;
    }

    // NHWC or HWC: channels are the last dimension.
    const size_t bias_channel_index = bias_rank - 1;
    const size_t conv_channel_index = conv_shape.size() - 1;
    const auto bias_channels = bias_shape.at(bias_channel_index);
    if (bias_channels != conv_shape.at(conv_channel_index) && bias_channels != 1) {
        return false;
    }

    // Every non-channel dimension of the bias must be 1.
    for (size_t i = 0; i < bias_channel_index; ++i) {
        if (bias_shape.at(i) != 1) {
            return false;
        }
    }
    return true;
}
/// Pattern predicate: accepts an Add whose two inputs have the same element type
/// and identical, fully static shapes.
///
/// @param output Output of the candidate node.
/// @return true only for an Add meeting the conditions above.
bool is_add_to_be_fused(const ov::Output<ov::Node>& output) {
    const auto add_node = std::dynamic_pointer_cast<Add>(output.get_node_shared_ptr());
    if (!add_node) {
        return false;
    }

    auto lhs = add_node->input(0);
    auto rhs = add_node->input(1);
    const auto lhs_shape = lhs.get_partial_shape();
    const auto rhs_shape = rhs.get_partial_shape();

    if (lhs.get_element_type() != rhs.get_element_type()) {
        return false;
    }

    const bool both_static = !lhs_shape.is_dynamic() && !rhs_shape.is_dynamic();
    if (!both_static) {
        return false;
    }
    return lhs_shape.to_shape() == rhs_shape.to_shape();
}
} // namespace
/// Tags every node with a running id that follows the order of get_ordered_ops().
/// Always returns false.
bool ov::intel_gna::pass::GnaFuseMarkUpNodesOrder::run_on_model(const std::shared_ptr<ov::Model>& m) {
    RUN_ON_FUNCTION_SCOPE(GnaFuseMarkUpNodesOrder);
    uint64_t next_id = 0;
    for (const auto& op : m->get_ordered_ops()) {
        set_node_id(op, next_id);
        ++next_id;
    }
    return false;
}
/// Removes the order id from every node returned by get_ordered_ops().
/// Always returns false.
bool ov::intel_gna::pass::GnaFuseCleanUpNodesOrder::run_on_model(const std::shared_ptr<ov::Model>& m) {
    RUN_ON_FUNCTION_SCOPE(GnaFuseCleanUpNodesOrder);
    const auto ordered_ops = m->get_ordered_ops();
    for (const auto& op : ordered_ops) {
        remove_node_id(op);
    }
    return false;
}
/// Registers a matcher for `Add(GNAConvolution, Constant)` where the convolution
/// has a single consumer and the Add passes is_bias_to_be_fused.
ov::intel_gna::pass::FuseConvolutionWithBiasAdd::FuseConvolutionWithBiasAdd() {
    MATCHER_SCOPE(FuseConvolutionWithBiasAdd);

    const auto conv_pattern = wrap_type<GNAConvolution>(consumers_count(1));
    const auto bias_pattern = wrap_type<Constant>();
    const auto add_pattern = wrap_type<Add>({conv_pattern, bias_pattern}, is_bias_to_be_fused);

    matcher_pass_callback on_match = &GnaConvCallbacks::gna_convolution_with_biasadd;

    const auto matcher = std::make_shared<Matcher>(add_pattern, matcher_name);
    register_matcher(matcher, on_match);
}
/// Registers a matcher for an Add that has a single-consumer GNAConvolution on
/// either input and passes is_add_to_be_fused.
ov::intel_gna::pass::FuseConvolutionWithBiasAddAdd::FuseConvolutionWithBiasAddAdd() {
    MATCHER_SCOPE(FuseConvolutionWithBiasAddAdd);

    const auto conv_pattern = wrap_type<GNAConvolution>(consumers_count(1));
    // The convolution may be either operand, so both orders are listed.
    const auto conv_first = wrap_type<Add>({conv_pattern, any_input()}, is_add_to_be_fused);
    const auto conv_second = wrap_type<Add>({any_input(), conv_pattern}, is_add_to_be_fused);
    const auto either_add = std::make_shared<::op::Or>(ov::OutputVector{conv_first, conv_second});

    matcher_pass_callback on_match = &GnaConvCallbacks::sink_add_to_gna_convolution;

    const auto matcher = std::make_shared<Matcher>(either_add, matcher_name);
    register_matcher(matcher, on_match);
}
/// Registers a matcher for a Relu, Sigmoid, Tanh, Abs, Log, Clamp or Sign node
/// fed by a single-consumer GNAConvolution.
ov::intel_gna::pass::SinkActivationToGnaConvolution::SinkActivationToGnaConvolution() {
    MATCHER_SCOPE(SinkActivationToGnaConvolution);

    const auto conv_pattern = wrap_type<GNAConvolution>(consumers_count(1));
    const auto activation_pattern = wrap_type<Relu, Sigmoid, Tanh, Abs, Log, Clamp, Sign>({conv_pattern});

    matcher_pass_callback on_match = &GnaConvCallbacks::sink_activation_to_gna_convolution;

    const auto matcher = std::make_shared<Matcher>(activation_pattern, matcher_name);
    register_matcher(matcher, on_match);
}
/// Runs the whole fusion pipeline on the model: mark node order, apply the three
/// fusion matchers inside one GraphRewrite, then remove the order marks.
/// Always returns false.
bool ov::intel_gna::pass::GnaConvolutionFusion::run_on_model(const std::shared_ptr<ov::Model>& m) {
    RUN_ON_FUNCTION_SCOPE(GnaConvolutionFusion);
    ov::pass::Manager fusion_manager(get_pass_config());

    // Ids must exist before the matchers run; the callbacks compare them.
    fusion_manager.register_pass<GnaFuseMarkUpNodesOrder>();

    auto fusion_rewrite = fusion_manager.register_pass<ov::pass::GraphRewrite>();
    ADD_MATCHER(fusion_rewrite, FuseConvolutionWithBiasAdd)
    ADD_MATCHER(fusion_rewrite, FuseConvolutionWithBiasAddAdd)
    ADD_MATCHER(fusion_rewrite, SinkActivationToGnaConvolution)
    fusion_rewrite->set_name("ov::intel_gna::pass::fuse_conv_bias_add_activation");

    fusion_manager.register_pass<GnaFuseCleanUpNodesOrder>();

    fusion_manager.run_passes(m);
    return false;
}

View File

@ -0,0 +1,59 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/node.hpp>
#include "openvino/pass/graph_rewrite.hpp"
namespace ov {
namespace intel_gna {
namespace pass {
/**
 * @brief Model pass that assigns an increasing order id to every node,
 * following the sequence returned by Model::get_ordered_ops().
 */
class GnaFuseMarkUpNodesOrder : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("GnaFuseMarkUpNodesOrder", "0");
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
};
/**
 * @brief Model pass that removes the order id from every node of the model.
 */
class GnaFuseCleanUpNodesOrder : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("GnaFuseCleanUpNodesOrder", "0");
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
};
/**
 * @brief Matcher pass that fuses Add(GNAConvolution, Constant) into a single
 * GNAConvolution that takes the constant as its bias input.
 */
class FuseConvolutionWithBiasAdd : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("FuseConvolutionWithBiasAdd", "0");
FuseConvolutionWithBiasAdd();
};
/**
 * @brief Matcher pass declared for fusing a group convolution with a bias Add.
 *
 * NOTE(review): no definition of this constructor is visible next to the other
 * fusion passes - confirm it is defined somewhere before instantiating the pass.
 */
class FuseGroupConvolutionWithBiasAdd : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("FuseGroupConvolutionWithBiasAdd", "0");
FuseGroupConvolutionWithBiasAdd();
};
/**
 * @brief Matcher pass that folds an Add consuming a single-consumer
 * GNAConvolution (on either input) into a new GNAConvolution.
 */
class FuseConvolutionWithBiasAddAdd : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("FuseConvolutionWithBiasAddAdd", "0");
FuseConvolutionWithBiasAddAdd();
};
/**
 * @brief Matcher pass that records a Relu, Sigmoid, Tanh, Abs, Log, Clamp or
 * Sign node on the GNAConvolution feeding it and removes the activation node.
 */
class SinkActivationToGnaConvolution : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("SinkActivationToGnaConvolution", "0");
SinkActivationToGnaConvolution();
};
/**
 * @brief Model pass that runs the complete GNAConvolution fusion pipeline:
 * GnaFuseMarkUpNodesOrder, then FuseConvolutionWithBiasAdd,
 * FuseConvolutionWithBiasAddAdd and SinkActivationToGnaConvolution inside one
 * GraphRewrite, then GnaFuseCleanUpNodesOrder.
 */
class GnaConvolutionFusion : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("GnaConvolutionFusion", "0");
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
};
} // namespace pass
} // namespace intel_gna
} // namespace ov

View File

@ -0,0 +1,208 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/replace_gna_nhwc_layers.hpp"
#include <algorithm>
#include <numeric>
#include <vector>
#include "common/graph_utils.hpp"
#include "openvino/cc/ngraph/itt.hpp"
#include "openvino/opsets/opset12.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "ops/gna_convolution.hpp"
#include "ops/gna_max_pool.hpp"
#include "transformations/utils/transformation_helper.hpp"
#include "transformations/utils/utils.hpp"
using namespace ov;
using namespace ov::opset12;
using namespace ov::pass::pattern;
using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::pass::helper;
// RTTI definitions matching the NGRAPH_RTTI_DECLARATION entries of the three
// pass classes declared in replace_gna_nhwc_layers.hpp.
NGRAPH_RTTI_DEFINITION(ov::intel_gna::pass::ReplaceGnaNHWCLayers, "ReplaceGnaNHWCLayers");
NGRAPH_RTTI_DEFINITION(ov::intel_gna::pass::SubstituteGNAConvolution, "SubstituteGNAConvolution");
NGRAPH_RTTI_DEFINITION(ov::intel_gna::pass::SubstituteGNAMaxPool, "SubstituteGNAMaxPool");
namespace {
ov::Shape make_transpose_order_nchw2nhwc(size_t shape_size);
ov::Shape make_transpose_order_nhwc2nchw(size_t shape_size);
/* transpose orders
before convolution layout conversion NCHW -> NHWC
3D: NCX {0, 1, 2} -> NXC {0, 2, 1}
4D: NCHW {0, 1, 2, 3} -> NHWC {0, 2, 3, 1}
after convolution layout conversion NHWC -> NCHW
3D: NXC {0, 1, 2} -> NCX {0, 2, 1}
4D: NHWC {0, 1, 2, 3} -> NCHW {0, 3, 1, 2}
so, for an order array A of size N, NHWC -> NCHW is just
1) temp = A[N - 1]
2) move A[j] -> A[j + 1] for 1 <= j <= N - 2
3) A[1] = temp
and NCHW -> NHWC is the inverse: the entry at A[1] moves to A[N - 1] and
the entries after it shift one position towards the front
*/
/**
 * @brief Builds the transpose order that converts a channels-first layout to
 * channels-last, e.g. {0, 2, 1} for 3D and {0, 2, 3, 1} for 4D.
 *
 * @param shape_size Rank of the tensor to be transposed.
 * @return Order of length @p shape_size. For ranks below 2 there is no channel
 *         axis to move, so the identity order is returned.
 */
ov::Shape make_transpose_order_nchw2nhwc(size_t shape_size) {
    ov::Shape order(shape_size);
    std::iota(order.begin(), order.end(), 0);
    if (shape_size < 2) {
        // Nothing to move; also avoids touching elements that do not exist.
        return order;
    }
    // Move axis 1 (channels) behind all spatial axes.
    std::rotate(order.begin() + 1, order.begin() + 2, order.end());
    return order;
}
/**
 * @brief Builds the transpose order that converts a channels-last layout to
 * channels-first, e.g. {0, 2, 1} for 3D and {0, 3, 1, 2} for 4D.
 *
 * @param shape_size Rank of the tensor to be transposed.
 * @return Order of length @p shape_size. For ranks below 2 there is no channel
 *         axis to move, so the identity order is returned.
 */
ov::Shape make_transpose_order_nhwc2nchw(size_t shape_size) {
    ov::Shape order(shape_size);
    std::iota(order.begin(), order.end(), 0);
    if (shape_size < 2) {
        // Nothing to move; also avoids touching elements that do not exist.
        return order;
    }
    // Move the last axis (channels) to position 1, right after the batch axis.
    std::rotate(order.begin() + 1, order.end() - 1, order.end());
    return order;
}
} // namespace
namespace SubstituteGNAConvolutionNS {
bool do_transformation(std::shared_ptr<ov::Node> convolution);
/**
 * @brief Replaces a Convolution with Transpose -> GNAConvolution -> Transpose.
 *
 * The data and filters inputs are transposed with the NCHW -> NHWC order, a
 * GNAConvolution is created with the original strides, paddings, dilations and
 * auto-pad mode, and its output is transposed back with the NHWC -> NCHW order.
 * Only 3D and 4D inputs are handled.
 *
 * @param convolution Node expected to be a Convolution.
 * @return true if the node was replaced; false if it is not a Convolution or
 *         its input rank is unsupported.
 */
bool do_transformation(std::shared_ptr<ov::Node> convolution) {
    auto convolution_node = std::dynamic_pointer_cast<Convolution>(convolution);
    if (!convolution_node) {
        // Not a Convolution (or null): nothing to do.
        return false;
    }
    auto convolution_input_data_node = convolution_node->input_value(0);
    auto convolution_filters_node = convolution_node->input_value(1);
    const ov::Shape convolution_input_shape = convolution_node->get_input_shape(0);

    if (convolution_input_shape.size() != 3 && convolution_input_shape.size() != 4) {
        std::cout << "ReplaceGnaNHWCLayers: unsupported convolution size " << convolution_input_shape.size()
                  << std::endl;
        return false;
    }

    const ov::Shape transpose_before_order = make_transpose_order_nchw2nhwc(convolution_input_shape.size());

    // The same order constant is used for both the data and the filters input.
    auto transpose_const =
        Constant::create(element::i32, ov::Shape{transpose_before_order.size()}, transpose_before_order);

    auto transpose_before = std::make_shared<Transpose>(convolution_input_data_node, transpose_const);
    auto transpose_conv_constant = std::make_shared<Transpose>(convolution_filters_node, transpose_const);

    auto conv_new = std::make_shared<ov::intel_gna::op::GNAConvolution>(transpose_before,
                                                                        transpose_conv_constant,
                                                                        convolution_node->get_strides(),
                                                                        convolution_node->get_pads_begin(),
                                                                        convolution_node->get_pads_end(),
                                                                        convolution_node->get_dilations(),
                                                                        convolution_node->get_auto_pad());

    const ov::Shape transpose_after_order = make_transpose_order_nhwc2nchw(conv_new->get_output_shape(0).size());

    auto transpose_after = std::make_shared<Transpose>(
        conv_new,
        Constant::create(element::i32, ov::Shape{transpose_after_order.size()}, transpose_after_order));

    ov::copy_runtime_info(convolution_node,
                          {transpose_before, transpose_const, conv_new, transpose_after, transpose_conv_constant});
    ov::replace_output_update_name(convolution->output(0), transpose_after->output(0));

    return true;
}
} // namespace SubstituteGNAConvolutionNS
namespace SubstituteGNAMaxPoolNS {
bool do_transformation(std::shared_ptr<ov::Node> convolution);
/**
 * @brief Replaces a v1 MaxPool with Transpose -> GNAMaxPool -> Transpose.
 *
 * The input is transposed with the NCHW -> NHWC order, a GNAMaxPool is created
 * with the original strides, paddings, kernel, rounding type and auto-pad mode,
 * and its output is transposed back with the NHWC -> NCHW order.
 *
 * @param max_pool Node expected to be an ov::op::v1::MaxPool.
 * @return true if the node was replaced; false if it is not a v1 MaxPool.
 */
bool do_transformation(std::shared_ptr<ov::Node> max_pool) {
    auto max_pool_node = std::dynamic_pointer_cast<ov::op::v1::MaxPool>(max_pool);
    if (!max_pool_node) {
        // Not a v1 MaxPool (or null): nothing to do.
        return false;
    }
    auto max_pool_input_data_node = max_pool_node->input_value(0);
    const ov::Shape max_pool_input_shape = max_pool_node->get_input_shape(0);

    const ov::Shape transpose_before_order = make_transpose_order_nchw2nhwc(max_pool_input_shape.size());

    auto transpose_const =
        Constant::create(element::i32, ov::Shape{transpose_before_order.size()}, transpose_before_order);

    auto transpose_before = std::make_shared<Transpose>(max_pool_input_data_node, transpose_const);

    auto max_pool_new = std::make_shared<ov::intel_gna::op::GNAMaxPool>(transpose_before,
                                                                        max_pool_node->get_strides(),
                                                                        max_pool_node->get_pads_begin(),
                                                                        max_pool_node->get_pads_end(),
                                                                        max_pool_node->get_kernel(),
                                                                        max_pool_node->get_rounding_type(),
                                                                        max_pool_node->get_auto_pad());

    const ov::Shape transpose_after_order = make_transpose_order_nhwc2nchw(max_pool_new->get_output_shape(0).size());

    auto transpose_after = std::make_shared<Transpose>(
        max_pool_new,
        Constant::create(element::i32, ov::Shape{transpose_after_order.size()}, transpose_after_order));

    ov::copy_runtime_info(max_pool_node, {transpose_before, transpose_const, max_pool_new, transpose_after});
    ov::replace_output_update_name(max_pool->output(0), transpose_after->output(0));

    return true;
}
} // namespace SubstituteGNAMaxPoolNS
// ----------------------------------------------------------------------------
/// Registers a matcher for every Convolution node; matches are handed to
/// SubstituteGNAConvolutionNS::do_transformation.
ov::intel_gna::pass::SubstituteGNAConvolution::SubstituteGNAConvolution() {
    MATCHER_SCOPE(SubstituteGNAConvolution);

    const auto conv_pattern = wrap_type<Convolution>();

    matcher_pass_callback on_match = [](Matcher& matcher) {
        const auto matched_conv = std::dynamic_pointer_cast<Convolution>(matcher.get_match_root());
        return matched_conv ? SubstituteGNAConvolutionNS::do_transformation(matched_conv) : false;
    };

    this->register_matcher(std::make_shared<Matcher>(conv_pattern, matcher_name), on_match);
}
/// Registers a matcher for every v1 MaxPool node; matches are handed to
/// SubstituteGNAMaxPoolNS::do_transformation.
ov::intel_gna::pass::SubstituteGNAMaxPool::SubstituteGNAMaxPool() {
    MATCHER_SCOPE(SubstituteGNAMaxPool);

    const auto pool_pattern = wrap_type<ov::op::v1::MaxPool>();

    matcher_pass_callback on_match = [](Matcher& matcher) {
        const auto matched_pool = std::dynamic_pointer_cast<ov::op::v1::MaxPool>(matcher.get_match_root());
        return matched_pool ? SubstituteGNAMaxPoolNS::do_transformation(matched_pool) : false;
    };

    this->register_matcher(std::make_shared<Matcher>(pool_pattern, matcher_name), on_match);
}
bool ov::intel_gna::pass::ReplaceGnaNHWCLayers::run_on_model(const std::shared_ptr<Model>& function) {
RUN_ON_MODEL_SCOPE(ReplaceGnaNHWCLayers);
ov::pass::Manager manager(get_pass_config());
manager.register_pass<ov::intel_gna::pass::SubstituteGNAConvolution>();
manager.register_pass<ov::intel_gna::pass::SubstituteGNAMaxPool>();
manager.run_passes(function);
return false;
}

View File

@ -0,0 +1,54 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace ov {
namespace intel_gna {
namespace pass {
/**
 * @brief Substitutes ngraph::Convolution (NCHW) -> GNAConvolution (NHWC)
 *
 * The replacement is wrapped in transposes so the surrounding graph keeps
 * seeing NCHW data:
 *
 * Transpose (NCHW -> NHWC)
 * |
 * Convolution (NCHW) -> GNAConvolution (NHWC)
 * |
 * Transpose (NHWC -> NCHW)
 */
class SubstituteGNAConvolution : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
SubstituteGNAConvolution();
};
/**
 * @brief Substitutes ngraph::MaxPool (NCHW) -> GNAMaxPool (NHWC)
 *
 * The replacement is wrapped in transposes so the surrounding graph keeps
 * seeing NCHW data:
 *
 * Transpose (NCHW -> NHWC)
 * |
 * MaxPool (NCHW) -> GNAMaxPool (NHWC)
 * |
 * Transpose (NHWC -> NCHW)
 */
class SubstituteGNAMaxPool : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
SubstituteGNAMaxPool();
};
/**
 * @brief Calls SubstituteGNAConvolution and SubstituteGNAMaxPool together,
 * in that order, on the whole model.
 */
class ReplaceGnaNHWCLayers : public ngraph::pass::FunctionPass {
public:
NGRAPH_RTTI_DECLARATION;
bool run_on_model(const std::shared_ptr<ngraph::Function>& f) override;
};
} // namespace pass
} // namespace intel_gna
} // namespace ov

View File

@ -0,0 +1,20 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "gna_node_id.hpp"
// Stores `id` in the node's rt_info under the GnaNodeId type-info key,
// overwriting any value already stored there.
void ov::intel_gna::rt_info::set_node_id(const std::shared_ptr<Node>& node, uint64_t id) {
    node->get_rt_info()[GnaNodeId::get_type_info_static()] = id;
}
// Erases the GnaNodeId entry from the node's rt_info.
void ov::intel_gna::rt_info::remove_node_id(const std::shared_ptr<Node>& node) {
    node->get_rt_info().erase(GnaNodeId::get_type_info_static());
}
// Reads back the id stored under the GnaNodeId type-info key. The lookup uses
// at(), so the entry is expected to be present.
uint64_t ov::intel_gna::rt_info::get_node_id(const std::shared_ptr<Node>& node) {
    const auto& node_rt_info = node->get_rt_info();
    const auto& stored_id = node_rt_info.at(GnaNodeId::get_type_info_static());
    return stored_id.as<uint64_t>();
}

View File

@ -0,0 +1,37 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "openvino/core/node.hpp"
#include "openvino/core/runtime_attribute.hpp"
namespace ov {
namespace intel_gna {
namespace rt_info {
/// @brief Stores @p id in the rt_info of @p node under the GnaNodeId key.
void set_node_id(const std::shared_ptr<Node>& node, uint64_t id);
/// @brief Erases the GnaNodeId entry from the rt_info of @p node.
void remove_node_id(const std::shared_ptr<Node>& node);
/// @brief Returns the id stored for @p node. The implementation looks the entry
/// up with at(), so callers are expected to have set an id first.
uint64_t get_node_id(const std::shared_ptr<Node>& node);
/**
 * @ingroup ie_runtime_attr_api
 * @brief GnaNodeId class represents runtime info attribute that marks operation
 * with order id
 */
class GnaNodeId : public RuntimeAttribute {
public:
OPENVINO_RTTI("gna_node_id", "0");
GnaNodeId() = default;
// Reports the attribute as non-copyable.
bool is_copyable() const override {
return false;
}
};
} // namespace rt_info
} // namespace intel_gna
} // namespace ov