Init commit

This commit is contained in:
Mikhail Ryzhov 2023-04-18 17:47:17 +02:00
parent 464bf5937c
commit dad7151b37
14 changed files with 1769 additions and 2 deletions

View File

@ -52,6 +52,8 @@
#include "transformations/rt_info/fused_names_attribute.hpp"
#include "transformations/rt_info/primitives_priority_attribute.hpp"
#include "transformations/utils/utils.hpp"
#include "../../src/ops/gna_convolution.hpp"
#include "../../src/ops/gna_max_pool.hpp"
namespace Builder {
@ -614,6 +616,27 @@ CNNLayerCreator::CNNLayerCreator(const std::shared_ptr<::ngraph::Node>& node) :
}
return res;
});
// Creator for "GNAMaxPool" nodes: emits a legacy "Pooling" layer configured as max pooling.
// Attribute names are adapted to the legacy spelling ("exclude_pad" -> "exclude-pad") and an
// explicit auto_pad value is dropped so that only non-default padding modes are forwarded.
addSpecificCreator({"GNAMaxPool"},
                   [](const std::shared_ptr<::ngraph::Node>& node,
                      const std::map<std::string, std::string>& params) -> CNNLayerPtr {
    LayerParams attrs = {node->get_friendly_name(),
                         "Pooling",
                         details::convertPrecision(node->get_output_element_type(0))};
    auto layer = std::make_shared<PoolingLayer>(attrs);
    layer->params = params;
    auto& layer_params = layer->params;

    // Drop auto_pad when it only says "explicit" (compared case-insensitively).
    const auto auto_pad_it = layer_params.find("auto_pad");
    if (auto_pad_it != layer_params.end() &&
        details::CaselessEq<std::string>()(auto_pad_it->second, "EXPLICIT")) {
        layer_params.erase(auto_pad_it);
    }

    // Rename exclude_pad to the dash-separated key.
    const auto exclude_pad_it = layer_params.find("exclude_pad");
    if (exclude_pad_it != layer_params.end()) {
        layer_params["exclude-pad"] = exclude_pad_it->second;
        layer_params.erase(exclude_pad_it);
    }

    layer_params["pool-method"] = "max";
    return layer;
});
addSpecificCreator({"Select"},
[](const std::shared_ptr<::ngraph::Node>& node,
const std::map<std::string, std::string>& params) -> CNNLayerPtr {
@ -1710,6 +1733,41 @@ CNNLayerCreator::CNNLayerCreator(const std::shared_ptr<::ngraph::Node>& node) :
return res;
});
// Creator for "GNAConvolution" nodes: emits a legacy "Convolution" layer, restores the
// "kernel" and "output" parameters from the node shapes and, unless constants are kept in
// the graph, attaches the weights (and optional bias) as blobs.
addSpecificCreator({"GNAConvolution"},
                   [](const std::shared_ptr<::ngraph::Node>& node,
                      const std::map<std::string, std::string>& params) -> CNNLayerPtr {
    LayerParams attrs = {node->get_friendly_name(),
                         "Convolution",
                         details::convertPrecision(node->get_output_element_type(0))};
    auto layer = std::make_shared<InferenceEngine::ConvolutionLayer>(attrs);
    layer->params = params;

    auto&& rt_info = node->get_rt_info();
    const bool keep_constants = rt_info["keep_constants"].as<bool>();

    // Kernel size: the filter shape with its first and last dimensions removed, i.e. the
    // middle (spatial) dimensions of the channels-last filter tensor.
    // NOTE(review): the original author left an open question about lower-rank (NC / N) inputs.
    auto kernel_shape = node->get_input_shape(1);
    kernel_shape.erase(kernel_shape.begin());
    kernel_shape.pop_back();
    layer->params["kernel"] = Builder::asString(static_cast<std::vector<size_t>&>(kernel_shape));

    // Output channel count is the last output dimension (not get_shape()[1] as for NCHW).
    layer->params["output"] = Builder::asString(node->get_shape().back());

    // Forward auto_pad only when its value differs from "explicit".
    if (params.at("auto_pad") == "explicit") {
        layer->params.erase("auto_pad");
    }

    // The bias blob is attached only when the weights blob was attached successfully.
    const auto weights_node = node->input_value(1).get_node_shared_ptr();
    const bool weights_attached =
        !keep_constants &&
        InferenceEngine::details::addBlob(weights_node, layer, InferenceEngine::details::weights);
    if (weights_attached && node->inputs().size() == 3) {
        const auto bias_node = node->input_value(2).get_node_shared_ptr();
        InferenceEngine::details::addBlob(bias_node, layer, InferenceEngine::details::biases);
    }

    return layer;
});
addSpecificCreator({"DeformableConvolution"},
[](const std::shared_ptr<::ngraph::Node>& node,
const std::map<std::string, std::string>& params) -> CNNLayerPtr {
@ -2014,7 +2072,9 @@ void convertFunctionToICNNNetwork(const std::shared_ptr<const ::ngraph::Function
const std::shared_ptr<::ngraph::Node>& consumerLayer,
bool keep_constants) -> bool {
if (((::ngraph::as_type_ptr<::ngraph::op::ConvolutionIE>(consumerLayer) ||
::ngraph::as_type_ptr<::ngraph::op::FullyConnected>(consumerLayer)) &&
::ngraph::as_type_ptr<::ngraph::op::FullyConnected>(consumerLayer) ||
::ngraph::as_type_ptr<ov::intel_gna::op::GNAConvolution>(consumerLayer) ||
::ngraph::as_type_ptr<ov::intel_gna::op::GNAMaxPool>(consumerLayer)) &&
!keep_constants) ||
::ngraph::as_type_ptr<::ngraph::op::v1::BinaryConvolution>(consumerLayer) ||
::ngraph::as_type_ptr<::ngraph::op::DeconvolutionIE>(consumerLayer) ||

View File

@ -8,6 +8,7 @@
#include "common/gna_target.hpp"
#include "gna_graph_tools.hpp"
#include "weights_converter.hpp"
#include "debug_new_pass.hpp" // DEBUG
namespace ov {
namespace intel_gna {
@ -89,7 +90,11 @@ size_t LayerQuantizer::GetBiasSizeForLayer(InferenceEngine::WeightableLayer& wl)
return wl._biases->size();
} else if (LayerInfo(wl).isConvolution()) {
// Calculating biases len using outdata dims: biases number should be equal to output channels number
#ifndef DEBUG_USE_NEW_PASS
return InferenceEngine::GetDataDimByName(wl.outData.front(), InferenceEngine::DataDimName::C);
#else
return InferenceEngine::GetDataDimSizeNHWC(wl.outData.front(), InferenceEngine::DataDimName::C);
#endif
} else {
// Calculating biases size using outData dimensions
return wl.outData.front()->getDims().back();

View File

@ -11,6 +11,7 @@
#include "layers/gna_convolution_layer.hpp"
#include "log/debug.hpp"
#include "weights_converter.hpp"
#include "debug_new_pass.hpp" // DEBUG
namespace ov {
namespace intel_gna {
@ -1262,7 +1263,11 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerWeightable(InferenceEngine::Weigh
double weights_reducer = 1.0;
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer*>(wl);
if (conv && !LayerInfo(conv).isConvolutionFilter()) {
#ifndef DEBUG_USE_NEW_PASS
const auto inDepth = GetDataDimByName(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
#else
const auto inDepth = GetDataDimSizeNHWC(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
#endif
weights_reducer = gna_convolution_layer::getWeightsReducer(*conv);
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
weights_reducer = std::max(1.0, weights_reducer);

View File

@ -52,6 +52,8 @@ enum class LayerType {
Gemm,
Pwl,
Identity,
GNAConvolution,
GNAMaxPool,
NO_TYPE
};
@ -93,7 +95,9 @@ static const InferenceEngine::details::caseless_map<std::string, LayerType> Laye
{"Pwl", LayerType::Pwl},
{"Identity", LayerType::Identity},
{"Gemm", LayerType::Gemm},
};
{"GNAConvolution", LayerType::GNAConvolution },
{"GNAMaxPool", LayerType::GNAMaxPool },
};
LayerType LayerTypeFromStr(const std::string& str);

View File

@ -0,0 +1,364 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "gna_convolution.hpp"
#include <ngraph/validation_util.hpp>
#include "ngraph/attribute_visitor.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include <cmath>
#include <cstddef>
NGRAPH_RTTI_DEFINITION(ov::intel_gna::op::GNAConvolution, "GNAConvolution", 0);
namespace ov {
namespace intel_gna {
namespace op {
namespace internal {
// Adapted from ngraph/core/shape_inference/src/convolution_shape_inference.cpp;
// the only difference is the type of *op.
// TODO: find a way to share this logic with the core implementation instead of duplicating it.
//
// Returns the number of spatial dimensions of the convolution. If the op already carries a
// value it is returned unchanged. Otherwise the value is derived from, in increasing priority:
// the sizes of dilations, strides, pads_begin and pads_end (empty ones are ignored), the static
// rank of the data input and the static rank of the filters input. Returns -1 when none of
// these sources is available.
int64_t calculate_num_spatial(const GNAConvolution* op,
                              const ngraph::PartialShape& input_shape,
                              const ngraph::PartialShape& filters_shape,
                              const int64_t& num_non_spatial_data_dims,
                              const int64_t& num_non_spatial_filter_dims) {
    if (op->m_num_spatial != -1) {
        return op->m_num_spatial;
    }

    int64_t spatial_count = -1;

    // Later entries override earlier ones, so the order of this list matters.
    const size_t attribute_sizes[] = {op->m_dilations.size(),
                                      op->m_strides.size(),
                                      op->m_pads_begin.size(),
                                      op->m_pads_end.size()};
    for (const size_t attribute_size : attribute_sizes) {
        if (attribute_size != 0) {
            spatial_count = static_cast<int64_t>(attribute_size);
        }
    }

    // Static ranks take precedence over attribute sizes; the filters rank wins over the data rank.
    const auto data_rank = input_shape.rank();
    if (data_rank.is_static()) {
        spatial_count = data_rank.get_length() - num_non_spatial_data_dims;
    }
    const auto filter_rank = filters_shape.rank();
    if (filter_rank.is_static()) {
        spatial_count = filter_rank.get_length() - num_non_spatial_filter_dims;
    }

    return spatial_count;
}
// Fills in default values for missing convolution attributes and validates that every
// attribute has exactly one entry per spatial dimension. Does nothing while the number of
// spatial dimensions of the op is still unknown (-1).
//
// The local names used inside the checks are kept as they are on purpose: the validation
// macro receives the condition expression itself.
void update_and_validate_attributes(GNAConvolution* op) {
    const auto& num_spatial = op->m_num_spatial;
    if (num_spatial == -1) {
        return;
    }

    auto& strides = op->m_strides;
    auto& dilations = op->m_dilations;
    auto& pad_begin = op->m_pads_begin;
    auto& pad_end = op->m_pads_end;
    auto& auto_pad = op->m_auto_pad;

    // Missing strides / dilations default to 1 per spatial axis.
    if (strides.empty()) {
        strides = ngraph::Strides(num_spatial, 1);
    }
    if (dilations.empty()) {
        dilations = ngraph::Strides(num_spatial, 1);
    }

    // Pads default to 0 and are also reset to 0 whenever VALID padding is requested.
    const bool reset_pads = auto_pad == ov::op::PadType::VALID;
    if (pad_begin.empty() || reset_pads) {
        pad_begin = ngraph::CoordinateDiff(num_spatial, 0);
    }
    if (pad_end.empty() || reset_pads) {
        pad_end = ngraph::CoordinateDiff(num_spatial, 0);
    }

    NODE_VALIDATION_CHECK(op,
                          static_cast<int64_t>(strides.size()) == num_spatial,
                          "Strides should be defined for all and only spatial features.");
    NODE_VALIDATION_CHECK(op,
                          static_cast<int64_t>(dilations.size()) == num_spatial,
                          "Dilations should be defined for all and only spatial features.");
    NODE_VALIDATION_CHECK(op,
                          static_cast<int64_t>(pad_begin.size()) == num_spatial &&
                          static_cast<int64_t>(pad_end.size()) == num_spatial,
                          "Pads should be defined for all and only spatial features.");

    // Every dilation and stride must be strictly positive.
    NODE_VALIDATION_CHECK(op,
                          std::all_of(dilations.begin(),
                          dilations.end(),
                          [](const size_t &i) {
                          return i > 0;
                          }),
                          "Filter dilation (",
                          dilations,
                          ") has zero dimension.");
    NODE_VALIDATION_CHECK(op,
                          std::all_of(strides.begin(),
                          strides.end(),
                          [](const size_t &i) {
                          return i > 0;
                          }),
                          "Filter strides (",
                          strides,
                          ") has zero dimension.");
}
// Adapted from ngraph/core/shape_inference/include/convolution_shape_inference.hpp,
// but using the NHWC layout instead of NCHW.
//
// Generic case: asserts that the number of spatial dimensions is known (not -1) and then
// returns true. T is used only to select between this primary template and the
// ngraph::PartialShape specialization below.
template <class T>
inline bool dynamic_check(const int64_t& num_spatial) {
OPENVINO_ASSERT(num_spatial != -1,
"Convolution shape inference doesn't have enough information for static shape calculation");
return true;
}
// ngraph::PartialShape case: an unknown number of spatial dimensions is not an error here;
// the function just reports whether the value is known.
// FIXME: does this function need to be a template at all?
template<>
inline bool dynamic_check<ngraph::PartialShape>(const int64_t& num_spatial) {
return num_spatial != -1;
}
// Resolves the begin/end paddings of a GNAConvolution.
//
// For every auto_pad mode other than SAME_UPPER / SAME_LOWER the paddings stored on the op are
// copied to the outputs and true is returned. For the SAME_* modes the paddings are computed
// per spatial axis from the input size, the dilated filter size and the stride; spatial axes are
// read from positions 1..num_spatial of both shapes (channels-last layout).
//
// Returns false when the number of spatial dimensions is unknown (PartialShape case) or when
// at least one spatial dimension of the data or the filters is dynamic; axes that could be
// resolved are still written.
//
// FIXME: does this function need to be a template at all?
// TODO: check where the core counterpart of this function is used in openvino.
template <class T>
bool resolve_auto_pad_for_shape(const GNAConvolution* op,
                                ngraph::CoordinateDiff& pads_begin,
                                ngraph::CoordinateDiff& pads_end,
                                const std::vector<T>& input_shapes,
                                const int64_t& num_non_spatial_data_dims,
                                const int64_t& num_non_spatial_filter_dims) {
    const auto& auto_pad = op->get_auto_pad();
    const bool same_upper = auto_pad == ov::op::PadType::SAME_UPPER;
    const bool same_lower = auto_pad == ov::op::PadType::SAME_LOWER;
    if (!same_upper && !same_lower) {
        pads_begin = op->m_pads_begin;
        pads_end = op->m_pads_end;
        return true;
    }

    auto& num_spatial = op->m_num_spatial;
    if (!dynamic_check<T>(num_spatial)) {
        return false;
    }

    // Work on copies so that dynamic-rank shapes can be expanded to the expected rank.
    auto input_shape = input_shapes[0];
    auto filters_shape = input_shapes[1];
    if (input_shape.rank().is_dynamic()) {
        input_shape.resize(num_spatial + num_non_spatial_data_dims);
    }
    if (filters_shape.rank().is_dynamic()) {
        filters_shape.resize(num_spatial + num_non_spatial_filter_dims);
    }

    const auto& strides = op->m_strides;
    const auto& dilations = op->m_dilations;
    pads_begin.resize(num_spatial);
    pads_end.resize(num_spatial);

    bool all_axes_resolved = true;
    for (int64_t i = 0; i < num_spatial; ++i) {
        const auto& input_dim = input_shape[i + 1];
        const auto& filters_dim = filters_shape[i + 1];
        if (!(input_dim.is_static() && filters_dim.is_static())) {
            all_axes_resolved = false;
            continue;
        }

        const int64_t window_dilated_dim = (filters_dim.get_length() - 1) * dilations[i] + 1;
        NODE_VALIDATION_CHECK(op,
                              window_dilated_dim > 0,
                              "Window after dilation has dimension less than 1 (dim: ",
                              window_dilated_dim,
                              ") at axis ",
                              i,
                              ".");

        // SAME padding: the output size is ceil(input / stride); pad just enough for that.
        const int64_t image_size = input_dim.get_length();
        const int64_t filter_stride = strides[i];
        const int64_t output_size = (image_size + filter_stride - 1) / filter_stride;
        const int64_t tmp = (output_size - 1) * filter_stride + window_dilated_dim;
        const int64_t padding_needed = tmp > image_size ? tmp - image_size : 0;

        // The odd unit of padding goes to the end for SAME_UPPER and to the beginning for SAME_LOWER.
        const size_t padding_lhs = static_cast<size_t>(padding_needed / 2);
        const size_t padding_rhs = static_cast<size_t>(padding_needed - padding_lhs);
        pads_begin[i] = same_upper ? padding_lhs : padding_rhs;
        pads_end[i] = same_upper ? padding_rhs : padding_lhs;
    }
    return all_axes_resolved;
}
// Computes the output shape of a GNAConvolution for channels-last data.
//
// Expects exactly two input shapes (data, filters) and one output shape slot. The output gets
// the batch from the data shape, the channel count (last dimension) from filters_shape[0], and
// each spatial dimension i + 1 computed from the padded input size, the dilated filter size and
// the stride. Spatial dimensions whose data or filter size is not static are left as created
// by resize().
//
// The names used inside the checks are kept as they are on purpose: the validation macro
// receives the condition expression itself.
//
// FIXME: does this function need to be a template at all?
// TODO: check where the core counterpart of this function is used in openvino.
template <class T>
void shape_infer(const GNAConvolution* op,
                 const ngraph::CoordinateDiff& pads_begin,
                 const ngraph::CoordinateDiff& pads_end,
                 const std::vector<T>& input_shapes,
                 std::vector<T>& output_shapes) {
    NODE_VALIDATION_CHECK(op, input_shapes.size() == 2 && output_shapes.size() == 1);

    auto input_shape = input_shapes[0];
    auto filters_shape = input_shapes[1];

    const auto& num_spatial = op->m_num_spatial;
    NODE_VALIDATION_CHECK(op, num_spatial != -1,
                          "Convolution shape_infer should be provided with correct num_spatial attribute");

    // Expand dynamic-rank shapes to the expected rank: batch + spatial dims + channels.
    if (input_shape.rank().is_dynamic()) {
        input_shape.resize(num_spatial + 2);
    }
    if (filters_shape.rank().is_dynamic()) {
        filters_shape.resize(num_spatial + 2);
    }

    NODE_VALIDATION_CHECK(op,
                          (static_cast<int64_t>(input_shape.size()) == (num_spatial + 2)) &&
                          (static_cast<int64_t>(filters_shape.size()) == (num_spatial + 2)),
                          "Data batch and filters rank do not match (data batch shape: ",
                          input_shape,
                          ", filters shape: ",
                          filters_shape,
                          ").");

    // Ranks are originally static or aligned with num_spatial; attributes are assumed valid.
    auto& output_shape = output_shapes[0];
    output_shape.resize(num_spatial + 2);
    output_shape[0] = input_shape[0];
    // Channels are last in NHWC, so the output channel count lands in the last position
    // (an NCHW implementation would write position 1 instead).
    *(output_shape.rbegin()) = filters_shape[0];

    // Input channel counts are also read from the last position of both shapes
    // (an NCHW implementation would compare input_shape[1] with filters_shape[1]).
    const auto n_data_channel = *(input_shape.rbegin());
    const auto n_filter_channel = *(filters_shape.rbegin());
    NODE_VALIDATION_CHECK(
        op,
        n_data_channel.compatible(n_filter_channel),
        "Data batch channel count (",
        n_data_channel,
        ") does not match filter input ",
        "channel count (",
        n_filter_channel,
        ").");

    const auto& dilations = op->m_dilations;
    const auto& strides = op->m_strides;
    for (int64_t i = 0; i < num_spatial; ++i) {
        const auto& input_dim = input_shape[i + 1];
        const auto& filters_dim = filters_shape[i + 1];
        if (!(input_dim.is_static() && filters_dim.is_static())) {
            continue;
        }

        const int64_t window_dilated_dim = (filters_dim.get_length() - 1) * dilations[i] + 1;
        NODE_VALIDATION_CHECK(op,
                              window_dilated_dim > 0,
                              "Window after dilation has dimension less than 1 (dim: ",
                              window_dilated_dim,
                              ") at axis ",
                              i,
                              ".");

        const int64_t data_padded_dilated_dim = input_dim.get_length() + pads_begin[i] + pads_end[i];
        NODE_VALIDATION_CHECK(op,
                              window_dilated_dim <= data_padded_dilated_dim,
                              "Window after dilation has dimension (dim: ",
                              window_dilated_dim,
                              ") larger than the data shape after padding (dim: ",
                              data_padded_dilated_dim,
                              ") at axis ",
                              i,
                              ".");

        output_shape[i + 1] = (data_padded_dilated_dim - window_dilated_dim) / strides[i] + 1;
    }
}
} // namespace internal
// Constructs a convolution with three inputs: data batch, filters and bias.
// The attributes are stored as given and the node is validated immediately, which also
// infers the output type and shape.
// Note the argument order: pads_begin / pads_end come before dilations in the signature,
// while the members are initialized in the order strides, dilations, pads_begin, pads_end.
// m_has_add_node and m_activation_type are not set by this constructor; they keep whatever
// the class declaration provides.
GNAConvolution::GNAConvolution(const ngraph::Output<Node>& data_batch,
const ngraph::Output<Node>& filters,
const ngraph::Output<Node>& bias,
const ngraph::Strides& strides,
const ngraph::CoordinateDiff& pads_begin,
const ngraph::CoordinateDiff& pads_end,
const ngraph::Strides& dilations,
const ov::op::PadType& auto_pad)
: ov::op::Op({data_batch, filters, bias}),
m_strides(strides),
m_dilations(dilations),
m_pads_begin(pads_begin),
m_pads_end(pads_end),
m_auto_pad(auto_pad) {
// Runs validate_and_infer_types() for the freshly constructed node.
constructor_validate_and_infer_types();
}
// Constructs a convolution with two inputs: data batch and filters (no bias).
// The attributes are stored as given and the node is validated immediately, which also
// infers the output type and shape.
// m_has_add_node and m_activation_type are not set by this constructor; they keep whatever
// the class declaration provides.
GNAConvolution::GNAConvolution(const ngraph::Output<Node>& data_batch,
const ngraph::Output<Node>& filters,
const ngraph::Strides& strides,
const ngraph::CoordinateDiff& pads_begin,
const ngraph::CoordinateDiff& pads_end,
const ngraph::Strides& dilations,
const ov::op::PadType& auto_pad)
: ov::op::Op({data_batch, filters}),
m_strides(strides),
m_dilations(dilations),
m_pads_begin(pads_begin),
m_pads_end(pads_end),
m_auto_pad(auto_pad) {
// Runs validate_and_infer_types() for the freshly constructed node.
constructor_validate_and_infer_types();
}
// Exposes the convolution attributes to an attribute visitor under the names
// "strides", "dilations", "pads_begin", "pads_end" and "auto_pad". Always returns true.
// The activation type and the add-node flag are not visited.
bool GNAConvolution::visit_attributes(ov::AttributeVisitor& visitor) {
visitor.on_attribute("strides", m_strides);
visitor.on_attribute("dilations", m_dilations);
visitor.on_attribute("pads_begin", m_pads_begin);
visitor.on_attribute("pads_end", m_pads_end);
visitor.on_attribute("auto_pad", m_auto_pad);
return true;
}
void GNAConvolution::validate_and_infer_types() {
ngraph::element::Type data_batch_et = get_input_element_type(0);
ngraph::element::Type filters_et = get_input_element_type(1);
ngraph::element::Type result_et;
NODE_VALIDATION_CHECK(this,
ngraph::element::Type::merge(result_et, data_batch_et, filters_et),
"Element types for data batch and filters do not match (data batch element type: ",
data_batch_et,
", filters element type: ",
filters_et,
").");
NODE_VALIDATION_CHECK(this,
result_et.is_real() || result_et.is_integral_number(),
"Element types must be numeric. Got: ",
result_et);
auto& data_shape = get_input_partial_shape(0);
auto& filter_shape = get_input_partial_shape(1);
m_num_spatial = internal::calculate_num_spatial(this, data_shape, filter_shape, 2, 2);
internal::update_and_validate_attributes(this);
std::vector<ov::PartialShape> input_shapes = {data_shape, filter_shape};
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
if (m_num_spatial != -1) {
internal::resolve_auto_pad_for_shape(this, m_pads_begin, m_pads_end, input_shapes, 2, 2);
internal::shape_infer(this, m_pads_begin, m_pads_end, input_shapes, output_shapes);
}
set_output_type(0, result_et, output_shapes[0]);
}
// Creates a copy of this node attached to new inputs.
// Two inputs produce a convolution without bias, three inputs one with bias; any other
// count throws ngraph::ngraph_error.
// NOTE(review): the copy is built through the public constructors only, so a value set with
// set_activation() is not forwarded to the clone - confirm whether that is intended.
std::shared_ptr<ngraph::Node> GNAConvolution::clone_with_new_inputs(const ngraph::OutputVector& new_args) const {
    switch (new_args.size()) {
    case 2:
        return std::make_shared<GNAConvolution>(new_args.at(0),
                                                new_args.at(1),
                                                m_strides,
                                                m_pads_begin,
                                                m_pads_end,
                                                m_dilations,
                                                m_auto_pad);
    case 3:
        return std::make_shared<GNAConvolution>(new_args.at(0),
                                                new_args.at(1),
                                                new_args.at(2),
                                                m_strides,
                                                m_pads_begin,
                                                m_pads_end,
                                                m_dilations,
                                                m_auto_pad);
    default:
        throw ngraph::ngraph_error("Unsupported number of arguments for GNAConvolution operation");
    }
}
} // namespace op
} // namespace intel_gna
} // namespace ov

View File

@ -0,0 +1,191 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "openvino/op/op.hpp"
#include "ngraph/node.hpp"
#include <transformations_visibility.hpp>
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/op/op.hpp"
namespace ov {
namespace intel_gna {
namespace op {
class GNAConvolution;
// Shape-inference helpers for GNAConvolution. They are declared ahead of the class so that
// the class can name them as friends and give them access to its attribute members.
namespace internal {
// Returns the number of spatial dimensions of the convolution, or -1 when it cannot be
// derived from the op attributes or from the ranks of the given shapes.
int64_t calculate_num_spatial(const ov::intel_gna::op::GNAConvolution * op,
const ngraph::PartialShape& input_shape,
const ngraph::PartialShape& filters_shape,
const int64_t& num_non_spatial_data_dims,
const int64_t& num_non_spatial_filter_dims);
// Fills in default strides / dilations / pads and validates their sizes against the
// number of spatial dimensions stored on the op.
void update_and_validate_attributes(ov::intel_gna::op::GNAConvolution* op);
// Writes the effective paddings into pads_begin / pads_end; returns false when they could
// not be fully resolved. Only the declaration lives in this header.
template <class T>
bool resolve_auto_pad_for_shape(const ov::intel_gna::op::GNAConvolution* op,
ngraph::CoordinateDiff& pads_begin,
ngraph::CoordinateDiff& pads_end,
const std::vector<T>& input_shapes,
const int64_t& num_non_spatial_data_dims,
const int64_t& num_non_spatial_filter_dims);
// Computes the output shape from the {data, filters} input shapes and the given paddings.
// Only the declaration lives in this header.
template <class T>
void shape_infer(const ov::intel_gna::op::GNAConvolution* op,
const ngraph::CoordinateDiff& pads_begin,
const ngraph::CoordinateDiff& pads_end,
const std::vector<T>& input_shapes,
std::vector<T>& output_shapes);
} // namespace internal
/**
 * @brief Activation modes for fused convolutions.
 *
 * The enumerators rely on implicit numbering, so their order must stay unchanged.
 */
enum class ActivationType {
    SIGMOID,
    RELU,
    TANH,
    ABS,
    LOG,
    EXP,
    SIGN,
    CLAMP,
    NO_ACTIVATION
};
/// \brief Convolution with NHWC (channels-last) layout.
///
/// Shape inference reads the batch from the first dimension, the spatial dimensions from
/// positions 1..f and the channels from the last dimension of the data tensor.
class GNAConvolution : public ov::op::Op {
public:
    NGRAPH_RTTI_DECLARATION;

    /// \brief Constructs a convolution operation without inputs or attributes.
    GNAConvolution() = default;

    /// \brief Constructs a batched convolution operation with a bias input.
    ///
    /// \param data_batch The node producing the input data batch tensor.<br>
    /// `[N, D1, ... Df, C_IN]`
    /// \param filters The node producing the filters tensor.<br>
    /// `[C_OUT, F1, ... Ff, C_IN]`
    /// \param bias The node producing the bias tensor.
    /// \param strides The strides.<br>
    /// `[f]`
    /// \param pads_begin The beginning of padding shape.<br>
    /// `[f]`
    /// \param pads_end The end of padding shape.<br>
    /// `[f]`
    /// \param dilations The dilations.<br>
    /// `[f]`
    /// \param auto_pad The pad type for automatically computing padding sizes.
    ///
    /// Output `[N, R1, ... Rf, C_OUT]`
    ///
    GNAConvolution(const ngraph::Output<ngraph::Node>& data_batch,
                   const ngraph::Output<ngraph::Node>& filters,
                   const ngraph::Output<ngraph::Node>& bias,
                   const ngraph::Strides& strides,
                   const ngraph::CoordinateDiff& pads_begin,
                   const ngraph::CoordinateDiff& pads_end,
                   const ngraph::Strides& dilations,
                   const ov::op::PadType& auto_pad = ov::op::PadType::EXPLICIT);

    /// \brief Constructs a batched convolution operation without a bias input.
    ///
    /// Parameters have the same meaning as for the constructor taking a bias.
    GNAConvolution(const ngraph::Output<ngraph::Node>& data_batch,
                   const ngraph::Output<ngraph::Node>& filters,
                   const ngraph::Strides& strides,
                   const ngraph::CoordinateDiff& pads_begin,
                   const ngraph::CoordinateDiff& pads_end,
                   const ngraph::Strides& dilations,
                   const ov::op::PadType& auto_pad = ov::op::PadType::EXPLICIT);

    void validate_and_infer_types() override;
    bool visit_attributes(ov::AttributeVisitor& visitor) override;
    std::shared_ptr<ngraph::Node> clone_with_new_inputs(const ngraph::OutputVector& new_args) const override;

    /// \return The strides.
    const ngraph::Strides& get_strides() const {
        return m_strides;
    }
    void set_strides(const ngraph::Strides& strides) {
        m_strides = strides;
    }

    /// \return The dilations.
    const ngraph::Strides& get_dilations() const {
        return m_dilations;
    }
    void set_dilations(const ngraph::Strides& dilations) {
        m_dilations = dilations;
    }

    /// \return The padding-below sizes (possibly negative).
    const ngraph::CoordinateDiff& get_pads_begin() const {
        return m_pads_begin;
    }
    void set_pads_begin(const ngraph::CoordinateDiff& pads_begin) {
        m_pads_begin = pads_begin;
    }

    /// \return The padding-above sizes (possibly negative).
    const ngraph::CoordinateDiff& get_pads_end() const {
        return m_pads_end;
    }
    /// \brief Sets the padding-above sizes.
    void set_pads_end(const ngraph::CoordinateDiff& pads_end) {
        m_pads_end = pads_end;
    }
    /// \brief Legacy name of set_pads_end(); kept so that existing callers keep compiling.
    void set_adding_above(const ngraph::CoordinateDiff& pads_end) {
        set_pads_end(pads_end);
    }

    /// \return The pad type for convolution.
    const ov::op::PadType& get_auto_pad() const {
        return m_auto_pad;
    }
    void set_auto_pad(const ov::op::PadType& auto_pad) {
        m_auto_pad = auto_pad;
    }

    /// \return The add-node flag; false unless changed by code with access to the member.
    bool has_add_node() const {
        return m_has_add_node;
    }

    /// \return The fused activation; NO_ACTIVATION until set_activation() is called.
    ActivationType get_activation() const {
        return m_activation_type;
    }
    void set_activation(ActivationType activation_type) {
        m_activation_type = activation_type;
    }

    /*
     * TODO: for unit tests
    bool evaluate(ov::runtime::TensorVector& output_values,
                  const ov::runtime::TensorVector& input_values,
                  const ov::EvaluationContext & evaluation_context) const override;
    bool has_evaluate() const override;
    */

protected:
    ngraph::Strides m_strides;
    ngraph::Strides m_dilations;
    ngraph::CoordinateDiff m_pads_begin;
    ngraph::CoordinateDiff m_pads_end;
    // Matches the default of the constructors so that a default-constructed op holds a
    // determinate value.
    ov::op::PadType m_auto_pad = ov::op::PadType::EXPLICIT;
    // Number of spatial dimensions; -1 until it has been derived during validation.
    int64_t m_num_spatial = -1;

private:
    // The shape-inference helpers read and update the attribute members directly.
    friend int64_t internal::calculate_num_spatial(const ov::intel_gna::op::GNAConvolution* op,
                                                   const ngraph::PartialShape& input_shape,
                                                   const ngraph::PartialShape& filters_shape,
                                                   const int64_t& num_non_spatial_data_dims,
                                                   const int64_t& num_non_spatial_filter_dims);
    friend void internal::update_and_validate_attributes(ov::intel_gna::op::GNAConvolution* op);
    template <class T>
    friend bool internal::resolve_auto_pad_for_shape(const ov::intel_gna::op::GNAConvolution* op,
                                                     ngraph::CoordinateDiff& pads_begin,
                                                     ngraph::CoordinateDiff& pads_end,
                                                     const std::vector<T>& input_shapes,
                                                     const int64_t& num_non_spatial_data_dims,
                                                     const int64_t& num_non_spatial_filter_dims);
    template <class T>
    friend void internal::shape_infer(const ov::intel_gna::op::GNAConvolution* op,
                                      const ngraph::CoordinateDiff& pads_begin,
                                      const ngraph::CoordinateDiff& pads_end,
                                      const std::vector<T>& input_shapes,
                                      std::vector<T>& output_shapes);

    // No constructor sets these two members, so they carry default initializers; without
    // them has_add_node() / get_activation() would read indeterminate values.
    bool m_has_add_node = false;
    ActivationType m_activation_type = ActivationType::NO_ACTIVATION;
};
} // namespace op
} // namespace intel_gna
} // namespace ov

View File

@ -0,0 +1,275 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "gna_max_pool.hpp"
#include <assert.h>
#include "ngraph/attribute_visitor.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/validation_util.hpp"
#include "ngraph/node.hpp"
#include "ngraph/validation_util.hpp"
NGRAPH_RTTI_DEFINITION(ov::intel_gna::op::GNAMaxPool, "GNAMaxPool", 0);
namespace ov {
namespace intel_gna {
namespace op {
//
// Forward declaration of the output shape inference for batched pooling fprop;
// the definition follows directly below.
//
ov::PartialShape infer_batched_pooling_forward(const ngraph::Node* node,
const ov::PartialShape& data_batch_shape,
const ov::CoordinateDiff& data_padding_below,
const ov::CoordinateDiff& data_padding_above,
const ov::PartialShape& window_shape,
const ngraph::Strides& window_strides,
bool is_window_all_in_padding_allowed,
bool ceil_mode,
const ngraph::Strides& window_dilation);
//
// Infers the output batch shape for batched pooling fprop on channels-last (NHWC) data.
//
// node                              node used for error reporting
// data_batch_shape                  input shape: batch first, channels last, spatial dims between
// data_padding_below / _above       paddings, one entry per spatial dimension
// window_shape                      pooling window, one entry per spatial dimension
// window_strides                    strides, one entry per spatial dimension
// is_window_all_in_padding_allowed  forwarded to ngraph::infer_windowed_reduction_output_shape
// ceil_mode                         forwarded to ngraph::infer_windowed_reduction_output_shape
// window_dilation                   window dilations; empty means "no dilation"
//
// Returns a shape with the batch in the first position, the channel count in the last position
// and the inferred spatial dimensions in between. When the rank of data_batch_shape is dynamic,
// all output dimensions are dynamic.
//
ov::PartialShape infer_batched_pooling_forward(const ngraph::Node* node,
                                               const ov::PartialShape& data_batch_shape,
                                               const ov::CoordinateDiff& data_padding_below,
                                               const ov::CoordinateDiff& data_padding_above,
                                               const ov::PartialShape& window_shape,
                                               const ngraph::Strides& window_strides,
                                               bool is_window_all_in_padding_allowed,
                                               bool ceil_mode,
                                               const ngraph::Strides& window_dilation) {
    // The message states exactly what the condition accepts: ranks 3, 4 and 5.
    NODE_VALIDATION_CHECK(node,
                          data_batch_shape.rank().is_dynamic() ||
                          (data_batch_shape.rank().get_length() >= 3 && data_batch_shape.rank().get_length() <= 5),
                          "Data batch must have rank 3, 4 or 5 (one batch axis, ",
                          "one to three spatial dimensions, and one channel axis) ",
                          "(data batch shape: ",
                          data_batch_shape,
                          ").");

    // All spatial attributes must agree on the number of spatial dimensions.
    ov::PartialShape data_spatial_shape{ov::PartialShape::dynamic()};
    NODE_VALIDATION_CHECK(node,
                          data_spatial_shape.merge_rank(data_batch_shape.rank() - 2) &&
                          data_spatial_shape.merge_rank(data_padding_below.size()) &&
                          data_spatial_shape.merge_rank(data_padding_above.size()) &&
                          data_spatial_shape.merge_rank(window_shape.rank()) &&
                          data_spatial_shape.merge_rank(window_strides.size()),
                          "Ranks for data item shape (data batch has shape ",
                          data_batch_shape,
                          ", so data item rank is ",
                          (data_batch_shape.rank() - 2),
                          "), padding below (",
                          data_padding_below,
                          "), padding above (",
                          data_padding_above,
                          "), window shape (",
                          window_shape,
                          "), and window strides (",
                          window_strides,
                          ") do not match.");

    ov::Dimension batch_size{ov::Dimension::dynamic()};
    ov::Dimension channel_count{ov::Dimension::dynamic()};
    ov::PartialShape data_output_spatial_shape{ov::PartialShape::dynamic(data_spatial_shape.rank())};

    const int64_t spatial_rank = data_spatial_shape.rank().get_length();

    if (data_batch_shape.rank().is_static()) {
        batch_size = data_batch_shape[0];
        // NHWC: channels are the last dimension (NCHW would read position 1).
        channel_count = *(data_batch_shape.end() - 1);
        // NHWC: spatial dimensions start at position 1 (NCHW would start at position 2).
        for (int64_t i = 0; i < spatial_rank; i++) {
            data_spatial_shape[i] = data_batch_shape[i + 1];
        }

        NODE_VALIDATION_CHECK(node, batch_size.is_dynamic() || batch_size.get_length() > 0, "Batch size is zero.");
        NODE_VALIDATION_CHECK(node,
                              channel_count.is_dynamic() || channel_count.get_length() > 0,
                              "Channel count is zero.");

        // For pooling ops we don't need data dilation, so we fill in the identity value (all 1).
        ngraph::Strides data_dilation(spatial_rank, 1);

        // If the window dilation was not specified, use 1 for each spatial axis,
        // which means that the window is not dilated.
        ngraph::Strides dilations = window_dilation;
        if (window_dilation.empty()) {
            dilations = ngraph::Strides(spatial_rank, 1);
        }

        data_output_spatial_shape = ngraph::infer_windowed_reduction_output_shape(node,
                                                                                  data_spatial_shape,
                                                                                  data_dilation,
                                                                                  data_padding_below,
                                                                                  data_padding_above,
                                                                                  window_shape,
                                                                                  window_strides,
                                                                                  dilations,
                                                                                  is_window_all_in_padding_allowed,
                                                                                  ceil_mode);
    }

    // Assemble the NHWC output: batch first, channels last, spatial dimensions in between.
    ov::PartialShape data_batch_output_shape{ov::PartialShape::dynamic(data_output_spatial_shape.rank() + 2)};
    data_batch_output_shape[0] = batch_size;
    *(data_batch_output_shape.end() - 1) = channel_count;
    for (int64_t i = 0; i < spatial_rank; i++) {
        data_batch_output_shape[i + 1] = data_output_spatial_shape[i];
    }

    return data_batch_output_shape;
}
// Constructs a max pooling node with a single data input.
// The attributes are stored as given and the node is validated immediately, which also
// infers the output type and shape.
// Note that the member initializers are not listed in parameter order: the kernel comes
// first and the rounding type last.
GNAMaxPool::GNAMaxPool(const ngraph::Output<ngraph::Node>& arg,
const ngraph::Strides& strides,
const ov::Shape& pads_begin,
const ov::Shape& pads_end,
const ov::Shape& kernel,
const ov::op::RoundingType rounding_type,
const ov::op::PadType auto_pad)
: Op({arg}),
m_kernel(kernel),
m_strides(strides),
m_pads_begin(pads_begin),
m_pads_end(pads_end),
m_auto_pad(auto_pad),
m_rounding_type(rounding_type) {
// Runs validate_and_infer_types() for the freshly constructed node.
constructor_validate_and_infer_types();
}
// Exposes the pooling attributes to an attribute visitor under the names "strides",
// "pads_begin", "pads_end", "kernel", "rounding_type" and "auto_pad". Always returns true.
bool GNAMaxPool::visit_attributes(ov::AttributeVisitor& visitor) {
visitor.on_attribute("strides", m_strides);
visitor.on_attribute("pads_begin", m_pads_begin);
visitor.on_attribute("pads_end", m_pads_end);
visitor.on_attribute("kernel", m_kernel);
visitor.on_attribute("rounding_type", m_rounding_type);
visitor.on_attribute("auto_pad", m_auto_pad);
return true;
}
void GNAMaxPool::validate_and_infer_types() {
if (0 == m_strides.size()) {
m_strides = ngraph::Strides(m_kernel.size(), 1);
}
if (0 == m_pads_begin.size()) {
m_pads_begin = ov::Shape(m_kernel.size(), 0);
}
if (0 == m_pads_end.size()) {
m_pads_end = ov::Shape(m_kernel.size(), 0);
}
const ov::PartialShape& arg_shape = get_input_partial_shape(0);
NODE_VALIDATION_CHECK(
this,
arg_shape.rank().compatible(3) || arg_shape.rank().compatible(4) || arg_shape.rank().compatible(5),
"Expected a 3D, 4D or 5D tensor for the input. Got: ",
arg_shape);
if (arg_shape.rank().is_static()) {
NODE_VALIDATION_CHECK(this,
static_cast<int64_t>(m_pads_end.size()) == arg_shape.rank().get_max_length() - 2,
"Expected pads_end size to be equal to input size - 2. Got: ",
m_pads_end.size());
NODE_VALIDATION_CHECK(this,
static_cast<int64_t>(m_pads_begin.size()) == arg_shape.rank().get_max_length() - 2,
"Expected pads_begin size to be equal to input size - 2. Got: ",
m_pads_begin.size());
NODE_VALIDATION_CHECK(this,
static_cast<int64_t>(m_kernel.size()) == arg_shape.rank().get_max_length() - 2,
"Expected kernel size to be equal to input size - 2. Got: ",
m_kernel.size());
NODE_VALIDATION_CHECK(this,
static_cast<int64_t>(m_strides.size()) == arg_shape.rank().get_max_length() - 2,
"Expected strides size to be equal to input size - 2. Got: ",
m_strides.size());
}
const ov::PartialShape output_shape = infer_output_shape(ngraph::Strides{}); // no dilations of the filter window
set_output_type(0, get_input_element_type(0), output_shape);
}
// Computes the output shape of the pooling for the given window dilations (empty = none).
//
// For SAME_UPPER / SAME_LOWER the stored paddings are first recomputed from the input shape;
// for VALID they are reset to zero. When the paddings are available the shape comes from
// infer_batched_pooling_forward(). Otherwise, for a static input rank, every dimension is
// dynamic except the batch (first) and channel (last) dimensions, which are copied from the
// input when they are static; for a dynamic input rank the result is fully dynamic.
//
// NOTE(review): the auto-padding helper used by update_auto_padding() comes from the core
// library and presumably expects spatial dimensions at NCHW positions - verify that it reads
// the right axes for this channels-last op.
ov::PartialShape GNAMaxPool::infer_output_shape(const ngraph::Strides& dilations) {
    const auto& arg_shape = get_input_partial_shape(0);

    const bool same_padding =
        m_auto_pad == ov::op::PadType::SAME_UPPER || m_auto_pad == ov::op::PadType::SAME_LOWER;

    bool paddings_resolved = true;
    if (same_padding) {
        const auto filter_dilations = dilations.empty() ? ngraph::Strides(m_kernel.size(), 1) : dilations;
        paddings_resolved = update_auto_padding(arg_shape, filter_dilations, m_pads_end, m_pads_begin);
    } else if (m_auto_pad == ov::op::PadType::VALID) {
        m_pads_end = ov::Shape(m_pads_end.size(), 0);
        m_pads_begin = ov::Shape(m_pads_begin.size(), 0);
    }

    if (paddings_resolved) {
        ov::CoordinateDiff pads_begin(m_pads_begin.begin(), m_pads_begin.end());
        ov::CoordinateDiff pads_end(m_pads_end.begin(), m_pads_end.end());
        return ov::intel_gna::op::infer_batched_pooling_forward(this,
                                                                arg_shape,
                                                                pads_begin,
                                                                pads_end,
                                                                m_kernel,
                                                                m_strides,
                                                                true,
                                                                m_rounding_type == ov::op::RoundingType::CEIL,
                                                                dilations);
    }

    // Paddings could not be resolved: keep only what is known about batch and channels.
    auto output_shape = ov::PartialShape::dynamic();
    if (arg_shape.rank().is_static()) {
        output_shape = std::vector<ov::Dimension>(arg_shape.rank().get_max_length(), ov::Dimension::dynamic());
        if (arg_shape[0].is_static()) {
            output_shape[0] = arg_shape[0];  // batch size
        }
        // NHWC: the channel dimension is the last one (NCHW would use position 1).
        const auto& channel_dim = *(arg_shape.end() - 1);
        if (channel_dim.is_static()) {
            *(output_shape.end() - 1) = channel_dim;  // channel size
        }
    }
    return output_shape;
}
// Recomputes the paddings for SAME_UPPER / SAME_LOWER auto padding.
//
// For any other padding mode nothing is written and true is returned. Otherwise the result
// of ngraph::try_apply_auto_padding() is returned, and new_pads_end / new_pads_begin are
// overwritten with whatever the helper produced - even when it reports failure.
bool GNAMaxPool::update_auto_padding(const ov::PartialShape& in_shape,
                                     const ngraph::Strides& filter_dilations,
                                     ov::Shape& new_pads_end,
                                     ov::Shape& new_pads_begin) const {
    const bool same_padding =
        m_auto_pad == ov::op::PadType::SAME_UPPER || m_auto_pad == ov::op::PadType::SAME_LOWER;
    if (!same_padding) {
        return true;
    }

    ov::CoordinateDiff pads_end, pads_begin;
    const bool succeeded = ngraph::try_apply_auto_padding(in_shape,
                                                          m_kernel,
                                                          m_strides,
                                                          filter_dilations,
                                                          m_auto_pad,
                                                          pads_end,
                                                          pads_begin);
    new_pads_end = ov::Shape(pads_end.begin(), pads_end.end());
    new_pads_begin = ov::Shape(pads_begin.begin(), pads_begin.end());
    return succeeded;
}
/// Creates a copy of this op that is attached to the given inputs.
///
/// \param new_args Replacement inputs; the argument count is validated by check_new_args_count.
/// \return A new GNAMaxPool with the same attributes as this one.
std::shared_ptr<ngraph::Node> GNAMaxPool::clone_with_new_inputs(const ov::OutputVector& new_args) const {
    check_new_args_count(this, new_args);
    const auto& data_input = new_args.at(0);
    auto cloned = std::make_shared<GNAMaxPool>(data_input,
                                               m_strides,
                                               m_pads_begin,
                                               m_pads_end,
                                               m_kernel,
                                               m_rounding_type,
                                               m_auto_pad);
    return cloned;
}
} // namespace op
} // namespace intel_gna
} // namespace ov

View File

@ -0,0 +1,108 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <limits>
#include "openvino/op/op.hpp"
#include "openvino/op/util/attr_types.hpp"
#include "ngraph/node.hpp"
#include "openvino/op/util/max_pool_base.hpp"
namespace ov {
namespace intel_gna {
namespace op {
/// \brief Batched max pooling operation.
class GNAMaxPool : public ov::op::Op {
public:
    NGRAPH_RTTI_DECLARATION;

    /// \brief Constructs a batched max pooling operation with default attributes
    ///        (EXPLICIT padding, FLOOR rounding, empty kernel/strides/pads).
    GNAMaxPool() = default;

    /// \brief Constructs a batched max pooling operation.
    ///
    /// \param arg The node producing the input data batch tensor.
    /// \param strides The strides.
    /// \param pads_begin The beginning of padding shape.
    /// \param pads_end The end of padding shape.
    /// \param kernel The kernel shape.
    /// \param rounding_type Whether to use ceiling or floor rounding type while
    ///        computing output shape.
    /// \param auto_pad The pad type for automatically computing padding sizes.
    GNAMaxPool(const ngraph::Output<ngraph::Node>& arg,
               const ngraph::Strides& strides,
               const ngraph::Shape& pads_begin,
               const ngraph::Shape& pads_end,
               const ngraph::Shape& kernel,
               const ov::op::RoundingType rounding_type = ov::op::RoundingType::FLOOR,
               const ov::op::PadType auto_pad = ov::op::PadType::EXPLICIT);

    void validate_and_infer_types() override;
    bool visit_attributes(ov::AttributeVisitor& visitor) override;

    /// \return The kernel shape.
    const ngraph::Shape& get_kernel() const {
        return m_kernel;
    }
    /// \brief Sets the kernel shape.
    void set_kernel(const ngraph::Shape& kernel) {
        m_kernel = kernel;
    }

    /// \return The strides.
    const ngraph::Strides& get_strides() const {
        return m_strides;
    }
    /// \brief Sets the strides.
    void set_strides(const ngraph::Strides& strides) {
        m_strides = strides;
    }

    /// \return The beginning of padding shape.
    const ngraph::Shape& get_pads_begin() const {
        return m_pads_begin;
    }
    /// \brief Sets the beginning of padding shape.
    void set_pads_begin(const ngraph::Shape& pads_begin) {
        m_pads_begin = pads_begin;
    }

    /// \return The end of padding shape.
    const ngraph::Shape& get_pads_end() const {
        return m_pads_end;
    }
    /// \brief Sets the end of padding shape.
    void set_pads_end(const ngraph::Shape& pads_end) {
        m_pads_end = pads_end;
    }
    /// \brief Legacy-named alias of set_pads_end(); kept so existing callers keep compiling.
    void set_adding_above(const ngraph::Shape& pads_end) {
        set_pads_end(pads_end);
    }

    /// \return The pad type for pooling.
    ov::op::PadType get_auto_pad() const {
        return m_auto_pad;
    }
    /// \brief Sets the pad type for pooling.
    void set_auto_pad(const ov::op::PadType auto_pad) {
        m_auto_pad = auto_pad;
    }

    /// \return The ceiling mode being used for output shape computations
    ov::op::RoundingType get_rounding_type() const {
        return m_rounding_type;
    }
    /// \brief Sets the rounding mode used for output shape computations.
    void set_rounding_type(ov::op::RoundingType rounding_type) {
        m_rounding_type = rounding_type;
    }

    std::shared_ptr<ngraph::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override;

protected:
    /// \brief Recomputes paddings for the SAME_UPPER / SAME_LOWER pad modes.
    /// \return false when the paddings could not be computed.
    bool update_auto_padding(const ov::PartialShape& in_shape,
                             const ngraph::Strides& filter_dilations,
                             ngraph::Shape& new_pads_end,
                             ngraph::Shape& new_pads_begin) const;

    /// \brief Infers the output shape; may update the stored paddings as a side effect.
    ov::PartialShape infer_output_shape(const ngraph::Strides& dilations);

    ngraph::Shape m_kernel;
    ngraph::Strides m_strides;
    ngraph::Shape m_pads_begin;
    ngraph::Shape m_pads_end;
    // Initialized in-class so a default-constructed op never exposes indeterminate
    // enum values through the getters; the values mirror the constructor defaults.
    ov::op::PadType m_auto_pad = ov::op::PadType::EXPLICIT;
    ov::op::RoundingType m_rounding_type = ov::op::RoundingType::FLOOR;
};
} // namespace op
} // namespace intel_gna
} // namespace ov

View File

@ -0,0 +1,369 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <type_traits>
#include <utility>
#include <memory>
#include "openvino/cc/ngraph/itt.hpp"
#include "fuse_conv_biasadd_activation.hpp"
#include "exec_graph_info.hpp"
#include "openvino/core/graph_util.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/core/rt_info.hpp"
#include "openvino/core/shape.hpp"
#include "openvino/core/type/element_type.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/pass/pattern/matcher.hpp"
#include "openvino/pass/pattern/op/or.hpp"
#include "openvino/pass/pattern/op/label.hpp"
#include "openvino/pass/pattern/op/pattern.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "ops/gna_convolution.hpp"
#include "rt_info/gna_node_id.hpp"
#include "openvino/opsets/opset10.hpp"
using namespace ov::pass::pattern;
using namespace ov::intel_gna::op;
using namespace ov::intel_gna::rt_info;
using namespace ov::opset10;
namespace {
/// Splits the two inputs of a binary node into a producer of type A and a producer of type B.
///
/// The inputs are tried as (A, B) first; when input 0 is not an A, the swapped
/// assignment is tried instead.
///
/// \param node Node with at least two inputs.
/// \return {A-node, B-node}, or {nullptr, nullptr} when no such assignment exists.
template <class A, class B>
std::pair<std::shared_ptr<A>, std::shared_ptr<B>> parse_eltwise_inputs(std::shared_ptr<ov::Node> node) {
    const auto producer0 = node->input(0).get_source_output().get_node_shared_ptr();
    const auto producer1 = node->input(1).get_source_output().get_node_shared_ptr();

    auto typed_a = std::dynamic_pointer_cast<A>(producer0);
    auto typed_b = std::dynamic_pointer_cast<B>(producer1);
    if (typed_a == nullptr) {
        // Try the swapped input order.
        typed_a = std::dynamic_pointer_cast<A>(producer1);
        typed_b = std::dynamic_pointer_cast<B>(producer0);
    }

    if (typed_a && typed_b) {
        return {typed_a, typed_b};
    }
    return {nullptr, nullptr};
}
struct GnaConvCallbacks {
static bool gna_convolution_with_biasadd(Matcher &m) {
auto eltwise = m.get_match_root();
auto m_conv_const_pair = parse_eltwise_inputs<GNAConvolution, Constant>(eltwise);
auto m_conv = m_conv_const_pair.first;
auto m_const = m_conv_const_pair.second;
if (!m_conv || !m_const) {
return false;
}
if (m_conv->inputs().size() != 2) {
return false;
}
if (std::dynamic_pointer_cast<Add>(eltwise) == nullptr) {
return false;
}
const ov::Output<ov::Node> &data = m_conv->input(0).get_source_output();
const ov::Output<ov::Node> &filters = m_conv->input(1).get_source_output();
const ov::Output<ov::Node> &bias = m_const->output(0);
auto gna_conv = std::make_shared<GNAConvolution>(data,
filters,
bias,
m_conv->get_strides(),
m_conv->get_pads_begin(),
m_conv->get_pads_end(),
m_conv->get_dilations(),
m_conv->get_auto_pad());
ov::Output<ov::Node> new_conv(gna_conv);
gna_conv->set_friendly_name(eltwise->get_friendly_name());
ov::copy_runtime_info({m_conv, eltwise}, new_conv.get_node_shared_ptr());
ov::intel_gna::rt_info::set_node_id(new_conv.get_node_shared_ptr(), ov::intel_gna::rt_info::get_node_id(eltwise));
const std::string originalLayers = eltwise->get_friendly_name() + "," + m_conv->get_friendly_name();
gna_conv->get_rt_info()[ExecGraphInfoSerialization::ORIGINAL_NAMES] = originalLayers;
ov::replace_node(m.get_match_root(), new_conv.get_node_shared_ptr());
return true;
}
static std::pair<std::shared_ptr<GNAConvolution>, std::shared_ptr<ov::Node>> parse_gna_conv_inputs(
std::shared_ptr<ov::Node> add) {
std::shared_ptr<GNAConvolution> gna_conv = nullptr;
auto input0 = add->input(0).get_source_output().get_node_shared_ptr();
auto input1 = add->input(1).get_source_output().get_node_shared_ptr();
auto gna_conv0 = std::dynamic_pointer_cast<GNAConvolution>(input0);
auto gna_conv1 = std::dynamic_pointer_cast<GNAConvolution>(input1);
auto can_be_fused = [](const std::shared_ptr<ov::Node>& target, const std::shared_ptr<ov::Node>& fused_input) {
return (target && fused_input && (get_node_id(target) > get_node_id(fused_input) || ov::op::util::is_constant(fused_input)));
};
if (gna_conv0 && gna_conv1) {
if (can_be_fused(gna_conv0, input1)) {
return {gna_conv0, input1};
} else if (can_be_fused(gna_conv1, input0)) {
return {gna_conv1, input0};
}
}
if (gna_conv0 && can_be_fused(gna_conv0, input1)) {
return {gna_conv0, input1};
}
if (gna_conv1 && can_be_fused(gna_conv1, input0)) {
return {gna_conv1, input0};
}
return {nullptr, nullptr};
}
static bool sink_add_to_gna_convolution(Matcher &m) {
auto add = std::dynamic_pointer_cast<Add>(m.get_match_root());
auto gna_conv_node_pair = parse_gna_conv_inputs(m.get_match_root());
auto gna_conv = gna_conv_node_pair.first;
auto node = gna_conv_node_pair.second;
if (!gna_conv || !node) {
return false;
}
if (gna_conv->has_add_node() || gna_conv->get_activation() != ActivationType::NO_ACTIVATION) {
return false;
}
const ov::Output<ov::Node> &data = gna_conv->input(0).get_source_output();
const ov::Output<ov::Node> &filters = gna_conv->input(1).get_source_output();
const ov::Output<ov::Node> &bias = gna_conv->input(2).get_source_output();
auto gna_conv_add = std::make_shared<GNAConvolution>(data,
filters,
bias,
gna_conv->get_strides(),
gna_conv->get_pads_begin(),
gna_conv->get_pads_end(),
gna_conv->get_dilations(),
gna_conv->get_auto_pad());
ov::Output<ov::Node> gna_conv_add_output{gna_conv_add};
gna_conv_add->set_friendly_name(add->get_friendly_name());
ov::copy_runtime_info({node, gna_conv}, gna_conv_add);
set_node_id(gna_conv_add, get_node_id(add));
auto &rt_info = gna_conv->get_rt_info();
if (rt_info.count(ExecGraphInfoSerialization::ORIGINAL_NAMES) > 0) {
auto &rt_info_layer_names = rt_info[ExecGraphInfoSerialization::ORIGINAL_NAMES];
const auto original_names = rt_info_layer_names.template as<std::string>();
const std::string original_names_with_activation = add->get_friendly_name() + "," + original_names;
rt_info_layer_names = original_names_with_activation;
}
ov::replace_node(gna_conv, gna_conv_add);
ov::replace_node(m.get_match_root(), gna_conv_add);
return true;
}
static bool sink_activation_to_gna_convolution(Matcher &m) {
auto activation_node = m.get_match_root();
auto gna_conv = std::dynamic_pointer_cast<GNAConvolution>(
activation_node->input(0).get_source_output().get_node_shared_ptr());
if (gna_conv->get_activation() != ActivationType::NO_ACTIVATION) {
return false;
}
ActivationType activation = ActivationType::NO_ACTIVATION;
if (ov::is_type<Relu>(activation_node)) {
activation = ActivationType::RELU;
} else if (ov::is_type<Sigmoid>(activation_node)) {
activation = ActivationType::SIGMOID;
} else if (ov::is_type<Tanh>(activation_node)) {
activation = ActivationType::TANH;
} else if (ov::is_type<Log>(activation_node)) {
activation = ActivationType::LOG;
} else if (ov::is_type<Abs>(activation_node)) {
activation = ActivationType::ABS;
} else if (ov::is_type<Sign>(activation_node)) {
activation = ActivationType::SIGN;
} else if (ov::is_type<Clamp>(activation_node)) {
activation = ActivationType::CLAMP;
} else {
return false;
}
gna_conv->set_activation(activation);
gna_conv->set_friendly_name(activation_node->get_friendly_name());
set_node_id(gna_conv, get_node_id(activation_node));
auto &rt_info = gna_conv->get_rt_info();
if (rt_info.count(ExecGraphInfoSerialization::ORIGINAL_NAMES) > 0) {
auto &rt_info_layer_names = rt_info[ExecGraphInfoSerialization::ORIGINAL_NAMES];
const auto original_names = rt_info_layer_names.template as<std::string>();
const std::string original_names_with_activation =
activation_node->get_friendly_name() + "," + original_names;
rt_info_layer_names = original_names_with_activation;
}
ov::replace_node(m.get_match_root(), gna_conv);
return true;
}
}; // struct GnaConvCallbacks
/// Predicate deciding whether an Add node looks like a per-channel bias addition.
///
/// Input 0 is treated as the convolution result and input 1 as the bias. The Add is
/// accepted when both input shapes are static, NUMPY broadcasting is used, both inputs
/// share one element type, the bias rank is within [3, 5], and the bias is 1 along
/// every axis except the last, which must equal the last dimension of input 0 or be 1.
///
/// \param output Output of the candidate node.
/// \return true when the Add can be fused as a bias.
bool is_bias_to_be_fused(const ov::Output<ov::Node>& output) {
    constexpr size_t min_bias_rank = 3;
    constexpr size_t max_bias_rank = 5;

    const auto add_node = std::dynamic_pointer_cast<Add>(output.get_node_shared_ptr());
    if (add_node == nullptr) {
        return false;
    }

    auto lhs = add_node->input(0);
    auto rhs = add_node->input(1);
    const auto lhs_pshape = lhs.get_partial_shape();
    const auto rhs_pshape = rhs.get_partial_shape();

    if (lhs_pshape.is_dynamic() || rhs_pshape.is_dynamic()) {
        return false;
    }
    if (add_node->get_autob() != ov::op::AutoBroadcastType::NUMPY ||
        lhs.get_element_type() != rhs.get_element_type()) {
        return false;
    }

    const auto conv_shape = lhs_pshape.to_shape();
    const auto bias_shape = rhs_pshape.to_shape();
    const size_t bias_rank = bias_shape.size();
    if (bias_rank < min_bias_rank || bias_rank > max_bias_rank) {
        return false;
    }

    // NHWC or HWC: channels are stored in the last dimension.
    const size_t bias_channel_axis = bias_rank - 1;
    const size_t conv_channel_axis = conv_shape.size() - 1;
    const auto bias_channels = bias_shape.at(bias_channel_axis);
    const bool channels_match = bias_channels == conv_shape.at(conv_channel_axis);
    if (!channels_match && bias_channels != 1) {
        return false;
    }

    // Every non-channel axis of the bias must be 1.
    for (size_t axis = 0; axis < bias_channel_axis; ++axis) {
        if (bias_shape.at(axis) != 1) {
            return false;
        }
    }
    return true;
}
/// Predicate deciding whether an Add node can be fused as an element-wise addition.
///
/// The Add is accepted when both inputs have the same element type and identical
/// static shapes.
///
/// \param output Output of the candidate node.
/// \return true when the Add can be fused.
bool is_add_to_be_fused(const ov::Output<ov::Node>& output) {
    const auto add_node = std::dynamic_pointer_cast<Add>(output.get_node_shared_ptr());
    if (add_node == nullptr) {
        return false;
    }

    auto lhs = add_node->input(0);
    auto rhs = add_node->input(1);
    if (lhs.get_element_type() != rhs.get_element_type()) {
        return false;
    }

    const auto lhs_pshape = lhs.get_partial_shape();
    const auto rhs_pshape = rhs.get_partial_shape();
    const bool any_dynamic = lhs_pshape.is_dynamic() || rhs_pshape.is_dynamic();
    return !any_dynamic && lhs_pshape.to_shape() == rhs_pshape.to_shape();
}
} // namespace
/// Tags every operation of the model with its position in the ordered-ops sequence.
///
/// \param m Model to annotate.
/// \return Always false.
bool ov::intel_gna::pass::GnaFuseMarkUpNodesOrder::run_on_model(const std::shared_ptr<ov::Model>& m) {
    RUN_ON_FUNCTION_SCOPE(GnaFuseMarkUpNodesOrder);
    const auto ordered_ops = m->get_ordered_ops();
    uint64_t next_id = 0;
    for (size_t idx = 0; idx < ordered_ops.size(); ++idx) {
        set_node_id(ordered_ops[idx], next_id);
        ++next_id;
    }
    return false;
}
/// Removes the order-id runtime attribute from every operation of the model.
///
/// \param m Model to clean up.
/// \return Always false.
bool ov::intel_gna::pass::GnaFuseCleanUpNodesOrder::run_on_model(const std::shared_ptr<ov::Model>& m) {
    RUN_ON_FUNCTION_SCOPE(GnaFuseCleanUpNodesOrder);
    const auto ordered_ops = m->get_ordered_ops();
    for (size_t idx = 0; idx < ordered_ops.size(); ++idx) {
        remove_node_id(ordered_ops[idx]);
    }
    return false;
}
/// Registers a matcher for `Add(GNAConvolution, Constant)` where the convolution has a
/// single consumer and the Add satisfies is_bias_to_be_fused.
ov::intel_gna::pass::FuseConvolutionWithBiasAdd::FuseConvolutionWithBiasAdd() {
    MATCHER_SCOPE(FuseConvolutionWithBiasAdd);

    auto conv_pattern = wrap_type<GNAConvolution>(consumers_count(1));
    auto bias_pattern = wrap_type<Constant>();
    auto add_pattern = wrap_type<Add>({conv_pattern, bias_pattern}, is_bias_to_be_fused);

    matcher_pass_callback on_match = [](Matcher& matcher) {
        return GnaConvCallbacks::gna_convolution_with_biasadd(matcher);
    };

    auto pattern_matcher = std::make_shared<Matcher>(add_pattern, matcher_name);
    register_matcher(pattern_matcher, on_match);
}
/// Registers a matcher for an Add with a single-consumer GNAConvolution on either
/// input, where the Add satisfies is_add_to_be_fused.
ov::intel_gna::pass::FuseConvolutionWithBiasAddAdd::FuseConvolutionWithBiasAddAdd() {
    MATCHER_SCOPE(FuseConvolutionWithBiasAddAdd);

    auto conv_pattern = wrap_type<GNAConvolution>(consumers_count(1));
    // The convolution may feed either operand of the Add.
    auto conv_on_lhs = wrap_type<Add>({conv_pattern, any_input()}, is_add_to_be_fused);
    auto conv_on_rhs = wrap_type<Add>({any_input(), conv_pattern}, is_add_to_be_fused);
    auto either_add = std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{conv_on_lhs, conv_on_rhs});

    matcher_pass_callback on_match = [](Matcher& matcher) {
        return GnaConvCallbacks::sink_add_to_gna_convolution(matcher);
    };

    auto pattern_matcher = std::make_shared<Matcher>(either_add, matcher_name);
    register_matcher(pattern_matcher, on_match);
}
/// Registers a matcher for a Relu, Sigmoid, Tanh, Abs, Log, Clamp or Sign node whose
/// input is a single-consumer GNAConvolution.
ov::intel_gna::pass::SinkActivationToGnaConvolution::SinkActivationToGnaConvolution() {
    MATCHER_SCOPE(SinkActivationToGnaConvolution);

    auto conv_pattern = wrap_type<GNAConvolution>(consumers_count(1));
    auto activation_pattern = wrap_type<Relu, Sigmoid, Tanh, Abs, Log, Clamp, Sign>({conv_pattern});

    matcher_pass_callback on_match = [](Matcher& matcher) {
        return GnaConvCallbacks::sink_activation_to_gna_convolution(matcher);
    };

    auto pattern_matcher = std::make_shared<Matcher>(activation_pattern, matcher_name);
    register_matcher(pattern_matcher, on_match);
}
/// Runs the convolution fusion pipeline on the model: order ids are assigned, the
/// three fusion matchers run inside one GraphRewrite, then the ids are removed.
///
/// \param m Model to transform.
/// \return Always false.
bool ov::intel_gna::pass::GnaConvolutionFusion::run_on_model(const std::shared_ptr<ov::Model>& m) {
    RUN_ON_FUNCTION_SCOPE(GnaConvolutionFusion);

    ov::pass::Manager pipeline(get_pass_config());

    // The fusion callbacks compare order ids, so the ids must be assigned first.
    pipeline.register_pass<GnaFuseMarkUpNodesOrder>();

    auto fusion_rewrite = pipeline.register_pass<ov::pass::GraphRewrite>();
    ADD_MATCHER(fusion_rewrite, FuseConvolutionWithBiasAdd)
    ADD_MATCHER(fusion_rewrite, FuseConvolutionWithBiasAddAdd)
    ADD_MATCHER(fusion_rewrite, SinkActivationToGnaConvolution)
    fusion_rewrite->set_name("ov::intel_gna::pass::fuse_conv_bias_add_activation");

    pipeline.register_pass<GnaFuseCleanUpNodesOrder>();

    pipeline.run_passes(m);
    return false;
}

View File

@ -0,0 +1,58 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/node.hpp>
#include "openvino/pass/graph_rewrite.hpp"
namespace ov {
namespace intel_gna {
namespace pass {
/**
 * @brief Model pass that tags every operation with an increasing order id
 * (its index in the model's ordered-ops sequence) via the GNA node-id runtime attribute.
 */
class GnaFuseMarkUpNodesOrder : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("GnaFuseMarkUpNodesOrder", "0");
/// Assigns the ids; the implementation always returns false.
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
};
/**
 * @brief Model pass that removes the GNA node-id runtime attribute from every operation.
 */
class GnaFuseCleanUpNodesOrder : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("GnaFuseCleanUpNodesOrder", "0");
/// Removes the ids; the implementation always returns false.
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
};
/**
 * @brief Matcher pass that replaces Add(GNAConvolution, Constant) with a single
 * GNAConvolution taking the constant as its bias input. The convolution must have
 * exactly one consumer and the Add must look like a per-channel bias addition.
 */
class FuseConvolutionWithBiasAdd : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("FuseConvolutionWithBiasAdd", "0");
FuseConvolutionWithBiasAdd();
};
/**
 * @brief Matcher pass declared for fusing a bias Add into a group convolution.
 * NOTE(review): no constructor definition is visible next to the other fusion passes;
 * confirm that one exists before instantiating this class.
 */
class FuseGroupConvolutionWithBiasAdd : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("FuseGroupConvolutionWithBiasAdd", "0");
FuseGroupConvolutionWithBiasAdd();
};
/**
 * @brief Matcher pass that matches an Add with a single-consumer GNAConvolution on
 * either input, where both Add inputs have the same element type and identical static
 * shapes, and replaces the convolution and the Add with one GNAConvolution.
 */
class FuseConvolutionWithBiasAddAdd : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("FuseConvolutionWithBiasAddAdd", "0");
FuseConvolutionWithBiasAddAdd();
};
/**
 * @brief Matcher pass that matches a Relu, Sigmoid, Tanh, Abs, Log, Clamp or Sign node
 * fed by a single-consumer GNAConvolution, records the activation type on the
 * convolution and replaces the activation node with the convolution.
 */
class SinkActivationToGnaConvolution : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("SinkActivationToGnaConvolution", "0");
SinkActivationToGnaConvolution();
};
/**
 * @brief Model pass that runs the whole GNA convolution fusion pipeline:
 * GnaFuseMarkUpNodesOrder, a GraphRewrite with FuseConvolutionWithBiasAdd,
 * FuseConvolutionWithBiasAddAdd and SinkActivationToGnaConvolution, and finally
 * GnaFuseCleanUpNodesOrder.
 */
class GnaConvolutionFusion : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("GnaConvolutionFusion", "0");
/// Runs the pipeline; the implementation always returns false.
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
};
} // namespace pass
} // namespace intel_gna
} // namespace ov

View File

@ -0,0 +1,20 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "gna_node_id.hpp"
/// Stores an order id in the runtime info of a node under the GnaNodeId type key.
///
/// \param node Node to tag.
/// \param id   Order id to store; any previous value is overwritten.
void ov::intel_gna::rt_info::set_node_id(const std::shared_ptr<Node>& node, uint64_t id) {
    const auto& attribute_key = GnaNodeId::get_type_info_static();
    node->get_rt_info()[attribute_key] = id;
}
/// Removes the order id stored under the GnaNodeId type key from a node's runtime info.
///
/// \param node Node to clean up.
void ov::intel_gna::rt_info::remove_node_id(const std::shared_ptr<Node>& node) {
    const auto& attribute_key = GnaNodeId::get_type_info_static();
    node->get_rt_info().erase(attribute_key);
}
/// Reads the order id stored under the GnaNodeId type key in a node's runtime info.
///
/// The entry is looked up with `at()`, so the node is expected to carry an id.
///
/// \param node Node to query.
/// \return The stored order id.
uint64_t ov::intel_gna::rt_info::get_node_id(const std::shared_ptr<Node>& node) {
    const auto& attributes = node->get_rt_info();
    const auto& stored_id = attributes.at(GnaNodeId::get_type_info_static());
    return stored_id.as<uint64_t>();
}

View File

@ -0,0 +1,37 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "openvino/core/node.hpp"
#include "openvino/core/runtime_attribute.hpp"
namespace ov {
namespace intel_gna {
namespace rt_info {
/// Stores the order id @p id in the runtime info of @p node (overwrites a previous value).
void set_node_id(const std::shared_ptr<Node>& node, uint64_t id);
/// Removes the order id from the runtime info of @p node.
void remove_node_id(const std::shared_ptr<Node>& node);
/// Returns the order id stored in the runtime info of @p node.
/// The implementation looks the entry up with `at()`, so the id must have been set.
uint64_t get_node_id(const std::shared_ptr<Node>& node);
/**
 * @ingroup ie_runtime_attr_api
 * @brief GnaNodeId class represents a runtime info attribute that marks an operation
 * with its order id. Its type info is used as the key under which set_node_id(),
 * get_node_id() and remove_node_id() access the id.
 */
class GnaNodeId : public RuntimeAttribute {
public:
OPENVINO_RTTI("gna_node_id", "0");
GnaNodeId() = default;
/// Reports the attribute as non-copyable.
bool is_copyable() const override {
return false;
}
};
} // namespace rt_info
} // namespace intel_gna
} // namespace ov

View File

@ -0,0 +1,235 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <openvino/cc/ngraph/itt.hpp>
#include "transformations/transpose_nchw.hpp"
#include "transformations/utils/transformation_helper.hpp"
#include <ngraph/opsets/opset8.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <transformations/utils/utils.hpp>
#include <ops/gna_convolution.hpp>
#include <ops/gna_max_pool.hpp>
#include <ngraph/pass/manager.hpp>
#include <vector>
// RTTI definitions for the passes declared with NGRAPH_RTTI_DECLARATION in transpose_nchw.hpp.
NGRAPH_RTTI_DEFINITION(ov::intel_gna::pass::TransposeNCHW, "TransposeNCHW", 0);
NGRAPH_RTTI_DEFINITION(ov::intel_gna::pass::SubstituteGNAConvolution, "SubstituteGNAConvolution", 0);
NGRAPH_RTTI_DEFINITION(ov::intel_gna::pass::SubstituteGNAMaxPool, "SubstituteGNAMaxPool", 0);
// File-wide shorthand: in this file `Node` is a shared pointer to a graph node,
// not the node class itself.
using Node = std::shared_ptr<ngraph::Node>;
namespace {
ngraph::Shape MakeTransposeOrderNCHW2NHWC(size_t shape_size);
ngraph::Shape MakeTransposeOrderNHWC2NCHW(size_t shape_size);
/* transpose orders
before convolution convert NCHW -> NHWC
3D: NCX {0, 1, 2} -> NXC {0, 2, 1}
4D: NCHW {0, 1, 2, 3} -> NHWC {0, 2, 3, 1}
5D: NCZYX {0, 1, 2, 3, 4} -> NZYXC {0, 2, 3, 4, 1}
after convolution convert NHWC -> NCHW
3D: NXC {0, 1, 2} -> NCX {0, 2, 1}
4D: NHWC {0, 1, 2, 3} -> NCHW {0, 3, 1, 2}
5D: NZYXC {0, 1, 2, 3, 4} -> NCZYX {0, 4, 1, 2, 3}
so just
1) temp = A[N - 1]
2) move A[j] -> A[j + 1] for 1 <= j <= N - 2
3) A[1] = temp
*/
/// Builds the transpose order that moves the channel axis (index 1) to the last
/// position, e.g. {0, 2, 1} for rank 3 and {0, 2, 3, 1} for rank 4.
///
/// \param shape_size Rank of the tensor to transpose.
/// \return The axis order; the identity order when the rank is below 2, because such a
///         tensor has no separate channel axis to move.
ngraph::Shape MakeTransposeOrderNCHW2NHWC(size_t shape_size) {
    ngraph::Shape shape(shape_size);
    std::iota(shape.begin(), shape.end(), 0);
    // Guard small ranks: `size() - 1` would wrap around for an empty order, and a
    // rank-1 order must stay {0} to remain a valid permutation.
    if (shape_size < 2) {
        return shape;
    }
    // Shift the spatial axes one position to the left ...
    for (size_t i = 1; i + 1 < shape_size; ++i) {
        shape[i] = shape[i + 1];
    }
    // ... and put the channel axis last.
    shape.back() = 1;
    return shape;
}
/// Builds the transpose order that moves the channel axis from the last position back
/// to index 1, e.g. {0, 2, 1} for rank 3 and {0, 3, 1, 2} for rank 4.
///
/// \param shape_size Rank of the tensor to transpose.
/// \return The axis order; the identity order when the rank is below 2, because such a
///         tensor has no separate channel axis to move.
ngraph::Shape MakeTransposeOrderNHWC2NCHW(size_t shape_size) {
    ngraph::Shape shape(shape_size);
    std::iota(shape.begin(), shape.end(), 0);
    // Guard small ranks: reading the last element of an empty order and writing
    // shape[1] of a rank-1 order would both be out of bounds.
    if (shape_size < 2) {
        return shape;
    }
    const size_t channels_position = shape.back();
    // Shift the spatial axes one position to the right ...
    for (size_t i = shape_size - 1; i > 1; --i) {
        shape[i] = shape[i - 1];
    }
    // ... and put the channel axis right after the batch axis.
    shape[1] = channels_position;
    return shape;
}
/// Checks whether any producer feeding `node` is an operation of type T.
///
/// \param node Node whose inputs are inspected.
/// \return true when at least one input is produced by a T.
template <typename T>
bool HasParentNode(Node node) {
    const auto producers = node->input_values();
    for (size_t idx = 0; idx < producers.size(); ++idx) {
        const ngraph::Node* producer = producers[idx].get_node();
        if (dynamic_cast<const T*>(producer) != nullptr) {
            return true;
        }
    }
    return false;
}
/// Checks whether any consumer of any output of `node` is an operation of type T.
///
/// \param node Node whose consumers are inspected.
/// \return true when at least one consumer is a T.
template <typename T>
bool HasChildNode(Node node) {
    const size_t outputs_count = node->get_output_size();
    for (size_t port = 0; port != outputs_count; ++port) {
        const auto consumers = node->get_output_target_inputs(port);
        for (const auto& consumer : consumers) {
            if (dynamic_cast<const T*>(consumer.get_node()) != nullptr) {
                return true;
            }
        }
    }
    return false;
}
} // namespace
namespace SubstituteGNAConvolutionNS {

bool DoTransformation(Node convolution);

/// Replaces an opset8 Convolution with Transpose -> GNAConvolution -> Transpose.
///
/// The data input and the filters are both transposed with the channel-first to
/// channel-last order before they reach the GNAConvolution, and the result is
/// transposed back so consumers of the original node see the same layout.
///
/// \param convolution Node to replace; must be an opset8 Convolution with a static
///        3D or 4D data input.
/// \return true when the graph was modified, false when the node is not handled.
bool DoTransformation(Node convolution) {
    auto convolution_node = std::dynamic_pointer_cast<ngraph::opset8::Convolution>(convolution);
    if (!convolution_node) {
        return false;
    }

    // get_input_shape() below requires a static shape; leave dynamic convolutions untouched.
    if (convolution_node->get_input_partial_shape(0).is_dynamic()) {
        return false;
    }

    auto convolution_input_data_node = convolution_node->input_value(0);
    auto convolution_input_const_node = convolution_node->input_value(1);
    const ngraph::Shape convolution_input_shape = convolution_node->get_input_shape(0);

    // TODO: check input_data_node is not Reshape since that pattern should be matched in another transformation

    if (convolution_input_shape.size() != 3 && convolution_input_shape.size() != 4) {
        std::cout << "TransposeNCHW: unsupported convolution size " << convolution_input_shape.size() << std::endl;
        return false;
    }

    const ngraph::Shape transpose_before_order = MakeTransposeOrderNCHW2NHWC(convolution_input_shape.size());

    auto transpose_const = ngraph::opset8::Constant::create(ngraph::element::i64,
                                                            ngraph::Shape{transpose_before_order.size()},
                                                            transpose_before_order);

    auto transpose_before = std::make_shared<ngraph::opset8::Transpose>(convolution_input_data_node,
                                                                        transpose_const);

    // The filters use the same axis order as the data.
    auto transpose_conv_constant = std::make_shared<ngraph::opset8::Transpose>(convolution_input_const_node,
                                                                               transpose_const);

    auto conv_new = std::make_shared<ov::intel_gna::op::GNAConvolution>(transpose_before,
                                                                        transpose_conv_constant,
                                                                        convolution_node->get_strides(),
                                                                        convolution_node->get_pads_begin(),
                                                                        convolution_node->get_pads_end(),
                                                                        convolution_node->get_dilations(),
                                                                        convolution_node->get_auto_pad());

    // get_output_shape() below requires a static shape as well. Nothing has been
    // rewired yet, so bailing out here leaves the original graph intact.
    if (conv_new->get_output_partial_shape(0).is_dynamic()) {
        return false;
    }

    const ngraph::Shape transpose_after_order = MakeTransposeOrderNHWC2NCHW(conv_new->get_output_shape(0).size());

    auto transpose_after = std::make_shared<ngraph::opset8::Transpose>(conv_new,
                                                                       ngraph::opset8::Constant::create(ngraph::element::i64,
                                                                       ngraph::Shape{transpose_after_order.size()},
                                                                       transpose_after_order));

    // Every node created above (including the transposed filters) inherits the
    // runtime info of the convolution it replaces.
    ov::copy_runtime_info(convolution_node,
                          {transpose_before, transpose_const, transpose_conv_constant, conv_new, transpose_after});

    ov::replace_output_update_name(convolution->output(0), transpose_after->output(0));

    return true;
}

}  // namespace SubstituteGNAConvolutionNS
namespace SubstituteGNAMaxPoolNS {

bool DoTransformation(Node max_pool);

/// Replaces a v1 MaxPool with Transpose -> GNAMaxPool -> Transpose.
///
/// The input is transposed with the channel-first to channel-last order before it
/// reaches the GNAMaxPool, and the result is transposed back so consumers of the
/// original node see the same layout.
///
/// \param max_pool Node to replace; must be a v1 MaxPool with a static input shape.
/// \return true when the graph was modified, false when the node is not handled.
bool DoTransformation(Node max_pool) {
    auto max_pool_node = std::dynamic_pointer_cast<ov::op::v1::MaxPool>(max_pool);
    if (!max_pool_node) {
        return false;
    }

    // get_input_shape() below requires a static shape; leave dynamic pools untouched.
    if (max_pool_node->get_input_partial_shape(0).is_dynamic()) {
        return false;
    }

    auto max_pool_input_data_node = max_pool_node->input_value(0);
    const ngraph::Shape max_pool_input_shape = max_pool_node->get_input_shape(0);

    const ngraph::Shape transpose_before_order = MakeTransposeOrderNCHW2NHWC(max_pool_input_shape.size());

    auto transpose_const = ngraph::opset8::Constant::create(ngraph::element::i64,
                                                            ngraph::Shape{transpose_before_order.size()},
                                                            transpose_before_order);

    auto transpose_before = std::make_shared<ngraph::opset8::Transpose>(max_pool_input_data_node,
                                                                        transpose_const);

    auto max_pool_new = std::make_shared<ov::intel_gna::op::GNAMaxPool>(transpose_before,
                                                                        max_pool_node->get_strides(),
                                                                        max_pool_node->get_pads_begin(),
                                                                        max_pool_node->get_pads_end(),
                                                                        max_pool_node->get_kernel(),
                                                                        max_pool_node->get_rounding_type(),
                                                                        max_pool_node->get_auto_pad());

    // get_output_shape() below requires a static shape as well. Nothing has been
    // rewired yet, so bailing out here leaves the original graph intact.
    if (max_pool_new->get_output_partial_shape(0).is_dynamic()) {
        return false;
    }

    const ngraph::Shape transpose_after_order = MakeTransposeOrderNHWC2NCHW(max_pool_new->get_output_shape(0).size());

    auto transpose_after = std::make_shared<ngraph::opset8::Transpose>(max_pool_new,
                                                                       ngraph::opset8::Constant::create(ngraph::element::i64,
                                                                       ngraph::Shape{transpose_after_order.size()},
                                                                       transpose_after_order));

    ov::copy_runtime_info(max_pool_node, {transpose_before, transpose_const, max_pool_new, transpose_after});

    ov::replace_output_update_name(max_pool->output(0), transpose_after->output(0));

    return true;
}

}  // namespace SubstituteGNAMaxPoolNS
// ----------------------------------------------------------------------------
/// Registers a matcher that hands every opset8 Convolution to
/// SubstituteGNAConvolutionNS::DoTransformation.
ov::intel_gna::pass::SubstituteGNAConvolution::SubstituteGNAConvolution() {
    MATCHER_SCOPE(SubstituteGNAConvolution);

    auto conv_pattern = ngraph::pattern::wrap_type<ngraph::opset8::Convolution>();

    ngraph::matcher_pass_callback on_match = [](ngraph::pattern::Matcher& matcher) -> bool {
        const auto matched_conv =
            std::dynamic_pointer_cast<ngraph::opset8::Convolution>(matcher.get_match_root());
        if (matched_conv == nullptr) {
            return false;
        }
        return SubstituteGNAConvolutionNS::DoTransformation(matched_conv);
    };

    auto pattern_matcher = std::make_shared<ngraph::pattern::Matcher>(conv_pattern, matcher_name);
    this->register_matcher(pattern_matcher, on_match);
}
/// Registers a matcher that hands every v1 MaxPool to
/// SubstituteGNAMaxPoolNS::DoTransformation.
ov::intel_gna::pass::SubstituteGNAMaxPool::SubstituteGNAMaxPool() {
    MATCHER_SCOPE(SubstituteGNAMaxPool);

    auto pool_pattern = ngraph::pattern::wrap_type<ov::op::v1::MaxPool>();

    ngraph::matcher_pass_callback on_match = [](ngraph::pattern::Matcher& matcher) -> bool {
        const auto matched_pool = std::dynamic_pointer_cast<ov::op::v1::MaxPool>(matcher.get_match_root());
        if (matched_pool == nullptr) {
            return false;
        }
        return SubstituteGNAMaxPoolNS::DoTransformation(matched_pool);
    };

    auto pattern_matcher = std::make_shared<ngraph::pattern::Matcher>(pool_pattern, matcher_name);
    this->register_matcher(pattern_matcher, on_match);
}
/// Runs SubstituteGNAConvolution and SubstituteGNAMaxPool on the model.
///
/// \param function Model to transform.
/// \return Always false.
bool ov::intel_gna::pass::TransposeNCHW::run_on_model(const std::shared_ptr<ngraph::Function>& function) {
    RUN_ON_FUNCTION_SCOPE(TransposeNCHW);

    ngraph::pass::Manager substitution_passes(get_pass_config());
    substitution_passes.register_pass<ov::intel_gna::pass::SubstituteGNAConvolution>();
    substitution_passes.register_pass<ov::intel_gna::pass::SubstituteGNAMaxPool>();
    substitution_passes.run_passes(function);

    return false;  // FIXME: should we return true here?
}

View File

@ -0,0 +1,36 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace ov {
namespace intel_gna {
namespace pass {
/**
 * @brief Matcher pass that replaces an opset8 Convolution with
 * Transpose -> GNAConvolution -> Transpose. Data and filters are transposed from the
 * channel-first to the channel-last layout before the GNAConvolution and the result
 * is transposed back. Only 3D and 4D data inputs are handled.
 */
class SubstituteGNAConvolution : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
SubstituteGNAConvolution();
};
/**
 * @brief Matcher pass that replaces a v1 MaxPool with
 * Transpose -> GNAMaxPool -> Transpose, transposing the input from the channel-first
 * to the channel-last layout and the result back.
 */
class SubstituteGNAMaxPool : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
SubstituteGNAMaxPool();
};
/**
 * @brief Function pass that runs SubstituteGNAConvolution and SubstituteGNAMaxPool
 * on the whole model.
 */
class TransposeNCHW : public ngraph::pass::FunctionPass {
public:
NGRAPH_RTTI_DECLARATION;
/// Runs both substitution passes; the implementation always returns false.
bool run_on_model(const std::shared_ptr<ngraph::Function>& f) override;
};
} // namespace pass
} // namespace intel_gna
} // namespace ov