Merge branch 'gna_layout_debug' of https://github.com/evkotov/openvino into gna_layout_debug
Commit 04a715267a
@@ -30,7 +30,7 @@ TSSliceForward::TSSliceForward() {
auto& main_node = pattern_to_node.at(main_node_label);
auto transpose = std::dynamic_pointer_cast<Transpose>(pattern_to_node.at(transpose_label));
if (!transpose) {
if (!transpose || main_node->get_input_size() < 5) {
return false;
}

@@ -40,14 +40,17 @@ TSSliceForward::TSSliceForward() {
}
// remove Transpose on 1st input:
auto transpose_parent = main_node->input_value(0).get_node()->input_value(0);
auto transpose_parent = transpose->input_value(0);
main_node->input(0).replace_source_output(transpose_parent);
const auto transpose_axis_order = transpose_const->get_axis_vector_val();
auto axis = std::make_shared<Constant>(element::i32, Shape{}, std::vector<int32_t>{0});
main_node->input(4).replace_source_output(
ChangeValuesOrder(main_node->input_value(4), transpose_axis_order, axis));
auto data = std::make_shared<Constant>(element::i32, Shape{transpose_axis_order.size()}, transpose_axis_order);
const auto& indices = main_node->input_value(4);
auto new_axis = std::make_shared<Gather>(data, indices, axis);
main_node->input(4).replace_source_output(new_axis);
main_node->validate_and_infer_types();
TransposeInputsInfo transpose_input_info = {transpose, transpose_const, 0};
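Note: the forward matcher above now remaps the Slice axes input (input 4) through the transpose order with a Gather over a constant, rather than the previous ChangeValuesOrder call. A minimal standalone sketch of what that Gather computes, assuming non-negative axes (illustrative only, not taken from the patch):

    // remapped[i] = transpose_order[slice_axes[i]], i.e. Gather(order, axes, 0)
    #include <cstddef>
    #include <vector>

    std::vector<size_t> remap_slice_axes(const std::vector<size_t>& transpose_order,
                                         const std::vector<size_t>& slice_axes) {
        std::vector<size_t> remapped;
        remapped.reserve(slice_axes.size());
        for (size_t axis : slice_axes) {
            // an axis in the transposed layout refers to dim order[axis] in the original layout
            remapped.push_back(transpose_order[axis]);
        }
        return remapped;
    }

    // Example: transpose_order = {0, 2, 3, 1} (NCHW -> NHWC) and slice_axes = {1}
    // give remapped = {2}, so the Slice keeps cutting the same logical dimension.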
@@ -82,19 +85,28 @@ TSSliceBackward::TSSliceBackward() {
auto transpose = pattern_to_output.at(transpose_label).get_node_shared_ptr();
auto main_node = pattern_to_output.at(main_node_label).get_node_shared_ptr();
if (main_node->get_input_size() < 5) {
return false;
}
for (auto& new_node : sink_backward::InsertTransposeBeforeNode(main_node,
transpose_const,
/* input_indexes= */ {0})) {
register_new_node(new_node);
}
// remove output transposes
RemoveSingleOutputConsumers(main_node);
SwapNames(main_node, transpose);
const auto transpose_axis_order = transpose_const->get_axis_vector_val();
const auto reversed_transpose_order = ReverseTransposeOrder(transpose_axis_order);
auto axis = std::make_shared<Constant>(element::i32, Shape{}, std::vector<int32_t>{0});
main_node->input(4).replace_source_output(
ChangeValuesOrder(main_node->input_value(4), reversed_transpose_order, axis));
auto data =
std::make_shared<Constant>(element::i32, Shape{reversed_transpose_order.size()}, reversed_transpose_order);
const auto& indices = main_node->input_value(4);
auto new_axis = std::make_shared<Gather>(data, indices, axis);
main_node->input(4).replace_source_output(new_axis);
main_node->validate_and_infer_types();
return true;
};
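The backward matcher mirrors this with ReverseTransposeOrder, which as used here is effectively the inverse permutation of the transpose order. A small sketch of that inversion (illustrative only, not from the patch):

    #include <cstddef>
    #include <vector>

    std::vector<size_t> inverse_permutation(const std::vector<size_t>& order) {
        std::vector<size_t> inv(order.size());
        for (size_t i = 0; i < order.size(); ++i) {
            inv[order[i]] = i;  // position i reads from input dim order[i], so order[i] maps back to i
        }
        return inv;
    }

    // Example: order = {0, 2, 3, 1} -> inv = {0, 3, 1, 2}.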
@@ -23,6 +23,14 @@ using namespace ov::pass::transpose_sinking::utils;
namespace {
/**
* @brief Checks that Reshape operation is equal to Squeeze:
* Only dims equal to 1 are deleted; all other dims must be the same.
* Converts these 1-dims to the axes format.
* @arg reshape Reshape operation.
* @arg reshape_to_shape 2nd input to Reshape op as a constant.
* @arg result_axes Contains axes which will be squeezed.
*/
bool shape_to_squeeze_axes(const std::shared_ptr<Node>& reshape,
const std::shared_ptr<Constant>& reshape_to_shape,
std::vector<size_t>& result_axes) {

@@ -61,10 +69,23 @@ bool shape_to_squeeze_axes(const std::shared_ptr<Node>& reshape,
return true;
}
std::vector<size_t> squeeze_axes_to_shape(const Output<Node>& input_node, std::vector<size_t> squeeze_axes) {
std::vector<size_t> to_shape;
/**
* @brief Converts squeeze_axes to the actual shape (2nd input) for Reshape operation
* using the shape of the 1st input to Reshape.
* @arg input_node 1st input to Reshape op.
* @arg squeeze_axes In case the Reshape op is equal to Squeeze, these axes indicate the places where 1-dims have
* to be deleted.
*/
bool squeeze_axes_to_shape(const Output<Node>& input_node,
std::vector<size_t> squeeze_axes,
std::vector<size_t>& to_shape) {
to_shape.clear();
std::sort(squeeze_axes.begin(), squeeze_axes.end());
const auto& input_shape = input_node.get_shape(); // check is static
const auto& input_pshape = input_node.get_partial_shape();
if (input_pshape.is_dynamic()) {
return false;
}
const auto& input_shape = input_pshape.get_shape();
for (size_t i = 0, j = 0; i < input_shape.size(); ++i) {
if (j < squeeze_axes.size() && i == squeeze_axes[j]) {
++j;

@@ -72,7 +93,7 @@ std::vector<size_t> squeeze_axes_to_shape(const Output<Node>& input_node, std::v
}
to_shape.push_back(input_shape[i]);
}
return to_shape;
return true;
}
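A standalone sketch of the mapping the new squeeze_axes_to_shape helper performs, under the same assumptions it makes (static input shape, non-negative axes). It is the inverse of shape_to_squeeze_axes, which would turn a reshape from {6, 1, 5, 1, 4} to {6, 5, 4} into axes {1, 3}:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    bool squeeze_axes_to_shape_sketch(const std::vector<size_t>& input_shape,
                                      std::vector<size_t> squeeze_axes,
                                      std::vector<size_t>& to_shape) {
        to_shape.clear();
        std::sort(squeeze_axes.begin(), squeeze_axes.end());
        for (size_t i = 0, j = 0; i < input_shape.size(); ++i) {
            if (j < squeeze_axes.size() && i == squeeze_axes[j]) {
                ++j;  // drop this dimension (expected to be 1)
                continue;
            }
            to_shape.push_back(input_shape[i]);
        }
        return true;  // the real helper returns false earlier when the input shape is dynamic
    }

    // Example: input_shape = {6, 1, 5, 1, 4}, squeeze_axes = {1, 3} -> to_shape = {6, 5, 4}.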
} // namespace

@@ -109,7 +130,7 @@ TSSqueezeForward::TSSqueezeForward() {
// if 2nd input to squeeze is empty then all '1' dims will be deleted.
if (non_negative_axes.empty()) {
auto input_pshape = transpose->input_value(0).get_partial_shape();
auto input_pshape = transpose->output(0).get_partial_shape();
if (input_pshape.is_dynamic()) {
return false;
}

@@ -133,10 +154,15 @@ TSSqueezeForward::TSSqueezeForward() {
transpose_order_values);
if (as_type_ptr<Reshape>(squeeze)) {
new_values = squeeze_axes_to_shape(transpose->input_value(0), new_values);
std::vector<size_t> to_shape;
auto success = squeeze_axes_to_shape(transpose->input_value(0), new_values, to_shape);
if (!success) {
return false;
}
new_values = to_shape;
}
auto new_const = Constant::create(squeeze_axes->get_element_type(), squeeze_axes->get_shape(), new_values);
auto new_const = Constant::create(squeeze_axes->get_element_type(), {new_values.size()}, new_values);
auto new_squeeze = squeeze->clone_with_new_inputs({transpose->input_value(0), new_const});
auto new_transpose = transpose->clone_with_new_inputs({new_squeeze, new_transpose_order});

@@ -215,7 +241,12 @@ TSSqueezeBackward::TSSqueezeBackward() {
transpose_order_values);
auto new_transpose = transpose->clone_with_new_inputs({squeeze->input_value(0), new_transpose_order});
if (as_type_ptr<Reshape>(squeeze)) {
new_values = squeeze_axes_to_shape(new_transpose->output(0), new_values);
std::vector<size_t> to_shape;
auto success = squeeze_axes_to_shape(new_transpose->output(0), new_values, to_shape);
if (!success) {
return false;
}
new_values = to_shape;
}
std::shared_ptr<Node> new_squeeze;
@@ -23,6 +23,14 @@ using namespace ov::pass::transpose_sinking::utils;
namespace {
/**
* @brief Checks that Reshape operation is equal to Unsqueeze:
* Only dims equal to 1 are inserted; all other dims must be the same.
* Converts these 1-dims to the axes format.
* @arg reshape Reshape operation.
* @arg reshape_to_shape 2nd input to Reshape op as a constant.
* @arg result_axes Contains axes which will be unsqueezed.
*/
bool shape_to_unsqueeze_axes(const std::shared_ptr<Node>& reshape,
const std::shared_ptr<Constant>& reshape_to_shape,
std::vector<size_t>& result_axes) {

@@ -60,9 +68,23 @@ bool shape_to_unsqueeze_axes(const std::shared_ptr<Node>& reshape,
return true;
}
std::vector<size_t> unsqueeze_axes_to_shape(const Output<Node>& input_node, std::vector<size_t> unsqueeze_axes) {
const auto& input_shape = input_node.get_shape(); // check is static
std::vector<size_t> to_shape(input_shape.size() + unsqueeze_axes.size());
/**
* @brief Converts unsqueeze_axes to the actual shape (2nd input) for Reshape operation
* using the shape of the 1st input to Reshape.
* @arg input_node 1st input to Reshape op.
* @arg unsqueeze_axes In case the Reshape op is equal to Unsqueeze, these axes indicate the places where 1-dims have
* to be inserted.
*/
bool unsqueeze_axes_to_shape(const Output<Node>& input_node,
std::vector<size_t> unsqueeze_axes,
std::vector<size_t>& to_shape) {
to_shape.clear();
const auto& input_pshape = input_node.get_partial_shape();
if (input_pshape.is_dynamic()) {
return false;
}
const auto& input_shape = input_pshape.get_shape();
to_shape.resize(input_shape.size() + unsqueeze_axes.size());
std::sort(unsqueeze_axes.begin(), unsqueeze_axes.end());
for (size_t i = 0, j = 0, k = 0; i < to_shape.size(); ++i) {
if (j < unsqueeze_axes.size() && i == unsqueeze_axes[j]) {

@@ -73,7 +95,7 @@ std::vector<size_t> unsqueeze_axes_to_shape(const Output<Node>& input_node, std:
k++;
}
}
return to_shape;
return true;
}
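A matching standalone sketch of unsqueeze_axes_to_shape under the same assumptions (static input shape, non-negative axes); illustrative only:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    bool unsqueeze_axes_to_shape_sketch(const std::vector<size_t>& input_shape,
                                        std::vector<size_t> unsqueeze_axes,
                                        std::vector<size_t>& to_shape) {
        to_shape.assign(input_shape.size() + unsqueeze_axes.size(), 0);
        std::sort(unsqueeze_axes.begin(), unsqueeze_axes.end());
        for (size_t i = 0, j = 0, k = 0; i < to_shape.size(); ++i) {
            if (j < unsqueeze_axes.size() && i == unsqueeze_axes[j]) {
                to_shape[i] = 1;  // insert a new singleton dimension
                ++j;
            } else {
                to_shape[i] = input_shape[k++];  // copy the next existing dimension
            }
        }
        return true;  // the real helper returns false earlier when the input shape is dynamic
    }

    // Example: input_shape = {6, 5, 4}, unsqueeze_axes = {1, 3} -> to_shape = {6, 1, 5, 1, 4}.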
} // namespace

@@ -114,7 +136,11 @@ TSUnsqueezeForward::TSUnsqueezeForward() {
std::shared_ptr<Node> new_unsqueeze;
if (as_type_ptr<Reshape>(unsqueeze)) {
auto new_values = unsqueeze_axes_to_shape(transpose->input_value(0), non_negative_axes);
std::vector<size_t> new_values;
auto success = unsqueeze_axes_to_shape(transpose->input_value(0), non_negative_axes, new_values);
if (!success) {
return false;
}
auto new_const = Constant::create(unsqueeze_axes->get_element_type(), {new_values.size()}, new_values);
new_unsqueeze = unsqueeze->clone_with_new_inputs({transpose->input_value(0), new_const});
} else {

@@ -194,7 +220,12 @@ TSUnsqueezeBackward::TSUnsqueezeBackward() {
auto new_transpose = transpose->clone_with_new_inputs({unsqueeze->input_value(0), new_transpose_order});
if (as_type_ptr<Reshape>(unsqueeze)) {
new_values = unsqueeze_axes_to_shape(new_transpose->output(0), new_values);
std::vector<size_t> to_shape;
auto success = unsqueeze_axes_to_shape(new_transpose->output(0), new_values, to_shape);
if (!success) {
return false;
}
new_values = to_shape;
}
auto new_const = Constant::create(unsqueeze_axes->get_element_type(), unsqueeze_axes->get_shape(), new_values);
auto new_unsqueeze = unsqueeze->clone_with_new_inputs({new_transpose, new_const});
@@ -862,6 +862,15 @@ auto test_forward_reshape_unsqueeze = []() {
new_out_vec[0] = make_shared<Transpose>(out_vec[0], order);
return new_out_vec;
};
auto new_constant = [](const vector<size_t>& idxs, const OutputVector& out_vec) -> OutputVector {
OutputVector new_out_vec(out_vec.size());
new_out_vec[0] = out_vec[0];
new_out_vec[1] = make_shared<Constant>(out_vec[1].get_element_type(),
out_vec[1].get_shape(),
std::vector<int64_t>{6, 1, 5, 1, 4});
return new_out_vec;
};
test_case.model_ref.preprocess_inputs_to_main = {{new_constant}, {{1}}};
test_case.model_ref.main_op = {CREATE_RESHAPE_FACTORY(Reshape)};
test_case.model_ref.preprocess_outputs_of_main = {{new_transpose}, {{0}}};
test_case.model_ref.model_template = create_model;

@@ -1283,7 +1292,14 @@ auto test_backward_reshape_squeeze = []() {
new_out_vec[1] = out_vec[1];
return new_out_vec;
};
test_case.model_ref.preprocess_inputs_to_main = {{new_transpose}, {{0}}};
auto new_constant = [](const vector<size_t>& idxs, const OutputVector& out_vec) -> OutputVector {
OutputVector new_out_vec(out_vec.size());
new_out_vec[0] = out_vec[0];
new_out_vec[1] =
make_shared<Constant>(out_vec[1].get_element_type(), out_vec[1].get_shape(), std::vector<int64_t>{6, 5, 4});
return new_out_vec;
};
test_case.model_ref.preprocess_inputs_to_main = {{new_transpose, new_constant}, {{0}, {1}}};
test_case.model_ref.main_op = {CREATE_RESHAPE_FACTORY(Reshape)};
test_case.model_ref.model_template = create_model;
@@ -4,6 +4,8 @@
#include "op/hardmax.hpp"
#include <openvino/opsets/opset11.hpp>
#include "exceptions.hpp"
#include "ngraph/builder/reshape.hpp"
#include "ngraph/op/one_hot.hpp"

@@ -37,11 +39,11 @@ OutputVector hardmax(const Node& node) {
const auto indices_axis = 1;
const auto topk =
std::make_shared<default_opset::TopK>(coerced_tensor,
default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}),
indices_axis,
default_opset::TopK::Mode::MAX,
default_opset::TopK::SortType::NONE);
std::make_shared<ov::opset11::TopK>(coerced_tensor,
default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}),
indices_axis,
ov::opset11::TopK::Mode::MAX,
ov::opset11::TopK::SortType::NONE);
const auto on_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {1});
const auto off_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {0});

@@ -71,11 +73,11 @@ OutputVector hardmax(const Node& node) {
row_size = ngraph::onnx_import::reshape::interpret_as_scalar(row_size);
const auto topk =
std::make_shared<default_opset::TopK>(input,
default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}),
axis,
default_opset::TopK::Mode::MAX,
default_opset::TopK::SortType::NONE);
std::make_shared<ov::opset11::TopK>(input,
default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}),
axis,
ov::opset11::TopK::Mode::MAX,
ov::opset11::TopK::SortType::NONE);
const auto on_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {1});
const auto off_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {0});
@@ -6,6 +6,7 @@
#include <cstdint>
#include <memory>
#include <openvino/opsets/opset11.hpp>
#include "default_opset.hpp"
#include "ngraph/node.hpp"

@@ -37,13 +38,12 @@ OutputVector topk(const Node& node) {
const auto k_node = node.get_attribute_as_constant<std::int64_t>("k");
const std::int64_t axis{node.get_attribute_value<std::int64_t>("axis", -1)};
std::shared_ptr<ngraph::Node> top_k =
std::make_shared<default_opset::TopK>(data,
k_node,
axis,
default_opset::TopK::Mode::MAX,
default_opset::TopK::SortType::SORT_VALUES,
element::i64);
std::shared_ptr<ngraph::Node> top_k = std::make_shared<ov::opset11::TopK>(data,
k_node,
axis,
ov::opset11::TopK::Mode::MAX,
ov::opset11::TopK::SortType::SORT_VALUES,
element::i64);
return {top_k->output(0), top_k->output(1)};
}

@@ -55,13 +55,12 @@ OutputVector topk(const Node& node) {
auto k = get_k(node);
const std::int64_t axis{node.get_attribute_value<std::int64_t>("axis", -1)};
std::shared_ptr<ngraph::Node> top_k =
std::make_shared<default_opset::TopK>(data,
k,
axis,
default_opset::TopK::Mode::MAX,
default_opset::TopK::SortType::SORT_VALUES,
element::i64);
std::shared_ptr<ngraph::Node> top_k = std::make_shared<ov::opset11::TopK>(data,
k,
axis,
ov::opset11::TopK::Mode::MAX,
ov::opset11::TopK::SortType::SORT_VALUES,
element::i64);
return {top_k->output(0), top_k->output(1)};
}

@@ -79,13 +78,13 @@ OutputVector topk(const Node& node) {
const auto sorted = node.get_attribute_value<std::int64_t>("sorted", 1);
// Map attribute values to nGraph enums
const auto sort_type = sorted ? default_opset::TopK::SortType::SORT_VALUES : default_opset::TopK::SortType::NONE;
const auto sort_type = sorted ? ov::opset11::TopK::SortType::SORT_VALUES : ov::opset11::TopK::SortType::NONE;
const auto compute_max = static_cast<bool>(largest);
const auto mode = compute_max ? default_opset::TopK::Mode::MAX : default_opset::TopK::Mode::MIN;
const auto mode = compute_max ? ov::opset11::TopK::Mode::MAX : ov::opset11::TopK::Mode::MIN;
std::shared_ptr<ngraph::Node> top_k =
std::make_shared<default_opset::TopK>(data, k, axis, mode, sort_type, element::i64);
std::make_shared<ov::opset11::TopK>(data, k, axis, mode, sort_type, element::i64);
return {top_k->output(0), top_k->output(1)};
}
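All three overloads switch from default_opset::TopK to ov::opset11::TopK with the same argument list, so the change is mechanical. A minimal sketch of constructing the opset11 node as used above (names are illustrative, not from the patch):

    #include <memory>
    #include <openvino/opsets/opset11.hpp>

    std::shared_ptr<ov::Node> make_topk_example(const ov::Output<ov::Node>& data) {
        auto k = ov::opset11::Constant::create(ov::element::i64, ov::Shape{}, {1});
        // output(0) holds the values and output(1) the indices, as returned by topk() above
        return std::make_shared<ov::opset11::TopK>(data,
                                                   k,
                                                   /*axis=*/-1,
                                                   ov::opset11::TopK::Mode::MAX,
                                                   ov::opset11::TopK::SortType::SORT_VALUES,
                                                   ov::element::i64);
    }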
@@ -4,6 +4,8 @@
#include "utils/arg_min_max_factory.hpp"
#include <openvino/opsets/opset11.hpp>
#include "default_opset.hpp"
#include "ngraph/opsets/opset1.hpp"
#include "ngraph/validation_util.hpp"

@@ -18,14 +20,14 @@ ArgMinMaxFactory::ArgMinMaxFactory(const Node& node)
m_select_last_index{node.get_attribute_value<std::int64_t>("select_last_index", 0)} {}
std::shared_ptr<ngraph::Node> ArgMinMaxFactory::make_arg_max() const {
return make_topk_subgraph(default_opset::TopK::Mode::MAX);
return make_topk_subgraph(ov::opset11::TopK::Mode::MAX);
}
std::shared_ptr<ngraph::Node> ArgMinMaxFactory::make_arg_min() const {
return make_topk_subgraph(default_opset::TopK::Mode::MIN);
return make_topk_subgraph(ov::opset11::TopK::Mode::MIN);
}
std::shared_ptr<ngraph::Node> ArgMinMaxFactory::make_topk_subgraph(default_opset::TopK::Mode mode) const {
std::shared_ptr<ngraph::Node> ArgMinMaxFactory::make_topk_subgraph(ov::opset11::TopK::Mode mode) const {
const auto k_node = default_opset::Constant::create(ngraph::element::i64, Shape{}, {1});
if (m_select_last_index == 1) {

@@ -59,11 +61,11 @@ std::shared_ptr<ngraph::Node> ArgMinMaxFactory::make_topk_subgraph(default_opset
const auto axis_node = default_opset::Constant::create(ngraph::element::i64, Shape{1}, {normalized_axis});
const auto reverse = std::make_shared<opset1::Reverse>(m_input_node, axis_node, opset1::Reverse::Mode::INDEX);
const auto topk = std::make_shared<default_opset::TopK>(reverse,
k_node,
normalized_axis,
mode,
default_opset::TopK::SortType::NONE);
const auto topk = std::make_shared<ov::opset11::TopK>(reverse,
k_node,
normalized_axis,
mode,
ov::opset11::TopK::SortType::NONE);
const auto data_shape = std::make_shared<default_opset::ShapeOf>(m_input_node);
const auto dims_on_axis = std::make_shared<default_opset::Gather>(

@@ -88,7 +90,7 @@ std::shared_ptr<ngraph::Node> ArgMinMaxFactory::make_topk_subgraph(default_opset
}
const auto topk =
std::make_shared<default_opset::TopK>(m_input_node, k_node, m_axis, mode, default_opset::TopK::SortType::NONE);
std::make_shared<ov::opset11::TopK>(m_input_node, k_node, m_axis, mode, ov::opset11::TopK::SortType::NONE);
const auto result = std::make_shared<default_opset::Convert>(topk->output(1), element::i64);
@@ -139,13 +139,11 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
if (val == PluginConfigParams::YES) {
if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) {
enforceBF16 = true;
manualEnforceBF16 = true;
} else {
IE_THROW() << "Platform doesn't support BF16 format";
}
} else if (val == PluginConfigParams::NO) {
enforceBF16 = false;
manualEnforceBF16 = false;
} else {
IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_ENFORCE_BF16
<< ". Expected only YES/NO";

@@ -159,13 +157,11 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
if (val == "bf16") {
if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) {
enforceBF16 = true;
manualEnforceBF16 = true;
} else {
IE_THROW() << "Platform doesn't support BF16 format";
}
} else if (val == "f32") {
enforceBF16 = false;
manualEnforceBF16 = false;
} else {
IE_THROW() << "Wrong value for property key " << ov::inference_precision.name()
<< ". Supported values: bf16, f32";
@@ -52,12 +52,10 @@ struct Config {
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
LPTransformsMode lpTransformsMode = LPTransformsMode::On;
bool enforceBF16 = true;
bool manualEnforceBF16 = false;
#else
// Currently INT8 mode is not optimized on ARM / RISCV or other non-x86 platforms, fallback to FP32 mode.
LPTransformsMode lpTransformsMode = LPTransformsMode::Off;
bool enforceBF16 = false;
bool manualEnforceBF16 = false;
#endif
DenormalsOptMode denormalsOptMode = DenormalsOptMode::DO_Keep;
@@ -136,7 +136,7 @@ bool DnnlPostOpsComposer::appendScale(const std::vector<float>& scale, bool isLa
if (oscale_values.size() == 1)
oscale_mask = 0;
else
oscale_mask = 1 << 1; // it works for both Conv/Matmul
oscale_mask = 1 << idxOC;
updateOutputScales();
return true;
}
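The per-channel branch now derives the output-scales mask from the node's idxOC instead of the hard-coded 1 << 1. A sketch of how such a oneDNN-style mask relates to the number of scale values, under the usual convention that each set bit marks a destination dimension with independent scales (an assumption, not taken from the patch):

    #include <cstddef>
    #include <vector>

    size_t expected_scale_count(unsigned mask, const std::vector<size_t>& dst_dims) {
        size_t count = 1;
        for (size_t d = 0; d < dst_dims.size(); ++d) {
            if (mask & (1u << d)) {
                count *= dst_dims[d];  // scales vary along this dimension
            }
        }
        return count;  // mask == 0 -> a single common scale
    }

    // With dst dims {N, OC, H, W} and mask = 1 << 1, count == OC: one scale per output channel.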
@@ -1506,11 +1506,6 @@ bool Graph::InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPo
// Set all non const data paths precision to BF16
void Graph::EnforceBF16() {
// Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision
// only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default
if (!implication(context->isGraphQuantized(), getConfig().manualEnforceBF16))
return;
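The early return reads as "a quantized graph is forced to BF16 only when the user asked for it explicitly". A tiny sketch of the implication() predicate as it is presumably defined (a => b, i.e. !a || b), added only to make the condition explicit:

    // assumption: implication(a, b) == !a || b
    constexpr bool implication_sketch(bool graph_is_quantized, bool manual_enforce_bf16) {
        return !graph_is_quantized || manual_enforce_bf16;
    }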
std::function<void(const NodePtr&, std::unordered_set<NodePtr>& skipNodes)> searchForNodesToSkip;
searchForNodesToSkip = [&](const NodePtr& node, std::unordered_set<NodePtr>& skipNodes) -> void {
for (size_t i = 0; i < node->getParentEdges().size(); i++) {
@@ -261,7 +261,7 @@ void summary_perf(const Graph &graph) {
}
const std::string& summaryPerf = graph.getConfig().debugCaps.summaryPerf;
if (summaryPerf.empty())
if (summaryPerf.empty() || !std::stoi(summaryPerf))
return;
std::map<std::string, double> perf_by_type;

@@ -308,7 +308,7 @@ void summary_perf(const Graph &graph) {
std::stringstream ss;
int percentage = static_cast<int>(it.second*100/total_avg);
if (percentage == 0) break;
ss << std::setw(10) << std::right << percentage << " % :" << it.first << std::endl;
ss << std::setw(10) << std::right << percentage << " % : " << std::setw(8) << std::right << it.second << "(us) " << it.first << std::endl;
std::cout << ss.str();
}
}
@@ -734,21 +734,6 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) {
}
}
/**
* @todo FQ fusing was disabled for BF16 output since oneDNN primitives lack support
* for bf16 depthwise postops.
* This is not the case anymore, because after migration to oneDNN 2.3 FQ will be fused as
* multiple binary post ops.
* This check can already be removed for FC fusing, but should be kept for Convolution,
* which still uses legacy depthwise postops for performance reasons.
*/
static bool BF16QuantizeNodeFusing(const NodePtr& parentNode, const NodePtr& childNode) {
return childNode->getType() == Type::FakeQuantize &&
one_of(Precision::BF16,
parentNode->getOriginalOutputPrecisionAtPort(0),
childNode->getOriginalOutputPrecisionAtPort(0));
}
void GraphOptimizer::FuseFullyConnectedAndSimpleOperation(Graph &graph) {
auto& graphNodes = graph.GetNodes();

@@ -772,12 +757,6 @@ void GraphOptimizer::FuseFullyConnectedAndSimpleOperation(Graph &graph) {
continue;
}
// BF16 Quantize Layer Fusing Disabling
if (BF16QuantizeNodeFusing(parentNode, childNode)) {
parent++;
continue;
}
childNode->fuseInto(parentNode);
if (childNode->getType() == Type::FakeQuantize || childNode->getType() == Type::Eltwise) {

@@ -1066,12 +1045,6 @@ void GraphOptimizer::FuseConvolutionAndSimpleOperation(Graph &graph) {
continue;
}
// BF16 Quantize Layer Fusing Disabling
if (BF16QuantizeNodeFusing(parentNode, childNode)) {
parent++;
continue;
}
childNode->fuseInto(parentNode);
if (childNode->getType() == Type::FakeQuantize || childNode->getType() == Type::Eltwise) {
@@ -503,11 +503,6 @@ void Convolution::getSupportedDescriptors() {
if (canBeExecutedInInt8()) {
DEBUG_LOG(getName(), "Creating I8 descriptor");
// We have to extend convolution_x8s8s32x from oneDNN to support BF16 output data type
if (outputDataType == memory::data_type::bf16)
outputDataType = memory::data_type::f32;
if (eltwisePrecision == Precision::BF16)
eltwisePrecision = Precision::FP32;
// initTryBrgconvFlag depends on outputDataType, should be after outputDataType computed
if (!enforceBrgconv)
initTryBrgconvFlag();
@@ -232,29 +232,29 @@ void FullyConnected::getSupportedDescriptors() {
auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(DATA_ID));
outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(DATA_ID));
if (inputDataType == memory::data_type::f32) {
outputDataType = memory::data_type::f32;
}
if (!fusedWith.empty()) {
outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0));
}
auto weightsDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(WEIGHTS_ID));
// We have to extend gemm_x8s8s32x_inner_product_fwd_t from oneDNN to support BF16 output data type
if ((!one_of(inputDataType , memory::data_type::u8, memory::data_type::s8) || weightsDataType != memory::data_type::s8)
&& inputDataType != memory::data_type::bf16) {
inputDataType = outputDataType = memory::data_type::f32;
}
if (one_of(inputDataType , memory::data_type::u8, memory::data_type::s8)
&& outputDataType == memory::data_type::bf16) {
// revert back outputDataType on special cases
if (inputDataType == memory::data_type::f32) {
// oneDNN only supports f32 output when input is f32, even if FQ is fused
outputDataType = memory::data_type::f32;
}
if (inputDataType == memory::data_type::bf16
&& one_of(outputDataType , memory::data_type::u8, memory::data_type::s8)) {
outputDataType = memory::data_type::bf16;
} else if (inputDataType == memory::data_type::bf16) {
// bf16 input only supports bf16/f32 output, even if FQ is fused as post-ops
if (one_of(outputDataType , memory::data_type::u8, memory::data_type::s8)) {
outputDataType = memory::data_type::bf16;
}
} else if (one_of(inputDataType, memory::data_type::u8, memory::data_type::s8)) {
if (weightsDataType != memory::data_type::s8) {
// weight has to be s8 for INT8 mode, otherwise fallback to
// f32 mode
inputDataType = outputDataType = memory::data_type::f32;
}
} else {
// s32/u32/... unsupported input data types, fallback to f32
inputDataType = outputDataType = memory::data_type::f32;
}
inDims = isDynamicNode() ? makeDummyInputDims() : getInputShapeAtPort(DATA_ID).getStaticDims();
@@ -204,34 +204,6 @@ MatMul::MatMul(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr
}
bool MatMul::canFuse(const NodePtr& node) const {
// per channel binary post op for rank > 2D is supported only by oneDNN reference implementation because of unusual MatMul channel axis (issue 6669)
if (getOutputShapeAtPort(0).getRank() > 2) {
if (const auto* eltwiseNode = dynamic_cast<Eltwise *>(node.get())) {
if (one_of(eltwiseNode->getAlgorithm(), Algorithm::EltwiseAdd,
Algorithm::EltwiseMultiply,
Algorithm::EltwiseSubtract,
Algorithm::EltwiseDivide,
Algorithm::EltwisePrelu,
Algorithm::EltwiseMulAdd,
Algorithm::EltwisePowerStatic) &&
eltwiseNode->getBroadcastingPolicy() != Eltwise::PerTensor) {
return false;
}
} else if (const auto* fakeQuantizeNode = dynamic_cast<FakeQuantize *>(node.get())) {
if (fakeQuantizeNode->getBroadcastingPolicy() != FakeQuantize::PerTensor) {
return false;
}
}
}
// Todo:
// Consider the case when Matmul doesn't support execution in int8, but is getting fused with FQ with int8 output.
// Then the Matmul will change its output precision to fp32, but the FQ child will still have the int8 input precision.
// This information should be propagated! Note that we may need to propagate updated precision to child fused nodes.
if (node->getType() == Type::FakeQuantize &&
one_of(node->getOriginalOutputPrecisionAtPort(0), Precision::I8, Precision::U8) &&
!canBeExecutedInInt8(getOriginalInputPrecisionAtPort(0), getOriginalInputPrecisionAtPort(1)))
return false;
return canFuseSimpleOperation(node);
}

@@ -344,12 +316,20 @@ void MatMul::getSupportedDescriptors() {
outPortPrec = firstInPortPrec = secondInPortPrec = Precision::FP32;
}
Precision postOpsPrec = outPortPrec;
if (!fusedWith.empty()) {
outPortPrec = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0);
postOpsPrec = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0);
}
if (!canBeExecutedInInt8(firstInPortPrec, secondInPortPrec) && one_of(outPortPrec, Precision::U8, Precision::I8))
outPortPrec = Precision::FP32; // INT output is not supported for non-INT inputs
if (canBeExecutedInInt8(firstInPortPrec, secondInPortPrec)) {
// INT8 mode supports a wide range of output precisions
outPortPrec = postOpsPrec;
} else if (postOpsPrec == Precision::FP32) {
// all non-INT8 modes support fp32 output precision
outPortPrec = postOpsPrec;
} else {
// otherwise we ignore postOpsPrec and stay with getOriginalOutputPrecisionAtPort(0)
}
const auto& inputShape0 = getInputShapeAtPort(0);
const auto& inputShape1 = getInputShapeAtPort(1);
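The rewritten block only lets the fused post-op chain dictate the descriptor output precision when that is actually supported. A condensed standalone sketch of the decision (the enum and names are illustrative, not the plugin's types):

    enum class Prec { FP32, BF16, I8, U8 };

    Prec choose_out_precision(bool can_run_int8, Prec original_out, Prec post_ops_out) {
        if (can_run_int8) {
            return post_ops_out;   // INT8 primitives accept a wide range of dst precisions
        }
        if (post_ops_out == Prec::FP32) {
            return post_ops_out;   // every non-INT8 mode can emit fp32
        }
        return original_out;       // otherwise the post-op precision is ignored
    }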
@@ -479,11 +479,6 @@ std::ostream & operator<<(std::ostream & os, const PrintableModel& model) {
os << std::endl;
// recursively output subgraphs
if (auto subgraph = std::dynamic_pointer_cast<ngraph::snippets::op::Subgraph>(op)) {
os << "\t\t snippets Subgraph: " << subgraph->get_friendly_name() << " is_quantized:" << subgraph->is_quantized() << std::endl;
os << PrintableModel(subgraph->body(), tag, prefix + "\t\t");
}
if (auto msubgraph = std::dynamic_pointer_cast<op::util::MultiSubGraphOp>(op)) {
auto cnt = msubgraph->get_internal_subgraphs_size();
for (int i = 0; i < cnt; i++) {
src/plugins/intel_cpu/thirdparty/onednn (vendored)

@@ -1 +1 @@
Subproject commit bd3498162fab7401b571c6ce77d837f1adcff265
Subproject commit 02857209960e9d91c1b3df90ab4c7ac359bf0973
@@ -688,22 +688,6 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st
if (dims_size >= 2) {
InferenceEngine::CNNLayerPtr prev_layer, pre_prev_layer;
// Skip all convolutions in this check, they will be handled during concat primitive creation
auto isFusableWithConv = [](InferenceEngine::CNNLayerPtr ptr) {
return (LayerInfo(ptr).isFusableWithConv() || LayerInfo(ptr).isNonFunctional() ||
(LayerInfo(ptr).isPermute() &&
((ptr->input()->getLayout() == InferenceEngine::Layout::NCHW &&
ptr->GetParamAsInts("order") ==
permute::GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)) ||
(ptr->input()->getLayout() == InferenceEngine::Layout::CHW &&
ptr->GetParamAsInts("order") == std::vector<int32_t>{0, 2, 1} /* NCW to NWC */))));
};
for (auto input_idx = 0; input_idx != concat_layer->insData.size(); input_idx++) {
prev_layer = InferenceEngine::CNNNetPrevLayerSkipCertain(layer, input_idx, isFusableWithConv);
if (prev_layer && LayerInfo(prev_layer).isConvolution())
return true;
}
// Look for trivial cases which will be flattened later
// for explanation of what is meant by trivial case,

@@ -783,10 +767,6 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st
if (!is_not_trivial_concat || concat_all_const_or_inputs)
return true;
// For interleaved inputs start checking from axis 1
// and allow concatenation on axis 0 only when all other dimensions = 1
std::rotate(in_dims.begin(), in_dims.begin() + 1, in_dims.end());
concat_axis == 0 ? concat_axis = static_cast<unsigned int>(dims_size - 1) : concat_axis--;
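The rotate shifts the leading (interleaved) dimension to the back, and the concat axis is renumbered to follow it. A small illustrative sketch:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    void rotate_for_interleaved_check(std::vector<size_t>& in_dims, size_t& concat_axis) {
        std::rotate(in_dims.begin(), in_dims.begin() + 1, in_dims.end());  // {d0, d1, d2} -> {d1, d2, d0}
        concat_axis = (concat_axis == 0) ? in_dims.size() - 1 : concat_axis - 1;
    }

    // Example: in_dims = {8, 16, 1}, concat_axis = 0 -> in_dims = {16, 1, 8}, concat_axis = 2.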
// Looking for any axis with dimension > 1 before concatenation axis;
// in general such concatenation is unsupported
@@ -624,19 +624,6 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
}
#ifndef DEBUG_USE_NEW_PASS
// TODO: convolution might not be the first layer in sorted order but connected via split for example - don't know
// how kaldi will handle that
if (!dnn->do_rotate_input) {
if ((inputs->getLayout() != InferenceEngine::Layout::NHWC || transpose_h_w) &&
LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features are opposite orientation
dnn->do_rotate_input = true;
dnn->num_rotate_rows = effectiveStride;
dnn->num_rotate_columns = num_inputs / effectiveStride;
} else {
dnn->do_rotate_input = false;
}
}
#endif
connectOutput(layer, ptr_outputs, num_data_bytes_out);

@@ -816,22 +803,6 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
// TODO: convolution might not be the first layer in sorted order but connected via split for example - don't know
// how kaldi will handle that
if (!dnn->do_rotate_input && inputs->getLayout() != InferenceEngine::Layout::NHWC &&
LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features are opposite orientation
dnn->do_rotate_input = true;
dnn->num_rotate_rows = in_channels;
if (in_height != 1) {
dnn->num_rotate_rows *= convolution._stride_y;
}
if (in_width != 1) {
dnn->num_rotate_rows *= convolution._stride_x;
}
dnn->num_rotate_columns = num_inputs / dnn->num_rotate_rows;
}
connectOutput(layer, ptr_outputs, num_data_bytes_out);
const auto kernelHW = convolution._kernel_y * convolution._kernel_x;
@@ -928,14 +928,6 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
}
}
if (dnn->do_rotate_input && transpose_inputs_info.empty()) {
for (auto& inputLayer : inputLayers) {
transpose_inputs_info.insert(
{inputLayer->name,
{TranspositionInfo{dnn->do_rotate_input, dnn->num_rotate_rows, dnn->num_rotate_columns}}});
}
}
// TODO: Need to remove this conversion when ngraph NCHW<->NHWC transformation is enabled
if (!transpose_inputs_info.empty()) {
ConvertTransposeMapToModel(transpose_inputs_info, inputs_ptr_->Get());
@@ -79,6 +79,7 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
const bool has_convolution = ov::op::util::has_op_with_type<ov::opset8::Convolution>(model);
const bool has_maxpool = ov::op::util::has_op_with_type<ov::opset8::MaxPool>(model);
const bool has_slice = ov::op::util::has_op_with_type<ov::opset8::Slice>(model);
const bool has_matmul = ov::op::util::has_op_with_type<ov::opset8::MatMul>(model);
const bool has_mvn = ov::op::util::has_op_with_type<ov::opset8::MVN>(model) ||
ov::op::util::has_op_with_type<ov::op::v0::MVN>(model);

@@ -136,8 +137,7 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
manager.register_pass<ov::intel_gna::pass::SubstituteSoftsign>();
manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeLayerToBeEliminated>();
// TODO enable this transformation for networks without convolutions
if (has_convolution || has_maxpool || has_mvn) {
EMUTEX_DEBUG_VISUALIZE("before");
if (has_convolution || has_maxpool || has_mvn || has_matmul) {
manager.register_pass<ov::intel_gna::pass::TransposeNCHW>();
EMUTEX_DEBUG_VISUALIZE("after_TransposeNCHW");
manager.register_pass<ov::intel_gna::pass::ReshapeTransposeSubstitute>();
@@ -18,6 +18,54 @@ namespace intel_gna {
namespace pass {
namespace helper {

void GetConvData(std::shared_ptr<ngraph::opset7::Convolution> conv, ConvData& conv_data) {
OPENVINO_ASSERT(conv);
conv_data.output_height = conv->get_output_shape(0)[2];
conv_data.output_width = conv->get_output_shape(0)[3];
conv_data.input_channel_count = conv->input_value(0).get_shape()[1];
conv_data.input_height = conv->input_value(0).get_shape()[2];
conv_data.input_width = conv->input_value(0).get_shape()[3];
conv_data.filter_count = conv->input_value(1).get_shape()[0];
conv_data.filter_channel_count = conv->input_value(1).get_shape()[1];
conv_data.filter_height = conv->input_value(1).get_shape()[2];
conv_data.filter_width = conv->input_value(1).get_shape()[3];
conv_data.filter_dilation_height = conv->get_dilations()[0];
conv_data.filter_dilation_width = conv->get_dilations()[1];
conv_data.filter_stride_height = conv->get_strides()[0];
conv_data.filter_stride_width = conv->get_strides()[1];
conv_data.output_channel_count = conv_data.filter_count;
conv_data.pads_begin_height = conv->get_pads_begin()[0];
conv_data.pads_begin_width = conv->get_pads_begin()[1];
conv_data.pads_end_height = conv->get_pads_end()[0];
conv_data.pads_end_width = conv->get_pads_end()[1];
conv_data.padding_type = conv->get_auto_pad();
conv_data.element_type = conv->get_element_type();
}

void GetConvData(std::shared_ptr<ov::intel_gna::op::GNAConvolution> conv, ConvData& conv_data) {
OPENVINO_ASSERT(conv);
conv_data.output_height = conv->get_output_shape(0)[2];
conv_data.output_width = conv->get_output_shape(0)[3];
conv_data.input_channel_count = conv->input_value(0).get_shape()[3];
conv_data.input_height = conv->input_value(0).get_shape()[1];
conv_data.input_width = conv->input_value(0).get_shape()[2];
conv_data.filter_count = conv->input_value(1).get_shape()[0];
conv_data.filter_channel_count = conv->input_value(1).get_shape()[3];
conv_data.filter_height = conv->input_value(1).get_shape()[1];
conv_data.filter_width = conv->input_value(1).get_shape()[2];
conv_data.filter_dilation_height = conv->get_dilations()[0];
conv_data.filter_dilation_width = conv->get_dilations()[1];
conv_data.filter_stride_height = conv->get_strides()[0];
conv_data.filter_stride_width = conv->get_strides()[1];
conv_data.output_channel_count = conv_data.filter_count;
conv_data.pads_begin_height = conv->get_pads_begin()[0];
conv_data.pads_begin_width = conv->get_pads_begin()[1];
conv_data.pads_end_height = conv->get_pads_end()[0];
conv_data.pads_end_width = conv->get_pads_end()[1];
conv_data.padding_type = conv->get_auto_pad();
conv_data.element_type = conv->get_element_type();
}
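The two overloads read the same geometry from different index positions: the standard opset7 Convolution stores activations as NCHW and filters as OIHW, while GNAConvolution apparently keeps them channel-last (NHWC activations, OHWI filters). A short sketch of the index mapping under that assumption (illustrative only):

    #include <cstddef>

    struct DimIdx { size_t channels, height, width; };

    constexpr DimIdx nchw_activations{1, 2, 3};  // opset7::Convolution input_value(0)
    constexpr DimIdx nhwc_activations{3, 1, 2};  // GNAConvolution input_value(0)
    constexpr DimIdx oihw_filters{1, 2, 3};      // opset7::Convolution input_value(1), O at index 0
    constexpr DimIdx ohwi_filters{3, 1, 2};      // GNAConvolution input_value(1), O at index 0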
std::function<bool(ngraph::Output<ngraph::Node>)> consumers_and_rank(const size_t expected_count,
const ngraph::Dimension& expected_rank) {
return [=](ngraph::Output<ngraph::Node> output) -> bool {

@@ -42,30 +42,15 @@ struct ConvData {
* @param conv_data convolution data structure to put data into
* @return void
*/
template <class T>
void GetConvData(const T& conv, ConvData& conv_data) {
OPENVINO_ASSERT(conv);
conv_data.output_height = conv->get_output_shape(0)[2];
conv_data.output_width = conv->get_output_shape(0)[3];
conv_data.input_channel_count = conv->input_value(0).get_shape()[1];
conv_data.input_height = conv->input_value(0).get_shape()[2];
conv_data.input_width = conv->input_value(0).get_shape()[3];
conv_data.filter_count = conv->input_value(1).get_shape()[0];
conv_data.filter_channel_count = conv->input_value(1).get_shape()[1];
conv_data.filter_height = conv->input_value(1).get_shape()[2];
conv_data.filter_width = conv->input_value(1).get_shape()[3];
conv_data.filter_dilation_height = conv->get_dilations()[0];
conv_data.filter_dilation_width = conv->get_dilations()[1];
conv_data.filter_stride_height = conv->get_strides()[0];
conv_data.filter_stride_width = conv->get_strides()[1];
conv_data.output_channel_count = conv_data.filter_count;
conv_data.pads_begin_height = conv->get_pads_begin()[0];
conv_data.pads_begin_width = conv->get_pads_begin()[1];
conv_data.pads_end_height = conv->get_pads_end()[0];
conv_data.pads_end_width = conv->get_pads_end()[1];
conv_data.padding_type = conv->get_auto_pad();
conv_data.element_type = conv->get_element_type();
}
void GetConvData(std::shared_ptr<ngraph::opset7::Convolution> conv, ConvData& conv_data);

/**
* @brief gets all convolution related data into a struct for further processing
* @param conv GNA custom convolution node to get data of
* @param conv_data convolution data structure to put data into
* @return void
*/
void GetConvData(std::shared_ptr<ov::intel_gna::op::GNAConvolution> conv, ConvData& conv_data);

/**
* @brief ngraph matcher predicate fusing existing predicates for consumers count and rank of a layer