Merge remote-tracking branch 'upstream/master' into sy/test/ConvolutionLayerTest_dynamic_shape_case
This commit is contained in:
commit
dd3d17c85f
@ -0,0 +1,230 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <ie_core.hpp>
|
||||
#include <ie_ngraph_utils.hpp>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <shared_test_classes/base/layer_test_utils.hpp>
|
||||
#include <tuple>
|
||||
|
||||
#include "base_reference_test.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace InferenceEngine;
|
||||
using namespace reference_tests;
|
||||
|
||||
struct ExperimentalROIParams {
|
||||
ExperimentalROIParams(const std::vector<Tensor>& experimental_detectron_roi_feature_inputs,
|
||||
const std::vector<Tensor>& expected_results,
|
||||
const std::string& test_case_name)
|
||||
: inputs{experimental_detectron_roi_feature_inputs},
|
||||
expected_results{expected_results},
|
||||
test_case_name{test_case_name} {}
|
||||
|
||||
std::vector<Tensor> inputs;
|
||||
std::vector<Tensor> expected_results;
|
||||
std::string test_case_name;
|
||||
};
|
||||
|
||||
// Parameterized reference test for op::v6::ExperimentalDetectronROIFeatureExtractor.
// Every ExperimentalROIParams instance supplies the input tensors, the expected
// output tensors and the name of the test case.
class ReferenceExperimentalROILayerTest : public testing::TestWithParam<ExperimentalROIParams>, public CommonReferenceTest {
public:
    // Builds the function under test from the current parameter and stores the
    // input data and the reference output data for the comparison.
    void SetUp() override {
        // GetParam() returns a reference to the parameter; bind to it instead of
        // copying both tensor vectors and the name string.
        const auto& params = GetParam();
        function = create_function(params.inputs);
        inputData.reserve(params.inputs.size());
        refOutData.reserve(params.expected_results.size());
        for (const auto& input_tensor : params.inputs) {
            inputData.push_back(input_tensor.data);
        }
        for (const auto& expected_tensor : params.expected_results) {
            refOutData.push_back(expected_tensor.data);
        }
    }

    // Name generator for the test suite: returns the test_case_name stored in the parameter.
    static std::string getTestCaseName(const testing::TestParamInfo<ExperimentalROIParams>& obj) {
        // Read the name straight from the parameter; no copy of the whole struct is needed.
        return obj.param.test_case_name;
    }

private:
    // Creates a Function holding a single ExperimentalDetectronROIFeatureExtractor node.
    // One Parameter is created per entry of `inputs`, using that tensor's element type and shape.
    // The returned function exposes the node's outputs 0 and 1.
    std::shared_ptr<Function> create_function(const std::vector<Tensor>& inputs) {
        // Fixed operation attributes shared by every test case in this file.
        op::v6::ExperimentalDetectronROIFeatureExtractor::Attributes attrs;
        attrs.aligned = false;
        attrs.output_size = 3;
        attrs.sampling_ratio = 2;
        attrs.pyramid_scales = {4};

        const size_t num_of_inputs = inputs.size();
        NodeVector node_vector(num_of_inputs);
        ParameterVector parameter_vector(num_of_inputs);
        for (size_t i = 0; i < num_of_inputs; ++i) {
            const auto& current_input = inputs[i];
            auto current_parameter = std::make_shared<op::Parameter>(current_input.type, current_input.shape);
            // The same Parameter is both an argument of the node and a parameter of the function.
            node_vector[i] = current_parameter;
            parameter_vector[i] = current_parameter;
        }

        auto roi = std::make_shared<op::v6::ExperimentalDetectronROIFeatureExtractor>(node_vector, attrs);
        return std::make_shared<Function>(OutputVector{roi->output(0), roi->output(1)}, parameter_vector);
    }
};
|
||||
|
||||
// Runs Exec() once per parameter set registered for ReferenceExperimentalROILayerTest.
// NOTE(review): Exec() is not defined in this file; presumably it is inherited from
// CommonReferenceTest and compares the outputs with refOutData - confirm.
TEST_P(ReferenceExperimentalROILayerTest, ExperimentalROIWithHardcodedRefs) {
    Exec();
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_ExperimentalROI_With_Hardcoded_Refs,
|
||||
ReferenceExperimentalROILayerTest,
|
||||
::testing::Values(
|
||||
ExperimentalROIParams(
|
||||
std::vector<Tensor>{Tensor(Shape{2, 4},
|
||||
ngraph::element::f32,
|
||||
std::vector<float>{0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
|
||||
Tensor(Shape{1, 2, 2, 3},
|
||||
ngraph::element::f32,
|
||||
std::vector<float>{0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0})},
|
||||
std::vector<Tensor>{Tensor(Shape{2, 2, 3, 3},
|
||||
ngraph::element::f32,
|
||||
std::vector<float>{1.416667,
|
||||
1.75,
|
||||
2.083333,
|
||||
2.416667,
|
||||
2.75,
|
||||
3.083333,
|
||||
3.166667,
|
||||
3.5,
|
||||
3.833333,
|
||||
7.416667,
|
||||
7.75,
|
||||
8.083333,
|
||||
8.416667,
|
||||
8.75,
|
||||
9.083334,
|
||||
9.166666,
|
||||
9.5,
|
||||
9.833334,
|
||||
4.166667,
|
||||
4.5,
|
||||
4.833333,
|
||||
4.166667,
|
||||
4.5,
|
||||
4.833333,
|
||||
2.083333,
|
||||
2.25,
|
||||
2.416667,
|
||||
10.16667,
|
||||
10.5,
|
||||
10.83333,
|
||||
10.16667,
|
||||
10.5,
|
||||
10.83333,
|
||||
5.083333,
|
||||
5.25,
|
||||
5.416667}),
|
||||
Tensor(Shape{2, 4},
|
||||
ngraph::element::f32,
|
||||
std::vector<float>{0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0})},
|
||||
"experimental_detectron_roi_feature_eval_f32"),
|
||||
ExperimentalROIParams(
|
||||
std::vector<Tensor>{Tensor(Shape{2, 4},
|
||||
ngraph::element::f16,
|
||||
std::vector<ngraph::float16>{0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
|
||||
Tensor(Shape{1, 2, 2, 3},
|
||||
ngraph::element::f16,
|
||||
std::vector<ngraph::float16>{0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0})},
|
||||
std::vector<Tensor>{Tensor(Shape{2, 2, 3, 3},
|
||||
ngraph::element::f16,
|
||||
std::vector<ngraph::float16>{1.416667,
|
||||
1.75,
|
||||
2.083333,
|
||||
2.416667,
|
||||
2.75,
|
||||
3.083333,
|
||||
3.166667,
|
||||
3.5,
|
||||
3.833333,
|
||||
7.416667,
|
||||
7.75,
|
||||
8.083333,
|
||||
8.416667,
|
||||
8.75,
|
||||
9.083334,
|
||||
9.166666,
|
||||
9.5,
|
||||
9.833334,
|
||||
4.166667,
|
||||
4.5,
|
||||
4.833333,
|
||||
4.166667,
|
||||
4.5,
|
||||
4.833333,
|
||||
2.083333,
|
||||
2.25,
|
||||
2.416667,
|
||||
10.16667,
|
||||
10.5,
|
||||
10.83333,
|
||||
10.16667,
|
||||
10.5,
|
||||
10.83333,
|
||||
5.083333,
|
||||
5.25,
|
||||
5.416667}),
|
||||
Tensor(Shape{2, 4},
|
||||
ngraph::element::f16,
|
||||
std::vector<ngraph::float16>{0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0})},
|
||||
"experimental_detectron_roi_feature_eval_f16"),
|
||||
ExperimentalROIParams(
|
||||
std::vector<Tensor>{Tensor(Shape{2, 4},
|
||||
ngraph::element::bf16,
|
||||
std::vector<ngraph::bfloat16>{0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
|
||||
Tensor(Shape{1, 2, 2, 3},
|
||||
ngraph::element::bf16,
|
||||
std::vector<ngraph::bfloat16>{0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0})},
|
||||
std::vector<Tensor>{Tensor(Shape{2, 2, 3, 3},
|
||||
ngraph::element::bf16,
|
||||
std::vector<ngraph::bfloat16>{1.416667,
|
||||
1.75,
|
||||
2.083333,
|
||||
2.416667,
|
||||
2.75,
|
||||
3.083333,
|
||||
3.166667,
|
||||
3.5,
|
||||
3.833333,
|
||||
7.416667,
|
||||
7.75,
|
||||
8.083333,
|
||||
8.416667,
|
||||
8.75,
|
||||
9.083334,
|
||||
9.166666,
|
||||
9.5,
|
||||
9.833334,
|
||||
4.166667,
|
||||
4.5,
|
||||
4.833333,
|
||||
4.166667,
|
||||
4.5,
|
||||
4.833333,
|
||||
2.083333,
|
||||
2.25,
|
||||
2.416667,
|
||||
10.16667,
|
||||
10.5,
|
||||
10.83333,
|
||||
10.16667,
|
||||
10.5,
|
||||
10.83333,
|
||||
5.083333,
|
||||
5.25,
|
||||
5.416667}),
|
||||
Tensor(Shape{2, 4},
|
||||
ngraph::element::bf16,
|
||||
std::vector<ngraph::bfloat16>{0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0})},
|
||||
"experimental_detectron_roi_feature_eval_bf16")));
|
@ -39,8 +39,6 @@ protected:
|
||||
NodeVector& convertNodes,
|
||||
NodeVector& subtractNodes,
|
||||
NodeVector& multiplyNodes) const;
|
||||
|
||||
std::shared_ptr<Node> concatenateDeqNodes(NodeVector& nodes) const;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
|
@ -14,8 +14,6 @@ class LP_TRANSFORMATIONS_API DepthToSpaceTransformation : public TransparentBase
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
DepthToSpaceTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
};
|
||||
|
||||
|
@ -18,6 +18,7 @@ public:
|
||||
~TransparentBaseTransformation() override {};
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
|
@ -176,13 +176,13 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter
|
||||
// after : Y = SC2 * ( SC1' * (X1 - SH1') + X2 ) , where :
|
||||
// SC1' = SC1 / SC2
|
||||
// SH1' = SH1 + SC2 * SH2 / SC1
|
||||
std::shared_ptr<Node> newSubtractFullPathValues = fold<opset1::Add>(
|
||||
auto newSubtractFullPathValues = fold<opset1::Add>(
|
||||
subtractFullPathValues,
|
||||
fold<opset1::Divide>(
|
||||
fold<opset1::Multiply>(subtractEmptyPathValues, multiplyEmptyPathValues),
|
||||
multiplyFullPathValues));
|
||||
|
||||
std::shared_ptr<Node> newMultiplyFullPathValues = fold<opset1::Divide>(multiplyFullPathValues, multiplyEmptyPathValues);
|
||||
auto newMultiplyFullPathValues = fold<opset1::Divide>(multiplyFullPathValues, multiplyEmptyPathValues);
|
||||
|
||||
if (NetworkHelper::isZeroConst(newSubtractFullPathValues)) {
|
||||
newSubtractFullPathValues = nullptr;
|
||||
|
@ -1,19 +0,0 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/common/operation_precision_restriction.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_set>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/pattern/op/or.hpp>
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/rt_info/precisions_attribute.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
|
@ -70,20 +70,11 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
|
||||
}
|
||||
}
|
||||
|
||||
auto broadcastElementWiseConst = [](
|
||||
// FakeQuantize constant shape must be broadcastable to the shape on data.
|
||||
std::shared_ptr<ngraph::opset1::Constant> operation,
|
||||
const ngraph::Shape targetShape) -> std::shared_ptr<Node> {
|
||||
auto targetShapeConst = std::make_shared<ngraph::opset1::Constant>(
|
||||
element::i64, ngraph::Shape{ targetShape.size() },
|
||||
targetShape);
|
||||
|
||||
auto broadcast = ngraph::pass::low_precision::fold<ngraph::opset1::Broadcast>(
|
||||
operation,
|
||||
targetShapeConst,
|
||||
ngraph::op::AutoBroadcastType::NUMPY);
|
||||
|
||||
return broadcast;
|
||||
// FakeQuantize constant shape must be broadcastable to the shape on data.
|
||||
auto broadcastElementWiseConst = [](std::shared_ptr<opset1::Constant> operation, const Shape targetShape) {
|
||||
auto targetShapeConst = std::make_shared<opset1::Constant>(element::i64, Shape{ targetShape.size() }, targetShape);
|
||||
auto broadcast = fold<ngraph::opset1::Broadcast>(operation, targetShapeConst);
|
||||
return broadcast;
|
||||
};
|
||||
|
||||
bool someDqInLowPrecision = std::any_of(
|
||||
@ -247,15 +238,8 @@ void ConcatTransformation::fillDequantizationNodes(
|
||||
// FakeQuantize constant shape must be broadcastable to the shape on data.
|
||||
std::shared_ptr<ngraph::opset1::Constant> operation,
|
||||
const ngraph::Shape targetShape) -> std::shared_ptr<Node> {
|
||||
auto targetShapeConst = std::make_shared<ngraph::opset1::Constant>(
|
||||
element::i64, ngraph::Shape{ targetShape.size() },
|
||||
targetShape);
|
||||
|
||||
auto broadcast = ngraph::pass::low_precision::fold<ngraph::opset1::Broadcast>(
|
||||
operation,
|
||||
targetShapeConst,
|
||||
ngraph::op::AutoBroadcastType::NUMPY);
|
||||
|
||||
auto targetShapeConst = opset1::Constant::create(element::i64, ngraph::Shape{ targetShape.size() }, targetShape);
|
||||
auto broadcast = fold<ngraph::opset1::Broadcast>(operation, targetShapeConst);
|
||||
return broadcast;
|
||||
};
|
||||
|
||||
@ -308,10 +292,6 @@ void ConcatTransformation::fillDequantizationNodes(
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> ConcatTransformation::concatenateDeqNodes(NodeVector& nodes) const {
|
||||
return nodes.size() == 1ul ? nodes[0] : fold<ngraph::opset1::Concat>(nodes, 1);
|
||||
}
|
||||
|
||||
bool ConcatTransformation::isHandled(const TransformationContext& context, const std::vector<std::shared_ptr<ngraph::Node>>& quantizationOperations) {
|
||||
for (const std::shared_ptr<ngraph::Node>& quantizationLayer : quantizationOperations) {
|
||||
if (context.quantizedFakeQuantizeNames.find(quantizationLayer->get_friendly_name()) != context.quantizedFakeQuantizeNames.end()) {
|
||||
|
@ -49,7 +49,7 @@ bool ConvertTransformation::transform(TransformationContext& context, ngraph::pa
|
||||
const ngraph::element::Type precisionBefore = convert->get_input_element_type(0);
|
||||
|
||||
std::shared_ptr<opset1::Subtract> subtract = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(
|
||||
convert->get_input_node_shared_ptr(0),
|
||||
convert->input_value(0),
|
||||
std::make_shared<opset1::Constant>(precisionBefore, Shape{}, std::vector<size_t>({ 0 })));
|
||||
NetworkHelper::setOutDataPrecision(subtract, convert->get_output_element_type(0));
|
||||
|
||||
|
@ -181,7 +181,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
||||
zeroPointShape[1] = static_cast<size_t>(weightsPShape[1].get_length());
|
||||
|
||||
auto zeroPointConstant = fold<opset1::Broadcast>(
|
||||
subtractFromWeights->get_input_node_shared_ptr(1),
|
||||
subtractFromWeights->input_value(1),
|
||||
std::make_shared<opset1::Constant>(element::i32, Shape{zeroPointShape.size()}, zeroPointShape));
|
||||
replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
|
||||
}
|
||||
|
@ -1,22 +0,0 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/create_precisions_dependent_attribute.hpp"
|
||||
|
||||
#include <assert.h>
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/pattern/op/or.hpp>
|
||||
#include "low_precision/rt_info/precisions_attribute.hpp"
|
||||
#include "low_precision/rt_info/precision_preserved_attribute.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace ngraph::pass::low_precision;
|
@ -29,21 +29,6 @@ DepthToSpaceTransformation::DepthToSpaceTransformation(const Params& params) : T
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
||||
bool DepthToSpaceTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) {
|
||||
std::shared_ptr<Node> depthToSpace = m.get_match_root();
|
||||
if (!canBeTransformed(context, depthToSpace)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
depthToSpace = NetworkHelper::separateInStandaloneBranch(depthToSpace);
|
||||
moveDequantizationAfter(context, depthToSpace, NetworkHelper::getDequantization(depthToSpace), true);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DepthToSpaceTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DepthToSpaceTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const {
|
||||
if (!LayerTransformation::canBeTransformed(context, layer)) {
|
||||
return false;
|
||||
|
@ -67,7 +67,7 @@ static std::shared_ptr<Node> updateShape(std::shared_ptr<Node> constantOp, const
|
||||
return constantOp;
|
||||
}
|
||||
|
||||
static std::shared_ptr<Node> getData(const std::shared_ptr<Node>& eltwise) {
|
||||
static std::shared_ptr<Node> getDataNode(const std::shared_ptr<Node>& eltwise) {
|
||||
if (!ov::is_type<opset1::Constant>(eltwise->get_input_node_shared_ptr(0))) {
|
||||
return eltwise->get_input_node_shared_ptr(0);
|
||||
}
|
||||
@ -123,7 +123,7 @@ bool FakeQuantizeTransformation::checkElementwise(const std::shared_ptr<Node>& e
|
||||
}
|
||||
}
|
||||
|
||||
return fq::getData(eltwise) != nullptr;
|
||||
return fq::getDataNode(eltwise) != nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwise(
|
||||
@ -132,8 +132,8 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
|
||||
const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize) const {
|
||||
const std::shared_ptr<Node> eltwise = fakeQuantize->get_input_node_shared_ptr(0);
|
||||
|
||||
std::shared_ptr<Node> inputLowConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(1), deqPrecision);
|
||||
std::shared_ptr<Node> inputHighConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(2), deqPrecision);
|
||||
std::shared_ptr<Node> inputLowConst_f32 = foldConvert(fakeQuantize->input_value(1), deqPrecision);
|
||||
std::shared_ptr<Node> inputHighConst_f32 = foldConvert(fakeQuantize->input_value(2), deqPrecision);
|
||||
|
||||
std::shared_ptr<opset1::Constant> constant = fq::getConstant(eltwise);
|
||||
if (ov::is_type<opset1::Multiply>(eltwise) && checkElementwise(eltwise)) {
|
||||
@ -166,10 +166,10 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
|
||||
inputLowConst_f32 = fq::updateShape(fold<opset1::Add>(inputLowConst_f32, value), fakeQuantize->get_output_partial_shape(0));
|
||||
inputHighConst_f32 = fq::updateShape(fold<opset1::Add>(inputHighConst_f32, value), fakeQuantize->get_output_partial_shape(0));
|
||||
} else if (ov::is_type<opset1::Add>(eltwise) && checkElementwise(eltwise)) {
|
||||
if (ov::is_type<opset1::Convolution>(fq::getData(eltwise)) ||
|
||||
ov::is_type<opset1::GroupConvolution>(fq::getData(eltwise)) ||
|
||||
ov::is_type<opset1::ConvolutionBackpropData>(fq::getData(eltwise)) ||
|
||||
ov::is_type<opset1::GroupConvolutionBackpropData>(fq::getData(eltwise))) {
|
||||
if (ov::is_type<opset1::Convolution>(fq::getDataNode(eltwise)) ||
|
||||
ov::is_type<opset1::GroupConvolution>(fq::getDataNode(eltwise)) ||
|
||||
ov::is_type<opset1::ConvolutionBackpropData>(fq::getDataNode(eltwise)) ||
|
||||
ov::is_type<opset1::GroupConvolutionBackpropData>(fq::getDataNode(eltwise))) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -189,7 +189,7 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const auto data = fq::getData(eltwise);
|
||||
const auto data = fq::getDataNode(eltwise);
|
||||
const size_t outputIdx = NetworkHelper::getParentOutputIndex(data, eltwise);
|
||||
|
||||
const auto newFakeQuantize = ov::as_type_ptr<opset1::FakeQuantize>(fakeQuantize->clone_with_new_inputs({
|
||||
|
@ -42,7 +42,7 @@ bool FoldConvertTransformation::transform(TransformationContext& context, ngraph
|
||||
return;
|
||||
}
|
||||
|
||||
const auto resultConstant = ngraph::pass::low_precision::foldConvert(convert->get_input_node_shared_ptr(0), convert->output(0).get_element_type());
|
||||
const auto resultConstant = ngraph::pass::low_precision::foldConvert(convert->input_value(0), convert->get_output_element_type(0));
|
||||
assert(ov::is_type<opset1::Constant>(resultConstant));
|
||||
|
||||
replace_node(convert, resultConstant);
|
||||
|
@ -47,8 +47,8 @@ std::shared_ptr<Node> removeConvertIfPossibleForSubtract(
|
||||
if (NetworkHelper::checkConstantValuePrecision(precisionBeforeConvert, subtract->get_input_node_shared_ptr(1))) {
|
||||
newSubtract = std::make_shared<ngraph::op::TypeRelaxed<opset1::Subtract>>(
|
||||
std::vector<ngraph::element::Type>{ element::f32, element::f32 }, std::vector<ngraph::element::Type>{},
|
||||
ngraph::op::TemporaryReplaceOutputType(convert->get_input_source_output(0), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(subtract->get_input_node_shared_ptr(1), element::f32).get());
|
||||
ngraph::op::TemporaryReplaceOutputType(convert->input_value(0), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(subtract->input_value(1), element::f32).get());
|
||||
NetworkHelper::setOutDataPrecisionForTypeRelaxed(newSubtract, subtract->get_output_element_type(0));
|
||||
replace_node(subtract, newSubtract);
|
||||
}
|
||||
@ -63,11 +63,11 @@ bool FuseConvertTransformation::transform(TransformationContext& context, ngraph
|
||||
}
|
||||
|
||||
const auto convert = ov::as_type_ptr<opset1::Convert>(op->get_input_node_shared_ptr(0));
|
||||
std::shared_ptr<Node> parent = convert->get_input_node_shared_ptr(0);
|
||||
auto parent = convert->input_value(0);
|
||||
|
||||
if (ov::is_type<opset1::Constant>(parent)) {
|
||||
if (ov::is_type<opset1::Constant>(parent.get_node_shared_ptr())) {
|
||||
auto convertedConstant = foldConvert(parent, convert->get_convert_element_type());
|
||||
NetworkHelper::copyInfo(parent, convertedConstant);
|
||||
NetworkHelper::copyInfo(parent.get_node_shared_ptr(), convertedConstant);
|
||||
replace_node(convert, convertedConstant);
|
||||
} else {
|
||||
std::shared_ptr<Node> newOp;
|
||||
@ -77,15 +77,15 @@ bool FuseConvertTransformation::transform(TransformationContext& context, ngraph
|
||||
} else if (ov::is_type<opset1::Multiply>(op)) {
|
||||
newOp = std::make_shared<ngraph::op::TypeRelaxed<opset1::Multiply>>(
|
||||
std::vector<ngraph::element::Type>{ element::f32, element::f32 }, std::vector<ngraph::element::Type>{},
|
||||
ngraph::op::TemporaryReplaceOutputType(convert->get_input_source_output(0), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(op->get_input_node_shared_ptr(1), element::f32).get());
|
||||
ngraph::op::TemporaryReplaceOutputType(convert->input_value(0), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(op->input_value(1), element::f32).get());
|
||||
NetworkHelper::setOutDataPrecisionForTypeRelaxed(newOp, op->get_output_element_type(0));
|
||||
replace_node(op, newOp);
|
||||
} else if (ov::is_type<opset1::Add>(op)) {
|
||||
newOp = std::make_shared<ngraph::op::TypeRelaxed<opset1::Add>>(
|
||||
std::vector<ngraph::element::Type>{ element::f32, element::f32 }, std::vector<ngraph::element::Type>{},
|
||||
ngraph::op::TemporaryReplaceOutputType(convert->get_input_source_output(0), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(op->get_input_node_shared_ptr(1), element::f32).get());
|
||||
ngraph::op::TemporaryReplaceOutputType(convert->input_value(0), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(op->input_value(1), element::f32).get());
|
||||
NetworkHelper::setOutDataPrecisionForTypeRelaxed(newOp, op->get_output_element_type(0));
|
||||
replace_node(op, newOp);
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ std::shared_ptr<Node> updateShape(std::shared_ptr<Node> op, const PartialShape&
|
||||
return op;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> getData(const std::shared_ptr<Node>& eltwise) {
|
||||
std::shared_ptr<Node> getDataNode(const std::shared_ptr<Node>& eltwise) {
|
||||
if (!ov::is_type<opset1::Constant>(eltwise->get_input_node_shared_ptr(0))) {
|
||||
return eltwise->get_input_node_shared_ptr(0);
|
||||
}
|
||||
@ -108,7 +108,7 @@ bool eltwiseWithConstant(const std::shared_ptr<Node>& eltwise) {
|
||||
}
|
||||
}
|
||||
|
||||
return getData(eltwise) != nullptr;
|
||||
return getDataNode(eltwise) != nullptr;
|
||||
}
|
||||
|
||||
} // namespace fuse_fq
|
||||
@ -144,8 +144,8 @@ std::shared_ptr<opset1::FakeQuantize> FuseFakeQuantizeTransformation::handle(
|
||||
inputLowConst = fuse_fq::updateShape(fold<opset1::Add>(inputLowConst, value), fakeQuantize->get_output_partial_shape(0));
|
||||
inputHightConst = fuse_fq::updateShape(fold<opset1::Add>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
|
||||
} else if (ov::is_type<opset1::Add>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
|
||||
if (ov::is_type<opset1::Convolution>(fuse_fq::getData(eltwise)) ||
|
||||
ov::is_type<opset1::GroupConvolution>(fuse_fq::getData(eltwise))) {
|
||||
if (ov::is_type<opset1::Convolution>(fuse_fq::getDataNode(eltwise)) ||
|
||||
ov::is_type<opset1::GroupConvolution>(fuse_fq::getDataNode(eltwise))) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -157,15 +157,18 @@ std::shared_ptr<opset1::FakeQuantize> FuseFakeQuantizeTransformation::handle(
|
||||
inputHightConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
|
||||
} else if (ov::is_type<opset1::Convert>(eltwise)) {
|
||||
// issue #40611
|
||||
if ((eltwise->input(0).get_element_type() == element::i32) && (eltwise->output(0).get_element_type() == element::f32)) {
|
||||
if ((eltwise->get_input_element_type(0) == element::i32) && (eltwise->get_output_element_type(0) == element::f32)) {
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const auto data = fuse_fq::getDataNode(eltwise);
|
||||
const size_t outputIdx = NetworkHelper::getParentOutputIndex(data, eltwise);
|
||||
|
||||
std::shared_ptr<opset1::FakeQuantize> newFakeQuantize = ov::as_type_ptr<opset1::FakeQuantize>(fakeQuantize->clone_with_new_inputs({
|
||||
fuse_fq::getData(eltwise),
|
||||
data->output(outputIdx),
|
||||
inputLowConst,
|
||||
inputHightConst,
|
||||
fakeQuantize->input_value(3),
|
||||
|
@ -46,9 +46,12 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
|
||||
}
|
||||
|
||||
const auto multiplyConstant = multiply->get_input_node_shared_ptr(1);
|
||||
if (!ov::is_type<opset1::Constant>(multiplyConstant)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto outputLowConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(3), deqPrecision);
|
||||
auto outputHighConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(4), deqPrecision);
|
||||
auto outputLowConst_f32 = foldConvert(fakeQuantize->input_value(3), deqPrecision);
|
||||
auto outputHighConst_f32 = foldConvert(fakeQuantize->input_value(4), deqPrecision);
|
||||
|
||||
const auto value = multiplyConstant->get_output_element_type(0) == element::f32 ?
|
||||
multiplyConstant :
|
||||
@ -57,9 +60,6 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
|
||||
outputLowConst_f32 = fold<opset1::Multiply>(outputLowConst_f32, value);
|
||||
outputHighConst_f32 = fold<opset1::Multiply>(outputHighConst_f32, value);
|
||||
|
||||
const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
|
||||
const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
|
||||
|
||||
const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
|
||||
const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
|
||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
|
||||
@ -69,7 +69,7 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
|
||||
|
||||
auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
|
||||
opset1::FakeQuantize(
|
||||
fakeQuantizeParent->output(parentIndex),
|
||||
fakeQuantize->input_value(0),
|
||||
inputLow,
|
||||
inputHigh,
|
||||
outputLowConst_f32,
|
||||
|
@ -45,9 +45,12 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
|
||||
}
|
||||
|
||||
const auto subtractConstant = subtract->get_input_node_shared_ptr(1);
|
||||
if (!ov::is_type<opset1::Constant>(subtractConstant)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto outputLowConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(3), deqPrecision);
|
||||
auto outputHighConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(4), deqPrecision);
|
||||
auto outputLowConst_f32 = foldConvert(fakeQuantize->input_value(3), deqPrecision);
|
||||
auto outputHighConst_f32 = foldConvert(fakeQuantize->input_value(4), deqPrecision);
|
||||
|
||||
const auto value = subtractConstant->get_output_element_type(0) == element::f32 ?
|
||||
subtractConstant :
|
||||
@ -56,9 +59,6 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
|
||||
outputLowConst_f32 = fold<opset1::Subtract>(outputLowConst_f32, value);
|
||||
outputHighConst_f32 = fold<opset1::Subtract>(outputHighConst_f32, value);
|
||||
|
||||
const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
|
||||
const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
|
||||
|
||||
const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
|
||||
const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
|
||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
|
||||
@ -68,7 +68,7 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
|
||||
|
||||
auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
|
||||
opset1::FakeQuantize(
|
||||
fakeQuantizeParent->output(parentIndex),
|
||||
fakeQuantize->input_value(0),
|
||||
inputLow,
|
||||
inputHigh,
|
||||
outputLowConst_f32,
|
||||
|
@ -109,7 +109,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
||||
// multiply by weights: [1, ..., 1, Y] x [Y, Z] => [1, ..., 1, Z]
|
||||
const auto newSubConst = NetworkHelper::toScalarIfPossible(fold<opset1::MatMul>(
|
||||
broadcastedConst,
|
||||
foldConvert(newMatMul->get_input_node_shared_ptr(1), newMatMul->get_element_type()),
|
||||
foldConvert(newMatMul->input_value(1), newMatMul->get_element_type()),
|
||||
newMatMul->get_transpose_a(),
|
||||
newMatMul->get_transpose_b()));
|
||||
|
||||
|
@ -77,10 +77,10 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p
|
||||
return false;
|
||||
}
|
||||
|
||||
auto multiplyParent = multiply->get_input_source_output(multiplyBranch.first);
|
||||
auto constParent = multiply->get_input_source_output(multiplyBranch.first == 0 ? 1 : 0);
|
||||
auto multiplyParentParent = multiplyParent.get_node_shared_ptr()->get_input_source_output(multiplyBranch.second);
|
||||
auto multiplyParentConst = multiplyParent.get_node_shared_ptr()->get_input_source_output(multiplyBranch.second == 0 ? 1 : 0);
|
||||
auto multiplyParent = multiply->input_value(multiplyBranch.first);
|
||||
auto constParent = multiply->input_value(multiplyBranch.first == 0 ? 1 : 0);
|
||||
auto multiplyParentParent = multiplyParent.get_node_shared_ptr()->input_value(multiplyBranch.second);
|
||||
auto multiplyParentConst = multiplyParent.get_node_shared_ptr()->input_value(multiplyBranch.second == 0 ? 1 : 0);
|
||||
|
||||
newMultiply = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||
std::vector<ngraph::element::Type>{ element::f32, element::f32 },
|
||||
@ -127,7 +127,7 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p
|
||||
// before: Y = (SC1 * (X1 - SH1)) * (SC2 * X2)
|
||||
// after : Y = (SC1' * (X1 - SH1)) * (X2) , where :
|
||||
// SC1' = SC1 * SC2
|
||||
std::shared_ptr<Node> newMultiplyValuesFullPath = fold<opset1::Multiply>(multiplyValuesEmptyPath, multiplyValuesFullPath);
|
||||
auto newMultiplyValuesFullPath = fold<opset1::Multiply>(multiplyValuesEmptyPath, multiplyValuesFullPath);
|
||||
OutputVector inputs{ {}, {} };
|
||||
inputs[emptyPathIndex] = dequantizationEmptyPath.data;
|
||||
inputs[fullPathIndex] = std::make_shared<opset1::Multiply>(
|
||||
|
@ -149,7 +149,7 @@ bool MVNTransformation::transform(TransformationContext &context, ngraph::patter
|
||||
if (ov::is_type<op::MVN>(mvn)) {
|
||||
newMVN = mvn->copy_with_new_inputs({dequantization.data});
|
||||
} else {
|
||||
newMVN = mvn->copy_with_new_inputs({dequantization.data, mvn->get_input_node_shared_ptr(1)});
|
||||
newMVN = mvn->copy_with_new_inputs({dequantization.data, mvn->input_value(1)});
|
||||
}
|
||||
NetworkHelper::setOutDataPrecisionForTypeRelaxed(newMVN, deqPrecision);
|
||||
NetworkHelper::copyInfo(mvn, newMVN);
|
||||
|
@ -233,10 +233,10 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
|
||||
if (multiplyConst == nullptr)
|
||||
return addAfterMultiply;
|
||||
|
||||
const auto x = multiply->get_input_source_output(multiplyInputBranch);
|
||||
auto a = multiply->get_input_node_shared_ptr(multiplyInputBranch == 0 ? 1 : 0);
|
||||
auto b = addAfterMultiply->get_input_node_shared_ptr(multiplyBranch == 0 ? 1 : 0);
|
||||
std::shared_ptr<Node> bDivA;
|
||||
const auto x = multiply->input_value(multiplyInputBranch);
|
||||
auto a = as_type_ptr<opset1::Constant>(multiply->get_input_node_shared_ptr(multiplyInputBranch == 0 ? 1 : 0));
|
||||
auto b = as_type_ptr<opset1::Constant>(addAfterMultiply->get_input_node_shared_ptr(multiplyBranch == 0 ? 1 : 0));
|
||||
std::shared_ptr<opset1::Constant> bDivA;
|
||||
|
||||
const auto aPShape = a->get_output_partial_shape(0);
|
||||
assert(aPShape.is_static());
|
||||
@ -248,8 +248,8 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
|
||||
|
||||
if ((shape_size(bShape) == 1) || (shape_size(aShape) == 1) || (shape_size(bShape) == shape_size(aShape))) {
|
||||
// safe division to avoid NaN
|
||||
const std::vector<float> bValues = ov::as_type_ptr<opset1::Constant>(b)->cast_vector<float>();
|
||||
const std::vector<float> aValues = ov::as_type_ptr<opset1::Constant>(a)->cast_vector<float>();
|
||||
const std::vector<float> bValues = b->cast_vector<float>();
|
||||
const std::vector<float> aValues = a->cast_vector<float>();
|
||||
const bool aBroadcasted = bValues.size() > aValues.size();
|
||||
const bool bBroadcasted = bValues.size() < aValues.size();
|
||||
std::vector<float> bDivAValues(aBroadcasted ? bValues.size() : aValues.size());
|
||||
@ -271,16 +271,16 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
|
||||
aBroadcasted ? bShape : aShape,
|
||||
bDivAValues);
|
||||
} else {
|
||||
b = foldConvert(b, element::f32);
|
||||
a = foldConvert(a, element::f32);
|
||||
bDivA = fold<opset1::Divide>(b, a);
|
||||
b = as_type_ptr<opset1::Constant>(foldConvert(b->output(0), element::f32));
|
||||
a = as_type_ptr<opset1::Constant>(foldConvert(a->output(0), element::f32));
|
||||
bDivA = as_type_ptr<opset1::Constant>(fold<opset1::Divide>(b->output(0), a->output(0)));
|
||||
// TODO: issue #49868
|
||||
bDivA = foldConvert(bDivA, a->get_output_element_type(0));
|
||||
bDivA = as_type_ptr<opset1::Constant>(foldConvert(bDivA->output(0), a->get_element_type()));
|
||||
}
|
||||
|
||||
OutputVector inputs{ {}, {} };
|
||||
inputs[0] = x;
|
||||
inputs[1] = bDivA;
|
||||
inputs[1] = bDivA->output(0);
|
||||
|
||||
std::shared_ptr<opset1::Add> newAdd = std::make_shared<op::TypeRelaxed<opset1::Add>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
@ -292,8 +292,8 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
|
||||
auto newMultiply = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ multiply->get_output_element_type(0) },
|
||||
ngraph::op::TemporaryReplaceOutputType(newAdd, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(a, element::f32).get());
|
||||
ngraph::op::TemporaryReplaceOutputType(newAdd->output(0), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(a->output(0), element::f32).get());
|
||||
copyInfo({ multiply, newMultiply }, newMultiply);
|
||||
|
||||
replace_node(addAfterMultiply, newMultiply);
|
||||
@ -460,7 +460,7 @@ std::shared_ptr<ngraph::opset1::Multiply> NetworkHelper::optimizeMultipliesAfter
|
||||
}
|
||||
|
||||
auto newInput = multiply->input_value(1 - constant1->output(0).get_target_inputs().begin()->get_index());
|
||||
auto multiplyResult = fold<opset1::Multiply>(constant1, constant2);
|
||||
auto multiplyResult = fold<opset1::Multiply>(constant1->output(0), constant2->output(0));
|
||||
{
|
||||
// optimize constant shape: used in rfcn-resnet101-coco
|
||||
const auto multiplyResultConstant = ov::as_type_ptr<opset1::Constant>(multiplyResult);
|
||||
@ -526,13 +526,13 @@ FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_p
|
||||
}
|
||||
|
||||
if (dequantization.subtract != nullptr) {
|
||||
if (dequantization.subtract->input(0).get_element_type() != dequantization.subtract->input(1).get_element_type()) {
|
||||
if (dequantization.subtract->get_input_element_type(0) != dequantization.subtract->get_input_element_type(1)) {
|
||||
return dequantization;
|
||||
}
|
||||
|
||||
if (dequantization.subtractConvert != nullptr) {
|
||||
const auto convertionResult = foldConvert(
|
||||
dequantization.subtractConstant,
|
||||
dequantization.subtractConstant->output(0),
|
||||
dequantization.subtractConvert->get_element_type());
|
||||
if (ov::is_type<opset1::Constant>(convertionResult)) {
|
||||
replace_node(dequantization.subtractConvert, convertionResult);
|
||||
@ -541,8 +541,8 @@ FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_p
|
||||
}
|
||||
|
||||
const std::shared_ptr<Node> result = fold<opset1::Subtract>(
|
||||
dequantization.subtract->get_input_node_shared_ptr(0),
|
||||
dequantization.subtract->get_input_node_shared_ptr(1));
|
||||
dequantization.subtract->input_value(0),
|
||||
dequantization.subtract->input_value(1));
|
||||
if (ov::is_type<opset1::Constant>(result)) {
|
||||
if (inPlace) {
|
||||
copyInfo(dequantization.subtract, result);
|
||||
@ -555,18 +555,18 @@ FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_p
|
||||
}
|
||||
|
||||
if (dequantization.multiply != nullptr) {
|
||||
if (dequantization.multiply->input(0).get_element_type() != dequantization.multiply->input(1).get_element_type()) {
|
||||
if (dequantization.multiply->get_input_element_type(0) != dequantization.multiply->get_input_element_type(1)) {
|
||||
return dequantization;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> result = fold<opset1::Multiply>(
|
||||
dequantization.multiply->get_input_node_shared_ptr(0),
|
||||
dequantization.multiply->get_input_node_shared_ptr(1));
|
||||
dequantization.multiply->input_value(0),
|
||||
dequantization.multiply->input_value(1));
|
||||
if (!ov::is_type<opset1::Constant>(result)) {
|
||||
return dequantization;
|
||||
}
|
||||
if (dequantization.multiply->get_output_element_type(0) != result->get_element_type()) {
|
||||
result = foldConvert(result, dequantization.multiply->get_output_element_type(0));
|
||||
result = foldConvert(result->output(0), dequantization.multiply->get_output_element_type(0));
|
||||
}
|
||||
if (inPlace) {
|
||||
copyInfo(dequantization.multiply, result);
|
||||
@ -599,7 +599,7 @@ std::shared_ptr<ngraph::Node> NetworkHelper::separateInStandaloneBranch(std::sha
|
||||
outputs.push_back(input.get_source_output());
|
||||
}
|
||||
|
||||
auto subtract = dequantization.subtract->clone_with_new_inputs({parent, parentOnWeights->clone_with_new_inputs(outputs) });
|
||||
auto subtract = dequantization.subtract->clone_with_new_inputs({parent, parentOnWeights->clone_with_new_inputs(outputs)->output(0) });
|
||||
subtract->set_friendly_name("");
|
||||
copy_runtime_info(parent.get_node_shared_ptr(), subtract);
|
||||
parent = subtract->output(0);
|
||||
@ -608,7 +608,7 @@ std::shared_ptr<ngraph::Node> NetworkHelper::separateInStandaloneBranch(std::sha
|
||||
if (dequantization.multiply != nullptr) {
|
||||
auto multiply = dequantization.multiply->clone_with_new_inputs({
|
||||
parent,
|
||||
dequantization.multiply->get_input_node_shared_ptr(1)->clone_with_new_inputs({}) });
|
||||
dequantization.multiply->get_input_node_shared_ptr(1)->clone_with_new_inputs({})->output(0) });
|
||||
multiply->set_friendly_name("");
|
||||
copy_runtime_info(parent.get_node_shared_ptr(), multiply);
|
||||
parent = multiply->output(0);
|
||||
@ -650,11 +650,11 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::fuseConvert(const std::shar
|
||||
std::shared_ptr<opset1::FakeQuantize> newFakeQuantize = std::make_shared<ngraph::op::TypeRelaxed<opset1::FakeQuantize>>(
|
||||
std::vector<ngraph::element::Type>{ element::f32, element::f32, element::f32, element::f32, element::f32 },
|
||||
std::vector<ngraph::element::Type>{},
|
||||
ngraph::op::TemporaryReplaceOutputType(fakeQuantize->get_input_node_shared_ptr(0), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(fakeQuantize->get_input_node_shared_ptr(1), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(fakeQuantize->get_input_node_shared_ptr(2), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(fakeQuantize->get_input_node_shared_ptr(3), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(fakeQuantize->get_input_node_shared_ptr(4), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(fakeQuantize->input_value(0), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(fakeQuantize->input_value(1), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(fakeQuantize->input_value(2), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(fakeQuantize->input_value(3), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(fakeQuantize->input_value(4), element::f32).get(),
|
||||
fakeQuantize->get_levels());
|
||||
NetworkHelper::setOutDataPrecisionForTypeRelaxed(newFakeQuantize, node->get_output_element_type(0));
|
||||
replace_node(node->shared_from_this(), newFakeQuantize);
|
||||
@ -889,14 +889,14 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::composeFakeQuantize(const s
|
||||
if (dequantization.subtract != nullptr) {
|
||||
const auto subtractValue = (dequantization.subtractConvert == nullptr) ?
|
||||
dequantization.subtractConstant :
|
||||
foldConvert(dequantization.subtractConstant, dequantization.subtractConvert->output(0).get_element_type());
|
||||
foldConvert(dequantization.subtractConstant->output(0), dequantization.subtractConvert->get_destination_type());
|
||||
|
||||
const std::shared_ptr<opset1::FakeQuantize> replacement = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
|
||||
newFakeQuantize->input_value(0),
|
||||
newFakeQuantize->input_value(1),
|
||||
newFakeQuantize->input_value(2),
|
||||
fold<opset1::Subtract>(newFakeQuantize->get_input_node_shared_ptr(3), subtractValue),
|
||||
fold<opset1::Subtract>(newFakeQuantize->get_input_node_shared_ptr(4), subtractValue),
|
||||
fold<opset1::Subtract>(newFakeQuantize->input_value(3), subtractValue),
|
||||
fold<opset1::Subtract>(newFakeQuantize->input_value(4), subtractValue),
|
||||
newFakeQuantize->get_levels(),
|
||||
newFakeQuantize->get_auto_broadcast());
|
||||
replace_node(dequantization.subtract, replacement);
|
||||
@ -907,11 +907,9 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::composeFakeQuantize(const s
|
||||
|
||||
if (dequantization.multiply != nullptr) {
|
||||
// multiply different precision constants (value1 & value2) and convert result to first argument precision (value1)
|
||||
auto multiply = [](
|
||||
const std::shared_ptr<ngraph::Node>& value1,
|
||||
const std::shared_ptr<ngraph::Node>& value2) -> std::shared_ptr<ngraph::Node> {
|
||||
const ngraph::element::Type precision1 = value1->output(0).get_element_type();
|
||||
const ngraph::element::Type precision2 = value2->output(0).get_element_type();
|
||||
auto multiply = [](const Output<Node>& value1, const Output<Node>& value2) {
|
||||
const ngraph::element::Type precision1 = value1.get_element_type();
|
||||
const ngraph::element::Type precision2 = value2.get_element_type();
|
||||
// 1) precision1 & precision2 are not equal but similar
|
||||
// 2) precision2 >= precision1
|
||||
assert((precision2.is_real() == precision1.is_real()) && (precision2.bitwidth() >= precision1.bitwidth()));
|
||||
@ -921,7 +919,7 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::composeFakeQuantize(const s
|
||||
value2);
|
||||
|
||||
if (output->output(0).get_element_type() != precision1) {
|
||||
output = foldConvert(output, precision1);
|
||||
output = foldConvert(output->output(0), precision1);
|
||||
}
|
||||
|
||||
return output;
|
||||
@ -931,8 +929,8 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::composeFakeQuantize(const s
|
||||
newFakeQuantize->input_value(0ul),
|
||||
newFakeQuantize->input_value(1ul),
|
||||
newFakeQuantize->input_value(2ul),
|
||||
multiply(newFakeQuantize->get_input_node_shared_ptr(3ul), dequantization.multiplyConstant),
|
||||
multiply(newFakeQuantize->get_input_node_shared_ptr(4ul), dequantization.multiplyConstant),
|
||||
multiply(newFakeQuantize->input_value(3ul), dequantization.multiplyConstant),
|
||||
multiply(newFakeQuantize->input_value(4ul), dequantization.multiplyConstant),
|
||||
newFakeQuantize->get_levels(),
|
||||
newFakeQuantize->get_auto_broadcast());
|
||||
|
||||
@ -956,8 +954,6 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
|
||||
const bool updatePrecision,
|
||||
const element::Type deqPrecision,
|
||||
const size_t outChannelsShapeIndex) {
|
||||
using std::make_shared;
|
||||
|
||||
const auto outputLow = fq->input_value(3);
|
||||
const auto outputHigh = fq->input_value(4);
|
||||
|
||||
@ -1015,8 +1011,8 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
|
||||
nullptr;
|
||||
std::shared_ptr<Node> scale = std::make_shared<opset1::Constant>(element::f32, outputLow.get_shape(), scales);
|
||||
|
||||
auto newMin = make_shared<opset1::Constant>(outputLow.get_element_type(), outputLow.get_shape(), minValues);
|
||||
auto newMax = make_shared<opset1::Constant>(outputLow.get_element_type(), outputLow.get_shape(), maxValues);
|
||||
auto newMin = std::make_shared<opset1::Constant>(outputLow.get_element_type(), outputLow.get_shape(), minValues);
|
||||
auto newMax = std::make_shared<opset1::Constant>(outputLow.get_element_type(), outputLow.get_shape(), maxValues);
|
||||
|
||||
if (isScalarLike(newMin)) {
|
||||
newMin = toScalar(newMin);
|
||||
@ -1072,7 +1068,7 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
|
||||
std::shared_ptr<opset1::Constant> newFqConstant = ov::as_type_ptr<opset1::Constant>(newFQ);
|
||||
|
||||
if (ov::is_type<opset1::Constant>(newFQ)) {
|
||||
convert = foldConvert(newFQ, precision);
|
||||
convert = foldConvert(newFQ->output(0), precision);
|
||||
} else if (ov::is_type<opset1::FakeQuantize>(newFQ)) {
|
||||
newFQ = setOutDataPrecision(ov::as_type_ptr<opset1::FakeQuantize>(newFQ), precision);
|
||||
convert = newFQ;
|
||||
@ -1192,11 +1188,9 @@ FakeQuantizeDequantization NetworkHelper::createDequantizationFromFakeQuantize(
|
||||
const bool hasZeroPoint,
|
||||
const bool updatePrecision,
|
||||
const element::Type deqPrecision) {
|
||||
using std::make_shared;
|
||||
|
||||
const ngraph::element::Type_t fqPrecision = fq->get_output_element_type(0);
|
||||
auto newMin = make_shared<opset1::Constant>(fqPrecision, Shape{}, min);
|
||||
auto newMax = make_shared<opset1::Constant>(fqPrecision, Shape{}, max);
|
||||
auto newMin = std::make_shared<opset1::Constant>(fqPrecision, Shape{}, min);
|
||||
auto newMax = std::make_shared<opset1::Constant>(fqPrecision, Shape{}, max);
|
||||
|
||||
auto outputLow = fq->input_value(3);
|
||||
auto outputHigh = fq->input_value(4);
|
||||
@ -1205,12 +1199,12 @@ FakeQuantizeDequantization NetworkHelper::createDequantizationFromFakeQuantize(
|
||||
|
||||
const std::shared_ptr<opset1::Constant> scale = ov::as_type_ptr<opset1::Constant>(foldConvert(fold<opset1::Divide>(
|
||||
fold<opset1::Subtract>(outputHigh, outputLow),
|
||||
fold<opset1::Subtract>(newMax, newMin)), deqPrecision));
|
||||
fold<opset1::Subtract>(newMax->output(0), newMin->output(0))), deqPrecision));
|
||||
assert(scale != nullptr);
|
||||
|
||||
std::shared_ptr<opset1::Constant> shift = hasZeroPoint ?
|
||||
ov::as_type_ptr<opset1::Constant>(foldConvert(fold<opset1::Divide>(
|
||||
fold<opset1::Subtract>(fold<opset1::Multiply>(newMin, outputHigh), fold<opset1::Multiply>(newMax, outputLow)),
|
||||
fold<opset1::Subtract>(fold<opset1::Multiply>(newMin->output(0), outputHigh), fold<opset1::Multiply>(newMax->output(0), outputLow)),
|
||||
fold<opset1::Subtract>(outputHigh, outputLow)), deqPrecision)) :
|
||||
nullptr;
|
||||
assert((!hasZeroPoint) || (hasZeroPoint && shift != nullptr));
|
||||
@ -1240,7 +1234,7 @@ FakeQuantizeDequantization NetworkHelper::createDequantizationFromFakeQuantize(
|
||||
|
||||
std::shared_ptr<ngraph::opset1::Subtract> subtract;
|
||||
if (shift != nullptr) {
|
||||
subtract = make_shared<ngraph::op::TypeRelaxed<opset1::Subtract>>(parent, shift);
|
||||
subtract = std::make_shared<ngraph::op::TypeRelaxed<opset1::Subtract>>(parent, shift);
|
||||
subtract->set_output_type(0, deqPrecision, subtract->get_output_partial_shape(0));
|
||||
parent = subtract;
|
||||
} else {
|
||||
@ -1416,16 +1410,16 @@ FakeQuantizeDequantization NetworkHelper::normalizeDequantization(FakeQuantizeDe
|
||||
return dequantization;
|
||||
}
|
||||
if (dequantization.multiply != nullptr && ov::as_type_ptr<ngraph::opset1::Constant>(dequantization.multiply->get_input_node_shared_ptr(0))) {
|
||||
std::shared_ptr<Node> leftParent = dequantization.multiply->get_input_node_shared_ptr(0);
|
||||
std::shared_ptr<Node> rightParent = dequantization.multiply->get_input_node_shared_ptr(1);
|
||||
const auto leftParent = dequantization.multiply->input_value(0);
|
||||
const auto rightParent = dequantization.multiply->input_value(1);
|
||||
std::shared_ptr<opset1::Multiply> normalized_multiply = ov::as_type_ptr<opset1::Multiply>(
|
||||
dequantization.multiply->clone_with_new_inputs({rightParent, leftParent}));
|
||||
replace_node(dequantization.multiply, normalized_multiply);
|
||||
dequantization.multiply = normalized_multiply;
|
||||
}
|
||||
if (dequantization.subtract != nullptr && ov::as_type_ptr<ngraph::opset1::Constant>(dequantization.subtract->get_input_node_shared_ptr(0))) {
|
||||
std::shared_ptr<Node> leftParent = dequantization.subtract->get_input_node_shared_ptr(0);
|
||||
std::shared_ptr<Node> rightParent = dequantization.subtract->get_input_node_shared_ptr(1);
|
||||
const auto leftParent = dequantization.subtract->input_value(0);
|
||||
const auto rightParent = dequantization.subtract->input_value(1);
|
||||
std::shared_ptr<opset1::Subtract> normalized_subtract = ov::as_type_ptr<opset1::Subtract>(
|
||||
dequantization.subtract->clone_with_new_inputs({rightParent, leftParent}));
|
||||
replace_node(dequantization.subtract, normalized_subtract);
|
||||
@ -1452,7 +1446,7 @@ std::shared_ptr<opset1::Constant> NetworkHelper::normalizeDequantizationShape(co
|
||||
std::iota(unsqueezeConstantShape.begin(), unsqueezeConstantShape.end(), 0ul);
|
||||
|
||||
const auto newConstant = fold<opset1::Unsqueeze>(
|
||||
constant,
|
||||
constant->output(0),
|
||||
op::Constant::create(element::i32, Shape{ unsqueezeConstantShape.size() }, unsqueezeConstantShape));
|
||||
|
||||
return ov::as_type_ptr<opset1::Constant>(newConstant);
|
||||
@ -1471,13 +1465,13 @@ std::shared_ptr<opset1::Constant> NetworkHelper::normalizeDequantizationShape(co
|
||||
FakeQuantizeDequantizationValues NetworkHelper::createEmptyValues(const FakeQuantizeDequantization& dequantization, const element::Type precision) {
|
||||
const std::shared_ptr<Node> multiplyConstant = dequantization.multiply ?
|
||||
dequantization.multiplyConstant->get_element_type() != precision ?
|
||||
foldConvert(dequantization.multiplyConstant, precision) :
|
||||
foldConvert(dequantization.multiplyConstant->output(0), precision) :
|
||||
dequantization.multiplyConstant :
|
||||
std::make_shared<opset1::Constant>(precision, Shape({}), std::vector<float>({ 1.f }));
|
||||
|
||||
const std::shared_ptr<Node> subtractConstant = dequantization.subtract ?
|
||||
dequantization.subtractConstant->get_element_type() != precision ?
|
||||
foldConvert(dequantization.subtractConstant, precision) :
|
||||
foldConvert(dequantization.subtractConstant->output(0), precision) :
|
||||
dequantization.subtractConstant :
|
||||
std::make_shared<opset1::Constant>(precision, Shape({}), std::vector<float>({ 0.f }));
|
||||
|
||||
@ -1538,7 +1532,7 @@ std::shared_ptr<Node> NetworkHelper::optimizeSubtract(std::shared_ptr<opset1::Su
|
||||
NetworkHelper::copyInfo(shift, roundedShift);
|
||||
|
||||
// Propagate convertInputType down
|
||||
replacement = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(data, roundedShift);
|
||||
replacement = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(data, roundedShift->output(0));
|
||||
NetworkHelper::copyInfo(subtract, replacement);
|
||||
NetworkHelper::setOutDataPrecisionForTypeRelaxed(replacement, convertOutputType);
|
||||
replace_node(subtract, replacement);
|
||||
@ -1546,7 +1540,7 @@ std::shared_ptr<Node> NetworkHelper::optimizeSubtract(std::shared_ptr<opset1::Su
|
||||
|
||||
return replacement;
|
||||
} else if (ov::is_type<opset1::Convert>(subtractParent) && ov::is_type<opset1::Constant>(subtractParent->get_input_node_shared_ptr(0))) {
|
||||
auto replacement = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(data, subtractParent->get_input_node_shared_ptr(0));
|
||||
auto replacement = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(data, subtractParent->input_value(0));
|
||||
NetworkHelper::copyInfo(subtract, replacement);
|
||||
NetworkHelper::setOutDataPrecisionForTypeRelaxed(replacement, convertOutputType);
|
||||
replace_node(subtract, replacement);
|
||||
@ -1569,11 +1563,9 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
|
||||
(NetworkHelper::getDequantization(operation).multiplyConstant == nullptr) ||
|
||||
(NetworkHelper::getDequantization(operation).multiplyConstant.get() == dequantization.multiplyConstant.get()));
|
||||
|
||||
std::vector<Output<Node>> inputs(operation->get_input_size());
|
||||
for (size_t i = 0; i < operation->get_input_size(); ++i) {
|
||||
inputs[i] = operation->get_input_node_shared_ptr(i);
|
||||
}
|
||||
assert(operation->get_output_size() == 1);
|
||||
|
||||
OutputVector inputs = operation->input_values();
|
||||
const size_t dequantizationIndex = getChildInputIndex(dequantization.multiply, operation);
|
||||
inputs[dequantizationIndex] = moveSubtract ?
|
||||
dequantization.data :
|
||||
@ -1623,7 +1615,7 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
|
||||
ngraph::op::TemporaryReplaceOutputType(
|
||||
dequantization.subtractConstant->output(0).get_element_type() == parentPrecision ?
|
||||
dequantization.subtractConstant :
|
||||
foldConvert(dequantization.subtractConstant, parentPrecision), element::f32).get());
|
||||
foldConvert(dequantization.subtractConstant->output(0), parentPrecision), element::f32).get());
|
||||
ngraph::copy_runtime_info({ newOperation, parent }, parent);
|
||||
} else {
|
||||
parent = std::make_shared<opset1::Subtract>(parent, dequantization.subtractConvert);
|
||||
|
@ -30,15 +30,15 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& reshap
|
||||
assert(ov::is_type<opset1::Constant>(elementwiseValues));
|
||||
|
||||
const std::shared_ptr<opset1::Reshape> newReshape = ov::as_type_ptr<opset1::Reshape>(reshape->clone_with_new_inputs({
|
||||
elementwise->get_input_node_shared_ptr(0ul),
|
||||
elementwise->input_value(0),
|
||||
reshapeValues }));
|
||||
|
||||
std::shared_ptr<Node> newElementwiseValues;
|
||||
|
||||
const Shape elementwiseValuesShape = elementwiseValues->output(0).get_shape();
|
||||
const Shape elementwiseValuesShape = elementwiseValues->get_output_shape(0);
|
||||
if (!elementwiseValuesShape.empty() && (elementwiseValuesShape.size() != 1ul)) {
|
||||
// update shape constant value to avoid eltwise constant value broadcasting
|
||||
const Shape elementwiseShape = elementwise->output(0).get_shape();
|
||||
const Shape elementwiseShape = elementwise->get_output_shape(0);
|
||||
const std::vector<size_t> reshapeValuesVector = ov::as_type_ptr<opset1::Constant>(reshapeValues)->cast_vector<size_t>();
|
||||
|
||||
const std::vector<size_t> newReshapeValuesVector = ngraph::pass::low_precision::NetworkHelper::updateReshapeValues(
|
||||
@ -47,13 +47,13 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& reshap
|
||||
reshapeValuesVector);
|
||||
|
||||
const auto newReshapeValues = std::make_shared<opset1::Constant>(
|
||||
reshapeValues->output(0).get_element_type(),
|
||||
reshapeValues->get_output_element_type(0),
|
||||
Shape{ newReshapeValuesVector.size() },
|
||||
newReshapeValuesVector);
|
||||
|
||||
newElementwiseValues = ngraph::pass::low_precision::fold_reshape<opset1::Reshape>(
|
||||
elementwiseValues->output(0),
|
||||
newReshapeValues->output(0),
|
||||
elementwiseValues,
|
||||
newReshapeValues,
|
||||
ov::as_type_ptr<opset1::Reshape>(reshape)->get_special_zero());
|
||||
assert(ov::is_type<opset1::Constant>(newElementwiseValues));
|
||||
} else {
|
||||
@ -71,7 +71,7 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& reshap
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> moveThroughConvert(const std::shared_ptr<Node>& reshape, const std::shared_ptr<Node>& convert) {
|
||||
const auto newReshape = reshape->clone_with_new_inputs({ convert->get_input_node_shared_ptr(0), reshape->get_input_node_shared_ptr(1) });
|
||||
const auto newReshape = reshape->clone_with_new_inputs({ convert->input_value(0), reshape->input_value(1) });
|
||||
const auto newConvert = convert->clone_with_new_inputs({ newReshape });
|
||||
replace_node(reshape, newConvert);
|
||||
copy_runtime_info({ convert, reshape }, { newReshape, newConvert });
|
||||
@ -81,7 +81,7 @@ std::shared_ptr<Node> moveThroughConvert(const std::shared_ptr<Node>& reshape, c
|
||||
|
||||
void fuseConstant(const std::shared_ptr<Node>& reshape, const std::shared_ptr<Node>& constant) {
|
||||
ngraph::OutputVector result(1);
|
||||
reshape->constant_fold(result, { constant->output(0), reshape->get_input_node_ptr(1)->output(0) });
|
||||
reshape->constant_fold(result, { constant, reshape->input_value(1) });
|
||||
const auto newConstant = result[0].get_node_shared_ptr();
|
||||
replace_node(reshape, newConstant);
|
||||
copy_runtime_info({ constant, reshape }, newConstant);
|
||||
|
@ -30,8 +30,8 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& transp
|
||||
elementwiseValuesConvert->get_input_node_shared_ptr(0ul);
|
||||
assert(ov::is_type<opset1::Constant>(elementwiseValues));
|
||||
|
||||
const auto transposeValuesShape = transposeValues->output(0).get_shape();
|
||||
const auto elementwiseValuesShape = elementwiseValues->output(0).get_shape();
|
||||
const auto transposeValuesShape = transposeValues->get_output_shape(0);
|
||||
const auto elementwiseValuesShape = elementwiseValues->get_output_shape(0);
|
||||
if (elementwiseValuesShape.size() != shape_size(transposeValuesShape)) {
|
||||
if (shape_size(elementwiseValuesShape) != 1ul) {
|
||||
return nullptr;
|
||||
@ -51,8 +51,8 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& transp
|
||||
transposeValues }));
|
||||
|
||||
const auto newElementwiseValues = ngraph::pass::low_precision::fold<opset1::Transpose>(
|
||||
elementwiseValues->output(0),
|
||||
transposeValues->output(0));
|
||||
elementwiseValues,
|
||||
transposeValues);
|
||||
assert(ov::is_type<opset1::Constant>(newElementwiseValues));
|
||||
|
||||
const auto newElementwise = elementwise->clone_with_new_inputs({
|
||||
@ -68,7 +68,7 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& transp
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> moveThroughConvert(const std::shared_ptr<Node>& transpose, const std::shared_ptr<Node>& convert) {
|
||||
const auto newTranspose = transpose->clone_with_new_inputs({convert->get_input_node_shared_ptr(0), transpose->get_input_node_ptr(1)->output(0) });
|
||||
const auto newTranspose = transpose->clone_with_new_inputs({convert->input_value(0), transpose->input_value(1) });
|
||||
const auto newConvert = convert->clone_with_new_inputs({ newTranspose });
|
||||
replace_node(transpose, newConvert);
|
||||
copy_runtime_info({ convert, transpose }, { newTranspose, newConvert });
|
||||
@ -78,8 +78,8 @@ std::shared_ptr<Node> moveThroughConvert(const std::shared_ptr<Node>& transpose,
|
||||
|
||||
void fuseConstant(const std::shared_ptr<Node>& transpose, const std::shared_ptr<Node>& constant) {
|
||||
const auto newConstant = ngraph::pass::low_precision::fold<opset1::Transpose>(
|
||||
constant->output(0),
|
||||
transpose->get_input_node_ptr(1)->output(0));
|
||||
constant,
|
||||
transpose->input_value(1));
|
||||
|
||||
replace_node(transpose, newConstant);
|
||||
copy_runtime_info({ constant, transpose }, newConstant);
|
||||
|
@ -63,7 +63,7 @@ void reshapeDequantizationConstant(const std::shared_ptr<opset1::Reshape>& resha
|
||||
}
|
||||
}
|
||||
|
||||
const auto reshapeOutputPShape = reshape->output(0).get_partial_shape();
|
||||
const auto reshapeOutputPShape = reshape->get_output_partial_shape(0);
|
||||
const auto reshapeOutputRank = reshapeOutputPShape.rank();
|
||||
assert(reshapeOutputRank.is_static());
|
||||
assert(reshapeOutputRank.get_length() >= 2);
|
||||
|
@ -52,7 +52,7 @@ std::shared_ptr<VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>>> Va
|
||||
|
||||
FakeQuantizeDequantization dequantization;
|
||||
{
|
||||
const auto targetInputs = node->output(0).get_target_inputs();
|
||||
const auto targetInputs = node->get_output_target_inputs(0);
|
||||
if (targetInputs.size() == 1ul) {
|
||||
dequantization = NetworkHelper::getDequantizationBelow(node, true);
|
||||
}
|
||||
@ -75,7 +75,7 @@ std::shared_ptr<VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>>> Va
|
||||
auto multiplyResult = dequantization.multiplyConstant == nullptr ?
|
||||
node->get_input_node_ptr(3)->shared_from_this() :
|
||||
fold<opset1::Multiply>(
|
||||
foldConvert(node->get_input_node_ptr(3)->shared_from_this(), params.deqPrecision),
|
||||
foldConvert(node->input_value(3), params.deqPrecision),
|
||||
dequantization.multiplyConstant);
|
||||
|
||||
auto multiplyResultConstant = ov::as_type_ptr<opset1::Constant>(multiplyResult);
|
||||
@ -87,7 +87,7 @@ std::shared_ptr<VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>>> Va
|
||||
auto multiplyResult = dequantization.multiplyConstant == nullptr ?
|
||||
node->get_input_node_ptr(4)->shared_from_this() :
|
||||
fold<opset1::Multiply>(
|
||||
foldConvert(node->get_input_node_ptr(4)->shared_from_this(), params.deqPrecision),
|
||||
foldConvert(node->input_value(4), params.deqPrecision),
|
||||
dequantization.multiplyConstant);
|
||||
|
||||
auto multiplyResultConstant = ov::as_type_ptr<opset1::Constant>(multiplyResult);
|
||||
|
@ -1,16 +0,0 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/rt_info/shared_value_attribute.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <iterator>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
using namespace ngraph;
|
@ -47,7 +47,7 @@ bool SqueezeTransformation::transform(TransformationContext& context, ngraph::pa
|
||||
return NetworkHelper::toScalar(dequantizationOpConstant);
|
||||
}
|
||||
if (constantShape.size() == inputRankValue) {
|
||||
return ov::as_type_ptr<opset1::Constant>(fold<opset1::Squeeze>(dequantizationOpConstant, squeeze->get_input_node_shared_ptr(1)));
|
||||
return ov::as_type_ptr<opset1::Constant>(fold<opset1::Squeeze>(dequantizationOpConstant, squeeze->input_value(1)));
|
||||
}
|
||||
|
||||
return dequantizationOpConstant;
|
||||
|
@ -62,9 +62,9 @@ std::shared_ptr<opset1::Constant> stridedSliceDeqConstant(
|
||||
|
||||
const auto result = fold<ngraph::opset1::StridedSlice>(
|
||||
constant,
|
||||
stridedSlice->get_input_node_shared_ptr(1),
|
||||
stridedSlice->get_input_node_shared_ptr(2),
|
||||
stridedSlice->get_input_node_shared_ptr(3),
|
||||
stridedSlice->input_value(1),
|
||||
stridedSlice->input_value(2),
|
||||
stridedSlice->input_value(3),
|
||||
beginMask,
|
||||
endMask,
|
||||
stridedSlice->get_new_axis_mask(),
|
||||
|
@ -55,10 +55,10 @@ bool SubtractTransformation::transform(TransformationContext& context, ngraph::p
|
||||
// X * SC - SH = X * SC - SH' * SC
|
||||
// SH' = SH / SC
|
||||
std::shared_ptr<opset1::Subtract> newSubtract = ov::as_type_ptr<opset1::Subtract>(subtract->copy_with_new_inputs({
|
||||
dequantization.multiply->get_input_node_shared_ptr(0),
|
||||
dequantization.multiply->input_value(0),
|
||||
ngraph::pass::low_precision::fold<ngraph::opset1::Divide>(
|
||||
subtract->get_input_node_shared_ptr(1),
|
||||
dequantization.multiply->get_input_node_shared_ptr(1))
|
||||
subtract->input_value(1),
|
||||
dequantization.multiply->input_value(1))
|
||||
}));
|
||||
|
||||
std::shared_ptr<Node> newMultiply = dequantization.multiply->copy_with_new_inputs({
|
||||
@ -72,8 +72,8 @@ bool SubtractTransformation::transform(TransformationContext& context, ngraph::p
|
||||
|
||||
if (dequantization.subtract != nullptr) {
|
||||
std::shared_ptr<opset1::Subtract> newSubtract = ov::as_type_ptr<opset1::Subtract>(subtract->copy_with_new_inputs({
|
||||
dequantization.subtract->get_input_node_shared_ptr(0),
|
||||
fold<ngraph::opset1::Add>(subtract->get_input_node_shared_ptr(1), dequantization.subtractConstant)
|
||||
dequantization.subtract->input_value(0),
|
||||
fold<ngraph::opset1::Add>(subtract->input_value(1), dequantization.subtractConstant)
|
||||
}));
|
||||
|
||||
replace_node(subtract, newSubtract);
|
||||
@ -86,8 +86,8 @@ bool SubtractTransformation::transform(TransformationContext& context, ngraph::p
|
||||
subtract->set_output_type(0, originalPrecision, subtract->get_output_partial_shape(0));
|
||||
|
||||
replace_node(subtract, std::make_shared<op::TypeRelaxed<opset1::Subtract>>(
|
||||
subtract->get_input_node_shared_ptr(0),
|
||||
subtract->get_input_node_shared_ptr(1)));
|
||||
subtract->input_value(0),
|
||||
subtract->input_value(1)));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -4,9 +4,7 @@
|
||||
|
||||
#include "low_precision/transparent_base_transformation.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "low_precision/network_helper.hpp"
|
||||
@ -16,27 +14,20 @@ using namespace ngraph::pass;
|
||||
using namespace ngraph::pass::low_precision;
|
||||
|
||||
bool TransparentBaseTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
||||
auto operation = m.get_match_root();
|
||||
const std::shared_ptr<Node> dequantization = operation->input_value(0).get_node_shared_ptr();
|
||||
// const std::shared_ptr<Node> dequantizationParent = dequantization->input_value(0).get_node_shared_ptr();
|
||||
std::shared_ptr<Node> op = m.get_match_root();
|
||||
if (!canBeTransformed(context, op)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// auto newOperation = operation->copy_with_new_inputs({ dequantizationParent });
|
||||
// const auto newDequantization = dequantization->copy_with_new_inputs({
|
||||
// newOperation,
|
||||
// dequantization->input_value(1),
|
||||
// dequantization->input_value(2) });
|
||||
|
||||
// const std::string friendlyName = operation->get_friendly_name();
|
||||
//// TODO: new operation name has to be unique
|
||||
// newOperation->set_friendly_name(friendlyName + "_original");
|
||||
// newDequantization->set_friendly_name(friendlyName);
|
||||
|
||||
// replace_node(operation, newDequantization);
|
||||
|
||||
// NetworkHelper::moveDequantization(operation, dequantization);
|
||||
op = NetworkHelper::separateInStandaloneBranch(op);
|
||||
moveDequantizationAfter(context, op, NetworkHelper::getDequantization(op), true);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TransparentBaseTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TransparentBaseTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
|
||||
return true;
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ bool UnsqueezeTransformation::transform(TransformationContext& context, ngraph::
|
||||
}
|
||||
|
||||
if (constantShape.size() == inputRankValue) {
|
||||
return ov::as_type_ptr<opset1::Constant>(fold<opset1::Unsqueeze>(dequantizationOpConstant, unsqueeze->get_input_node_shared_ptr(1)));
|
||||
return ov::as_type_ptr<opset1::Constant>(fold<opset1::Unsqueeze>(dequantizationOpConstant, unsqueeze->input_value(1)));
|
||||
}
|
||||
|
||||
return dequantizationOpConstant;
|
||||
|
@ -50,8 +50,10 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
R"(.*smoke_MemoryTest.*iteration_count=3.*IS=\(1.10\).*)",
|
||||
R"(.*smoke_MemoryTest.*iteration_count=4.*IS=\(1.10\).*)",
|
||||
R"(.*smoke_MemoryTest.*iteration_count=10.*IS=\(1.10\).*)",
|
||||
R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=10.*IS=\(1.2\).*)",
|
||||
R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=10.*IS=\(1.2\).*)",/**/
|
||||
// CVS-58963: Not implemented yet
|
||||
R"(.*Behavior.*InferRequest.*OutOfFirstOutIsInputForSecondNetwork.*)",
|
||||
// TODO: Issue: 29577
|
||||
R"(.*QueryNetwork.*)",
|
||||
};
|
||||
}
|
||||
|
@ -20,4 +20,16 @@ TEST_P(ActivationDynamicLayerTest, CompareWithRefs) {
|
||||
Run();
|
||||
}
|
||||
|
||||
// Value-parameterized test: invokes the fixture's QueryNetwork() check once per
// ActivationLayerTest parameter set.
// NOTE(review): presumably resolves to LayerTestsCommon::QueryNetwork() - confirm against the fixture's base class.
TEST_P(ActivationLayerTest, QueryNetwork) {
|
||||
QueryNetwork();
|
||||
}
|
||||
|
||||
// Value-parameterized test: invokes the fixture's QueryNetwork() check once per
// ActivationParamLayerTest parameter set.
// NOTE(review): presumably resolves to LayerTestsCommon::QueryNetwork() - confirm against the fixture's base class.
TEST_P(ActivationParamLayerTest, QueryNetwork) {
|
||||
QueryNetwork();
|
||||
}
|
||||
|
||||
// Value-parameterized test: invokes the fixture's QueryNetwork() check once per
// ActivationDynamicLayerTest parameter set.
// NOTE(review): presumably resolves to LayerTestsCommon::QueryNetwork() - confirm against the fixture's base class.
TEST_P(ActivationDynamicLayerTest, QueryNetwork) {
|
||||
QueryNetwork();
|
||||
}
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
||||
|
@ -12,4 +12,8 @@ TEST_P(SplitConvConcat, CompareWithRefImpl) {
|
||||
Run();
|
||||
};
|
||||
|
||||
// Value-parameterized test: invokes the fixture's QueryNetwork() check once per
// SplitConvConcat parameter set.
// NOTE(review): presumably resolves to LayerTestsCommon::QueryNetwork() - confirm against the fixture's base class.
TEST_P(SplitConvConcat, QueryNetwork) {
|
||||
QueryNetwork();
|
||||
}
|
||||
|
||||
} // namespace SubgraphTestsDefinitions
|
@ -57,6 +57,8 @@ public:
|
||||
|
||||
virtual void Serialize();
|
||||
|
||||
virtual void QueryNetwork();
|
||||
|
||||
static void Compare(const std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> &expected,
|
||||
const std::vector<InferenceEngine::Blob::Ptr> &actual,
|
||||
float threshold);
|
||||
|
@ -94,6 +94,23 @@ void LayerTestsCommon::Serialize() {
|
||||
CommonTestUtils::removeIRFiles(out_xml_path, out_bin_path);
|
||||
}
|
||||
|
||||
void LayerTestsCommon::QueryNetwork() {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED();
|
||||
cnnNetwork = InferenceEngine::CNNNetwork(function);
|
||||
|
||||
auto queryNetworkResult = PluginCache::get().ie()->QueryNetwork(cnnNetwork, targetDevice);
|
||||
std::set<std::string> expected;
|
||||
for (auto&& node : function->get_ops()) {
|
||||
expected.insert(node->get_friendly_name());
|
||||
}
|
||||
|
||||
std::set<std::string> actual;
|
||||
for (auto&& res : queryNetworkResult.supportedLayersMap) {
|
||||
actual.insert(res.first);
|
||||
}
|
||||
ASSERT_EQ(expected, actual);
|
||||
}
|
||||
|
||||
InferenceEngine::Blob::Ptr LayerTestsCommon::GenerateInput(const InferenceEngine::InputInfo& info) const {
|
||||
return FuncTestUtils::createAndFillBlob(targetStaticShape.empty() || targetStaticShape[0].empty() ?
|
||||
info.getTensorDesc() :
|
||||
|
@ -0,0 +1,36 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <ngraph/runtime/host_tensor.hpp>
|
||||
#include <vector>
|
||||
|
||||
#include "ngraph/node.hpp"
|
||||
#include "ngraph/op/util/op_types.hpp"
|
||||
#include "ngraph/ops.hpp"
|
||||
#include "ngraph/shape_util.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace runtime {
|
||||
namespace reference {
|
||||
void experimental_detectron_roi_feature_extractor(
|
||||
const std::vector<std::vector<float>>& inputs,
|
||||
const std::vector<Shape>& input_shapes,
|
||||
const op::v6::ExperimentalDetectronROIFeatureExtractor::Attributes& attrs,
|
||||
float* output_rois_features,
|
||||
float* output_rois);
|
||||
|
||||
void experimental_detectron_roi_feature_extractor_postprocessing(void* prois_features,
|
||||
void* prois,
|
||||
const ngraph::element::Type output_type,
|
||||
const std::vector<float>& output_roi_features,
|
||||
const std::vector<float>& output_rois,
|
||||
const Shape& output_roi_features_shape,
|
||||
const Shape& output_rois_shape);
|
||||
} // namespace reference
|
||||
} // namespace runtime
|
||||
} // namespace ngraph
|
@ -0,0 +1,387 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "ngraph/runtime/reference/experimental_detectron_roi_feature_extractor.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <numeric>
|
||||
|
||||
#include "ngraph/op/experimental_detectron_roi_feature.hpp"
|
||||
#include "ngraph/shape.hpp"
|
||||
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
# if defined(__linux__) && defined(__i386__) && (__GNUC__ == 7 && __GNUC_MINOR__ == 5 && __GNUC_PATCHLEVEL__ == 0)
|
||||
# define NEED_FIX 1
|
||||
# else
|
||||
# define NEED_FIX 0
|
||||
# endif
|
||||
#else
|
||||
# define NEED_FIX 0
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
constexpr int64_t input_rois_port = 0;
|
||||
constexpr int64_t input_features_start_port = 1;
|
||||
|
||||
// Chooses a pyramid level for every ROI.
//
// `rois` stores one (x0, y0, x1, y1) quadruple per ROI and `level_ids` must already have one
// slot per ROI. A ROI with a positive area gets level
//     floor(log2(sqrt(area) / 224 + 1e-6) + 2)
// clamped to [0, levels_num - 1]. A ROI whose area is not positive gets the out-of-range
// level `levels_num`.
void redistribute_rois(const std::vector<float>& rois, std::vector<int64_t>& level_ids, const int64_t levels_num) {
    const float canonical_scale = 224.0f;
    const int64_t canonical_level = 2;

    for (size_t roi_idx = 0; roi_idx < level_ids.size(); ++roi_idx) {
        const float* box = rois.data() + 4 * roi_idx;
        const float box_area = (box[2] - box[0]) * (box[3] - box[1]);

        // Written as a negated "> 0" so that a NaN area also lands in this branch.
        if (!(box_area > 0)) {
            level_ids[roi_idx] = levels_num;
            continue;
        }

        const float relative_scale = std::sqrt(box_area) / canonical_scale;
        const float log_scale = std::log2(relative_scale + 1e-6f);
        const int64_t unclamped_level = static_cast<int64_t>(std::floor(log_scale + canonical_level));
        level_ids[roi_idx] = std::max(static_cast<int64_t>(0), std::min(levels_num - 1, unclamped_level));
    }
}
|
||||
|
||||
// Reorders fixed-size records by ascending rank.
//
// `src_data` holds ranks.size() records of `step` floats each. On return `dst_mapping`
// (which must already have ranks.size() elements) lists the record indices sorted by rank,
// and record dst_mapping[i] has been copied to position i of `dst_data`.
void reord(const std::vector<float>& src_data,
           const std::vector<int64_t>& ranks,
           const int64_t step,
           float* dst_data,
           std::vector<int64_t>& dst_mapping) {
    // Start from the identity permutation and order it by the rank of each record.
    std::iota(dst_mapping.begin(), dst_mapping.end(), 0);
    std::sort(dst_mapping.begin(), dst_mapping.end(), [&ranks](int64_t lhs, int64_t rhs) {
        return ranks[lhs] < ranks[rhs];
    });

    const int64_t records_num = static_cast<int64_t>(ranks.size());
    const size_t record_bytes = sizeof(float) * step;
    const float* src_begin = src_data.data();
    for (int64_t dst_pos = 0; dst_pos < records_num; ++dst_pos) {
        const float* src_record = src_begin + dst_mapping[dst_pos] * step;
        std::memcpy(dst_data + dst_pos * step, src_record, record_bytes);
    }
}
|
||||
|
||||
// Builds the boundaries of the per-level ROI ranges.
//
// After the call `rois_per_level` has levels_num + 1 elements and rois_per_level[k] is the
// number of entries of `ids` that are smaller than k, so entries with id k occupy
// [rois_per_level[k], rois_per_level[k + 1]) once the ids are sorted. Every id must lie in
// [0, levels_num).
void split_points(const std::vector<int64_t>& ids, std::vector<int64_t>& rois_per_level, const int64_t levels_num) {
    // Histogram of ids.
    rois_per_level.assign(static_cast<size_t>(levels_num), 0);
    for (const int64_t level : ids) {
        ++rois_per_level[level];
    }

    // Inclusive prefix sum, then a leading zero turns the counts into range boundaries.
    std::partial_sum(rois_per_level.begin(), rois_per_level.end(), rois_per_level.begin());
    rois_per_level.insert(rois_per_level.begin(), 0);
}
|
||||
|
||||
// implementation taken from Caffe2
|
||||
// One precomputed bilinear-interpolation sample: four flattened offsets into a single
// feature-map plane and the weight applied to the value found at each of them.
template <typename T>
struct PreCalc {
    // Offsets of the (low, low), (low, high), (high, low) and (high, high) neighbours,
    // written as y * width + x.
    int64_t pos1, pos2, pos3, pos4;
    // Weights paired with pos1..pos4 respectively.
    T w1, w2, w3, w4;
};
|
||||
|
||||
// The function pre_calc_for_bilinear_interpolate() gives incorrect results for -O3 optimization level, when IE
|
||||
// is compiled using GCC 7.5.0 on Ubuntu 18.04 32-bit. But results are correct, for example, if we use Clang 10.0
|
||||
// on Ubuntu 18.04 32-bit with -O3 optimization level. Next, the function pre_calc_for_bilinear_interpolate()
|
||||
// gives incorrect results after compiling by GCC 7.5.0 or Clang 10 in Ubuntu 18.04 32-bit, if the optimization
|
||||
// level is -O1 or -O2. Finally, the function gives correct result in Ubuntu 18.04 32-bit, if the optimization
|
||||
// level is -O0.
|
||||
#if NEED_FIX
|
||||
# pragma GCC push_options
|
||||
# pragma GCC optimize("-O0")
|
||||
#endif
|
||||
template <typename T>
|
||||
void pre_calc_for_bilinear_interpolate(const int64_t height,
|
||||
const int64_t width,
|
||||
const int64_t pooled_height,
|
||||
const int64_t pooled_width,
|
||||
const int64_t iy_upper,
|
||||
const int64_t ix_upper,
|
||||
T roi_start_h,
|
||||
T roi_start_w,
|
||||
T bin_size_h,
|
||||
T bin_size_w,
|
||||
int64_t roi_bin_grid_h,
|
||||
int64_t roi_bin_grid_w,
|
||||
std::vector<PreCalc<T>>& pre_calc) {
|
||||
int64_t pre_calc_index = 0;
|
||||
for (int64_t ph = 0; ph < pooled_height; ph++) {
|
||||
for (int64_t pw = 0; pw < pooled_width; pw++) {
|
||||
for (int64_t iy = 0; iy < iy_upper; iy++) {
|
||||
for (int64_t ix = 0; ix < ix_upper; ix++) {
|
||||
T y = roi_start_h + static_cast<T>(ph) * bin_size_h +
|
||||
(static_cast<T>(iy) + static_cast<T>(0.5f)) * bin_size_h / static_cast<T>(roi_bin_grid_h);
|
||||
T x = roi_start_w + static_cast<T>(pw) * bin_size_w +
|
||||
(static_cast<T>(ix) + static_cast<T>(0.5f)) * bin_size_w / static_cast<T>(roi_bin_grid_w);
|
||||
|
||||
// deal with: inverse elements are out of feature map boundary
|
||||
if (y < static_cast<T>(-1.0f) || y > static_cast<T>(height) || x < static_cast<T>(-1.0f) ||
|
||||
x > static_cast<T>(width)) {
|
||||
// empty
|
||||
pre_calc_index += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
y = std::max(y, static_cast<T>(0.0f));
|
||||
x = std::max(x, static_cast<T>(0.0f));
|
||||
|
||||
int64_t y_low = static_cast<int64_t>(y);
|
||||
int64_t x_low = static_cast<int64_t>(x);
|
||||
int64_t y_high = 0;
|
||||
int64_t x_high = 0;
|
||||
|
||||
if (y_low >= height - 1) {
|
||||
y_high = y_low = height - 1;
|
||||
y = static_cast<T>(y_low);
|
||||
} else {
|
||||
y_high = y_low + 1;
|
||||
}
|
||||
|
||||
if (x_low >= width - 1) {
|
||||
x_high = x_low = width - 1;
|
||||
x = static_cast<T>(x_low);
|
||||
} else {
|
||||
x_high = x_low + 1;
|
||||
}
|
||||
|
||||
T ly = y - y_low;
|
||||
T lx = x - x_low;
|
||||
T hy = static_cast<T>(1.0) - ly;
|
||||
T hx = static_cast<T>(1.0) - lx;
|
||||
|
||||
// save weights and indeces
|
||||
PreCalc<T> pc;
|
||||
pc.pos1 = y_low * width + x_low;
|
||||
pc.pos2 = y_low * width + x_high;
|
||||
pc.pos3 = y_high * width + x_low;
|
||||
pc.pos4 = y_high * width + x_high;
|
||||
pc.w1 = hy * hx;
|
||||
pc.w2 = hy * lx;
|
||||
pc.w3 = ly * hx;
|
||||
pc.w4 = ly * lx;
|
||||
pre_calc.at(pre_calc_index) = pc;
|
||||
|
||||
pre_calc_index += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#if NEED_FIX
|
||||
# pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
void ROIAlignForward(const int64_t nthreads,
|
||||
const T* bottom_data,
|
||||
const T& spatial_scale,
|
||||
const int64_t channels,
|
||||
const int64_t height,
|
||||
const int64_t width,
|
||||
const int64_t pooled_height,
|
||||
const int64_t pooled_width,
|
||||
const int64_t sampling_ratio,
|
||||
const T* bottom_rois,
|
||||
const bool aligned,
|
||||
T* top_data) {
|
||||
int64_t roi_cols = 4;
|
||||
|
||||
int64_t n_rois = nthreads / channels / pooled_width / pooled_height;
|
||||
// (n, c, ph, pw) is an element in the pooled output
|
||||
for (int64_t n = 0; n < n_rois; ++n) {
|
||||
int64_t index_n = n * channels * pooled_width * pooled_height;
|
||||
|
||||
// roi could have 4 or 5 columns
|
||||
const T* offset_bottom_rois = bottom_rois + n * roi_cols;
|
||||
int64_t roi_batch_ind = 0;
|
||||
if (roi_cols == 5) {
|
||||
roi_batch_ind = static_cast<int64_t>(offset_bottom_rois[0]);
|
||||
offset_bottom_rois++;
|
||||
}
|
||||
|
||||
T offset = aligned ? static_cast<T>(0.5) : static_cast<T>(0.0);
|
||||
// Do not use rounding; this implementation detail is critical
|
||||
T roi_start_w = offset_bottom_rois[0] * spatial_scale - offset;
|
||||
T roi_start_h = offset_bottom_rois[1] * spatial_scale - offset;
|
||||
T roi_end_w = offset_bottom_rois[2] * spatial_scale - offset;
|
||||
T roi_end_h = offset_bottom_rois[3] * spatial_scale - offset;
|
||||
|
||||
// Force malformed ROIs to be 1x1
|
||||
T roi_width = std::max(roi_end_w - roi_start_w, static_cast<T>(1.0));
|
||||
T roi_height = std::max(roi_end_h - roi_start_h, static_cast<T>(1.0));
|
||||
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
|
||||
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
|
||||
|
||||
// We use roi_bin_grid to sample the grid and mimic integral
|
||||
int64_t roi_bin_grid_h =
|
||||
(sampling_ratio > 0) ? sampling_ratio : static_cast<int64_t>(std::ceil(roi_height / pooled_height));
|
||||
int64_t roi_bin_grid_w =
|
||||
(sampling_ratio > 0) ? sampling_ratio : static_cast<int64_t>(std::ceil(roi_width / pooled_width));
|
||||
|
||||
// We do average (integral) pooling inside a bin
|
||||
const T count = static_cast<T>(roi_bin_grid_h * roi_bin_grid_w);
|
||||
|
||||
// we want to precalculate indices and weights shared by all channels,
|
||||
// this is the key point of optimization
|
||||
std::vector<PreCalc<T>> pre_calc(roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height);
|
||||
|
||||
pre_calc_for_bilinear_interpolate<T>(height,
|
||||
width,
|
||||
pooled_height,
|
||||
pooled_width,
|
||||
roi_bin_grid_h,
|
||||
roi_bin_grid_w,
|
||||
roi_start_h,
|
||||
roi_start_w,
|
||||
bin_size_h,
|
||||
bin_size_w,
|
||||
roi_bin_grid_h,
|
||||
roi_bin_grid_w,
|
||||
pre_calc);
|
||||
|
||||
for (int64_t c = 0; c < channels; c++) {
|
||||
int64_t index_n_c = index_n + c * pooled_width * pooled_height;
|
||||
const T* offset_bottom_data = bottom_data + (roi_batch_ind * channels + c) * height * width;
|
||||
int64_t pre_calc_index = 0;
|
||||
|
||||
for (int64_t ph = 0; ph < pooled_height; ph++) {
|
||||
for (int64_t pw = 0; pw < pooled_width; pw++) {
|
||||
int64_t index = index_n_c + ph * pooled_width + pw;
|
||||
T output_val = 0.;
|
||||
for (int64_t iy = 0; iy < roi_bin_grid_h; iy++) {
|
||||
for (int64_t ix = 0; ix < roi_bin_grid_w; ix++) {
|
||||
PreCalc<T> pc = pre_calc[pre_calc_index];
|
||||
output_val += pc.w1 * offset_bottom_data[pc.pos1] + pc.w2 * offset_bottom_data[pc.pos2] +
|
||||
pc.w3 * offset_bottom_data[pc.pos3] + pc.w4 * offset_bottom_data[pc.pos4];
|
||||
|
||||
pre_calc_index += 1;
|
||||
}
|
||||
}
|
||||
output_val /= count;
|
||||
|
||||
top_data[index] = output_val;
|
||||
} // for pw
|
||||
} // for ph
|
||||
} // for c
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace ngraph {
|
||||
namespace runtime {
|
||||
namespace reference {
|
||||
void experimental_detectron_roi_feature_extractor(
|
||||
const std::vector<std::vector<float>>& inputs,
|
||||
const std::vector<Shape>& input_shapes,
|
||||
const op::v6::ExperimentalDetectronROIFeatureExtractor::Attributes& attrs,
|
||||
float* output_rois_features,
|
||||
float* output_rois) {
|
||||
int64_t output_dim = attrs.output_size;
|
||||
auto pyramid_scales = attrs.pyramid_scales;
|
||||
int64_t sampling_ratio = attrs.sampling_ratio;
|
||||
bool aligned = attrs.aligned;
|
||||
int64_t pooled_height = output_dim;
|
||||
int64_t pooled_width = output_dim;
|
||||
|
||||
const int64_t levels_num = static_cast<int64_t>(inputs.size() - input_features_start_port);
|
||||
const int64_t num_rois = static_cast<int64_t>(input_shapes[input_rois_port][0]);
|
||||
const int64_t channels_num = static_cast<int64_t>(input_shapes[input_features_start_port][1]);
|
||||
const int64_t feaxels_per_roi = pooled_height * pooled_width * channels_num;
|
||||
|
||||
const float* input_rois = inputs[input_rois_port].data();
|
||||
|
||||
std::vector<int64_t> level_ids(num_rois, 0);
|
||||
redistribute_rois(inputs[input_rois_port], level_ids, levels_num);
|
||||
|
||||
std::vector<float> reordered_rois(4 * num_rois, 0);
|
||||
std::vector<int64_t> original_rois_mapping(num_rois, 0);
|
||||
reord(inputs[input_rois_port], level_ids, 4, reordered_rois.data(), original_rois_mapping);
|
||||
|
||||
std::vector<int64_t> rois_per_level;
|
||||
split_points(level_ids, rois_per_level, levels_num + 1);
|
||||
|
||||
std::vector<float> output_rois_features_temp(feaxels_per_roi * num_rois, 0);
|
||||
for (int64_t i = 0; i < levels_num; ++i) {
|
||||
const int64_t level_rois_offset = rois_per_level[i];
|
||||
const int64_t level_rois_num = rois_per_level[i + 1] - level_rois_offset;
|
||||
if (level_rois_num > 0) {
|
||||
const float* featuremap = inputs[input_features_start_port + i].data();
|
||||
const int64_t featuremap_height = static_cast<int64_t>(input_shapes[input_features_start_port + i][2]);
|
||||
const int64_t featuremap_width = static_cast<int64_t>(input_shapes[input_features_start_port + i][3]);
|
||||
ROIAlignForward<float>(feaxels_per_roi * level_rois_num,
|
||||
featuremap,
|
||||
1.0f / pyramid_scales[i],
|
||||
channels_num,
|
||||
featuremap_height,
|
||||
featuremap_width,
|
||||
pooled_height,
|
||||
pooled_width,
|
||||
sampling_ratio,
|
||||
&reordered_rois[4 * level_rois_offset],
|
||||
aligned,
|
||||
&output_rois_features_temp[feaxels_per_roi * level_rois_offset]);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int64_t> dummy_mapping(num_rois, 0);
|
||||
reord(output_rois_features_temp, original_rois_mapping, feaxels_per_roi, output_rois_features, dummy_mapping);
|
||||
|
||||
memcpy(output_rois, input_rois, 4 * num_rois * sizeof(float));
|
||||
}
|
||||
|
||||
void experimental_detectron_roi_feature_extractor_postprocessing(void* prois_features,
|
||||
void* prois,
|
||||
const ngraph::element::Type output_type,
|
||||
const std::vector<float>& output_rois_features,
|
||||
const std::vector<float>& output_rois,
|
||||
const Shape& output_rois_features_shape,
|
||||
const Shape& output_rois_shape) {
|
||||
size_t output_rois_features_size = shape_size(output_rois_features_shape);
|
||||
size_t output_rois_size = shape_size(output_rois_shape);
|
||||
|
||||
switch (output_type) {
|
||||
case element::Type_t::bf16: {
|
||||
bfloat16* output_rois_features_ptr = reinterpret_cast<bfloat16*>(prois_features);
|
||||
bfloat16* output_rois_ptr = reinterpret_cast<bfloat16*>(prois);
|
||||
for (size_t i = 0; i < output_rois_features_size; ++i) {
|
||||
output_rois_features_ptr[i] = bfloat16(output_rois_features[i]);
|
||||
}
|
||||
for (size_t i = 0; i < output_rois_size; ++i) {
|
||||
output_rois_ptr[i] = bfloat16(output_rois[i]);
|
||||
}
|
||||
} break;
|
||||
case element::Type_t::f16: {
|
||||
float16* output_rois_features_ptr = reinterpret_cast<float16*>(prois_features);
|
||||
float16* output_rois_ptr = reinterpret_cast<float16*>(prois);
|
||||
for (size_t i = 0; i < output_rois_features_size; ++i) {
|
||||
output_rois_features_ptr[i] = float16(output_rois_features[i]);
|
||||
}
|
||||
for (size_t i = 0; i < output_rois_size; ++i) {
|
||||
output_rois_ptr[i] = float16(output_rois[i]);
|
||||
}
|
||||
} break;
|
||||
case element::Type_t::f32: {
|
||||
float* output_rois_features_ptr = reinterpret_cast<float*>(prois_features);
|
||||
float* output_rois_ptr = reinterpret_cast<float*>(prois);
|
||||
memcpy(output_rois_features_ptr, output_rois_features.data(), output_rois_features_size * sizeof(float));
|
||||
memcpy(output_rois_ptr, output_rois.data(), output_rois_size * sizeof(float));
|
||||
} break;
|
||||
default:;
|
||||
}
|
||||
}
|
||||
} // namespace reference
|
||||
} // namespace runtime
|
||||
} // namespace ngraph
|
@ -30,6 +30,7 @@
|
||||
#include <ngraph/runtime/reference/experimental_detectron_detection_output.hpp>
|
||||
#include <ngraph/runtime/reference/experimental_detectron_prior_grid_generator.hpp>
|
||||
#include <ngraph/runtime/reference/experimental_detectron_proposal_single_image.hpp>
|
||||
#include <ngraph/runtime/reference/experimental_detectron_roi_feature_extractor.hpp>
|
||||
#include <ngraph/runtime/reference/experimental_detectron_topk_rois.hpp>
|
||||
#include <ngraph/runtime/reference/extract_image_patches.hpp>
|
||||
#include <ngraph/runtime/reference/fft.hpp>
|
||||
@ -1222,6 +1223,80 @@ bool evaluate(const shared_ptr<op::v6::ExperimentalDetectronDetectionOutput>& op
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace experimental_roi_feature {
|
||||
struct InfoForEDROIFeature {
|
||||
Shape output_rois_features_shape;
|
||||
Shape output_rois_shape;
|
||||
};
|
||||
|
||||
InfoForEDROIFeature get_info_for_ed_roi_feature(const std::vector<Shape> input_shapes,
|
||||
const op::v6::ExperimentalDetectronROIFeatureExtractor::Attributes& attrs) {
|
||||
InfoForEDROIFeature result;
|
||||
|
||||
size_t output_size = static_cast<size_t>(attrs.output_size);
|
||||
auto out_shape = Shape{0, 0, output_size, output_size};
|
||||
auto out_rois_shape = Shape{0, 4};
|
||||
|
||||
auto rois_shape = input_shapes[0];
|
||||
|
||||
out_shape[0] = rois_shape[0];
|
||||
out_rois_shape[0] = rois_shape[0];
|
||||
|
||||
out_shape[1] = input_shapes[1][1];
|
||||
|
||||
result.output_rois_features_shape = out_shape;
|
||||
result.output_rois_shape = out_rois_shape;
|
||||
|
||||
return result;
|
||||
}
|
||||
} // namespace experimental_roi_feature
|
||||
|
||||
template <element::Type_t ET>
|
||||
bool evaluate(const shared_ptr<op::v6::ExperimentalDetectronROIFeatureExtractor>& op,
|
||||
const HostTensorVector& outputs,
|
||||
const HostTensorVector& inputs)
|
||||
{
|
||||
const auto attrs = op->get_attrs();
|
||||
|
||||
std::vector<std::vector<float>> input_data;
|
||||
std::vector<Shape> input_shapes;
|
||||
for (const auto& input : inputs) {
|
||||
const auto current_shape = input->get_shape();
|
||||
input_data.push_back(get_floats(input, current_shape));
|
||||
input_shapes.push_back(current_shape);
|
||||
}
|
||||
|
||||
const auto info = experimental_roi_feature::get_info_for_ed_roi_feature(input_shapes, attrs);
|
||||
const auto& output_rois_features_shape = info.output_rois_features_shape;
|
||||
const auto& output_rois_shape = info.output_rois_shape;
|
||||
|
||||
const auto output_type = op->get_input_element_type(0);
|
||||
|
||||
outputs[0]->set_element_type(output_type);
|
||||
outputs[0]->set_shape(output_rois_features_shape);
|
||||
outputs[1]->set_element_type(output_type);
|
||||
outputs[1]->set_shape(output_rois_shape);
|
||||
|
||||
std::vector<float> output_rois_features(shape_size(output_rois_features_shape));
|
||||
std::vector<float> output_rois(shape_size(output_rois_shape));
|
||||
|
||||
runtime::reference::experimental_detectron_roi_feature_extractor(input_data,
|
||||
input_shapes,
|
||||
attrs,
|
||||
output_rois_features.data(),
|
||||
output_rois.data());
|
||||
|
||||
runtime::reference::experimental_detectron_roi_feature_extractor_postprocessing(outputs[0]->get_data_ptr(),
|
||||
outputs[1]->get_data_ptr(),
|
||||
output_type,
|
||||
output_rois_features,
|
||||
output_rois,
|
||||
output_rois_features_shape,
|
||||
output_rois_shape);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace fft_v7 {
|
||||
struct InfoForFFT7 {
|
||||
std::vector<float> input_data;
|
||||
|
@ -88,6 +88,7 @@ NGRAPH_OP(CTCGreedyDecoderSeqLen, op::v6)
|
||||
NGRAPH_OP(ExperimentalDetectronDetectionOutput, op::v6)
|
||||
NGRAPH_OP(ExperimentalDetectronGenerateProposalsSingleImage, op::v6)
|
||||
NGRAPH_OP(ExperimentalDetectronPriorGridGenerator, op::v6)
|
||||
NGRAPH_OP(ExperimentalDetectronROIFeatureExtractor, op::v6)
|
||||
NGRAPH_OP(ExperimentalDetectronTopKROIs, op::v6)
|
||||
NGRAPH_OP(GatherElements, op::v6)
|
||||
NGRAPH_OP(MVN, ngraph::op::v6)
|
||||
|
@ -129,7 +129,7 @@ quantize_clamp_int32
|
||||
minimum_u16
|
||||
|
||||
# Interpreter backend doesn't implement evaluate method for OP ExperimentalDetectronROIFeatureExtractor
|
||||
INTERPRETER.onnx_model_experimental_detectron_roi_feature_extractor
|
||||
# INTERPRETER.onnx_model_experimental_detectron_roi_feature_extractor
|
||||
|
||||
# No evaluator for DeformableConv2D
|
||||
onnx_model_deformable_conv_2d
|
||||
|
Loading…
Reference in New Issue
Block a user