Fix performance on resnet50 quantized models (#7670)

* Fix performance on resnet50 quantized models

Low precision (LP) transformations cannot handle the model unless the last four inputs
of FakeQuantize (input_low, input_high, output_low, output_high) are constants. To meet
that requirement, we perform constant folding for those inputs when importing the
QuantizeLinear ONNX operator (see the sketch after these notes).

Ticket: 65375

* fix "Cannot find blob with name: y" exception during onnx_model_quant_conv_linear

* remove linking with onnx_ngraph_frontend

* fix exclude path
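
As a sketch of the constraint the first note describes, LPT can only consume a
FakeQuantize whose four range inputs are Constant nodes. The helper below is
hypothetical, for illustration only (it is not part of this patch):

    #include <ngraph/opsets/opset1.hpp>

    // True when inputs 1..4 (input_low, input_high, output_low, output_high)
    // of a FakeQuantize are Constant nodes, as the LP transformations require.
    bool has_constant_ranges(const std::shared_ptr<ngraph::opset1::FakeQuantize>& fq) {
        for (size_t i = 1; i <= 4; ++i) {
            if (!ngraph::is_type<ngraph::opset1::Constant>(fq->get_input_node_ptr(i)))
                return false;
        }
        return true;
    }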
Mateusz Tabaka 2021-10-13 09:18:37 +02:00 committed by GitHub
parent 0d020974f9
commit db527fff41
14 changed files with 401 additions and 16 deletions


@@ -66,6 +66,11 @@ ov_model_convert("${CMAKE_CURRENT_SOURCE_DIR}/ngraph/test"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/ngraph"
onnx_out_files)
set(rel_path "inference-engine/tests/functional/plugin/shared/models")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/func_tests/models"
ft_out_files)
set(rel_path "inference-engine/tests/functional/inference_engine/onnx_reader")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_reader"
@@ -116,6 +121,7 @@ if(ENABLE_TESTS)
endif()
add_custom_target(test_model_zoo DEPENDS ${onnx_out_files}
+    ${ft_out_files}
${ie_onnx_out_files}
${ie_serialize_out_files}
${ie_onnx_import_out_files})


@@ -80,11 +80,11 @@ public:
static std::shared_ptr<Node> swapMultiplyAndAdd(std::shared_ptr<opset1::Add> addAfterMultiply, const int multiplyBranch);
-    static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::vector<std::shared_ptr<Node>>& targets);
+    static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::vector<std::shared_ptr<Node>>& targets, bool overrideName = true);
-    static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::shared_ptr<Node>& target);
+    static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::shared_ptr<Node>& target, bool overrideName = true);
-    static void copyInfo(const std::shared_ptr<Node>& source, const std::shared_ptr<Node>& target);
+    static void copyInfo(const std::shared_ptr<Node>& source, const std::shared_ptr<Node>& target, bool overrideName = true);
static bool isScalarLike(std::shared_ptr<opset1::Constant> constant);


@@ -108,7 +108,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
// multiply by weights: [1, ..., 1, Y] x [Y, Z] => [1, ..., 1, Z]
const auto newSubConst = NetworkHelper::toScalarIfPossible(fold<opset1::MatMul>(
broadcastedConst,
-        foldConvert(broadcastedConst, newMatMul->get_element_type()),
+        foldConvert(newMatMul->input_value(1), newMatMul->get_element_type()),
newMatMul->get_transpose_a(),
newMatMul->get_transpose_b()));


@@ -302,12 +302,13 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
void NetworkHelper::copyInfo(
const std::vector<std::shared_ptr<Node>>& sources,
-    const std::vector<std::shared_ptr<Node>>& targets) {
+    const std::vector<std::shared_ptr<Node>>& targets,
+    bool overrideName) {
ngraph::copy_runtime_info(sources, targets);
for (const auto& target : targets) {
const std::string friendlyName = sources[0]->get_friendly_name();
-        if (!friendlyName.empty()) {
+        if (!friendlyName.empty() && overrideName) {
target->set_friendly_name(friendlyName);
}
@@ -345,12 +346,12 @@ void NetworkHelper::copyInfo(
}
}
-void NetworkHelper::copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::shared_ptr<Node>& target) {
-    copyInfo(sources, std::vector<std::shared_ptr<Node>>{ target });
+void NetworkHelper::copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::shared_ptr<Node>& target, bool overrideName) {
+    copyInfo(sources, std::vector<std::shared_ptr<Node>>{ target }, overrideName);
}
-void NetworkHelper::copyInfo(const std::shared_ptr<Node>& source, const std::shared_ptr<Node>& target) {
-    copyInfo(std::vector<std::shared_ptr<Node>>{ source }, std::vector<std::shared_ptr<Node>>{ target });
+void NetworkHelper::copyInfo(const std::shared_ptr<Node>& source, const std::shared_ptr<Node>& target, bool overrideName) {
+    copyInfo(std::vector<std::shared_ptr<Node>>{ source }, std::vector<std::shared_ptr<Node>>{ target }, overrideName);
}
bool NetworkHelper::isScalarLike(std::shared_ptr<opset1::Constant> constant) {
@@ -657,8 +658,10 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::fuseConvert(const std::shar
ngraph::op::TemporaryReplaceOutputType(fakeQuantize->input_value(4), element::f32).get(),
fakeQuantize->get_levels());
NetworkHelper::setOutDataPrecisionForTypeRelaxed(newFakeQuantize, node->get_output_element_type(0));
+    newFakeQuantize->set_friendly_name(node->get_friendly_name());
replace_node(node->shared_from_this(), newFakeQuantize);
-    NetworkHelper::copyInfo(fakeQuantize, newFakeQuantize);
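+    // the friendly name was already set from 'node' above; copy the remaining info without renaming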
+    bool overrideName = false;
+    NetworkHelper::copyInfo(fakeQuantize, newFakeQuantize, overrideName);
return newFakeQuantize;
}


@@ -16,7 +16,7 @@ if (NGRAPH_ONNX_FRONTEND_ENABLE)
list(APPEND LINK_LIBRARIES onnx_custom_op)
list(APPEND DEPENDENCIES template_extension onnx_custom_op)
else()
set(EXCLUDED_SOURCE_PATHS "${CMAKE_CURRENT_SOURCE_DIR}/extension")
set(EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/extension ${CMAKE_CURRENT_SOURCE_DIR}/onnx)
endif()
addIeTargetTest(


@@ -0,0 +1,12 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include "onnx/quantized_models_tests.hpp"
using namespace ONNXTestsDefinitions;
INSTANTIATE_TEST_SUITE_P(ONNXQuantizedModels, QuantizedModelsTests,
::testing::Values(CommonTestUtils::DEVICE_CPU),
QuantizedModelsTests::getTestCaseName);


@@ -6,6 +6,14 @@ set(TARGET_NAME funcSharedTests)
set(PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
set(DEPENDENCIES inference_engine mock_engine HeteroPlugin MultiDevicePlugin)
+if (NGRAPH_ONNX_FRONTEND_ENABLE)
+    list(APPEND DEPENDENCIES test_model_zoo)
+    list(APPEND DEFINES TEST_MODELS="${TEST_MODEL_ZOO}/func_tests/models/")
+else()
+    set(EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/src/onnx)
+endif()
addIeTarget(
NAME ${TARGET_NAME}
TYPE STATIC
@@ -15,6 +23,8 @@ addIeTarget(
ADD_CPPLINT
DEVELOPER_PACKAGE
inference_engine_tests
+        EXCLUDED_SOURCE_PATHS ${EXCLUDED_SOURCE_PATHS}
+        DEFINES ${DEFINES}
INCLUDES
PUBLIC
${PUBLIC_HEADERS_DIR}
@@ -29,10 +39,7 @@ addIeTarget(
openvino::util
inference_engine_transformations
DEPENDENCIES
-        inference_engine
-        mock_engine
-        HeteroPlugin
-        MultiDevicePlugin
+        ${DEPENDENCIES}
)
# CVS-55376


@@ -0,0 +1,22 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <unordered_map>
#include "shared_test_classes/base/layer_test_utils.hpp"
namespace ONNXTestsDefinitions {
class QuantizedModelsTests : public testing::WithParamInterface<std::string>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<std::string>& obj);
protected:
void SetUp() override;
void runModel(const char* model, const std::unordered_map<std::string, ngraph::element::Type_t>& expected_layer_types);
};
} // namespace ONNXTestsDefinitions


@@ -0,0 +1,132 @@
ir_version: 6
producer_name: "pytorch"
producer_version: "1.8"
graph {
node {
output: "884"
name: "Constant_10"
op_type: "Constant"
attribute {
name: "value"
t {
dims: 1
data_type: 1
raw_data: "\000\000\000\000"
}
type: TENSOR
}
}
node {
output: "885"
name: "Constant_11"
op_type: "Constant"
attribute {
name: "value"
t {
dims: 1
data_type: 1
raw_data: "6\241\311@"
}
type: TENSOR
}
}
node {
input: "883"
input: "884"
input: "885"
input: "884"
input: "885"
output: "886"
name: "FakeQuantize_12"
op_type: "FakeQuantize"
attribute {
name: "levels"
i: 256
type: INT
}
domain: "org.openvinotoolkit"
}
node {
input: "886"
output: "887"
name: "MaxPool_13"
op_type: "MaxPool"
attribute {
name: "ceil_mode"
i: 0
type: INT
}
attribute {
name: "kernel_shape"
ints: 3
ints: 3
type: INTS
}
attribute {
name: "pads"
ints: 1
ints: 1
ints: 1
ints: 1
type: INTS
}
attribute {
name: "strides"
ints: 2
ints: 2
type: INTS
}
}
name: "torch-jit-export"
input {
name: "883"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 1
}
dim {
dim_value: 64
}
dim {
dim_value: 112
}
dim {
dim_value: 112
}
}
}
}
}
output {
name: "887"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 1
}
dim {
dim_value: 64
}
dim {
dim_value: 56
}
dim {
dim_value: 56
}
}
}
}
}
}
opset_import {
version: 10
}
opset_import {
domain: "org.openvinotoolkit"
version: 1
}


@@ -0,0 +1,126 @@
ir_version: 6
producer_name: "pytorch"
producer_version: "1.8"
graph {
node {
output: "886"
name: "Constant_12"
op_type: "Constant"
attribute {
name: "value"
t {
data_type: 1
raw_data: "\242k\312<"
}
type: TENSOR
}
}
node {
output: "887"
name: "Constant_13"
op_type: "Constant"
attribute {
name: "value"
t {
data_type: 2
raw_data: "\000"
}
type: TENSOR
}
}
node {
input: "885"
input: "886"
input: "887"
output: "888"
name: "QuantizeLinear_14"
op_type: "QuantizeLinear"
}
node {
input: "888"
input: "886"
input: "887"
output: "889"
name: "DequantizeLinear_15"
op_type: "DequantizeLinear"
}
node {
input: "889"
output: "890"
name: "MaxPool_16"
op_type: "MaxPool"
attribute {
name: "ceil_mode"
i: 0
type: INT
}
attribute {
name: "kernel_shape"
ints: 3
ints: 3
type: INTS
}
attribute {
name: "pads"
ints: 1
ints: 1
ints: 1
ints: 1
type: INTS
}
attribute {
name: "strides"
ints: 2
ints: 2
type: INTS
}
}
name: "torch-jit-export"
input {
name: "885"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 1
}
dim {
dim_value: 64
}
dim {
dim_value: 112
}
dim {
dim_value: 112
}
}
}
}
}
output {
name: "890"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 1
}
dim {
dim_value: 64
}
dim {
dim_value: 56
}
dim {
dim_value: 56
}
}
}
}
}
}
opset_import {
version: 10
}


@@ -0,0 +1,49 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <file_utils.h>
#include "onnx/quantized_models_tests.hpp"
namespace ONNXTestsDefinitions {
std::string QuantizedModelsTests::getTestCaseName(const testing::TestParamInfo<std::string>& obj) {
std::string targetDevice = obj.param;
std::ostringstream result;
result << "device=" << targetDevice;
return result.str();
}
void QuantizedModelsTests::SetUp() {
targetDevice = this->GetParam();
}
static std::string getModelFullPath(const char* path) {
return FileUtils::makePath<char>(TEST_MODELS, path);
}
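// Reads an ONNX model from the test model zoo, runs inference on the target
// device, and checks that each listed runtime layer has the expected
// (quantized) element type.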
void QuantizedModelsTests::runModel(const char* model, const std::unordered_map<std::string, ngraph::element::Type_t>& expected_layer_types) {
auto ie = getCore();
auto network = ie->ReadNetwork(getModelFullPath(model));
function = network.getFunction();
Run();
auto runtime_function = executableNetwork.GetExecGraphInfo().getFunction();
int ops_found = 0;
for (const auto& node : runtime_function->get_ordered_ops()) {
const auto& name = node->get_friendly_name();
if (expected_layer_types.count(name)) {
ops_found++;
ASSERT_EQ(expected_layer_types.at(name), node->get_element_type());
}
}
ASSERT_GT(ops_found, 0);
}
TEST_P(QuantizedModelsTests, MaxPoolQDQ) {
runModel("max_pool_qdq.onnx", {{"890_original", ngraph::element::u8}});
}
TEST_P(QuantizedModelsTests, MaxPoolFQ) {
runModel("max_pool_fq.onnx", {{"887_original", ngraph::element::u8}});
}
} // namespace ONNXTestsDefinitions


@@ -34,6 +34,8 @@ bool evaluate_equal(const HostTensorPtr& arg0,
out->set_broadcast(broadcast_spec, arg0, arg1, element::boolean);
switch (arg0->get_element_type()) {
NGRAPH_TYPE_CASE(evaluate_equal, boolean, arg0, arg1, out, broadcast_spec);
+        NGRAPH_TYPE_CASE(evaluate_equal, i8, arg0, arg1, out, broadcast_spec);
+        NGRAPH_TYPE_CASE(evaluate_equal, u8, arg0, arg1, out, broadcast_spec);
NGRAPH_TYPE_CASE(evaluate_equal, i32, arg0, arg1, out, broadcast_spec);
NGRAPH_TYPE_CASE(evaluate_equal, i64, arg0, arg1, out, broadcast_spec);
NGRAPH_TYPE_CASE(evaluate_equal, u32, arg0, arg1, out, broadcast_spec);
@@ -72,6 +74,8 @@ bool op::v1::Equal::has_evaluate() const {
NGRAPH_OP_SCOPE(v1_Equal_has_evaluate);
switch (get_input_element_type(0)) {
case ngraph::element::boolean:
+    case ngraph::element::i8:
+    case ngraph::element::u8:
case ngraph::element::i32:
case ngraph::element::i64:
case ngraph::element::u32:


@@ -88,9 +88,13 @@ std::tuple<std::shared_ptr<ngraph::Node>, std::shared_ptr<ngraph::Node>> get_inp
input_low =
std::make_shared<default_opset::Multiply>(y_scale,
std::make_shared<default_opset::Subtract>(output_low, zero_point));
+    if (auto constant = get_constant_from_source(input_low))
+        input_low = constant;
input_high =
std::make_shared<default_opset::Multiply>(y_scale,
std::make_shared<default_opset::Subtract>(output_high, zero_point));
+    if (auto constant = get_constant_from_source(input_high))
+        input_high = constant;
return std::make_tuple(input_low, input_high);
}
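
For reference (not part of the patch): get_constant_from_source, declared in
ngraph/validation_util.hpp, evaluates the subgraph feeding an output and returns
a Constant when all of its inputs are statically known, or nullptr otherwise, so
the guards above leave input_low/input_high unchanged when folding is impossible.
A minimal usage sketch with made-up scale/zero-point values:

    #include <ngraph/opsets/opset1.hpp>
    #include <ngraph/validation_util.hpp>

    // scale * (low - zero_point), built from constants only
    auto scale = ngraph::opset1::Constant::create(ngraph::element::f32, {}, {0.0247f});
    auto zero = ngraph::opset1::Constant::create(ngraph::element::f32, {}, {0.0f});
    auto low = ngraph::opset1::Constant::create(ngraph::element::f32, {}, {0.0f});
    std::shared_ptr<ngraph::Node> input_low = std::make_shared<ngraph::opset1::Multiply>(
        scale, std::make_shared<ngraph::opset1::Subtract>(low, zero));
    // The whole expression collapses into a single Constant node.
    if (auto constant = ngraph::get_constant_from_source(input_low))
        input_low = constant;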


@@ -444,6 +444,26 @@ TEST(constant_folding, const_convert) {
vector<int64_t> expected{1, 2, 3, 4, 5};
test_const_convert(in, expected);
}
+    {
+        vector<int8_t> in{-128, -2, 0, 1, 3, 127};
+        vector<float> expected{-128, -2, 0, 1, 3, 127};
+        test_const_convert(in, expected);
+    }
+    {
+        vector<uint8_t> in{0, 1, 3, 127, 255};
+        vector<float> expected{0, 1, 3, 127, 255};
+        test_const_convert(in, expected);
+    }
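+    // out-of-range inputs are expected to wrap around (e.g. -300 -> -44, 256 -> 0), not saturate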
+    {
+        vector<float> in{-300, -128, -1, 0, 33, 127, 128};
+        vector<int8_t> expected{-44, -128, -1, 0, 33, 127, -128};
+        test_const_convert(in, expected);
+    }
+    {
+        vector<float> in{0, 33, 127, 255, 256};
+        vector<uint8_t> expected{0, 33, 127, 255, 0};
+        test_const_convert(in, expected);
+    }
}
TEST(constant_folding, shape_of_v0) {