[GNA] Remove extra reshape before maxpool. Fix activation and maxpool reordering. (#5404)

Fix convolution input transposition for Kaldi models with FakeQuantize layers.
Fix a floating-point error in gnaFuncTests when debug logs are enabled.
Elizaveta Lobanova 2021-04-30 15:42:27 +03:00 committed by GitHub
parent bcb67bfb6a
commit 22e4566faa
9 changed files with 238 additions and 9 deletions


@@ -453,6 +453,12 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
     size_t num_data_bytes_in = (num_inputs + num_input_padding) * inputs->getPrecision().size();
     auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
+    // Skip FakeQuantize and ScaleShift between Convolution and Input
+    if (LayerInfo(connectedInputLayer).isFakeQuantize()) {
+        connectedInputLayer = CNNNetPrevLayerSkipCertain(connectedInputLayer, 0, [](CNNLayerPtr l) {
+            return LayerInfo(l).isScaleShift();
+        });
+    }
     // TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know how kaldi will handle that
     if (!dnn->do_rotate_input) {
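Editor's note (not part of the commit): Kaldi models quantized with FakeQuantize carry the producer chain

    Input -> [optional ScaleShift] -> FakeQuantize -> Convolution

so the guard above unwraps FakeQuantize (skipping any ScaleShift) to let the do_rotate_input transposition logic inspect the real input layer rather than the quantization wrappers.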
@@ -626,6 +632,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
         ptr_weights,
         ptr_biases);
+    currentComponent.num_bytes_per_input = inputs->getPrecision().size();
     currentComponent.num_bytes_per_output = outputs->getPrecision().size();
     if (inputs->getLayout() == Layout::NHWC) {


@@ -56,6 +56,8 @@
 #include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp>
 #include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
+#include "transformations/remove_extra_reshapes.hpp"
 #if GNA_LIB_VER == 2
 #include <gna2-model-api.h>
@@ -663,6 +665,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
        manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
        manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
        manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
+       manager.register_pass<RemoveExtraReshapes>();
        // UnrollTI should be the last transformation in the transformation pipeline
        manager.register_pass<ngraph::pass::UnrollTensorIterator>();


@@ -371,19 +371,21 @@ namespace {
 void ReorderMaxPoolPass::run() {
     // detecting following pattern
-    // conv->relu->maxpooling
-    // changing it to conv->maxpooling->relu
+    // conv->activation->maxpooling
+    // changing it to conv->maxpooling->activation
     for (auto & l : *pLayers) {
         auto pool = LayerInfo(l);
         if (!pool.isMaxPooling()) continue;

         // don't reorder if pooling is 2D for CNN2D
         auto pooling = dynamic_cast<PoolingLayer*>(l.get());
-        if (pooling == nullptr || (is2D(pooling->_kernel) || is2D(pooling->_stride))) continue;
+        // todo: return the check for stride after it'll be fixed in MO for Kaldi models
+        if (pooling == nullptr || (is2D(pooling->_kernel))) continue;

         // checking prev layer type
-        auto activation = LayerInfo(CNNNetPrevLayer(l));
-        if (!activation.isActivation()) continue;
+        auto actLayer = CNNNetPrevLayer(l);
+        auto activation = LayerInfo(actLayer);
+        if (!activation.isActivation() || actLayer->insData.size() > 1) continue;

         // if activation came from convolution
         auto convolution = LayerInfo(CNNNetPrevLayer(static_cast<InferenceEngine::CNNLayer*>(activation)));
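Editor's note (not part of the commit): the reorder is legal because max pooling commutes with any monotonically non-decreasing activation, which covers the activations fused here. A minimal self-contained check, using ReLU as the example activation:

#include <algorithm>
#include <cassert>

// For non-decreasing f: f(max(a, b)) == max(f(a), f(b)),
// so pooling first and activating afterwards yields the same values.
float relu(float x) { return std::max(x, 0.0f); }

int main() {
    const float a = -1.5f, b = 2.0f;
    assert(relu(std::max(a, b)) == std::max(relu(a), relu(b)));
    return 0;
}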


@@ -0,0 +1,31 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "transformations/remove_extra_reshapes.hpp"

#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

using namespace GNAPluginNS;

NGRAPH_RTTI_DEFINITION(RemoveExtraReshapes, "RemoveExtraReshapes", 0);

RemoveExtraReshapes::RemoveExtraReshapes() {
    const auto reshape = ngraph::pattern::wrap_type<ngraph::opset1::Reshape>();
    const auto pooling = ngraph::pattern::wrap_type<ngraph::opset1::MaxPool>({reshape});

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
        const auto& pattern_map = m.get_pattern_value_map();
        const auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
        // Only fold reshapes that change nothing (input shape == output shape)
        if (reshape_node->get_input_shape(0) != reshape_node->get_output_shape(0)) {
            return false;
        }

        ngraph::replace_output_update_name(reshape_node->output(0), reshape_node->input_value(0));
        return true;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(pooling, "RemoveExtraReshapes");
    this->register_matcher(m, callback);
}
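Editor's sketch (not part of the commit): a minimal standalone use of the pass, assuming the nGraph opset1 API shown above. A Reshape whose input and output shapes match is folded away, leaving the MaxPool connected directly to the Parameter:

#include <ngraph/function.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pass/manager.hpp>
#include "transformations/remove_extra_reshapes.hpp"

int main() {
    // Parameter -> no-op Reshape (same shape in and out) -> MaxPool
    auto param = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 8, 1, 168});
    auto pattern = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1, 8, 1, 168});
    auto reshape = std::make_shared<ngraph::opset1::Reshape>(param, pattern, false);
    auto pool = std::make_shared<ngraph::opset1::MaxPool>(reshape, ngraph::Strides{1, 2},
        ngraph::Shape{0, 0}, ngraph::Shape{0, 0}, ngraph::Shape{1, 2});
    auto func = std::make_shared<ngraph::Function>(ngraph::NodeVector{pool}, ngraph::ParameterVector{param});

    ngraph::pass::Manager manager;
    manager.register_pass<GNAPluginNS::RemoveExtraReshapes>();
    manager.run_passes(func);  // the no-op Reshape is gone afterwards
    return 0;
}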


@@ -0,0 +1,20 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>

namespace GNAPluginNS {

/**
 * @brief Removes reshapes before MaxPool which do nothing. Such reshapes can be a result of conversion from IR10 to IR7.
 */
class RemoveExtraReshapes : public ngraph::pass::MatcherPass {
public:
    NGRAPH_RTTI_DECLARATION;
    RemoveExtraReshapes();
};

}  // namespace GNAPluginNS


@@ -0,0 +1,148 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include <ie_core.hpp>

#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"

typedef std::tuple<
    InferenceEngine::Precision,          // Network Precision
    std::string,                         // Target Device
    std::map<std::string, std::string>,  // Configuration
    std::vector<size_t>,                 // Input Shape
    std::pair<float, float>,             // Input Min and Max
    size_t                               // Levels
> fqMaxpoolReorderingParams;

namespace LayerTestsDefinitions {

class FQMaxpoolReordering : public testing::WithParamInterface<fqMaxpoolReorderingParams>,
                            public LayerTestsUtils::LayerTestsCommon {
    float inputDataMin = 0.0f;
    float inputDataMax = 0.0f;
    float inputDataResolution = 1.0f;

public:
    static std::string getTestCaseName(testing::TestParamInfo<fqMaxpoolReorderingParams> obj) {
        InferenceEngine::Precision netPrecision;
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        std::vector<size_t> inputShape;
        std::pair<float, float> inputMinMax;
        size_t levels = 0;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = obj.param;

        std::ostringstream result;
        result << "netPRC=" << netPrecision.name() << "_";
        result << "targetDevice=" << targetDevice << "_";
        for (auto const& configItem : configuration) {
            result << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        result << "_inputShape=" << CommonTestUtils::vec2str(inputShape);
        result << "_inputMinMax=(" << inputMinMax.first << ".." << inputMinMax.second << ")";
        result << "_levels=" << levels;
        return result.str();
    }

    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const {
        return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, 1 / inputDataResolution);
    }

protected:
    void SetUp() override {
        InferenceEngine::Precision netPrecision;
        std::vector<size_t> inputShape;
        std::pair<float, float> inputMinMax;
        size_t levels = 0;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = this->GetParam();
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        // Keep the FQ range for GenerateInput and the convolution FQ constants
        std::tie(inputDataMin, inputDataMax) = inputMinMax;

        auto inputLowNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputMinMax.first });
        auto inputHighNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputMinMax.second });

        auto inputVector = ngraph::builder::makeParams(ngPrc, {inputShape});
        auto inputFQ = std::make_shared<ngraph::opset1::FakeQuantize>(inputVector[0],
            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);

        auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {8, inputShape[1], 1, 8}, { 1.0f });
        auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin});
        auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMax});
        auto convWeightsFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
            convLowNode, convHighNode, convLowNode, convHighNode, levels);
        auto convWeightsFQ = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(convWeightsFQNode);

        auto conv = std::make_shared<ngraph::opset1::Convolution>(inputFQ, convWeightsFQ, std::vector<size_t>{ 1, 1 },
                                                                  std::vector<ptrdiff_t>{ 0, 0 }, std::vector<ptrdiff_t>{ 0, 0 },
                                                                  std::vector<size_t>{ 1, 1 },
                                                                  ngraph::op::PadType::VALID);
        auto biasesWeightsNode = ngraph::builder::makeConstant(ngPrc, {}, std::vector<float>{ 0.0f });
        auto add = std::make_shared<ngraph::opset1::Add>(conv, biasesWeightsNode);

        auto convFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(add,
            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);

        auto maxpool = ngraph::builder::makePooling(convFQNode, {1, 2}, {0, 0}, {0, 0}, {1, 2}, ngraph::op::RoundingType::FLOOR,
                                                    ngraph::op::PadType::VALID, false, ngraph::helpers::PoolingTypes::MAX);

        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(maxpool)};
        function = std::make_shared<ngraph::Function>(results, inputVector, "FQMaxPoolReorder");
    }
};

TEST_P(FQMaxpoolReordering, CompareWithRefImpl) {
    Run();
}

const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16
};

const std::vector<std::map<std::string, std::string>> configs = {
    {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
    }
};

const std::vector<std::vector<size_t>> inputShape = {
    {1, 1, 1, 1024},
    {1, 8, 1, 168},
};

const std::vector<std::pair<float, float>> inputMinMax = {
    {-0.5, 0.5},
    {-2, 2},
    {-8, 8}
};

const std::vector<size_t> levels = {
    65535,
};

INSTANTIATE_TEST_CASE_P(smoke_fq_maxpool_reordering, FQMaxpoolReordering,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configs),
        ::testing::ValuesIn(inputShape),
        ::testing::ValuesIn(inputMinMax),
        ::testing::ValuesIn(levels)),
    FQMaxpoolReordering::getTestCaseName);
} // namespace LayerTestsDefinitions
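Editor's suggestion (an assumption, not from the commit): with the usual gtest runner for the gnaFuncTests binary mentioned above, the new suite alone can be selected via a filter:

./gnaFuncTests --gtest_filter=smoke_fq_maxpool_reordering*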


@@ -46,10 +46,13 @@ const auto convParams = ::testing::Combine(
     ::testing::ValuesIn(outputChannels)
 );

+const std::vector<bool> permute = {false, true};
+
 INSTANTIATE_TEST_CASE_P(smoke_FqConvFqAffineTest, FqConvFqAffineTest,
     ::testing::Combine(
         fqParams,
         convParams,
+        ::testing::ValuesIn(permute),
         ::testing::ValuesIn(netPrecisions),
         ::testing::ValuesIn(inputShapes),
         ::testing::Values(CommonTestUtils::DEVICE_GNA),


@@ -30,6 +30,7 @@ typedef std::tuple<
 typedef std::tuple<
     FqSpecificParams,
     ConvParams,
+    bool,                                // Permute after convolution
     InferenceEngine::Precision,          // Net precision
     InferenceEngine::SizeVector,         // Input shapes
     LayerTestsUtils::TargetDevice,       // Device name


@@ -9,11 +9,12 @@ namespace SubgraphTestsDefinitions {
 std::string FqConvFqAffineTest::getTestCaseName(testing::TestParamInfo<FqConvFqAffineTestParamsSet> obj) {
     FqSpecificParams fqParams;
     ConvParams convParams;
+    bool permute;
     InferenceEngine::Precision netPrecision;
     InferenceEngine::SizeVector inputShapes;
     std::string targetDevice;
     std::map<std::string, std::string> config;
-    std::tie(fqParams, convParams, netPrecision, inputShapes, targetDevice, config) = obj.param;
+    std::tie(fqParams, convParams, permute, netPrecision, inputShapes, targetDevice, config) = obj.param;

     std::vector<size_t> levels;
     std::vector<float> inputArg;
@@ -39,17 +40,19 @@ std::string FqConvFqAffineTest::getTestCaseName(testing::TestParamInfo<FqConvFqA
     result << "_KERNEL=" << CommonTestUtils::vec2str(kernelShape) << "_";
     result << "STRIDES=" << CommonTestUtils::vec2str(strides) << "_";
     result << "IC=" << inputChannels << "_";
-    result << "OC=" << outputChannels;
+    result << "OC=" << outputChannels << "_";
+    result << "permute=" << permute << "\n";
     return result.str();
 }

 void FqConvFqAffineTest::SetUp() {
     FqSpecificParams fqParams;
     ConvParams convParams;
+    bool permute;
     std::vector<size_t> inputShape;
     std::map<std::string, std::string> config;
     auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
-    std::tie(fqParams, convParams, netPrecision, inputShape, targetDevice, config) = this->GetParam();
+    std::tie(fqParams, convParams, permute, netPrecision, inputShape, targetDevice, config) = this->GetParam();
     configuration.insert(config.begin(), config.end());

     std::vector<size_t> levels;
@@ -100,8 +103,19 @@ void FqConvFqAffineTest::SetUp() {
     auto heightAfterConv = (convInputShape[2] - kernelShape[0]) / strides[0] + 1;
     std::vector<size_t> outFormShapes = {1, outputChannels * widthAfterConv * heightAfterConv };

+    ngraph::Output<ngraph::Node> nodeBeforeReshape;
+    if (permute) {
+        auto permuteOrder = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64,
+                                                                       ngraph::Shape{4},
+                                                                       ngraph::Shape{{0, 3, 2, 1}});
+        auto transpose = std::make_shared<ngraph::opset1::Transpose>(add, permuteOrder);
+        nodeBeforeReshape = transpose;
+    } else {
+        nodeBeforeReshape = add;
+    }
+
     auto reshapePattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
-    auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(add, reshapePattern2, false);
+    auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(nodeBeforeReshape, reshapePattern2, false);

     auto matMulWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {outFormShapes[1], outFormShapes[1]}, { 1.0f });
     auto matMulLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin});
auto matMulLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin}); auto matMulLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin});