[GNA] Remove extra reshape before maxpool. Fix activation and maxpool reordering. (#5404)
Fix convolution input transposition for Kaldi models with FakeQuantize layers. Fix floating point error in gnaFuncTests with debug logs.
This commit is contained in:
parent
bcb67bfb6a
commit
22e4566faa
@ -453,6 +453,12 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
|
||||
size_t num_data_bytes_in = (num_inputs + num_input_padding) * inputs->getPrecision().size();
|
||||
|
||||
auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
|
||||
// Skip FakeQuantize and ScaleShift between Convolution and Input
|
||||
if (LayerInfo(connectedInputLayer).isFakeQuantize()) {
|
||||
connectedInputLayer = CNNNetPrevLayerSkipCertain(connectedInputLayer, 0, [](CNNLayerPtr l) {
|
||||
return LayerInfo(l).isScaleShift();
|
||||
});
|
||||
}
|
||||
|
||||
// TODO: convolution might not be the first layer in sorted order but connected via split for example - don't know how Kaldi will handle that
|
||||
if (!dnn->do_rotate_input) {
|
||||
@ -626,6 +632,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
|
||||
ptr_weights,
|
||||
ptr_biases);
|
||||
|
||||
currentComponent.num_bytes_per_input = inputs->getPrecision().size();
|
||||
currentComponent.num_bytes_per_output = outputs->getPrecision().size();
|
||||
|
||||
if (inputs->getLayout() == Layout::NHWC) {
|
||||
|
@ -56,6 +56,8 @@
|
||||
#include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp>
|
||||
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
|
||||
|
||||
#include "transformations/remove_extra_reshapes.hpp"
|
||||
|
||||
#if GNA_LIB_VER == 2
|
||||
#include <gna2-model-api.h>
|
||||
|
||||
@ -663,6 +665,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
|
||||
manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
|
||||
manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
|
||||
manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
|
||||
manager.register_pass<RemoveExtraReshapes>();
|
||||
// UnrollTI should be the last transformation in the transformation pipeline
|
||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||
|
||||
|
@ -371,19 +371,21 @@ namespace {
|
||||
|
||||
void ReorderMaxPoolPass::run() {
|
||||
// detecting following pattern
|
||||
// conv->relu->maxpooling
|
||||
// changing it to conv->maxpooling->relu
|
||||
// conv->activation->maxpooling
|
||||
// changing it to conv->maxpooling->activation
|
||||
for (auto & l : *pLayers) {
|
||||
auto pool = LayerInfo(l);
|
||||
if (!pool.isMaxPooling()) continue;
|
||||
|
||||
// don't reorder if pooling is 2D for CNN2D
|
||||
auto pooling = dynamic_cast<PoolingLayer*>(l.get());
|
||||
if (pooling == nullptr || (is2D(pooling->_kernel) || is2D(pooling->_stride))) continue;
|
||||
// todo: return the check for stride after it'll be fixed in MO for Kaldi models
|
||||
if (pooling == nullptr || (is2D(pooling->_kernel))) continue;
|
||||
|
||||
// checking prev layer type
|
||||
auto activation = LayerInfo(CNNNetPrevLayer(l));
|
||||
if (!activation.isActivation()) continue;
|
||||
auto actLayer = CNNNetPrevLayer(l);
|
||||
auto activation = LayerInfo(actLayer);
|
||||
if (!activation.isActivation() || actLayer->insData.size() > 1) continue;
|
||||
|
||||
// if activation came from convolution
|
||||
auto convolution = LayerInfo(CNNNetPrevLayer(static_cast<InferenceEngine::CNNLayer*>(activation)));
|
||||
|
@ -0,0 +1,31 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/remove_extra_reshapes.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
|
||||
using namespace GNAPluginNS;
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(RemoveExtraReshapes, "RemoveExtraReshapes", 0);
|
||||
|
||||
// Matches a MaxPool fed directly by a Reshape and removes the Reshape when it
// is an identity (input shape equals output shape), reconnecting the MaxPool
// straight to the Reshape's producer.
RemoveExtraReshapes::RemoveExtraReshapes() {
    const auto reshape_pattern = ngraph::pattern::wrap_type<ngraph::opset1::Reshape>();
    const auto pooling_pattern = ngraph::pattern::wrap_type<ngraph::opset1::MaxPool>({reshape_pattern});

    ngraph::matcher_pass_callback handler = [=](ngraph::pattern::Matcher& matcher) {
        const auto& value_map = matcher.get_pattern_value_map();
        const auto matched_reshape = value_map.at(reshape_pattern).get_node_shared_ptr();

        // Only a no-op reshape may be dropped; anything that actually changes
        // the shape must stay in the graph.
        const bool is_identity =
            matched_reshape->get_input_shape(0) == matched_reshape->get_output_shape(0);
        if (!is_identity) {
            return false;
        }

        // Bypass the reshape: rewire its consumers to its producer, keeping
        // the friendly name on the surviving output.
        ngraph::replace_output_update_name(matched_reshape->output(0), matched_reshape->input_value(0));
        return true;
    };

    auto matcher = std::make_shared<ngraph::pattern::Matcher>(pooling_pattern, "RemoveExtraReshapes");
    this->register_matcher(matcher, handler);
}
|
@ -0,0 +1,20 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
|
||||
/**
 * @brief Removes Reshape layers placed directly before MaxPool which do
 * nothing, i.e. whose input and output shapes are identical. Such reshapes
 * can be a result of conversion from IR10 to IR7.
 */
class RemoveExtraReshapes : public ngraph::pass::MatcherPass {
public:
  NGRAPH_RTTI_DECLARATION;
  RemoveExtraReshapes();
};
|
||||
|
||||
} // namespace GNAPluginNS
|
@ -0,0 +1,148 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include <ie_core.hpp>
|
||||
|
||||
#include "common_test_utils/common_utils.hpp"
|
||||
#include "functional_test_utils/plugin_cache.hpp"
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
#include "functional_test_utils/blob_utils.hpp"
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
#include "ngraph_functions/pass/convert_prc.hpp"
|
||||
|
||||
// Parameter pack for the FakeQuantize -> MaxPool reordering tests.
typedef std::tuple<
    InferenceEngine::Precision,         // Network Precision
    std::string,                        // Target Device
    std::map<std::string, std::string>, // Plugin Configuration
    std::vector<size_t>,                // Input Shape (NCHW)
    std::pair<float, float>,            // Input Min and Max (FQ range)
    size_t                              // FakeQuantize Levels
> fqMaxpoolReorderingParams;
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
// Checks that the GNA plugin correctly reorders a FakeQuantize (activation)
// followed by MaxPool. Graph: Param -> FQ -> Conv(FQ weights) -> Add -> FQ -> MaxPool.
class FQMaxpoolReordering : public testing::WithParamInterface<fqMaxpoolReorderingParams>,
    public LayerTestsUtils::LayerTestsCommon {
    // Data range shared by GenerateInput() and the convolution FQ ranges;
    // filled in from the test parameters in SetUp().
    float inputDataMin = 0.0f;
    float inputDataMax = 0.0f;
    float inputDataResolution = 1.0f;

public:
    // Builds a unique, human-readable name for each parameter combination.
    static std::string getTestCaseName(testing::TestParamInfo<fqMaxpoolReorderingParams> obj) {
        InferenceEngine::Precision netPrecision;
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        std::vector<size_t> inputShape;
        std::pair<float, float> inputMinMax;
        size_t levels = 0;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = obj.param;

        std::ostringstream result;
        result << "netPRC=" << netPrecision.name() << "_";
        result << "targetDevice=" << targetDevice << "_";
        for (auto const& configItem : configuration) {
            result << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        result << "_inputShape=" << CommonTestUtils::vec2str(inputShape);
        result << "_inputMinMax=(" << inputMinMax.first << ".." << inputMinMax.second << ")";
        result << "_levels=" << levels;

        return result.str();
    }

    // Generates input blobs limited to [inputDataMin, inputDataMax] so the
    // data actually falls inside the FakeQuantize ranges built in SetUp().
    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override {
        return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, 1 / inputDataResolution);
    }

protected:
    void SetUp() override {
        InferenceEngine::Precision netPrecision;

        std::vector<size_t> inputShape;
        std::pair<float, float> inputMinMax;
        size_t levels = 0;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = this->GetParam();
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        // Remember the data range: previously these members stayed at their
        // 0.0f defaults, which collapsed the convolution FQ range to [0, 0]
        // and made GenerateInput() produce a zero-width range.
        inputDataMin = inputMinMax.first;
        inputDataMax = inputMinMax.second;

        auto inputLowNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputMinMax.first });
        auto inputHighNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputMinMax.second });

        auto inputVector = ngraph::builder::makeParams(ngPrc, {inputShape});

        auto inputFQ = std::make_shared<ngraph::opset1::FakeQuantize>(inputVector[0],
            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);

        // 8 output channels, 1x8 kernel, all-ones weights wrapped in FQ.
        auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {8, inputShape[1], 1, 8}, { 1.0f });
        auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin});
        auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMax});
        // Construct the weights FQ directly; the former dynamic_pointer_cast
        // of a FakeQuantize to its own type was a no-op.
        auto convWeightsFQ = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
            convLowNode, convHighNode, convLowNode, convHighNode, levels);

        auto conv = std::make_shared<ngraph::opset1::Convolution>(inputFQ, convWeightsFQ, std::vector<size_t>{ 1, 1 },
                                                                  std::vector<ptrdiff_t>{ 0, 0 }, std::vector<ptrdiff_t>{ 0, 0 },
                                                                  std::vector<size_t>{ 1, 1 },
                                                                  ngraph::op::PadType::VALID);
        auto biasesWeightsNode = ngraph::builder::makeConstant(ngPrc, {}, std::vector<float>{ 0.0f });
        auto add = std::make_shared<ngraph::opset1::Add>(conv, biasesWeightsNode);

        auto convFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(add,
            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);

        // MaxPool (1x2 kernel, 1x2 stride) fed by the FQ — the exact pattern
        // the GNA reordering pass has to handle.
        auto maxpool = ngraph::builder::makePooling(convFQNode, {1, 2}, {0, 0}, {0, 0}, {1, 2}, ngraph::op::RoundingType::FLOOR,
                                                    ngraph::op::PadType::VALID, false, ngraph::helpers::PoolingTypes::MAX);

        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(maxpool)};
        function = std::make_shared<ngraph::Function>(results, inputVector, "FQMaxPoolReorder");
    }
};
|
||||
|
||||
// Runs inference on GNA and compares against the reference implementation.
TEST_P(FQMaxpoolReordering, CompareWithRefImpl) {
    Run();
};

const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16
};

// Software-exact mode: bit-exact emulation of the GNA device.
const std::vector<std::map<std::string, std::string>> configs = {
    {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
    }
};

// NCHW input shapes: a single-channel wide row and an 8-channel variant.
const std::vector<std::vector<size_t>> inputShape = {
    {1, 1, 1, 1024},
    {1, 8, 1, 168},
};

// FakeQuantize data ranges, symmetric around zero.
const std::vector<std::pair<float, float>> inputMinMax = {
    {-0.5, 0.5},
    {-2, 2},
    {-8, 8}
};

// 65535 quantization levels (16-bit).
const std::vector<size_t> levels = {
    65535,
};

INSTANTIATE_TEST_CASE_P(smoke_fq_maxpool_reordering, FQMaxpoolReordering,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configs),
        ::testing::ValuesIn(inputShape),
        ::testing::ValuesIn(inputMinMax),
        ::testing::ValuesIn(levels)),
    FQMaxpoolReordering::getTestCaseName);
} // namespace LayerTestsDefinitions
|
@ -46,10 +46,13 @@ const auto convParams = ::testing::Combine(
|
||||
::testing::ValuesIn(outputChannels)
|
||||
);
|
||||
|
||||
const std::vector<bool> permute = {false, true};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_FqConvFqAffineTest, FqConvFqAffineTest,
|
||||
::testing::Combine(
|
||||
fqParams,
|
||||
convParams,
|
||||
::testing::ValuesIn(permute),
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::ValuesIn(inputShapes),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
|
@ -30,6 +30,7 @@ typedef std::tuple<
|
||||
typedef std::tuple<
|
||||
FqSpecificParams,
|
||||
ConvParams,
|
||||
bool, // Permute after convolution
|
||||
InferenceEngine::Precision, // Net precision
|
||||
InferenceEngine::SizeVector, // Input shapes
|
||||
LayerTestsUtils::TargetDevice, // Device name
|
||||
|
@ -9,11 +9,12 @@ namespace SubgraphTestsDefinitions {
|
||||
std::string FqConvFqAffineTest::getTestCaseName(testing::TestParamInfo<FqConvFqAffineTestParamsSet> obj) {
|
||||
FqSpecificParams fqParams;
|
||||
ConvParams convParams;
|
||||
bool permute;
|
||||
InferenceEngine::Precision netPrecision;
|
||||
InferenceEngine::SizeVector inputShapes;
|
||||
std::string targetDevice;
|
||||
std::map<std::string, std::string> config;
|
||||
std::tie(fqParams, convParams, netPrecision, inputShapes, targetDevice, config) = obj.param;
|
||||
std::tie(fqParams, convParams, permute, netPrecision, inputShapes, targetDevice, config) = obj.param;
|
||||
|
||||
std::vector<size_t> levels;
|
||||
std::vector<float> inputArg;
|
||||
@ -39,17 +40,19 @@ std::string FqConvFqAffineTest::getTestCaseName(testing::TestParamInfo<FqConvFqA
|
||||
result << "_KERNEL=" << CommonTestUtils::vec2str(kernelShape) << "_";
|
||||
result << "STRIDES=" << CommonTestUtils::vec2str(strides) << "_";
|
||||
result << "IC=" << inputChannels << "_";
|
||||
result << "OC=" << outputChannels;
|
||||
result << "OC=" << outputChannels << "_";
|
||||
result << "permute=" << permute << "\n";
|
||||
return result.str();
|
||||
}
|
||||
|
||||
void FqConvFqAffineTest::SetUp() {
|
||||
FqSpecificParams fqParams;
|
||||
ConvParams convParams;
|
||||
bool permute;
|
||||
std::vector<size_t> inputShape;
|
||||
std::map<std::string, std::string> config;
|
||||
auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
|
||||
std::tie(fqParams, convParams, netPrecision, inputShape, targetDevice, config) = this->GetParam();
|
||||
std::tie(fqParams, convParams, permute, netPrecision, inputShape, targetDevice, config) = this->GetParam();
|
||||
configuration.insert(config.begin(), config.end());
|
||||
|
||||
std::vector<size_t> levels;
|
||||
@ -100,8 +103,19 @@ void FqConvFqAffineTest::SetUp() {
|
||||
auto heightAfterConv = (convInputShape[2] - kernelShape[0]) / strides[0] + 1;
|
||||
std::vector<size_t> outFormShapes = {1, outputChannels * widthAfterConv * heightAfterConv };
|
||||
|
||||
ngraph::Output<ngraph::Node> nodeBeforeReshape;
|
||||
if (permute) {
|
||||
auto permuteOrder = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{4},
|
||||
ngraph::Shape{{0, 3, 2, 1}});
|
||||
auto transpose = std::make_shared<ngraph::opset1::Transpose>(add, permuteOrder);
|
||||
nodeBeforeReshape = transpose;
|
||||
} else {
|
||||
nodeBeforeReshape = add;
|
||||
}
|
||||
|
||||
auto reshapePattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
|
||||
auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(add, reshapePattern2, false);
|
||||
auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(nodeBeforeReshape, reshapePattern2, false);
|
||||
|
||||
auto matMulWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {outFormShapes[1], outFormShapes[1]}, { 1.0f });
|
||||
auto matMulLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin});
|
||||
|
Loading…
Reference in New Issue
Block a user