[GNA] incorrect diag insertion (#14858)

* [GNA] Create an ngraph implementation of the relu_torch_pot model for further tests. Create a legacy pass fusing the FC-Eltwise-Const layer pattern into a single FC layer with biases

* [GNA] Fix review comments, apply proper code style to the changed code
Marcin Kacprzak 2023-02-22 11:22:55 +01:00 committed by GitHub
parent f41c75b965
commit c8643a9a30
6 changed files with 404 additions and 13 deletions


@@ -445,5 +445,16 @@ inline std::vector<TranspositionInfo> FindTranspositionInfoFromNextLayers(Infere
return findTranspositionInfoRecursive(layer);
}
/**
* @brief Returns true if the layer has at most one dimension greater than 1
* (such a layer can then be treated as one-dimensional)
*/
inline bool IsOneDimLayer(InferenceEngine::CNNLayerPtr layer) {
auto dims = layer->insData[0].lock()->getDims();
return std::count_if(std::begin(dims), std::end(dims), [](size_t dim) {
return dim > 1;
}) <= 1;
}
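// A minimal illustration (hypothetical shapes, not part of this change): a layer whose
// input dims are {1, 1, 64, 1} counts as one-dimensional here, since only one dimension
// exceeds 1; a layer with dims {1, 2, 64} does not, since two of its dimensions exceed 1.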
} // namespace intel_gna
} // namespace ov


@@ -21,6 +21,10 @@ namespace InferenceEngine {
static constexpr size_t invalid_data_idx = std::numeric_limits<size_t>::max();
// Predicate for the CNNNet*SkipCertain helpers that never skips any layer
inline bool DoNotSkip(CNNLayerPtr layer) {
return false;
}
// Compares data between the copied network and the original network
inline bool areEqualDatas(DataPtr source, DataPtr target) {
if (source.get() == target.get()) {
@@ -693,6 +697,47 @@ std::vector<std::pair<CNNLayerPtr, int>> CNNNetGetPrevLayersSkip(CNNLayerPtr ori
return prevLayers;
}
/**
* @brief Removes the 'to_remove' layer from between two other layers, 'prev' and 'next', then connects 'prev' with 'next'
* @param prev Layer before 'to_remove'
* @param to_remove Layer to be removed
* @param prevOutputNo Output number of 'prev' to be connected with 'next'
* @param nextInputNo Input number of 'next' to be connected with 'prev'
* @return true if the layer was removed, false otherwise
*/
inline bool CNNRemoveAndConnect(CNNLayerPtr prev, CNNLayerPtr to_remove, int prevOutputNo = 0, int nextInputNo = 0) {
CNNLayerPtr next = CNNNetCheckNextLayerSkipCertain(to_remove, 0, 0, true, DoNotSkip).first;
if (!prev || !next) {
return false;
}
IE_ASSERT(prev->outData.size() > 0);
IE_ASSERT(next->outData.size() > 0);
if (to_remove->outData.size() != 1) {
// Only a layer with exactly one output can be removed
return false;
}
// Get the output of 'prev' selected by 'prevOutputNo'
auto prevDPtr = prev->outData[prevOutputNo];
// Connect that output of 'prev' to the input of 'next' selected by 'nextInputNo'
next->insData[nextInputNo] = prevDPtr;
// Add 'next' to the inputTo map of 'prev',
// so that 'prev' now points to 'next' as a layer that consumes its output.
auto& prevInputToMap = getInputTo(prevDPtr);
prevInputToMap[next->name] = next;
// Remove the reference to 'to_remove' from the inputTo map of 'prev'
prevInputToMap.erase(to_remove->name);
return true;
}
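// Usage sketch (hypothetical layers, assuming 'identity' has exactly one output and its
// consumer is reachable via CNNNetCheckNextLayerSkipCertain):
//
//   CNNLayerPtr fc = ...;        // producer
//   CNNLayerPtr identity = ...;  // layer to drop
//   if (CNNRemoveAndConnect(fc, identity)) {
//       // output 0 of 'fc' now feeds directly into the layer that followed 'identity'
//   }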
/**
* @brief Removes the given layer from the topology; currently only layers with one input data and one output data are supported
*/


@@ -223,6 +223,7 @@ void TransformationsPipeline::apply_legacy(const InferenceEngine::CNNNetwork& ne
passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
passes->registerPass<ForbidActivationFusingPass>();
passes->registerPass<FuseMultipleIdentitiesPass>();
passes->registerPass<FuseFullyConnectedWithEltwisePass>();
legacy_pass_index = passes->run(legacy_pass_index);
}


@@ -62,6 +62,51 @@ std::shared_ptr<IPassManager> BasePass::getPassManager() {
return sharedMgr;
}
/**
* @brief Adds the values of 'src_blob' element-wise into 'dst_blob'
*/
template <class T>
static void SumBlobs_t(Blob::Ptr& src_blob, Blob::Ptr& dst_blob) {
IE_ASSERT(src_blob != nullptr);
IE_ASSERT(dst_blob != nullptr);
IE_ASSERT(src_blob->size() == dst_blob->size());
IE_ASSERT(src_blob->getTensorDesc().getPrecision() == dst_blob->getTensorDesc().getPrecision());
T* src_blob_buf = src_blob->buffer().as<T*>();
T* dst_blob_buf = dst_blob->buffer().as<T*>();
std::transform(dst_blob_buf, dst_blob_buf + dst_blob->size(), src_blob_buf, dst_blob_buf, std::plus<T>());
}
static void SumBlobs(Blob::Ptr& src_blob, Blob::Ptr& dst_blob) {
IE_ASSERT(src_blob != nullptr);
switch (src_blob->getTensorDesc().getPrecision()) {
#define CASE(x) \
case x: \
return SumBlobs_t<PrecisionTrait<x>::value_type>(src_blob, dst_blob);
CASE(InferenceEngine::Precision::FP32);
CASE(InferenceEngine::Precision::FP64);
CASE(InferenceEngine::Precision::FP16);
CASE(InferenceEngine::Precision::BF16);
CASE(InferenceEngine::Precision::I4);
CASE(InferenceEngine::Precision::I8);
CASE(InferenceEngine::Precision::I16);
CASE(InferenceEngine::Precision::I32);
CASE(InferenceEngine::Precision::I64);
CASE(InferenceEngine::Precision::U4);
CASE(InferenceEngine::Precision::U8);
CASE(InferenceEngine::Precision::U16);
CASE(InferenceEngine::Precision::U32);
CASE(InferenceEngine::Precision::U64);
CASE(InferenceEngine::Precision::Q78);
CASE(InferenceEngine::Precision::BIN);
CASE(InferenceEngine::Precision::BOOL);
#undef CASE
default:
IE_THROW() << "Wrong precision specified: " << src_blob->getTensorDesc().getPrecision().name();
}
}
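// Example of the intended use (a sketch, not part of this change): element-wise
// accumulation of one FP32 blob into another of the same size and precision.
//
//   InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {4}, InferenceEngine::Layout::C);
//   InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(desc);
//   InferenceEngine::Blob::Ptr dst = InferenceEngine::make_shared_blob<float>(desc);
//   src->allocate();
//   dst->allocate();
//   // ... fill both buffers ...
//   SumBlobs(src, dst);  // dst[i] += src[i] for every element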
static Blob::Ptr convertToRWBlob(const Blob::Ptr& readOnlyBlob, const std::string& name = {}) {
auto blob = Blob::CreateFromData(std::make_shared<Data>(name, readOnlyBlob->getTensorDesc()));
blob->allocate();
@@ -2171,10 +2216,6 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass ::run() {
return;
}
auto donotSkip = [](CNNLayerPtr) {
return false;
};
auto allowFQFuse = [this](CNNLayerPtr layer) -> bool {
auto skipNonFunctionalOrMemory = [](CNNLayerPtr layer) {
return LayerInfo(layer).isNonFunctional() || LayerInfo(layer).isMemory();
@@ -2202,15 +2243,11 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass ::run() {
return;
}
auto donotSkip = [](CNNLayerPtr) {
return false;
};
auto quantParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
IE_ASSERT(quantParams != nullptr);
// Find all output layers connected to FQ
auto nextLayers = CNNNetGetAllNextLayersSkipCertain(layer.get(), -1, donotSkip);
auto nextLayers = CNNNetGetAllNextLayersSkipCertain(layer.get(), -1, DoNotSkip);
if (nextLayers.empty()) {
quantParams->_src_quant.CopyStats(srcQuantParams->_dst_quant);
if (LayerInfo(layer).isNonFunctional()) {
@@ -2264,7 +2301,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass ::run() {
continue;
}
GNAFakeQuantizeLayer fqLayer(l);
auto prevLayer = CNNNetPrevLayerSkipCertain(*fqLayer, 0, donotSkip);
auto prevLayer = CNNNetPrevLayerSkipCertain(*fqLayer, 0, DoNotSkip);
auto prevDataIt = std::find_if(std::begin(prevLayer->outData), std::end(prevLayer->outData), [l](DataPtr data) {
return getInputTo(data).find(l->name) != std::end(getInputTo(data));
});
@@ -2307,8 +2344,8 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass ::run() {
// Propagate destination statistics to the multiply layer if they are set for the next sum/sub layer
// (which is considered a bias)
if (LayerInfo(prevLayer).isEltwiseSum() || LayerInfo(prevLayer).isEltwiseSub()) {
auto eltwPrevLayer = CNNNetPrevLayerSkipCertain(prevLayer, 0, donotSkip);
auto constLayer = CNNNetPrevLayerSkipCertain(prevLayer, 1, donotSkip);
auto eltwPrevLayer = CNNNetPrevLayerSkipCertain(prevLayer, 0, DoNotSkip);
auto constLayer = CNNNetPrevLayerSkipCertain(prevLayer, 1, DoNotSkip);
if (LayerInfo(eltwPrevLayer).isEltwise() && LayerInfo(constLayer).isConst()) {
auto quantParamsEltwLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(eltwPrevLayer);
quantParamsEltwLayer->_dst_quant.CopyStats(quantParamsPrevLayer->_dst_quant);
@@ -2334,7 +2371,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass ::run() {
auto prevData = *prevDataIt;
// Find all output layers connected to FQ
auto nextLayers = CNNNetGetAllNextLayersSkipCertain(*fqLayer, -1, donotSkip);
auto nextLayers = CNNNetGetAllNextLayersSkipCertain(*fqLayer, -1, DoNotSkip);
if (nextLayers.empty()) {
continue;
}
@@ -2582,6 +2619,93 @@ void TransposeWeightsFromNCHWToNHWCPass::run() {
}
}
void FuseFullyConnectedWithEltwisePass::run() {
// This legacy pass removes the Eltwise (only if it performs a SUM op) from between FC and Any.
// The blob data of the Const layer attached to the Eltwise is added to the biases blob of the FC layer.
// Finally, the Const is removed as well.
// Permute and Reshape layers between FC and Eltwise remain in the network in order
// to keep the final data shape unchanged.
//
// This operation can be illustrated as follows:
//
// --
// Original: Result: Removed:
//
// FC FC (Eltwise)
// | | (Const)
// Permute Permute
// (optional) (if exists in Original)
// | |
// Reshape Reshape
// (optional) Const (if exists in Original)
// | / |
// Eltwise(sum) Any
// |
// Any (e.g. ReLU)
// --
//
// NOTE: This pass exists to avoid an unnecessary roundtrip to memory (an additional layer).
// It can be removed entirely once the corresponding LPT transformation is implemented.
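//
// Mathematically, the FC computes y = W * x + b, and the following Eltwise(sum) adds a
// constant vector c, so the subgraph computes y = W * x + (b + c). Folding c into the
// FC biases (b' = b + c) therefore preserves the result exactly, which is why SumBlobs()
// below adds the Const blob into the FC "biases" blob.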
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FuseFullyConnectedWithEltwisePass");
auto skipPermuteAndReshape = [](CNNLayerPtr layer) {
if (IsOneDimLayer(layer)) {
if (LayerInfo(layer).isPermute() || LayerInfo(layer).isReshape()) {
return true;
}
}
return false;
};
for (auto& layer : *pLayers) {
if (!LayerInfo(layer).isFullyConnected() || layer->outData.empty()) {
continue;
}
auto& fully_connected = layer;
// Find the Eltwise, skipping Permutes and Reshapes
auto eltwise = CNNNetCheckNextLayerSkipCertain(fully_connected, 0, 0, true, skipPermuteAndReshape).first;
// Skip this FC if the found layer is not an Eltwise performing a 'sum' operation
if (!eltwise || !LayerInfo(eltwise).isEltwiseSum()) {
continue;
}
// Get the Eltwise's input layers
CNNLayerPtr eltwise_const = nullptr;
CNNLayerPtr eltwise_input = nullptr;
for (size_t i = 0; i < eltwise->insData.size(); i++) {
// Get Eltwise's prev layer and check its kind
auto before_eltwise =
CNNNetHasPrevLayer(eltwise.get(), i) ? CNNNetPrevLayerSkipCertain(eltwise, i, DoNotSkip) : nullptr;
if (before_eltwise && LayerInfo(before_eltwise).isConst()) {
eltwise_const = before_eltwise;
} else {
eltwise_input = before_eltwise;
}
}
if (!eltwise_const || !eltwise_input) {
continue;
}
// Find (any) layer after Eltwise
auto any_layer = CNNNetCheckNextLayerSkipCertain(eltwise, 0, 0, true, DoNotSkip).first;
if (!any_layer) {
continue;
}
// Connect FC with layer after Eltwise (Eltwise and Const will be removed)
if (CNNRemoveAndConnect(eltwise_input, eltwise)) {
// Add data from Const "custom" blob to FC "biases" blob
auto& const_blob = eltwise_const->blobs.find("custom")->second;
auto& fc_blob = fully_connected->blobs.find("biases")->second;
SumBlobs(const_blob, fc_blob);
}
}
}
int PassManager::run(int index) {
#if defined PLOT || defined ENABLE_V7_SERIALIZE
auto dumpNetworkAfterPass = [&index, this](std::shared_ptr<Pass> pass) {


@@ -222,6 +222,12 @@ DECL_PASS(MoveFakeQuantizeLayerIntoQuantParams);
*/
DECL_PASS(TransposeWeightsFromNCHWToNHWC);
/**
* @brief Fuses FullyConnected and Eltwise layers, also when a Permute or Reshape whose input has only
* one dimension > 1 lies between them
*/
DECL_PASS(FuseFullyConnectedWithEltwise);
struct PassManagerSettings {
/// @brief whether to run passes before copy
bool runBeforeCopy;


@@ -0,0 +1,204 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <memory>
#include <string>
#include <tuple>
#include <vector>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
namespace DiagonalInsertionTestNs {
using namespace ngraph;
using namespace ngraph::builder;
using namespace ngraph::element;
using namespace ngraph::op;
using namespace ngraph::opset9;
using namespace std;
using DiagonalInsertionTestParams = tuple<map<string, string>, // Configuration
vector<vector<float>> // FakeQuantize min/max params
>;
constexpr uint16_t fq_levels = numeric_limits<uint16_t>::max();
// This class performs tests on the following network:
// Params
// Const |
// | FakeQuantize
// FakeQuantize |
// | Reshape
// \ /
// MatMul
// |
// FakeQuantize
// |
// Const Reshape
// | /
// FakeQuantize /
// \ /
// Add
// |
// FakeQuantize
// |
// ReLU
// |
// Result
// The above network should cause the FuseFullyConnectedWithEltwisePass to fire
// The final network should have only one functional layer - FullyConnected
class DiagonalInsertionTest : public testing::WithParamInterface<DiagonalInsertionTestParams>,
public LayerTestsUtils::LayerTestsCommon {
const int32_t seed = 7235346;
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override {
return FuncTestUtils::createAndFillBlobFloatNormalDistribution(info.getTensorDesc(), 0.0f, 0.2f, seed);
}
ParameterVector CreateInputVector(const Type& type, const vector<std::size_t>& shapes) {
return makeParams(type, {shapes});
}
shared_ptr<FakeQuantize> CreateFQNode(const Type& type,
const shared_ptr<ov::Node>& node,
float fq_min,
float fq_max,
std::size_t levels) {
auto fq_inp_min = makeConstant<float>(type, {1}, {fq_min});
auto fq_inp_max = makeConstant<float>(type, {1}, {fq_max});
auto fq_out_min = makeConstant<float>(type, {1}, {fq_min});
auto fq_out_max = makeConstant<float>(type, {1}, {fq_max});
return make_shared<FakeQuantize>(node, fq_inp_min, fq_inp_max, fq_out_min, fq_out_max, levels);
}
std::shared_ptr<Reshape> CreateReshapeNode(element::Type in_type,
shared_ptr<Node> input_node,
std::vector<size_t> target_shape_vect) {
const auto target_shape_const = Constant::create(in_type, Shape{target_shape_vect.size()}, target_shape_vect);
return std::make_shared<Reshape>(input_node, target_shape_const, false);
}
bool IsDebugEnabled(map<string, string>& configuration) {
return configuration.find("LOG_LEVEL") != configuration.end() && configuration["LOG_LEVEL"] == "LOG_DEBUG";
}
public:
static string getTestCaseName(testing::TestParamInfo<DiagonalInsertionTestParams> obj) {
map<string, string> configuration;
vector<vector<float>> fq_min_max;
tie(configuration, fq_min_max) = obj.param;
ostringstream result;
for (auto const& config_item : configuration) {
result << "_configItem=" << config_item.first << ":" << config_item.second;
}
for (auto const& fq : fq_min_max) {
result << "_fqMin=" << fq[0] << "_fqMax=" << fq[1];
}
return result.str();
}
protected:
void SetUp() override {
// Loosen threshold because of precision decrease during test
threshold = 0.1;
targetDevice = CommonTestUtils::DEVICE_GNA;
const size_t height = 512;
const size_t width = 1024;
const auto precision = ::ngraph::element::Type_t::f32;
const vector<std::size_t> input_shape = {width};
// Receive test params
vector<vector<float>> fq_min_max;
tie(configuration, fq_min_max) = this->GetParam();
// Create network
auto input_vect = makeParams(precision, {input_shape});
auto input_fq = CreateFQNode(precision, input_vect[0], fq_min_max[0][0], fq_min_max[0][1], fq_levels);
auto reshape = CreateReshapeNode(ngraph::element::Type_t::i32, input_fq, {width, 1});
auto mm_const = makeConstant<float>(precision, {height, width}, {}, true);
auto mm_const_fq = CreateFQNode(precision, mm_const, fq_min_max[1][0], fq_min_max[1][1], fq_levels);
auto matmul = makeMatMul(mm_const_fq, reshape);
auto matmul_fq = CreateFQNode(precision, matmul, fq_min_max[2][0], fq_min_max[2][1], fq_levels);
auto add_mm_reshape = CreateReshapeNode(ngraph::element::Type_t::i32, matmul_fq, {height});
auto add_const = makeConstant<float>(precision, {height}, {}, true);
auto add_const_fq = CreateFQNode(precision, add_const, fq_min_max[3][0], fq_min_max[3][1], fq_levels);
auto add = make_shared<Add>(add_const_fq, add_mm_reshape);
auto add_fq = CreateFQNode(precision, add, fq_min_max[4][0], fq_min_max[4][1], fq_levels);
auto relu = make_shared<Relu>(add_fq);
function = make_shared<ngraph::Function>(relu, input_vect, "DiagonalInsertion");
}
};
TEST_P(DiagonalInsertionTest, CompareWithRefs) {
Run();
}
const vector<map<string, string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"GNA_PRECISION", "I16"},
{"GNA_EXEC_TARGET", "GNA_TARGET_3_5"},
},
};
vector<vector<float>> fq_mm1 = {{-19.38653564453125, 19.38653564453125},
{-4.872922897338867, 4.872922897338867},
{-633.115478515625, 633.115478515625},
{-3.2157254219055176, 3.2157254219055176},
{-633.0288696289062, 633.0288696289062}};
vector<vector<float>> fq_mm2 = {{-1.38653564453125, 1.38653564453125},
{-0.872922897338867, 0.872922897338867},
{-63.115478515625, 63.115478515625},
{-0.2157254219055176, 0.2157254219055176},
{-63.0288696289062, 63.0288696289062}};
vector<vector<float>> fq_mm3 = {{-0.1938653564453125, 0.1938653564453125},
{-0.04872922897338867, 0.04872922897338867},
{-6.33115478515625, 6.33115478515625},
{-0.032157254219055176, 0.032157254219055176},
{-6.330288696289062, 6.330288696289062}};
vector<vector<float>> fq_mm4 = {{-4.38653564453125, 4.38653564453125},
{-48.72922897338867, 48.72922897338867},
{-3.115478515625, 3.115478515625},
{-32.157254219055176, 32.157254219055176},
{-30.0288696289062, 30.0288696289062}};
vector<vector<float>> fq_mm5 = {{-390.38653564453125, 390.38653564453125},
{-400.872922897338867, 400.872922897338867},
{-633.115478515625, 633.115478515625},
{-399.2157254219055176, 399.2157254219055176},
{-633.0288696289062, 633.0288696289062}};
vector<vector<vector<float>>> fq_min_max = {fq_mm1, fq_mm2, fq_mm3, fq_mm4, fq_mm5};
INSTANTIATE_TEST_SUITE_P(smoke_DiagonalInsertion,
DiagonalInsertionTest,
::testing::Combine(::testing::ValuesIn(configs), ::testing::ValuesIn(fq_min_max)),
DiagonalInsertionTest::getTestCaseName);
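// To run only this suite locally, a standard gtest filter can be used, e.g. (the test
// binary name is an assumption and depends on the build layout):
//   ./gnaFuncTests --gtest_filter=smoke_DiagonalInsertion*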
} // namespace DiagonalInsertionTestNs