[GNA] Improved accuracy on model after Accuracy Aware (#7576)

* improved accuracy on model after Accuracy Aware

* refactoring+test

* removed unnecessary FakeQuantI8/I16

* added comments, moved fake_quantized from UpdateInputScaleFromNetwork(), removed _Np template param from QuantDescTmpl
Author: Dmitrii Khurtin
Committed: 2021-09-28 11:34:09 +03:00 by GitHub
Parent: acc14c6469
Commit: a3dfa980ad
10 changed files with 276 additions and 125 deletions
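Editor's note, not part of the diff: the central change is that ScaleFactorCalculator is now parameterized by a compile-time quantization descriptor (QUANT_DESC) instead of receiving weight/input byte sizes and a fake-quantize flag as runtime arguments, and for FQ networks the weight width is resolved per layer from the recorded FakeQuantize levels. A minimal, self-contained sketch of that pattern; QuantI16Desc, FakeQuantDesc and Calculator are made-up stand-ins, not the plugin's API:

#include <cstdint>
#include <iostream>
#include <type_traits>

// Illustrative stand-ins for the plugin's quantization descriptors.
struct QuantI16Desc {
    static int weightsBytes() { return sizeof(int16_t); }
    static int inputBytes()   { return sizeof(int16_t); }
};
struct FakeQuantDesc {
    // Fallback width; for FQ networks the per-layer statistics may narrow it.
    static int weightsBytes() { return sizeof(int16_t); }
    static int inputBytes()   { return sizeof(int16_t); }
};

// The calculator queries the descriptor type instead of storing runtime
// byte sizes and a fake-quantize flag.
template <typename QUANT_DESC>
struct Calculator {
    static bool isFakeQuantize() {
        return std::is_same<QUANT_DESC, FakeQuantDesc>::value;
    }
    // For FQ networks the weight width is chosen per layer from the FQ levels.
    static int weightsBytes(uint32_t fqLevels) {
        if (isFakeQuantize() && fqLevels != 0) {
            return fqLevels <= 255 ? 1 : 2;  // up to 255 levels fit I8 weights
        }
        return QUANT_DESC::weightsBytes();
    }
};

int main() {
    std::cout << Calculator<QuantI16Desc>::weightsBytes(0) << "\n";      // 2: fixed by the descriptor
    std::cout << Calculator<FakeQuantDesc>::weightsBytes(255) << "\n";   // 1: few levels -> I8 weights
    std::cout << Calculator<FakeQuantDesc>::weightsBytes(65535) << "\n"; // 2: many levels -> I16 weights
    return 0;
}

Keeping these choices inside the descriptor type removes the extra parameters that previously had to be threaded through every ScaleFactorPerLayer call.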


@@ -16,7 +16,6 @@ struct GNAFlags {
float pwlMaxErrorPercent = 1.0f;
bool gna_openmp_multithreading = false;
bool sw_fp32 = false;
bool fake_quantized = false;
bool performance_counting = false;
bool input_low_precision = false;
};


@@ -39,14 +39,12 @@ struct QuantDescTmpl {
InferenceEngine::TPrecision<Op> _Op;
InferenceEngine::TPrecision<Wp> _Wp;
InferenceEngine::TPrecision<Bp> _Bp;
InferenceEngine::TPrecision<Np> _Np;
QuantDescTmpl() = default;
QuantDescTmpl(InferenceEngine::TPrecision<Ip> _Ip,
InferenceEngine::TPrecision<Op> _Op,
InferenceEngine::TPrecision<Wp> _Wp,
InferenceEngine::TPrecision<Bp> _Bp,
InferenceEngine::TPrecision<Np> _Np) : _Op(_Op), _Ip(_Ip), _Wp(_Wp), _Bp(_Bp), _Np(_Np) {
InferenceEngine::TPrecision<Bp> _Bp) : _Op(_Op), _Ip(_Ip), _Wp(_Wp), _Bp(_Bp) {
}
InferenceEngine::Precision getInputPrecision() const {
@@ -58,9 +56,6 @@ struct QuantDescTmpl {
InferenceEngine::Precision getBiasesPrecision() const {
return _Bp;
}
InferenceEngine::Precision getNetPrecision() const {
return _Np;
}
InferenceEngine::Precision getOutputPrecision() const {
return _Op;
}
@@ -74,23 +69,16 @@ typename InferenceEngine::PrecisionTrait<InferenceEngine::Precision::X>::value_t
struct QuantI16 : public QuantDescTmpl<PRECISION_TYPE(I16, I32, I16, I32, MIXED)> {
QuantI16() {
_Np = InferenceEngine::Precision::MIXED;
}
};
struct QuantI8 : public QuantDescTmpl<P_TYPE(I16), P_TYPE(I32), P_TYPE(I8), gna_compound_bias_t, P_TYPE(MIXED)> {
QuantI8() {
_Np = InferenceEngine::Precision::MIXED;
}
};
// Low precision path quantizer (I8 inputs, weights, biases)
struct QuantI8_I8 : public QuantDescTmpl<PRECISION_TYPE(I8, I32, I8, I8, MIXED)> {
QuantI8_I8() {
_Np = InferenceEngine::Precision::MIXED;
}
};
// to support proper trait instantiation for the quantization function callback
struct FakeQuant : public QuantDescTmpl<P_TYPE(I16), P_TYPE(I32), P_TYPE(MIXED), P_TYPE(MIXED), P_TYPE(MIXED)> {
};
struct FakeQuantI16 : public QuantI16 {};
struct FakeQuantI8 : public QuantI8 {};
@@ -654,9 +642,24 @@ class DataQuantizer<Desc, InferenceEngine::WeightableLayer *> : public DataQuant
public:
explicit DataQuantizer(float scaleFactor) : DataQuantizerBase(scaleFactor) {}
bool operator()(InferenceEngine::WeightableLayer *wl) const {
quantizeWeightsBiases<typename Desc::MandatoryType>(Desc::mandatory(), wl, Quant<typename Desc::MandatoryType>());
(*this)(wl, typename Desc::MandatoryType());
return true;
}
template<typename T>
void operator()(InferenceEngine::WeightableLayer *wl, const T&) const {
quantizeWeightsBiases<T>(T(), wl, Quant<T>());
}
void operator()(InferenceEngine::WeightableLayer *wl, const FakeQuant&) const {
auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*wl);
IE_ASSERT(quantData->_weights_quant.IsStatsSet());
if (quantData->_weights_quant.GetLevels() <= std::numeric_limits<uint8_t>::max()) {
quantizeWeightsBiases<FakeQuantI8>(FakeQuantI8(), wl, Quant<FakeQuantI8>());
} else {
quantizeWeightsBiases<FakeQuantI16>(FakeQuantI16(), wl, Quant<FakeQuantI16>());
}
}
};
template<class Desc>
@@ -691,13 +694,18 @@ class LayersQuantizer : public frontend::DataQuantizerBase {
}
};
/*
* The majority of layers will be executed in I16 mode, as will most of the
* auto-generated primitives (e.g. the ones added for alignment).
* GNA 1.0 and 2.0 do not support I8 for the convolution layer.
* Some layers will be switched into I16 mode so as not to lose accuracy, while the memory and
* runtime performance of layers like scale-shifts remain OK since they are O(N).
*/
using QuantI16 = frontend::QuantPair<frontend::QuantI16, frontend::QuantI16>;
using QuantI8 = frontend::QuantPair<frontend::QuantI8, frontend::QuantI16>;
using QuantI8_I8 = frontend::QuantPair<frontend::QuantI8_I8, frontend::QuantI8_I8>;
using FakeQuantI16 = frontend::QuantPair<frontend::FakeQuantI16, frontend::FakeQuantI16>;
using FakeQuantI8 = frontend::QuantPair<frontend::FakeQuantI8, frontend::FakeQuantI16>;
using FakeQuant = frontend::QuantPair<frontend::FakeQuant, frontend::FakeQuantI16>;
enum class QuantizedDataType {
input,

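Editor's sketch, not part of the diff: the comment in the hunk above explains why convolutions and scale-shifts keep a wider precision than the rest of the network, and the QuantPair aliases pair a "mandatory" descriptor for most layers with an "optional" one for those exceptions. A minimal illustration of that pairing; Pair, I8Desc, I16Desc and pickWeightsBytes are made-up names, not the plugin's QuantPair API:

#include <iostream>

struct I8Desc  { static constexpr int weightsBytes = 1; };
struct I16Desc { static constexpr int weightsBytes = 2; };

// One descriptor for most layers ("mandatory"), a wider one for layers that
// must not lose accuracy ("optional").
template <typename Mandatory, typename Optional>
struct Pair {
    static int mandatoryWeightsBytes() { return Mandatory::weightsBytes; }
    static int optionalWeightsBytes()  { return Optional::weightsBytes; }
};

// Shape of the diff's QuantI8 alias: I8 weights in general, I16 for the exceptions.
using MostlyI8 = Pair<I8Desc, I16Desc>;

enum class LayerKind { FullyConnected, Convolution, ScaleShift };

template <typename P>
int pickWeightsBytes(LayerKind kind) {
    // Convolutions (no I8 support on GNA 1.0/2.0) and scale-shifts take the
    // wider "optional" precision; everything else takes the "mandatory" one.
    if (kind == LayerKind::Convolution || kind == LayerKind::ScaleShift) {
        return P::optionalWeightsBytes();
    }
    return P::mandatoryWeightsBytes();
}

int main() {
    std::cout << pickWeightsBytes<MostlyI8>(LayerKind::FullyConnected) << "\n";  // 1
    std::cout << pickWeightsBytes<MostlyI8>(LayerKind::Convolution) << "\n";     // 2
    return 0;
}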

@@ -83,9 +83,7 @@ class ModelQuantizer {
scaleIndex++;
}
bool isFakeQuantize = std::is_same<T, FakeQuantI8>() || std::is_same<T, FakeQuantI16>();
propagateScaleFactor(sortedNewNet, T::mandatory().getWeightsPrecision().size(), T::optional().getWeightsPrecision().size(),
T::mandatory().getInputPrecision().size(), isFakeQuantize);
propagateScaleFactor(sortedNewNet);
// sorted order makes it possible to propagate quantisation along dependent layers
for (auto &&layer : sortedNewNet) {
@@ -96,9 +94,8 @@ class ModelQuantizer {
}
private :
void propagateScaleFactor(std::vector<InferenceEngine::CNNLayerPtr> & net, int mandWeightsBytesSize,
int optWeightsBytesSize, int inputsBytesSize, bool fakeQuantize) const {
ScaleFactorCalculator sf(net, mandWeightsBytesSize, optWeightsBytesSize, inputsBytesSize, fakeQuantize);
void propagateScaleFactor(std::vector<InferenceEngine::CNNLayerPtr> & net) const {
ScaleFactorCalculator<T> sf(net);
int infiniteLoopCount = 0;
std::vector<std::string> infiniteLoopPattern;


@@ -22,6 +22,10 @@
#include "round_float_define.hpp"
namespace GNAPluginNS {
template<typename QUANT_DESC>
class ScaleFactorCalculator;
namespace frontend {
static const float MIN_SEARCH_WEIGHTS_VAL = 1.0f;
static const float MAX_SEARCH_WEIGHTS_VAL = 1024.0f;
@@ -133,7 +137,8 @@ static float selectBestWeightsScaleFactors(float inScale, float outScale, std::v
for (size_t j = 0; j < slopes.size(); ++j) {
auto s = gna_slope(slopes[j], inScale * weightScale, outScale);
auto slope = static_cast<uint32_t>(s.slope * s.slope_scale);
if (slope < static_cast<uint32_t>(std::numeric_limits<int16_t>::min()) && slope > static_cast<uint32_t>(std::numeric_limits<int16_t>::max())) {
if (slope < static_cast<uint32_t>(std::numeric_limits<int16_t>::min()) &&
slope > static_cast<uint32_t>(std::numeric_limits<int16_t>::max())) {
sd += std::numeric_limits<int8_t>::max();
continue;
}
@@ -206,24 +211,23 @@ static double calculateWeightsReducerFromDstStats(Quantization dst_quant) {
* @brief calculates output scale factor per layer
* @tparam T layer pointer type
* @tparam QUANT_DESC quantization descriptor (e.g. QuantI16, FakeQuant)
*/
template<class T>
template<typename T, typename QUANT_DESC>
class ScaleFactorPerLayer {
public:
/**
* @brief calculates the weights scale factor to fit the dynamic range into the target bit size,
* also calculates the output scale factor for the given layer
* @param cnnLayer
* @param result
* @param infiniteLoopCount
* @return
*/
bool operator()(T cnnLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, bool fakeQuantize, int infiniteLoopCount) {
bool operator()(T cnnLayer, ScaleFactorUpdateResult &result, int infiniteLoopCount) {
return false;
}
};
template<>
class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
template<typename QUANT_DESC>
class ScaleFactorPerLayer<InferenceEngine::CNNLayer*, QUANT_DESC> {
private :
const float activation_scale_factor = 2048.f;
const float low_prec_activation_scale_factor = 4.f;
@@ -450,11 +454,14 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
}
public :
bool operator()(InferenceEngine::CNNLayer *cnnLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, bool fakeQuantize,
int infiniteLoopCount) {
bool operator()(InferenceEngine::CNNLayer *cnnLayer, ScaleFactorUpdateResult &result, int infiniteLoopCount) {
if ( !cnnLayer ) {
IE_THROW() << "Incorrect Convolutional Layer pointer \n";
}
int inputsSize = ScaleFactorCalculator<QUANT_DESC>::GetInputsBytesSize();
bool fakeQuantize = ScaleFactorCalculator<QUANT_DESC>::IsFakeQuantize();
LayerInfo layerInfo(*cnnLayer);
// TODO: the current approach sets the input scale factor for true input layer(s) equal to the provided factor,
auto quant = InferenceEngine::getInjectedData<QuantizedLayerParams>(*cnnLayer);
@@ -656,8 +663,8 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
}
};
template<>
class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
template<typename QUANT_DESC>
class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*, QUANT_DESC> {
private:
bool requantizeEltwiseInput(InferenceEngine::EltwiseLayer* eltwiseLayer, uint8_t inputIx, int16_t maxValue,
bool fakeQuantize, ScaleFactorUpdateResult &result) {
@@ -726,11 +733,12 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
}
public:
bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result,
bool fakeQuantize, int infiniteLoopCount) {
bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, ScaleFactorUpdateResult &result, int infiniteLoopCount) {
if ( !eltwiseLayer ) {
THROW_GNA_EXCEPTION << "Incorrect Eltwise Layer pointer \n";
}
int inputsSize = ScaleFactorCalculator<QUANT_DESC>::GetInputsBytesSize();
bool fakeQuantize = ScaleFactorCalculator<QUANT_DESC>::IsFakeQuantize();
bool lowPrecision = (inputsSize == sizeof(int8_t));
auto in0 = InferenceEngine::CNNNetPrevLayer(eltwiseLayer, 0);
@@ -836,15 +844,16 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
}
};
template<>
class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*> {
template<typename QUANT_DESC>
class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*, QUANT_DESC> {
public:
bool operator()(InferenceEngine::ConcatLayer* concatLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result,
bool fakeQuantize, int infiniteLoopCount) {
bool operator()(InferenceEngine::ConcatLayer* concatLayer, ScaleFactorUpdateResult &result, int infiniteLoopCount) {
if ( !concatLayer ) {
THROW_GNA_EXCEPTION << "Incorrect Concat Layer pointer \n";
}
bool fakeQuantize = ScaleFactorCalculator<QUANT_DESC>::IsFakeQuantize();
if (concatLayer->insData.size() < 2) {
THROW_GNA_EXCEPTION << "Concat layer has unsupported number of incoming layers.";
}
@@ -1061,8 +1070,8 @@ class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*> {
}
};
template<>
class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
template<typename QUANT_DESC>
class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*, QUANT_DESC> {
private:
std::vector<std::tuple<uint16_t const, float const, float const>> thresholds {
// tuple values: scale factor threshold, scale factor reduction factor for I16 precision, for I8 precision
@@ -1074,14 +1083,15 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
};
public:
bool operator()(InferenceEngine::WeightableLayer *wl, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result,
bool fakeQuantize, int infiniteLoopCount) {
bool operator()(InferenceEngine::WeightableLayer *wl, ScaleFactorUpdateResult &result, int infiniteLoopCount) {
if ( !wl ) {
THROW_GNA_EXCEPTION << "Incorrect Weightable Layer pointer \n";
} else if (!wl->_weights) {
THROW_GNA_EXCEPTION << "Incorrect weight value for " << wl->name << ":" << wl->type << "\n";
}
int inputsSize = ScaleFactorCalculator<QUANT_DESC>::GetInputsBytesSize();
bool fakeQuantize = ScaleFactorCalculator<QUANT_DESC>::IsFakeQuantize();
auto prevLayer = CNNNetPrevLayer(wl);
auto quantDataForInputLayer =
InferenceEngine::getInjectedData<QuantizedLayerParams>(*InferenceEngine::CNNNetPrevLayer(wl).get());
@@ -1111,6 +1121,7 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
}
// TODO: pass 8 bits somehow
int weightsSize = ScaleFactorCalculator<QUANT_DESC>::GetMandatoryWeightsBytesSize(wl);
if (!quant->_weights_quant.IsScaleSet()) {
size_t scaleRange = 0;
if (weightsSize == 2) {
@@ -1217,19 +1228,20 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
}
};
template<>
class ScaleFactorPerLayer<InferenceEngine::ScaleShiftLayer*> : public ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
template<typename QUANT_DESC>
class ScaleFactorPerLayer<InferenceEngine::ScaleShiftLayer*, QUANT_DESC> :
public ScaleFactorPerLayer<InferenceEngine::WeightableLayer*, QUANT_DESC> {
};
template<>
class ScaleFactorPerLayer<InferenceEngine::ConvolutionLayer*> : public ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
template<typename QUANT_DESC>
class ScaleFactorPerLayer<InferenceEngine::ConvolutionLayer*, QUANT_DESC> :
public ScaleFactorPerLayer<InferenceEngine::WeightableLayer*, QUANT_DESC> {
};
template<>
class ScaleFactorPerLayer<InferenceEngine::GemmLayer*> {
template<typename QUANT_DESC>
class ScaleFactorPerLayer<InferenceEngine::GemmLayer*, QUANT_DESC> {
public:
bool operator() (InferenceEngine::GemmLayer* gemmLayer, int weightsSize, int inputSize, ScaleFactorUpdateResult &result,
bool fakeQuantize, int infiniteLoopCount) {
bool operator() (InferenceEngine::GemmLayer* gemmLayer, ScaleFactorUpdateResult &result, int infiniteLoopCount) {
if ( !gemmLayer ) {
THROW_GNA_EXCEPTION << "Incorrect Gemm Layer pointer \n";
}
@@ -1278,21 +1290,16 @@ public:
* @brief the scale factor calculator computes only output scale factors for the layer;
* if scale factor propagation is not possible, it indicates a restart condition
*/
template<typename QUANT_DESC>
class ScaleFactorCalculator {
using Cnt = std::vector<InferenceEngine::CNNLayerPtr>;
Cnt net;
mutable Cnt::const_iterator idx;
mutable bool needRestart = false;
int mandWeightsBytesSize;
int optWeightsBytesSize;
bool isFakeQuantize;
int inputsBytesSize;
int infiniteLoopCount = 0;
public:
ScaleFactorCalculator(Cnt &net, int mandWeightsBytesSize, int optWeightsBytesSize, int inputsBytesSize, bool fakeQuantize)
: net(net), mandWeightsBytesSize(mandWeightsBytesSize), optWeightsBytesSize(optWeightsBytesSize),
inputsBytesSize(inputsBytesSize), isFakeQuantize(fakeQuantize) {
ScaleFactorCalculator(Cnt &net) : net(net) {
idx = std::begin(this->net);
}
bool needToRestart() const {
@@ -1311,13 +1318,7 @@ class ScaleFactorCalculator {
bool operator()(T ptr) const {
needRestart = false;
frontend::ScaleFactorUpdateResult result;
auto weightsBytesSize = mandWeightsBytesSize;
if (LayerInfo(ptr).isConvolution() || LayerInfo(ptr).isScaleShift()) {
weightsBytesSize = optWeightsBytesSize;
}
if (!frontend::ScaleFactorPerLayer<T>()(ptr, weightsBytesSize, inputsBytesSize, result, isFakeQuantize, infiniteLoopCount)) {
if (!frontend::ScaleFactorPerLayer<T, QUANT_DESC>()(ptr, result, infiniteLoopCount)) {
return false;
}
if (result) {
@@ -1337,6 +1338,39 @@ class ScaleFactorCalculator {
needRestart = true;
return true;
}
};
template<class T>
static int GetMandatoryWeightsBytesSize(T ptr) {
auto info = LayerInfo(ptr);
if (info.isConvolution() || info.isScaleShift()) {
return GetOptionalWeightsBytesSize();
}
if (IsFakeQuantize()) {
auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*ptr);
if (quantData->_weights_quant.IsStatsSet()) {
if (quantData->_weights_quant.GetLevels() <= std::numeric_limits<uint8_t>::max()) {
return frontend::FakeQuantI8().getWeightsPrecision().size();
} else {
return frontend::FakeQuantI16().getWeightsPrecision().size();
}
}
}
return QUANT_DESC::mandatory().getWeightsPrecision().size();
}
static int GetOptionalWeightsBytesSize() {
return QUANT_DESC::optional().getWeightsPrecision().size();
}
static int GetInputsBytesSize() {
return QUANT_DESC::mandatory().getInputPrecision().size();
}
static bool IsFakeQuantize() {
return std::is_same<QUANT_DESC, FakeQuant>();
}
}; // class ScaleFactorCalculator
} // namespace GNAPluginNS

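Editor's sketch, not part of the diff: the class comment above says that when scale factor propagation is not possible the calculator indicates a restart condition, and the surrounding code keeps infinite-loop bookkeeping (infiniteLoopCount, infiniteLoopPattern) around it. A minimal propagate-until-stable loop with toy stand-ins; Layer and Calculator here are illustrative, not the plugin's classes:

#include <iostream>
#include <stdexcept>
#include <vector>

// A toy layer: attemptsNeeded counts how many passes it takes before this
// layer's scale factor settles and stops requesting a restart.
struct Layer {
    int attemptsNeeded;
};

class Calculator {
public:
    explicit Calculator(std::vector<Layer>& net) : net_(net) {}

    // One full pass over the (topologically sorted) layers.
    // Returns true if any layer requested a restart.
    bool pass() {
        bool restart = false;
        for (auto& layer : net_) {
            if (layer.attemptsNeeded > 0) {
                --layer.attemptsNeeded;
                restart = true;  // something upstream has to be recomputed
            }
        }
        return restart;
    }

private:
    std::vector<Layer>& net_;
};

int main() {
    std::vector<Layer> net = {{0}, {2}, {1}};
    Calculator calc(net);

    // Propagate until no layer asks for a restart, guarding against
    // configurations that would oscillate forever (the diff counts these too).
    int infiniteLoopCount = 0;
    while (calc.pass()) {
        if (++infiniteLoopCount > 100) {
            throw std::runtime_error("scale factor propagation did not converge");
        }
    }
    std::cout << "converged after " << infiniteLoopCount << " restart pass(es)\n";
    return 0;
}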

@@ -429,43 +429,7 @@ void GNAPlugin::InitGNADevice() {
graphCompiler.setGNAMemoryPtr(gnamem);
}
void GNAPlugin::UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork & network) {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UpdateGnaQuantModeFromNetwork");
// fp32 emulation mode doesn't need any modifications to the configuration
if (config.gnaFlags.sw_fp32) return;
// search for FQ layers
// only supports cases of int16 or int8
auto it = details::CNNNetworkIterator(network), end = details::CNNNetworkIterator();
for (; it != end; it++) {
if (!LayerInfo(*it).isFakeQuantize()) {
continue;
}
GNAFakeQuantizeLayer fqLayer(*it);
auto inputLayer = fqLayer.getInputLayer();
// this fake quantize represents data quantization - not weights
if (!LayerInfo(inputLayer).isConst()) {
continue;
}
// also in mixed mode i8 should be stated as target precision
if (fqLayer.getLevels() <= std::numeric_limits<uint8_t>::max()) {
config.gnaPrecision = InferenceEngine::Precision::I8;
} else if (fqLayer.getLevels() <= std::numeric_limits<uint16_t>::max()) {
config.gnaPrecision = InferenceEngine::Precision::I16;
} else {
THROW_GNA_LAYER_EXCEPTION(*it)
<< "unsupported quantisation scheme: number of levels is " << fqLayer.getLevels() << " while only up to "
<< std::numeric_limits<uint16_t>::max() << " is supported";
}
gnaFlags->fake_quantized = true;
config.gnaFlags.fake_quantized = true;
}
}
void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & network) {
void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network) {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UpdateInputScaleFromNetwork");
// fp32 emulation mode doesn't need any modifications to the configuration
if (config.gnaFlags.sw_fp32) return;
@@ -480,6 +444,7 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ
if (!LayerInfo(nextToInputLayer.second).isFakeQuantize()) {
continue;
}
// replacing scale factor from this fq layer
GNAFakeQuantizeLayer fqLayer(nextToInputLayer.second);
auto inputRange = fqLayer.getInputRange();
@@ -714,12 +679,13 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
}
bool isNgraphPassesUsed = false;
bool fake_quantized = false;
if (_network.getFunction()) {
CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network);
const auto& graph = clonedNetwork.getFunction();
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
fake_quantized = ngraph::op::util::has_op_with_type<ngraph::opset7::FakeQuantize>(graph);
// WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
manager.register_pass<ngraph::pass::ConvertPriorBox>();
manager.register_pass<ngraph::pass::CommonOptimizations>();
@@ -783,9 +749,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
THROW_GNA_EXCEPTION << error.c_str();
}
// FQ networks now replace certain flags in the plugin - the flags will be overwritten
UpdateGnaQuantModeFromNetwork(network);
UpdateInputScaleFromNetwork(network);
if (fake_quantized) {
UpdateInputScaleFromNetwork(network);
}
// Set input and output information from the original network
UpdateInputsAndOutputsInfoFromNetwork(network);
@@ -849,19 +815,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
// to run all passes, the pass manager has to be called twice
run_passes(newNet, true, gnaFlags->input_low_precision);
run_passes(newNet, false, gnaFlags->input_low_precision);
} else if (gnaFlags->fake_quantized) {
switch (config.gnaPrecision) {
case Precision::I16:
ModelQuantizer<FakeQuantI16> q16;
newNet = q16.quantize(network, run_passes, inputsDesc->inputScaleFactors);
break;
case Precision::I8:
ModelQuantizer<FakeQuantI8> q8;
newNet = q8.quantize(network, run_passes, inputsDesc->inputScaleFactors);
break;
default:
THROW_GNA_EXCEPTION << "unsupported GNA precision for quantisation: " << config.gnaPrecision;
}
} else if (fake_quantized) {
ModelQuantizer<FakeQuant> modelQuantizer;
newNet = modelQuantizer.quantize(network, run_passes, inputsDesc->inputScaleFactors);
} else {
switch (config.gnaPrecision) {
case Precision::I16:

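Editor's sketch, not part of the diff: taken together, the hunks above change the load flow so that FakeQuantize presence is detected once on the ngraph function, input scale factors are taken from the FQ layers only in that case, and a single ModelQuantizer<FakeQuant> replaces both the removed UpdateGnaQuantModeFromNetwork() pass and the old FakeQuantI8/FakeQuantI16 switch. A rough sketch of that control flow using stub functions; the names below are abbreviations, not plugin code:

#include <iostream>

// Stubs standing in for the steps this diff touches (illustrative only).
bool hasFakeQuantizeOps()          { return true; }  // cf. has_op_with_type<FakeQuantize> on the graph
void updateInputScalesFromFQ()     { std::cout << "update input scale factors from FQ input ranges\n"; }
void quantizeWithFakeQuant()       { std::cout << "ModelQuantizer<FakeQuant>: per-layer I8/I16 choice\n"; }
void quantizeWithConfiguredPrec()  { std::cout << "ModelQuantizer for the configured GNA precision\n"; }

int main() {
    const bool fake_quantized = hasFakeQuantizeOps();

    if (fake_quantized) {
        // Only FQ networks adjust the input scale factors from the network itself;
        // the configured GNA precision is no longer overwritten here.
        updateInputScalesFromFQ();
        quantizeWithFakeQuant();
    } else {
        quantizeWithConfiguredPrec();
    }
    return 0;
}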

@@ -211,8 +211,7 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
int idx = 0);
void UpdateFieldsFromConfig();
void UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork &);
void UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork &);
void UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network);
void UpdateInputsAndOutputsInfoFromNetwork(InferenceEngine::CNNNetwork &);
/**
* @brief Tries to init an output on the base of a layer data


@@ -0,0 +1,30 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "common_test_utils/test_constants.hpp"
#include "subgraph_tests/fq_with_mixed_levels.hpp"
namespace SubgraphTestsDefinitions {
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
const std::vector<std::map<std::string, std::string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
}
};
INSTANTIATE_TEST_SUITE_P(smoke_FqWithMixedLevelsTest, FqWithMixedLevelsTest,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs)),
FqWithMixedLevelsTest::getTestCaseName);
} // namespace
} // namespace SubgraphTestsDefinitions


@@ -0,0 +1,18 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifndef TEST_FQ_WITH_MIXED_LEVELS_HPP
#define TEST_FQ_WITH_MIXED_LEVELS_HPP
#include "shared_test_classes/subgraph/fq_with_mixed_levels.hpp"
namespace SubgraphTestsDefinitions {
TEST_P(FqWithMixedLevelsTest, CompareWithRefImpl) {
Run();
};
} // namespace SubgraphTestsDefinitions
#endif // TEST_FQ_WITH_MIXED_LEVELS_HPP


@@ -0,0 +1,36 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifndef FQ_WITH_MIXED_LEVELS_HPP
#define FQ_WITH_MIXED_LEVELS_HPP
#include <memory>
#include <string>
#include <tuple>
#include <vector>
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
namespace SubgraphTestsDefinitions {
typedef std::tuple<
InferenceEngine::Precision, // Network Precision
std::string, // Target Device
std::map<std::string, std::string> // Configuration
> FqWithMixedLevelsParams;
class FqWithMixedLevelsTest : public testing::WithParamInterface<FqWithMixedLevelsParams>,
public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<FqWithMixedLevelsParams>& obj);
protected:
void SetUp() override;
};
} // namespace SubgraphTestsDefinitions
#endif // FQ_WITH_MIXED_LEVELS_HPP


@@ -0,0 +1,74 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/subgraph/fq_with_mixed_levels.hpp"
#include "ngraph_functions/builders.hpp"
namespace SubgraphTestsDefinitions {
std::string FqWithMixedLevelsTest::getTestCaseName(const testing::TestParamInfo<FqWithMixedLevelsParams>& obj) {
InferenceEngine::Precision netPrecision;
std::string targetDevice;
std::map<std::string, std::string> configuration;
std::tie(netPrecision, targetDevice, configuration) = obj.param;
std::ostringstream result;
result << "netPRC=" << netPrecision.name() << "_";
result << "targetDevice=" << targetDevice;
for (auto const& configItem : configuration) {
result << "_configItem=" << configItem.first << "_" << configItem.second;
}
return result.str();
}
void FqWithMixedLevelsTest::SetUp() {
InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> tempConfig;
std::tie(netPrecision, targetDevice, tempConfig) = this->GetParam();
configuration.insert(tempConfig.begin(), tempConfig.end());
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto unit = [=](const std::shared_ptr<ngraph::Node>& input,
const std::vector<std::vector<size_t>>& shapes,
float weights_min, float weights_max,
size_t level1, const std::vector<std::vector<float>>& data1,
size_t level2, const std::vector<std::vector<float>>& data2,
size_t level3, const std::vector<std::vector<float>>& data3) {
auto sigmoid = std::make_shared<ngraph::opset7::Sigmoid>(input);
auto fake1 = ngraph::builder::makeFakeQuantize(sigmoid, ngPrc, level1, { 1 }, data1[0], data1[1], data1[2], data1[3]);
std::vector<float> weights = CommonTestUtils::generate_float_numbers(shapes[1][0] * shapes[1][1], weights_min, weights_max);
auto constant = std::make_shared<ngraph::opset7::Constant>(ngPrc, ngraph::Shape{shapes[1][0], shapes[1][1]}, weights);
auto fake2 = ngraph::builder::makeFakeQuantize(constant, ngPrc, level2, { 1 }, data2[0], data2[1], data2[2], data2[3]);
auto matmul = ngraph::builder::makeMatMul(fake1, fake2, false, true);
auto bias = ngraph::builder::makeConstant(ngPrc, std::vector<size_t>{shapes[0][0], shapes[1][0]}, std::vector<float>{ 1.0 });
auto add = ngraph::builder::makeEltwise(matmul, bias, ngraph::helpers::EltwiseTypes::ADD);
return ngraph::builder::makeFakeQuantize(add, ngPrc, level3, { 1 }, data3[0], data3[1], data3[2], data3[3]);
};
auto params = ngraph::builder::makeParams(ngPrc, {{ 1, 8 }});
auto input = ngraph::builder::makeFakeQuantize(params[0], ngPrc, std::numeric_limits<uint32_t>::max(), { 1 },
{ -10. }, { 10. }, { -10. }, { 10. });
input = unit(input,
{{1, 8}, {8, 8}},
-20., 20.,
std::numeric_limits<uint16_t>::max(), {{ -1.0 }, { 1.0 }, { -1.0 }, { 1.0 }},
std::numeric_limits<uint8_t>::max(), {{ -2.5 }, { 2.5 }, { -2.5 }, { 2.5 }},
std::numeric_limits<uint32_t>::max(), {{ -5. } , { 5. }, { -5. }, { 5. }});
input = unit(input,
{{ 1, 8 }, { 8, 8 }},
-13., 13.,
std::numeric_limits<uint16_t>::max(), {{ -1.0 }, { 1.0 }, { -1.0 }, { 1.0 }},
std::numeric_limits<uint16_t>::max(), {{ -2.5 }, { 2.5 }, { -2.5 }, { 2.5 }},
std::numeric_limits<uint32_t>::max(), {{ -5. } , { 5. }, { -5. }, { 5. }});
input = unit(input,
{{1, 8}, {8, 8}},
-20., 20.,
std::numeric_limits<uint16_t>::max(), {{ -1.0 }, { 1.0 }, { -1.0 }, { 1.0 }},
std::numeric_limits<uint8_t>::max(), {{ -2.5 }, { 2.5 }, { -2.5 }, { 2.5 }},
std::numeric_limits<uint32_t>::max(), {{ -5. } , { 5. }, { -5. }, { 5. }});
auto result = std::make_shared<ngraph::opset7::Result>(input);
function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, params, "FqWithMixedLevelsTest");
}
} // namespace SubgraphTestsDefinitions