openvino/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp

// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <utility>
#include <string>
#include "layer_transform.hpp"
#include "gna_graph_tools.hpp"
#include "details/ie_cnn_network_tools.h"
#include "layer_quantizer.hpp"
#include "scale_factor_calc.hpp"
#include "weights_converter.hpp"
namespace GNAPluginNS {
/**
 * Quantizes an entire CNN network
 * @tparam T - type trait for the target weights and biases precision
 */
template<class T>
class ModelQuantizer {
 public:
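    // Convenience overloads: each one forwards to the full quantize() overload below.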
    InferenceEngine::ICNNNetwork::Ptr quantize(InferenceEngine::ICNNNetwork &model, float scaleFactor) const {
        return quantize(model, [](InferenceEngine::CNNNetPtr &, bool runBeforeCopy){}, std::vector<float>({scaleFactor}));
    }

    template <class PreQuantisationCb>
    InferenceEngine::ICNNNetwork::Ptr quantize(InferenceEngine::ICNNNetwork &model, const PreQuantisationCb &cb, float scaleFactor) const {
        return quantize(model, cb, std::vector<float>({scaleFactor}));
    }

    InferenceEngine::ICNNNetwork::Ptr quantize(InferenceEngine::ICNNNetwork &model, std::vector<float> scaleFactor) const {
        return quantize(model, [](InferenceEngine::CNNNetPtr &, bool runBeforeCopy){}, scaleFactor);
    }

    template <class PreQuantisationCb>
    InferenceEngine::ICNNNetwork::Ptr quantize(InferenceEngine::ICNNNetwork &model, const PreQuantisationCb &cb, std::vector<float> scaleFactor) const {
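        // Overall flow: copy the network twice (a plain copy handed to the
        // pre-quantisation callback, then a copy whose visitor injects
        // QuantizedLayerParams into every layer and runs WeightsConverter on it),
        // seed the input scale factors, propagate them across the topologically
        // sorted graph, and finally quantise each layer in place.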
        auto visitor = [&](InferenceEngine::CNNLayerPtr lp) {
            auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
            transformLayer(newLayer, WeightsConverter());
            return newLayer;
        };
        auto copiedNet = InferenceEngine::CNNNetCopy(model);
        cb(copiedNet, true);
        IE_ASSERT(copiedNet.get() != nullptr);
        copiedNet = InferenceEngine::CNNNetCopy(*copiedNet, visitor);
        // TODO: dynamic_cast is probably not the best way to transform the Precision;
        // one possible solution is to add CNNNetCopy overloads that accept two functors,
        // one for the layer copy and another one for the net copy
        auto rawNet = dynamic_cast<InferenceEngine::details::CNNNetworkImpl *>(copiedNet.get());
        IE_ASSERT(rawNet != nullptr);
        // allow client code to access the copied topology, to avoid extra copies if the user
        // wants to chain quantisation with another preprocessing step
        cb(copiedNet, false);
        if (scaleFactor.empty()) {
            THROW_GNA_EXCEPTION << "Scale factor is empty";
        }

        LayersQuantizer<T> lc(*scaleFactor.begin());
        auto sortedNewNet = InferenceEngine::details::CNNNetSortTopologically(*copiedNet.get());
        gnalog() << "Sorted layers: " << std::endl;
        for (auto &&layer : sortedNewNet) {
            gnalog() << layer->name << std::endl;
        }
        // fill scale factors for the input layers; memory layers get a scale factor of 1.0 by default
        InferenceEngine::InputsDataMap dm;
        copiedNet->getInputsInfo(dm);
        size_t scaleIndex = 0;
        for (auto &&inputData : dm) {
            auto inputLayer = getCreatorLayer(inputData.second->getInputData()).lock();
            auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(inputLayer);
            if (scaleFactor.size() <= scaleIndex) {
                THROW_GNA_EXCEPTION << "Scale factors are not set for some of the inputs";
            }
            IE_ASSERT(quantData != nullptr);
            quantData->_src_quant.scale = scaleFactor[scaleIndex];
            scaleIndex++;
        }
        propagateScaleFactor(sortedNewNet, T::mandatory().getWeightsPrecision().size());

        // the sorted order makes it possible to propagate quantisation along dependent layers
        for (auto &&layer : sortedNewNet) {
            transformLayer(layer, lc);
        }
        return copiedNet;
    }
 private:
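    // Runs ScaleFactorCalculator over the sorted layer list until every layer has a
    // consistent scale factor, restarting the pass whenever a downstream layer forces
    // an already-processed layer's output scale to change.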
    void propagateScaleFactor(std::vector<InferenceEngine::CNNLayerPtr> &net, int weightsBytesSize) const {
        ScaleFactorCalculator sf(net, weightsBytesSize);
        while (!sf.allLayersProcessed()) {
            for (auto &&layer : sf.getStartLayers()) {
                transformLayer(layer, sf);
                // transform layers until we hit a case where an output scale was updated
                // because of a downstream layer; then restart from the new start layers
                if (sf.needToRestart()) {
                    break;
                }
            }
        }
    }
};
} // namespace GNAPluginNS
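
// Usage sketch (illustrative only, not part of the original header): quantizing a
// model with the int16 quantisation trait. `QuantI16` is assumed to be the trait
// defined alongside LayersQuantizer in layer_quantizer.hpp, `network` an
// InferenceEngine::ICNNNetwork obtained from the model reader, and the scale
// factor values are application-specific:
//
//     GNAPluginNS::ModelQuantizer<QuantI16> quantizer;
//     auto quantizedNet = quantizer.quantize(network, 1024.0f);
//
//     // per-input scale factors for a multi-input network
//     auto quantizedNet2 = quantizer.quantize(network, std::vector<float>({1024.0f, 64.0f}));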