[IE CLDNN] Restored optional conversion of quantized model to fp32 (#4485)

This commit is contained in:
Vladimir Paramuzov 2021-03-03 09:28:53 +03:00 committed by GitHub
parent ebe63f557c
commit 92d750747c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -330,6 +330,11 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
using namespace ngraph::pass::low_precision;
ngraph::pass::Manager manager;
// Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers
// With this key, users can work around such issues
if (!config.enable_fp16_for_quantized_models) {
manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
}
auto lptPrerequisites = manager.register_pass<ngraph::pass::GraphRewrite>();
const std::vector<ngraph::element::Type> supportedTypes = { ngraph::element::i8, ngraph::element::u8 };
lptPrerequisites->add_matcher<PullReshapeThroughDequantization>(supportedTypes);