[IE CLDNN] Improve network outputs detection in quantized FP16+INT8 IR to avoid converting them to FP16 precision (#3407)

Author: Jedrzej Hajduczenia, 2020-12-01 11:27:51 +01:00 (committed by GitHub)
parent c902eb5df7
commit 4a91f914e2


@@ -405,6 +405,8 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cld
         }
     }
 
+    OutputsDataMap outputsMap = network.getOutputsInfo();
+
     // [WA part2] Try to find non-quantized layers and convert them back to FP16
     if (config.enableInt8) {
         if (fqFound && baselineIsFP16 && config.enable_fp16_for_quantized_models) {
@@ -417,14 +419,42 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cld
                 if (layer->outData.empty() || layer->insData.empty())
                     continue;
 
-                auto canReduceOutputPrecision = [](const CNNLayerPtr& l) -> bool {
-                    auto type = LayerTypeFromStr(l->type);
-                    // Don't do conversion for outputs
-                    auto next = GetNextLayers(l);
-                    if (next.empty()) {
-                        return false;
-                    }
-
+                auto isOutputLayer = [](const CNNLayerPtr& l, const OutputsDataMap& networkOutputs) -> bool {
+                    bool is_output = false;
+
+                    if (GetNextLayers(l).empty())
+                        is_output = true;
+
+                    // Condition above is not enough, as network output layer
+                    // can still be used in other parts of the graph
+                    // (e.g. 1st output from TopK primitive may become network output
+                    // while 2nd output from the same primitive may still be used
+                    // in the graph).
+                    if (!is_output) {
+                        for (auto layerOutput : l->outData) {
+                            for (auto networkOutput : networkOutputs) {
+                                if (layerOutput->getName() == networkOutput.second->getName()) {
+                                    is_output = true;
+                                    break;
+                                }
+                            }
+
+                            if (is_output)
+                                break;
+                        }
+                    }
+
+                    return is_output;
+                };
+
+                auto canReduceOutputPrecision = [](const CNNLayerPtr& l, const bool isNetworkOutput) -> bool {
+                    // Don't do the conversion for network outputs
+                    if (isNetworkOutput)
+                        return false;
+
+                    auto type = LayerTypeFromStr(l->type);
+                    auto next = GetNextLayers(l);
+
                     if (type == LayerType::ScaleShift) {
                         // ScaleShift is supposed to return Dequantized values, so in most of the cases we can convert its output to FP16
                         // The exception is when the next node is Eltwise, so LPT keeps modified ScaleShift node on one of the branches
@@ -462,9 +492,11 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cld
                     return result;
                 };
 
+                bool is_network_output = isOutputLayer(layer, outputsMap);
+
                 if (canReducePrecision(layer)) {
-                    convertLayerPrecision<Precision::FP32, Precision::FP16>(layer, GetNextLayers(layer).empty());
-                } else if (canReduceOutputPrecision(layer)) {
+                    convertLayerPrecision<Precision::FP32, Precision::FP16>(layer, is_network_output);
+                } else if (canReduceOutputPrecision(layer, is_network_output)) {
                     for (auto &out_data : layer->outData) {
                         if (out_data->getPrecision() == Precision::FP32)
                             out_data->setPrecision(Precision::FP16);
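
For readers skimming the change: the rule the diff introduces is that a layer counts as a network output either when it has no consumers at all, or when any one of its output blobs is registered in the network's output map (so a TopK whose first output is a network output while its second output still feeds the graph is handled correctly). Below is a minimal, self-contained sketch of that rule; Data, Layer, and the name-based matching are illustrative stand-ins, not the InferenceEngine CNNLayerPtr/OutputsDataMap API used in the diff.

// Sketch of the output-detection rule added by this commit.
// Data/Layer are simplified stand-ins, not the InferenceEngine classes.
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Data {
    std::string name;
    std::vector<std::string> consumers;   // names of layers reading this blob
};
using DataPtr = std::shared_ptr<Data>;

struct Layer {
    std::string name;
    std::vector<DataPtr> outData;
};

// networkOutputs plays the role of getOutputsInfo(): output name -> Data.
bool isNetworkOutput(const Layer& layer,
                     const std::map<std::string, DataPtr>& networkOutputs) {
    // Rule 1: no downstream consumers at all -> clearly a network output.
    bool hasConsumers = false;
    for (const auto& out : layer.outData)
        if (!out->consumers.empty())
            hasConsumers = true;
    if (!hasConsumers)
        return true;

    // Rule 2: even with consumers, any single output blob listed in the
    // network's output map makes the layer a network output
    // (covers the multi-output TopK case from the diff's comment).
    for (const auto& out : layer.outData)
        for (const auto& netOut : networkOutputs)
            if (out->name == netOut.second->name)
                return true;

    return false;
}

Gating the FP32-to-FP16 reduction on this check matches the commit message: blobs the user reads back as network outputs keep their original precision, while the rest of the quantized FP16+INT8 graph is still converted to FP16.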