[CPU] Do not set BF16 on input port for Eltwise after Input (#5542)

* [CPU] Do not set BF16 on input port for Eltwise after Input

Eltwise supports conversion to BF16 on its own,
so the unnecessary Reorder after Input is avoided.

* Create a separate function for enforcing BF16 on ports

* Add test to verify that no extra Reorder is inserted

Also:
- update the legacy test
- remove leftover code that is no longer applicable

* Correct expected precision in legacy test
Egor Duplensky 2021-05-12 11:21:05 +03:00 committed by GitHub
parent 4c452b8bb6
commit cfc235bd65
5 changed files with 95 additions and 35 deletions
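For context, the enforcement this commit refactors is driven by a plugin config key rather than by the model itself. A minimal sketch of how a user of the 2021-era Inference Engine API would switch BF16 enforcement on; the model path is a placeholder, and only KEY_ENFORCE_BF16 itself appears in this diff:

    #include <inference_engine.hpp>

    int main() {
        InferenceEngine::Core ie;
        // Placeholder path to any FP32 IR model
        InferenceEngine::CNNNetwork network = ie.ReadNetwork("model.xml");
        // ENFORCE_BF16 = YES makes the CPU plugin run the BF16 pass
        // (MKLDNNGraph::EnforceBF16 below) over non-constant data paths
        auto execNetwork = ie.LoadNetwork(network, "CPU",
            {{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16,
              InferenceEngine::PluginConfigParams::YES}});
        return 0;
    }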


@@ -262,33 +262,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMan
         graphNodes.push_back(outNode);
     }
 
-    // We set all non const data paths precision to BF16 in case enforceBF16 flag is switched on.
-    if (config.enforceBF16) {
-        bool isQuantizedModel = false;
-        for (auto& node : graphNodes) {
-            if (node->getType() == FakeQuantize)
-                isQuantizedModel = true;
-        }
-
-        // Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision
-        // only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default
-        if (implication(isQuantizedModel, config.manualEnforceBF16)) {
-            for (auto &node : graphNodes) {
-                if (node->getType() != Input && node->getType() != Output) {
-                    for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) {
-                        auto &parent = node->getParentEdgesAtPort(i)[0]->getParent();
-                        if (!(parent->getType() == Input && parent->isConstant()) && node->getOriginalInputPrecisionAtPort(i) == Precision::FP32)
-                            node->setOriginalInputPrecisionAtPort(i, Precision::BF16);
-                    }
-
-                    for (size_t i = 0; i < node->getOriginalOutputsNumber(); i++) {
-                        if (node->getOriginalOutputPrecisionAtPort(i) == Precision::FP32)
-                            node->setOriginalOutputPrecisionAtPort(i, Precision::BF16);
-                    }
-                }
-            }
-        }
-    }
+    if (config.enforceBF16)
+        EnforceBF16();
 
     // change precision for input/output nodes to avoid extra data conversion when set input/output blobs
     // also we need to change input/output precisions for consumers/producers to avoid inserting reorder
@@ -1201,6 +1176,35 @@ bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNo
     return true;
 }
 
+// Set all non const data paths precision to BF16
+void MKLDNNGraph::EnforceBF16() {
+    bool isQuantizedModel = false;
+    for (auto& node : graphNodes) {
+        if (node->getType() == FakeQuantize)
+            isQuantizedModel = true;
+    }
+
+    // Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision
+    // only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default
+    if (implication(isQuantizedModel, config.manualEnforceBF16)) {
+        for (auto &node : graphNodes) {
+            if (node->getType() != Input && node->getType() != Output) {
+                for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) {
+                    auto &parent = node->getParentEdgesAtPort(i)[0]->getParent();
+                    if (!(parent->getType() == Input && parent->isConstant()) &&       // exclude nodes after Constant Inputs
+                        !(parent->getType() == Input && node->getType() == Eltwise) && // exclude Eltwise after Input since it supports conversion to BF16
+                        node->getOriginalInputPrecisionAtPort(i) == Precision::FP32)
+                        node->setOriginalInputPrecisionAtPort(i, Precision::BF16);
+                }
+
+                for (size_t i = 0; i < node->getOriginalOutputsNumber(); i++) {
+                    if (node->getOriginalOutputPrecisionAtPort(i) == Precision::FP32)
+                        node->setOriginalOutputPrecisionAtPort(i, Precision::BF16);
+                }
+            }
+        }
+    }
+}
+
 InferenceEngine::CNNNetwork MKLDNNGraph::dump() const {
     return dump_graph_as_ie_ngraph_net(*this);
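For reference, implication() used in the guard above is a helper from the plugin's utility headers; assuming it is plain logical implication, a minimal sketch is:

    // Presumed definition (an assumption, not part of this diff):
    // logical implication "cause => cond", true whenever cause is false.
    template <typename T, typename U>
    inline bool implication(T cause, U cond) {
        return !cause || cond;
    }

Read that way, the guard says: if the model is quantized, enforce BF16 only when the user requested it manually (config.manualEnforceBF16).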


@@ -204,6 +204,7 @@ protected:
     friend InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph);
 
 private:
+    void EnforceBF16();
     void printGraphInfo() const;
 };


@@ -142,13 +142,6 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
     DropDoubleReorders(graph);
     graph.RemoveDroppedNodes();
 
-#if 0
-    /* disable, since there is no use case for it at the moment
-     * should be enabled after ngraph migration */
-    DropConvertReorder(graph);
-    graph.RemoveDroppedNodes();
-#endif
-
     MergeTransposeAndReorder(graph);
     graph.RemoveDroppedNodes();


@@ -123,7 +123,7 @@ protected:
         // performance counters
         expectedPrecisions["Matmul_0"] = "BF16";
         expectedPrecisions["Mul_1"] = "BF16";
-        expectedPrecisions["Add_1"] = "BF16";
+        expectedPrecisions["Add_1"] = netPrecision.name(); // FP32->BF16 in case of FP32 net, BF16->BF16 in case of BF16 net
         expectedPrecisions["Relu_1"] = "ndef";
         expectedPrecisions["Conc_1"] = "BF16";
         expectedPrecisions["Matmul_1"] = "BF16";


@@ -0,0 +1,62 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <ngraph_functions/builders.hpp>
+#include "ie_common.h"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "test_utils/cpu_test_utils.hpp"
+
+using namespace InferenceEngine;
+using namespace CPUTestUtils;
+
+namespace CPULayerTestsDefinitions {
+
+class InputNoReorderEltwiseBF16 : virtual public LayerTestsUtils::LayerTestsCommon,
+                                  public CPUTestsBase {
+protected:
+    void SetUp() {
+        auto netPrecision = inPrc = Precision::FP32;
+        outPrc = Precision::BF16;
+        targetDevice = CommonTestUtils::DEVICE_CPU;
+        std::map<std::string, std::string> additional_config{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}};
+        configuration.insert(additional_config.begin(), additional_config.end());
+
+        std::vector<size_t> inputShape {2, 4, 4, 1};
+        std::vector<size_t> outputShape = inputShape;
+        auto eltwiseType = ngraph::helpers::EltwiseTypes::ADD;
+        auto secondaryInputType = ngraph::helpers::InputLayerType::CONSTANT;
+
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+        auto input = ngraph::builder::makeParams(ngPrc, {inputShape});
+        std::shared_ptr<ngraph::Node> secondaryInput = ngraph::builder::makeInputLayer(ngPrc, secondaryInputType, inputShape);
+        auto eltwise = ngraph::builder::makeEltwise(input[0], secondaryInput, eltwiseType);
+
+        function = makeNgraphFunction(ngPrc, input, eltwise, "Eltwise");
+    }
+};
+
+/* FP32 network with enforced BF16 precision.
+ * Test that no Reorder (or Convert) is inserted after Input.
+ * Eltwise performs the conversion by itself.
+
+    Input[FP32]      Constant[FP32]
+         \                 /
+          \               /
+           X  No Reorder X
+            \           /
+          Eltwise[FP32->BF16]
+                 |
+                 |
+           Output[BF16]
+*/
+TEST_F(InputNoReorderEltwiseBF16, CompareWithRefs) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    Run();
+
+    CheckNodeOfTypeCount(executableNetwork, "Reorder", 0);
+    CheckNodeOfTypeCount(executableNetwork, "Convert", 0);
+    CheckNodeOfTypeCount(executableNetwork, "Eltwise", 1);
+}
+} // namespace CPULayerTestsDefinitions
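The assertions rely on CheckNodeOfTypeCount from test_utils/cpu_test_utils.hpp. A hedged sketch of what such a helper presumably does, counting nodes of a given runtime type in the execution graph; the exact utility implementation may differ:

    void CheckNodeOfTypeCount(InferenceEngine::ExecutableNetwork &execNet,
                              const std::string &nodeType, size_t expectedCount) {
        // Query the runtime (execution) graph of the compiled network
        auto function = execNet.GetExecGraphInfo().getFunction();
        ASSERT_NE(nullptr, function);
        size_t actualCount = 0;
        for (const auto &node : function->get_ops()) {
            // Runtime nodes carry their plugin-level type in rt_info
            const auto &rtInfo = node->get_rt_info();
            auto it = rtInfo.find(ExecGraphInfoSerialization::LAYER_TYPE);
            ASSERT_NE(rtInfo.end(), it);
            auto type = std::dynamic_pointer_cast<ngraph::VariantImpl<std::string>>(it->second);
            ASSERT_NE(nullptr, type);
            if (type->get() == nodeType)
                actualCount++;
        }
        ASSERT_EQ(expectedCount, actualCount) << "Unexpected count of " << nodeType << " nodes";
    }

With both Reorder and Convert expected zero times, the test fails if a data conversion node between Input and Eltwise ever reappears.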