[CPU] Do not set BF16 on input port for Eltwise after Input (#5542)
* [CPU] Do not set BF16 on input port for Eltwise after Input.
  Since Eltwise supports the conversion to BF16 itself, the unnecessary Reorder is avoided.
* Create a separate function for enforcing BF16 on ports.
* Add a test to verify that no extra Reorder is inserted.
  Also:
  - update the legacy test
  - remove extra code which is no longer applicable
* Correct the expected precision in the legacy test.
parent 4c452b8bb6
commit cfc235bd65
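To make the change concrete before the diff: a minimal standalone sketch of the port-selection rule this commit introduces. The enum and function names here are hypothetical illustrations, not the plugin's API; the real logic operates on MKLDNNNode types inside EnforceBF16() below.

    #include <iostream>

    // Hypothetical simplified node kinds (the plugin uses MKLDNNNode types).
    enum class Kind { Input, Output, Eltwise, Convolution };

    // An FP32 input port is switched to BF16 unless (a) the producer is a
    // constant Input, or (b) the producer is an Input and the consumer is an
    // Eltwise, since Eltwise can perform the conversion to BF16 by itself.
    bool enforceBF16OnInputPort(Kind parent, bool parentIsConstant, Kind node, bool portIsFP32) {
        if (parent == Kind::Input && parentIsConstant)
            return false;  // nodes after constant Inputs are excluded
        if (parent == Kind::Input && node == Kind::Eltwise)
            return false;  // new exclusion: no Reorder needed before Eltwise
        return portIsFP32;
    }

    int main() {
        std::cout << std::boolalpha
                  << enforceBF16OnInputPort(Kind::Input, false, Kind::Eltwise, true) << '\n'       // false
                  << enforceBF16OnInputPort(Kind::Input, false, Kind::Convolution, true) << '\n';  // true
    }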
@@ -262,33 +262,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
         graphNodes.push_back(outNode);
     }
 
-    // We set all non const data paths precision to BF16 in case enforceBF16 flag is switched on.
-    if (config.enforceBF16) {
-        bool isQuantizedModel = false;
-        for (auto& node : graphNodes) {
-            if (node->getType() == FakeQuantize)
-                isQuantizedModel = true;
-        }
-
-        // Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision
-        // only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default
-        if (implication(isQuantizedModel, config.manualEnforceBF16)) {
-            for (auto &node : graphNodes) {
-                if (node->getType() != Input && node->getType() != Output) {
-                    for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) {
-                        auto &parent = node->getParentEdgesAtPort(i)[0]->getParent();
-                        if (!(parent->getType() == Input && parent->isConstant()) && node->getOriginalInputPrecisionAtPort(i) == Precision::FP32)
-                            node->setOriginalInputPrecisionAtPort(i, Precision::BF16);
-                    }
-
-                    for (size_t i = 0; i < node->getOriginalOutputsNumber(); i++) {
-                        if (node->getOriginalOutputPrecisionAtPort(i) == Precision::FP32)
-                            node->setOriginalOutputPrecisionAtPort(i, Precision::BF16);
-                    }
-                }
-            }
-        }
-    }
+    if (config.enforceBF16)
+        EnforceBF16();
 
     // change precision for input/output nodes to avoid extra data conversion when set input/output blobs
     // also we need to change input/output precisions for consumers/producers to avoid inserting reorder
@@ -1201,6 +1176,35 @@ bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNo
     return true;
 }
 
+// Set all non const data paths precision to BF16
+void MKLDNNGraph::EnforceBF16() {
+    bool isQuantizedModel = false;
+    for (auto& node : graphNodes) {
+        if (node->getType() == FakeQuantize)
+            isQuantizedModel = true;
+    }
+
+    // Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision
+    // only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default
+    if (implication(isQuantizedModel, config.manualEnforceBF16)) {
+        for (auto &node : graphNodes) {
+            if (node->getType() != Input && node->getType() != Output) {
+                for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) {
+                    auto &parent = node->getParentEdgesAtPort(i)[0]->getParent();
+                    if (!(parent->getType() == Input && parent->isConstant()) &&    // exclude nodes after Constant Inputs
+                        !(parent->getType() == Input && node->getType() == Eltwise) &&    // exclude Eltwise after Input since it supports conversion to BF16
+                        node->getOriginalInputPrecisionAtPort(i) == Precision::FP32)
+                        node->setOriginalInputPrecisionAtPort(i, Precision::BF16);
+                }
+
+                for (size_t i = 0; i < node->getOriginalOutputsNumber(); i++) {
+                    if (node->getOriginalOutputPrecisionAtPort(i) == Precision::FP32)
+                        node->setOriginalOutputPrecisionAtPort(i, Precision::BF16);
+                }
+            }
+        }
+    }
+}
+
 InferenceEngine::CNNNetwork MKLDNNGraph::dump() const {
     return dump_graph_as_ie_ngraph_net(*this);
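The gating at the top of EnforceBF16() goes through a small implication() helper. A plausible definition and its effect, shown as an assumption for illustration (the plugin ships its own utility):

    #include <cassert>

    // implication(a, b): "if a then b", i.e. !a || b.
    constexpr bool implication(bool cause, bool cond) {
        return !cause || cond;
    }

    int main() {
        // No FakeQuantize in the model: BF16 is enforced either way.
        assert(implication(false, false) && implication(false, true));
        // Quantized model: BF16 only when enforcement was requested manually.
        assert(!implication(true, false) && implication(true, true));
    }

In other words, mixed FP32 + INT8/BIN models only get BF16 on their floating-point parts when the flag was set manually.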
@@ -204,6 +204,7 @@ protected:
     friend InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph);
 
 private:
+    void EnforceBF16();
     void printGraphInfo() const;
 };
@@ -142,13 +142,6 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
     DropDoubleReorders(graph);
     graph.RemoveDroppedNodes();
 
-#if 0
-    /* disable, since there is no use case for it at the moment
-     * should be enabled after ngraph migration */
-    DropConvertReorder(graph);
-    graph.RemoveDroppedNodes();
-#endif
-
     MergeTransposeAndReorder(graph);
     graph.RemoveDroppedNodes();
@@ -123,7 +123,7 @@ protected:
     // performance counters
     expectedPrecisions["Matmul_0"] = "BF16";
     expectedPrecisions["Mul_1"] = "BF16";
-    expectedPrecisions["Add_1"] = "BF16";
+    expectedPrecisions["Add_1"] = netPrecision.name(); // FP32->BF16 in case of FP32 net, BF16->BF16 in case of BF16 net
     expectedPrecisions["Relu_1"] = "ndef";
     expectedPrecisions["Conc_1"] = "BF16";
     expectedPrecisions["Matmul_1"] = "BF16";
@@ -0,0 +1,62 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <ngraph_functions/builders.hpp>
+#include "ie_common.h"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "test_utils/cpu_test_utils.hpp"
+
+using namespace InferenceEngine;
+using namespace CPUTestUtils;
+
+namespace CPULayerTestsDefinitions {
+
+class InputNoReorderEltwiseBF16 : virtual public LayerTestsUtils::LayerTestsCommon,
+                                  public CPUTestsBase {
+protected:
+    void SetUp() {
+        auto netPrecision = inPrc = Precision::FP32;
+        outPrc = Precision::BF16;
+        targetDevice = CommonTestUtils::DEVICE_CPU;
+        std::map<std::string, std::string> additional_config{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}};
+        configuration.insert(additional_config.begin(), additional_config.end());
+
+        std::vector<size_t> inputShape {2, 4, 4, 1};
+        std::vector<size_t> outputShape = inputShape;
+        auto eltwiseType = ngraph::helpers::EltwiseTypes::ADD;
+        auto secondaryInputType = ngraph::helpers::InputLayerType::CONSTANT;
+
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+        auto input = ngraph::builder::makeParams(ngPrc, {inputShape});
+        std::shared_ptr<ngraph::Node> secondaryInput = ngraph::builder::makeInputLayer(ngPrc, secondaryInputType, inputShape);
+        auto eltwise = ngraph::builder::makeEltwise(input[0], secondaryInput, eltwiseType);
+
+        function = makeNgraphFunction(ngPrc, input, eltwise, "Eltwise");
+    }
+};
+
+/* FP32 network with enforced BF16 precision.
+ * Test that no Reorder (or Convert) is inserted after Input.
+ * Eltwise performs the conversion by itself.
+
+    Input[FP32]        Constant[FP32]
+          \                 /
+           \               /
+            X  No Reorder X
+             \           /
+           Eltwise[FP32->BF16]
+                  |
+                  |
+             Output[BF16]
+*/
+TEST_F(InputNoReorderEltwiseBF16, CompareWithRefs) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    Run();
+
+    CheckNodeOfTypeCount(executableNetwork, "Reorder", 0);
+    CheckNodeOfTypeCount(executableNetwork, "Convert", 0);
+    CheckNodeOfTypeCount(executableNetwork, "Eltwise", 1);
+}
+} // namespace CPULayerTestsDefinitions