[CPU] Do not set BF16 on input port for Eltwise after Input (#5542)
* [CPU] Do not set BF16 on input port for Eltwise after Input.
  Since Eltwise supports conversion to BF16 itself, the unnecessary Reorder is avoided.
* Create a separate function for enforcing BF16 on ports.
* Add a test to verify that no extra Reorder is inserted. Also:
  - update the legacy test
  - remove extra code which is not applicable anymore
* Correct the expected precision in the legacy test.
parent 4c452b8bb6
commit cfc235bd65
@@ -262,33 +262,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
         graphNodes.push_back(outNode);
     }
 
-    // We set all non const data paths precision to BF16 in case enforceBF16 flag is switched on.
-    if (config.enforceBF16) {
-        bool isQuantizedModel = false;
-        for (auto& node : graphNodes) {
-            if (node->getType() == FakeQuantize)
-                isQuantizedModel = true;
-        }
-
-        // Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision
-        // only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default
-        if (implication(isQuantizedModel, config.manualEnforceBF16)) {
-            for (auto &node : graphNodes) {
-                if (node->getType() != Input && node->getType() != Output) {
-                    for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) {
-                        auto &parent = node->getParentEdgesAtPort(i)[0]->getParent();
-                        if (!(parent->getType() == Input && parent->isConstant()) && node->getOriginalInputPrecisionAtPort(i) == Precision::FP32)
-                            node->setOriginalInputPrecisionAtPort(i, Precision::BF16);
-                    }
-
-                    for (size_t i = 0; i < node->getOriginalOutputsNumber(); i++) {
-                        if (node->getOriginalOutputPrecisionAtPort(i) == Precision::FP32)
-                            node->setOriginalOutputPrecisionAtPort(i, Precision::BF16);
-                    }
-                }
-            }
-        }
-    }
+    if (config.enforceBF16)
+        EnforceBF16();
 
     // change precision for input/output nodes to avoid extra data conversion when set input/output blobs
     // also we need to change input/output precisions for consumers/producers to avoid inserting reorder
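The BF16 enforcement that moves into EnforceBF16() below is gated by the plugin's implication helper. As a quick reference, a minimal self-contained sketch of the semantics it reduces to (the real definition lives in the plugin's utility headers, so treat this as an assumption about its behavior, not its code):

#include <cassert>

// implication(cause, cond) is logical implication "cause => cond":
// false only when cause holds and cond does not.
static bool implication(bool cause, bool cond) {
    return !cause || cond;
}

int main() {
    // Non-quantized model (cause == false): BF16 is enforced either way.
    assert(implication(false, false) && implication(false, true));
    // Quantized model (cause == true): BF16 is enforced only when
    // manualEnforceBF16 was set explicitly by the user.
    assert(!implication(true, false) && implication(true, true));
    return 0;
}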
@@ -1201,6 +1176,35 @@ bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNo
     return true;
 }
 
+// Set all non const data paths precision to BF16
+void MKLDNNGraph::EnforceBF16() {
+    bool isQuantizedModel = false;
+    for (auto& node : graphNodes) {
+        if (node->getType() == FakeQuantize)
+            isQuantizedModel = true;
+    }
+
+    // Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision
+    // only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default
+    if (implication(isQuantizedModel, config.manualEnforceBF16)) {
+        for (auto &node : graphNodes) {
+            if (node->getType() != Input && node->getType() != Output) {
+                for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) {
+                    auto &parent = node->getParentEdgesAtPort(i)[0]->getParent();
+                    if (!(parent->getType() == Input && parent->isConstant()) &&       // exclude nodes after Constant Inputs
+                        !(parent->getType() == Input && node->getType() == Eltwise) && // exclude Eltwise after Input since it supports conversion to BF16
+                        node->getOriginalInputPrecisionAtPort(i) == Precision::FP32)
+                        node->setOriginalInputPrecisionAtPort(i, Precision::BF16);
+                }
+
+                for (size_t i = 0; i < node->getOriginalOutputsNumber(); i++) {
+                    if (node->getOriginalOutputPrecisionAtPort(i) == Precision::FP32)
+                        node->setOriginalOutputPrecisionAtPort(i, Precision::BF16);
+                }
+            }
+        }
+    }
+}
 
 InferenceEngine::CNNNetwork MKLDNNGraph::dump() const {
     return dump_graph_as_ie_ngraph_net(*this);
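To make the new exclusion concrete, here is a self-contained sketch of the per-input-port decision EnforceBF16() now makes. NodeKind and PortCtx are illustrative stand-ins, not plugin types:

#include <cassert>

enum class NodeKind { Input, Output, Eltwise, Other };

struct PortCtx {
    NodeKind parentKind;      // type of the node feeding this input port
    bool parentIsConstant;    // whether that parent is a constant Input
    NodeKind nodeKind;        // type of the consuming node
    bool portIsFP32;          // original precision on this port is FP32
};

// Returns true when the port precision should be switched from FP32 to BF16.
static bool shouldEnforceBF16(const PortCtx& c) {
    const bool afterConstInput   = c.parentKind == NodeKind::Input && c.parentIsConstant;
    const bool eltwiseAfterInput = c.parentKind == NodeKind::Input && c.nodeKind == NodeKind::Eltwise;
    return !afterConstInput && !eltwiseAfterInput && c.portIsFP32;
}

int main() {
    // Eltwise fed directly by a (non-constant) network Input keeps FP32 on
    // that port: no Reorder is needed, Eltwise converts to BF16 internally.
    assert(!shouldEnforceBF16({NodeKind::Input, false, NodeKind::Eltwise, true}));
    // Any other FP32 data path is still switched to BF16.
    assert(shouldEnforceBF16({NodeKind::Other, false, NodeKind::Eltwise, true}));
    // Ports after constant Inputs remain excluded, as before.
    assert(!shouldEnforceBF16({NodeKind::Input, true, NodeKind::Other, true}));
    return 0;
}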
@@ -204,6 +204,7 @@ protected:
     friend InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph);
 
 private:
+    void EnforceBF16();
     void printGraphInfo() const;
 };
 
@@ -142,13 +142,6 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
     DropDoubleReorders(graph);
     graph.RemoveDroppedNodes();
 
-#if 0
-    /* disable, since there is no use case for it at the moment
-     * should be enabled after ngraph migration */
-    DropConvertReorder(graph);
-    graph.RemoveDroppedNodes();
-#endif
-
     MergeTransposeAndReorder(graph);
     graph.RemoveDroppedNodes();
 
@@ -123,7 +123,7 @@ protected:
     // performance counters
     expectedPrecisions["Matmul_0"] = "BF16";
     expectedPrecisions["Mul_1"] = "BF16";
-    expectedPrecisions["Add_1"] = "BF16";
+    expectedPrecisions["Add_1"] = netPrecision.name(); // FP32->BF16 in case of FP32 net, BF16->BF16 in case of BF16 net
     expectedPrecisions["Relu_1"] = "ndef";
     expectedPrecisions["Conc_1"] = "BF16";
     expectedPrecisions["Matmul_1"] = "BF16";
@@ -0,0 +1,62 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <ngraph_functions/builders.hpp>
+#include "ie_common.h"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "test_utils/cpu_test_utils.hpp"
+
+using namespace InferenceEngine;
+using namespace CPUTestUtils;
+
+namespace CPULayerTestsDefinitions {
+
+class InputNoReorderEltwiseBF16 : virtual public LayerTestsUtils::LayerTestsCommon,
+                                  public CPUTestsBase {
+protected:
+    void SetUp() {
+        auto netPrecision = inPrc = Precision::FP32;
+        outPrc = Precision::BF16;
+        targetDevice = CommonTestUtils::DEVICE_CPU;
+        std::map<std::string, std::string> additional_config{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}};
+        configuration.insert(additional_config.begin(), additional_config.end());
+
+        std::vector<size_t> inputShape {2, 4, 4, 1};
+        std::vector<size_t> outputShape = inputShape;
+        auto eltwiseType = ngraph::helpers::EltwiseTypes::ADD;
+        auto secondaryInputType = ngraph::helpers::InputLayerType::CONSTANT;
+
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+        auto input = ngraph::builder::makeParams(ngPrc, {inputShape});
+        std::shared_ptr<ngraph::Node> secondaryInput = ngraph::builder::makeInputLayer(ngPrc, secondaryInputType, inputShape);
+        auto eltwise = ngraph::builder::makeEltwise(input[0], secondaryInput, eltwiseType);
+
+        function = makeNgraphFunction(ngPrc, input, eltwise, "Eltwise");
+    }
+};
+
+/* FP32 network with enforced BF16 precision.
+ * Test that no Reorder (or Convert) is inserted after Input.
+ * Eltwise performs the conversion by itself.
+
+    Input[FP32]        Constant[FP32]
+         \                  /
+          \                /
+           X  No Reorder  X
+            \            /
+          Eltwise[FP32->BF16]
+                  |
+                  |
+            Output[BF16]
+*/
+TEST_F(InputNoReorderEltwiseBF16, CompareWithRefs) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    Run();
+
+    CheckNodeOfTypeCount(executableNetwork, "Reorder", 0);
+    CheckNodeOfTypeCount(executableNetwork, "Convert", 0);
+    CheckNodeOfTypeCount(executableNetwork, "Eltwise", 1);
+}
+} // namespace CPULayerTestsDefinitions
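CheckNodeOfTypeCount comes from CPUTestUtils and counts nodes of a given layer type in the runtime (execution) graph. A rough sketch of what such a count boils down to, assuming the execution-graph API of this OpenVINO generation; this is an approximation, not the helper's actual code:

#include <string>
#include <memory>
#include <cpp/ie_executable_network.hpp>
#include <exec_graph_info.hpp>
#include <ngraph/ngraph.hpp>

// Counts runtime nodes whose execution-graph "layerType" attribute equals `type`.
static size_t countNodesOfType(InferenceEngine::ExecutableNetwork& execNet, const std::string& type) {
    size_t count = 0;
    auto function = execNet.GetExecGraphInfo().getFunction();
    for (const auto& node : function->get_ops()) {
        const auto& rtInfo = node->get_rt_info();
        auto it = rtInfo.find(ExecGraphInfoSerialization::LAYER_TYPE);
        if (it == rtInfo.end())
            continue;
        auto value = std::dynamic_pointer_cast<ngraph::VariantImpl<std::string>>(it->second);
        if (value && value->get() == type)
            ++count;
    }
    return count;
}

With EnforceBF16() skipping the Eltwise-after-Input port, the expected counts above hold: zero Reorder and Convert nodes, and a single Eltwise that performs the FP32-to-BF16 conversion itself.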