[CPU] Change precision configuration by ov::pass::ConvertPrecision (#19993)

This commit is contained in:
Aleksandr Voron
2023-10-16 14:32:47 +02:00
committed by GitHub
parent e812831346
commit 475ddb9283
3 changed files with 39 additions and 14 deletions

View File

@@ -255,10 +255,16 @@ void Config::readProperties(const std::map<std::string, std::string> &prop, cons
// when both execution_mode and inference_precision are specified
if (!inferencePrecisionSetExplicitly) {
if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) {
inferencePrecision = ov::element::f32;
#if defined(OV_CPU_ARM_ENABLE_FP16)
//fp16 precision is used as default precision on ARM for non-convolution networks
//fp16 ACL convolution is slower than fp32
if (modelType != ModelType::CNN)
inferencePrecision = ov::element::f16;
#else
if (mayiuse(avx512_core_bf16))
inferencePrecision = ov::element::bf16;
else
inferencePrecision = ov::element::f32;
#endif
} else {
inferencePrecision = ov::element::f32;
}

View File

@@ -1709,7 +1709,10 @@ void Graph::EnforceInferencePrecision() {
if (inferPrec == Precision::FP32)
return; // nothing to do, only precision reduction is currently allowed
#if defined(OV_CPU_ARM_ENABLE_FP16)
if (inferPrec == Precision::FP16)
return; // precision is configured by ov::pass::ConvertPrecision
#endif
std::function<void(const NodePtr&, std::unordered_set<NodePtr>& skipNodes)> searchForNodesToSkip;
searchForNodesToSkip = [&](const NodePtr& node, std::unordered_set<NodePtr>& skipNodes) -> void {
for (size_t i = 0; i < node->getParentEdges().size(); i++) {

View File

@@ -195,14 +195,6 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
CPU_REGISTER_PASS_COMMON(manager, ov::pass::InitNodeInfo);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkShapeOfSubgraphs);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression);
CPU_SET_CALLBACK_COMMON(manager,
[](const_node_ptr &node) -> bool {
const auto outputs = node->get_output_target_inputs(0);
return outputs.size() != 1 || !is_type<ov::op::v0::MatMul>(outputs.begin()->get_node());
},
ov::pass::KeepConstAndDecompression);
const bool useLpt = !defaultPrecisions.empty();
if (useLpt) {
CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantizationSubgraph, defaultPrecisions);
@@ -243,7 +235,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
}, ov::pass::MarkDequantizationSubgraph);
}
auto get_convert_precisions = []() {
auto get_convert_precisions = [&]() {
precisions_map map = {
{ov::element::i64, ov::element::i32},
{ov::element::u64, ov::element::i32},
@@ -251,7 +243,6 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
{ov::element::u16, ov::element::i32},
{ov::element::u32, ov::element::i32},
{ov::element::f64, ov::element::f32},
{ov::element::f16, ov::element::f32},
{ov::element::boolean, ov::element::u8},
{ov::element::i4, ov::element::i8},
{ov::element::u4, ov::element::u8}
@@ -259,12 +250,37 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
// @todo should we always convert to f32 regardless of hardware support, as it is done for f16?
if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
map.insert({ov::element::bf16, ov::element::f32});
#if defined(OV_CPU_ARM_ENABLE_FP16)
if (inferencePrecision != ov::element::f16)
map.insert({ov::element::f16, ov::element::f32});
#else
map.insert({ov::element::f16, ov::element::f32});
#endif
return map;
};
static const auto precisions = get_convert_precisions();
type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};
#if defined(OV_CPU_ARM_ENABLE_FP16)
if (inferencePrecision == ov::element::f16) {
precisions_map fp_convert_precision_map = {
{ov::element::f32, ov::element::f16}
};
type_to_fuse_map empty_fuse_map = {};
const bool keep_precision_sensitive_in_fp32 = true;
CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, fp_convert_precision_map,
empty_fuse_map,
keep_precision_sensitive_in_fp32);
}
#endif
CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression);
CPU_SET_CALLBACK_COMMON(manager,
[](const_node_ptr &node) -> bool {
const auto outputs = node->get_output_target_inputs(0);
return outputs.size() != 1 || !is_type<ov::op::v0::MatMul>(outputs.begin()->get_node());
},
ov::pass::KeepConstAndDecompression);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::AUGRUCellFusion);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::CommonOptimizations);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::WrapInterpolateIntoTransposes);