[CPU] Change precision configuration by ov::pass::ConvertPrecision (#19993)
This commit is contained in:
@@ -255,10 +255,16 @@ void Config::readProperties(const std::map<std::string, std::string> &prop, cons
|
||||
// when both execution_mode and inference_precision are specified
|
||||
if (!inferencePrecisionSetExplicitly) {
|
||||
if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) {
|
||||
inferencePrecision = ov::element::f32;
|
||||
#if defined(OV_CPU_ARM_ENABLE_FP16)
|
||||
//fp16 precision is used as default precision on ARM for non-convolution networks
|
||||
//fp16 ACL convolution is slower than fp32
|
||||
if (modelType != ModelType::CNN)
|
||||
inferencePrecision = ov::element::f16;
|
||||
#else
|
||||
if (mayiuse(avx512_core_bf16))
|
||||
inferencePrecision = ov::element::bf16;
|
||||
else
|
||||
inferencePrecision = ov::element::f32;
|
||||
#endif
|
||||
} else {
|
||||
inferencePrecision = ov::element::f32;
|
||||
}
|
||||
|
||||
@@ -1709,7 +1709,10 @@ void Graph::EnforceInferencePrecision() {
|
||||
|
||||
if (inferPrec == Precision::FP32)
|
||||
return; // nothing to do, only precision reduction is currently allowed
|
||||
|
||||
#if defined(OV_CPU_ARM_ENABLE_FP16)
|
||||
if (inferPrec == Precision::FP16)
|
||||
return; // precision of configured by ov::pass::ConvertPrecision
|
||||
#endif
|
||||
std::function<void(const NodePtr&, std::unordered_set<NodePtr>& skipNodes)> searchForNodesToSkip;
|
||||
searchForNodesToSkip = [&](const NodePtr& node, std::unordered_set<NodePtr>& skipNodes) -> void {
|
||||
for (size_t i = 0; i < node->getParentEdges().size(); i++) {
|
||||
|
||||
@@ -195,14 +195,6 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::InitNodeInfo);
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkShapeOfSubgraphs);
|
||||
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression);
|
||||
CPU_SET_CALLBACK_COMMON(manager,
|
||||
[](const_node_ptr &node) -> bool {
|
||||
const auto outputs = node->get_output_target_inputs(0);
|
||||
return outputs.size() != 1 || !is_type<ov::op::v0::MatMul>(outputs.begin()->get_node());
|
||||
},
|
||||
ov::pass::KeepConstAndDecompression);
|
||||
|
||||
const bool useLpt = !defaultPrecisions.empty();
|
||||
if (useLpt) {
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantizationSubgraph, defaultPrecisions);
|
||||
@@ -243,7 +235,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
|
||||
}, ov::pass::MarkDequantizationSubgraph);
|
||||
}
|
||||
|
||||
auto get_convert_precisions = []() {
|
||||
auto get_convert_precisions = [&]() {
|
||||
precisions_map map = {
|
||||
{ov::element::i64, ov::element::i32},
|
||||
{ov::element::u64, ov::element::i32},
|
||||
@@ -251,7 +243,6 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
|
||||
{ov::element::u16, ov::element::i32},
|
||||
{ov::element::u32, ov::element::i32},
|
||||
{ov::element::f64, ov::element::f32},
|
||||
{ov::element::f16, ov::element::f32},
|
||||
{ov::element::boolean, ov::element::u8},
|
||||
{ov::element::i4, ov::element::i8},
|
||||
{ov::element::u4, ov::element::u8}
|
||||
@@ -259,12 +250,37 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
|
||||
// @todo should we always convert to f32 regardless of hardware support, as it is done for f16?
|
||||
if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
|
||||
map.insert({ov::element::bf16, ov::element::f32});
|
||||
|
||||
#if defined(OV_CPU_ARM_ENABLE_FP16)
|
||||
if (inferencePrecision != ov::element::f16)
|
||||
map.insert({ov::element::f16, ov::element::f32});
|
||||
#else
|
||||
map.insert({ov::element::f16, ov::element::f32});
|
||||
#endif
|
||||
return map;
|
||||
};
|
||||
static const auto precisions = get_convert_precisions();
|
||||
type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};
|
||||
|
||||
#if defined(OV_CPU_ARM_ENABLE_FP16)
|
||||
if (inferencePrecision == ov::element::f16) {
|
||||
precisions_map fp_convert_precision_map = {
|
||||
{ov::element::f32, ov::element::f16}
|
||||
};
|
||||
type_to_fuse_map empty_fuse_map = {};
|
||||
const bool keep_precision_sensitive_in_fp32 = true;
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, fp_convert_precision_map,
|
||||
empty_fuse_map,
|
||||
keep_precision_sensitive_in_fp32);
|
||||
}
|
||||
#endif
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression);
|
||||
CPU_SET_CALLBACK_COMMON(manager,
|
||||
[](const_node_ptr &node) -> bool {
|
||||
const auto outputs = node->get_output_target_inputs(0);
|
||||
return outputs.size() != 1 || !is_type<ov::op::v0::MatMul>(outputs.begin()->get_node());
|
||||
},
|
||||
ov::pass::KeepConstAndDecompression);
|
||||
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::AUGRUCellFusion);
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::CommonOptimizations);
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::WrapInterpolateIntoTransposes);
|
||||
|
||||
Reference in New Issue
Block a user