[GPU] Support hint::inference_precision (#12526)

Vladimir Paramuzov 2022-08-15 16:34:36 +04:00 committed by GitHub
parent de9a785328
commit 03b7c1e69e
9 changed files with 99 additions and 3 deletions

View File

@@ -262,6 +262,7 @@ All parameters must be set before calling `ov::Core::compile_model()` in order t
- ov::hint::model_priority
- ov::hint::performance_mode
- ov::hint::num_requests
- ov::hint::inference_precision
- ov::num_streams
- ov::compilation_num_threads
- ov::device::id
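
The list above now covers the new hint. For reference, a minimal compile-time usage sketch (not part of this diff; the model path is a placeholder):

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // "model.xml" is a placeholder; any IR model works.
    auto model = core.read_model("model.xml");
    // Ask the GPU plugin to execute floating-point primitives in f16.
    auto compiled = core.compile_model(
        model, "GPU", ov::hint::inference_precision(ov::element::f16));
    auto request = compiled.create_infer_request();
}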

View File

@@ -396,6 +396,7 @@ int main(int argc, char* argv[]) {
} else if (device.find("GPU") != std::string::npos) {
// for GPU execution, more throughput-oriented execution via streams
setThroughputStreams();
set_infer_precision();
if ((device_name.find("MULTI") != std::string::npos) &&
(device_name.find("CPU") != std::string::npos)) {

View File

@@ -37,6 +37,7 @@ struct Config {
graph_dumps_dir(""),
sources_dumps_dir(""),
kernels_cache_dir(""),
inference_precision(ov::element::undefined),
task_exec_config({"GPU plugin internal task executor", // name
std::max(1, static_cast<int>(std::thread::hardware_concurrency())), // # of streams
1, // # of threads per streams
@@ -80,6 +81,7 @@ struct Config {
std::string graph_dumps_dir;
std::string sources_dumps_dir;
std::string kernels_cache_dir;
ov::element::Type inference_precision;
InferenceEngine::IStreamsExecutor::Config task_exec_config;
bool enable_loop_unrolling;

View File

@@ -164,6 +164,8 @@ InferenceEngine::Parameter CompiledModel::GetConfig(const std::string &name) con
return ov::util::from_string(val, ov::num_streams);
} else if (name == ov::hint::num_requests) {
return ov::util::from_string(val, ov::hint::num_requests);
} else if (name == ov::hint::inference_precision) {
return ov::util::from_string(val, ov::hint::inference_precision);
} else if (name == ov::device::id) {
return ov::util::from_string(val, ov::device::id);
} else {
@@ -201,6 +203,7 @@ InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) con
ov::PropertyName{ov::compilation_num_threads.name(), PropertyMutability::RO},
ov::PropertyName{ov::num_streams.name(), PropertyMutability::RO},
ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RO},
ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RO},
ov::PropertyName{ov::device::id.name(), PropertyMutability::RO}
};
} else if (name == ov::model_name) {
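
Once compiled, the hint is exposed read-only on the model. A minimal query sketch (compiled is an ov::CompiledModel, e.g. from the earlier example):

// Reports the precision the model was compiled with.
ov::element::Type prec = compiled.get_property(ov::hint::inference_precision);
std::cout << "inference precision: " << prec << std::endl;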

View File

@@ -69,6 +69,12 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap)
const auto hints = perfHintsConfig.SupportedKeys();
if (hints.end() != std::find(hints.begin(), hints.end(), key)) {
perfHintsConfig.SetConfig(key, val);
} else if (key == ov::hint::inference_precision) {
std::stringstream ss(val);
ss >> inference_precision;
OPENVINO_ASSERT(inference_precision == ov::element::f16 ||
inference_precision == ov::element::f32,
"Unexpected inference precision set: ", inference_precision);
} else if (key.compare(PluginConfigParams::KEY_PERF_COUNT) == 0 || key == ov::enable_profiling) {
if (val.compare(PluginConfigParams::YES) == 0) {
useProfiling = true;
@@ -379,6 +385,8 @@ void Config::adjustKeyMapValues() {
else
key_config_map[CLDNNConfigParams::KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS] = PluginConfigParams::NO;
key_config_map[ov::hint::inference_precision.name()] = inference_precision.get_type_name();
{
if (queuePriority == cldnn::priority_mode_types::high &&
(task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::BIG ||
@@ -519,6 +527,7 @@ bool Config::isNewApiProperty(std::string property) {
static const std::set<std::string> new_api_keys{
ov::intel_gpu::hint::queue_priority.name(),
ov::intel_gpu::hint::queue_throttle.name(),
ov::hint::inference_precision.name(),
ov::compilation_num_threads.name(),
ov::num_streams.name(),
};
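
Because the value is read back out of a string with operator>>, the plugin accepts exactly the tokens ov::element::Type can parse, and the OPENVINO_ASSERT above rejects anything outside {f16, f32}. A sketch of both paths:

ov::Core core;
// Accepted: "f16" parses to ov::element::f16.
core.set_property("GPU", {{ov::hint::inference_precision.name(), "f16"}});
try {
    // Rejected: i8 fails the f16/f32 check above, so this should throw.
    core.set_property("GPU", {{ov::hint::inference_precision.name(), "i8"}});
} catch (const ov::Exception& e) {
    std::cerr << e.what() << std::endl;
}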

View File

@@ -599,8 +599,9 @@ Parameter Plugin::GetConfig(const std::string& name, const std::map<std::string,
} else if (name == ov::num_streams) {
return ov::util::from_string(val, ov::num_streams);
} else if (name == ov::hint::num_requests) {
- auto temp = ov::util::from_string(val, ov::hint::num_requests);
- return temp;
+ return ov::util::from_string(val, ov::hint::num_requests);
} else if (name == ov::hint::inference_precision) {
return ov::util::from_string(val, ov::hint::inference_precision);
} else if (name == ov::device::id) {
return ov::util::from_string(val, ov::device::id);
} else {
@@ -728,6 +729,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string,
ov::PropertyName{ov::compilation_num_threads.name(), PropertyMutability::RW},
ov::PropertyName{ov::num_streams.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW},
ov::PropertyName{ov::device::id.name(), PropertyMutability::RW},
};
} else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
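
At the plugin level the hint is registered RW, so it can be set globally before compilation and appears in ov::supported_properties; a quick enumeration sketch:

ov::Core core;
for (const auto& prop : core.get_property("GPU", ov::supported_properties)) {
    // ov::PropertyName carries the key plus its mutability.
    std::cout << prop << (prop.is_mutable() ? " (RW)" : " (RO)") << "\n";
}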

View File

@@ -161,7 +161,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
manager.register_pass<ngraph::pass::ConvertNMS9ToNMSIEInternal>();
manager.register_pass<ngraph::pass::ConvertGather0D>();
- static const precisions_array convert_precision_list {
+ precisions_array convert_precision_list {
{ngraph::element::i64, ngraph::element::i32},
{ngraph::element::u64, ngraph::element::i32},
{ngraph::element::u16, ngraph::element::i32},
@@ -171,6 +171,15 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
{ngraph::element::u4, ngraph::element::u8},
};
if (config.inference_precision != ov::element::undefined) {
std::vector<ov::element::Type> supported_fp_element_types = {ngraph::element::f32, ngraph::element::f16};
for (auto& et : supported_fp_element_types) {
if (et != config.inference_precision) {
convert_precision_list.push_back({et, config.inference_precision});
}
}
}
manager.register_pass<ngraph::pass::Validate>();
manager.register_pass<ngraph::pass::ConvertPrecision>(convert_precision_list);
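
Dropping static above is deliberate: the list is now extended per config. With inference_precision == f16 the loop appends exactly one pair, {f32 -> f16} (and vice versa for f32), which ConvertPrecision then applies to the whole model. A standalone sketch of the extension logic:

// Mirrors the loop above; `target` stands in for config.inference_precision.
std::vector<std::pair<ov::element::Type, ov::element::Type>> extra_conversions;
const auto target = ov::element::f16;
for (auto et : {ov::element::f32, ov::element::f16}) {
    if (et != target)
        extra_conversions.push_back({et, target});  // yields {f32 -> f16}
}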

View File

@@ -0,0 +1,54 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include <utility>
#include <vector>
#include <memory>
#include "openvino/runtime/core.hpp"
#include <common_test_utils/test_common.hpp>
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "base/ov_behavior_test_utils.hpp"
#include "functional_test_utils/ov_plugin_cache.hpp"
using namespace ::testing;
using params = std::tuple<ov::element::Type, ov::element::Type>;
class InferencePrecisionTests : public testing::WithParamInterface<params>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<params> &obj) {
ov::element::Type model_precision;
ov::element::Type inference_precision;
std::tie(model_precision, inference_precision) = obj.param;
std::stringstream s;
s << "model_precision=" << model_precision << "_inference_precison=" << inference_precision;
return s.str();
}
};
TEST_P(InferencePrecisionTests, smoke_canSetInferencePrecisionAndInfer) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
auto core = ov::test::utils::PluginCache::get().core();
ov::element::Type model_precision;
ov::element::Type inference_precision;
std::tie(model_precision, inference_precision) = GetParam();
auto function = ov::test::behavior::getDefaultNGraphFunctionForTheDevice("GPU", {1, 1, 32, 32}, model_precision);
ov::CompiledModel compiled_model;
OV_ASSERT_NO_THROW(compiled_model = core->compile_model(function, "GPU", ov::hint::inference_precision(inference_precision)));
auto req = compiled_model.create_infer_request();
OV_ASSERT_NO_THROW(req.infer());
}
static const std::vector<params> test_params = {
{ov::element::f16, ov::element::f32},
{ov::element::f16, ov::element::f16},
{ov::element::f32, ov::element::f32},
{ov::element::f32, ov::element::f16},
};
INSTANTIATE_TEST_SUITE_P(smoke_GPU_BehaviorTests, InferencePrecisionTests, ::testing::ValuesIn(test_params), InferencePrecisionTests::getTestCaseName);

View File

@@ -337,6 +337,21 @@ TEST_P(OVClassGetPropertyTest_GPU, GetAndSetEnableProfilingNoThrow) {
OV_ASSERT_PROPERTY_SUPPORTED(ov::enable_profiling);
}
TEST_P(OVClassGetPropertyTest_GPU, GetAndSetInferencePrecisionNoThrow) {
ov::Core ie;
auto value = ov::element::undefined;
const auto expected_default_precision = ov::element::undefined;
OV_ASSERT_NO_THROW(value = ie.get_property(deviceName, ov::hint::inference_precision));
ASSERT_EQ(expected_default_precision, value);
const auto forced_precision = ov::element::f16;
OV_ASSERT_NO_THROW(ie.set_property(deviceName, ov::hint::inference_precision(forced_precision)));
OV_ASSERT_NO_THROW(value = ie.get_property(deviceName, ov::hint::inference_precision));
ASSERT_EQ(value, forced_precision);
}
TEST_P(OVClassGetPropertyTest_GPU, GetAndSetModelPriorityNoThrow) {
ov::Core ie;