[GPU] Support hint::inference_precision (#12526)
commit 03b7c1e69e
parent de9a785328
@@ -262,6 +262,7 @@ All parameters must be set before calling `ov::Core::compile_model()` in order t
 - ov::hint::model_priority
 - ov::hint::performance_mode
 - ov::hint::num_requests
+- ov::hint::inference_precision
 - ov::num_streams
 - ov::compilation_num_threads
 - ov::device::id
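As a usage sketch (not part of this change), the listed hints can be passed directly at compile time; the model path below is a placeholder:

#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model path

    // All of the listed hints must be given before or at compile_model();
    // here the precision hint forces f16 execution on GPU.
    auto compiled = core.compile_model(model, "GPU",
                                       ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
                                       ov::hint::inference_precision(ov::element::f16));
    return 0;
}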
@@ -396,6 +396,7 @@ int main(int argc, char* argv[]) {
         } else if (device.find("GPU") != std::string::npos) {
             // for GPU execution, more throughput-oriented execution via streams
             setThroughputStreams();
+            set_infer_precision();

             if ((device_name.find("MULTI") != std::string::npos) &&
                 (device_name.find("CPU") != std::string::npos)) {
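The body of set_infer_precision() is outside this hunk; a plausible shape of such a helper, with the parameter names and the string-keyed config map as assumptions only:

#include <map>
#include <string>

#include "openvino/runtime/properties.hpp"

// Hypothetical sketch of the set_infer_precision() helper called above; the
// real implementation lives outside this hunk.
static void set_infer_precision(const std::string& precision,
                                std::map<std::string, std::string>& device_config) {
    if (precision.empty())
        return;  // no precision option given, keep the device default
    device_config[ov::hint::inference_precision.name()] = precision;  // e.g. "f16"
}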
@@ -37,6 +37,7 @@ struct Config {
           graph_dumps_dir(""),
           sources_dumps_dir(""),
           kernels_cache_dir(""),
+          inference_precision(ov::element::undefined),
           task_exec_config({"GPU plugin internal task executor",  // name
                             std::max(1, static_cast<int>(std::thread::hardware_concurrency())),  // # of streams
                             1,  // # of threads per streams
@@ -80,6 +81,7 @@ struct Config {
     std::string graph_dumps_dir;
     std::string sources_dumps_dir;
     std::string kernels_cache_dir;
+    ov::element::Type inference_precision;
     InferenceEngine::IStreamsExecutor::Config task_exec_config;

     bool enable_loop_unrolling;
@@ -164,6 +164,8 @@ InferenceEngine::Parameter CompiledModel::GetConfig(const std::string &name) con
         return ov::util::from_string(val, ov::num_streams);
     } else if (name == ov::hint::num_requests) {
         return ov::util::from_string(val, ov::hint::num_requests);
+    } else if (name == ov::hint::inference_precision) {
+        return ov::util::from_string(val, ov::hint::inference_precision);
     } else if (name == ov::device::id) {
         return ov::util::from_string(val, ov::device::id);
     } else {
@@ -201,6 +203,7 @@ InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) con
             ov::PropertyName{ov::compilation_num_threads.name(), PropertyMutability::RO},
             ov::PropertyName{ov::num_streams.name(), PropertyMutability::RO},
             ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RO},
+            ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RO},
             ov::PropertyName{ov::device::id.name(), PropertyMutability::RO}
         };
     } else if (name == ov::model_name) {
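A reading-side sketch (assumed usage, placeholder model path): once a model is compiled, the hint is exposed read-only, matching the RO mutability above:

#include <iostream>

#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path
    auto compiled = core.compile_model(model, "GPU",
                                       ov::hint::inference_precision(ov::element::f16));

    // On the compiled model the hint can only be queried, not changed.
    auto precision = compiled.get_property(ov::hint::inference_precision);
    std::cout << "inference precision: " << precision << std::endl;
    return 0;
}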
@@ -69,6 +69,12 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap)
         const auto hints = perfHintsConfig.SupportedKeys();
         if (hints.end() != std::find(hints.begin(), hints.end(), key)) {
             perfHintsConfig.SetConfig(key, val);
+        } else if (key == ov::hint::inference_precision) {
+            std::stringstream ss(val);
+            ss >> inference_precision;
+            OPENVINO_ASSERT(inference_precision == ov::element::f16 ||
+                            inference_precision == ov::element::f32,
+                            "Unexpected inference precision set: ", inference_precision);
         } else if (key.compare(PluginConfigParams::KEY_PERF_COUNT) == 0 || key == ov::enable_profiling) {
             if (val.compare(PluginConfigParams::YES) == 0) {
                 useProfiling = true;
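A standalone restatement of that parsing path, only "f16" and "f32" pass validation; parse_precision is an illustrative name, not part of the plugin:

#include <sstream>
#include <string>

#include "openvino/core/except.hpp"
#include "openvino/core/type/element_type.hpp"

// Illustrative re-statement of the validation above.
static ov::element::Type parse_precision(const std::string& val) {
    ov::element::Type precision;
    std::stringstream ss(val);
    ss >> precision;  // ov::element::Type supports stream extraction, e.g. "f16"
    OPENVINO_ASSERT(precision == ov::element::f16 || precision == ov::element::f32,
                    "Unexpected inference precision set: ", precision);
    return precision;
}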
@@ -379,6 +385,8 @@ void Config::adjustKeyMapValues() {
     else
         key_config_map[CLDNNConfigParams::KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS] = PluginConfigParams::NO;

+    key_config_map[ov::hint::inference_precision.name()] = inference_precision.get_type_name();
+
     {
         if (queuePriority == cldnn::priority_mode_types::high &&
             (task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::BIG ||
@@ -519,6 +527,7 @@ bool Config::isNewApiProperty(std::string property) {
     static const std::set<std::string> new_api_keys{
         ov::intel_gpu::hint::queue_priority.name(),
         ov::intel_gpu::hint::queue_throttle.name(),
+        ov::hint::inference_precision.name(),
        ov::compilation_num_threads.name(),
        ov::num_streams.name(),
    };
@@ -599,8 +599,9 @@ Parameter Plugin::GetConfig(const std::string& name, const std::map<std::string,
     } else if (name == ov::num_streams) {
         return ov::util::from_string(val, ov::num_streams);
     } else if (name == ov::hint::num_requests) {
-        auto temp = ov::util::from_string(val, ov::hint::num_requests);
-        return temp;
+        return ov::util::from_string(val, ov::hint::num_requests);
+    } else if (name == ov::hint::inference_precision) {
+        return ov::util::from_string(val, ov::hint::inference_precision);
     } else if (name == ov::device::id) {
         return ov::util::from_string(val, ov::device::id);
     } else {
@@ -728,6 +729,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string,
             ov::PropertyName{ov::compilation_num_threads.name(), PropertyMutability::RW},
             ov::PropertyName{ov::num_streams.name(), PropertyMutability::RW},
             ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RW},
+            ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW},
             ov::PropertyName{ov::device::id.name(), PropertyMutability::RW},
         };
     } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
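On the device itself the hint is writable (RW), so it can be set globally before any compilation; a small assumed-usage sketch listing the advertised properties:

#include <iostream>

#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    // Set the hint on the plugin; it then applies to subsequent compilations.
    core.set_property("GPU", ov::hint::inference_precision(ov::element::f16));
    // supported_properties reflects the mutability table above.
    for (const auto& prop : core.get_property("GPU", ov::supported_properties)) {
        std::cout << prop << (prop.is_mutable() ? " (RW)" : " (RO)") << std::endl;
    }
    return 0;
}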
@@ -161,7 +161,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
     manager.register_pass<ngraph::pass::ConvertNMS9ToNMSIEInternal>();
     manager.register_pass<ngraph::pass::ConvertGather0D>();

-    static const precisions_array convert_precision_list {
+    precisions_array convert_precision_list {
         {ngraph::element::i64, ngraph::element::i32},
         {ngraph::element::u64, ngraph::element::i32},
         {ngraph::element::u16, ngraph::element::i32},
@@ -171,6 +171,15 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         {ngraph::element::u4, ngraph::element::u8},
     };

+    if (config.inference_precision != ov::element::undefined) {
+        std::vector<ov::element::Type> supported_fp_element_types = {ngraph::element::f32, ngraph::element::f16};
+        for (auto& et : supported_fp_element_types) {
+            if (et != config.inference_precision) {
+                convert_precision_list.push_back({et, config.inference_precision});
+            }
+        }
+    }
+
     manager.register_pass<ngraph::pass::Validate>();
     manager.register_pass<ngraph::pass::ConvertPrecision>(convert_precision_list);
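The net effect: when a precision is forced, the other supported floating-point type is converted to it. A sketch of just that pair-building logic, with a plain std::vector standing in for the plugin-internal precisions_array type:

#include <utility>
#include <vector>

#include "openvino/core/type/element_type.hpp"

// Forcing f16 appends {f32 -> f16}; forcing f32 appends {f16 -> f32};
// ov::element::undefined leaves the conversion list untouched.
static std::vector<std::pair<ov::element::Type, ov::element::Type>>
extra_conversions(ov::element::Type inference_precision) {
    std::vector<std::pair<ov::element::Type, ov::element::Type>> list;
    if (inference_precision != ov::element::undefined) {
        for (auto et : {ov::element::f32, ov::element::f16}) {
            if (et != inference_precision)
                list.push_back({et, inference_precision});
        }
    }
    return list;
}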
@@ -0,0 +1,54 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <string>
+#include <utility>
+#include <vector>
+#include <memory>
+
+#include "openvino/runtime/core.hpp"
+
+#include <common_test_utils/test_common.hpp>
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "base/ov_behavior_test_utils.hpp"
+#include "functional_test_utils/ov_plugin_cache.hpp"
+
+using namespace ::testing;
+
+using params = std::tuple<ov::element::Type, ov::element::Type>;
+
+class InferencePrecisionTests : public testing::WithParamInterface<params>,
+                                virtual public LayerTestsUtils::LayerTestsCommon {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<params> &obj) {
+        ov::element::Type model_precision;
+        ov::element::Type inference_precision;
+        std::tie(model_precision, inference_precision) = obj.param;
+        std::stringstream s;
+        s << "model_precision=" << model_precision << "_inference_precision=" << inference_precision;
+        return s.str();
+    }
+};
+
+TEST_P(InferencePrecisionTests, smoke_canSetInferencePrecisionAndInfer) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+    auto core = ov::test::utils::PluginCache::get().core();
+    ov::element::Type model_precision;
+    ov::element::Type inference_precision;
+    std::tie(model_precision, inference_precision) = GetParam();
+    auto function = ov::test::behavior::getDefaultNGraphFunctionForTheDevice("GPU", {1, 1, 32, 32}, model_precision);
+    ov::CompiledModel compiled_model;
+    OV_ASSERT_NO_THROW(compiled_model = core->compile_model(function, "GPU", ov::hint::inference_precision(inference_precision)));
+    auto req = compiled_model.create_infer_request();
+    OV_ASSERT_NO_THROW(req.infer());
+}
+
+static const std::vector<params> test_params = {
+    {ov::element::f16, ov::element::f32},
+    {ov::element::f16, ov::element::f16},
+    {ov::element::f32, ov::element::f32},
+    {ov::element::f32, ov::element::f16},
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_GPU_BehaviorTests, InferencePrecisionTests, ::testing::ValuesIn(test_params), InferencePrecisionTests::getTestCaseName);
@@ -337,6 +337,21 @@ TEST_P(OVClassGetPropertyTest_GPU, GetAndSetEnableProfilingNoThrow) {
     OV_ASSERT_PROPERTY_SUPPORTED(ov::enable_profiling);
 }

+TEST_P(OVClassGetPropertyTest_GPU, GetAndSetInferencePrecisionNoThrow) {
+    ov::Core ie;
+    auto value = ov::element::undefined;
+    const auto expected_default_precision = ov::element::undefined;
+
+    OV_ASSERT_NO_THROW(value = ie.get_property(deviceName, ov::hint::inference_precision));
+    ASSERT_EQ(expected_default_precision, value);
+
+    const auto forced_precision = ov::element::f16;
+
+    OV_ASSERT_NO_THROW(ie.set_property(deviceName, ov::hint::inference_precision(forced_precision)));
+    OV_ASSERT_NO_THROW(value = ie.get_property(deviceName, ov::hint::inference_precision));
+    ASSERT_EQ(value, forced_precision);
+}
+
 TEST_P(OVClassGetPropertyTest_GPU, GetAndSetModelPriorityNoThrow) {
     ov::Core ie;