[GPU] Support hint::inference_precision (#12526)
commit 03b7c1e69e
parent de9a785328
@@ -262,6 +262,7 @@ All parameters must be set before calling `ov::Core::compile_model()` in order t
 - ov::hint::model_priority
 - ov::hint::performance_mode
 - ov::hint::num_requests
+- ov::hint::inference_precision
 - ov::num_streams
 - ov::compilation_num_threads
 - ov::device::id
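As a usage sketch (not part of this change), the listed hints can be passed directly at compile time; the model path below is a placeholder:

#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model path

    // All of the listed hints must be given before or at compile_model();
    // here the precision hint forces f16 execution on GPU.
    auto compiled = core.compile_model(model, "GPU",
                                       ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
                                       ov::hint::inference_precision(ov::element::f16));
    return 0;
}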
@@ -396,6 +396,7 @@ int main(int argc, char* argv[]) {
         } else if (device.find("GPU") != std::string::npos) {
             // for GPU execution, more throughput-oriented execution via streams
             setThroughputStreams();
+            set_infer_precision();

             if ((device_name.find("MULTI") != std::string::npos) &&
                 (device_name.find("CPU") != std::string::npos)) {
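The body of set_infer_precision() is outside this hunk; a plausible shape of such a helper, with the parameter names and the string-keyed config map as assumptions only:

#include <map>
#include <string>

#include "openvino/runtime/properties.hpp"

// Hypothetical sketch of the set_infer_precision() helper called above; the
// real implementation lives outside this hunk.
static void set_infer_precision(const std::string& precision,
                                std::map<std::string, std::string>& device_config) {
    if (precision.empty())
        return;  // no precision option given, keep the device default
    device_config[ov::hint::inference_precision.name()] = precision;  // e.g. "f16"
}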
@@ -37,6 +37,7 @@ struct Config {
           graph_dumps_dir(""),
           sources_dumps_dir(""),
           kernels_cache_dir(""),
+          inference_precision(ov::element::undefined),
           task_exec_config({"GPU plugin internal task executor",  // name
                             std::max(1, static_cast<int>(std::thread::hardware_concurrency())),  // # of streams
                             1,  // # of threads per streams
@@ -80,6 +81,7 @@ struct Config {
     std::string graph_dumps_dir;
     std::string sources_dumps_dir;
     std::string kernels_cache_dir;
+    ov::element::Type inference_precision;
     InferenceEngine::IStreamsExecutor::Config task_exec_config;

     bool enable_loop_unrolling;
@@ -164,6 +164,8 @@ InferenceEngine::Parameter CompiledModel::GetConfig(const std::string &name) con
         return ov::util::from_string(val, ov::num_streams);
     } else if (name == ov::hint::num_requests) {
         return ov::util::from_string(val, ov::hint::num_requests);
+    } else if (name == ov::hint::inference_precision) {
+        return ov::util::from_string(val, ov::hint::inference_precision);
     } else if (name == ov::device::id) {
         return ov::util::from_string(val, ov::device::id);
     } else {
@@ -201,6 +203,7 @@ InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) con
             ov::PropertyName{ov::compilation_num_threads.name(), PropertyMutability::RO},
             ov::PropertyName{ov::num_streams.name(), PropertyMutability::RO},
             ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RO},
+            ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RO},
             ov::PropertyName{ov::device::id.name(), PropertyMutability::RO}
         };
     } else if (name == ov::model_name) {
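A reading-side sketch (assumed usage, placeholder model path): once a model is compiled, the hint is exposed read-only, matching the RO mutability above:

#include <iostream>

#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path
    auto compiled = core.compile_model(model, "GPU",
                                       ov::hint::inference_precision(ov::element::f16));

    // On the compiled model the hint can only be queried, not changed.
    auto precision = compiled.get_property(ov::hint::inference_precision);
    std::cout << "inference precision: " << precision << std::endl;
    return 0;
}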
@@ -69,6 +69,12 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap)
         const auto hints = perfHintsConfig.SupportedKeys();
         if (hints.end() != std::find(hints.begin(), hints.end(), key)) {
             perfHintsConfig.SetConfig(key, val);
+        } else if (key == ov::hint::inference_precision) {
+            std::stringstream ss(val);
+            ss >> inference_precision;
+            OPENVINO_ASSERT(inference_precision == ov::element::f16 ||
+                            inference_precision == ov::element::f32,
+                            "Unexpected inference precision set: ", inference_precision);
         } else if (key.compare(PluginConfigParams::KEY_PERF_COUNT) == 0 || key == ov::enable_profiling) {
             if (val.compare(PluginConfigParams::YES) == 0) {
                 useProfiling = true;
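A standalone restatement of that parsing path, only "f16" and "f32" pass validation; parse_precision is an illustrative name, not part of the plugin:

#include <sstream>
#include <string>

#include "openvino/core/except.hpp"
#include "openvino/core/type/element_type.hpp"

// Illustrative re-statement of the validation above.
static ov::element::Type parse_precision(const std::string& val) {
    ov::element::Type precision;
    std::stringstream ss(val);
    ss >> precision;  // ov::element::Type supports stream extraction, e.g. "f16"
    OPENVINO_ASSERT(precision == ov::element::f16 || precision == ov::element::f32,
                    "Unexpected inference precision set: ", precision);
    return precision;
}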
@@ -379,6 +385,8 @@ void Config::adjustKeyMapValues() {
     else
         key_config_map[CLDNNConfigParams::KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS] = PluginConfigParams::NO;

+    key_config_map[ov::hint::inference_precision.name()] = inference_precision.get_type_name();
+
     {
         if (queuePriority == cldnn::priority_mode_types::high &&
             (task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::BIG ||
@@ -519,6 +527,7 @@ bool Config::isNewApiProperty(std::string property) {
     static const std::set<std::string> new_api_keys{
         ov::intel_gpu::hint::queue_priority.name(),
         ov::intel_gpu::hint::queue_throttle.name(),
+        ov::hint::inference_precision.name(),
        ov::compilation_num_threads.name(),
        ov::num_streams.name(),
    };
@@ -599,8 +599,9 @@ Parameter Plugin::GetConfig(const std::string& name, const std::map<std::string,
     } else if (name == ov::num_streams) {
         return ov::util::from_string(val, ov::num_streams);
     } else if (name == ov::hint::num_requests) {
-        auto temp = ov::util::from_string(val, ov::hint::num_requests);
-        return temp;
+        return ov::util::from_string(val, ov::hint::num_requests);
+    } else if (name == ov::hint::inference_precision) {
+        return ov::util::from_string(val, ov::hint::inference_precision);
     } else if (name == ov::device::id) {
         return ov::util::from_string(val, ov::device::id);
     } else {
@@ -728,6 +729,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string,
             ov::PropertyName{ov::compilation_num_threads.name(), PropertyMutability::RW},
             ov::PropertyName{ov::num_streams.name(), PropertyMutability::RW},
             ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RW},
+            ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW},
             ov::PropertyName{ov::device::id.name(), PropertyMutability::RW},
         };
     } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
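On the device itself the hint is writable (RW), so it can be set globally before any compilation; a small assumed-usage sketch listing the advertised properties:

#include <iostream>

#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    // Set the hint on the plugin; it then applies to subsequent compilations.
    core.set_property("GPU", ov::hint::inference_precision(ov::element::f16));
    // supported_properties reflects the mutability table above.
    for (const auto& prop : core.get_property("GPU", ov::supported_properties)) {
        std::cout << prop << (prop.is_mutable() ? " (RW)" : " (RO)") << std::endl;
    }
    return 0;
}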
@@ -161,7 +161,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
     manager.register_pass<ngraph::pass::ConvertNMS9ToNMSIEInternal>();
     manager.register_pass<ngraph::pass::ConvertGather0D>();

-    static const precisions_array convert_precision_list {
+    precisions_array convert_precision_list {
         {ngraph::element::i64, ngraph::element::i32},
         {ngraph::element::u64, ngraph::element::i32},
         {ngraph::element::u16, ngraph::element::i32},
@@ -171,6 +171,15 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         {ngraph::element::u4, ngraph::element::u8},
     };

+    if (config.inference_precision != ov::element::undefined) {
+        std::vector<ov::element::Type> supported_fp_element_types = {ngraph::element::f32, ngraph::element::f16};
+        for (auto& et : supported_fp_element_types) {
+            if (et != config.inference_precision) {
+                convert_precision_list.push_back({et, config.inference_precision});
+            }
+        }
+    }
+
     manager.register_pass<ngraph::pass::Validate>();
     manager.register_pass<ngraph::pass::ConvertPrecision>(convert_precision_list);
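The net effect: when a precision is forced, the other supported floating-point type is converted to it. A sketch of just that pair-building logic, with a plain std::vector standing in for the plugin-internal precisions_array type:

#include <utility>
#include <vector>

#include "openvino/core/type/element_type.hpp"

// Forcing f16 appends {f32 -> f16}; forcing f32 appends {f16 -> f32};
// ov::element::undefined leaves the conversion list untouched.
static std::vector<std::pair<ov::element::Type, ov::element::Type>>
extra_conversions(ov::element::Type inference_precision) {
    std::vector<std::pair<ov::element::Type, ov::element::Type>> list;
    if (inference_precision != ov::element::undefined) {
        for (auto et : {ov::element::f32, ov::element::f16}) {
            if (et != inference_precision)
                list.push_back({et, inference_precision});
        }
    }
    return list;
}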
@@ -0,0 +1,54 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <string>
+#include <utility>
+#include <vector>
+#include <memory>
+
+#include "openvino/runtime/core.hpp"
+
+#include <common_test_utils/test_common.hpp>
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "base/ov_behavior_test_utils.hpp"
+#include "functional_test_utils/ov_plugin_cache.hpp"
+
+using namespace ::testing;
+
+using params = std::tuple<ov::element::Type, ov::element::Type>;
+
+class InferencePrecisionTests : public testing::WithParamInterface<params>,
+                                virtual public LayerTestsUtils::LayerTestsCommon {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<params> &obj) {
+        ov::element::Type model_precision;
+        ov::element::Type inference_precision;
+        std::tie(model_precision, inference_precision) = obj.param;
+        std::stringstream s;
+        s << "model_precision=" << model_precision << "_inference_precision=" << inference_precision;
+        return s.str();
+    }
+};
+
+TEST_P(InferencePrecisionTests, smoke_canSetInferencePrecisionAndInfer) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+    auto core = ov::test::utils::PluginCache::get().core();
+    ov::element::Type model_precision;
+    ov::element::Type inference_precision;
+    std::tie(model_precision, inference_precision) = GetParam();
+    auto function = ov::test::behavior::getDefaultNGraphFunctionForTheDevice("GPU", {1, 1, 32, 32}, model_precision);
+    ov::CompiledModel compiled_model;
+    OV_ASSERT_NO_THROW(compiled_model = core->compile_model(function, "GPU", ov::hint::inference_precision(inference_precision)));
+    auto req = compiled_model.create_infer_request();
+    OV_ASSERT_NO_THROW(req.infer());
+}
+
+static const std::vector<params> test_params = {
+    {ov::element::f16, ov::element::f32},
+    {ov::element::f16, ov::element::f16},
+    {ov::element::f32, ov::element::f32},
+    {ov::element::f32, ov::element::f16},
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_GPU_BehaviorTests, InferencePrecisionTests, ::testing::ValuesIn(test_params), InferencePrecisionTests::getTestCaseName);
@@ -337,6 +337,21 @@ TEST_P(OVClassGetPropertyTest_GPU, GetAndSetEnableProfilingNoThrow) {
     OV_ASSERT_PROPERTY_SUPPORTED(ov::enable_profiling);
 }

+TEST_P(OVClassGetPropertyTest_GPU, GetAndSetInferencePrecisionNoThrow) {
+    ov::Core ie;
+    auto value = ov::element::undefined;
+    const auto expected_default_precision = ov::element::undefined;
+
+    OV_ASSERT_NO_THROW(value = ie.get_property(deviceName, ov::hint::inference_precision));
+    ASSERT_EQ(expected_default_precision, value);
+
+    const auto forced_precision = ov::element::f16;
+
+    OV_ASSERT_NO_THROW(ie.set_property(deviceName, ov::hint::inference_precision(forced_precision)));
+    OV_ASSERT_NO_THROW(value = ie.get_property(deviceName, ov::hint::inference_precision));
+    ASSERT_EQ(value, forced_precision);
+}
+
 TEST_P(OVClassGetPropertyTest_GPU, GetAndSetModelPriorityNoThrow) {
     ov::Core ie;