diff --git a/docs/OV_Runtime_UG/supported_plugins/GPU.md b/docs/OV_Runtime_UG/supported_plugins/GPU.md index eafe3aad406..0e6d3579e4b 100644 --- a/docs/OV_Runtime_UG/supported_plugins/GPU.md +++ b/docs/OV_Runtime_UG/supported_plugins/GPU.md @@ -418,6 +418,7 @@ All parameters must be set before calling ``ov::Core::compile_model()`` in order - ``ov::intel_gpu::hint::queue_priority`` - ``ov::intel_gpu::hint::queue_throttle`` - ``ov::intel_gpu::enable_loop_unrolling`` +- ``ov::intel_gpu::disable_winograd_convolution`` Read-only Properties +++++++++++++++++++++++++++++++++++++++ diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index 1ff7ce8a41b..0953f75c06e 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -103,6 +103,7 @@ void regmodule_properties(py::module m) { wrap_property_RO(m_intel_gpu, ov::intel_gpu::memory_statistics, "memory_statistics"); wrap_property_RW(m_intel_gpu, ov::intel_gpu::enable_loop_unrolling, "enable_loop_unrolling"); + wrap_property_RW(m_intel_gpu, ov::intel_gpu::disable_winograd_convolution, "disable_winograd_convolution"); // Submodule hint (intel_gpu) py::module m_intel_gpu_hint = m_intel_gpu.def_submodule( diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index f03f25675c5..f9ecbf9e8ee 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -328,6 +328,11 @@ def test_properties_ro(ov_property_ro, expected_value): "GPU_ENABLE_LOOP_UNROLLING", ((True, True),), ), + ( + properties.intel_gpu.disable_winograd_convolution, + "GPU_DISABLE_WINOGRAD_CONVOLUTION", + ((True, True),), + ), ( properties.intel_gpu.hint.queue_throttle, "GPU_QUEUE_THROTTLE", diff --git a/src/inference/include/openvino/runtime/intel_gpu/properties.hpp b/src/inference/include/openvino/runtime/intel_gpu/properties.hpp index 4cbe906d47d..7f661d5b67a 100644 --- a/src/inference/include/openvino/runtime/intel_gpu/properties.hpp +++ b/src/inference/include/openvino/runtime/intel_gpu/properties.hpp @@ -63,6 +63,14 @@ static constexpr Property, PropertyMutability::R */ static constexpr Property enable_loop_unrolling{"GPU_ENABLE_LOOP_UNROLLING"}; +/** + * @brief Turning on this key disables winograd convolution. + * Winograd convolution has different characteristics for accuracy and performance compared to other convolution + * implementations. + * @ingroup ov_runtime_ocl_gpu_prop_cpp_api + */ +static constexpr Property disable_winograd_convolution{"GPU_DISABLE_WINOGRAD_CONVOLUTION"}; + namespace hint { /** * @brief This enum represents the possible value of ov::intel_gpu::hint::queue_throttle property: diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 5809c4935d8..00f75f37fb7 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -496,11 +496,16 @@ bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, reorder_node } namespace { -bool should_use_winograd_2x3_s1(std::shared_ptr const& prim, +bool should_use_winograd_2x3_s1(const convolution_node& node, layout const& input_layout, layout const& weights_layout, bool output_size_handling_enabled) { + bool disable_winograd_conv = node.get_program().get_config().get_property(ov::intel_gpu::disable_winograd_convolution); + if (disable_winograd_conv) + return false; + // cases when NOT to use winograd + auto prim = node.get_primitive(); if (input_layout.data_type != data_types::f16 || input_layout.feature() % 64 != 0 // current algorithm is effective for ifm to be multiply of 64 || weights_layout.spatial(0) != 3 // weights have to be 3x3 by definiton @@ -589,7 +594,7 @@ bool layout_optimizer::convolution_byxf_opt(const layout& input_layout, all_zeroes(conv->padding_begin) && all_zeroes(conv->padding_end)) || // Winograd - should_use_winograd_2x3_s1(conv, input_layout, weights_layout, _output_size_handling_enabled)) + should_use_winograd_2x3_s1(node, input_layout, weights_layout, _output_size_handling_enabled)) return true; return false; @@ -2004,7 +2009,7 @@ bool layout_optimizer::is_format_optimized(const convolution_node& node, const f case format::b_fs_yx_fsv16: return convolution_b_fs_yx_fsv16_opt(input_layout, output_layout, weights_layout, prim, use_weak_restrictions) && // Work-around for inability to use b_fs_yx_fsv16 and winograd together - !should_use_winograd_2x3_s1(prim, input_layout, weights_layout, _output_size_handling_enabled); + !should_use_winograd_2x3_s1(node, input_layout, weights_layout, _output_size_handling_enabled); case format::b_fs_zyx_fsv16: case format::bs_fs_zyx_bsv16_fsv16: return convolution_b_fs_zyx_fsv16_opt(input_layout, output_layout, weights_layout, prim); diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index e20da7f0ca3..b215700a046 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -320,6 +320,7 @@ InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) con ov::PropertyName{ov::intel_gpu::hint::queue_priority.name(), PropertyMutability::RO}, ov::PropertyName{ov::intel_gpu::hint::queue_throttle.name(), PropertyMutability::RO}, ov::PropertyName{ov::intel_gpu::enable_loop_unrolling.name(), PropertyMutability::RO}, + ov::PropertyName{ov::intel_gpu::disable_winograd_convolution.name(), PropertyMutability::RO}, ov::PropertyName{ov::cache_dir.name(), PropertyMutability::RO}, ov::PropertyName{ov::hint::performance_mode.name(), PropertyMutability::RO}, ov::PropertyName{ov::hint::execution_mode.name(), PropertyMutability::RO}, diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 4fa41fc4c53..a8c15b85460 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -746,6 +746,7 @@ std::vector Plugin::get_supported_properties() const { ov::PropertyName{ov::intel_gpu::hint::queue_priority.name(), PropertyMutability::RW}, ov::PropertyName{ov::intel_gpu::hint::queue_throttle.name(), PropertyMutability::RW}, ov::PropertyName{ov::intel_gpu::enable_loop_unrolling.name(), PropertyMutability::RW}, + ov::PropertyName{ov::intel_gpu::disable_winograd_convolution.name(), PropertyMutability::RW}, ov::PropertyName{ov::cache_dir.name(), PropertyMutability::RW}, ov::PropertyName{ov::hint::performance_mode.name(), PropertyMutability::RW}, ov::PropertyName{ov::hint::execution_mode.name(), PropertyMutability::RW}, diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 49d82e6a7be..e1375ef14dd 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -53,6 +53,7 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM), std::make_tuple(ov::intel_gpu::hint::queue_priority, ov::hint::Priority::MEDIUM), std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true), + std::make_tuple(ov::intel_gpu::disable_winograd_convolution, false), std::make_tuple(ov::internal::exclusive_async_requests, false), // Legacy API properties