[GPU] Add disable winograd convolution in execute_config. (#18964)
* [GPU] Add disable winograd convolution in execute_config. Signed-off-by: hyunback <hyunback.kim@intel.com>
This commit is contained in:
@@ -418,6 +418,7 @@ All parameters must be set before calling ``ov::Core::compile_model()`` in order
|
||||
- ``ov::intel_gpu::hint::queue_priority``
|
||||
- ``ov::intel_gpu::hint::queue_throttle``
|
||||
- ``ov::intel_gpu::enable_loop_unrolling``
|
||||
- ``ov::intel_gpu::disable_winograd_convolution``
|
||||
|
||||
Read-only Properties
|
||||
+++++++++++++++++++++++++++++++++++++++
|
||||
|
||||
@@ -103,6 +103,7 @@ void regmodule_properties(py::module m) {
|
||||
wrap_property_RO(m_intel_gpu, ov::intel_gpu::memory_statistics, "memory_statistics");
|
||||
|
||||
wrap_property_RW(m_intel_gpu, ov::intel_gpu::enable_loop_unrolling, "enable_loop_unrolling");
|
||||
wrap_property_RW(m_intel_gpu, ov::intel_gpu::disable_winograd_convolution, "disable_winograd_convolution");
|
||||
|
||||
// Submodule hint (intel_gpu)
|
||||
py::module m_intel_gpu_hint = m_intel_gpu.def_submodule(
|
||||
|
||||
@@ -328,6 +328,11 @@ def test_properties_ro(ov_property_ro, expected_value):
|
||||
"GPU_ENABLE_LOOP_UNROLLING",
|
||||
((True, True),),
|
||||
),
|
||||
(
|
||||
properties.intel_gpu.disable_winograd_convolution,
|
||||
"GPU_DISABLE_WINOGRAD_CONVOLUTION",
|
||||
((True, True),),
|
||||
),
|
||||
(
|
||||
properties.intel_gpu.hint.queue_throttle,
|
||||
"GPU_QUEUE_THROTTLE",
|
||||
|
||||
@@ -63,6 +63,14 @@ static constexpr Property<std::map<std::string, uint64_t>, PropertyMutability::R
|
||||
*/
|
||||
static constexpr Property<bool> enable_loop_unrolling{"GPU_ENABLE_LOOP_UNROLLING"};
|
||||
|
||||
/**
|
||||
* @brief Turning on this key disables winograd convolution.
|
||||
* Winograd convolution has different characteristics for accuracy and performance compared to other convolution
|
||||
* implementations.
|
||||
* When the property is set to true, the GPU plugin's winograd selection check returns early and winograd
* convolution is not chosen. The default value is false, i.e. winograd convolution remains eligible.
* The property is keyed by the string "GPU_DISABLE_WINOGRAD_CONVOLUTION".
* @ingroup ov_runtime_ocl_gpu_prop_cpp_api
|
||||
*/
|
||||
static constexpr Property<bool> disable_winograd_convolution{"GPU_DISABLE_WINOGRAD_CONVOLUTION"};
|
||||
|
||||
namespace hint {
|
||||
/**
|
||||
* @brief This enum represents the possible value of ov::intel_gpu::hint::queue_throttle property:
|
||||
|
||||
@@ -496,11 +496,16 @@ bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, reorder_node
|
||||
}
|
||||
|
||||
namespace {
|
||||
bool should_use_winograd_2x3_s1(std::shared_ptr<const convolution> const& prim,
|
||||
bool should_use_winograd_2x3_s1(const convolution_node& node,
|
||||
layout const& input_layout,
|
||||
layout const& weights_layout,
|
||||
bool output_size_handling_enabled) {
|
||||
bool disable_winograd_conv = node.get_program().get_config().get_property(ov::intel_gpu::disable_winograd_convolution);
|
||||
if (disable_winograd_conv)
|
||||
return false;
|
||||
|
||||
// cases when NOT to use winograd
|
||||
auto prim = node.get_primitive();
|
||||
if (input_layout.data_type != data_types::f16
|
||||
|| input_layout.feature() % 64 != 0 // current algorithm is effective only when ifm is a multiple of 64
|
||||
|| weights_layout.spatial(0) != 3 // weights have to be 3x3 by definition
|
||||
@@ -589,7 +594,7 @@ bool layout_optimizer::convolution_byxf_opt(const layout& input_layout,
|
||||
all_zeroes(conv->padding_begin) &&
|
||||
all_zeroes(conv->padding_end)) ||
|
||||
// Winograd
|
||||
should_use_winograd_2x3_s1(conv, input_layout, weights_layout, _output_size_handling_enabled))
|
||||
should_use_winograd_2x3_s1(node, input_layout, weights_layout, _output_size_handling_enabled))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@@ -2004,7 +2009,7 @@ bool layout_optimizer::is_format_optimized(const convolution_node& node, const f
|
||||
case format::b_fs_yx_fsv16:
|
||||
return convolution_b_fs_yx_fsv16_opt(input_layout, output_layout, weights_layout, prim, use_weak_restrictions) &&
|
||||
// Work-around for inability to use b_fs_yx_fsv16 and winograd together
|
||||
!should_use_winograd_2x3_s1(prim, input_layout, weights_layout, _output_size_handling_enabled);
|
||||
!should_use_winograd_2x3_s1(node, input_layout, weights_layout, _output_size_handling_enabled);
|
||||
case format::b_fs_zyx_fsv16:
|
||||
case format::bs_fs_zyx_bsv16_fsv16:
|
||||
return convolution_b_fs_zyx_fsv16_opt(input_layout, output_layout, weights_layout, prim);
|
||||
|
||||
@@ -320,6 +320,7 @@ InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) con
|
||||
ov::PropertyName{ov::intel_gpu::hint::queue_priority.name(), PropertyMutability::RO},
|
||||
ov::PropertyName{ov::intel_gpu::hint::queue_throttle.name(), PropertyMutability::RO},
|
||||
ov::PropertyName{ov::intel_gpu::enable_loop_unrolling.name(), PropertyMutability::RO},
|
||||
ov::PropertyName{ov::intel_gpu::disable_winograd_convolution.name(), PropertyMutability::RO},
|
||||
ov::PropertyName{ov::cache_dir.name(), PropertyMutability::RO},
|
||||
ov::PropertyName{ov::hint::performance_mode.name(), PropertyMutability::RO},
|
||||
ov::PropertyName{ov::hint::execution_mode.name(), PropertyMutability::RO},
|
||||
|
||||
@@ -746,6 +746,7 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
|
||||
ov::PropertyName{ov::intel_gpu::hint::queue_priority.name(), PropertyMutability::RW},
|
||||
ov::PropertyName{ov::intel_gpu::hint::queue_throttle.name(), PropertyMutability::RW},
|
||||
ov::PropertyName{ov::intel_gpu::enable_loop_unrolling.name(), PropertyMutability::RW},
|
||||
ov::PropertyName{ov::intel_gpu::disable_winograd_convolution.name(), PropertyMutability::RW},
|
||||
ov::PropertyName{ov::cache_dir.name(), PropertyMutability::RW},
|
||||
ov::PropertyName{ov::hint::performance_mode.name(), PropertyMutability::RW},
|
||||
ov::PropertyName{ov::hint::execution_mode.name(), PropertyMutability::RW},
|
||||
|
||||
@@ -53,6 +53,7 @@ void ExecutionConfig::set_default() {
|
||||
std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM),
|
||||
std::make_tuple(ov::intel_gpu::hint::queue_priority, ov::hint::Priority::MEDIUM),
|
||||
std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true),
|
||||
std::make_tuple(ov::intel_gpu::disable_winograd_convolution, false),
|
||||
std::make_tuple(ov::internal::exclusive_async_requests, false),
|
||||
|
||||
// Legacy API properties
|
||||
|
||||
Reference in New Issue
Block a user