From 0509c66ce03e43f9e7f660788a9e335a04275b25 Mon Sep 17 00:00:00 2001 From: Konrad Dobros Date: Thu, 2 Jul 2020 13:18:28 +0200 Subject: [PATCH] [IE CLDNN] Add some auto-tuning improvements (#1154) - add error reporting for failed kernel runs during auto-tune - fix auto-tuning for asymmetric quantization - add asymmetric quantization information to cache - change auto-tuning metric from average to min --- .../convolution/convolution_params.cpp | 2 +- .../core/common/weight_bias_params.cpp | 16 +++++++++ .../core/common/weight_bias_params.h | 1 + .../thirdparty/clDNN/src/gpu/kernel.cpp | 2 +- .../clDNN/src/gpu/kernel_runner.cpp | 35 ++++++++++++------- 5 files changed, 42 insertions(+), 14 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.cpp index 44bd574cc91..68b2ca86197 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.cpp @@ -41,7 +41,7 @@ std::string convolution_params::to_string() const { std::string convolution_params::to_cache_string_v2() const { std::stringstream s; - s << weight_bias_params::to_cache_string_v2() << ";"; + s << parent::to_cache_string_v2() << ";"; s << filterSize.x << "_" << filterSize.y << "_" << filterSize.z << ";"; s << stride.x << "_" << stride.y << "_" << stride.z << ";"; s << dilation.x << "_" << dilation.y << "_" << dilation.z << ";"; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/weight_bias_params.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/weight_bias_params.cpp index 4278cc33186..e6a94c06ee8 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/weight_bias_params.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/weight_bias_params.cpp @@ -15,6 +15,7 @@ */ #include "weight_bias_params.h" +#include <sstream> namespace kernel_selector { ParamsKey weight_bias_params::GetParamsKey() const { @@ -37,4 +38,19 @@ ParamsKey weight_bias_params::GetParamsKey() const { return k; } + +std::string weight_bias_zero_point_params::to_cache_string_v2() const { + std::stringstream s; + + s << weight_bias_params::to_cache_string_v2(); + if (!activations_zero_points.empty()) + s << ";activation_zp"; + if (!weights_zero_points.empty()) + s << ";weights_zp"; + if (HasCompensation()) + s << ";compensation"; + + return s.str(); +} + } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/weight_bias_params.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/weight_bias_params.h index ac4c5f5160d..7d1db484771 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/weight_bias_params.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/weight_bias_params.h @@ -43,6 +43,7 @@ struct weight_bias_zero_point_params : public weight_bias_params { MultiDataTensor compensation; bool HasCompensation() const { return !compensation.empty(); } + std::string to_cache_string_v2() const override; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kernel.cpp b/inference-engine/thirdparty/clDNN/src/gpu/kernel.cpp index cf666b1fc59..aeeb91262eb 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/kernel.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/kernel.cpp @@ -238,7 +238,7 @@ void set_arguments(kernels_cache::kernel_type& kernel, } if (status != CL_SUCCESS) { - throw std::runtime_error("Error set args\n"); + throw std::runtime_error("Error set arg " + std::to_string(i) + ", error code: " + std::to_string(status) + "\n"); } } } diff
--git a/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.cpp b/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.cpp index ecf71af2c4c..37225d9fed7 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.cpp @@ -135,8 +135,9 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern if (zero_points_exist) { const auto& zero_point_params = static_cast(weights_bias_params); - if (weight_zero_point_buffers.empty()) { - for (auto& weight_zero_point : zero_point_params.weights_zero_points) { + if (!zero_point_params.weights_zero_points.empty()) { + if (weight_zero_point_buffers.empty()) { + auto& weight_zero_point = zero_point_params.weights_zero_points[0]; auto num_of_elements = static_cast(weight_zero_point.PhysicalSize()); weight_zero_point_buffers.push_back( engine->allocate_memory({ @@ -145,28 +146,33 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern tensor(1, num_of_elements, 1, 1) }, 0)); } + args.weights_zero_points = weight_zero_point_buffers[0]; } - if (activation_zero_point_buffers.empty()) { - for (auto& activation_zero_point : zero_point_params.activations_zero_points) { + if (!zero_point_params.activations_zero_points.empty()) { + if (activation_zero_point_buffers.empty()) { + auto& activation_zero_point = zero_point_params.activations_zero_points[0]; auto num_of_elements = static_cast(activation_zero_point.PhysicalSize()); - weight_zero_point_buffers.push_back( + activation_zero_point_buffers.push_back( engine->allocate_memory({ from_data_type(activation_zero_point.GetDType()), format::bfyx, tensor(1, num_of_elements, 1, 1) }, 0)); } + args.activations_zero_points = activation_zero_point_buffers[0]; } - if (compensation_buffers.empty()) { - for (auto& compensation : zero_point_params.compensation) { + if (!zero_point_params.compensation.empty()) { + if (compensation_buffers.empty()) { + auto& compensation = 
zero_point_params.compensation[0]; auto num_of_elements = static_cast(compensation.PhysicalSize()); - weight_zero_point_buffers.push_back( + compensation_buffers.push_back( engine->allocate_memory({ from_data_type(compensation.GetDType()), format::bfyx, tensor(1, num_of_elements, 1, 1) }, 0)); } + args.compensation = compensation_buffers[0]; } } } @@ -202,19 +208,24 @@ std::vector kernel_runner::run_kernels(const kernel_se int i = 0; for (auto it = batch_start; it < batch_end; it++) { std::vector events; - auto kernel_run_time = std::chrono::nanoseconds::zero(); + auto kernel_run_time = std::chrono::nanoseconds::max(); int num_of_runs = 0; for (int iteration = 0; iteration < runs_per_kernel; iteration++) { event_impl::ptr event; try { event = kernels[i].run(0, it->kernels[0], {}, args); + } catch (std::exception& e) { + std::cout << "[clDNN] Could not run kernel for auto-tune: " << it->kernelName + << " with auto-tune index " << it->autoTuneIndex << std::endl + << ", error message:" << e.what(); } catch (...) { // Could not run this kernel. Push back NULL event (will be ignored later). 
+ std::cout << "[clDNN] Could not run kernel for auto-tune: " << it->kernelName + << " with auto-tune index " << it->autoTuneIndex << std::endl; } events.push_back(event); } - context->queue(0).finish(); for (auto& event : events) { @@ -222,7 +233,7 @@ std::vector kernel_runner::run_kernels(const kernel_se auto profiling_intervals = event->get_profiling_info(); for (auto const& profiling_interval : profiling_intervals) { if (profiling_interval.name == "executing") { - kernel_run_time += profiling_interval.value->value(); + kernel_run_time = std::min(profiling_interval.value->value(), kernel_run_time); num_of_runs++; break; } @@ -231,7 +242,7 @@ std::vector kernel_runner::run_kernels(const kernel_se } if (num_of_runs > 0) { - run_times.push_back(kernel_run_time / num_of_runs); + run_times.push_back(kernel_run_time); num_of_kernels_run += 1; } else { run_times.push_back(std::chrono::nanoseconds::max());