[IE CLDNN] Add some auto-tuning improvements (#1154)

- add error reporting for failed kernel runs during auto-tune
- fix auto-tuning for asymmetric quantization
- add asymmetric quantization information to cache
- change auto-tuning metric from average to min
This commit is contained in:
Konrad Dobros 2020-07-02 13:18:28 +02:00 committed by GitHub
parent 054e1cfd13
commit 0509c66ce0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 42 additions and 14 deletions

View File

@ -41,7 +41,7 @@ std::string convolution_params::to_string() const {
std::string convolution_params::to_cache_string_v2() const {
std::stringstream s;
s << weight_bias_params::to_cache_string_v2() << ";";
s << parent::to_cache_string_v2() << ";";
s << filterSize.x << "_" << filterSize.y << "_" << filterSize.z << ";";
s << stride.x << "_" << stride.y << "_" << stride.z << ";";
s << dilation.x << "_" << dilation.y << "_" << dilation.z << ";";

View File

@ -15,6 +15,7 @@
*/
#include "weight_bias_params.h"
#include <sstream>
namespace kernel_selector {
ParamsKey weight_bias_params::GetParamsKey() const {
@ -37,4 +38,19 @@ ParamsKey weight_bias_params::GetParamsKey() const {
return k;
}
// Produces the cache string for parameters that may carry zero points.
// The result is the weight_bias_params cache string followed by one
// ";"-prefixed tag for each optional input that is present, always in the
// order: activation zero points, weights zero points, compensation.
std::string weight_bias_zero_point_params::to_cache_string_v2() const {
    std::string cache_key = weight_bias_params::to_cache_string_v2();

    const bool has_activation_zp = !activations_zero_points.empty();
    const bool has_weights_zp = !weights_zero_points.empty();

    if (has_activation_zp) {
        cache_key += ";activation_zp";
    }
    if (has_weights_zp) {
        cache_key += ";weights_zp";
    }
    if (HasCompensation()) {
        cache_key += ";compensation";
    }

    return cache_key;
}
} // namespace kernel_selector

View File

@ -43,6 +43,7 @@ struct weight_bias_zero_point_params : public weight_bias_params {
MultiDataTensor compensation;
bool HasCompensation() const { return !compensation.empty(); }
std::string to_cache_string_v2() const override;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -238,7 +238,7 @@ void set_arguments(kernels_cache::kernel_type& kernel,
}
if (status != CL_SUCCESS) {
throw std::runtime_error("Error set args\n");
throw std::runtime_error("Error set arg " + std::to_string(i) + ", error code: " + std::to_string(status) + "\n");
}
}
}

View File

@ -135,8 +135,9 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern
if (zero_points_exist) {
const auto& zero_point_params =
static_cast<const kernel_selector::weight_bias_zero_point_params&>(weights_bias_params);
if (weight_zero_point_buffers.empty()) {
for (auto& weight_zero_point : zero_point_params.weights_zero_points) {
if (!zero_point_params.weights_zero_points.empty()) {
if (weight_zero_point_buffers.empty()) {
auto& weight_zero_point = zero_point_params.weights_zero_points[0];
auto num_of_elements = static_cast<int>(weight_zero_point.PhysicalSize());
weight_zero_point_buffers.push_back(
engine->allocate_memory({
@ -145,28 +146,33 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern
tensor(1, num_of_elements, 1, 1) },
0));
}
args.weights_zero_points = weight_zero_point_buffers[0];
}
if (activation_zero_point_buffers.empty()) {
for (auto& activation_zero_point : zero_point_params.activations_zero_points) {
if (!zero_point_params.activations_zero_points.empty()) {
if (activation_zero_point_buffers.empty()) {
auto& activation_zero_point = zero_point_params.activations_zero_points[0];
auto num_of_elements = static_cast<int>(activation_zero_point.PhysicalSize());
weight_zero_point_buffers.push_back(
activation_zero_point_buffers.push_back(
engine->allocate_memory({
from_data_type(activation_zero_point.GetDType()),
format::bfyx,
tensor(1, num_of_elements, 1, 1) },
0));
}
args.activations_zero_points = activation_zero_point_buffers[0];
}
if (compensation_buffers.empty()) {
for (auto& compensation : zero_point_params.compensation) {
if (!zero_point_params.compensation.empty()) {
if (compensation_buffers.empty()) {
auto& compensation = zero_point_params.compensation[0];
auto num_of_elements = static_cast<int>(compensation.PhysicalSize());
weight_zero_point_buffers.push_back(
compensation_buffers.push_back(
engine->allocate_memory({
from_data_type(compensation.GetDType()),
format::bfyx,
tensor(1, num_of_elements, 1, 1) },
0));
}
args.compensation = compensation_buffers[0];
}
}
}
@ -202,19 +208,24 @@ std::vector<std::chrono::nanoseconds> kernel_runner::run_kernels(const kernel_se
int i = 0;
for (auto it = batch_start; it < batch_end; it++) {
std::vector<event_impl::ptr> events;
auto kernel_run_time = std::chrono::nanoseconds::zero();
auto kernel_run_time = std::chrono::nanoseconds::max();
int num_of_runs = 0;
for (int iteration = 0; iteration < runs_per_kernel; iteration++) {
event_impl::ptr event;
try {
event = kernels[i].run(0, it->kernels[0], {}, args);
} catch (std::exception& e) {
std::cout << "[clDNN] Could not run kernel for auto-tune: " << it->kernelName
<< " with auto-tune index " << it->autoTuneIndex << std::endl
<< ", error message:" << e.what();
} catch (...) {
// Could not run this kernel. Push back NULL event (will be ignored later).
std::cout << "[clDNN] Could not run kernel for auto-tune: " << it->kernelName
<< " with auto-tune index " << it->autoTuneIndex << std::endl;
}
events.push_back(event);
}
context->queue(0).finish();
for (auto& event : events) {
@ -222,7 +233,7 @@ std::vector<std::chrono::nanoseconds> kernel_runner::run_kernels(const kernel_se
auto profiling_intervals = event->get_profiling_info();
for (auto const& profiling_interval : profiling_intervals) {
if (profiling_interval.name == "executing") {
kernel_run_time += profiling_interval.value->value();
kernel_run_time = std::min(profiling_interval.value->value(), kernel_run_time);
num_of_runs++;
break;
}
@ -231,7 +242,7 @@ std::vector<std::chrono::nanoseconds> kernel_runner::run_kernels(const kernel_se
}
if (num_of_runs > 0) {
run_times.push_back(kernel_run_time / num_of_runs);
run_times.push_back(kernel_run_time);
num_of_kernels_run += 1;
} else {
run_times.push_back(std::chrono::nanoseconds::max());