From 34d16b8777a58dc822cad2b9c128c4e279818b23 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 20 Jan 2023 15:17:12 +0400 Subject: [PATCH] [GPU] Move tuning cache loading to kernel selector (#15112) * [GPU] Move tuning cache loading to kernel selector. Remove tuning modes * [GPU] Removed kernel runner --- .../include/intel_gpu/graph/program.hpp | 8 - .../intel_gpu/runtime/internal_properties.hpp | 35 -- .../src/graph/impls/ocl/arg_max_min.cpp | 1 - .../graph/impls/ocl/binary_convolution.cpp | 9 - .../src/graph/impls/ocl/convolution.cpp | 9 - .../impls/ocl/deformable_convolution.cpp | 1 - .../src/graph/impls/ocl/fully_connected.cpp | 2 - .../graph/include/kernel_selector_helper.h | 3 - .../intel_gpu/src/graph/kernel_runner.cpp | 249 ------------ .../intel_gpu/src/graph/kernel_runner.h | 45 --- .../src/graph/kernel_selector_helper.cpp | 22 -- src/plugins/intel_gpu/src/graph/program.cpp | 17 - .../src/kernel_selector/auto_tuner.cpp | 93 ++--- .../src/kernel_selector/auto_tuner.h | 16 +- .../src/kernel_selector/common_types.h | 30 -- .../kernel_selector/device_cache_reader.cpp | 52 --- .../src/kernel_selector/device_cache_reader.h | 14 - .../src/kernel_selector/kernel_selector.cpp | 128 +------ .../src/kernel_selector/kernel_selector.h | 17 +- .../kernel_selector_params.cpp | 19 - .../kernel_selector/kernel_selector_params.h | 26 -- .../binary_convolution_kernel_selector.cpp | 2 +- ...volution_kernel_b_fs_yx_fsv16_imad_1x1.cpp | 1 - ...convolution_kernel_b_fs_zyx_fsv16_imad.cpp | 1 - .../convolution/convolution_kernel_imad.cpp | 1 - ...n_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.cpp | 1 - ...n_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.cpp | 1 - .../convolution_kernel_mmad_b_fs_yx_fsv32.cpp | 1 - ...nvolution_kernel_mmad_b_fs_yx_fsv32_dw.cpp | 1 - ...tion_kernel_mmad_bfyx_to_b_fs_yx_fsv32.cpp | 1 - ...ution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.cpp | 1 - .../convolution/convolution_kernel_ref.cpp | 1 - .../convolution_kernel_yxfb_ref.cpp | 1 - ...eformable_convolution_kernel_bfyx_conv.cpp | 1 - ...ormable_convolution_kernel_bfyx_interp.cpp | 1 - ...deformable_convolution_kernel_bfyx_ref.cpp | 1 - .../src/runtime/execution_config.cpp | 1 - .../intel_gpu/tests/test_cases/cache_test.cpp | 353 ------------------ 38 files changed, 77 insertions(+), 1089 deletions(-) delete mode 100644 src/plugins/intel_gpu/src/graph/kernel_runner.cpp delete mode 100644 src/plugins/intel_gpu/src/graph/kernel_runner.h delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/device_cache_reader.cpp delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/device_cache_reader.h delete mode 100644 src/plugins/intel_gpu/tests/test_cases/cache_test.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index b0ef3e95db6..62967567b86 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -17,10 +17,6 @@ #include #include -namespace kernel_selector { -class TuningCache; -} // namespace kernel_selector - namespace cldnn { struct topology; @@ -248,9 +244,6 @@ public: kernel::ptr get_kernel(kernel_id id); kernels_cache& get_kernels_cache() const; - void load_tuning_cache(); - std::shared_ptr get_tuning_cache() const { return tuning_cache; } - // returns {-1, -1} if it failed to estimate by allocating given batch size std::pair get_estimated_device_mem_usage(); @@ -270,7 +263,6 @@ private: std::vector outputs; nodes_ordering processing_order; std::unique_ptr pm; - std::shared_ptr tuning_cache; bool is_body_program; int8_t is_subgroup_local_block_io_supported; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index ec2f7806d4d..c5ccf331910 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -48,41 +48,6 @@ static constexpr Property partial_build_program{"G static constexpr Property allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"}; static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; - -/// @brief Tuning mode. -enum class TuningMode { - /// @brief Tuning is disabled. - tuning_disabled, - - /// @brief Tuning using the cached data (no on-line tuning for non-existing data). - tuning_use_cache, - - /// @brief Tuning using the cached data if exist, tune and update cache otherwise. - tuning_tune_and_cache, - - /// @brief Tuning using the cached data and update tasks. - /// @details Performs updating tasks like removal of invalid caches, promoting to new format, etc. - /// No tuning for non-existing data. - tuning_use_and_update, - - /// @brief Retune the cache data even if it exists. - tuning_retune_and_cache -}; - -struct TuningConfig { - TuningMode mode; - std::string cache_file_path; - - TuningConfig() : mode(TuningMode::tuning_disabled), cache_file_path("") {} -}; - -inline std::ostream& operator<<(std::ostream& os, const TuningConfig& val) { - os << val.cache_file_path; - return os; -} - -static constexpr Property tuning_config{"GPU_TUNING_CONFIG"}; - static constexpr Property force_implementations{"GPU_FORCE_IMPLEMENTATIONS"}; static constexpr Property config_file{"CONFIG_FILE"}; static constexpr Property enable_lp_transformations{"LP_TRANSFORMS_MODE"}; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp index 99af4f90e87..b9922e59646 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp @@ -9,7 +9,6 @@ #include "kernel_selector_helper.h" #include "arg_max_min/arg_max_min_kernel_selector.h" #include "arg_max_min/arg_max_min_kernel_base.h" -#include "kernel_runner.h" namespace cldnn { namespace ocl { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp index 5d9565dfe3d..4a80568a948 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp @@ -8,7 +8,6 @@ #include "impls/implementation_map.hpp" #include "intel_gpu/runtime/error_handler.hpp" #include "kernel_selector_helper.h" -#include "kernel_runner.h" #include "kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.h" #include "kernel_selector/kernels/binary_convolution/binary_convolution_params.h" #include @@ -75,14 +74,6 @@ public: uint32_t dilation_x = dilation.size() >= 1 ? dilation[dilation.size() - 1] : 1; params.dilation = {dilation_x, dilation_y, dilation_z}; - const auto& tuning_config = impl_param.get_program().get_config().get_property(ov::intel_gpu::tuning_config); - - if (tuning_config.mode == ov::intel_gpu::TuningMode::tuning_tune_and_cache || - tuning_config.mode == ov::intel_gpu::TuningMode::tuning_retune_and_cache) { - optional_params.tuningParams.runner = - std::make_shared(impl_param.get_program().get_engine(), impl_param.get_program().get_id(), true); - } - return {params, optional_params}; } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp index 4c2170f60e1..ede4bffa479 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp @@ -8,7 +8,6 @@ #include "impls/implementation_map.hpp" #include "intel_gpu/runtime/error_handler.hpp" #include "kernel_selector_helper.h" -#include "kernel_runner.h" #include "convolution/convolution_kernel_selector.h" #include "convolution/convolution_params.h" #include @@ -166,14 +165,6 @@ public: auto& kernel_selector = kernel_selector::convolution_kernel_selector::Instance(); - const auto& tuning_config = impl_param.get_program().get_config().get_property(ov::intel_gpu::tuning_config); - - if (tuning_config.mode == ov::intel_gpu::TuningMode::tuning_tune_and_cache || - tuning_config.mode == ov::intel_gpu::TuningMode::tuning_retune_and_cache) { - conv_optional_params.tuningParams.runner = - std::make_shared(arg.get_program().get_engine(), arg.get_program().get_id(), true, true); - } - auto best_kernel = kernel_selector.get_best_kernel(conv_params, conv_optional_params); return make_unique(best_kernel); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp index 80854a8cd5d..0531dd2f0fb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deformable_convolution.cpp @@ -7,7 +7,6 @@ #include "impls/implementation_map.hpp" #include "intel_gpu/runtime/error_handler.hpp" #include "kernel_selector_helper.h" -#include "kernel_runner.h" #include "convolution/convolution_kernel_selector.h" #include "convolution/convolution_params.h" #include diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index 1f0875ba86b..13b1fa0c3a9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -10,7 +10,6 @@ #include "fully_connected/fully_connected_params.h" #include "intel_gpu/runtime/error_handler.hpp" -#include "kernel_runner.h" #include "intel_gpu/primitives/reorder.hpp" #include "intel_gpu/primitives/input_layout.hpp" @@ -119,7 +118,6 @@ public: params.quantization = kernel_selector::QuantizationType::NONE; } - optional_params.tuningParams.runner = std::make_shared(progam.get_engine(), progam.get_id(), true); return {params, optional_params}; } diff --git a/src/plugins/intel_gpu/src/graph/include/kernel_selector_helper.h b/src/plugins/intel_gpu/src/graph/include/kernel_selector_helper.h index 500937d9aac..774628219e6 100644 --- a/src/plugins/intel_gpu/src/graph/include/kernel_selector_helper.h +++ b/src/plugins/intel_gpu/src/graph/include/kernel_selector_helper.h @@ -28,7 +28,6 @@ using namespace cldnn; namespace cldnn { enum class data_types : size_t; -enum class tuning_mode; struct format; struct layout; struct program; @@ -65,7 +64,6 @@ using softmax_dim = kernel_selector::SoftmaxDim; using mean_subtruct_mode = kernel_selector::MeanSubtractMode; using mean_op = kernel_selector::MeanOp; using concat_axis = kernel_selector::ConcatAxis; -using tuning_mode = kernel_selector::TuningMode; using sample_type = kernel_selector::ResampleType; using coordinate_transformation_mode = kernel_selector::CoordinateTransformationMode; using nearest_mode = kernel_selector::NearestMode; @@ -101,7 +99,6 @@ kernel_selector::data_layout to_data_layout(format f); cldnn::format from_data_layout(kernel_selector::data_layout l); kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped); cldnn::format::type from_weights_layout(kernel_selector::weights_layout l); -kernel_selector::tuning_mode to_tuning_mode(ov::intel_gpu::TuningMode mode); kernel_selector::data_tensor convert_data_tensor(const layout& l, const tensor view_offset = tensor {}); kernel_selector::weights_tensor convert_weights_tensor(const layout& l, bool is_grouped = false); layout from_weights_tensor(const kernel_selector::weights_tensor& t); diff --git a/src/plugins/intel_gpu/src/graph/kernel_runner.cpp b/src/plugins/intel_gpu/src/graph/kernel_runner.cpp deleted file mode 100644 index a350da6a3b9..00000000000 --- a/src/plugins/intel_gpu/src/graph/kernel_runner.cpp +++ /dev/null @@ -1,249 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "kernel_runner.h" -#include "runtime/kernels_cache.hpp" -#include "intel_gpu/runtime/stream.hpp" -#include "weight_bias_params.h" -#include "kernel_selector_helper.h" -#include -#include -#include -#include - -namespace cldnn { -namespace gpu { - -kernel_runner::kernel_runner(engine& engine_ref, uint32_t program_id, bool weights_and_bias_exist, bool zero_points_exist) - : _engine(engine_ref), program_id(program_id), weights_and_bias_exist(weights_and_bias_exist), zero_points_exist(zero_points_exist) {} - -void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kernels_data, - kernel_arguments_data& args) { - const auto& base_params = *static_cast(kernels_data[0].params.get()); - // Prepare input buffers - if (input_buffers.empty()) { - for (const auto& input : base_params.inputs) { - int num_of_input_elements = static_cast(input.PhysicalSize()); - input_buffers.push_back(_engine.allocate_memory( - {from_data_type(input.GetDType()), format::bfyx, tensor(1, 1, num_of_input_elements, 1)})); - } - } - for (const auto& input : input_buffers) { - args.inputs.push_back(input); - } - // Prepare fused operations buffers - if (fused_ops_buffers.empty()) { - for (auto& fused_op : base_params.fused_ops) { - for (auto& fused_ops_input : fused_op.tensors) { - auto num_of_elements = static_cast(fused_ops_input.PhysicalSize()); - fused_ops_buffers.push_back(_engine.allocate_memory( - { from_data_type(fused_ops_input.GetDType()), format::bfyx, tensor(1, 1, num_of_elements, 1) })); - } - } - } - for (const auto& fused_op_input : fused_ops_buffers) { - args.fused_op_inputs.push_back(fused_op_input); - } - // Prepare output buffer - if (output_buffers.empty()) { - for (size_t i = 0; i < base_params.outputs.size(); ++i) { - int num_of_output_elements = static_cast(base_params.outputs[i].PhysicalSize()); - output_buffers.push_back(_engine.allocate_memory({from_data_type(base_params.outputs[0].GetDType()), - format::bfyx, tensor(1, 1, num_of_output_elements, 1)})); - } - } - for (const auto& output : output_buffers) { - args.outputs.push_back(output); - } - - - if (weights_and_bias_exist) { - // Prepare weight buffer - const auto& weights_bias_params = - *static_cast(kernels_data[0].params.get()); - int num_of_weight_elements_ifm = static_cast(weights_bias_params.weights.IFM().v); - int num_of_weight_elements_spatial_y = static_cast(weights_bias_params.weights.Y().v); - int num_of_weight_elements_spatial_x = static_cast(weights_bias_params.weights.X().v); - int num_of_weight_elements_spatial = static_cast(weights_bias_params.weights.PhysicalSize()); - int num_of_weight_elements_ofm = 1; - - cldnn::format::type fmt = cldnn::format::bfyx; - - if (!cldnn::format::is_image_2d(from_weights_layout(weights_bias_params.weights.GetLayout()))) { - if (weight_buffers.empty()) - weight_buffers.push_back( - _engine.allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()), - fmt, - tensor(num_of_weight_elements_ofm, 1, num_of_weight_elements_spatial, 1)})); - - if (weight_buffers[0]->get_layout().format != fmt) - weight_buffers[0] = - _engine.allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()), - fmt, - tensor(num_of_weight_elements_ofm, 1, num_of_weight_elements_spatial, 1)}); - - while (weight_buffers[0]->get_layout().bytes_count() < weights_bias_params.weights.PhysicalSizeInBytes()) { - // Weights layout depends on the kernel. Multiply the buffer size by 2 until it is big enough - // (to avoid complex computations of the exact buffer size according to the chosen layout). - weight_buffers.clear(); - num_of_weight_elements_spatial *= 2; - weight_buffers.push_back( - _engine.allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()), - fmt, - tensor(num_of_weight_elements_ofm, 1, num_of_weight_elements_spatial, 1)})); - } - } else { - weight_buffers.clear(); - fmt = from_weights_layout(weights_bias_params.weights.GetLayout()); - num_of_weight_elements_ofm = static_cast(weights_bias_params.weights.OFM().v); - weight_buffers.push_back(_engine.allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()), - fmt, - tensor(num_of_weight_elements_ofm, - num_of_weight_elements_ifm, - num_of_weight_elements_spatial_x, - num_of_weight_elements_spatial_y)})); - } - args.weights = weight_buffers[0]; - - // Prepare bias buffer - if (!weights_bias_params.bias.empty()) { - if (bias_buffers.empty()) { - int num_of_bias_elements = static_cast(weights_bias_params.bias[0].PhysicalSize()); - bias_buffers.push_back(_engine.allocate_memory({from_data_type(weights_bias_params.bias[0].GetDType()), - format::bfyx, - tensor(1, num_of_bias_elements, 1, 1)})); - } - args.bias = bias_buffers[0]; - } - if (zero_points_exist) { - const auto& zero_point_params = - static_cast(weights_bias_params); - if (!zero_point_params.weights_zero_points.empty()) { - if (weight_zero_point_buffers.empty()) { - auto& weight_zero_point = zero_point_params.weights_zero_points[0]; - auto num_of_elements = static_cast(weight_zero_point.PhysicalSize()); - weight_zero_point_buffers.push_back( - _engine.allocate_memory({ - from_data_type(weight_zero_point.GetDType()), - format::bfyx, - tensor(1, num_of_elements, 1, 1) })); - } - args.weights_zero_points = weight_zero_point_buffers[0]; - } - if (!zero_point_params.activations_zero_points.empty()) { - if (activation_zero_point_buffers.empty()) { - auto& activation_zero_point = zero_point_params.activations_zero_points[0]; - auto num_of_elements = static_cast(activation_zero_point.PhysicalSize()); - activation_zero_point_buffers.push_back( - _engine.allocate_memory({ - from_data_type(activation_zero_point.GetDType()), - format::bfyx, - tensor(1, num_of_elements, 1, 1) })); - } - args.activations_zero_points = activation_zero_point_buffers[0]; - } - if (!zero_point_params.compensation.empty()) { - if (compensation_buffers.empty()) { - auto& compensation = zero_point_params.compensation[0]; - auto num_of_elements = static_cast(compensation.PhysicalSize()); - compensation_buffers.push_back( - _engine.allocate_memory({ - from_data_type(compensation.GetDType()), - format::bfyx, - tensor(1, num_of_elements, 1, 1) })); - } - args.compensation = compensation_buffers[0]; - } - } - } -} - -std::vector kernel_runner::run_kernels(const kernel_selector::KernelsData& kernels_data) { - std::vector run_times; - - stream::ptr stream = _engine.create_stream({}); - - int num_of_kernels_to_run = static_cast(kernels_data.size()); - int num_of_kernels_run = 0; - - kernel_selector::KernelsData::const_iterator batch_start = kernels_data.begin(); - kernel_selector::KernelsData::const_iterator batch_end; - while (num_of_kernels_to_run > 0) { - int current_compilation_batch = std::min(num_of_kernels_to_run, compilation_batch_size); - batch_end = batch_start + current_compilation_batch; - - std::vector kernels; - kernels_cache cache(_engine, {}, program_id); - - for (auto it = batch_start; it < batch_end; it++) { - auto kernel_id = cache.set_kernel_source(it->kernels[0].code.kernelString, false); - - kernels.push_back(cache.get_kernel(kernel_id)); - } - - kernel_arguments_data args; - - prepare_kernel_args(kernels_data, args); - stream->finish(); - - int i = 0; - for (auto it = batch_start; it < batch_end; it++) { - std::vector events; - auto kernel_run_time = std::chrono::nanoseconds::max(); - int num_of_runs = 0; - - for (int iteration = 0; iteration < runs_per_kernel; iteration++) { - event::ptr event; - try { - stream->set_arguments(*kernels[i], it->kernels[0].params, args); - event = stream->enqueue_kernel(*kernels[i], it->kernels[0].params, args, {}); - } catch (std::exception& e) { - std::cout << "[clDNN] Could not run kernel for auto-tune: " << it->kernelName - << " with auto-tune index " << it->autoTuneIndex << std::endl - << ", error message:" << e.what(); - } catch (...) { - // Could not run this kernel. Push back NULL event (will be ignored later). - std::cout << "[clDNN] Could not run kernel for auto-tune: " << it->kernelName - << " with auto-tune index " << it->autoTuneIndex << std::endl; - } - events.push_back(event); - } - stream->finish(); - - for (auto& event : events) { - if (event.get() != NULL) { - auto profiling_intervals = event->get_profiling_info(); - for (auto const& profiling_interval : profiling_intervals) { - if (profiling_interval.stage == instrumentation::profiling_stage::executing) { - kernel_run_time = std::min(profiling_interval.value->value(), kernel_run_time); - num_of_runs++; - break; - } - } - } - } - - if (num_of_runs > 0) { - run_times.push_back(kernel_run_time); - num_of_kernels_run += 1; - } else { - run_times.push_back(std::chrono::nanoseconds::max()); - } - i++; - } - - num_of_kernels_to_run -= current_compilation_batch; - batch_start += current_compilation_batch; - } - - if (num_of_kernels_run == 0) { - // If all kernels failed to run throw to avoid corrupting cache - throw std::runtime_error("kernel_runner::run_kernels - could not run any of provided kernels"); - } - - return run_times; -} - -} // namespace gpu -} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/kernel_runner.h b/src/plugins/intel_gpu/src/graph/kernel_runner.h deleted file mode 100644 index 6c46bfdfc58..00000000000 --- a/src/plugins/intel_gpu/src/graph/kernel_runner.h +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "intel_gpu/runtime/engine.hpp" -#include "kernel_selector_common.h" -#include "kernel_selector_helper.h" -#include "kernel_runner_interface.h" -#include - -namespace cldnn { -namespace gpu { - -class kernel_runner : public kernel_selector::KernelRunnerInterface { -public: - kernel_runner(engine& engine_ref, uint32_t program_id, bool weights_and_bias_exist = false, bool zero_points_exist = false); - - std::vector run_kernels(const kernel_selector::KernelsData& kernelsData) override; - -private: - const int compilation_batch_size = 50; - const int runs_per_kernel = 15; - - void prepare_kernel_args(const kernel_selector::KernelsData& kernels_data, - kernel_arguments_data& args); - - engine& _engine; - uint32_t program_id; - bool weights_and_bias_exist; - bool zero_points_exist; - std::vector input_buffers; - std::vector fused_ops_buffers; - std::vector output_buffers; - std::vector weight_buffers; - std::vector bias_buffers; - std::vector weight_zero_point_buffers; - std::vector activation_zero_point_buffers; - std::vector compensation_buffers; -}; - -////////////////////////////////////////////////////////////////////////////////////////////////////////// -} // namespace gpu -} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp index 657fd4d6586..8f62d7d6c17 100644 --- a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp @@ -837,23 +837,6 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) { } } -kernel_selector::tuning_mode to_tuning_mode(ov::intel_gpu::TuningMode mode) { - switch (mode) { - case ov::intel_gpu::TuningMode::tuning_disabled: - return kernel_selector::tuning_mode::TUNING_DISABLED; - case ov::intel_gpu::TuningMode::tuning_use_cache: - return kernel_selector::tuning_mode::TUNING_USE_CACHE; - case ov::intel_gpu::TuningMode::tuning_tune_and_cache: - return kernel_selector::tuning_mode::TUNING_TUNE_AND_CACHE; - case ov::intel_gpu::TuningMode::tuning_use_and_update: - return kernel_selector::tuning_mode::TUNING_USE_AND_UPDATE; - case ov::intel_gpu::TuningMode::tuning_retune_and_cache: - return kernel_selector::tuning_mode::TUNING_RETUNE_AND_CACHE; - default: - return kernel_selector::tuning_mode::TUNING_DISABLED; - } -} - kernel_selector::data_tensor convert_data_tensor(const layout& l, const tensor view_offset) { const auto& pad = l.data_padding; const auto& vals_original = l.get_partial_shape(); @@ -1103,7 +1086,6 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p params.engineInfo.computeUnitsCount = device_info.execution_units_count; params.engineInfo.maxThreadsPerExecutionUnit = device_info.num_threads_per_eu > 0 ? device_info.num_threads_per_eu : 7; params.engineInfo.maxThreadsPerDevice = params.engineInfo.maxThreadsPerExecutionUnit * device_info.execution_units_count; - params.engineInfo.deviceCache = program->get_tuning_cache(); params.engineInfo.driverVersion = device_info.driver_version; params.engineInfo.supportedSimdSizes = device_info.supported_simd_sizes; params.engineInfo.vendor_id = device_info.vendor_id; @@ -1121,10 +1103,6 @@ void set_optional_params(const program& program, kernel_selector::optional_param program.get_config().get_property(ov::intel_gpu::allow_static_input_reorder); params.allowInputReordering = false; params.allowOutputReordering = false; - - const auto& tuning_config = program.get_config().get_property(ov::intel_gpu::tuning_config); - params.tuningParams.mode = to_tuning_mode(tuning_config.mode); - params.tuningParams.cacheFilePath = tuning_config.cache_file_path; } void kernel_impl_params::save(BinaryOutputBuffer& ob) const { diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 82850b475e4..7e7b921636a 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -11,7 +11,6 @@ #include #include "kernel_selector_helper.h" -#include "device_cache_reader.h" #include "auto_tuner.h" #include "layout_optimizer.h" #include "pass_manager.h" @@ -108,7 +107,6 @@ program::program(engine& engine_ref, _stream(_engine.create_stream(config)), _config(config), processing_order(), - tuning_cache(nullptr), is_body_program(is_body_program), is_subgroup_local_block_io_supported(-1) { init_primitives(); @@ -141,7 +139,6 @@ program::program(engine& engine_ref, _config(config), _task_executor(task_executor), processing_order(), - tuning_cache(nullptr), is_subgroup_local_block_io_supported(-1) { init_primitives(); set_options(); @@ -161,7 +158,6 @@ program::program(engine& engine) _stream(_engine.create_stream({})), _config(), processing_order(), - tuning_cache(nullptr), is_subgroup_local_block_io_supported(-1) { } program::~program() { query_local_block_io_supported(); @@ -231,16 +227,6 @@ void program::init_kernels() { } } -void program::load_tuning_cache() { - OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "ProgramImpl::LoadTuningCache"); - GPU_DEBUG_DEFINE_MEM_LOGGER("ProgramImpl::LoadTuningCache"); - try { - tuning_cache = kernel_selector::CreateTuningCacheFromFile("cache.json"); - } catch (...) { - tuning_cache = std::make_shared(); - } -} - kernel_id program::add_kernel(const std::shared_ptr& kernelSring) { return _kernels_cache->set_kernel_source(kernelSring, false); } @@ -597,9 +583,6 @@ void program::run_graph_compilation() { apply_opt_pass(); } void program::pre_optimize_graph(bool is_internal) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "ProgramImpl::PreOptimizeGraph"); - if (!is_internal) - load_tuning_cache(); - // trim to outputs apply_opt_pass(); // ToDo remove hidden dependencies from trimm pass diff --git a/src/plugins/intel_gpu/src/kernel_selector/auto_tuner.cpp b/src/plugins/intel_gpu/src/kernel_selector/auto_tuner.cpp index 67d4c47f16a..9927cc69e48 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/auto_tuner.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/auto_tuner.cpp @@ -15,6 +15,20 @@ #include #include +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#include +#include +#include +#include +#else +#include +#include +#include +#include +#endif + namespace kernel_selector { TuningCache::TuningCache(const std::string& cacheFilePath, bool createMode) @@ -273,52 +287,10 @@ void TuningCache::Save(const std::string& cacheFilePath) { needsSave = false; } -std::tuple AutoTuner::LoadKernelOnline(const TuningMode tuningMode, - const std::string& cacheFilePath, - const Params& params) { - std::lock_guard lock(mutex); - if (!onlineCache || lastCachePath != cacheFilePath) { - onlineCache = std::make_shared(cacheFilePath, PerformTuning(tuningMode)); - lastCachePath = cacheFilePath; - } - auto result = onlineCache->LoadKernel(params, PerformUpdates(tuningMode)); - - if (onlineCache->NeedsSave() && PerformUpdates(tuningMode)) { - onlineCache->Save(cacheFilePath); - } - return result; -} - -void AutoTuner::StoreKernel(const std::string& cacheFilePath, - const Params& params, - std::string implementationName, - const int tuneIndex) { - std::lock_guard lock(mutex); - if (!onlineCache || lastCachePath != cacheFilePath) { - onlineCache = std::make_shared(cacheFilePath, true); - lastCachePath = cacheFilePath; - } - onlineCache->StoreKernel(params, implementationName, tuneIndex); - onlineCache->Save(cacheFilePath); -} - -void AutoTuner::RemoveKernel(const std::string& cacheFilePath, - const Params& params) { - std::lock_guard lock(mutex); - if (!onlineCache || lastCachePath != cacheFilePath) { - onlineCache = std::make_shared(cacheFilePath, false); - lastCachePath = cacheFilePath; - } - onlineCache->RemoveKernel(params); - if (onlineCache->NeedsSave()) { - onlineCache->Save(cacheFilePath); - } -} - -std::tuple AutoTuner::LoadKernelOffline(TuningCache* deviceCache, - const Params& params) { +std::tuple AutoTuner::LoadKernelOffline(const Params& params) { std::lock_guard lock(mutex); static const uint32_t defaultComputeUnits = 24; + TuningCache* deviceCache = TuningCache::get(); if (!deviceCache) return {}; auto result = deviceCache->LoadKernel(params, false); @@ -328,4 +300,37 @@ std::tuple AutoTuner::LoadKernelOffline(TuningCache* deviceCac return result; } +TuningCache* TuningCache::get() { + static std::mutex m; + static std::shared_ptr cache_instance = nullptr; + std::lock_guard lock(m); + std::string path = "cache.json"; +#ifdef _WIN32 + char module_path[MAX_PATH]; + HMODULE hm = NULL; + GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCSTR)&TuningCache::get, + &hm); + GetModuleFileName(hm, module_path, sizeof(module_path)); + std::string bin_path(module_path); + path = bin_path.substr(0, bin_path.find_last_of("\\")) + "\\cache.json"; +#else + const char* device_info_failed_msg = "Device lookup failed"; + Dl_info dl_info; + dladdr((void*)(device_info_failed_msg), &dl_info); // NOLINT + std::string bin_path(dl_info.dli_fname); + path = bin_path.substr(0, bin_path.find_last_of("/")) + "/cache.json"; +#endif + + if (!cache_instance) { + try { + cache_instance = std::make_shared(path, false); + } catch (...) { + cache_instance = std::make_shared(); + } + } + + return cache_instance.get(); +} + } // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/auto_tuner.h b/src/plugins/intel_gpu/src/kernel_selector/auto_tuner.h index f3d162ea5ed..f44e1522b81 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/auto_tuner.h +++ b/src/plugins/intel_gpu/src/kernel_selector/auto_tuner.h @@ -47,6 +47,8 @@ public: bool NeedsSave() const { return needsSave; } + static TuningCache* get(); + private: Entry LoadKernel_v1(const Params& params, uint32_t computeUnitsCount); Entry LoadKernel_v2(const Params& params, uint32_t computeUnitsCount); @@ -65,21 +67,9 @@ private: class AutoTuner { public: AutoTuner() = default; - std::tuple LoadKernelOnline(const TuningMode tuningMode, - const std::string& cacheFilePath, - const Params& params); - void StoreKernel(const std::string& cacheFilePath, - const Params& params, - std::string implementationName, - const int tuneIndex); - void RemoveKernel(const std::string& cacheFilePath, - const Params& params); - std::tuple LoadKernelOffline(TuningCache* cache, - const Params& params); + std::tuple LoadKernelOffline(const Params& params); private: - std::string lastCachePath; - std::shared_ptr onlineCache; std::mutex mutex; // Mutex to synchronize cache updates /* diff --git a/src/plugins/intel_gpu/src/kernel_selector/common_types.h b/src/plugins/intel_gpu/src/kernel_selector/common_types.h index 43b44f08634..b80addcb97c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/common_types.h +++ b/src/plugins/intel_gpu/src/kernel_selector/common_types.h @@ -443,36 +443,6 @@ struct DimTensor { DimTensor(T b, T f, T w, T z, T y, T x) : b(b), f(f), w(w), z(z), y(y), x(x) {} }; -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// AutoTunerMode -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -enum class TuningMode { - TUNING_DISABLED, // Tuning is disabled. - TUNING_USE_CACHE, // Tuning using the cached data (no on-line tuning for non-existing data). - TUNING_TUNE_AND_CACHE, // Tuning using the cached data if exist, tune and update cache otherwise.attention_params - TUNING_USE_AND_UPDATE, // Tuning using the cached data and other updating tasks. - // Performs updating tasks like removal of invalid caches, promoting to new formats, etc. - // No tuning for non-existing data. - TUNING_RETUNE_AND_CACHE // Perform tuning even if the cached data exists. -}; - -inline bool UseCached(const TuningMode& mode) { - return mode == TuningMode::TUNING_USE_CACHE - || mode == TuningMode::TUNING_TUNE_AND_CACHE - || mode == TuningMode::TUNING_USE_AND_UPDATE; -} - -inline bool PerformTuning(const TuningMode& mode) { - return mode == TuningMode::TUNING_TUNE_AND_CACHE - || mode == TuningMode::TUNING_RETUNE_AND_CACHE; -} - -inline bool PerformUpdates(const TuningMode& mode) { - return mode == TuningMode::TUNING_TUNE_AND_CACHE - || mode == TuningMode::TUNING_USE_AND_UPDATE - || mode == TuningMode::TUNING_RETUNE_AND_CACHE; -} - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Aliases: //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/plugins/intel_gpu/src/kernel_selector/device_cache_reader.cpp b/src/plugins/intel_gpu/src/kernel_selector/device_cache_reader.cpp deleted file mode 100644 index 588623c0fbf..00000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/device_cache_reader.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "device_cache_reader.h" -#include "auto_tuner.h" -#include -#include "istreamwrapper.h" - -#ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN -#define NOMINMAX -#include -#include -#include -#include -#else -#include -#include -#include -#include -#endif - -#include -#include -#include - -namespace kernel_selector { - -std::shared_ptr CreateTuningCacheFromFile(std::string tuning_cache_path) { - if (tuning_cache_path.compare("cache.json") == 0) { -#ifdef _WIN32 - char path[MAX_PATH]; - HMODULE hm = NULL; - GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (LPCSTR)&CreateTuningCacheFromFile, - &hm); - GetModuleFileName(hm, path, sizeof(path)); - std::string bin_path(path); - tuning_cache_path = bin_path.substr(0, bin_path.find_last_of("\\")) + "\\cache.json"; -#else - const char* device_info_failed_msg = "Device lookup failed"; - Dl_info dl_info; - dladdr((void*)(device_info_failed_msg), &dl_info); // NOLINT - std::string bin_path(dl_info.dli_fname); - tuning_cache_path = bin_path.substr(0, bin_path.find_last_of("/")) + "/cache.json"; -#endif - } - - return std::make_shared(tuning_cache_path, false); -} -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/device_cache_reader.h b/src/plugins/intel_gpu/src/kernel_selector/device_cache_reader.h deleted file mode 100644 index 514de278edb..00000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/device_cache_reader.h +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once -#include -#include - -namespace kernel_selector { -class TuningCache; - -std::shared_ptr CreateTuningCacheFromFile(std::string tuning_cache_path); - -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector.cpp index fa0266e4f70..021ff5d9071 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector.cpp @@ -71,15 +71,12 @@ KernelData kernel_selector_base::get_best_kernel(const Params& params, const opt return kernels[0]; } -KernelsData kernel_selector_base::GetNaiveBestKernel(const Params& params, - const optional_params& options, - KernelType kType) const { + +KernelsData kernel_selector_base::GetNaiveBestKernel(const KernelList& all_impls, const Params& params, const optional_params& options) const { KernelsData kernelsData; std::string kernelName; - auto allImplementations = GetAllImplementations(params, options, kType); - - for (const auto& implementation : allImplementations) { + for (const auto& implementation : all_impls) { // TODO: Unify this check with the Validate virtual method. Make // sure that the method is called here only, not in all the // GetKernelsData implementations. @@ -87,28 +84,14 @@ KernelsData kernel_selector_base::GetNaiveBestKernel(const Params& params, KernelsData kds = implementation->GetKernelsData(params, options); if (kds.size() && kds[0].kernels.size()) { -#ifdef ENABLE_ENV - const auto& it = forceKernels.find(implementation->GetName()); - if (it != forceKernels.end()) { - if (it->second == true) { - ENV_PRINTF("Force: %s\n", it->first.c_str()); - return kds; - } else { - ENV_PRINTF("Deny: %s\n", it->first.c_str()); - } - } else { -#endif - kernelsData = kds; - kernelName = implementation->GetName(); - break; -#ifdef ENABLE_ENV - } -#endif + kernelsData = kds; + kernelName = implementation->GetName(); + break; } } catch (std::runtime_error& ex) { // we have to handle it in order to avoid exception in KernelSelector as much we can kernelName = (implementation != nullptr)? implementation->GetName() : "[impl is null]"; - GPU_DEBUG_TRACE << "layerID: " << params.layerID << " kenrel: " << kernelName << " - " << ex.what() << std::endl; + GPU_DEBUG_TRACE << "layerID: " << params.layerID << " kernel: " << kernelName << " - " << ex.what() << std::endl; } } @@ -120,10 +103,11 @@ KernelsData kernel_selector_base::GetNaiveBestKernel(const Params& params, return kernelsData; } +KernelsData kernel_selector_base::GetNaiveBestKernel(const Params& params, const optional_params& options, KernelType kType) const { + return GetNaiveBestKernel(GetAllImplementations(params, options, kType), params, options); +} -KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params, - const optional_params& options, - KernelType kType) const { +KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params, const optional_params& options, KernelType kType) const { KernelsData kernelsData; std::string kernelName; @@ -131,16 +115,8 @@ KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params, auto kernel_params = static_cast(params); bool int8_kernel = kernel_params.inputs[0].GetDType() == Datatype::INT8 || kernel_params.inputs[0].GetDType() == Datatype::UINT8; std::tuple cachedKernelConfig; - if (options.tuningParams.mode == TuningMode::TUNING_DISABLED && !int8_kernel) { // Try to load kernel/config from offline cache -#if ENABLE_OFFLINE_TUNING_CACHE - cachedKernelConfig = autoTuner.LoadKernelOffline(params.engineInfo.deviceCache.get(), params); -#else - return GetNaiveBestKernel(params, options, kType); -#endif - } else if (UseCached(options.tuningParams.mode)) { // Try to load kernel/config from on-line cache - cachedKernelConfig = autoTuner.LoadKernelOnline(options.tuningParams.mode, - options.tuningParams.cacheFilePath, - params); + if (!int8_kernel) { // Try to load kernel/config from offline cache + cachedKernelConfig = autoTuner.LoadKernelOffline(params); } bool hashFoundInCache = !std::get<0>(cachedKernelConfig).empty(); @@ -166,83 +142,7 @@ KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params, } } - // Cache is not valid, remove it if performing update tasks. - if (hashFoundInCache && PerformUpdates(options.tuningParams.mode)) { - autoTuner.RemoveKernel(options.tuningParams.cacheFilePath, params); - } - - if (hashFoundInCache || // Cache is not valid - hash exists in cache but kernelsData was empty or kernel - // doesn't support the required key. - !PerformTuning(options.tuningParams.mode) || // On-line tuning is not allowed. - !options.tuningParams.runner) { // Runner is invalid - can't run on-line tuning - // Fall back to the default path. - return GetNaiveBestKernel(params, options, kType); - } - - // Start on-line tuning - assert(options.tuningParams.runner); - - for (const auto& implementation : allImplementations) { - const ParamsKey implKey = implementation->GetSupportedKey(); - if (implKey.TuningSupport()) { - try { - KernelsData kds = implementation->GetKernelsDataForAutoTune(params, options); - auto runTimes = options.tuningParams.runner->run_kernels(kds); - - for (size_t i = 0; i < kds.size(); i++) { - kds[i].runTime = runTimes[i].count(); - if (kernelsData.size() == 0 || kds[i].runTime < kernelsData[0].runTime) { - kernelsData = {kds[i]}; - kernelName = implementation->GetName(); - } - } - } catch (std::runtime_error& ex) { - // we have to handle it in order to avoid exception in KernelSelector as much we can - kernelName = (implementation != nullptr)? implementation->GetName() : "[impl is null]"; - GPU_DEBUG_TRACE << "layerID: " << params.layerID << " kenrel: " << kernelName << " - " << ex.what() << std::endl; - } - } - } - - // try to fallback to reference kernels if no optimized were found during tuning - if (!kernelsData.size()) { - for (const auto& implementation : allImplementations) { - const ParamsKey implKey = implementation->GetSupportedKey(); - // this time, check only implementations that have disabled tuning - if (!implKey.TuningSupport()) { - try { - KernelsData kds = implementation->GetKernelsDataForAutoTune(params, options); - auto runTimes = options.tuningParams.runner->run_kernels(kds); - - for (size_t i = 0; i < kds.size(); i++) { - kds[i].runTime = runTimes[i].count(); - if (kernelsData.size() == 0 || kds[i].runTime < kernelsData[0].runTime) { - kernelsData = {kds[i]}; - kernelName = implementation->GetName(); - } - } - } catch (std::runtime_error& ex) { - // we have to handle it in order to avoid exception in KernelSelector as much we can - kernelName = (implementation != nullptr)? implementation->GetName() : "[impl is null]"; - GPU_DEBUG_TRACE << "layerID: " << params.layerID << " kenrel: " << kernelName << " - " << ex.what() << std::endl; - } - } - } - } - - if (kernelsData.size()) { - kernelsData[0].kernelName = kernelName; - kernelsData[0].kernels[0].params.layerID = params.layerID; - autoTuner.StoreKernel(options.tuningParams.cacheFilePath, - params, - kernelName, - kernelsData[0].autoTuneIndex); - } else { - // Tuning failed, fall back to naive path - return GetNaiveBestKernel(params, options, kType); - } - - return kernelsData; + return GetNaiveBestKernel(allImplementations, params, options); } KernelList kernel_selector_base::GetAllImplementations(const Params& params, const optional_params& options, KernelType kType) const { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector.h b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector.h index c7ba14ae3bd..01b4e8ed836 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector.h @@ -5,7 +5,6 @@ #pragma once #include "kernel_selector_common.h" -#include "kernel_runner_interface.h" #include "auto_tuner.h" #include #include @@ -32,13 +31,17 @@ protected: } virtual KernelsData GetBestKernels(const Params& params, const optional_params& options) const = 0; - virtual KernelsData GetNaiveBestKernel(const Params& params, - const optional_params& options, - KernelType kType) const; + KernelsData GetNaiveBestKernel(const KernelList& all_impls, + const Params& params, + const optional_params& options) const; - virtual KernelsData GetAutoTuneBestKernel(const Params& params, - const optional_params& options, - KernelType kType) const; + KernelsData GetNaiveBestKernel(const Params& params, + const optional_params& options, + KernelType kType) const; + + KernelsData GetAutoTuneBestKernel(const Params& params, + const optional_params& options, + KernelType kType) const; KernelList GetAllImplementations(const Params& params, const optional_params& options, KernelType kType) const; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp index 18cb68bced7..f159b5a6903 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp @@ -360,25 +360,6 @@ void ParamsKey::EnableArgMaxMinAxis(ArgMaxMinAxis a) { } } -void ParamsKey::EnableIndexSelectAxis(IndexSelectAxis a) { - switch (a) { - case IndexSelectAxis::X: - key.restrict.val.dedicated.idxsel.axisX = 1; - break; - case IndexSelectAxis::Y: - key.restrict.val.dedicated.idxsel.axisY = 1; - break; - case IndexSelectAxis::FEATURE: - key.restrict.val.dedicated.idxsel.axisFeature = 1; - break; - case IndexSelectAxis::BATCH: - key.restrict.val.dedicated.idxsel.axisBatch = 1; - break; - default: - break; - } -} - void ParamsKey::EnableQuantization(QuantizationType q) { switch (q) { case QuantizationType::NONE: diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h index ca8e911e3b8..1fd6cea0c66 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h @@ -25,7 +25,6 @@ using DataBitField = std::bitset; using WightsBitField = std::bitset; class JitConstants; -class TuningCache; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // fuse_params @@ -102,7 +101,6 @@ class ParamsKey { public: ParamsKey() { key.restrict.raw = 0; - key.enableTuning = 1; key.inputType.raw = 0; key.outputType.raw = 0; key.inputWeightsType.raw = 0; @@ -266,7 +264,6 @@ public: uint32_t raw; } DataTypesKey; - uint32_t enableTuning; DataTypesKey inputType; DataTypesKey outputType; DataTypesKey inputWeightsType; @@ -345,17 +342,9 @@ public: void EnableLSTMDyanmicOptionalHiddenOutput() { key.restrict.val.dedicated.lstm_dynamic.last_hidden = 1; } void EnableLSTMDyanmicOptionalCellOutput() { key.restrict.val.dedicated.lstm_dynamic.last_cell = 1; } void EnableConcatKernelPerInput() { key.restrict.val.dedicated.concat.kernelPerInput = 1; } - void DisableTuning() { key.enableTuning = 0; } void EnableConcatOneKernel() { key.restrict.val.dedicated.concat.oneKernel = 1; } void EnableArgMaxMinAxis(ArgMaxMinAxis a); - void EnableIndexSelectAxis(IndexSelectAxis a); - void EnableFusedConvEltwiseRWOutOpt(); bool Support(const ParamsKey& k) const; - bool TuningSupport() const { - if (key.enableTuning == 1) - return true; - return false; - } bool isEnabledDifferentInputWeightsTypes() const { return key.restrict.val.different_input_weights_types ? true : false; } @@ -405,7 +394,6 @@ struct EngineInfo { std::string deviceId = ""; std::string driverVersion = ""; std::vector supportedSimdSizes = {}; - std::shared_ptr deviceCache; DeviceFeaturesKey get_supported_device_features_key() const; }; @@ -663,18 +651,6 @@ protected: explicit base_params(KernelType kt) : Params(kt, ""), inputs(1), outputs(1) {} }; -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Auto tuner parameters -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -class KernelRunnerInterface; -struct TuningParams { - TuningMode mode; - std::string cacheFilePath; - std::shared_ptr runner; - - TuningParams() : mode(TuningMode::TUNING_DISABLED), cacheFilePath(""), runner(nullptr) {} -}; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // optional_params //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -694,8 +670,6 @@ struct optional_params { bool allowOutputReordering = false; // allow kernel to ask graph compiler to reorder the output data before executing the next kernel - TuningParams tuningParams; - virtual ParamsKey GetSupportedKey() const; protected: diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.cpp index 6cf22015c8b..5bb0a615002 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.cpp @@ -18,6 +18,6 @@ binary_convolution_kernel_selector::binary_convolution_kernel_selector() { KernelsData binary_convolution_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const { - return GetAutoTuneBestKernel(params, options, KernelType::BINARY_CONVOLUTION); + return GetNaiveBestKernel(params, options, KernelType::BINARY_CONVOLUTION); } } // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.cpp index 0ab9d3052aa..999008f99f7 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.cpp @@ -64,7 +64,6 @@ ParamsKey Convolution_kernel_b_fs_yx_fsv16_imad_1x1::GetSupportedKey() const { k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA); k.EnableQuantization(QuantizationType::ASYMMETRIC_WEIGHTS); k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS); - k.DisableTuning(); return k; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp index f65d0c03160..102884aa8ad 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp @@ -375,7 +375,6 @@ ParamsKey Convolution_kernel_b_fs_zyx_fsv16_imad::GetSupportedKey() const { k.EnableQuantization(QuantizationType::ASYMMETRIC_WEIGHTS); k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS); k.EnableDilation(); - k.DisableTuning(); return k; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_imad.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_imad.cpp index 181f91ef4d7..b531cad1e02 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_imad.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_imad.cpp @@ -87,7 +87,6 @@ ParamsKey ConvolutionKernel_imad::GetSupportedKey() const { k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA); k.EnableQuantization(QuantizationType::ASYMMETRIC_WEIGHTS); k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS); - k.DisableTuning(); return k; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.cpp index 98784fb4106..53a3dc5f600 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.cpp @@ -38,7 +38,6 @@ ParamsKey Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::GetSupportedKey() co k.EnableNonBiasTerm(); k.EnableBatching(); k.EnableQuantization(QuantizationType::SYMMETRIC); - k.DisableTuning(); return k; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.cpp index 594d1245444..96424963790 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.cpp @@ -38,7 +38,6 @@ ParamsKey Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::GetSupportedKey() co k.EnableNonBiasTerm(); k.EnableBatching(); k.EnableQuantization(QuantizationType::SYMMETRIC); - k.DisableTuning(); return k; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.cpp index 444d0d76f42..52569483fb6 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.cpp @@ -40,7 +40,6 @@ ParamsKey ConvolutionKernel_mmad_b_fs_yx_fsv32::GetSupportedKey() const { k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS); k.EnableDifferentTypes(); k.EnableDifferentInputWeightsTypes(); - k.DisableTuning(); return k; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.cpp index fc5a9b868d8..ee4605227c6 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.cpp @@ -37,7 +37,6 @@ ParamsKey ConvolutionKernel_mmad_b_fs_yx_fsv32_dw::GetSupportedKey() const { k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA); k.EnableQuantization(QuantizationType::ASYMMETRIC_WEIGHTS); k.EnableDifferentTypes(); - k.DisableTuning(); k.EnableGroupedConvolution(); k.EnableDifferentInputWeightsTypes(); return k; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.cpp index 945fb8d3d99..beb0b0759b3 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.cpp @@ -43,7 +43,6 @@ ParamsKey ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv32::GetSupportedKey() const k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS); k.EnableDifferentTypes(); k.EnableDifferentInputWeightsTypes(); - k.DisableTuning(); return k; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.cpp index eccdaac1a94..d944b507cd3 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.cpp @@ -36,7 +36,6 @@ ParamsKey ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv4::GetSupportedKey() const { k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA); k.EnableDifferentTypes(); k.EnableDifferentInputWeightsTypes(); - k.DisableTuning(); return k; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_ref.cpp index 931fec63452..5d888e7783c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_ref.cpp @@ -38,7 +38,6 @@ ParamsKey ConvolutionKernel_Ref::GetSupportedKey() const { k.EnableBiasPerOutput(); k.EnableNonBiasTerm(); k.EnableBatching(); - k.DisableTuning(); k.EnableGroupedConvolution(); k.EnableQuantization(QuantizationType::SYMMETRIC); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_yxfb_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_yxfb_ref.cpp index d2dac688023..4e3c8842eee 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_yxfb_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_yxfb_ref.cpp @@ -22,7 +22,6 @@ ParamsKey ConvolutionKernel_yxfb_Ref::GetSupportedKey() const { k.EnableNonBiasTerm(); k.EnableBatching(); k.EnableDilation(); - k.DisableTuning(); k.EnableGroupedConvolution(); return k; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_conv.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_conv.cpp index 8eae58962e3..511c2813090 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_conv.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_conv.cpp @@ -23,7 +23,6 @@ ParamsKey DeformableConvolutionKernel_bfyx_conv::GetSupportedKey() const { k.EnableBiasPerFeature(); k.EnableNonBiasTerm(); k.EnableBatching(); - k.DisableTuning(); k.EnableGroupedConvolution(); k.EnableDeformableMode(); k.EnableDeformableMask(); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_interp.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_interp.cpp index e4cf3fdeb05..5cce5457d11 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_interp.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_interp.cpp @@ -23,7 +23,6 @@ ParamsKey DeformableConvolutionKernel_bfyx_interp::GetSupportedKey() const { k.EnableBiasPerFeature(); k.EnableNonBiasTerm(); k.EnableBatching(); - k.DisableTuning(); k.EnableGroupedConvolution(); k.EnableDeformableMode(); k.EnableDeformableMask(); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_ref.cpp index 1dfc90ebefe..f63482a336c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/deformable_convolution_kernel_bfyx_ref.cpp @@ -27,7 +27,6 @@ ParamsKey DeformableConvolutionKernel_bfyx_Ref::GetSupportedKey() const { k.EnableBiasPerFeature(); k.EnableNonBiasTerm(); k.EnableBatching(); - k.DisableTuning(); k.EnableGroupedConvolution(); k.EnableDeformableMode(); k.EnableDeformableMask(); diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 5a6cd0a770e..7a4ecf26e33 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -65,7 +65,6 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::intel_gpu::enable_memory_pool, true), std::make_tuple(ov::intel_gpu::allow_static_input_reorder, false), std::make_tuple(ov::intel_gpu::custom_outputs, std::vector{}), - std::make_tuple(ov::intel_gpu::tuning_config, ov::intel_gpu::TuningConfig{}), std::make_tuple(ov::intel_gpu::dump_graphs, ""), std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}), std::make_tuple(ov::intel_gpu::partial_build_program, false), diff --git a/src/plugins/intel_gpu/tests/test_cases/cache_test.cpp b/src/plugins/intel_gpu/tests/test_cases/cache_test.cpp deleted file mode 100644 index 988b10af5ca..00000000000 --- a/src/plugins/intel_gpu/tests/test_cases/cache_test.cpp +++ /dev/null @@ -1,353 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_utils.h" - -#include -#include -#include - -#include -#include -#include - -namespace { - -enum class cache_version { - version_1, - version_1_2, // version 1 cache, but version 2 file - version_2, - version_2_invalid, - version_2_from_1, - version_2_empty -}; - -std::string reference_impl_name = "convolution_gpu_ref"; -std::string eus_marker = "__EUs__"; - -std::string cache_v1 = -R"__a({ - "__EUs__": { - "18283230515392601293": ["convolution_gpu_ref", 0] - } -})__a"; - -std::string cache_v1_2 = -R"__a({ - "version_2": { - }, - "version_1": { - "__EUs__": { - "18283230515392601293": ["convolution_gpu_ref", 0] - } - } -})__a"; - -std::string cache_v2 = -R"__a({ - "version_2": { - "__EUs__": { - "CONVOLUTION": { - "F32_BFYX_v3_p0_0_v3_p0_0_v16_p0_0_v1_p0_0;F32_BFYX_v3_p0_0_v3_p0_0_v16_p0_0_v1_p0_0;1_1_1;1_1_1;1_1_1;0_0_0;1;1": ["convolution_gpu_ref", 0] - } - } - } -})__a"; - -std::string cache_v2_from_v1 = -R"__a({ - "version_2": { - "__EUs__": { - "CONVOLUTION": { - "F32_BFYX_v3_p0_0_v3_p0_0_v16_p0_0_v1_p0_0;F32_BFYX_v3_p0_0_v3_p0_0_v16_p0_0_v1_p0_0;1_1_1;1_1_1;1_1_1;0_0_0;1;1": ["convolution_gpu_ref", 0] - } - } - }, - "version_1": { - "__EUs__": {} - } -})__a"; - -std::string cache_v2_invalid = -R"__a({ - "version_2": { - "__EUs__": { - "CONVOLUTION": { - "F32_BFYX_v3_p0_0_v3_p0_0_v16_p0_0_v1_p0_0;F32_BFYX_v3_p0_0_v3_p0_0_v16_p0_0_v1_p0_0;1_1_1;1_1_1;1_1_1;0_0_0;1;1": ["non_existent", 0] - } - } - } -})__a"; - -std::string cache_v2_empty = -R"__a({ - "version_2": { - "__EUs__": { - "CONVOLUTION": {} - } - } -})__a"; - -std::string get_cache_version(cache_version version) { - std::string cache; - switch (version) { - case cache_version::version_1: - cache = cache_v1; - break; - case cache_version::version_1_2: - cache = cache_v1_2; - break; - case cache_version::version_2: - cache = cache_v2; - break; - case cache_version::version_2_invalid: - cache = cache_v2_invalid; - break; - case cache_version::version_2_from_1: - cache = cache_v2_from_v1; - break; - case cache_version::version_2_empty: - cache = cache_v2_empty; - break; - default: - throw std::invalid_argument("invalid cache version"); - } - return cache; -} - -std::string get_temporary_cache_file() { - static int i = 0; - std::string tmp_cache_file = "tmp_cldnn_test_cache_" + std::to_string(i) + ".json"; - i += 1; - return tmp_cache_file; -} - -template -void replace(std::string& text, const std::string& replaced, T replacement) { - auto it = text.find(replaced); - while (it != std::string::npos) { - text.replace(it, replaced.length(), std::to_string(replacement)); - it = text.find(replaced); - } -} - -void write(const std::string& filename, const std::string& text) { - std::ofstream file; - file.open(filename); - if (!file.is_open()) - throw std::runtime_error("Could not open file " + filename); - file << text; - file.close(); - if (!file) { - throw std::runtime_error("Failure writing to file " + filename); - } -} - -std::string read(const std::string& filename) { - std::stringstream ss; - std::ifstream file; - file.open(filename); - if (!file.is_open()) - throw std::runtime_error("Could not open file " + filename); - - ss << file.rdbuf(); - file.close(); - if (!file) { - throw std::runtime_error("Failure reading from file " + filename); - } - return ss.str(); -} - -void remove(const std::string& filename) { - std::remove(filename.c_str()); -} - -class cache_test_helper { -public: - cache_test_helper(cldnn::engine& engine, cache_version v) - : _engine(engine) - , _mode(ov::intel_gpu::TuningMode::tuning_disabled) - , cache_filename(get_temporary_cache_file()) - { - auto cache = get_cache_version(v); - auto eus = engine.get_device_info().execution_units_count; - replace(cache, eus_marker, eus); - - write(cache_filename, cache); - } - - virtual ~cache_test_helper() { - remove(cache_filename); - } - - cache_test_helper& with_mode(ov::intel_gpu::TuningMode mode) { - _mode = mode; - return *this; - } - - cache_test_helper& expect_cache(cache_version version) { - compare_cache = version; - return *this; - } - - cache_test_helper& expect_implementation(std::string implementation) { - compare_implementation = implementation; - return *this; - } - - cache_test_helper& expect_implementation_not(std::string implementation) { - compare_implementation = implementation; - compare_implementation.not_equal = true; - return *this; - } - - void test() { - auto w_mem = _engine.allocate_memory(cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 16, 16, 1, 1 })); - auto topology = cldnn::topology( - cldnn::input_layout("input", cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 16, 3, 3 })), - cldnn::data("weights", w_mem), - cldnn::convolution("conv", input_info("input"), { "weights" }) - ); - - ov::intel_gpu::TuningConfig tune_conf; - tune_conf.cache_file_path = cache_filename; - tune_conf.mode = _mode; - ExecutionConfig config{ - ov::intel_gpu::tuning_config(tune_conf), - ov::intel_gpu::optimize_data(true) - }; - cldnn::network network(_engine, topology, config); - auto in_mem = _engine.allocate_memory(cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 16, 3, 3 })); - network.set_input_data("input", in_mem); - network.execute(); - - if (compare_implementation.compare) { - std::string exec_impl = network.get_implementation_info("conv"); - auto precision_pos = exec_impl.find("__"); - exec_impl = exec_impl.substr(0, precision_pos); - - if (compare_implementation.not_equal) { - EXPECT_NE(exec_impl, compare_implementation.value); - } else { - ASSERT_EQ(exec_impl, compare_implementation.value); - } - } - - if (compare_cache.compare) { - auto cache = read(cache_filename); - auto expected_cache = get_cache_version(compare_cache.value); - auto eus = _engine.get_device_info().execution_units_count; - replace(expected_cache, eus_marker, eus); - - ASSERT_EQ(cache, expected_cache); - } - } - -private: - template - struct optional_compare { - bool compare; - bool not_equal; - T value; - - optional_compare() : compare(false) {} - optional_compare(T v) : compare(true), not_equal(false), value(v) {} - optional_compare(T v, bool neq) : compare(true), not_equal(neq), value(v) {} - }; - - cldnn::engine& _engine; - - ov::intel_gpu::TuningMode _mode; - - std::string cache_filename; - - optional_compare compare_cache; - optional_compare compare_implementation; -}; - -} // namespace - -class cache_version_test : public testing::TestWithParam { -public: - static std::string to_string(const testing::TestParamInfo& param) { - std::string result; - switch (param.param) { - case cache_version::version_1: - result = "version_1"; - break; - case cache_version::version_1_2: - result = "version_1_2"; - break; - case cache_version::version_2: - result = "version_2"; - break; - case cache_version::version_2_invalid: - result = "version_2_invalid"; - break; - case cache_version::version_2_from_1: - result = "version_2_from_1"; - break; - case cache_version::version_2_empty: - result = "version_2_empty"; - break; - default: - result = std::to_string(static_cast(param.param)); - break; - } - return result; - } -}; - -TEST(cache_test, no_cache_baseline) { - SCOPED_TRACE("default implementation same as reference, cache tests may provide invalid pass"); - auto& engine = tests::get_test_engine(); - auto helper = cache_test_helper(engine, cache_version::version_2); - - helper.with_mode(ov::intel_gpu::TuningMode::tuning_disabled) - .expect_implementation_not(reference_impl_name) - .test(); -} - -TEST_P(cache_version_test, use_only) { - auto version = GetParam(); - auto& engine = tests::get_test_engine(); - - cache_test_helper helper(engine, version); - helper.with_mode(ov::intel_gpu::TuningMode::tuning_use_cache) - .expect_implementation(reference_impl_name) - .expect_cache(version) - .test(); -} - -TEST_P(cache_version_test, update) { - auto version = GetParam(); - auto ex_version = cache_version::version_2; - if (version != cache_version::version_2) { - ex_version = cache_version::version_2_from_1; - } - - auto& engine = tests::get_test_engine(); - - cache_test_helper helper(engine, version); - helper.with_mode(ov::intel_gpu::TuningMode::tuning_use_and_update) - .expect_implementation(reference_impl_name) - .expect_cache(ex_version) - .test(); -} - -INSTANTIATE_TEST_SUITE_P( - smoke, - cache_version_test, - testing::Values(cache_version::version_1, cache_version::version_1_2, cache_version::version_2), - cache_version_test::to_string); - -TEST(cache_test, remove_invalid) { - auto& engine = tests::get_test_engine(); - - cache_test_helper helper(engine, cache_version::version_2_invalid); - helper.with_mode(ov::intel_gpu::TuningMode::tuning_use_and_update) - .expect_implementation_not(reference_impl_name) - .expect_cache(cache_version::version_2_empty) - .test(); -}