[GPU] Move tuning cache loading to kernel selector (#15112)

* [GPU] Move tuning cache loading to kernel selector. Remove tuning modes

* [GPU] Remove kernel runner
Vladimir Paramuzov 2023-01-20 15:17:12 +04:00 committed by GitHub
parent c1a9152d1c
commit 34d16b8777
38 changed files with 77 additions and 1089 deletions
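
In outline: instead of each program loading a tuning cache and optionally running on-line tuning through a kernel_runner, the kernel selector now consults a single read-only, process-wide cache and otherwise falls back to the first supported implementation. A minimal, self-contained C++ sketch of that pattern, with hypothetical demo names (the real counterparts are TuningCache::get(), AutoTuner::LoadKernelOffline() and GetNaiveBestKernel() in the diffs below):

#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <string>

// Stand-in for kernel_selector::TuningCache: a lazily created,
// process-wide, read-only cache (the real one parses cache.json
// located next to the plugin binary).
class TuningCacheDemo {
public:
    static TuningCacheDemo* get() {
        static std::mutex m;
        static std::shared_ptr<TuningCacheDemo> instance;
        std::lock_guard<std::mutex> lock(m);
        if (!instance)
            instance = std::make_shared<TuningCacheDemo>();
        return instance.get();
    }
    // Returns the cached implementation name, or "" on a miss.
    std::string load(const std::string& key) const {
        auto it = entries_.find(key);
        return it == entries_.end() ? std::string() : it->second;
    }
private:
    std::map<std::string, std::string> entries_{
        {"conv_3x3_f32", "convolution_gpu_ref"}};
};

// Stand-in for the selector: offline cache lookup first, then the
// naive "first supported implementation" fallback.
std::string select_kernel(const std::string& key) {
    if (auto* cache = TuningCacheDemo::get()) {
        auto name = cache->load(key);
        if (!name.empty())
            return name;              // offline cache hit
    }
    return "first_supported_kernel";  // GetNaiveBestKernel path
}

int main() {
    std::cout << select_kernel("conv_3x3_f32") << "\n";  // cache hit
    std::cout << select_kernel("unknown_shape") << "\n"; // fallback
}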

View File

@ -17,10 +17,6 @@
#include <utility>
#include <set>
namespace kernel_selector {
class TuningCache;
} // namespace kernel_selector
namespace cldnn {
struct topology;
@ -248,9 +244,6 @@ public:
kernel::ptr get_kernel(kernel_id id);
kernels_cache& get_kernels_cache() const;
void load_tuning_cache();
std::shared_ptr<kernel_selector::TuningCache> get_tuning_cache() const { return tuning_cache; }
// returns {-1, -1} if it fails to estimate by allocating the given batch size
std::pair<int64_t/*const alloc*/, int64_t/*general alloc*/> get_estimated_device_mem_usage();
@ -270,7 +263,6 @@ private:
std::vector<program_node*> outputs;
nodes_ordering processing_order;
std::unique_ptr<pass_manager> pm;
std::shared_ptr<kernel_selector::TuningCache> tuning_cache;
bool is_body_program;
int8_t is_subgroup_local_block_io_supported;

View File

@ -48,41 +48,6 @@ static constexpr Property<bool, PropertyMutability::RW> partial_build_program{"G
static constexpr Property<bool, PropertyMutability::RW> allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"};
static constexpr Property<std::string, PropertyMutability::RW> dump_graphs{"GPU_DUMP_GRAPHS"};
static constexpr Property<std::vector<std::string>, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"};
/// @brief Tuning mode.
enum class TuningMode {
/// @brief Tuning is disabled.
tuning_disabled,
/// @brief Tuning using the cached data (no on-line tuning for non-existing data).
tuning_use_cache,
/// @brief Tuning using the cached data if it exists; tune and update the cache otherwise.
tuning_tune_and_cache,
/// @brief Tuning using the cached data and update tasks.
/// @details Performs updating tasks like removal of invalid caches, promoting to new format, etc.
/// No tuning for non-existing data.
tuning_use_and_update,
/// @brief Retune the cache data even if it exists.
tuning_retune_and_cache
};
struct TuningConfig {
TuningMode mode;
std::string cache_file_path;
TuningConfig() : mode(TuningMode::tuning_disabled), cache_file_path("") {}
};
inline std::ostream& operator<<(std::ostream& os, const TuningConfig& val) {
os << val.cache_file_path;
return os;
}
static constexpr Property<TuningConfig, PropertyMutability::RW> tuning_config{"GPU_TUNING_CONFIG"};
static constexpr Property<ImplForcingMap, PropertyMutability::RW> force_implementations{"GPU_FORCE_IMPLEMENTATIONS"};
static constexpr Property<std::string, PropertyMutability::RW> config_file{"CONFIG_FILE"};
static constexpr Property<bool, PropertyMutability::RW> enable_lp_transformations{"LP_TRANSFORMS_MODE"};
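
For reference, the block removed above was the public surface for configuring tuning. A sketch of how it was typically driven before this commit, adapted from the deleted cache_test helper near the end of this diff (headers omitted; this API ceases to exist after this change):

// Configure on-line tuning against a user-supplied cache file
// (removed by this commit together with TuningMode/TuningConfig).
ov::intel_gpu::TuningConfig tune_conf;
tune_conf.cache_file_path = "my_cache.json";
tune_conf.mode = ov::intel_gpu::TuningMode::tuning_tune_and_cache;

ExecutionConfig config{
    ov::intel_gpu::tuning_config(tune_conf),
    ov::intel_gpu::optimize_data(true)
};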

View File

@ -9,7 +9,6 @@
#include "kernel_selector_helper.h"
#include "arg_max_min/arg_max_min_kernel_selector.h"
#include "arg_max_min/arg_max_min_kernel_base.h"
#include "kernel_runner.h"
namespace cldnn {
namespace ocl {

View File

@ -8,7 +8,6 @@
#include "impls/implementation_map.hpp"
#include "intel_gpu/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "kernel_runner.h"
#include "kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.h"
#include "kernel_selector/kernels/binary_convolution/binary_convolution_params.h"
#include <algorithm>
@ -75,14 +74,6 @@ public:
uint32_t dilation_x = dilation.size() >= 1 ? dilation[dilation.size() - 1] : 1;
params.dilation = {dilation_x, dilation_y, dilation_z};
const auto& tuning_config = impl_param.get_program().get_config().get_property(ov::intel_gpu::tuning_config);
if (tuning_config.mode == ov::intel_gpu::TuningMode::tuning_tune_and_cache ||
tuning_config.mode == ov::intel_gpu::TuningMode::tuning_retune_and_cache) {
optional_params.tuningParams.runner =
std::make_shared<gpu::kernel_runner>(impl_param.get_program().get_engine(), impl_param.get_program().get_id(), true);
}
return {params, optional_params};
}
};

View File

@ -8,7 +8,6 @@
#include "impls/implementation_map.hpp"
#include "intel_gpu/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "kernel_runner.h"
#include "convolution/convolution_kernel_selector.h"
#include "convolution/convolution_params.h"
#include <algorithm>
@ -166,14 +165,6 @@ public:
auto& kernel_selector = kernel_selector::convolution_kernel_selector::Instance();
const auto& tuning_config = impl_param.get_program().get_config().get_property(ov::intel_gpu::tuning_config);
if (tuning_config.mode == ov::intel_gpu::TuningMode::tuning_tune_and_cache ||
tuning_config.mode == ov::intel_gpu::TuningMode::tuning_retune_and_cache) {
conv_optional_params.tuningParams.runner =
std::make_shared<gpu::kernel_runner>(arg.get_program().get_engine(), arg.get_program().get_id(), true, true);
}
auto best_kernel = kernel_selector.get_best_kernel(conv_params, conv_optional_params);
return make_unique<convolution_impl>(best_kernel);

View File

@ -7,7 +7,6 @@
#include "impls/implementation_map.hpp"
#include "intel_gpu/runtime/error_handler.hpp"
#include "kernel_selector_helper.h"
#include "kernel_runner.h"
#include "convolution/convolution_kernel_selector.h"
#include "convolution/convolution_params.h"
#include <algorithm>

View File

@ -10,7 +10,6 @@
#include "fully_connected/fully_connected_params.h"
#include "intel_gpu/runtime/error_handler.hpp"
#include "kernel_runner.h"
#include "intel_gpu/primitives/reorder.hpp"
#include "intel_gpu/primitives/input_layout.hpp"
@ -119,7 +118,6 @@ public:
params.quantization = kernel_selector::QuantizationType::NONE;
}
optional_params.tuningParams.runner = std::make_shared<gpu::kernel_runner>(progam.get_engine(), progam.get_id(), true);
return {params, optional_params};
}

View File

@ -28,7 +28,6 @@ using namespace cldnn;
namespace cldnn {
enum class data_types : size_t;
enum class tuning_mode;
struct format;
struct layout;
struct program;
@ -65,7 +64,6 @@ using softmax_dim = kernel_selector::SoftmaxDim;
using mean_subtruct_mode = kernel_selector::MeanSubtractMode;
using mean_op = kernel_selector::MeanOp;
using concat_axis = kernel_selector::ConcatAxis;
using tuning_mode = kernel_selector::TuningMode;
using sample_type = kernel_selector::ResampleType;
using coordinate_transformation_mode = kernel_selector::CoordinateTransformationMode;
using nearest_mode = kernel_selector::NearestMode;
@ -101,7 +99,6 @@ kernel_selector::data_layout to_data_layout(format f);
cldnn::format from_data_layout(kernel_selector::data_layout l);
kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped);
cldnn::format::type from_weights_layout(kernel_selector::weights_layout l);
kernel_selector::tuning_mode to_tuning_mode(ov::intel_gpu::TuningMode mode);
kernel_selector::data_tensor convert_data_tensor(const layout& l, const tensor view_offset = tensor {});
kernel_selector::weights_tensor convert_weights_tensor(const layout& l, bool is_grouped = false);
layout from_weights_tensor(const kernel_selector::weights_tensor& t);

View File

@ -1,249 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "kernel_runner.h"
#include "runtime/kernels_cache.hpp"
#include "intel_gpu/runtime/stream.hpp"
#include "weight_bias_params.h"
#include "kernel_selector_helper.h"
#include <chrono>
#include <vector>
#include <limits>
#include <algorithm>
namespace cldnn {
namespace gpu {
kernel_runner::kernel_runner(engine& engine_ref, uint32_t program_id, bool weights_and_bias_exist, bool zero_points_exist)
: _engine(engine_ref), program_id(program_id), weights_and_bias_exist(weights_and_bias_exist), zero_points_exist(zero_points_exist) {}
void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kernels_data,
kernel_arguments_data& args) {
const auto& base_params = *static_cast<kernel_selector::base_params*>(kernels_data[0].params.get());
// Prepare input buffers
if (input_buffers.empty()) {
for (const auto& input : base_params.inputs) {
int num_of_input_elements = static_cast<int>(input.PhysicalSize());
input_buffers.push_back(_engine.allocate_memory(
{from_data_type(input.GetDType()), format::bfyx, tensor(1, 1, num_of_input_elements, 1)}));
}
}
for (const auto& input : input_buffers) {
args.inputs.push_back(input);
}
// Prepare fused operations buffers
if (fused_ops_buffers.empty()) {
for (auto& fused_op : base_params.fused_ops) {
for (auto& fused_ops_input : fused_op.tensors) {
auto num_of_elements = static_cast<int>(fused_ops_input.PhysicalSize());
fused_ops_buffers.push_back(_engine.allocate_memory(
{ from_data_type(fused_ops_input.GetDType()), format::bfyx, tensor(1, 1, num_of_elements, 1) }));
}
}
}
for (const auto& fused_op_input : fused_ops_buffers) {
args.fused_op_inputs.push_back(fused_op_input);
}
// Prepare output buffer
if (output_buffers.empty()) {
for (size_t i = 0; i < base_params.outputs.size(); ++i) {
int num_of_output_elements = static_cast<int>(base_params.outputs[i].PhysicalSize());
output_buffers.push_back(_engine.allocate_memory({from_data_type(base_params.outputs[0].GetDType()),
format::bfyx, tensor(1, 1, num_of_output_elements, 1)}));
}
}
for (const auto& output : output_buffers) {
args.outputs.push_back(output);
}
if (weights_and_bias_exist) {
// Prepare weight buffer
const auto& weights_bias_params =
*static_cast<kernel_selector::weight_bias_params*>(kernels_data[0].params.get());
int num_of_weight_elements_ifm = static_cast<int>(weights_bias_params.weights.IFM().v);
int num_of_weight_elements_spatial_y = static_cast<int>(weights_bias_params.weights.Y().v);
int num_of_weight_elements_spatial_x = static_cast<int>(weights_bias_params.weights.X().v);
int num_of_weight_elements_spatial = static_cast<int>(weights_bias_params.weights.PhysicalSize());
int num_of_weight_elements_ofm = 1;
cldnn::format::type fmt = cldnn::format::bfyx;
if (!cldnn::format::is_image_2d(from_weights_layout(weights_bias_params.weights.GetLayout()))) {
if (weight_buffers.empty())
weight_buffers.push_back(
_engine.allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()),
fmt,
tensor(num_of_weight_elements_ofm, 1, num_of_weight_elements_spatial, 1)}));
if (weight_buffers[0]->get_layout().format != fmt)
weight_buffers[0] =
_engine.allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()),
fmt,
tensor(num_of_weight_elements_ofm, 1, num_of_weight_elements_spatial, 1)});
while (weight_buffers[0]->get_layout().bytes_count() < weights_bias_params.weights.PhysicalSizeInBytes()) {
// Weights layout depends on the kernel. Multiply the buffer size by 2 until it is big enough
// (to avoid complex computations of the exact buffer size according to the chosen layout).
weight_buffers.clear();
num_of_weight_elements_spatial *= 2;
weight_buffers.push_back(
_engine.allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()),
fmt,
tensor(num_of_weight_elements_ofm, 1, num_of_weight_elements_spatial, 1)}));
}
} else {
weight_buffers.clear();
fmt = from_weights_layout(weights_bias_params.weights.GetLayout());
num_of_weight_elements_ofm = static_cast<int>(weights_bias_params.weights.OFM().v);
weight_buffers.push_back(_engine.allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()),
fmt,
tensor(num_of_weight_elements_ofm,
num_of_weight_elements_ifm,
num_of_weight_elements_spatial_x,
num_of_weight_elements_spatial_y)}));
}
args.weights = weight_buffers[0];
// Prepare bias buffer
if (!weights_bias_params.bias.empty()) {
if (bias_buffers.empty()) {
int num_of_bias_elements = static_cast<int>(weights_bias_params.bias[0].PhysicalSize());
bias_buffers.push_back(_engine.allocate_memory({from_data_type(weights_bias_params.bias[0].GetDType()),
format::bfyx,
tensor(1, num_of_bias_elements, 1, 1)}));
}
args.bias = bias_buffers[0];
}
if (zero_points_exist) {
const auto& zero_point_params =
static_cast<const kernel_selector::weight_bias_zero_point_params&>(weights_bias_params);
if (!zero_point_params.weights_zero_points.empty()) {
if (weight_zero_point_buffers.empty()) {
auto& weight_zero_point = zero_point_params.weights_zero_points[0];
auto num_of_elements = static_cast<int>(weight_zero_point.PhysicalSize());
weight_zero_point_buffers.push_back(
_engine.allocate_memory({
from_data_type(weight_zero_point.GetDType()),
format::bfyx,
tensor(1, num_of_elements, 1, 1) }));
}
args.weights_zero_points = weight_zero_point_buffers[0];
}
if (!zero_point_params.activations_zero_points.empty()) {
if (activation_zero_point_buffers.empty()) {
auto& activation_zero_point = zero_point_params.activations_zero_points[0];
auto num_of_elements = static_cast<int>(activation_zero_point.PhysicalSize());
activation_zero_point_buffers.push_back(
_engine.allocate_memory({
from_data_type(activation_zero_point.GetDType()),
format::bfyx,
tensor(1, num_of_elements, 1, 1) }));
}
args.activations_zero_points = activation_zero_point_buffers[0];
}
if (!zero_point_params.compensation.empty()) {
if (compensation_buffers.empty()) {
auto& compensation = zero_point_params.compensation[0];
auto num_of_elements = static_cast<int>(compensation.PhysicalSize());
compensation_buffers.push_back(
_engine.allocate_memory({
from_data_type(compensation.GetDType()),
format::bfyx,
tensor(1, num_of_elements, 1, 1) }));
}
args.compensation = compensation_buffers[0];
}
}
}
}
std::vector<std::chrono::nanoseconds> kernel_runner::run_kernels(const kernel_selector::KernelsData& kernels_data) {
std::vector<std::chrono::nanoseconds> run_times;
stream::ptr stream = _engine.create_stream({});
int num_of_kernels_to_run = static_cast<int>(kernels_data.size());
int num_of_kernels_run = 0;
kernel_selector::KernelsData::const_iterator batch_start = kernels_data.begin();
kernel_selector::KernelsData::const_iterator batch_end;
while (num_of_kernels_to_run > 0) {
int current_compilation_batch = std::min(num_of_kernels_to_run, compilation_batch_size);
batch_end = batch_start + current_compilation_batch;
std::vector<kernel::ptr> kernels;
kernels_cache cache(_engine, {}, program_id);
for (auto it = batch_start; it < batch_end; it++) {
auto kernel_id = cache.set_kernel_source(it->kernels[0].code.kernelString, false);
kernels.push_back(cache.get_kernel(kernel_id));
}
kernel_arguments_data args;
prepare_kernel_args(kernels_data, args);
stream->finish();
int i = 0;
for (auto it = batch_start; it < batch_end; it++) {
std::vector<event::ptr> events;
auto kernel_run_time = std::chrono::nanoseconds::max();
int num_of_runs = 0;
for (int iteration = 0; iteration < runs_per_kernel; iteration++) {
event::ptr event;
try {
stream->set_arguments(*kernels[i], it->kernels[0].params, args);
event = stream->enqueue_kernel(*kernels[i], it->kernels[0].params, args, {});
} catch (std::exception& e) {
std::cout << "[clDNN] Could not run kernel for auto-tune: " << it->kernelName
<< " with auto-tune index " << it->autoTuneIndex << std::endl
<< ", error message:" << e.what();
} catch (...) {
// Could not run this kernel. Push back NULL event (will be ignored later).
std::cout << "[clDNN] Could not run kernel for auto-tune: " << it->kernelName
<< " with auto-tune index " << it->autoTuneIndex << std::endl;
}
events.push_back(event);
}
stream->finish();
for (auto& event : events) {
if (event.get() != NULL) {
auto profiling_intervals = event->get_profiling_info();
for (auto const& profiling_interval : profiling_intervals) {
if (profiling_interval.stage == instrumentation::profiling_stage::executing) {
kernel_run_time = std::min(profiling_interval.value->value(), kernel_run_time);
num_of_runs++;
break;
}
}
}
}
if (num_of_runs > 0) {
run_times.push_back(kernel_run_time);
num_of_kernels_run += 1;
} else {
run_times.push_back(std::chrono::nanoseconds::max());
}
i++;
}
num_of_kernels_to_run -= current_compilation_batch;
batch_start += current_compilation_batch;
}
if (num_of_kernels_run == 0) {
// If all kernels failed to run throw to avoid corrupting cache
throw std::runtime_error("kernel_runner::run_kernels - could not run any of provided kernels");
}
return run_times;
}
} // namespace gpu
} // namespace cldnn
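
The deleted runner scored each candidate kernel by the minimum execution time over runs_per_kernel attempts, keeping nanoseconds::max() as a sentinel for candidates that never ran successfully. A self-contained miniature of that measurement strategy, using host-side std::chrono timing in place of OpenCL profiling events (illustrative only):

#include <algorithm>
#include <chrono>
#include <functional>
#include <vector>

using candidate = std::function<void()>;

// Best-of-N timing per candidate; candidates that throw on every run keep
// the nanoseconds::max() sentinel, mirroring the deleted run_kernels().
std::vector<std::chrono::nanoseconds> run_candidates(
        const std::vector<candidate>& kernels, int runs_per_kernel = 15) {
    std::vector<std::chrono::nanoseconds> run_times;
    for (const auto& k : kernels) {
        auto best = std::chrono::nanoseconds::max();
        for (int i = 0; i < runs_per_kernel; ++i) {
            try {
                auto start = std::chrono::steady_clock::now();
                k();
                auto elapsed = std::chrono::steady_clock::now() - start;
                best = std::min(best,
                    std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed));
            } catch (...) {
                // Failed run: ignore it, like the null-event path above.
            }
        }
        run_times.push_back(best);
    }
    return run_times;
}

int main() {
    auto times = run_candidates({ [] { /* pretend kernel A */ }, [] { throw 1; } });
    return times.size() == 2 ? 0 : 1;
}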

View File

@ -1,45 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "intel_gpu/runtime/engine.hpp"
#include "kernel_selector_common.h"
#include "kernel_selector_helper.h"
#include "kernel_runner_interface.h"
#include <vector>
namespace cldnn {
namespace gpu {
class kernel_runner : public kernel_selector::KernelRunnerInterface {
public:
kernel_runner(engine& engine_ref, uint32_t program_id, bool weights_and_bias_exist = false, bool zero_points_exist = false);
std::vector<std::chrono::nanoseconds> run_kernels(const kernel_selector::KernelsData& kernelsData) override;
private:
const int compilation_batch_size = 50;
const int runs_per_kernel = 15;
void prepare_kernel_args(const kernel_selector::KernelsData& kernels_data,
kernel_arguments_data& args);
engine& _engine;
uint32_t program_id;
bool weights_and_bias_exist;
bool zero_points_exist;
std::vector<memory::cptr> input_buffers;
std::vector<memory::cptr> fused_ops_buffers;
std::vector<memory::ptr> output_buffers;
std::vector<memory::cptr> weight_buffers;
std::vector<memory::cptr> bias_buffers;
std::vector<memory::cptr> weight_zero_point_buffers;
std::vector<memory::cptr> activation_zero_point_buffers;
std::vector<memory::cptr> compensation_buffers;
};
//////////////////////////////////////////////////////////////////////////////////////////////////////////
} // namespace gpu
} // namespace cldnn

View File

@ -837,23 +837,6 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) {
}
}
kernel_selector::tuning_mode to_tuning_mode(ov::intel_gpu::TuningMode mode) {
switch (mode) {
case ov::intel_gpu::TuningMode::tuning_disabled:
return kernel_selector::tuning_mode::TUNING_DISABLED;
case ov::intel_gpu::TuningMode::tuning_use_cache:
return kernel_selector::tuning_mode::TUNING_USE_CACHE;
case ov::intel_gpu::TuningMode::tuning_tune_and_cache:
return kernel_selector::tuning_mode::TUNING_TUNE_AND_CACHE;
case ov::intel_gpu::TuningMode::tuning_use_and_update:
return kernel_selector::tuning_mode::TUNING_USE_AND_UPDATE;
case ov::intel_gpu::TuningMode::tuning_retune_and_cache:
return kernel_selector::tuning_mode::TUNING_RETUNE_AND_CACHE;
default:
return kernel_selector::tuning_mode::TUNING_DISABLED;
}
}
kernel_selector::data_tensor convert_data_tensor(const layout& l, const tensor view_offset) {
const auto& pad = l.data_padding;
const auto& vals_original = l.get_partial_shape();
@ -1103,7 +1086,6 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p
params.engineInfo.computeUnitsCount = device_info.execution_units_count;
params.engineInfo.maxThreadsPerExecutionUnit = device_info.num_threads_per_eu > 0 ? device_info.num_threads_per_eu : 7;
params.engineInfo.maxThreadsPerDevice = params.engineInfo.maxThreadsPerExecutionUnit * device_info.execution_units_count;
params.engineInfo.deviceCache = program->get_tuning_cache();
params.engineInfo.driverVersion = device_info.driver_version;
params.engineInfo.supportedSimdSizes = device_info.supported_simd_sizes;
params.engineInfo.vendor_id = device_info.vendor_id;
@ -1121,10 +1103,6 @@ void set_optional_params(const program& program, kernel_selector::optional_param
program.get_config().get_property(ov::intel_gpu::allow_static_input_reorder);
params.allowInputReordering = false;
params.allowOutputReordering = false;
const auto& tuning_config = program.get_config().get_property(ov::intel_gpu::tuning_config);
params.tuningParams.mode = to_tuning_mode(tuning_config.mode);
params.tuningParams.cacheFilePath = tuning_config.cache_file_path;
}
void kernel_impl_params::save(BinaryOutputBuffer& ob) const {

View File

@ -11,7 +11,6 @@
#include <ie_system_conf.h>
#include "kernel_selector_helper.h"
#include "device_cache_reader.h"
#include "auto_tuner.h"
#include "layout_optimizer.h"
#include "pass_manager.h"
@ -108,7 +107,6 @@ program::program(engine& engine_ref,
_stream(_engine.create_stream(config)),
_config(config),
processing_order(),
tuning_cache(nullptr),
is_body_program(is_body_program),
is_subgroup_local_block_io_supported(-1) {
init_primitives();
@ -141,7 +139,6 @@ program::program(engine& engine_ref,
_config(config),
_task_executor(task_executor),
processing_order(),
tuning_cache(nullptr),
is_subgroup_local_block_io_supported(-1) {
init_primitives();
set_options();
@ -161,7 +158,6 @@ program::program(engine& engine)
_stream(_engine.create_stream({})),
_config(),
processing_order(),
tuning_cache(nullptr),
is_subgroup_local_block_io_supported(-1) { }
program::~program() {
query_local_block_io_supported();
@ -231,16 +227,6 @@ void program::init_kernels() {
}
}
void program::load_tuning_cache() {
OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "ProgramImpl::LoadTuningCache");
GPU_DEBUG_DEFINE_MEM_LOGGER("ProgramImpl::LoadTuningCache");
try {
tuning_cache = kernel_selector::CreateTuningCacheFromFile("cache.json");
} catch (...) {
tuning_cache = std::make_shared<kernel_selector::TuningCache>();
}
}
kernel_id program::add_kernel(const std::shared_ptr<kernel_string>& kernelSring) {
return _kernels_cache->set_kernel_source(kernelSring, false);
}
@ -597,9 +583,6 @@ void program::run_graph_compilation() { apply_opt_pass<compile_graph>(); }
void program::pre_optimize_graph(bool is_internal) {
OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "ProgramImpl::PreOptimizeGraph");
if (!is_internal)
load_tuning_cache();
// trim to outputs
apply_opt_pass<trim_to_outputs>(); // ToDo remove hidden dependencies from trim pass

View File

@ -15,6 +15,20 @@
#include <utility>
#include <tuple>
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <windows.h>
#include <SetupAPI.h>
#include <devguid.h>
#include <cstring>
#else
#include <unistd.h>
#include <limits.h>
#include <link.h>
#include <dlfcn.h>
#endif
namespace kernel_selector {
TuningCache::TuningCache(const std::string& cacheFilePath, bool createMode)
@ -273,52 +287,10 @@ void TuningCache::Save(const std::string& cacheFilePath) {
needsSave = false;
}
std::tuple<std::string, int> AutoTuner::LoadKernelOnline(const TuningMode tuningMode,
const std::string& cacheFilePath,
const Params& params) {
std::lock_guard<std::mutex> lock(mutex);
if (!onlineCache || lastCachePath != cacheFilePath) {
onlineCache = std::make_shared<TuningCache>(cacheFilePath, PerformTuning(tuningMode));
lastCachePath = cacheFilePath;
}
auto result = onlineCache->LoadKernel(params, PerformUpdates(tuningMode));
if (onlineCache->NeedsSave() && PerformUpdates(tuningMode)) {
onlineCache->Save(cacheFilePath);
}
return result;
}
void AutoTuner::StoreKernel(const std::string& cacheFilePath,
const Params& params,
std::string implementationName,
const int tuneIndex) {
std::lock_guard<std::mutex> lock(mutex);
if (!onlineCache || lastCachePath != cacheFilePath) {
onlineCache = std::make_shared<TuningCache>(cacheFilePath, true);
lastCachePath = cacheFilePath;
}
onlineCache->StoreKernel(params, implementationName, tuneIndex);
onlineCache->Save(cacheFilePath);
}
void AutoTuner::RemoveKernel(const std::string& cacheFilePath,
const Params& params) {
std::lock_guard<std::mutex> lock(mutex);
if (!onlineCache || lastCachePath != cacheFilePath) {
onlineCache = std::make_shared<TuningCache>(cacheFilePath, false);
lastCachePath = cacheFilePath;
}
onlineCache->RemoveKernel(params);
if (onlineCache->NeedsSave()) {
onlineCache->Save(cacheFilePath);
}
}
std::tuple<std::string, int> AutoTuner::LoadKernelOffline(TuningCache* deviceCache,
const Params& params) {
std::tuple<std::string, int> AutoTuner::LoadKernelOffline(const Params& params) {
std::lock_guard<std::mutex> lock(mutex);
static const uint32_t defaultComputeUnits = 24;
TuningCache* deviceCache = TuningCache::get();
if (!deviceCache)
return {};
auto result = deviceCache->LoadKernel(params, false);
@ -328,4 +300,37 @@ std::tuple<std::string, int> AutoTuner::LoadKernelOffline(TuningCache* deviceCac
return result;
}
TuningCache* TuningCache::get() {
static std::mutex m;
static std::shared_ptr<TuningCache> cache_instance = nullptr;
std::lock_guard<std::mutex> lock(m);
std::string path = "cache.json";
#ifdef _WIN32
char module_path[MAX_PATH];
HMODULE hm = NULL;
GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
(LPCSTR)&TuningCache::get,
&hm);
GetModuleFileName(hm, module_path, sizeof(module_path));
std::string bin_path(module_path);
path = bin_path.substr(0, bin_path.find_last_of("\\")) + "\\cache.json";
#else
const char* device_info_failed_msg = "Device lookup failed";
Dl_info dl_info;
dladdr((void*)(device_info_failed_msg), &dl_info); // NOLINT
std::string bin_path(dl_info.dli_fname);
path = bin_path.substr(0, bin_path.find_last_of("/")) + "/cache.json";
#endif
if (!cache_instance) {
try {
cache_instance = std::make_shared<kernel_selector::TuningCache>(path, false);
} catch (...) {
cache_instance = std::make_shared<kernel_selector::TuningCache>();
}
}
return cache_instance.get();
}
} // namespace kernel_selector
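
The new TuningCache::get() above resolves cache.json relative to the loaded binary rather than the current working directory. On Linux this hinges on dladdr(): given any address inside a loaded object, it reports that object's file path. A self-contained sketch of just that trick (Linux-only; the Windows branch above uses GetModuleHandleEx/GetModuleFileName instead):

#include <dlfcn.h>  // dladdr(); may require linking with -ldl
#include <iostream>
#include <string>

static std::string cache_path_next_to_binary() {
    Dl_info dl_info{};
    // Any address inside this binary serves as the anchor; this function will do.
    if (dladdr(reinterpret_cast<void*>(&cache_path_next_to_binary), &dl_info) == 0 ||
        dl_info.dli_fname == nullptr) {
        return "cache.json";  // fall back to the working directory
    }
    std::string bin_path(dl_info.dli_fname);
    auto pos = bin_path.find_last_of('/');
    return (pos == std::string::npos ? std::string()
                                     : bin_path.substr(0, pos + 1)) + "cache.json";
}

int main() { std::cout << cache_path_next_to_binary() << "\n"; }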

View File

@ -47,6 +47,8 @@ public:
bool NeedsSave() const { return needsSave; }
static TuningCache* get();
private:
Entry LoadKernel_v1(const Params& params, uint32_t computeUnitsCount);
Entry LoadKernel_v2(const Params& params, uint32_t computeUnitsCount);
@ -65,21 +67,9 @@ private:
class AutoTuner {
public:
AutoTuner() = default;
std::tuple<std::string, int> LoadKernelOnline(const TuningMode tuningMode,
const std::string& cacheFilePath,
const Params& params);
void StoreKernel(const std::string& cacheFilePath,
const Params& params,
std::string implementationName,
const int tuneIndex);
void RemoveKernel(const std::string& cacheFilePath,
const Params& params);
std::tuple<std::string, int> LoadKernelOffline(TuningCache* cache,
const Params& params);
std::tuple<std::string, int> LoadKernelOffline(const Params& params);
private:
std::string lastCachePath;
std::shared_ptr<TuningCache> onlineCache;
std::mutex mutex; // Mutex to synchronize cache updates
/*

View File

@ -443,36 +443,6 @@ struct DimTensor {
DimTensor(T b, T f, T w, T z, T y, T x) : b(b), f(f), w(w), z(z), y(y), x(x) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// AutoTunerMode
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
enum class TuningMode {
TUNING_DISABLED, // Tuning is disabled.
TUNING_USE_CACHE, // Tuning using the cached data (no on-line tuning for non-existing data).
TUNING_TUNE_AND_CACHE, // Tuning using the cached data if it exists; tune and update the cache otherwise.
TUNING_USE_AND_UPDATE, // Tuning using the cached data and other updating tasks.
// Performs updating tasks like removal of invalid caches, promoting to new formats, etc.
// No tuning for non-existing data.
TUNING_RETUNE_AND_CACHE // Perform tuning even if the cached data exists.
};
inline bool UseCached(const TuningMode& mode) {
return mode == TuningMode::TUNING_USE_CACHE
|| mode == TuningMode::TUNING_TUNE_AND_CACHE
|| mode == TuningMode::TUNING_USE_AND_UPDATE;
}
inline bool PerformTuning(const TuningMode& mode) {
return mode == TuningMode::TUNING_TUNE_AND_CACHE
|| mode == TuningMode::TUNING_RETUNE_AND_CACHE;
}
inline bool PerformUpdates(const TuningMode& mode) {
return mode == TuningMode::TUNING_TUNE_AND_CACHE
|| mode == TuningMode::TUNING_USE_AND_UPDATE
|| mode == TuningMode::TUNING_RETUNE_AND_CACHE;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Aliases:
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -1,52 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "device_cache_reader.h"
#include "auto_tuner.h"
#include <limits>
#include "istreamwrapper.h"
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <windows.h>
#include <SetupAPI.h>
#include <devguid.h>
#include <cstring>
#else
#include <unistd.h>
#include <limits.h>
#include <link.h>
#include <dlfcn.h>
#endif
#include <fstream>
#include <iostream>
#include <utility>
namespace kernel_selector {
std::shared_ptr<kernel_selector::TuningCache> CreateTuningCacheFromFile(std::string tuning_cache_path) {
if (tuning_cache_path.compare("cache.json") == 0) {
#ifdef _WIN32
char path[MAX_PATH];
HMODULE hm = NULL;
GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
(LPCSTR)&CreateTuningCacheFromFile,
&hm);
GetModuleFileName(hm, path, sizeof(path));
std::string bin_path(path);
tuning_cache_path = bin_path.substr(0, bin_path.find_last_of("\\")) + "\\cache.json";
#else
const char* device_info_failed_msg = "Device lookup failed";
Dl_info dl_info;
dladdr((void*)(device_info_failed_msg), &dl_info); // NOLINT
std::string bin_path(dl_info.dli_fname);
tuning_cache_path = bin_path.substr(0, bin_path.find_last_of("/")) + "/cache.json";
#endif
}
return std::make_shared<kernel_selector::TuningCache>(tuning_cache_path, false);
}
} // namespace kernel_selector

View File

@ -1,14 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <string>
namespace kernel_selector {
class TuningCache;
std::shared_ptr<kernel_selector::TuningCache> CreateTuningCacheFromFile(std::string tuning_cache_path);
} // namespace kernel_selector

View File

@ -71,15 +71,12 @@ KernelData kernel_selector_base::get_best_kernel(const Params& params, const opt
return kernels[0];
}
KernelsData kernel_selector_base::GetNaiveBestKernel(const Params& params,
const optional_params& options,
KernelType kType) const {
KernelsData kernel_selector_base::GetNaiveBestKernel(const KernelList& all_impls, const Params& params, const optional_params& options) const {
KernelsData kernelsData;
std::string kernelName;
auto allImplementations = GetAllImplementations(params, options, kType);
for (const auto& implementation : allImplementations) {
for (const auto& implementation : all_impls) {
// TODO: Unify this check with the Validate virtual method. Make
// sure that the method is called here only, not in all the
// GetKernelsData implementations.
@ -87,28 +84,14 @@ KernelsData kernel_selector_base::GetNaiveBestKernel(const Params& params,
KernelsData kds = implementation->GetKernelsData(params, options);
if (kds.size() && kds[0].kernels.size()) {
#ifdef ENABLE_ENV
const auto& it = forceKernels.find(implementation->GetName());
if (it != forceKernels.end()) {
if (it->second == true) {
ENV_PRINTF("Force: %s\n", it->first.c_str());
return kds;
} else {
ENV_PRINTF("Deny: %s\n", it->first.c_str());
}
} else {
#endif
kernelsData = kds;
kernelName = implementation->GetName();
break;
#ifdef ENABLE_ENV
}
#endif
kernelsData = kds;
kernelName = implementation->GetName();
break;
}
} catch (std::runtime_error& ex) {
// we have to handle it in order to avoid exceptions in KernelSelector as much as we can
kernelName = (implementation != nullptr)? implementation->GetName() : "[impl is null]";
GPU_DEBUG_TRACE << "layerID: " << params.layerID << " kenrel: " << kernelName << " - " << ex.what() << std::endl;
GPU_DEBUG_TRACE << "layerID: " << params.layerID << " kernel: " << kernelName << " - " << ex.what() << std::endl;
}
}
@ -120,10 +103,11 @@ KernelsData kernel_selector_base::GetNaiveBestKernel(const Params& params,
return kernelsData;
}
KernelsData kernel_selector_base::GetNaiveBestKernel(const Params& params, const optional_params& options, KernelType kType) const {
return GetNaiveBestKernel(GetAllImplementations(params, options, kType), params, options);
}
KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params,
const optional_params& options,
KernelType kType) const {
KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params, const optional_params& options, KernelType kType) const {
KernelsData kernelsData;
std::string kernelName;
@ -131,16 +115,8 @@ KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params,
auto kernel_params = static_cast<const base_params&>(params);
bool int8_kernel = kernel_params.inputs[0].GetDType() == Datatype::INT8 || kernel_params.inputs[0].GetDType() == Datatype::UINT8;
std::tuple<std::string, int> cachedKernelConfig;
if (options.tuningParams.mode == TuningMode::TUNING_DISABLED && !int8_kernel) { // Try to load kernel/config from offline cache
#if ENABLE_OFFLINE_TUNING_CACHE
cachedKernelConfig = autoTuner.LoadKernelOffline(params.engineInfo.deviceCache.get(), params);
#else
return GetNaiveBestKernel(params, options, kType);
#endif
} else if (UseCached(options.tuningParams.mode)) { // Try to load kernel/config from on-line cache
cachedKernelConfig = autoTuner.LoadKernelOnline(options.tuningParams.mode,
options.tuningParams.cacheFilePath,
params);
if (!int8_kernel) { // Try to load kernel/config from offline cache
cachedKernelConfig = autoTuner.LoadKernelOffline(params);
}
bool hashFoundInCache = !std::get<0>(cachedKernelConfig).empty();
@ -166,83 +142,7 @@ KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params,
}
}
// Cache is not valid, remove it if performing update tasks.
if (hashFoundInCache && PerformUpdates(options.tuningParams.mode)) {
autoTuner.RemoveKernel(options.tuningParams.cacheFilePath, params);
}
if (hashFoundInCache || // Cache is not valid - hash exists in cache but kernelsData was empty or kernel
// doesn't support the required key.
!PerformTuning(options.tuningParams.mode) || // On-line tuning is not allowed.
!options.tuningParams.runner) { // Runner is invalid - can't run on-line tuning
// Fall back to the default path.
return GetNaiveBestKernel(params, options, kType);
}
// Start on-line tuning
assert(options.tuningParams.runner);
for (const auto& implementation : allImplementations) {
const ParamsKey implKey = implementation->GetSupportedKey();
if (implKey.TuningSupport()) {
try {
KernelsData kds = implementation->GetKernelsDataForAutoTune(params, options);
auto runTimes = options.tuningParams.runner->run_kernels(kds);
for (size_t i = 0; i < kds.size(); i++) {
kds[i].runTime = runTimes[i].count();
if (kernelsData.size() == 0 || kds[i].runTime < kernelsData[0].runTime) {
kernelsData = {kds[i]};
kernelName = implementation->GetName();
}
}
} catch (std::runtime_error& ex) {
// we have to handle it in order to avoid exception in KernelSelector as much we can
kernelName = (implementation != nullptr)? implementation->GetName() : "[impl is null]";
GPU_DEBUG_TRACE << "layerID: " << params.layerID << " kenrel: " << kernelName << " - " << ex.what() << std::endl;
}
}
}
// try to fallback to reference kernels if no optimized were found during tuning
if (!kernelsData.size()) {
for (const auto& implementation : allImplementations) {
const ParamsKey implKey = implementation->GetSupportedKey();
// this time, check only implementations that have disabled tuning
if (!implKey.TuningSupport()) {
try {
KernelsData kds = implementation->GetKernelsDataForAutoTune(params, options);
auto runTimes = options.tuningParams.runner->run_kernels(kds);
for (size_t i = 0; i < kds.size(); i++) {
kds[i].runTime = runTimes[i].count();
if (kernelsData.size() == 0 || kds[i].runTime < kernelsData[0].runTime) {
kernelsData = {kds[i]};
kernelName = implementation->GetName();
}
}
} catch (std::runtime_error& ex) {
// we have to handle it in order to avoid exception in KernelSelector as much we can
kernelName = (implementation != nullptr)? implementation->GetName() : "[impl is null]";
GPU_DEBUG_TRACE << "layerID: " << params.layerID << " kenrel: " << kernelName << " - " << ex.what() << std::endl;
}
}
}
}
if (kernelsData.size()) {
kernelsData[0].kernelName = kernelName;
kernelsData[0].kernels[0].params.layerID = params.layerID;
autoTuner.StoreKernel(options.tuningParams.cacheFilePath,
params,
kernelName,
kernelsData[0].autoTuneIndex);
} else {
// Tuning failed, fall back to naive path
return GetNaiveBestKernel(params, options, kType);
}
return kernelsData;
return GetNaiveBestKernel(allImplementations, params, options);
}
KernelList kernel_selector_base::GetAllImplementations(const Params& params, const optional_params& options, KernelType kType) const {

View File

@ -5,7 +5,6 @@
#pragma once
#include "kernel_selector_common.h"
#include "kernel_runner_interface.h"
#include "auto_tuner.h"
#include <vector>
#include <memory>
@ -32,13 +31,17 @@ protected:
}
virtual KernelsData GetBestKernels(const Params& params, const optional_params& options) const = 0;
virtual KernelsData GetNaiveBestKernel(const Params& params,
const optional_params& options,
KernelType kType) const;
KernelsData GetNaiveBestKernel(const KernelList& all_impls,
const Params& params,
const optional_params& options) const;
virtual KernelsData GetAutoTuneBestKernel(const Params& params,
const optional_params& options,
KernelType kType) const;
KernelsData GetNaiveBestKernel(const Params& params,
const optional_params& options,
KernelType kType) const;
KernelsData GetAutoTuneBestKernel(const Params& params,
const optional_params& options,
KernelType kType) const;
KernelList GetAllImplementations(const Params& params, const optional_params& options, KernelType kType) const;

View File

@ -360,25 +360,6 @@ void ParamsKey::EnableArgMaxMinAxis(ArgMaxMinAxis a) {
}
}
void ParamsKey::EnableIndexSelectAxis(IndexSelectAxis a) {
switch (a) {
case IndexSelectAxis::X:
key.restrict.val.dedicated.idxsel.axisX = 1;
break;
case IndexSelectAxis::Y:
key.restrict.val.dedicated.idxsel.axisY = 1;
break;
case IndexSelectAxis::FEATURE:
key.restrict.val.dedicated.idxsel.axisFeature = 1;
break;
case IndexSelectAxis::BATCH:
key.restrict.val.dedicated.idxsel.axisBatch = 1;
break;
default:
break;
}
}
void ParamsKey::EnableQuantization(QuantizationType q) {
switch (q) {
case QuantizationType::NONE:

View File

@ -25,7 +25,6 @@ using DataBitField = std::bitset<DataLayout::DataLayoutCount>;
using WightsBitField = std::bitset<WeightsLayout::WeightsLayoutCount>;
class JitConstants;
class TuningCache;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// fuse_params
@ -102,7 +101,6 @@ class ParamsKey {
public:
ParamsKey() {
key.restrict.raw = 0;
key.enableTuning = 1;
key.inputType.raw = 0;
key.outputType.raw = 0;
key.inputWeightsType.raw = 0;
@ -266,7 +264,6 @@ public:
uint32_t raw;
} DataTypesKey;
uint32_t enableTuning;
DataTypesKey inputType;
DataTypesKey outputType;
DataTypesKey inputWeightsType;
@ -345,17 +342,9 @@ public:
void EnableLSTMDyanmicOptionalHiddenOutput() { key.restrict.val.dedicated.lstm_dynamic.last_hidden = 1; }
void EnableLSTMDyanmicOptionalCellOutput() { key.restrict.val.dedicated.lstm_dynamic.last_cell = 1; }
void EnableConcatKernelPerInput() { key.restrict.val.dedicated.concat.kernelPerInput = 1; }
void DisableTuning() { key.enableTuning = 0; }
void EnableConcatOneKernel() { key.restrict.val.dedicated.concat.oneKernel = 1; }
void EnableArgMaxMinAxis(ArgMaxMinAxis a);
void EnableIndexSelectAxis(IndexSelectAxis a);
void EnableFusedConvEltwiseRWOutOpt();
bool Support(const ParamsKey& k) const;
bool TuningSupport() const {
if (key.enableTuning == 1)
return true;
return false;
}
bool isEnabledDifferentInputWeightsTypes() const {
return key.restrict.val.different_input_weights_types ? true : false;
}
@ -405,7 +394,6 @@ struct EngineInfo {
std::string deviceId = "";
std::string driverVersion = "";
std::vector<size_t> supportedSimdSizes = {};
std::shared_ptr<TuningCache> deviceCache;
DeviceFeaturesKey get_supported_device_features_key() const;
};
@ -663,18 +651,6 @@ protected:
explicit base_params(KernelType kt) : Params(kt, ""), inputs(1), outputs(1) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Auto tuner parameters
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class KernelRunnerInterface;
struct TuningParams {
TuningMode mode;
std::string cacheFilePath;
std::shared_ptr<KernelRunnerInterface> runner;
TuningParams() : mode(TuningMode::TUNING_DISABLED), cacheFilePath(""), runner(nullptr) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -694,8 +670,6 @@ struct optional_params {
bool allowOutputReordering =
false; // allow kernel to ask graph compiler to reorder the output data before executing the next kernel
TuningParams tuningParams;
virtual ParamsKey GetSupportedKey() const;
protected:

View File

@ -18,6 +18,6 @@ binary_convolution_kernel_selector::binary_convolution_kernel_selector() {
KernelsData binary_convolution_kernel_selector::GetBestKernels(const Params& params,
const optional_params& options) const {
return GetAutoTuneBestKernel(params, options, KernelType::BINARY_CONVOLUTION);
return GetNaiveBestKernel(params, options, KernelType::BINARY_CONVOLUTION);
}
} // namespace kernel_selector

View File

@ -64,7 +64,6 @@ ParamsKey Convolution_kernel_b_fs_yx_fsv16_imad_1x1::GetSupportedKey() const {
k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA);
k.EnableQuantization(QuantizationType::ASYMMETRIC_WEIGHTS);
k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS);
k.DisableTuning();
return k;
}

View File

@ -375,7 +375,6 @@ ParamsKey Convolution_kernel_b_fs_zyx_fsv16_imad::GetSupportedKey() const {
k.EnableQuantization(QuantizationType::ASYMMETRIC_WEIGHTS);
k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS);
k.EnableDilation();
k.DisableTuning();
return k;
}

View File

@ -87,7 +87,6 @@ ParamsKey ConvolutionKernel_imad::GetSupportedKey() const {
k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA);
k.EnableQuantization(QuantizationType::ASYMMETRIC_WEIGHTS);
k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS);
k.DisableTuning();
return k;
}

View File

@ -38,7 +38,6 @@ ParamsKey Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::GetSupportedKey() co
k.EnableNonBiasTerm();
k.EnableBatching();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}

View File

@ -38,7 +38,6 @@ ParamsKey Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::GetSupportedKey() co
k.EnableNonBiasTerm();
k.EnableBatching();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.DisableTuning();
return k;
}

View File

@ -40,7 +40,6 @@ ParamsKey ConvolutionKernel_mmad_b_fs_yx_fsv32::GetSupportedKey() const {
k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS);
k.EnableDifferentTypes();
k.EnableDifferentInputWeightsTypes();
k.DisableTuning();
return k;
}

View File

@ -37,7 +37,6 @@ ParamsKey ConvolutionKernel_mmad_b_fs_yx_fsv32_dw::GetSupportedKey() const {
k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA);
k.EnableQuantization(QuantizationType::ASYMMETRIC_WEIGHTS);
k.EnableDifferentTypes();
k.DisableTuning();
k.EnableGroupedConvolution();
k.EnableDifferentInputWeightsTypes();
return k;

View File

@ -43,7 +43,6 @@ ParamsKey ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv32::GetSupportedKey() const
k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS);
k.EnableDifferentTypes();
k.EnableDifferentInputWeightsTypes();
k.DisableTuning();
return k;
}

View File

@ -36,7 +36,6 @@ ParamsKey ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv4::GetSupportedKey() const {
k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA);
k.EnableDifferentTypes();
k.EnableDifferentInputWeightsTypes();
k.DisableTuning();
return k;
}

View File

@ -38,7 +38,6 @@ ParamsKey ConvolutionKernel_Ref::GetSupportedKey() const {
k.EnableBiasPerOutput();
k.EnableNonBiasTerm();
k.EnableBatching();
k.DisableTuning();
k.EnableGroupedConvolution();
k.EnableQuantization(QuantizationType::SYMMETRIC);

View File

@ -22,7 +22,6 @@ ParamsKey ConvolutionKernel_yxfb_Ref::GetSupportedKey() const {
k.EnableNonBiasTerm();
k.EnableBatching();
k.EnableDilation();
k.DisableTuning();
k.EnableGroupedConvolution();
return k;
}

View File

@ -23,7 +23,6 @@ ParamsKey DeformableConvolutionKernel_bfyx_conv::GetSupportedKey() const {
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableBatching();
k.DisableTuning();
k.EnableGroupedConvolution();
k.EnableDeformableMode();
k.EnableDeformableMask();

View File

@ -23,7 +23,6 @@ ParamsKey DeformableConvolutionKernel_bfyx_interp::GetSupportedKey() const {
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableBatching();
k.DisableTuning();
k.EnableGroupedConvolution();
k.EnableDeformableMode();
k.EnableDeformableMask();

View File

@ -27,7 +27,6 @@ ParamsKey DeformableConvolutionKernel_bfyx_Ref::GetSupportedKey() const {
k.EnableBiasPerFeature();
k.EnableNonBiasTerm();
k.EnableBatching();
k.DisableTuning();
k.EnableGroupedConvolution();
k.EnableDeformableMode();
k.EnableDeformableMask();

View File

@ -65,7 +65,6 @@ void ExecutionConfig::set_default() {
std::make_tuple(ov::intel_gpu::enable_memory_pool, true),
std::make_tuple(ov::intel_gpu::allow_static_input_reorder, false),
std::make_tuple(ov::intel_gpu::custom_outputs, std::vector<std::string>{}),
std::make_tuple(ov::intel_gpu::tuning_config, ov::intel_gpu::TuningConfig{}),
std::make_tuple(ov::intel_gpu::dump_graphs, ""),
std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}),
std::make_tuple(ov::intel_gpu::partial_build_program, false),

View File

@ -1,353 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/convolution.hpp>
#include <intel_gpu/primitives/data.hpp>
#include <iostream>
#include <fstream>
#include <string>
namespace {
enum class cache_version {
version_1,
version_1_2, // version 1 cache, but version 2 file
version_2,
version_2_invalid,
version_2_from_1,
version_2_empty
};
std::string reference_impl_name = "convolution_gpu_ref";
std::string eus_marker = "__EUs__";
std::string cache_v1 =
R"__a({
"__EUs__": {
"18283230515392601293": ["convolution_gpu_ref", 0]
}
})__a";
std::string cache_v1_2 =
R"__a({
"version_2": {
},
"version_1": {
"__EUs__": {
"18283230515392601293": ["convolution_gpu_ref", 0]
}
}
})__a";
std::string cache_v2 =
R"__a({
"version_2": {
"__EUs__": {
"CONVOLUTION": {
"F32_BFYX_v3_p0_0_v3_p0_0_v16_p0_0_v1_p0_0;F32_BFYX_v3_p0_0_v3_p0_0_v16_p0_0_v1_p0_0;1_1_1;1_1_1;1_1_1;0_0_0;1;1": ["convolution_gpu_ref", 0]
}
}
}
})__a";
std::string cache_v2_from_v1 =
R"__a({
"version_2": {
"__EUs__": {
"CONVOLUTION": {
"F32_BFYX_v3_p0_0_v3_p0_0_v16_p0_0_v1_p0_0;F32_BFYX_v3_p0_0_v3_p0_0_v16_p0_0_v1_p0_0;1_1_1;1_1_1;1_1_1;0_0_0;1;1": ["convolution_gpu_ref", 0]
}
}
},
"version_1": {
"__EUs__": {}
}
})__a";
std::string cache_v2_invalid =
R"__a({
"version_2": {
"__EUs__": {
"CONVOLUTION": {
"F32_BFYX_v3_p0_0_v3_p0_0_v16_p0_0_v1_p0_0;F32_BFYX_v3_p0_0_v3_p0_0_v16_p0_0_v1_p0_0;1_1_1;1_1_1;1_1_1;0_0_0;1;1": ["non_existent", 0]
}
}
}
})__a";
std::string cache_v2_empty =
R"__a({
"version_2": {
"__EUs__": {
"CONVOLUTION": {}
}
}
})__a";
std::string get_cache_version(cache_version version) {
std::string cache;
switch (version) {
case cache_version::version_1:
cache = cache_v1;
break;
case cache_version::version_1_2:
cache = cache_v1_2;
break;
case cache_version::version_2:
cache = cache_v2;
break;
case cache_version::version_2_invalid:
cache = cache_v2_invalid;
break;
case cache_version::version_2_from_1:
cache = cache_v2_from_v1;
break;
case cache_version::version_2_empty:
cache = cache_v2_empty;
break;
default:
throw std::invalid_argument("invalid cache version");
}
return cache;
}
std::string get_temporary_cache_file() {
static int i = 0;
std::string tmp_cache_file = "tmp_cldnn_test_cache_" + std::to_string(i) + ".json";
i += 1;
return tmp_cache_file;
}
template <typename T>
void replace(std::string& text, const std::string& replaced, T replacement) {
auto it = text.find(replaced);
while (it != std::string::npos) {
text.replace(it, replaced.length(), std::to_string(replacement));
it = text.find(replaced);
}
}
void write(const std::string& filename, const std::string& text) {
std::ofstream file;
file.open(filename);
if (!file.is_open())
throw std::runtime_error("Could not open file " + filename);
file << text;
file.close();
if (!file) {
throw std::runtime_error("Failure writing to file " + filename);
}
}
std::string read(const std::string& filename) {
std::stringstream ss;
std::ifstream file;
file.open(filename);
if (!file.is_open())
throw std::runtime_error("Could not open file " + filename);
ss << file.rdbuf();
file.close();
if (!file) {
throw std::runtime_error("Failure reading from file " + filename);
}
return ss.str();
}
void remove(const std::string& filename) {
std::remove(filename.c_str());
}
class cache_test_helper {
public:
cache_test_helper(cldnn::engine& engine, cache_version v)
: _engine(engine)
, _mode(ov::intel_gpu::TuningMode::tuning_disabled)
, cache_filename(get_temporary_cache_file())
{
auto cache = get_cache_version(v);
auto eus = engine.get_device_info().execution_units_count;
replace(cache, eus_marker, eus);
write(cache_filename, cache);
}
virtual ~cache_test_helper() {
remove(cache_filename);
}
cache_test_helper& with_mode(ov::intel_gpu::TuningMode mode) {
_mode = mode;
return *this;
}
cache_test_helper& expect_cache(cache_version version) {
compare_cache = version;
return *this;
}
cache_test_helper& expect_implementation(std::string implementation) {
compare_implementation = implementation;
return *this;
}
cache_test_helper& expect_implementation_not(std::string implementation) {
compare_implementation = implementation;
compare_implementation.not_equal = true;
return *this;
}
void test() {
auto w_mem = _engine.allocate_memory(cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 16, 16, 1, 1 }));
auto topology = cldnn::topology(
cldnn::input_layout("input", cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 16, 3, 3 })),
cldnn::data("weights", w_mem),
cldnn::convolution("conv", input_info("input"), { "weights" })
);
ov::intel_gpu::TuningConfig tune_conf;
tune_conf.cache_file_path = cache_filename;
tune_conf.mode = _mode;
ExecutionConfig config{
ov::intel_gpu::tuning_config(tune_conf),
ov::intel_gpu::optimize_data(true)
};
cldnn::network network(_engine, topology, config);
auto in_mem = _engine.allocate_memory(cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 16, 3, 3 }));
network.set_input_data("input", in_mem);
network.execute();
if (compare_implementation.compare) {
std::string exec_impl = network.get_implementation_info("conv");
auto precision_pos = exec_impl.find("__");
exec_impl = exec_impl.substr(0, precision_pos);
if (compare_implementation.not_equal) {
EXPECT_NE(exec_impl, compare_implementation.value);
} else {
ASSERT_EQ(exec_impl, compare_implementation.value);
}
}
if (compare_cache.compare) {
auto cache = read(cache_filename);
auto expected_cache = get_cache_version(compare_cache.value);
auto eus = _engine.get_device_info().execution_units_count;
replace(expected_cache, eus_marker, eus);
ASSERT_EQ(cache, expected_cache);
}
}
private:
template <typename T>
struct optional_compare {
bool compare;
bool not_equal;
T value;
optional_compare() : compare(false) {}
optional_compare(T v) : compare(true), not_equal(false), value(v) {}
optional_compare(T v, bool neq) : compare(true), not_equal(neq), value(v) {}
};
cldnn::engine& _engine;
ov::intel_gpu::TuningMode _mode;
std::string cache_filename;
optional_compare<cache_version> compare_cache;
optional_compare<std::string> compare_implementation;
};
} // namespace
class cache_version_test : public testing::TestWithParam<cache_version> {
public:
static std::string to_string(const testing::TestParamInfo<cache_version>& param) {
std::string result;
switch (param.param) {
case cache_version::version_1:
result = "version_1";
break;
case cache_version::version_1_2:
result = "version_1_2";
break;
case cache_version::version_2:
result = "version_2";
break;
case cache_version::version_2_invalid:
result = "version_2_invalid";
break;
case cache_version::version_2_from_1:
result = "version_2_from_1";
break;
case cache_version::version_2_empty:
result = "version_2_empty";
break;
default:
result = std::to_string(static_cast<int>(param.param));
break;
}
return result;
}
};
TEST(cache_test, no_cache_baseline) {
SCOPED_TRACE("default implementation same as reference, cache tests may provide invalid pass");
auto& engine = tests::get_test_engine();
auto helper = cache_test_helper(engine, cache_version::version_2);
helper.with_mode(ov::intel_gpu::TuningMode::tuning_disabled)
.expect_implementation_not(reference_impl_name)
.test();
}
TEST_P(cache_version_test, use_only) {
auto version = GetParam();
auto& engine = tests::get_test_engine();
cache_test_helper helper(engine, version);
helper.with_mode(ov::intel_gpu::TuningMode::tuning_use_cache)
.expect_implementation(reference_impl_name)
.expect_cache(version)
.test();
}
TEST_P(cache_version_test, update) {
auto version = GetParam();
auto ex_version = cache_version::version_2;
if (version != cache_version::version_2) {
ex_version = cache_version::version_2_from_1;
}
auto& engine = tests::get_test_engine();
cache_test_helper helper(engine, version);
helper.with_mode(ov::intel_gpu::TuningMode::tuning_use_and_update)
.expect_implementation(reference_impl_name)
.expect_cache(ex_version)
.test();
}
INSTANTIATE_TEST_SUITE_P(
smoke,
cache_version_test,
testing::Values(cache_version::version_1, cache_version::version_1_2, cache_version::version_2),
cache_version_test::to_string);
TEST(cache_test, remove_invalid) {
auto& engine = tests::get_test_engine();
cache_test_helper helper(engine, cache_version::version_2_invalid);
helper.with_mode(ov::intel_gpu::TuningMode::tuning_use_and_update)
.expect_implementation_not(reference_impl_name)
.expect_cache(cache_version::version_2_empty)
.test();
}