[GPU] Remote context reuse and internal config update (#14635)

Vladimir Paramuzov 2023-01-11 15:14:03 +04:00 committed by GitHub
parent 1d59a5a29b
commit 4feaeaad68
198 changed files with 4477 additions and 4876 deletions

View File

@ -1,488 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/primitives/implementation_desc.hpp"
#include "topology.hpp"
#include <memory>
#include <vector>
#include <string>
#include <map>
#include <utility>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @defgroup cpp_program Program compilation
/// @{
/// @brief Represents user-provided program build option type.
enum class build_option_type {
/// @brief Allow primitives fusing during program build (default: false).
fusing,
/// @brief Enable implicit reordering for user inputs (default: false).
optimize_data,
/// @brief Enable implicit static input reordering for user inputs (default: false).
allow_static_input_reorder,
/// @brief Enable debug mode (default: false).
/// @details This option enforces all program primitives to be accessible as outputs.
debug,
/// @brief User selected list of program outputs.
outputs,
/// @brief User defined learning parameters.
learning_config,
/// @brief Tuning config (default: Tuning is disabled).
/// @details The tuner will automatically find the optimal kernel/config for each node in the graph,
/// by running multiple implementations and configurations per node and storing the optimal one in cache.
/// Expect long execution time in the first run.
/// After the first run a cache with the tuning results will be created in the path provided.
/// This cache will be used in the next runs.
tuning_config,
/// @brief Specifies a directory to which stages of network compilation should be dumped. (default: empty, i.e. no dumping)
graph_dumps_dir,
/// @brief Specifies a directory to which compiled kernels should be cached or can be loaded from. (default: empty, i.e. no caching)
kernels_cache_dir,
/// @brief Name for serialization process
serialize_network,
load_program,
force_implementations,
partial_build_program,
allow_new_shape_infer
};
/// @brief Tuning mode.
enum class tuning_mode {
/// @brief Tuning is disabled.
tuning_disabled,
/// @brief Tuning using the cached data (no on-line tuning for non-existing data).
tuning_use_cache,
/// @brief Tuning using the cached data if it exists; tune and update the cache otherwise.
tuning_tune_and_cache,
/// @brief Tuning using the cached data and update tasks.
/// @details Performs updating tasks like removal of invalid caches, promoting to new format, etc.
/// No tuning for non-existing data.
tuning_use_and_update,
/// @brief Retune the cache data even if it exists.
tuning_retune_and_cache
};
/// @brief Tuning configuration.
struct tuning_config_options {
tuning_mode mode;
std::string cache_file_path;
tuning_config_options() : mode(tuning_mode::tuning_disabled), cache_file_path("") {}
};
/// @brief Learning parameters.
struct learning_params {
float momentum = 0.0;
float weights_decay = 0.0;
learning_params() : momentum(0.9f), weights_decay(0.0005f) {}
};
/// @brief Represents user-provided program build option.
struct build_option {
/// @brief Allow primitives fusing during program build (default: false).
static std::shared_ptr<const build_option> fusing(bool enable = false);
/// @brief Enable implicit reordering for user inputs (default: false).
static std::shared_ptr<const build_option> optimize_data(bool enable = false);
/// @brief Enable implicit reordering for static user inputs (default: false).
static std::shared_ptr<const build_option> allow_static_input_reorder(bool enable = false);
/// @brief Enable debug mode (default: false).
/// @details This option enforces all program primitives to be accessible as outputs.
static std::shared_ptr<const build_option> debug(bool enable = false);
/// @brief User selected list of program outputs.
static std::shared_ptr<const build_option> outputs(const std::vector<primitive_id>& outs);
/// @brief Tuning configuration (default: Tuning is disabled).
/// @details This option will automatically find the optimal kernel/config for each node in the graph,
/// by running multiple implementations and configurations per node and storing the optimal one in cache.
/// Expect long execution time in the first run (unless the cache-only mode is enabled).
/// After the first run a cache with the tuning results will be created in the path provided.
/// This cache will be used in the next runs.
static std::shared_ptr<const build_option> tuning_config(
const tuning_config_options& config = tuning_config_options());
/// @brief Specifies a directory to which stages of network compilation should be dumped (default: empty, i.e. no dumping)
static std::shared_ptr<const build_option> graph_dumps_dir(const std::string& dir_path);
/// @brief Specifies a directory to which compiled kernels should be cached or can be loaded from. (default: empty, i.e. no caching)
static std::shared_ptr<const build_option> kernels_cache_dir(const std::string& dir_path);
/// @brief Specifies a name for serialization process.
static std::shared_ptr<const build_option> serialize_network(const std::string& network_name);
/// @brief Specifies a name of load_program process.
static std::shared_ptr<const build_option> load_program(const std::string& network_name);
/// @brief User defined learning parameters.
static std::shared_ptr<const build_option> learning_config(const learning_params& params = learning_params());
/// @brief Specifies user defined implementation details to use.
static std::shared_ptr<const build_option> force_implementations(implementation_forcing_map forcing);
static std::shared_ptr<const build_option> partial_build_program(bool set = false);
static std::shared_ptr<const build_option> allow_new_shape_infer(bool set = false);
virtual ~build_option() = default;
private:
/// @brief Returns option type represented by this object.
virtual build_option_type get_type() const = 0;
friend class build_options;
};
/// @brief @ref build_option specialization for boolean options.
template <build_option_type OptType>
struct build_option_bool : build_option {
/// @brief Constructs option.
/// @param value Is option enabled.
explicit build_option_bool(bool value) : _value(value ? 1 : 0) {}
/// @brief Is option enabled.
bool enabled() const { return _value != 0; }
private:
build_option_type get_type() const override { return OptType; }
uintptr_t _value;
};
/// @brief @ref build_option specialization for program outputs list.
struct build_option_outputs : build_option {
/// @brief The list of output ids (names)
const std::vector<primitive_id> outputs;
/// @brief Constructs option.
/// @param outs List of output ids (names)
explicit build_option_outputs(const std::vector<primitive_id>& outs)
: outputs(outs) {}
private:
/// @brief Returns build_option_type::outputs.
build_option_type get_type() const override { return build_option_type::outputs; }
build_option_outputs(const build_option_outputs& other) = delete;
build_option_outputs& operator=(const build_option_outputs& other) = delete;
};
/// @brief @ref build_option specialization for learning config.
struct build_option_learning_config : build_option {
/// @brief Learning parameters.
const learning_params params;
/// @brief Constructs learning config build option.
/// @param params Parameters for learning.
explicit build_option_learning_config(const learning_params& params)
: params(params) {}
private:
/// @brief Returns build_option_type::learning_config.
build_option_type get_type() const override { return build_option_type::learning_config; }
build_option_learning_config(const build_option_learning_config& other) = delete;
build_option_learning_config& operator=(const build_option_learning_config& other) = delete;
};
/// @brief @ref build_option specialization for tuning config.
struct build_option_tuning_config : build_option {
/// @brief Tuning configuration
const tuning_config_options config;
/// @brief Constructs tuning config build option.
/// @param tuning_config Configuration for the tuning.
explicit build_option_tuning_config(const tuning_config_options& tuning_config)
: config(tuning_config) {}
private:
/// @brief Returns build_option_type::tuning_config.
build_option_type get_type() const override { return build_option_type::tuning_config; }
build_option_tuning_config(const build_option_tuning_config& other) = delete;
build_option_tuning_config& operator=(const build_option_tuning_config& other) = delete;
};
/// @brief @ref build_option specialization for selecting a directory.
template <build_option_type OptType>
struct build_option_directory : build_option {
const std::string directory_path;
/// @brief Constructs option.
/// @param dir_path Path to the directory.
explicit build_option_directory(const std::string& dir_path) : directory_path(dir_path) {}
private:
/// @brief Returns build_option_type::graph_dumps_dir.
build_option_type get_type() const override { return build_option_type::graph_dumps_dir; }
build_option_directory(const build_option_directory& other) = delete;
build_option_directory& operator=(const build_option_directory& other) = delete;
};
/// @brief @ref build_option specialization for selecting a directory.
template <build_option_type OptType>
struct build_option_kernels_cache_dir : build_option {
const std::string directory_path;
explicit build_option_kernels_cache_dir(const std::string& dir_path) : directory_path(dir_path) {}
private:
/// @brief Returns build_option_type::kernels_cache_dir.
build_option_type get_type() const override { return build_option_type::kernels_cache_dir; }
build_option_kernels_cache_dir(const build_option_kernels_cache_dir& other) = delete;
build_option_kernels_cache_dir& operator=(const build_option_kernels_cache_dir& other) = delete;
};
/// @brief @ref build_option specialization for serialization process.
template <build_option_type OptType>
struct build_option_serialization : build_option {
const std::string serialization_network_name;
explicit build_option_serialization(const std::string& name) : serialization_network_name(name) {}
private:
build_option_type get_type() const override { return build_option_type::serialize_network; }
build_option_serialization(const build_option_serialization& other) = delete;
build_option_serialization& operator=(const build_option_serialization& other) = delete;
};
/// @brief @ref build_option specialization for load_program process.
template <build_option_type OptType>
struct build_option_load_program : build_option {
const std::string load_program_name;
explicit build_option_load_program(const std::string& name) : load_program_name(name) {}
private:
build_option_type get_type() const override { return build_option_type::load_program; }
build_option_load_program(const build_option_load_program& other) = delete;
build_option_load_program& operator=(const build_option_load_program& other) = delete;
};
struct build_option_force_implementations : build_option {
implementation_forcing_map forcing;
explicit build_option_force_implementations(implementation_forcing_map _forcing) : forcing(std::move(_forcing)) {}
private:
build_option_type get_type() const override { return build_option_type::force_implementations; }
build_option_force_implementations(const build_option_force_implementations& other) = delete;
build_option_force_implementations& operator=(const build_option_force_implementations& other) = delete;
};
namespace detail {
/// @brief Helper template to convert @ref build_option_type value to particular @ref build_option class.
template <build_option_type OptType>
struct build_option_traits {
/// @brief @ref build_option object type which represents the particular @p OptType.
typedef build_option object_type;
/// @brief Make default @ref build_option corresponding @p OptType
static std::shared_ptr<const build_option> make_default();
};
#ifndef DOXYGEN_SHOULD_SKIP_THIS
template <>
struct build_option_traits<build_option_type::fusing> {
typedef build_option_bool<build_option_type::fusing> object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::fusing(); }
};
template <>
struct build_option_traits<build_option_type::optimize_data> {
typedef build_option_bool<build_option_type::optimize_data> object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::optimize_data(); }
};
template <>
struct build_option_traits<build_option_type::allow_static_input_reorder> {
typedef build_option_bool<build_option_type::allow_static_input_reorder> object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::allow_static_input_reorder(); }
};
template <>
struct build_option_traits<build_option_type::debug> {
typedef build_option_bool<build_option_type::debug> object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::debug(); }
};
template <>
struct build_option_traits<build_option_type::outputs> {
typedef build_option_outputs object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::outputs({}); }
};
template <>
struct build_option_traits<build_option_type::learning_config> {
typedef build_option_learning_config object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::learning_config(); }
};
template <>
struct build_option_traits<build_option_type::tuning_config> {
typedef build_option_tuning_config object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::tuning_config(); }
};
template <>
struct build_option_traits<build_option_type::graph_dumps_dir> {
typedef build_option_directory<build_option_type::graph_dumps_dir> object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::graph_dumps_dir({}); }
};
template <>
struct build_option_traits<build_option_type::kernels_cache_dir> {
typedef build_option_directory<build_option_type::kernels_cache_dir> object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::kernels_cache_dir({}); }
};
template <>
struct build_option_traits<build_option_type::serialize_network> {
typedef build_option_serialization<build_option_type::serialize_network> object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::serialize_network({}); }
};
template <>
struct build_option_traits<build_option_type::load_program> {
typedef build_option_load_program<build_option_type::load_program> object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::load_program({}); }
};
template <>
struct build_option_traits<build_option_type::force_implementations> {
using object_type = build_option_force_implementations;
static std::shared_ptr<const build_option> make_default() { return build_option::force_implementations({}); }
};
template <>
struct build_option_traits<build_option_type::partial_build_program> {
typedef build_option_bool<build_option_type::partial_build_program> object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::partial_build_program(); }
};
template <>
struct build_option_traits<build_option_type::allow_new_shape_infer> {
typedef build_option_bool<build_option_type::allow_new_shape_infer> object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::allow_new_shape_infer(); }
};
#endif
} // namespace detail
#ifndef DOXYGEN_SHOULD_SKIP_THIS
inline std::shared_ptr<const build_option> build_option::fusing(bool enable) {
return std::make_shared<build_option_bool<build_option_type::fusing>>(enable);
}
inline std::shared_ptr<const build_option> build_option::optimize_data(bool enable) {
return std::make_shared<build_option_bool<build_option_type::optimize_data>>(enable);
}
inline std::shared_ptr<const build_option> build_option::allow_static_input_reorder(bool enable) {
return std::make_shared<build_option_bool<build_option_type::allow_static_input_reorder>>(enable);
}
inline std::shared_ptr<const build_option> build_option::debug(bool enable) {
return std::make_shared<build_option_bool<build_option_type::debug>>(enable);
}
inline std::shared_ptr<const build_option> build_option::outputs(const std::vector<primitive_id>& outs) {
return std::make_shared<build_option_outputs>(outs);
}
inline std::shared_ptr<const build_option> build_option::learning_config(const learning_params& params) {
return std::make_shared<build_option_learning_config>(params);
}
inline std::shared_ptr<const build_option> build_option::tuning_config(const tuning_config_options& config) {
return std::make_shared<build_option_tuning_config>(config);
}
inline std::shared_ptr<const build_option> build_option::graph_dumps_dir(const std::string& dir_path) {
return std::make_shared<build_option_directory<build_option_type::graph_dumps_dir>>(dir_path);
}
inline std::shared_ptr<const build_option> build_option::kernels_cache_dir(const std::string& dir_path) {
return std::make_shared<build_option_directory<build_option_type::kernels_cache_dir>>(dir_path);
}
inline std::shared_ptr<const build_option> build_option::serialize_network(const std::string& name) {
return std::make_shared<build_option_serialization<build_option_type::serialize_network>>(name);
}
inline std::shared_ptr<const build_option> build_option::load_program(const std::string& name) {
return std::make_shared<build_option_load_program<build_option_type::load_program>>(name);
}
inline std::shared_ptr<const build_option> build_option::force_implementations(implementation_forcing_map forcing) {
return std::make_shared<build_option_force_implementations>(std::move(forcing));
}
inline std::shared_ptr<const build_option> build_option::partial_build_program(bool enable) {
return std::make_shared<build_option_bool<build_option_type::partial_build_program>>(enable);
}
inline std::shared_ptr<const build_option> build_option::allow_new_shape_infer(bool enable) {
return std::make_shared<build_option_bool<build_option_type::allow_new_shape_infer>>(enable);
}
#endif
/// @brief Represents program build options list.
class build_options {
public:
/// @brief Adds or replace option to the options list
void set_option(std::shared_ptr<const build_option> opt) { add_or_replace_option(opt); }
/// @brief Adds or replace options to the options list
template <typename... Args>
void set_option(std::shared_ptr<const build_option> opt, Args... args) {
add_or_replace_option(opt);
set_option(args...);
}
/// @brief Constructs build options list from its arguments.
template <typename... Args>
explicit build_options(Args... args) {
set_option(args...);
}
/// @brief Returns program build option for @p OptType
template <build_option_type OptType>
std::shared_ptr<const typename detail::build_option_traits<OptType>::object_type> get() const {
using T = typename detail::build_option_traits<OptType>::object_type;
for (auto& option : _options) {
if (option->get_type() == OptType)
return std::static_pointer_cast<const T>(option);
}
return std::static_pointer_cast<const T>(detail::build_option_traits<OptType>::make_default());
}
private:
friend struct program;
std::vector<std::shared_ptr<const build_option>> _options;
void set_option(void) {}
void add_or_replace_option(std::shared_ptr<const build_option> opt) {
for (auto& p : _options) {
if (p->get_type() == opt->get_type()) {
p = opt;
return;
}
}
_options.push_back(opt);
}
};
/// @}
/// @}
} // namespace cldnn
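
For reference, a minimal sketch (not part of the diff) of how the removed build_options interface was typically composed; all identifiers below come from the header above, while the function and variable names are illustrative. After this commit the same intent is expressed through the ExecutionConfig type used in the files below.

void legacy_build_options_usage() {
    // Compose options through the variadic constructor declared above.
    cldnn::build_options opts(
        cldnn::build_option::optimize_data(true),
        cldnn::build_option::graph_dumps_dir("/tmp/cldnn_dumps"));   // illustrative path

    // get<>() returns the stored option, or the per-option default when it was never set.
    bool debug_enabled = opts.get<cldnn::build_option_type::debug>()->enabled();
    (void)debug_enabled;
}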

View File

@ -33,7 +33,7 @@ struct network_output {
memory::ptr get_memory() const {
// TODO: in_order queue doesn't create proper output event in some cases which leads to synchronization issues with user app
// So call finish for the associated stream to ensure that the output data is ready.
if (_stream->get_queue_type() == queue_types::in_order) {
if (_stream->get_queue_type() == QueueTypes::in_order) {
_stream->finish();
} else {
_event->wait();
@ -67,14 +67,15 @@ public:
};
using variables_states_map = std::map<std::string, VariableState::Ptr>;
explicit network(program::ptr program, stream::ptr stream, bool is_internal = false, bool is_primary_stream = true);
explicit network(program::ptr program, const ExecutionConfig& config, stream::ptr stream, bool is_internal = false, bool is_primary_stream = true);
network(engine& engine,
const topology& topo,
const build_options& options = build_options(),
const ExecutionConfig& config = {},
bool is_internal = false);
network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const build_options& options,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
bool is_internal);
network(program::ptr program, uint16_t stream_id = 0);
@ -82,6 +83,7 @@ public:
network(program::ptr program, stream::ptr stream, uint16_t stream_id);
network(cldnn::BinaryInputBuffer& ifs, stream::ptr stream, engine& engine, uint16_t stream_id = 0);
network(cldnn::BinaryInputBuffer& ifs, const ExecutionConfig& config, stream::ptr stream, engine& engine, uint16_t stream_id = 0);
~network();
@ -89,11 +91,12 @@ public:
static ptr build_network(engine& engine,
const topology& topology,
const build_options& options = build_options(),
const ExecutionConfig& config = {},
bool is_internal = false);
static ptr build_network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const build_options& options,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
bool is_internal);
static ptr allocate_network(stream::ptr stream,
@ -121,7 +124,7 @@ public:
network_output get_output(const primitive_id& output_id) {
event::ptr evt;
if (get_stream().get_queue_type() == queue_types::out_of_order)
if (get_stream().get_queue_type() == QueueTypes::out_of_order)
evt = get_primitive_event(output_id);
return network_output(evt, get_output_memory(output_id), get_stream_ptr());
}
@ -236,10 +239,13 @@ public:
ICompilationContext& get_compilation_context() const { return *_compilation_context; }
std::mutex& get_impl_cache_mutex() const { return _in_mem_cache_mutex; }
const ExecutionConfig& get_config() const { return _config; }
private:
using output_chains_map = std::map<primitive_id, std::vector<std::shared_ptr<primitive_inst>>>;
uint32_t net_id = 0;
program::ptr _program;
ExecutionConfig _config;
engine& _engine;
stream::ptr _stream;
std::unique_ptr<memory_pool> _memory_pool;
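
A hedged sketch of the corresponding call-site change: network construction and the static build_network helper now take an ExecutionConfig instead of build_options. The function below is illustrative calling code, not part of the diff; the ExecutionConfig namespace is assumed from the plugin headers elsewhere in this commit.

void build_with_config(cldnn::engine& engine, const cldnn::topology& topo) {
    // A default-constructed config matches the "= {}" default argument above.
    ov::intel_gpu::ExecutionConfig cfg;
    auto net = cldnn::network::build_network(engine, topo, cfg);

    // get_output() synchronizes as described in the TODO comment above; the id is a placeholder.
    auto out = net->get_output("output_id");
    (void)out;
}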

View File

@ -7,7 +7,7 @@
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/stream.hpp"
#include "intel_gpu/runtime/lru_cache.hpp"
#include "build_options.hpp"
#include "intel_gpu/runtime/execution_config.hpp"
#include <list>
#include <string>
@ -126,19 +126,22 @@ public:
program(engine& engine_ref,
topology const& topology,
build_options const& options,
const ExecutionConfig& config,
bool is_internal = false,
bool no_optimizations = false,
bool is_body_program = false);
/* constructor used to build a program from subset of nodes of other program (used in propagate_constants) */
program(engine& engine_ref,
std::set<std::shared_ptr<program_node>> const& nodes,
build_options const& options,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
bool is_internal);
explicit program(engine& engine);
~program();
engine& get_engine() const { return _engine; }
const build_options& get_options() const { return options; }
const ExecutionConfig& get_config() const { return _config; }
InferenceEngine::CPUStreamsExecutor::Ptr get_task_executor() const { return _task_executor; }
std::list<program_node*>& get_inputs() {
return inputs;
} // ToDo: redesign trim to output pass to make it const as well as get_engine and get options
@ -146,7 +149,6 @@ public:
return outputs;
} // ToDo: redesign reorder-inputs pass to make it const as well as get_engine and get options
bool is_loop_body() const { return is_body_program; }
bool is_debug_build() const { return options.get<build_option_type::debug>()->enabled(); }
const nodes_ordering& get_processing_order() const;
nodes_ordering& get_processing_order();
uint32_t get_prog_id() { return prog_id; }
@ -230,13 +232,14 @@ public:
static ptr build_program(engine& engine,
const topology& topology,
const build_options& options,
const ExecutionConfig& config,
bool is_internal = false,
bool no_optimizations = false,
bool is_body_program = false);
static ptr build_program(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const build_options& options,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
bool is_internal);
static void init_primitives();
void compile();
@ -261,7 +264,8 @@ private:
stream::ptr _stream;
// TODO: Consider moving it to engine
std::unique_ptr<kernels_cache> _kernels_cache;
build_options options;
ExecutionConfig _config;
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> _task_executor = nullptr;
std::list<program_node*> inputs;
std::vector<program_node*> outputs;
nodes_ordering processing_order;
@ -308,6 +312,7 @@ private:
void cleanup();
void transfer_memory_to_device();
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> make_task_executor(const ExecutionConfig& config) const;
/*
** Analysis functions
*/
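
Similarly, a hedged sketch (assumed calling code) of building a program with the updated signature; the accessors in the hunks above replace get_options() and is_debug_build() with get_config().

void build_program_with_config(cldnn::engine& engine, const cldnn::topology& topo,
                               const ov::intel_gpu::ExecutionConfig& cfg) {
    auto prog = cldnn::program::build_program(engine, topo, cfg);
    const auto& used_cfg = prog->get_config();   // replaces the removed get_options()
    (void)used_cfg;
}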

View File

@ -14,8 +14,8 @@
#include "cpp/ie_cnn_network.h"
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include "intel_gpu/plugin/graph.hpp"
#include "intel_gpu/plugin/device_config.hpp"
#include "intel_gpu/plugin/remote_context.hpp"
#include "intel_gpu/runtime/execution_config.hpp"
namespace ov {
namespace intel_gpu {
@ -24,8 +24,8 @@ class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault
public:
typedef std::shared_ptr<CompiledModel> Ptr;
CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);
CompiledModel(std::istream& networkModel, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);
CompiledModel(InferenceEngine::CNNNetwork &network, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config);
CompiledModel(std::istream& networkModel, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config);
void Export(std::ostream& networkModel) override;
std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;
@ -42,8 +42,8 @@ public:
std::shared_ptr<InferenceEngine::RemoteContext> GetContext() const override;
std::vector<std::shared_ptr<Graph>> m_graphs;
InferenceEngine::gpu::ClContext::Ptr m_context;
Config m_config;
InferenceEngine::RemoteContext::Ptr m_context;
ExecutionConfig m_config;
InferenceEngine::ITaskExecutor::Ptr m_taskExecutor;
InferenceEngine::ITaskExecutor::Ptr m_waitExecutor;
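
A minimal illustration (assumed calling code, not from the diff) of the updated CompiledModel constructor, which now takes the remote context pointer and an ExecutionConfig by const reference:

std::shared_ptr<ov::intel_gpu::CompiledModel> make_compiled_model(
        InferenceEngine::CNNNetwork& network,
        InferenceEngine::RemoteContext::Ptr context,
        const ov::intel_gpu::ExecutionConfig& cfg) {
    // Matches the first updated constructor declared above.
    return std::make_shared<ov::intel_gpu::CompiledModel>(network, context, cfg);
}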

View File

@ -1,105 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <string>
#include "intel_gpu/plugin/custom_layer.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
#include "intel_gpu/graph/network.hpp"
#include "openvino/runtime/intel_gpu/properties.hpp"
#include <ie_performance_hints.hpp>
#include <threading/ie_cpu_streams_executor.hpp>
namespace ov {
namespace intel_gpu {
struct Config {
Config(std::string device_id = "0") : device_id(device_id),
throughput_streams(1),
useProfiling(false),
dumpCustomKernels(false),
exclusiveAsyncRequests(false),
enableDynamicBatch(false),
enableInt8(true),
nv12_two_inputs(false),
queuePriority(cldnn::priority_mode_types::med),
queueThrottle(cldnn::throttle_mode_types::med),
max_dynamic_batch(1),
customLayers({}),
kernels_cache_dir(""),
inference_precision(ov::element::f16),
task_exec_config({"GPU plugin internal task executor", // name
std::max(1, static_cast<int>(std::thread::hardware_concurrency())), // # of streams
1, // # of threads per streams
InferenceEngine::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, // thread binding type
1, // thread binding step
0, // thread binding offset
1, // # of threads
InferenceEngine::IStreamsExecutor::Config::ANY}), // preferred core type
enable_loop_unrolling(true) {
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->serialize_compile == 1) {
task_exec_config._streams = 1;
}
adjustKeyMapValues();
}
uint32_t GetDefaultNStreamsForThroughputMode() const {
return 2;
}
void UpdateFromMap(const std::map<std::string, std::string>& configMap, const cldnn::device_info& info);
void adjustKeyMapValues();
static bool isNewApiProperty(std::string property);
static std::string ConvertPropertyToLegacy(const std::string& key, const std::string& value);
bool CanShareContextWith(const Config& other) const;
std::string device_id;
uint16_t throughput_streams;
bool useProfiling;
bool dumpCustomKernels;
bool exclusiveAsyncRequests;
bool enableDynamicBatch;
bool enableInt8;
bool nv12_two_inputs;
cldnn::priority_mode_types queuePriority;
cldnn::throttle_mode_types queueThrottle;
int max_dynamic_batch;
CustomLayerMap customLayers;
std::string kernels_cache_dir;
ov::element::Type inference_precision;
InferenceEngine::IStreamsExecutor::Config task_exec_config;
bool enable_loop_unrolling;
std::map<std::string, std::string> key_config_map;
InferenceEngine::PerfHintsConfig perfHintsConfig;
};
struct Configs {
using conf_iter = std::map<std::string, Config>::iterator;
Configs(Config conf = Config()) : configs({std::make_pair(default_device_id, conf.device_id = default_device_id)}) { }
void CreateConfig(std::string device_id);
Config& GetConfig(std::string device_id);
Config& GetDefaultDeviceConfig();
void SetDefaultDeviceID(std::string default_device_id) { this->default_device_id = default_device_id; }
std::string GetDefaultDeviceID() { return default_device_id; }
conf_iter begin() { return configs.begin(); }
conf_iter end() { return configs.end(); }
private:
std::string default_device_id = "0";
std::map<std::string, Config> configs;
};
} // namespace intel_gpu
} // namespace ov
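
The removed Config/Configs structs give way to property-based configuration. A hedged migration sketch follows: get_property usage mirrors the graph.hpp hunk later in this diff, while set_property and the specific ov:: properties chosen here are assumptions about the ExecutionConfig API rather than code shown in this commit.

#include "intel_gpu/runtime/execution_config.hpp"
#include "openvino/runtime/properties.hpp"

void configure_like_old_config() {
    ov::intel_gpu::ExecutionConfig cfg;
    // Former Config fields are expected to map onto ov:: properties (set_property is an assumed API):
    cfg.set_property(ov::cache_dir("/tmp/gpu_cache"));                    // was Config::kernels_cache_dir
    cfg.set_property(ov::hint::inference_precision(ov::element::f16));    // was Config::inference_precision
    cfg.set_property(ov::enable_profiling(true));                         // was Config::useProfiling
}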

View File

@ -23,8 +23,8 @@
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include "intel_gpu/plugin/custom_layer.hpp"
#include "intel_gpu/plugin/device_config.hpp"
#include "intel_gpu/plugin/remote_context.hpp"
#include "intel_gpu/plugin/remote_blob.hpp"
#include "intel_gpu/plugin/program.hpp"
namespace ov {
@ -40,8 +40,11 @@ public:
typedef std::shared_ptr<Graph> Ptr;
using variable_states_map = std::map<std::string, std::vector<cldnn::network::VariableState::Ptr>>;
Graph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
Graph(cldnn::BinaryInputBuffer& ib, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
Graph(InferenceEngine::CNNNetwork& network,
RemoteContextImpl::Ptr context,
const ExecutionConfig& config,
uint16_t stream_id = 0);
Graph(cldnn::BinaryInputBuffer& ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id = 0);
explicit Graph(std::shared_ptr<Graph> graph, uint16_t stream_id = 0);
void Export(cldnn::BinaryOutputBuffer &ob);
std::shared_ptr<ngraph::Function> GetExecGraphInfo();
@ -51,10 +54,10 @@ public:
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const;
void UpdatePerfStatistics();
const Config& getConfig() const { return m_config; }
InferenceEngine::gpu::ClContext::Ptr GetContext() { return m_context; }
std::shared_ptr<cldnn::engine> GetEngine() const { return getContextImpl(m_context)->GetEngine(); }
int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; }
cldnn::engine& get_engine() const { return m_context->get_engine(); }
const ExecutionConfig& get_config() const { return m_config; }
int GetMaxDynamicBatchSize() const { return m_config.get_property(ov::intel_gpu::max_dynamic_batch); }
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
const InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_program->GetNetworkInputs(); }
const InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_program->GetNetworkOutputs(); }
@ -85,16 +88,15 @@ public:
bool use_external_queue() const;
protected:
InferenceEngine::gpu::ClContext::Ptr m_context;
RemoteContextImpl::Ptr m_context;
std::shared_ptr<Program> m_program;
std::string m_networkName;
Config m_config;
ExecutionConfig m_config;
uint16_t m_stream_id;
uint32_t m_state;
std::condition_variable m_cv;
std::mutex m_infer_mutex;
std::vector<std::shared_ptr<cldnn::network>> m_networks;
std::map<std::string, cldnn::primitive_id> primitiveIDs;
std::map<std::string, std::vector<cldnn::primitive_id>> prevPrimitiveIDs;
@ -104,7 +106,6 @@ protected:
std::map<std::string, InferenceEngine::SizeVector> outputDims;
std::shared_ptr<cldnn::network> BuildNetwork(std::shared_ptr<cldnn::program> program);
void Build();
void UpdateLayersMaps();
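
A short usage sketch of the renamed Graph accessors shown above (the function below is illustrative and assumes an existing Graph instance):

void inspect_graph(ov::intel_gpu::Graph& graph) {
    cldnn::engine& engine = graph.get_engine();        // was GetEngine()
    const auto& cfg = graph.get_config();              // was getConfig()
    int max_batch = graph.GetMaxDynamicBatchSize();    // now read via the ov::intel_gpu::max_dynamic_batch property
    (void)engine; (void)cfg; (void)max_batch;
}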

View File

@ -70,6 +70,7 @@ private:
bool m_useStreams = false;
bool m_useExternalQueue = false;
std::shared_ptr<Graph> m_graph;
InferenceEngine::gpu::ClContext::Ptr m_context = nullptr;
InferenceEngine::IStreamsExecutor* streamExecutor = nullptr;
@ -90,7 +91,7 @@ private:
template<typename RemoteBlobType, typename = typename std::enable_if<std::is_same<RemoteBlobType, RemoteCLbuffer>::value ||
std::is_same<RemoteBlobType, RemoteUSMbuffer>::value>::type>
InferenceEngine::Blob::Ptr create_remote_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout,
const RemoteBlobImpl::BlobType mem_type, void* mem_ptr = nullptr);
const BlobType mem_type, void* mem_ptr = nullptr);
InferenceEngine::Blob::Ptr create_shared_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, void* usm_host_mem);
void allocate_inputs();
void allocate_outputs();

View File

@ -79,6 +79,7 @@ private:
bool m_useStreams = false;
bool m_useExternalQueue = false;
std::shared_ptr<Graph> m_graph;
InferenceEngine::gpu::ClContext::Ptr m_context = nullptr;
// dynamic batch stuff
std::map<std::string, std::vector<buf_info>> batchInputs;

View File

@ -1,23 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "openvino/runtime/properties.hpp"
namespace ov {
namespace intel_gpu {
/**
* @brief Read-only property to get GPU driver version
*/
static constexpr Property<std::string, PropertyMutability::RO> driver_version{"GPU_DRIVER_VERSION"};
/**
* @brief Read-only property to get GPU device ID
*/
static constexpr Property<std::string, PropertyMutability::RO> device_id{"GPU_DEVICE_ID"};
} // namespace intel_gpu
} // namespace ov

View File

@ -0,0 +1,23 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/runtime/execution_config.hpp"
namespace ov {
namespace intel_gpu {
class LegacyAPIHelper {
public:
static ov::AnyMap convert_legacy_properties(const std::map<std::string, std::string>& properties, bool is_new_api);
static ov::AnyMap convert_legacy_properties(const ov::AnyMap& properties, bool is_new_api);
static std::pair<std::string, ov::Any> convert_legacy_property(const std::pair<std::string, ov::Any>& legacy_property);
static std::pair<std::string, ov::Any> convert_to_legacy_property(const std::pair<std::string, ov::Any>& property);
static bool is_legacy_property(const std::pair<std::string, ov::Any>& property, bool is_new_api);
static bool is_new_api_property(const std::pair<std::string, ov::Any>& property);
static std::vector<std::string> get_supported_configs();
static std::vector<std::string> get_supported_metrics(bool model_caching_enabled);
};
} // namespace intel_gpu
} // namespace ov
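
A hedged usage sketch for the new LegacyAPIHelper (the wrapper name below is hypothetical and the header's include path is not shown in this hunk); it illustrates translating legacy 1.0-API string configuration into ov:: properties.

ov::AnyMap translate_legacy_config(const std::map<std::string, std::string>& legacy_config,
                                   bool is_new_api) {
    // Converts legacy string key/value pairs into ov::AnyMap properties.
    return ov::intel_gpu::LegacyAPIHelper::convert_legacy_properties(legacy_config, is_new_api);
}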

View File

@ -15,34 +15,37 @@
namespace ov {
namespace intel_gpu {
using CustomLayerPtr = std::shared_ptr<class CustomLayer>;
class Plugin : public InferenceEngine::IInferencePlugin,
public InferenceEngine::gpu::details::param_map_obj_getter {
class Plugin : public InferenceEngine::IInferencePlugin {
struct impl;
std::shared_ptr<impl> _impl;
bool streamsSet = false;
bool throttlingSet = false;
bool isModelCachingEnabled = false;
std::string default_device_id = "0";
// key: device_id, value: cldnn device
std::map<std::string, cldnn::device::ptr> device_map;
std::map<std::string, ExecutionConfig> m_configs_map;
// key: cldnn context, value: memory statistics
mutable std::map<RemoteCLContext::Ptr, std::map<std::string, uint64_t>> statistics_map;
mutable std::map<RemoteContextImpl::Ptr, std::map<std::string, uint64_t>> statistics_map;
mutable std::mutex engine_mutex;
mutable std::map<std::string, RemoteCLContext::Ptr> m_defaultContexts;
mutable std::map<std::string, RemoteCLContext::Ptr> m_default_contexts;
cldnn::device_info GetDeviceInfo(const std::map<std::string, std::string> &config) const;
InferenceEngine::CNNNetwork CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
const Config& config) const;
void TransformNetwork(std::shared_ptr<ov::Model>& model, const Config& config) const;
std::map<std::string, std::string> ConvertPerfHintsToConfig(const std::map<std::string, std::string>& network_config,
const Config& plugin_config) const;
InferenceEngine::CNNNetwork clone_and_transform_model(const InferenceEngine::CNNNetwork& network,
const ExecutionConfig& config) const;
void transform_model(std::shared_ptr<ov::Model>& model, const ExecutionConfig& config) const;
void register_primitives();
void update_memory_statistics(const RemoteContextImpl::Ptr& context) const;
std::string get_device_id_from_config(const std::map<std::string, std::string>& config) const;
std::string get_device_id(const std::map<std::string, std::string>& config) const;
RemoteCLContext::Ptr get_default_context(const std::string& device_id) const;
std::vector<ov::PropertyName> get_supported_properties() const;
std::vector<std::string> get_device_capabilities(const cldnn::device_info& info) const;
uint32_t get_optimal_batch_size(const std::map<std::string, InferenceEngine::Parameter>& options) const;
uint32_t get_max_batch_size(const std::map<std::string, InferenceEngine::Parameter>& options) const;
ov::AnyMap preprocess_config(const std::map<std::string, std::string>& orig_config) const;
void RegisterPrimitives();
void UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const;
void UpdateStatistics(const RemoteCLContext::Ptr& context) const;
public:
Plugin();
@ -54,7 +57,6 @@ public:
const std::map<std::string, std::string> &config) override;
void SetConfig(const std::map<std::string, std::string> &config) override;
std::string GetDeviceIDFromConfig(const std::map<std::string, std::string>& config) const;
InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
InferenceEngine::Parameter GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network,
@ -64,31 +66,6 @@ public:
std::shared_ptr<InferenceEngine::RemoteContext> CreateContext(const InferenceEngine::ParamMap& params) override;
std::shared_ptr<InferenceEngine::RemoteContext> GetDefaultContext(const InferenceEngine::ParamMap& params) override;
struct PluginParams {
cldnn::queue_types queue_type;
cldnn::engine_types engine_type;
cldnn::runtime_types runtime_type;
bool use_unified_shared_memory;
InferenceEngine::ITaskExecutor::Ptr task_executor;
};
static PluginParams GetParams(const Config& config, const cldnn::device::ptr& dev,
InferenceEngine::gpu_handle_param external_queue = nullptr) {
PluginParams params;
params.engine_type = cldnn::engine_types::ocl;
params.runtime_type = cldnn::runtime_types::ocl;
if (external_queue) {
params.queue_type = cldnn::stream::detect_queue_type(params.engine_type, external_queue);
} else if (dev->get_info().supports_immad) {
params.queue_type = cldnn::queue_types::in_order;
} else {
params.queue_type = cldnn::queue_types::out_of_order;
}
params.use_unified_shared_memory = true;
params.task_executor = std::make_shared<InferenceEngine::CPUStreamsExecutor>(config.task_exec_config);
return params;
}
};
} // namespace intel_gpu
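
The removed PluginParams::GetParams helper above encoded how the queue type was chosen; a condensed restatement of that branch order for reference (the helper name below is hypothetical, and note that the queue enum is spelled QueueTypes elsewhere in this commit):

cldnn::queue_types choose_queue_type(const cldnn::device::ptr& dev,
                                     InferenceEngine::gpu_handle_param external_queue) {
    // Same precedence as the removed GetParams(): external queue, then immad devices, then default.
    if (external_queue)
        return cldnn::stream::detect_queue_type(cldnn::engine_types::ocl, external_queue);
    if (dev->get_info().supports_immad)
        return cldnn::queue_types::in_order;
    return cldnn::queue_types::out_of_order;
}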

View File

@ -14,11 +14,14 @@
#include <cpp/ie_cnn_network.h>
#include <ngraph/ngraph.hpp>
#include "gpu/gpu_config.hpp"
#include "intel_gpu/plugin/device_config.hpp"
#include "intel_gpu/plugin/custom_layer.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/execution_config.hpp"
#include "intel_gpu/graph/topology.hpp"
#include "intel_gpu/graph/program.hpp"
// Forward declarations for cldnn part
namespace cldnn {
@ -78,20 +81,14 @@ public:
class Program {
public:
Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::engine> engine, const Config& config,
Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
bool createTopologyOnly = false, bool partialBuild = false);
Program(std::shared_ptr<cldnn::engine> engine, const Config& config)
Program(cldnn::engine& engine, const ExecutionConfig& config)
: m_max_batch(1)
, m_curBatch(-1)
, m_config(config)
, m_engine(engine)
, queryMode(false) {}
Program()
: m_max_batch(1)
, m_curBatch(-1)
, m_config()
, m_engine(nullptr)
, queryMode(false) {}
static const cldnn::primitive_id m_preProcessTag;
static const cldnn::primitive_id m_meanValuesTag;
@ -109,6 +106,7 @@ public:
std::map<std::string, cldnn::layout> inputLayouts;
using BlobCacheKey = std::pair<const char*, std::vector<size_t>>;
std::map<BlobCacheKey, cldnn::primitive_id> blobMemCache;
CustomLayerMap m_custom_layers;
int m_max_batch;
int m_curBatch;
@ -119,9 +117,8 @@ public:
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return inputLayouts; }
InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_networkInputs; }
InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_networkOutputs; }
cldnn::engine& GetEngine() const { return *m_engine; }
std::shared_ptr<cldnn::engine> GetEnginePtr() const { return m_engine; }
const Config& GetConfig() const { return m_config; }
cldnn::engine& get_engine() const { return m_engine; }
const ExecutionConfig& get_config() const { return m_config; }
int GetMaxBatchSizeForSingleProgram();
bool IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr<ngraph::Node>& op);
@ -166,8 +163,8 @@ public:
private:
static factories_map_t factories_map;
std::vector<std::shared_ptr<cldnn::program>> m_programs;
Config m_config;
std::shared_ptr<cldnn::engine> m_engine;
ExecutionConfig m_config;
cldnn::engine& m_engine;
std::shared_ptr<cldnn::topology> m_topology;
InferenceEngine::InputsDataMap m_networkInputs;
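
A hedged sketch of constructing the plugin-side Program with its updated signature, which now takes a cldnn::engine reference and an ExecutionConfig instead of a shared engine pointer and the removed Config struct (the function below is illustrative calling code):

void build_plugin_program(InferenceEngine::CNNNetwork& network,
                          cldnn::engine& engine,
                          const ov::intel_gpu::ExecutionConfig& cfg) {
    ov::intel_gpu::Program prog(network, engine, cfg);
    cldnn::engine& used_engine = prog.get_engine();    // replaces GetEngine()/GetEnginePtr()
    const auto& used_cfg = prog.get_config();          // replaces GetConfig()
    (void)used_engine; (void)used_cfg;
}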

View File

@ -0,0 +1,99 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "intel_gpu/plugin/remote_context.hpp"
#include <string>
#include <map>
#include <memory>
#include <atomic>
namespace ov {
namespace intel_gpu {
class RemoteBlobImpl;
class RemoteAllocator : public InferenceEngine::IAllocator {
protected:
friend class RemoteBlobImpl;
std::atomic_flag _lock;
std::map<void*, const RemoteBlobImpl*> m_lockedBlobs;
void regLockedBlob(void* handle, const RemoteBlobImpl* blob);
public:
using Ptr = std::shared_ptr<RemoteAllocator>;
RemoteAllocator() { _lock.clear(std::memory_order_relaxed); }
/**
* @brief Maps handle to heap memory accessible by any memory manipulation routines.
* @return Generic pointer to memory
*/
void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { return handle; };
/**
* @brief Unmaps memory by handle with multiple sequential mappings of the same handle.
* Multiple sequential mappings of the same handle are supposed to return the same
* result; no reference counting is supported.
*/
void unlock(void* handle) noexcept override;
/**
* @brief Allocates memory
* @param size The size in bytes to allocate
* @return Handle to the allocated resource
*/
void* alloc(size_t size) noexcept override { return nullptr; }
/**
* @brief Releases handle and all associated memory resources which invalidates the handle.
* @return false if handle cannot be released, otherwise - true.
*/
bool free(void* handle) noexcept override { return true; }
void lock() {
while (_lock.test_and_set(std::memory_order_acquire)) {}
}
void unlock() {
_lock.clear(std::memory_order_release);
}
};
class USMHostAllocator : public InferenceEngine::IAllocator {
protected:
InferenceEngine::gpu::USMBlob::Ptr _usm_host_blob = nullptr;
InferenceEngine::gpu::ClContext::Ptr _context = nullptr;
public:
using Ptr = std::shared_ptr<USMHostAllocator>;
USMHostAllocator(InferenceEngine::gpu::ClContext::Ptr context) : _context(context) { }
/**
* @brief Maps handle to heap memory accessible by any memory manipulation routines.
* @return Generic pointer to memory
*/
void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override;
/**
* @brief Unmaps memory by handle with multiple sequential mappings of the same handle.
* Multiple sequential mappings of the same handle are supposed to return the same
* result; no reference counting is supported.
*/
void unlock(void* handle) noexcept override;
/**
* @brief Allocates memory
* @param size The size in bytes to allocate
* @return Handle to the allocated resource
*/
void* alloc(size_t size) noexcept override;
/**
* @brief Releases handle and all associated memory resources which invalidates the handle.
* @return false if handle cannot be released, otherwise - true.
*/
bool free(void* handle) noexcept override;
};
} // namespace intel_gpu
} // namespace ov
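
A brief usage sketch of the relocated USMHostAllocator interface above (the function name is hypothetical, ctx is an existing ClContext pointer, and the allocation size is illustrative):

void usm_host_allocator_example(InferenceEngine::gpu::ClContext::Ptr ctx) {
    auto allocator = std::make_shared<ov::intel_gpu::USMHostAllocator>(ctx);
    if (void* handle = allocator->alloc(4096)) {       // allocates a USM host blob internally
        void* mapped = allocator->lock(handle);        // returns the mapped USM host pointer
        (void)mapped;
        allocator->unlock(handle);
        allocator->free(handle);                       // releases the underlying blob
    }
}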

View File

@ -0,0 +1,171 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#ifndef NOMINMAX
# define NOMINMAX
#endif
#ifndef OV_GPU_USE_OPENCL_HPP
#define OV_GPU_USE_OPENCL_HPP
#endif
#ifdef _WIN32
# include <gpu/gpu_context_api_dx.hpp>
#else
# include <gpu/gpu_context_api_va.hpp>
#endif
#include <string>
#include <map>
#include <memory>
namespace ov {
namespace intel_gpu {
class RemoteContextImpl;
class RemoteBlobImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
friend class RemoteAllocator;
public:
explicit RemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
cldnn::stream& stream,
const cldnn::layout& layout,
cldnn::shared_handle mem = nullptr,
cldnn::shared_surface surf = 0,
uint32_t plane = 0,
BlobType mem_type = BlobType::BT_BUF_INTERNAL);
void allocate();
bool deallocate() noexcept;
InferenceEngine::ParamMap getParams() const;
std::string getDeviceName() const noexcept;
std::shared_ptr<InferenceEngine::RemoteContext> getContext() const noexcept;
InferenceEngine::LockedMemory<void> buffer() noexcept;
InferenceEngine::LockedMemory<const void> cbuffer() const noexcept;
InferenceEngine::LockedMemory<void> rwmap() noexcept;
InferenceEngine::LockedMemory<const void> rmap() const noexcept;
InferenceEngine::LockedMemory<void> wmap() noexcept;
const std::shared_ptr<InferenceEngine::IAllocator> &getAllocator() const noexcept;
void *getHandle() const noexcept { return _handle; }
void reinterpret(cldnn::layout new_layout);
bool is_allocated() const noexcept;
bool is_locked() const noexcept;
cldnn::memory::ptr get_memory() { return m_memory_object; }
protected:
std::shared_ptr<InferenceEngine::IAllocator> m_allocator;
InferenceEngine::gpu::ClContext::Ptr m_context;
cldnn::stream& m_stream;
// constructor stuff
cldnn::shared_handle m_mem;
cldnn::shared_surface m_surf;
uint32_t m_plane;
cldnn::layout m_layout;
BlobType m_mem_type;
size_t m_hash;
cldnn::memory::ptr m_memory_object;
mutable std::mutex lockedMutex;
mutable size_t lockedCounter;
mutable std::unique_ptr<cldnn::mem_lock<uint8_t>> lockedHolder;
mutable void* _handle;
void lock() const;
void unlock() const;
bool supports_caching() const;
};
template<typename TpublicAPI>
class TypedRemoteBlob : public TpublicAPI {
public:
using Ptr = std::shared_ptr<TypedRemoteBlob>;
explicit TypedRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
cldnn::stream& stream,
const InferenceEngine::TensorDesc& desc,
const cldnn::layout& layout,
cldnn::shared_handle mem = nullptr,
cldnn::shared_surface surf = 0,
uint32_t plane = 0,
BlobType mem_type = BlobType::BT_BUF_INTERNAL)
: TpublicAPI(desc)
, _impl(context, stream, layout, mem, surf, plane, mem_type) {}
void allocate() noexcept override {
try {
if (!_impl.is_allocated())
_impl.allocate();
} catch (...) {}
}
bool deallocate() noexcept override { return _impl.deallocate(); }
InferenceEngine::ParamMap getParams() const override { return _impl.getParams(); }
std::string getDeviceName() const noexcept override { return _impl.getDeviceName(); }
std::shared_ptr<InferenceEngine::RemoteContext> getContext() const noexcept override { return _impl.getContext(); }
InferenceEngine::LockedMemory<void> buffer() noexcept override { return _impl.buffer(); }
InferenceEngine::LockedMemory<const void> cbuffer() const noexcept override { return _impl.cbuffer(); }
InferenceEngine::LockedMemory<void> rwmap() noexcept override { return _impl.rwmap(); }
InferenceEngine::LockedMemory<const void> rmap() const noexcept override { return _impl.rmap(); }
InferenceEngine::LockedMemory<void> wmap() noexcept override { return _impl.wmap(); }
RemoteBlobImpl* getImpl() { return &_impl; }
protected:
const std::shared_ptr<InferenceEngine::IAllocator> &getAllocator() const noexcept override { return _impl.getAllocator(); }
void *getHandle() const noexcept override { return _impl.getHandle(); }
RemoteBlobImpl _impl;
};
using RemoteCLbuffer = TypedRemoteBlob<InferenceEngine::gpu::ClBufferBlob>;
using RemoteUSMbuffer = TypedRemoteBlob<InferenceEngine::gpu::USMBlob>;
using RemoteCLImage2D = TypedRemoteBlob<InferenceEngine::gpu::ClImage2DBlob>;
#ifdef _WIN32
using RemoteD3DBuffer = TypedRemoteBlob<InferenceEngine::gpu::D3DBufferBlob>;
using RemoteD3DSurface = TypedRemoteBlob<InferenceEngine::gpu::D3DSurface2DBlob>;
#else
using RemoteVASurface = TypedRemoteBlob<InferenceEngine::gpu::VASurfaceBlob>;
#endif
inline RemoteBlobImpl* getBlobImpl(InferenceEngine::gpu::ClBlob* blobPtr) {
#ifdef _WIN32
{
auto ptr = blobPtr->as<RemoteD3DSurface>();
if (ptr) return ptr->getImpl();
}
{
auto ptr = blobPtr->as<RemoteD3DBuffer>();
if (ptr) return ptr->getImpl();
}
#else
{
auto ptr = blobPtr->as<RemoteVASurface>();
if (ptr) return ptr->getImpl();
}
#endif
{
auto ptr = blobPtr->as<RemoteCLbuffer>();
if (ptr) return ptr->getImpl();
}
{
auto ptr = blobPtr->as<RemoteCLImage2D>();
if (ptr) return ptr->getImpl();
}
{
auto ptr = blobPtr->as<RemoteUSMbuffer>();
if (ptr) return ptr->getImpl();
}
return nullptr;
}
} // namespace intel_gpu
} // namespace ov
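
A small helper sketch (hypothetical function name, not from the diff) showing how the getBlobImpl dispatcher above is typically used to reach the implementation behind a generic ClBlob pointer:

void ensure_remote_blob_allocated(InferenceEngine::gpu::ClBlob* blob) {
    // getBlobImpl() tries each known TypedRemoteBlob alias and returns nullptr if none matches.
    if (auto* impl = ov::intel_gpu::getBlobImpl(blob)) {
        if (!impl->is_allocated())
            impl->allocate();
    }
}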

View File

@ -6,7 +6,7 @@
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/plugin/device_config.hpp"
#include "intel_gpu/runtime/lru_cache.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include <ie_parameter.hpp>
@ -35,12 +35,8 @@
namespace ov {
namespace intel_gpu {
class RemoteAllocator;
class RemoteBlobImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
friend class RemoteAllocator;
public:
enum BlobType {
enum class BlobType {
BT_EMPTY,
BT_BUF_INTERNAL,
BT_BUF_SHARED,
@ -50,544 +46,139 @@ public:
BT_IMG_SHARED,
BT_SURF_SHARED,
BT_DX_BUF_SHARED,
};
explicit RemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
cldnn::stream& stream,
const cldnn::layout& layout,
cldnn::shared_handle mem = nullptr,
cldnn::shared_surface surf = 0,
uint32_t plane = 0,
BlobType mem_type = BT_BUF_INTERNAL);
void allocate();
bool deallocate() noexcept;
InferenceEngine::ParamMap getParams() const;
std::string getDeviceName() const noexcept;
std::shared_ptr<InferenceEngine::RemoteContext> getContext() const noexcept;
InferenceEngine::LockedMemory<void> buffer() noexcept;
InferenceEngine::LockedMemory<const void> cbuffer() const noexcept;
InferenceEngine::LockedMemory<void> rwmap() noexcept;
InferenceEngine::LockedMemory<const void> rmap() const noexcept;
InferenceEngine::LockedMemory<void> wmap() noexcept;
const std::shared_ptr<InferenceEngine::IAllocator> &getAllocator() const noexcept;
void *getHandle() const noexcept { return _handle; }
void reinterpret(cldnn::layout new_layout);
bool is_allocated() const noexcept;
bool is_locked() const noexcept;
cldnn::memory::ptr getMemory() { return m_memObject; }
protected:
static RemoteAllocator m_allocator;
std::weak_ptr<InferenceEngine::gpu::ClContext> m_context;
// retain engine ptr to ensure that the memory object can be released properly in cases when RemoteContext is deleted before RemoteTensor
std::shared_ptr<cldnn::engine> m_engine;
cldnn::stream& m_stream;
// constructor stuff
cldnn::shared_handle m_mem;
cldnn::shared_surface m_surf;
uint32_t m_plane;
cldnn::layout m_layout;
BlobType m_mem_type;
cldnn::memory::ptr m_memObject;
mutable std::mutex lockedMutex;
mutable size_t lockedCounter;
mutable std::unique_ptr<cldnn::mem_lock<uint8_t>> lockedHolder;
mutable void* _handle;
mutable std::shared_ptr<InferenceEngine::IAllocator> _allocator;
void lock() const;
void unlock() const;
};
template<typename TpublicAPI>
class TypedRemoteBlob : public TpublicAPI {
public:
using Ptr = std::shared_ptr<TypedRemoteBlob>;
explicit TypedRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
cldnn::stream& stream,
const InferenceEngine::TensorDesc& desc,
const cldnn::layout& layout,
cldnn::shared_handle mem = nullptr,
cldnn::shared_surface surf = 0,
uint32_t plane = 0,
RemoteBlobImpl::BlobType mem_type = RemoteBlobImpl::BlobType::BT_BUF_INTERNAL)
: TpublicAPI(desc)
, _impl(context, stream, layout, mem, surf, plane, mem_type) {}
void allocate() noexcept override {
try {
if (!_impl.is_allocated())
_impl.allocate();
} catch (...) {}
}
bool deallocate() noexcept override { return _impl.deallocate(); }
InferenceEngine::ParamMap getParams() const override { return _impl.getParams(); }
std::string getDeviceName() const noexcept override { return _impl.getDeviceName(); }
std::shared_ptr<InferenceEngine::RemoteContext> getContext() const noexcept override { return _impl.getContext(); }
InferenceEngine::LockedMemory<void> buffer() noexcept override { return _impl.buffer(); }
InferenceEngine::LockedMemory<const void> cbuffer() const noexcept override { return _impl.cbuffer(); }
InferenceEngine::LockedMemory<void> rwmap() noexcept override { return _impl.rwmap(); }
InferenceEngine::LockedMemory<const void> rmap() const noexcept override { return _impl.rmap(); }
InferenceEngine::LockedMemory<void> wmap() noexcept override { return _impl.wmap(); }
RemoteBlobImpl* getImpl() { return &_impl; }
protected:
const std::shared_ptr<InferenceEngine::IAllocator> &getAllocator() const noexcept override { return _impl.getAllocator(); }
void *getHandle() const noexcept override { return _impl.getHandle(); }
RemoteBlobImpl _impl;
};
using RemoteCLbuffer = TypedRemoteBlob<InferenceEngine::gpu::ClBufferBlob>;
using RemoteUSMbuffer = TypedRemoteBlob<InferenceEngine::gpu::USMBlob>;
using RemoteCLImage2D = TypedRemoteBlob<InferenceEngine::gpu::ClImage2DBlob>;
#ifdef _WIN32
using RemoteD3DBuffer = TypedRemoteBlob<InferenceEngine::gpu::D3DBufferBlob>;
using RemoteD3DSurface = TypedRemoteBlob<InferenceEngine::gpu::D3DSurface2DBlob>;
#else
using RemoteVASurface = TypedRemoteBlob<InferenceEngine::gpu::VASurfaceBlob>;
#endif
inline RemoteBlobImpl* getBlobImpl(InferenceEngine::gpu::ClBlob* blobPtr) {
#ifdef _WIN32
{
auto ptr = blobPtr->as<RemoteD3DSurface>();
if (ptr) return ptr->getImpl();
}
{
auto ptr = blobPtr->as<RemoteD3DBuffer>();
if (ptr) return ptr->getImpl();
}
#else
{
auto ptr = blobPtr->as<RemoteVASurface>();
if (ptr) return ptr->getImpl();
}
#endif
{
auto ptr = blobPtr->as<RemoteCLbuffer>();
if (ptr) return ptr->getImpl();
}
{
auto ptr = blobPtr->as<RemoteCLImage2D>();
if (ptr) return ptr->getImpl();
}
{
auto ptr = blobPtr->as<RemoteUSMbuffer>();
if (ptr) return ptr->getImpl();
}
return nullptr;
template <typename Result>
Result extract_object(const InferenceEngine::ParamMap& params, const std::string& key) {
auto itrHandle = params.find(key);
OPENVINO_ASSERT(itrHandle != params.end(), "[GPU] No parameter ", key, " found in ParamsMap");
return itrHandle->second.as<Result>();
}
class RemoteAllocator : public InferenceEngine::IAllocator {
protected:
friend class RemoteBlobImpl;
std::atomic_flag _lock;
std::map<void*, const RemoteBlobImpl*> m_lockedBlobs;
void regLockedBlob(void* handle, const RemoteBlobImpl* blob);
public:
using Ptr = std::shared_ptr<RemoteAllocator>;
RemoteAllocator() { _lock.clear(std::memory_order_relaxed); }
/**
* @brief Maps handle to heap memory accessible by any memory manipulation routines.
* @return Generic pointer to memory
*/
void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { return handle; };
/**
* @brief Unmaps memory by handle; supports multiple sequential mappings of the same handle.
* Multiple sequential mappings of the same handle are supposed to return the same
* result, since reference counting is not supported.
*/
void unlock(void* handle) noexcept override;
/**
* @brief Allocates memory
* @param size The size in bytes to allocate
* @return Handle to the allocated resource
*/
void* alloc(size_t size) noexcept override { return nullptr; }
/**
* @brief Releases handle and all associated memory resources which invalidates the handle.
* @return false if handle cannot be released, otherwise - true.
*/
bool free(void* handle) noexcept override { return true; }
void lock() {
while (_lock.test_and_set(std::memory_order_acquire)) {}
}
void unlock() {
_lock.clear(std::memory_order_release);
}
};
class USMHostAllocator : public InferenceEngine::IAllocator {
protected:
InferenceEngine::gpu::USMBlob::Ptr _usm_host_blob = nullptr;
InferenceEngine::gpu::ClContext* _context = nullptr;
public:
using Ptr = std::shared_ptr<USMHostAllocator>;
USMHostAllocator(InferenceEngine::gpu::ClContext* context) : _context(context) { }
/**
* @brief Maps handle to heap memory accessible by any memory manipulation routines.
* @return Generic pointer to memory
*/
void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override {
if (!_usm_host_blob)
return nullptr;
try {
return _usm_host_blob->get();
} catch (...) {
return nullptr;
}
};
/**
* @brief Unmaps memory by handle; supports multiple sequential mappings of the same handle.
* Multiple sequential mappings of the same handle are supposed to return the same
* result, since reference counting is not supported.
*/
void unlock(void* handle) noexcept override {}
/**
* @brief Allocates memory
* @param size The size in bytes to allocate
* @return Handle to the allocated resource
*/
void* alloc(size_t size) noexcept override {
try {
auto td = InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, InferenceEngine::SizeVector{size}, InferenceEngine::Layout::C);
InferenceEngine::ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_HOST_BUFFER)}};
_usm_host_blob = std::dynamic_pointer_cast<InferenceEngine::gpu::USMBlob>(_context->CreateBlob(td, params));
_usm_host_blob->allocate();
if (!getBlobImpl(_usm_host_blob.get())->is_allocated()) {
return nullptr;
}
return _usm_host_blob->get();
} catch (...) {
return nullptr;
}
}
/**
* @brief Releases handle and all associated memory resources which invalidates the handle.
* @return false if handle cannot be released, otherwise - true.
*/
bool free(void* handle) noexcept override {
try {
_usm_host_blob = nullptr;
} catch(...) { }
return true;
}
};
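// Usage sketch (illustrative only; example_usm_host_blob is a hypothetical helper and assumes
// 'ctx' is a ClContext created by the GPU plugin): the allocator backs a regular IE blob so
// that its storage is USM host memory, mirroring how CreateHostBlob uses it below.
inline InferenceEngine::MemoryBlob::Ptr example_usm_host_blob(InferenceEngine::gpu::ClContext* ctx,
                                                              const InferenceEngine::TensorDesc& desc) {
    return std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(
        make_blob_with_precision(desc, std::make_shared<USMHostAllocator>(ctx)));
}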
class ExecutionContextImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
class RemoteContextImpl {
public:
enum ContextType {
OCL,
DEV_SHARED
};
using Ptr = std::shared_ptr<ExecutionContextImpl>;
using CPtr = std::shared_ptr<const ExecutionContextImpl>;
using Ptr = std::shared_ptr<RemoteContextImpl>;
using CPtr = std::shared_ptr<const RemoteContextImpl>;
explicit ExecutionContextImpl(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
const InferenceEngine::ParamMap& params,
const Config& config = {});
RemoteContextImpl(std::string device_name, std::vector<cldnn::device::ptr> devices);
RemoteContextImpl(const std::vector<RemoteContextImpl::Ptr>& known_contexts, const InferenceEngine::ParamMap& params);
InferenceEngine::ParamMap getParams() const;
std::string getDeviceName() const noexcept;
InferenceEngine::ParamMap get_params() const;
std::string get_device_name() const noexcept;
InferenceEngine::MemoryBlob::Ptr create_host_blob(InferenceEngine::gpu::ClContext::Ptr public_context, const InferenceEngine::TensorDesc& desc);
InferenceEngine::RemoteBlob::Ptr create_blob(InferenceEngine::gpu::ClContext::Ptr public_context,
const InferenceEngine::TensorDesc& desc,
const InferenceEngine::ParamMap& params = {});
std::shared_ptr<cldnn::engine> GetEngine() const { return m_engine; }
Config& GetConfig() { return m_config; }
ContextType GetType() const { return m_type; }
InferenceEngine::gpu_handle_param GetExternalQueue() const { return m_external_queue; }
const std::weak_ptr<InferenceEngine::IInferencePlugin> GetPlugin() const { return m_plugin; }
cldnn::engine& get_engine() { return *m_engine; }
InferenceEngine::gpu_handle_param get_external_queue() const { return m_external_queue; }
void lock() {
while (m_lock.test_and_set(std::memory_order_acquire)) {}
}
cldnn::memory::ptr try_get_cached_memory(size_t hash);
void add_to_cache(size_t hash, cldnn::memory::ptr memory);
void unlock() {
m_lock.clear(std::memory_order_release);
}
private:
std::string get_device_name(const std::vector<RemoteContextImpl::Ptr>& known_contexts,
const cldnn::device::ptr current_device);
InferenceEngine::RemoteBlob::Ptr reuse_surface(InferenceEngine::gpu::ClContext::Ptr public_context,
const InferenceEngine::TensorDesc& desc,
const InferenceEngine::ParamMap& params);
InferenceEngine::RemoteBlob::Ptr reuse_memory(InferenceEngine::gpu::ClContext::Ptr public_context,
const InferenceEngine::TensorDesc& desc,
cldnn::shared_handle mem,
BlobType blob_type);
InferenceEngine::RemoteBlob::Ptr create_buffer(InferenceEngine::gpu::ClContext::Ptr public_context, const InferenceEngine::TensorDesc& desc);
InferenceEngine::RemoteBlob::Ptr create_usm(InferenceEngine::gpu::ClContext::Ptr public_context,
const InferenceEngine::TensorDesc& desc,
BlobType alloc_type);
void check_if_shared();
protected:
// TODO: refactor to unique_ptr
std::shared_ptr<cldnn::engine> m_engine;
InferenceEngine::gpu_handle_param m_va_display;
InferenceEngine::gpu_handle_param m_external_queue;
Config m_config;
static const size_t cache_capacity = 100;
ContextType m_type;
std::weak_ptr<InferenceEngine::IInferencePlugin> m_plugin;
std::atomic_flag m_lock;
std::string m_device_name = "";
const std::string m_plugin_name;
cldnn::LruCache<size_t, cldnn::memory::ptr> m_memory_cache;
std::mutex m_cache_mutex;
};
template<typename TpublicContextAPI>
class TypedExecutionContext : public TpublicContextAPI {
template<typename T1, typename T2>
struct _Key {
T1 _surf;
T2 _plane;
_Key(T1 surf, T2 plane) : _surf(surf), _plane(plane) {}
bool operator<(const _Key &that) const {
return _surf < that._surf || (_surf == that._surf && _plane < that._plane);
}
};
#ifdef _WIN32
using surf_key = _Key<cldnn::shared_handle, uint32_t>;
#else
using surf_key = _Key<cldnn::shared_surface, uint32_t>;
#endif
std::map<surf_key, InferenceEngine::RemoteBlob::Ptr> shared_surf_reg;
std::map<cldnn::shared_handle, InferenceEngine::RemoteBlob::Ptr> shared_obj_reg;
InferenceEngine::RemoteBlob::Ptr reuse_surf(const InferenceEngine::TensorDesc& tensorDesc, const InferenceEngine::ParamMap& params) {
using namespace InferenceEngine;
using InferenceEngine::gpu::details::param_map_obj_getter;
InferenceEngine::RemoteBlob::Ptr ret = nullptr;
auto& stream = _impl.GetEngine()->get_program_stream();
uint32_t plane = param_map_obj_getter::_ObjFromParamSimple<uint32_t>(params, GPU_PARAM_KEY(VA_PLANE));
#ifdef _WIN32
cldnn::shared_handle mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
surf_key skey(mem, plane);
#else
cldnn::shared_surface surf = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_surface>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
surf_key skey(surf, plane);
#endif
std::lock_guard<ExecutionContextImpl> locker(_impl);
// try to locate previously shared surface
auto itr = shared_surf_reg.find(skey);
if (itr != shared_surf_reg.end()) {
ret = itr->second;
} else {
// unluckily, not found - create a new one and insert into registry
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
ImageFormatFromLayout(tensorDesc.getLayout()),
tensor_from_dims(tensorDesc.getDims()));
auto smart_this =
std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this());
#ifdef _WIN32
ret = std::make_shared<RemoteD3DSurface>(smart_this, stream,
tensorDesc, layout, mem, 0, plane,
RemoteBlobImpl::BlobType::BT_SURF_SHARED);
#else
ret = std::make_shared<RemoteVASurface>(smart_this, stream,
tensorDesc, layout, nullptr, surf, plane,
RemoteBlobImpl::BlobType::BT_SURF_SHARED);
#endif
shared_surf_reg[skey] = ret;
}
return ret;
}
InferenceEngine::RemoteBlob::Ptr reuse_obj(const InferenceEngine::TensorDesc& tensorDesc,
cldnn::shared_handle mem,
RemoteBlobImpl::BlobType blob_type) {
InferenceEngine::RemoteBlob::Ptr ret = nullptr;
std::lock_guard<ExecutionContextImpl> locker(_impl);
auto& stream = _impl.GetEngine()->get_program_stream();
// try to locate previously shared object
auto itr = shared_obj_reg.find(mem);
if (itr != shared_obj_reg.end()) {
ret = itr->second;
} else {
// unluckily, not found - create a new one and insert into registry
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
FormatFromLayout(tensorDesc.getLayout()),
tensor_from_dims(tensorDesc.getDims()));
auto smart_this =
std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this());
switch (blob_type) {
case RemoteBlobImpl::BlobType::BT_BUF_SHARED:
ret = std::make_shared<RemoteCLbuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break;
case RemoteBlobImpl::BlobType::BT_USM_SHARED:
ret = std::make_shared<RemoteUSMbuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break;
case RemoteBlobImpl::BlobType::BT_IMG_SHARED:
layout.format = ImageFormatFromLayout(tensorDesc.getLayout());
ret = std::make_shared<RemoteCLImage2D>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break;
#ifdef _WIN32
case RemoteBlobImpl::BlobType::BT_DX_BUF_SHARED:
ret = std::make_shared<RemoteD3DBuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break;
#endif
default:
break;
}
shared_obj_reg[mem] = ret;
}
return ret;
}
InferenceEngine::RemoteBlob::Ptr create_buffer(const InferenceEngine::TensorDesc& tensorDesc) {
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
FormatFromLayout(tensorDesc.getLayout()),
tensor_from_dims(tensorDesc.getDims()));
auto smart_this = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this());
auto& stream = _impl.GetEngine()->get_program_stream();
return std::make_shared<RemoteCLbuffer>(smart_this,
stream,
tensorDesc,
layout,
nullptr, 0, 0,
RemoteBlobImpl::BlobType::BT_BUF_INTERNAL);
}
InferenceEngine::RemoteBlob::Ptr create_usm(const InferenceEngine::TensorDesc& tensorDesc, RemoteBlobImpl::BlobType alloc_type) {
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
FormatFromLayout(tensorDesc.getLayout()),
tensor_from_dims(tensorDesc.getDims()));
auto smart_this = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this());
auto& stream = _impl.GetEngine()->get_program_stream();
return std::make_shared<RemoteUSMbuffer>(smart_this,
stream,
tensorDesc,
layout,
nullptr, 0, 0,
alloc_type);
}
void check_if_shared() {
if (GetType() != ExecutionContextImpl::ContextType::DEV_SHARED)
IE_THROW() << "Shared context is required to to share this type of memory";
}
// The template class below is needed to allow proper casting of user contexts
// We have the following public class hierarchy:
// RemoteContext
// |
// ClContext
// | |
// VAContext D3DContext
// So our implementation must allow casting of the context object to the proper user type (ClContext, VAContext or D3DContext)
// Thus we introduce this template which has 3 instances with different base classes:
// RemoteContext
// |
// ---------- ClContext -----------
// | | |
// VAContext | D3DContext
// | | |
// RemoteVAContext RemoteCLContext RemoteD3DContext
//
// All these context types are just thin wrappers that call the common internal context implementation (RemoteContextImpl)
template<typename PublicContextType>
class TypedRemoteContext : public PublicContextType {
public:
using Ptr = std::shared_ptr<TypedExecutionContext>;
using CPtr = std::shared_ptr<const TypedExecutionContext>;
using Ptr = std::shared_ptr<TypedRemoteContext>;
explicit TypedExecutionContext(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
const InferenceEngine::ParamMap& params,
const Config& config = {})
: _impl(plugin, params, config) {}
TypedRemoteContext(std::string device_name, std::vector<cldnn::device::ptr> devices)
: m_impl(std::make_shared<RemoteContextImpl>(device_name, devices)) {}
TypedRemoteContext(const std::vector<RemoteContextImpl::Ptr>& known_contexts, const InferenceEngine::ParamMap& params)
: m_impl(std::make_shared<RemoteContextImpl>(known_contexts, params)) {}
~TypedExecutionContext() {
shared_surf_reg.clear();
shared_obj_reg.clear();
}
InferenceEngine::ParamMap getParams() const override { return m_impl->get_params(); }
std::string getDeviceName() const noexcept override { return m_impl->get_device_name(); }
InferenceEngine::MemoryBlob::Ptr CreateHostBlob(const InferenceEngine::TensorDesc& desc) override {
return m_impl->create_host_blob(std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this()), desc);
}
InferenceEngine::RemoteBlob::Ptr CreateBlob(const InferenceEngine::TensorDesc& desc, const InferenceEngine::ParamMap& params = {}) override {
return m_impl->create_blob(std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this()), desc, params);
}
InferenceEngine::ParamMap getParams() const override { return _impl.getParams(); }
std::string getDeviceName() const noexcept override { return _impl.getDeviceName(); }
RemoteContextImpl::Ptr get_impl() { return m_impl; }
InferenceEngine::MemoryBlob::Ptr CreateHostBlob(const InferenceEngine::TensorDesc& tensorDesc) override {
if (_impl.GetEngine()->use_unified_shared_memory())
return std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(make_blob_with_precision(tensorDesc, std::make_shared<USMHostAllocator>(this)));
else
return std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(make_blob_with_precision(tensorDesc));
}
InferenceEngine::RemoteBlob::Ptr CreateBlob(const InferenceEngine::TensorDesc& tensorDesc, const InferenceEngine::ParamMap& params = {}) override {
using namespace InferenceEngine;
using InferenceEngine::gpu::details::param_map_obj_getter;
if (params.empty()) {
// user wants plugin to allocate blob by itself and return handle
return create_buffer(tensorDesc);
} else {
// user will supply shared object handle
std::string memTypeStr = param_map_obj_getter::_StrFromParams(params, GPU_PARAM_KEY(SHARED_MEM_TYPE));
bool is_usm = memTypeStr == GPU_PARAM_VALUE(USM_HOST_BUFFER) ||
memTypeStr == GPU_PARAM_VALUE(USM_DEVICE_BUFFER) ||
memTypeStr == GPU_PARAM_VALUE(USM_USER_BUFFER);
if (is_usm && !_impl.GetEngine()->use_unified_shared_memory()) {
IE_THROW(NotAllocated) << "Can't create USM tensor as USM is not supported (or manually disabled) on current device";
}
if (GPU_PARAM_VALUE(VA_SURFACE) == memTypeStr) {
check_if_shared();
return reuse_surf(tensorDesc, params);
} else if (GPU_PARAM_VALUE(USM_HOST_BUFFER) == memTypeStr) {
return create_usm(tensorDesc, RemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
} else if (GPU_PARAM_VALUE(USM_DEVICE_BUFFER) == memTypeStr) {
return create_usm(tensorDesc, RemoteBlobImpl::BlobType::BT_USM_DEVICE_INTERNAL);
} else {
RemoteBlobImpl::BlobType blob_type;
cldnn::shared_handle mem = nullptr;
if (GPU_PARAM_VALUE(OCL_BUFFER) == memTypeStr) {
blob_type = RemoteBlobImpl::BlobType::BT_BUF_SHARED;
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
} else if (GPU_PARAM_VALUE(USM_USER_BUFFER) == memTypeStr) {
blob_type = RemoteBlobImpl::BlobType::BT_USM_SHARED;
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
} else if (GPU_PARAM_VALUE(OCL_IMAGE2D) == memTypeStr) {
blob_type = RemoteBlobImpl::BlobType::BT_IMG_SHARED;
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
#ifdef _WIN32
} else if (GPU_PARAM_VALUE(DX_BUFFER) == memTypeStr) {
blob_type = RemoteBlobImpl::BlobType::BT_DX_BUF_SHARED;
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
check_if_shared();
#endif
} else {
IE_THROW() << "Unsupported shared object type " << memTypeStr;
}
return reuse_obj(tensorDesc, mem, blob_type);
}
}
}
Config& GetConfig() { return _impl.GetConfig(); }
ExecutionContextImpl::ContextType GetType() const { return _impl.GetType(); }
ExecutionContextImpl* getImpl() { return &_impl; }
protected:
ExecutionContextImpl _impl;
private:
std::shared_ptr<RemoteContextImpl> m_impl;
};
using RemoteCLContext = TypedExecutionContext<InferenceEngine::gpu::ClContext>;
using RemoteCLContext = TypedRemoteContext<InferenceEngine::gpu::ClContext>;
#ifdef _WIN32
using RemoteD3DContext = TypedExecutionContext<InferenceEngine::gpu::D3DContext>;
using RemoteD3DContext = TypedRemoteContext<InferenceEngine::gpu::D3DContext>;
#else
using RemoteVAContext = TypedExecutionContext<InferenceEngine::gpu::VAContext>;
using RemoteVAContext = TypedRemoteContext<InferenceEngine::gpu::VAContext>;
#endif
inline ExecutionContextImpl* getContextImpl(InferenceEngine::gpu::ClContext::Ptr ctxPtr) {
inline std::shared_ptr<RemoteContextImpl> get_context_impl(InferenceEngine::gpu::ClContext::Ptr context) {
OPENVINO_ASSERT(context != nullptr, "[GPU] Couldn't get impl from invalid context object");
#ifdef _WIN32
{
auto ptr = ctxPtr->as<RemoteD3DContext>();
if (ptr) return ptr->getImpl();
}
if (auto ptr = context->as<RemoteD3DContext>())
return ptr->get_impl();
#else
{
auto ptr = ctxPtr->as<RemoteVAContext>();
if (ptr) return ptr->getImpl();
}
if (auto ptr = context->as<RemoteVAContext>())
return ptr->get_impl();
#endif
{
auto ptr = ctxPtr->as<RemoteCLContext>();
if (ptr) return ptr->getImpl();
}
return nullptr;
if (auto ptr = context->as<RemoteCLContext>())
return ptr->get_impl();
OPENVINO_ASSERT(false, "[GPU] Couldn't get context impl from public context object.");
}
inline std::shared_ptr<RemoteContextImpl> get_context_impl(InferenceEngine::RemoteContext::Ptr context) {
OPENVINO_ASSERT(context != nullptr, "[GPU] Couldn't get impl from invalid context object");
auto casted = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(context);
OPENVINO_ASSERT(casted != nullptr, "[GPU] Couldn't get context impl: Context type is not ClContext or it's derivatives");
return get_context_impl(casted);
}
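// Usage sketch (illustrative only; example_engine_from_context is a hypothetical helper):
// once the impl is extracted from a user-provided remote context, the plugin can reach
// the underlying cldnn::engine through it.
inline cldnn::engine& example_engine_from_context(InferenceEngine::RemoteContext::Ptr context) {
    return get_context_impl(context)->get_engine();
}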
} // namespace intel_gpu


@ -8,19 +8,20 @@
#include <ngraph/function.hpp>
#include "intel_gpu/plugin/device_config.hpp"
#include "intel_gpu/runtime/execution_config.hpp"
#include "intel_gpu/runtime/device.hpp"
namespace ov {
namespace intel_gpu {
class TransformationsPipeline {
public:
explicit TransformationsPipeline(const Config &conf, const cldnn::device_info &device_info)
explicit TransformationsPipeline(const ExecutionConfig &conf, const cldnn::device_info &device_info)
: config(conf), device_info(device_info) {}
void apply(std::shared_ptr<ov::Model> func);
private:
Config config;
const ExecutionConfig& config;
cldnn::device_info device_info;
};


@ -13,7 +13,7 @@ namespace intel_gpu {
class VariableState : public InferenceEngine::IVariableStateInternal {
public:
VariableState(const std::string& name, const std::vector<cldnn::network::VariableState::Ptr>& states,
std::shared_ptr<cldnn::engine> engine, int currentBatch);
cldnn::engine& engine, int currentBatch);
/**
* @brief Reset internal variable state for relevant infer request, to a value specified as
@ -41,7 +41,7 @@ private:
int currentBatch_;
std::vector<cldnn::network::VariableState::Ptr> states_;
InferenceEngine::TensorDesc desc_;
std::shared_ptr<cldnn::engine> engine_;
cldnn::engine& engine_;
};
} // namespace intel_gpu


@ -49,6 +49,25 @@ inline std::ostream& operator<<(std::ostream& out, const impl_types& impl_type)
return out;
}
inline std::istream& operator>>(std::istream& is, impl_types& impl_type) {
std::string str;
is >> str;
if (str == "cpu") {
impl_type = impl_types::cpu;
} else if (str == "common") {
impl_type = impl_types::common;
} else if (str == "ocl") {
impl_type = impl_types::ocl;
} else if (str == "onednn") {
impl_type = impl_types::onednn;
} else if (str == "any") {
impl_type = impl_types::any;
} else {
throw ov::Exception{"Unsupported impl type: " + str};
}
return is;
}
/// @brief Possible supported shape types.
enum class shape_types : uint8_t {
static_shape = 1 << 0,
@ -82,25 +101,35 @@ inline std::ostream& operator<<(std::ostream& out, const shape_types& shape_type
return out;
}
/// @brief Description of primitives implementation.
struct implementation_desc {
format::type output_format; ///< Output format.
} // namespace cldnn
namespace ov {
namespace intel_gpu {
struct ImplementationDesc {
cldnn::format::type output_format; ///< Output format.
std::string kernel_name; ///< GPU kernel name.
impl_types impl_type; ///< GPU implementation type.
cldnn::impl_types impl_type; ///< GPU implementation type.
implementation_desc() :
output_format(format::any),
ImplementationDesc() :
output_format(cldnn::format::any),
kernel_name(""),
impl_type(impl_types::any) {}
impl_type(cldnn::impl_types::any) {}
implementation_desc(format::type output_format,
ImplementationDesc(cldnn::format::type output_format,
std::string kernel_name,
impl_types impl_type = impl_types::any) :
cldnn::impl_types impl_type = cldnn::impl_types::any) :
output_format(output_format),
kernel_name(kernel_name),
impl_type(impl_type) {}
};
using implementation_forcing_map = std::map<primitive_id, implementation_desc>;
inline std::ostream& operator<<(std::ostream& out, const ImplementationDesc& desc) {
out << desc.impl_type << ":" << desc.kernel_name << ":" << desc.output_format;
return out;
}
} // namespace cldnn
using ImplForcingMap = std::map<cldnn::primitive_id, ImplementationDesc>;
} // namespace intel_gpu
} // namespace ov
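// Usage sketch (illustrative only; "conv1" is a hypothetical primitive id): an ImplForcingMap
// entry forces an OCL implementation with bfyx output format for a single primitive.
inline ov::intel_gpu::ImplForcingMap example_forcing_map() {
    return {{"conv1", ov::intel_gpu::ImplementationDesc(cldnn::format::bfyx, "", cldnn::impl_types::ocl)}};
}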


@ -5,11 +5,12 @@
#pragma once
#include "device.hpp"
#include "engine_configuration.hpp"
#include "event.hpp"
#include "memory_caps.hpp"
#include "memory_pool.hpp"
#include "layout.hpp"
#include "execution_config.hpp"
#include "engine_configuration.hpp"
#include <threading/ie_cpu_streams_executor.hpp>
#include <memory>
@ -91,9 +92,6 @@ public:
/// Checks if the current engine supports the specified allocation @p type
bool supports_allocation(allocation_type type) const;
/// Returns configuration of current engine
const engine_configuration& configuration() const { return _configuration; }
/// Returns device structure which stores device capabilities
device_info get_device_info() const;
@ -129,22 +127,23 @@ public:
uint64_t get_max_memory_size() const;
/// Create stream object for current engine
virtual stream_ptr create_stream() const = 0;
virtual stream_ptr create_stream(const ExecutionConfig& config) const = 0;
/// Creates stream object from user handle
virtual stream_ptr create_stream(void *handle) const = 0;
virtual stream_ptr create_stream(const ExecutionConfig& config, void *handle) const = 0;
/// Returns service stream which can be used during program build and optimizations
virtual stream& get_program_stream() const = 0;
virtual stream& get_service_stream() const = 0;
virtual allocation_type detect_usm_allocation_type(const void* memory) const = 0;
#ifdef ENABLE_ONEDNN_FOR_GPU
/// Creates onednn engine object which shares device and context with current engine
virtual void create_onednn_engine(const ExecutionConfig& config) = 0;
/// Returns onednn engine object which shares device and context with current engine
virtual dnnl::engine& get_onednn_engine() const = 0;
#endif
/// Return GPU plugin internal task executor
const InferenceEngine::ITaskExecutor::Ptr get_task_executor();
/// Factory method which creates engine object with impl configured by @p engine_type
/// @param engine_type requested engine type
@ -152,13 +151,7 @@ public:
/// @param runtime_type requested execution runtime for the engine. @note some runtime/engine types configurations might be unsupported
/// @param device specifies the device which the engine is created for
/// @param configuration options for the engine
static std::shared_ptr<cldnn::engine> create(engine_types engine_type,
runtime_types runtime_type,
const device::ptr device,
const engine_configuration& configuration = engine_configuration(),
const InferenceEngine::ITaskExecutor::Ptr task_executor =
std::make_shared<InferenceEngine::CPUStreamsExecutor>(
InferenceEngine::CPUStreamsExecutor::Config()));
static std::shared_ptr<cldnn::engine> create(engine_types engine_type, runtime_types runtime_type, const device::ptr device);
/// Factory method which creates engine object with impl configured by @p engine_type
/// @param engine_type requested engine type
@ -166,19 +159,12 @@ public:
/// @param task_executor GPU plugin internal task executor
/// @param configuration options for the engine
/// @note engine is created for the first device returned by devices query
static std::shared_ptr<cldnn::engine> create(engine_types engine_type,
runtime_types runtime_type,
const engine_configuration& configuration = engine_configuration(),
const InferenceEngine::ITaskExecutor::Ptr task_executor =
std::make_shared<InferenceEngine::CPUStreamsExecutor>(
InferenceEngine::CPUStreamsExecutor::Config()));
static std::shared_ptr<cldnn::engine> create(engine_types engine_type, runtime_types runtime_type);
protected:
/// Create engine for given @p device and @p configuration
engine(const device::ptr device, const engine_configuration& configuration, const InferenceEngine::ITaskExecutor::Ptr task_executor);
const InferenceEngine::ITaskExecutor::Ptr _task_executor;
engine(const device::ptr device);
const device::ptr _device;
engine_configuration _configuration;
mutable std::mutex _mutex;
std::map<allocation_type, std::atomic<uint64_t>> _memory_usage_map;


@ -13,101 +13,23 @@
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @defgroup cpp_engine Execution Engine
/// @{
/// @brief Defines available engine types
enum class engine_types : int32_t {
ocl,
};
inline std::ostream& operator<<(std::ostream& os, engine_types type) {
switch (type) {
case engine_types::ocl: os << "ocl"; break;
default: os << "unknown"; break;
}
return os;
}
/// @brief Defines available runtime types
enum class runtime_types : int32_t {
ocl,
};
/// @brief Defines available priority mode types
enum class priority_mode_types : int16_t {
disabled,
low,
med,
high
};
/// @brief Defines available throttle mode types
enum class throttle_mode_types : int16_t {
disabled,
low,
med,
high
};
/// @brief Defines supported queue types
enum class queue_types : int16_t {
in_order,
out_of_order
};
/// @brief Configuration parameters for created engine.
struct engine_configuration {
const bool enable_profiling; ///< Enable per-primitive profiling.
const queue_types queue_type; ///< Specifies type of queue used by the runtime
const std::string sources_dumps_dir; ///< Specifies a directory where sources of cldnn::program objects should be dumped.
///< Empty by default (means no dumping).
const priority_mode_types priority_mode; ///< Priority mode (support of priority hints in command queue). If cl_khr_priority_hints extension
///< is not supported by current OpenCL implementation, the value must be set to cldnn_priority_disabled.
const throttle_mode_types throttle_mode; ///< Throttle mode (support of throttle hints in command queue). If cl_khr_throttle_hints extension
///< is not supported by current OpenCL implementation, the value must be set to cldnn_throttle_disabled.
bool use_memory_pool; ///< Enables memory usage optimization. memory objects will be reused when possible
///< (switched off for older drivers than NEO).
bool use_unified_shared_memory; ///< Enables USM usage
const std::string kernels_cache_path; ///< Path to compiled kernels cache
uint16_t throughput_streams; ///< Number of queues/streams executed in parallel by GPU plugin
const std::string tuning_cache_path; ///< Path to tuning kernel cache
/// @brief Constructs engine configuration with specified options.
/// @param enable_profiling Enable per-primitive profiling.
/// @param queue_type Specifies type of queue used by the runtime
/// @param sources_dumps_dir Specifies a directory where sources of cldnn::program objects should be dumped
/// @param priority_mode Priority mode for all streams created within the engine
/// @param throttle_mode Throttle mode for all streams created within the engine
/// @param use_memory_pool Controls whether engine is allowed to reuse intermediate memory buffers within a network
/// @param use_unified_shared_memory If this option is true and the device supports USM, then the engine will use USM for all memory allocations
/// @param kernels_cache_path Path to existing directory where plugin can cache compiled kernels
/// @param n_threads Max number of host threads used in gpu plugin
/// @param throughput_streams Number of queues/streams executed in parallel by GPU plugin
/// @param tuning_cache_path Path to tuning kernel cache
engine_configuration(
bool enable_profiling = false,
queue_types queue_type = queue_types::out_of_order,
const std::string& sources_dumps_dir = std::string(),
priority_mode_types priority_mode = priority_mode_types::med,
throttle_mode_types throttle_mode = throttle_mode_types::med,
bool use_memory_pool = true,
bool use_unified_shared_memory = true,
const std::string& kernels_cache_path = "",
uint16_t throughput_streams = 1,
const std::string& tuning_cache_path = "cache.json")
: enable_profiling(enable_profiling)
, queue_type(queue_type)
, sources_dumps_dir(sources_dumps_dir)
, priority_mode(priority_mode)
, throttle_mode(throttle_mode)
, use_memory_pool(use_memory_pool)
, use_unified_shared_memory(use_unified_shared_memory)
, kernels_cache_path(kernels_cache_path)
, throughput_streams(throughput_streams)
, tuning_cache_path(tuning_cache_path) { }
};
/// @}
/// @}
} // namespace cldnn


@ -0,0 +1,162 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "intel_gpu/runtime/internal_properties.hpp"
#include "intel_gpu/runtime/device.hpp"
namespace ov {
namespace intel_gpu {
enum class PropertyVisibility {
INTERNAL = 0,
PUBLIC = 1
};
inline std::ostream& operator<<(std::ostream& os, const PropertyVisibility& visibility) {
switch (visibility) {
case PropertyVisibility::PUBLIC: os << "PUBLIC"; break;
case PropertyVisibility::INTERNAL: os << "INTERNAL"; break;
default: os << "UNKNOWN"; break;
}
return os;
}
class BaseValidator {
public:
using Ptr = std::shared_ptr<BaseValidator>;
virtual ~BaseValidator() = default;
virtual bool is_valid(const ov::Any& v) const = 0;
};
class FuncValidator : public BaseValidator {
public:
explicit FuncValidator(std::function<bool(const ov::Any)> func) : m_func(func) { }
bool is_valid(const ov::Any& v) const override {
return m_func(v);
}
private:
std::function<bool(const ov::Any)> m_func;
};
// PropertyTypeValidator ensures that value can be converted to given property type
template<typename T>
class PropertyTypeValidator : public BaseValidator {
public:
bool is_valid(const ov::Any& v) const override {
try {
v.as<T>();
return true;
} catch (ov::Exception&) {
return false;
}
}
};
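// Sketch (illustrative only; example_is_valid_bool is a hypothetical helper):
// PropertyTypeValidator<bool> accepts any ov::Any convertible to bool and rejects everything else.
inline bool example_is_valid_bool(const ov::Any& value) {
    return PropertyTypeValidator<bool>().is_valid(value);
}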
class ExecutionConfig {
public:
ExecutionConfig();
ExecutionConfig(std::initializer_list<ov::AnyMap::value_type> values) : ExecutionConfig() { set_property(ov::AnyMap(values)); }
explicit ExecutionConfig(const ov::AnyMap& properties) : ExecutionConfig() { set_property(properties); }
explicit ExecutionConfig(const ov::AnyMap::value_type& property) : ExecutionConfig() { set_property(property); }
void set_default();
void set_property(const ov::AnyMap& properties);
void set_user_property(const ov::AnyMap& properties);
Any get_property(const std::string& name) const;
bool is_set_by_user(const std::string& name) const;
bool is_supported(const std::string& name) const;
void register_property_impl(const std::pair<std::string, ov::Any>& property, PropertyVisibility visibility, BaseValidator::Ptr validator);
template <PropertyVisibility visibility, typename... PropertyInitializer, typename std::enable_if<(sizeof...(PropertyInitializer) == 0), bool>::type = true>
void register_property_impl() { }
template <PropertyVisibility visibility, typename T, PropertyMutability mutability, typename ValueT, typename... PropertyInitializer>
void register_property_impl(const std::tuple<ov::Property<T, mutability>, ValueT>& property, PropertyInitializer&&... properties) {
auto p = std::get<0>(property)(std::get<1>(property));
auto v = std::dynamic_pointer_cast<BaseValidator>(std::make_shared<PropertyTypeValidator<T>>());
register_property_impl(std::move(p), visibility, std::move(v));
register_property_impl<visibility>(properties...);
}
template <PropertyVisibility visibility,
typename T,
PropertyMutability mutability,
typename ValueT,
typename ValidatorT,
typename... PropertyInitializer>
typename std::enable_if<std::is_base_of<BaseValidator, ValidatorT>::value, void>::type
register_property_impl(const std::tuple<ov::Property<T, mutability>, ValueT, ValidatorT>& property, PropertyInitializer&&... properties) {
auto p = std::get<0>(property)(std::get<1>(property));
auto v = std::dynamic_pointer_cast<BaseValidator>(std::make_shared<ValidatorT>(std::get<2>(property)));
register_property_impl(std::move(p), visibility, std::move(v));
register_property_impl<visibility>(properties...);
}
template <PropertyVisibility visibility,
typename T,
PropertyMutability mutability,
typename ValueT,
typename ValidatorT,
typename... PropertyInitializer>
typename std::enable_if<std::is_same<std::function<bool(const ov::Any&)>, ValidatorT>::value, void>::type
register_property_impl(const std::tuple<ov::Property<T, mutability>, ValueT, ValidatorT>& property, PropertyInitializer&&... properties) {
auto p = std::get<0>(property)(std::get<1>(property));
auto v = std::dynamic_pointer_cast<BaseValidator>(std::make_shared<FuncValidator>(std::get<2>(property)));
register_property_impl(std::move(p), visibility, std::move(v));
register_property_impl<visibility>(properties...);
}
template <PropertyVisibility visibility, typename... PropertyInitializer>
void register_property(PropertyInitializer&&... properties) {
register_property_impl<visibility>(properties...);
}
template <typename... Properties>
util::EnableIfAllStringAny<void, Properties...> set_property(Properties&&... properties) {
set_property(ov::AnyMap{std::forward<Properties>(properties)...});
}
template <typename... Properties>
util::EnableIfAllStringAny<void, Properties...> set_user_property(Properties&&... properties) {
set_user_property(ov::AnyMap{std::forward<Properties>(properties)...});
}
template <typename T, PropertyMutability mutability>
bool is_set_by_user(const ov::Property<T, mutability>& property) const {
return is_set_by_user(property.name());
}
template <typename T, PropertyMutability mutability>
T get_property(const ov::Property<T, mutability>& property) const {
return get_property(property.name()).template as<T>();
}
void apply_user_properties(const cldnn::device_info& info);
std::string to_string() const;
protected:
void apply_hints(const cldnn::device_info& info);
void apply_performance_hints(const cldnn::device_info& info);
void apply_priority_hints(const cldnn::device_info& info);
void apply_debug_options(const cldnn::device_info& info);
private:
ov::AnyMap internal_properties;
ov::AnyMap user_properties;
std::map<std::string, PropertyVisibility> supported_properties;
std::map<std::string, BaseValidator::Ptr> property_validators;
};
} // namespace intel_gpu
} // namespace ov
namespace cldnn {
using ov::intel_gpu::ExecutionConfig;
} // namespace cldnn
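// Usage sketch (illustrative only; example_execution_config is a hypothetical helper and assumes
// ov::enable_profiling and ov::hint::inference_precision are registered by the plugin defaults):
// construct a config, layer a user property on top of it, and read a value back.
inline bool example_execution_config() {
    ov::intel_gpu::ExecutionConfig config{ov::enable_profiling(true)};
    config.set_user_property(ov::hint::inference_precision(ov::element::f16));
    return config.get_property(ov::enable_profiling);
}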


@ -0,0 +1,99 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "openvino/runtime/properties.hpp"
#include "openvino/runtime/intel_gpu/properties.hpp"
#include "intel_gpu/primitives/implementation_desc.hpp"
namespace ov {
namespace intel_gpu {
/**
* @brief Read-only property to get GPU driver version
*/
static constexpr Property<std::string, PropertyMutability::RO> driver_version{"GPU_DRIVER_VERSION"};
/**
* @brief Read-only property to get GPU driver version
*/
static constexpr Property<std::string, PropertyMutability::RO> device_id{"GPU_DEVICE_ID"};
enum class QueueTypes : int16_t {
in_order,
out_of_order
};
inline std::ostream& operator<<(std::ostream& os, const QueueTypes& val) {
switch (val) {
case QueueTypes::in_order: os << "in-order"; break;
case QueueTypes::out_of_order: os << "out-of-order"; break;
default: os << "unknown";
}
return os;
}
/**
* @brief Defines queue type that must be used for model execution
*/
static constexpr Property<QueueTypes, PropertyMutability::RW> queue_type{"GPU_QUEUE_TYPE"};
static constexpr Property<bool, PropertyMutability::RW> enable_memory_pool{"GPU_ENABLE_MEMORY_POOL"};
static constexpr Property<bool, PropertyMutability::RW> optimize_data{"GPU_OPTIMIZE_DATA"};
static constexpr Property<bool, PropertyMutability::RW> allow_static_input_reorder{"GPU_ALLOW_STATIC_INPUT_REORDER"};
static constexpr Property<bool, PropertyMutability::RW> partial_build_program{"GPU_PARTIAL_BUILD"};
static constexpr Property<bool, PropertyMutability::RW> allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"};
static constexpr Property<std::string, PropertyMutability::RW> dump_graphs{"GPU_DUMP_GRAPHS"};
static constexpr Property<std::vector<std::string>, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"};
/// @brief Tuning mode.
enum class TuningMode {
/// @brief Tuning is disabled.
tuning_disabled,
/// @brief Tuning using the cached data (no on-line tuning for non-existing data).
tuning_use_cache,
/// @brief Tuning using the cached data if exist, tune and update cache otherwise.
tuning_tune_and_cache,
/// @brief Tuning using the cached data and update tasks.
/// @details Performs updating tasks like removal of invalid caches, promoting to new format, etc.
/// No tuning for non-existing data.
tuning_use_and_update,
/// @brief Retune the cache data even if it exists.
tuning_retune_and_cache
};
struct TuningConfig {
TuningMode mode;
std::string cache_file_path;
TuningConfig() : mode(TuningMode::tuning_disabled), cache_file_path("") {}
};
inline std::ostream& operator<<(std::ostream& os, const TuningConfig& val) {
os << val.cache_file_path;
return os;
}
static constexpr Property<TuningConfig, PropertyMutability::RW> tuning_config{"GPU_TUNING_CONFIG"};
static constexpr Property<ImplForcingMap, PropertyMutability::RW> force_implementations{"GPU_FORCE_IMPLEMENTATIONS"};
static constexpr Property<std::string, PropertyMutability::RW> config_file{"CONFIG_FILE"};
static constexpr Property<bool, PropertyMutability::RW> enable_lp_transformations{"LP_TRANSFORMS_MODE"};
static constexpr Property<bool, PropertyMutability::RW> enable_dynamic_batch{"DYN_BATCH_ENABLED"};
static constexpr Property<size_t, PropertyMutability::RW> max_dynamic_batch{"DYN_BATCH_LIMIT"};
static constexpr Property<bool, PropertyMutability::RW> exclusive_async_requests{"EXCLUSIVE_ASYNC_REQUESTS"};
static constexpr Property<bool, PropertyMutability::RW> nv12_two_inputs{"GPU_NV12_TWO_INPUTS"};
} // namespace intel_gpu
} // namespace ov
namespace cldnn {
using ov::intel_gpu::QueueTypes;
} // namespace cldnn
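// Usage sketch (illustrative only; example_internal_defaults is a hypothetical helper):
// the internal GPU properties declared above compose into an ov::AnyMap just like public ones,
// which is how they are fed into ExecutionConfig::set_property.
inline ov::AnyMap example_internal_defaults() {
    return {ov::intel_gpu::queue_type(ov::intel_gpu::QueueTypes::in_order),
            ov::intel_gpu::optimize_data(true)};
}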


@ -9,6 +9,8 @@
#include <functional>
#include <iostream>
#include "kernel.hpp"
namespace cldnn {
struct primitive_impl;


@ -7,6 +7,7 @@
#include "event.hpp"
#include "kernel.hpp"
#include "kernel_args.hpp"
#include "execution_config.hpp"
#include <memory>
#include <vector>
@ -20,7 +21,7 @@ namespace cldnn {
class stream {
public:
using ptr = std::shared_ptr<stream>;
explicit stream(queue_types queue_type) : queue_type(queue_type) {}
explicit stream(QueueTypes queue_type) : queue_type(queue_type) {}
virtual ~stream() = default;
virtual void flush() const = 0;
@ -39,16 +40,16 @@ public:
virtual event::ptr create_user_event(bool set) = 0;
virtual event::ptr create_base_event() = 0;
queue_types get_queue_type() const { return queue_type; }
QueueTypes get_queue_type() const { return queue_type; }
static queue_types detect_queue_type(engine_types engine_type, void* queue_handle);
static QueueTypes detect_queue_type(engine_types engine_type, void* queue_handle);
#ifdef ENABLE_ONEDNN_FOR_GPU
virtual dnnl::stream& get_onednn_stream() const = 0;
virtual dnnl::stream& get_onednn_stream() = 0;
#endif
protected:
queue_types queue_type;
QueueTypes queue_type;
};
} // namespace cldnn


@ -12,8 +12,8 @@ class CompilationContext : public ICompilationContext {
public:
using compilation_queue_t = InferenceEngine::ThreadSafeQueue<ICompilationContext::Task>;
CompilationContext(cldnn::engine& engine, size_t program_id) {
_kernels_cache = cldnn::make_unique<kernels_cache>(engine, program_id, kernel_selector::KernelBase::get_db().get_batch_header_str());
CompilationContext(cldnn::engine& engine, const ExecutionConfig& config, size_t program_id) {
_kernels_cache = cldnn::make_unique<kernels_cache>(engine, config, program_id, nullptr, kernel_selector::KernelBase::get_db().get_batch_header_str());
_worker = std::thread([this](){
while (!_stop_compilation) {
CompilationContext::Task task;
@ -47,8 +47,8 @@ private:
std::atomic_bool _stop_compilation{false};
};
std::unique_ptr<ICompilationContext> ICompilationContext::create(cldnn::engine& engine, size_t program_id) {
return cldnn::make_unique<CompilationContext>(engine, program_id);
std::unique_ptr<ICompilationContext> ICompilationContext::create(cldnn::engine& engine, const ExecutionConfig& config, size_t program_id) {
return cldnn::make_unique<CompilationContext>(engine, config, program_id);
}
} // namespace cldnn


@ -46,7 +46,7 @@ void add_required_reorders::add_reorder(program& p, program_node* node, program_
}
void add_required_reorders::run(program& p) {
bool optimize_data = p.get_options().get<build_option_type::optimize_data>()->enabled();
bool optimize_data = p.get_config().get_property(ov::intel_gpu::optimize_data);
auto usr_itr = p.get_processing_order().begin();
while (usr_itr != p.get_processing_order().end()) {
auto& usr = *usr_itr++;


@ -26,7 +26,7 @@ void compile_graph::run(program& p) {
}
}
auto task_executor = p.get_engine().get_task_executor();
auto task_executor = p.get_task_executor();
auto& proc_order = p.get_processing_order();
std::vector<InferenceEngine::Task> tasks;
std::exception_ptr exception;


@ -400,9 +400,9 @@ void graph_initializations::handle_dynamic_lstm_node(program& p, lstm_dynamic_no
}
void graph_initializations::set_outputs(program& p) {
auto outputs_option = p.get_options().get<build_option_type::outputs>();
if (!outputs_option->outputs.empty()) {
for (auto const& output : outputs_option->outputs) {
auto custom_outputs = p.get_config().get_property(ov::intel_gpu::custom_outputs);
if (!custom_outputs.empty()) {
for (auto const& output : custom_outputs) {
auto o_node = p.get_node_ptr(output);
o_node->set_output(true);
p.outputs.push_back(o_node.get());


@ -29,7 +29,7 @@ void pre_replace_deconv::run(program& p) {
if (node->is_type<deconvolution>()) {
if (node->is_dynamic())
continue;
if (!p.get_options().get<build_option_type::optimize_data>()->enabled())
if (!p.get_config().get_property(ov::intel_gpu::optimize_data))
continue;
auto& deconv_node = node->as<deconvolution>();


@ -63,7 +63,7 @@ struct concat_in_place_optimization : pattern_match_optimization_typed<concat_in
};
bool concat_noop_optimization::match(concatenation_node& node) {
if (node.is_output() && !get_program().is_debug_build())
if (node.is_output())
return false;
if (node.is_dynamic())
return false;
@ -82,7 +82,7 @@ bool concat_noop_optimization::optimize(concatenation_node& node) {
}
bool concat_in_place_optimization::match(concatenation_node& node) {
if (node.is_output() && !get_program().is_debug_build())
if (node.is_output())
return false;
if (node.has_fused_primitives() || !node.get_fused_activations_funcs().empty())
return false;
@ -191,8 +191,7 @@ bool concat_in_place_optimization::match(concatenation_node& node) {
// if an input is marked as network output, prevent optimizations
// which would affect a form of its output (unless debug flag is set),
// we also need to restrict input types to those which support padding on all axis
if ((input.first->is_output() && !get_program().is_debug_build()) ||
!input.first->is_padding_supported(concat_axis, lower_padd_in_axis))
if (input.first->is_output() || !input.first->is_padding_supported(concat_axis, lower_padd_in_axis))
return false;
// TODO: Investigate if this condition is needed
@ -306,7 +305,6 @@ static bool can_reshape_be_optimized(const reshape_node& node) {
// ToDo remove friendship relation from program_node
void prepare_buffer_fusing::run(program& p) {
bool is_debug = p.get_options().get<build_option_type::debug>()->enabled();
/*
We need to take care of proper ordering by types.
1. Concats
@ -348,10 +346,10 @@ void prepare_buffer_fusing::run(program& p) {
if (!can_optimize(node))
continue;
// zero copy
program_helpers::do_for_types<crop>(*node, [&p, is_debug](crop_node& node) {
program_helpers::do_for_types<crop>(*node, [&p](crop_node& node) {
// if the node is marked as network output, prevent optimizations which would affect a form of its output,
// unless debug flag is set
if (node.is_output() && !is_debug)
if (node.is_output())
return;
// do not optimize when next node is concatenation which is not output


@ -227,13 +227,12 @@ void prepare_primitive_fusing::fuse_reorders(program &p) {
}
void prepare_primitive_fusing::fuse_activations(program &p) {
bool is_debug = p.get_options().get<build_option_type::debug>()->enabled();
std::map<primitive_id, std::vector<std::pair<primitive_id, size_t>>> fusing_history;
bool use_onednn_impls = false;
#ifdef ENABLE_ONEDNN_FOR_GPU
auto& engine = p.get_engine();
if (engine.get_device_info().supports_immad && engine.configuration().queue_type == queue_types::in_order)
if (engine.get_device_info().supports_immad && p.get_config().get_property(ov::intel_gpu::queue_type) == QueueTypes::in_order)
use_onednn_impls = true;
#endif
@ -242,7 +241,7 @@ void prepare_primitive_fusing::fuse_activations(program &p) {
auto node_itr = itr++;
auto& node = (*node_itr);
program_helpers::do_for_types<activation>(*node, [&p, &is_debug, &fusing_history, &use_onednn_impls](activation_node& node) {
program_helpers::do_for_types<activation>(*node, [&p, &fusing_history, &use_onednn_impls](activation_node& node) {
auto& input = node.input();
auto id = node.id();
// Restrictions:
@ -251,7 +250,7 @@ void prepare_primitive_fusing::fuse_activations(program &p) {
// - no activation additional input
// - input was optimized
// - can't have fused primitives
if (node.has_padded_dependency() || (input.is_output() && !is_debug) || node.is_output() ||
if (node.has_padded_dependency() || input.is_output() || node.is_output() ||
node.get_dependencies().size() != 1 || input.can_be_optimized() || node.is_constant() ||
node.has_fused_primitives())
return;


@ -24,7 +24,7 @@ void propagate_constants::run(program& p) {
handle_constant(p, *node);
}
auto&& to_replace = calculate(p.get_engine(), p.get_options());
auto&& to_replace = calculate(p.get_engine(), p.get_config(), p.get_task_executor());
// remove all nodes which are no longer relevant, i.e. nodes which:
// 1. are constants, and
@ -108,13 +108,16 @@ bool propagate_constants::has_non_const_user(program_node& node) const {
return false;
}
std::list<std::pair<primitive_id, memory::ptr>> propagate_constants::calculate(engine& engine, build_options bo) {
std::list<std::pair<primitive_id, memory::ptr>> propagate_constants::calculate(engine& engine,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor) {
if (!has_non_trivial_constants)
return {};
bo.set_option(build_option::optimize_data(false));
bo.set_option(build_option::outputs(const_outputs));
network::ptr net = network::build_network(engine, nodes, bo, true);
ExecutionConfig cf_config = config;
cf_config.set_property(ov::intel_gpu::optimize_data(false));
cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs));
network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true);
for (auto& cin : const_inputs)
net->set_input_data(cin->id(), cin->get_attached_memory_ptr());


@ -30,6 +30,7 @@ void select_preferred_formats::run(program& p) {
return;
#ifdef ENABLE_ONEDNN_FOR_GPU
engine.create_onednn_engine(p.get_config());
for (auto n : p.get_processing_order()) {
// Onednn primitive descriptor creation may fail, for example, due to asymmetric weight.
try {


@ -75,10 +75,10 @@ public:
uint32_t dilation_x = dilation.size() >= 1 ? dilation[dilation.size() - 1] : 1;
params.dilation = {dilation_x, dilation_y, dilation_z};
const auto& tuning_config = impl_param.get_program().get_options().get<build_option_type::tuning_config>();
const auto& tuning_config = impl_param.get_program().get_config().get_property(ov::intel_gpu::tuning_config);
if (tuning_config->config.mode == tuning_mode::tuning_tune_and_cache ||
tuning_config->config.mode == tuning_mode::tuning_retune_and_cache) {
if (tuning_config.mode == ov::intel_gpu::TuningMode::tuning_tune_and_cache ||
tuning_config.mode == ov::intel_gpu::TuningMode::tuning_retune_and_cache) {
optional_params.tuningParams.runner =
std::make_shared<gpu::kernel_runner>(impl_param.get_program().get_engine(), impl_param.get_program().get_id(), true);
}


@ -166,10 +166,10 @@ public:
auto& kernel_selector = kernel_selector::convolution_kernel_selector::Instance();
const auto& tuning_config = arg.get_program().get_options().get<build_option_type::tuning_config>();
const auto& tuning_config = impl_param.get_program().get_config().get_property(ov::intel_gpu::tuning_config);
if (tuning_config->config.mode == tuning_mode::tuning_tune_and_cache ||
tuning_config->config.mode == tuning_mode::tuning_retune_and_cache) {
if (tuning_config.mode == ov::intel_gpu::TuningMode::tuning_tune_and_cache ||
tuning_config.mode == ov::intel_gpu::TuningMode::tuning_retune_and_cache) {
conv_optional_params.tuningParams.runner =
std::make_shared<gpu::kernel_runner>(arg.get_program().get_engine(), arg.get_program().get_id(), true, true);
}


@ -108,6 +108,7 @@ public:
static std::unique_ptr<primitive_impl> create(const concatenation_node& arg, const kernel_impl_params& impl_params) {
auto& engine = impl_params.prog->get_engine();
auto& config = impl_params.prog->get_config();
if (arg.can_be_optimized())
return make_unique<concatenation_onednn>(engine);
auto prim = impl_params.typed_desc<concatenation>();
@ -116,7 +117,7 @@ public:
std::shared_ptr<void> dummy = nullptr;
return cldnn::make_unique<concatenation_onednn>(engine, dummy, attr, *desc);
return cldnn::make_unique<concatenation_onednn>(engine, config, dummy, attr, *desc);
}
};


@ -190,11 +190,12 @@ public:
static std::unique_ptr<primitive_impl> create(const convolution_node& arg, const kernel_impl_params& impl_params) {
auto& engine = impl_params.prog->get_engine();
auto& config = impl_params.prog->get_config();
auto desc = get_convolution_descriptor(impl_params);
auto attr = get_primitive_attributes(arg);
dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr};
return cldnn::make_unique<convolution_onednn>(engine, desc, attr, prim_desc, get_weights_reorder(impl_params, prim_desc, arg.get_transposed()));
return cldnn::make_unique<convolution_onednn>(engine, config, desc, attr, prim_desc, get_weights_reorder(impl_params, prim_desc, arg.get_transposed()));
}
};


@ -115,11 +115,12 @@ public:
static std::unique_ptr<primitive_impl> create(const deconvolution_node& arg, const kernel_impl_params& impl_params) {
auto& engine = impl_params.prog->get_engine();
auto& config = impl_params.prog->get_config();
auto desc = get_deconvolution_descriptor(impl_params);
auto attr = get_primitive_attributes(arg);
dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr};
return cldnn::make_unique<deconvolution_onednn>(engine, desc, attr, prim_desc, get_weights_reorder(impl_params, prim_desc));
return cldnn::make_unique<deconvolution_onednn>(engine, config, desc, attr, prim_desc, get_weights_reorder(impl_params, prim_desc));
}
};


@ -177,11 +177,12 @@ public:
static std::unique_ptr<primitive_impl> create(const fully_connected_node& arg, const kernel_impl_params& impl_params) {
auto& engine = impl_params.prog->get_engine();
auto& config = impl_params.prog->get_config();
auto desc = get_fully_connected_descriptor(impl_params);
auto attr = arg.get_onednn_primitive_attributes();
dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr};
return cldnn::make_unique<fully_connected_onednn>(engine, desc, attr, prim_desc, get_weights_reorder(impl_params, prim_desc));
return cldnn::make_unique<fully_connected_onednn>(engine, config, desc, attr, prim_desc, get_weights_reorder(impl_params, prim_desc));
}
};


@ -158,11 +158,12 @@ public:
static std::unique_ptr<primitive_impl> create(const gemm_node& arg, const kernel_impl_params& impl_params) {
auto& engine = impl_params.prog->get_engine();
auto& config = impl_params.prog->get_config();
auto desc = get_gemm_descriptor(impl_params);
auto attr = arg.get_onednn_primitive_attributes();
dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr};
return cldnn::make_unique<gemm_onednn>(engine, desc, attr, prim_desc);
return cldnn::make_unique<gemm_onednn>(engine, config, desc, attr, prim_desc);
}
};


@ -102,11 +102,12 @@ public:
static std::unique_ptr<primitive_impl> create(const pooling_node& arg, const kernel_impl_params& impl_params) {
auto& engine = impl_params.prog->get_engine();
auto& config = impl_params.prog->get_config();
auto desc = get_pooling_descriptor(impl_params);
auto attr = arg.get_onednn_primitive_attributes();
dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr};
return cldnn::make_unique<pooling_onednn>(engine, desc, attr, prim_desc);
return cldnn::make_unique<pooling_onednn>(engine, config, desc, attr, prim_desc);
}
};


@ -40,6 +40,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
std::unordered_map<uint32_t, std::unordered_map<int, dnnl::memory>> _args;
typed_primitive_onednn_impl(const engine& engine,
const ExecutionConfig& config,
std::shared_ptr<DescType> desc,
std::shared_ptr<dnnl::primitive_attr> attrs,
const PrimDescType& pd,
@ -49,7 +50,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
_desc(desc),
_attrs(attrs),
_pd(pd) {
build_primitive();
build_primitive(config);
}
typed_primitive_onednn_impl(const engine& engine)
@ -362,8 +363,8 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
}
private:
std::string get_cache_directory() const {
auto path = _engine->configuration().kernels_cache_path;
std::string get_cache_directory(const ExecutionConfig& config) const {
auto path = config.get_property(ov::cache_dir);
if (path.empty()) {
return {};
}
@ -374,8 +375,8 @@ private:
return path;
}
std::string generate_cache_path_from_key(std::vector<uint8_t> key) const {
auto path = get_cache_directory();
std::string generate_cache_path_from_key(const ExecutionConfig& config, std::vector<uint8_t> key) const {
auto path = get_cache_directory(config);
if (path.empty()) {
return {};
}
@ -385,8 +386,8 @@ private:
return path + std::to_string(hash) + ".onednn.cl_cache";
}
void build_primitive() {
auto cache_outpath = get_cache_directory();
void build_primitive(const ExecutionConfig& config) {
auto cache_outpath = get_cache_directory(config);
if (const char* env_p = std::getenv("OV_GPU_CACHE_MODEL")) {
if (env_p[0] == '1') {
@ -403,7 +404,7 @@ private:
std::vector<uint8_t> cache;
{
std::lock_guard<std::mutex> lock(cacheAccessMutex);
cache = ov::util::load_binary(generate_cache_path_from_key(key));
cache = ov::util::load_binary(generate_cache_path_from_key(config, key));
}
if (cache.empty()) {
@ -412,7 +413,7 @@ private:
{
std::lock_guard<std::mutex> lock(cacheAccessMutex);
ov::util::save_binary(generate_cache_path_from_key(key), cache);
ov::util::save_binary(generate_cache_path_from_key(config, key), cache);
}
} else {
_prim = PrimType(_pd, cache);
@ -563,9 +564,8 @@ protected:
event::ptr execute_impl(const std::vector<event::ptr>& /* events */,
typed_primitive_inst<PType>& instance) override {
auto& network = instance.get_network();
auto& engine = network.get_engine();
auto& stream = network.get_stream();
auto profiling = engine.configuration().enable_profiling;
auto profiling = network.get_config().get_property(ov::enable_profiling);
auto net_id = network.get_id();
event::ptr event;
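Common to the onednn implementation hunks above: build- and run-time options are now read from an ExecutionConfig property map instead of engine.configuration(). Below is a minimal hedged sketch of that API, restricted to the two properties these hunks actually use (ov::cache_dir, ov::enable_profiling); the include path and the wrapper function are assumptions, not code from this patch.

#include "openvino/runtime/intel_gpu/properties.hpp"  // property keys; this header appears elsewhere in the diff
// The ExecutionConfig header path is assumed and not shown in this patch.

void sketch_configure(ExecutionConfig& config) {
    config.set_property(ov::cache_dir("/tmp/ov_gpu_cache"));  // replaces engine configuration().kernels_cache_path
    config.set_property(ov::enable_profiling(true));          // replaces engine.configuration().enable_profiling

    // Read side, mirroring get_cache_directory(config) and execute_impl() above:
    std::string cache_dir = config.get_property(ov::cache_dir);
    bool profiling        = config.get_property(ov::enable_profiling);
    (void)cache_dir; (void)profiling;
}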

View File

@ -118,11 +118,12 @@ public:
static std::unique_ptr<primitive_impl> create(const reduce_node& arg, const kernel_impl_params& impl_params) {
auto& engine = impl_params.prog->get_engine();
auto& config = impl_params.prog->get_config();
auto desc = get_reduction_descriptor(impl_params);
auto attr = arg.get_onednn_primitive_attributes();
dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr};
return cldnn::make_unique<reduction_onednn>(engine, desc, attr, prim_desc);
return cldnn::make_unique<reduction_onednn>(engine, config, desc, attr, prim_desc);
}
};

View File

@ -86,12 +86,13 @@ public:
static std::unique_ptr<primitive_impl> create(const reorder_node& arg, const kernel_impl_params& impl_params) {
auto& engine = impl_params.prog->get_engine();
auto& config = impl_params.prog->get_config();
auto attr = arg.get_onednn_primitive_attributes();
auto desc = get_reorder_descriptor(impl_params, *attr, impl_params.prog->get_engine());
std::shared_ptr<void> dummy = nullptr;
return cldnn::make_unique<reorder_onednn>(engine, dummy, attr, *desc);
return cldnn::make_unique<reorder_onednn>(engine, config, dummy, attr, *desc);
}
};

View File

@ -113,7 +113,7 @@ dnnl::memory::desc create_memory_desc_from_format_string(dnnl::memory::dims dims
template <typename T>
cldnn::memory::ptr convert_zp_data_to_s32(const memory::ptr zp_memory) {
auto engine = zp_memory->get_engine();
auto& stream = engine->get_program_stream();
auto& stream = engine->get_service_stream();
auto zp_s32_layout = zp_memory->get_layout();
zp_s32_layout.data_type = data_types::i32;
@ -493,7 +493,7 @@ template <typename T>
bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val) {
auto ptr = node.get_attached_memory_ptr();
auto engine = ptr->get_engine();
auto& stream = engine->get_program_stream();
auto& stream = engine->get_service_stream();
auto num_elems = node.get_output_layout().count();
mem_lock<T, mem_lock_type::read> old_data {ptr, stream};
auto val = old_data[0];

View File

@ -17,7 +17,7 @@ public:
virtual void cancel() noexcept = 0;
virtual ~ICompilationContext() = default;
static std::unique_ptr<ICompilationContext> create(cldnn::engine& engine, size_t program_id);
static std::unique_ptr<ICompilationContext> create(cldnn::engine& engine, const ExecutionConfig& config, size_t program_id);
};
} // namespace cldnn

View File

@ -26,7 +26,7 @@ private:
add_or_change_input_layout(node);
_program = program::build_program(node.get_program().get_engine(),
_topology,
node.get_program().get_options(),
node.get_program().get_config(),
true); // rebuild program
}
program::ptr get() const { return _program; }

View File

@ -101,7 +101,7 @@ kernel_selector::data_layout to_data_layout(format f);
cldnn::format from_data_layout(kernel_selector::data_layout l);
kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped);
cldnn::format::type from_weights_layout(kernel_selector::weights_layout l);
kernel_selector::tuning_mode to_tuning_mode(cldnn::tuning_mode mode);
kernel_selector::tuning_mode to_tuning_mode(ov::intel_gpu::TuningMode mode);
kernel_selector::data_tensor convert_data_tensor(const layout& l, const tensor view_offset = tensor {});
kernel_selector::weights_tensor convert_weights_tensor(const layout& l, bool is_grouped = false);
layout from_weights_tensor(const kernel_selector::weights_tensor& t);

View File

@ -199,7 +199,7 @@ public:
void set_optimization_attribute(optimization_attributes_type attribute, int32_t val);
optimization_attributes get_optimization_attributes() { return _optimization_attributes; }
void set_implementation_forcing(const implementation_forcing_map& map);
void set_implementation_forcing(const ov::intel_gpu::ImplForcingMap& map);
void update_formats_map(const convolution_node& node);
bool is_format_optimized(const convolution_node& node, const format& format, bool use_weak_restrictions = false);

View File

@ -311,10 +311,10 @@ public:
output_names.insert(get_condition_id());
}
auto opts = get_program().get_options();
std::vector<primitive_id> output_names_vec(output_names.begin(), output_names.end());
opts.set_option(build_option::outputs(output_names_vec));
body_program = program::build_program(get_program().get_engine(), body, opts, false, false, true);
auto config = get_program().get_config();
config.set_property(ov::intel_gpu::custom_outputs(output_names_vec));
body_program = program::build_program(get_program().get_engine(), body, config, false, false, true);
}
const primitive_id& get_trip_count_id() const { return get_primitive()->trip_count_id; }
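For the loop body rebuild above, ov::intel_gpu::custom_outputs replaces build_option::outputs. A compact hedged sketch of the copy-modify-build pattern follows; `parent` and `body_topology` are illustrative names, not identifiers from this patch.

auto config = parent.get_program().get_config();                   // copy, so the parent's config stays untouched
std::vector<primitive_id> outputs_vec(output_names.begin(), output_names.end());
config.set_property(ov::intel_gpu::custom_outputs(outputs_vec));   // was build_option::outputs
auto body_program = program::build_program(parent.get_program().get_engine(), body_topology, config,
                                           false /*is_internal*/, false /*no_optimizations*/, true /*is_body_program*/);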

View File

@ -276,7 +276,9 @@ public:
private:
void run(program& p) override;
std::list<std::pair<primitive_id, memory::ptr>> calculate(engine& engine, build_options bo);
std::list<std::pair<primitive_id, memory::ptr>> calculate(engine& engine,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor);
bool has_non_const_user(program_node& node) const;
void handle_constant(program& prog, program_node& node);
void add_constant(program& prog, program_node& node);

View File

@ -8,8 +8,7 @@
#include <string>
namespace cldnn {
std::string get_dir_path(build_options);
std::string get_serialization_network_name(build_options);
std::string get_dir_path(const ExecutionConfig& config);
void dump_graph_optimized(std::ofstream&, const program&);
void dump_graph_processing_order(std::ofstream&, const program&);

View File

@ -162,7 +162,7 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern
std::vector<std::chrono::nanoseconds> kernel_runner::run_kernels(const kernel_selector::KernelsData& kernels_data) {
std::vector<std::chrono::nanoseconds> run_times;
stream::ptr stream = _engine.create_stream();
stream::ptr stream = _engine.create_stream({});
int num_of_kernels_to_run = static_cast<int>(kernels_data.size());
int num_of_kernels_run = 0;
@ -174,7 +174,7 @@ std::vector<std::chrono::nanoseconds> kernel_runner::run_kernels(const kernel_se
batch_end = batch_start + current_compilation_batch;
std::vector<kernel::ptr> kernels;
kernels_cache cache(_engine, program_id);
kernels_cache cache(_engine, {}, program_id);
for (auto it = batch_start; it < batch_end; it++) {
auto kernel_id = cache.set_kernel_source(it->kernels[0].code.kernelString, false);

View File

@ -819,17 +819,17 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) {
}
}
kernel_selector::tuning_mode to_tuning_mode(cldnn::tuning_mode mode) {
kernel_selector::tuning_mode to_tuning_mode(ov::intel_gpu::TuningMode mode) {
switch (mode) {
case cldnn::tuning_mode::tuning_disabled:
case ov::intel_gpu::TuningMode::tuning_disabled:
return kernel_selector::tuning_mode::TUNING_DISABLED;
case cldnn::tuning_mode::tuning_use_cache:
case ov::intel_gpu::TuningMode::tuning_use_cache:
return kernel_selector::tuning_mode::TUNING_USE_CACHE;
case cldnn::tuning_mode::tuning_tune_and_cache:
case ov::intel_gpu::TuningMode::tuning_tune_and_cache:
return kernel_selector::tuning_mode::TUNING_TUNE_AND_CACHE;
case cldnn::tuning_mode::tuning_use_and_update:
case ov::intel_gpu::TuningMode::tuning_use_and_update:
return kernel_selector::tuning_mode::TUNING_USE_AND_UPDATE;
case cldnn::tuning_mode::tuning_retune_and_cache:
case ov::intel_gpu::TuningMode::tuning_retune_and_cache:
return kernel_selector::tuning_mode::TUNING_RETUNE_AND_CACHE;
default:
return kernel_selector::tuning_mode::TUNING_DISABLED;
@ -1041,8 +1041,7 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p
params.engineInfo.supportedSimdSizes = device_info.supported_simd_sizes;
params.engineInfo.vendor_id = device_info.vendor_id;
auto impl_forcing_bo = program->get_options().get<build_option_type::force_implementations>();
const auto& impl_forcing = impl_forcing_bo->forcing;
auto impl_forcing = program->get_config().get_property(ov::intel_gpu::force_implementations);
if (impl_forcing.count(param_info.desc->id) != 0) {
params.forceImplementation = impl_forcing.at(param_info.desc->id).kernel_name;
@ -1051,14 +1050,14 @@ void set_params(const kernel_impl_params& param_info, kernel_selector::params& p
void set_optional_params(const program& program, kernel_selector::optional_params& params) {
params.meaningfulKernelsNames = false;
params.allowStaticInputReordering = program.get_options().get<build_option_type::optimize_data>()->enabled() ||
program.get_options().get<build_option_type::allow_static_input_reorder>()->enabled();
params.allowStaticInputReordering = program.get_config().get_property(ov::intel_gpu::optimize_data) ||
program.get_config().get_property(ov::intel_gpu::allow_static_input_reorder);
params.allowInputReordering = false;
params.allowOutputReordering = false;
const auto& tuning_config = program.get_options().get<build_option_type::tuning_config>();
params.tuningParams.mode = to_tuning_mode(tuning_config->config.mode);
params.tuningParams.cacheFilePath = tuning_config->config.cache_file_path;
const auto& tuning_config = program.get_config().get_property(ov::intel_gpu::tuning_config);
params.tuningParams.mode = to_tuning_mode(tuning_config.mode);
params.tuningParams.cacheFilePath = tuning_config.cache_file_path;
}
void kernel_impl_params::save(BinaryOutputBuffer& ob) const {
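The same property map also replaces the force_implementations build option. A minimal hedged sketch of the lookup performed in set_params() above; `prim_id` is an illustrative primitive id, and the descriptor field (kernel_name) is the one referenced in this diff.

auto impl_forcing = program->get_config().get_property(ov::intel_gpu::force_implementations);
if (impl_forcing.count(prim_id) != 0) {
    params.forceImplementation = impl_forcing.at(prim_id).kernel_name;  // same access pattern as above
}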

View File

@ -1416,7 +1416,7 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
const size_t kBatchNum = scores_layout.batch();
const size_t kClassNum = scores_layout.feature();
const size_t kNStreams =
static_cast<size_t>(node.get_program().get_engine().configuration().throughput_streams);
static_cast<size_t>(node.get_program().get_config().get_property(ov::streams::num));
const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast<size_t>(8)) * kNStreams;
preferred_impl = (kKeyValue > 64) ? impl_types::ocl : impl_types::cpu;
}
@ -1668,7 +1668,7 @@ format layout_optimizer::get_preferred_format(program_node& node) {
auto output_layout = node.get_output_layout();
bool use_onednn_impls = _optimization_attributes.use_onednn_impls;
bool allow_new_shape_infer = node.get_program().get_options().get<build_option_type::allow_new_shape_infer>()->enabled();
bool allow_new_shape_infer = node.get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
if (allow_new_shape_infer) {
if (node.is_type<shape_of>())
@ -2013,7 +2013,7 @@ bool layout_optimizer::is_format_optimized(const deconvolution_node& node, const
}
}
void layout_optimizer::set_implementation_forcing(const implementation_forcing_map& map) {
void layout_optimizer::set_implementation_forcing(const ov::intel_gpu::ImplForcingMap& map) {
for (const auto& kv : map) {
_forcing_map.emplace(kv.first, std::make_pair(kv.second.output_format, kv.second.impl_type));
}

View File

@ -277,8 +277,9 @@ static uint32_t get_unique_net_id() {
Network will always have net_id = 0 when it is a cldnn internal micronetwork (created e.g. by the propagate_constants
opt pass).
*/
network::network(program::ptr program, stream::ptr stream, bool is_internal, bool is_primary_stream)
network::network(program::ptr program, const ExecutionConfig& config, stream::ptr stream, bool is_internal, bool is_primary_stream)
: _program(program)
, _config(config)
, _engine(program->get_engine())
, _stream(stream)
, _memory_pool(new memory_pool(program->get_engine()))
@ -304,34 +305,42 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo
if (is_dynamic()) {
GPU_DEBUG_DEFINE_MEM_LOGGER("dynamic_network_initialization");
_kernels_cache = std::unique_ptr<kernels_cache>(new kernels_cache(program->get_engine(), program->get_id(),
_kernels_cache = std::unique_ptr<kernels_cache>(new kernels_cache(program->get_engine(),
program->get_config(),
program->get_id(),
program->get_task_executor(),
kernel_selector::KernelBase::get_db().get_batch_header_str()));
_impls_cache = std::unique_ptr<ImplementationsCache>(new ImplementationsCache(_impls_cache_capacity));
_in_mem_kernels_cache = std::unique_ptr<KernelsCache>(new KernelsCache(_in_mem_kernels_cache_capacity));
_compilation_context = std::move(ICompilationContext::create(program->get_engine(), program->get_id()));
_compilation_context = std::move(ICompilationContext::create(program->get_engine(), program->get_config(), program->get_id()));
}
}
network::network(engine& engine,
const topology& topo,
const build_options& options,
const ExecutionConfig& config,
bool is_internal)
: network(program::build_program(engine, topo, options, is_internal), engine.create_stream(), is_internal) {}
: network(program::build_program(engine, topo, config, is_internal), config, engine.create_stream(config), is_internal) {}
network::network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const build_options& options,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
bool is_internal)
: network(program::build_program(engine, nodes, options, is_internal), engine.create_stream(), is_internal) {}
: network(program::build_program(engine, nodes, config, task_executor, is_internal), config, engine.create_stream(config), is_internal) {}
network::network(program::ptr program, uint16_t stream_id)
: network(program, program->get_engine().create_stream(), false, stream_id == 0) {}
: network(program, program->get_config(), program->get_engine().create_stream(program->get_config()), false, stream_id == 0) {}
network::network(program::ptr program, stream::ptr stream, uint16_t stream_id)
: network(program, stream, false, stream_id == 0) {}
: network(program, program->get_config(), stream, false, stream_id == 0) {}
network::network(cldnn::BinaryInputBuffer& ib, stream::ptr stream, engine& engine, uint16_t stream_id)
: network(ib, ExecutionConfig{}, stream, engine, stream_id) {}
network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, stream::ptr stream, engine& engine, uint16_t stream_id)
: _program(nullptr)
, _config(config)
, _engine(engine)
, _stream(stream)
, _memory_pool(new memory_pool(engine))
@ -340,7 +349,7 @@ network::network(cldnn::BinaryInputBuffer& ib, stream::ptr stream, engine& engin
, _reset_arguments(true) {
net_id = get_unique_net_id();
kernels_cache kernels_cache(get_engine(), 0, {""});
kernels_cache kernels_cache(get_engine(), config, 0, nullptr, {""});
ib >> kernels_cache;
int num_data_nodes;
@ -442,7 +451,7 @@ network::~network() {
// [ executable primitive_inst ]
// [ memory reuse information ]
void network::save(cldnn::BinaryOutputBuffer& ob) {
kernels_cache kernels_cache(get_engine(), 0, {""});
kernels_cache kernels_cache(get_engine(), _config, 0, nullptr, {""});
for (const auto& p_inst : _exec_order) {
if (p_inst->get_impl() != nullptr)
kernels_cache.add_kernels(p_inst->get_impl()->get_kernel_ids(), p_inst->get_impl()->get_kernels());
@ -505,26 +514,27 @@ void network::save(cldnn::BinaryOutputBuffer& ob) {
}
network::ptr network::allocate_network(stream::ptr stream, program::ptr program, bool is_internal, bool is_primary_stream) {
return std::make_shared<network>(program, stream, is_internal, is_primary_stream);
return std::make_shared<network>(program, program->get_config(), stream, is_internal, is_primary_stream);
}
network::ptr network::allocate_network(engine& engine, program::ptr program, bool is_internal, bool is_primary_stream) {
auto stream = engine.create_stream();
return std::make_shared<network>(program, stream, is_internal, is_primary_stream);
auto stream = engine.create_stream(program->get_config());
return std::make_shared<network>(program, program->get_config(), stream, is_internal, is_primary_stream);
}
network::ptr network::build_network(engine& engine,
const topology& topology,
const build_options& options,
const ExecutionConfig& config,
bool is_internal) {
return std::make_shared<network>(engine, topology, options, is_internal);
return std::make_shared<network>(engine, topology, config, is_internal);
}
network::ptr network::build_network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const build_options& options,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
bool is_internal) {
return std::make_shared<network>(engine, nodes, options, is_internal);
return std::make_shared<network>(engine, nodes, config, task_executor, is_internal);
}
void network::validate_primitives() {
@ -963,8 +973,7 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
}
// Store events only in case of OOO queue or enabled Profiling
auto store_events = get_stream().get_queue_type() == queue_types::out_of_order ||
get_engine().configuration().enable_profiling;
auto store_events = get_stream().get_queue_type() == QueueTypes::out_of_order || _config.get_property(ov::enable_profiling);
if (store_events) {
if (_program != nullptr) {
for (auto& inst : _program->get_processing_order()) {
@ -1113,8 +1122,8 @@ void network::execute_primitive(const std::shared_ptr<primitive_inst>& primitive
event::ptr ev = primitive->execute(events);
// Collect events only for OOO queue and Profiling mode
if (get_stream().get_queue_type() == queue_types::out_of_order ||
get_engine().configuration().enable_profiling) {
if (get_stream().get_queue_type() == QueueTypes::out_of_order ||
get_config().get_property(ov::enable_profiling)) {
auto id = primitive->id();
_events.insert({id, ev});
}
@ -1203,7 +1212,7 @@ memory::ptr network::get_memory_from_pool(const layout& layout,
std::set<primitive_id> dependencies,
allocation_type type,
bool reusable) {
if (get_engine().configuration().use_memory_pool)
if (_config.get_property(ov::intel_gpu::enable_memory_pool))
return _memory_pool->get_memory(layout, id, get_id(), dependencies, type, reusable);
return _memory_pool->get_memory(layout, type);
}
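Taken together, the network.cpp changes make a network a (program, config, stream) triple, with the stream itself created from the config. A minimal hedged creation sketch using only signatures visible in these hunks; `engine` and `topology` are assumed to exist, and ExecutionConfig is the alias used throughout the patch.

ExecutionConfig config{ov::enable_profiling(false)};                        // brace-init with properties, as above
auto program = cldnn::program::build_program(engine, topology, config, false /*is_internal*/);
auto stream  = engine.create_stream(config);                                // stream now derives from the config
auto net     = cldnn::network::allocate_network(stream, program,
                                                false /*is_internal*/, true /*is_primary_stream*/);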

View File

@ -15,7 +15,7 @@
pass_manager::pass_manager(program& p) {
pass_count = 0;
auto path = get_dir_path(p.get_options());
auto path = get_dir_path(p.get_config());
if (!path.empty()) {
graph_opt_log.open(path + std::to_string(p.get_prog_id()) + "_cldnn_graph_optimizer.log");
if (graph_opt_log.is_open()) {

View File

@ -96,9 +96,13 @@ void primitive_inst::check_memory_to_set(const memory& mem, const layout& layout
// check shared image/buffer compatibility, if applicable
auto params = mem.get_internal_params();
if (params.mem_type != shared_mem_type::shared_mem_empty) {
if (!mem.is_allocated_by(get_network().get_engine())) {
CLDNN_ERROR_MESSAGE(_node->id(), "Memory object is not suitable");
}
auto& net_engine = get_network().get_engine();
auto& mem_engine = *mem.get_engine();
OPENVINO_ASSERT(mem.is_allocated_by(net_engine), "[GPU] Can't set memory due to engines mismatch. ",
"Network was created for ", &net_engine, " (",
net_engine.get_device_info().dev_name, ") engine",
" while memory object was allocated for ", &mem_engine, "(",
mem_engine.get_device_info().dev_name, ")");
switch (params.mem_type) {
case shared_mem_type::shared_mem_vasurface:
@ -182,7 +186,7 @@ void primitive_inst::update_shape() {
auto& dep = _node->get_dependency(i);
auto dep_id = dep.id();
// Events may be not created for in-order queue, so take them for OOO queue only
if (_network.has_event(dep.id()) && queue_type == queue_types::out_of_order) {
if (_network.has_event(dep.id()) && queue_type == QueueTypes::out_of_order) {
dependencies_events.push_back(_network.get_primitive_event(dep_id));
GPU_DEBUG_TRACE_DETAIL << id() << ": shape infer waits for " << i << " dependency\n";
}
@ -192,9 +196,9 @@ void primitive_inst::update_shape() {
}
if (has_runtime_deps) {
if (!dependencies_events.empty() && queue_type == queue_types::out_of_order) {
if (!dependencies_events.empty() && queue_type == QueueTypes::out_of_order) {
_network.get_stream().wait_for_events(dependencies_events);
} else if (queue_type == queue_types::in_order) {
} else if (queue_type == QueueTypes::in_order) {
_network.get_stream().finish();
}
}
@ -446,7 +450,7 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
dependencies = events;
} else {
auto queue_type = get_network().get_stream().get_queue_type();
if (queue_type == queue_types::out_of_order) {
if (queue_type == QueueTypes::out_of_order) {
dependencies.reserve(dependencies.size() + _exec_deps.size());
for (auto& input : _exec_deps) {
auto id = input->id();
@ -755,7 +759,7 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool,
uint32_t net_id, bool is_internal, size_t idx) {
auto get_memory_from_pool = [&](engine& _engine, const layout& layout, const primitive_id id, std::set<primitive_id> dependencies,
allocation_type type, bool reusable) {
if (_engine.configuration().use_memory_pool)
if (_node.get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool))
return pool.get_memory(layout, id, net_id, dependencies, type, reusable);
return pool.get_memory(layout, type);
};
@ -933,10 +937,11 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() {
in = _node->get_dependency(i).id();
}
}
build_options bo;
bo.set_option(build_option::allow_static_input_reorder(true));
bo.set_option(build_option::allow_new_shape_infer(true));
auto prog = program::build_program(get_network().get_engine(), t, bo, true, false);
ExecutionConfig subgraph_config{
ov::intel_gpu::allow_static_input_reorder(true),
ov::intel_gpu::allow_new_shape_infer(true)
};
auto prog = program::build_program(get_network().get_engine(), t, subgraph_config, true, false);
_unfused_subgraph = network::allocate_network(get_network().get_stream_ptr(), prog, true, get_network().is_primary_stream());
}

View File

@ -8,6 +8,8 @@
#include "intel_gpu/runtime/debug_configuration.hpp"
#include "intel_gpu/graph/program.hpp"
#include <ie_system_conf.h>
#include "kernel_selector_helper.h"
#include "device_cache_reader.h"
#include "auto_tuner.h"
@ -98,13 +100,13 @@ using namespace ov::intel_gpu;
program::program(engine& engine_ref,
topology const& topology,
build_options const& options,
const ExecutionConfig& config,
bool is_internal,
bool no_optimizations,
bool is_body_program)
: _engine(engine_ref),
_stream(_engine.create_stream()),
options(options),
_stream(_engine.create_stream(config)),
_config(config),
processing_order(),
tuning_cache(nullptr),
is_body_program(is_body_program),
@ -112,10 +114,13 @@ program::program(engine& engine_ref,
init_primitives();
set_options();
query_local_block_io_supported();
_task_executor = make_task_executor(_config);
GPU_DEBUG_INFO << "Program config\n" << config.to_string();
pm = std::unique_ptr<pass_manager>(new pass_manager(*this));
prepare_nodes(topology);
_kernels_cache = std::unique_ptr<kernels_cache>(new kernels_cache(_engine, prog_id,
_kernels_cache = std::unique_ptr<kernels_cache>(new kernels_cache(_engine, _config, prog_id, _task_executor,
kernel_selector::KernelBase::get_db().get_batch_header_str()));
program_node::reset_unique_id();
@ -128,11 +133,13 @@ program::program(engine& engine_ref,
program::program(engine& engine_ref,
std::set<std::shared_ptr<program_node>> const& nodes,
build_options const& options,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
bool is_internal)
: _engine(engine_ref),
_stream(_engine.create_stream()),
options(options),
_stream(_engine.create_stream(config)),
_config(config),
_task_executor(task_executor),
processing_order(),
tuning_cache(nullptr),
is_subgroup_local_block_io_supported(-1) {
@ -140,7 +147,9 @@ program::program(engine& engine_ref,
set_options();
query_local_block_io_supported();
_kernels_cache = std::unique_ptr<kernels_cache>(new kernels_cache(_engine, prog_id,
_task_executor = make_task_executor(_config);
_kernels_cache = std::unique_ptr<kernels_cache>(new kernels_cache(_engine, _config, prog_id, _task_executor,
kernel_selector::KernelBase::get_db().get_batch_header_str()));
pm = std::unique_ptr<pass_manager>(new pass_manager(*this));
prepare_nodes(nodes);
@ -149,8 +158,8 @@ program::program(engine& engine_ref,
program::program(engine& engine)
: _engine(engine),
_stream(_engine.create_stream()),
options(build_options()),
_stream(_engine.create_stream({})),
_config(),
processing_order(),
tuning_cache(nullptr),
is_subgroup_local_block_io_supported(-1) { }
@ -171,6 +180,42 @@ void program::init_primitives() {
}
}
static void adjust_num_cores(InferenceEngine::CPUStreamsExecutor::Config& config) {
if (InferenceEngine::getAvailableCoresTypes().size() == 1) {
return;
}
const auto total_num_cores = InferenceEngine::getNumberOfLogicalCPUCores();
const auto total_num_big_cores = InferenceEngine::getNumberOfLogicalCPUCores(true);
const auto total_num_little_cores = total_num_cores - total_num_big_cores;
auto core_type = config._threadPreferredCoreType;
int num_cores = total_num_cores;
if (core_type == InferenceEngine::IStreamsExecutor::Config::BIG) {
num_cores = total_num_big_cores;
} else if (core_type == InferenceEngine::IStreamsExecutor::Config::LITTLE) {
num_cores = total_num_little_cores;
}
config._streams = std::min(config._streams, num_cores);
}
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> program::make_task_executor(const ExecutionConfig& config) const {
InferenceEngine::CPUStreamsExecutor::Config task_executor_config("CPU Tasks executor for GPU plugin", 1);
task_executor_config._streams = config.get_property(ov::compilation_num_threads);
auto priority = config.get_property(ov::intel_gpu::hint::host_task_priority);
switch (priority) {
case ov::hint::Priority::LOW: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::LITTLE; break;
case ov::hint::Priority::MEDIUM: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::ANY; break;
case ov::hint::Priority::HIGH: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::BIG; break;
default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority);
}
adjust_num_cores(task_executor_config);
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(task_executor_config);
}
void program::compile() {
GPU_DEBUG_DEFINE_MEM_LOGGER("compile");
_kernels_cache->build_all();
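The new make_task_executor() derives the compilation executor entirely from config properties. A short hedged sketch of the two knobs it reads; the values are illustrative.

ExecutionConfig config;
config.set_property(ov::compilation_num_threads(4));                                      // -> task_executor_config._streams
config.set_property(ov::intel_gpu::hint::host_task_priority(ov::hint::Priority::HIGH));   // -> BIG core type
// program's constructors then call make_task_executor(config) and hand the executor to kernels_cache.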
@ -190,7 +235,7 @@ void program::load_tuning_cache() {
OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "ProgramImpl::LoadTuningCache");
GPU_DEBUG_DEFINE_MEM_LOGGER("ProgramImpl::LoadTuningCache");
try {
tuning_cache = kernel_selector::CreateTuningCacheFromFile(get_engine().configuration().tuning_cache_path);
tuning_cache = kernel_selector::CreateTuningCacheFromFile("cache.json");
} catch (...) {
tuning_cache = std::make_shared<kernel_selector::TuningCache>();
}
@ -210,18 +255,19 @@ kernels_cache& program::get_kernels_cache() const {
program::ptr program::build_program(engine& engine,
const topology& topology,
const build_options& options,
const ExecutionConfig& config,
bool is_internal,
bool no_optimizations,
bool is_body_program) {
return std::make_shared<program>(engine, topology, options, is_internal, no_optimizations, is_body_program);
return std::make_shared<program>(engine, topology, config, is_internal, no_optimizations, is_body_program);
}
program::ptr program::build_program(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const build_options& options,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
bool is_internal) {
return std::make_shared<program>(engine, nodes, options, is_internal);
return std::make_shared<program>(engine, nodes, config, task_executor, is_internal);
}
program_node& program::get_node(primitive_id const& id) {
@ -449,20 +495,8 @@ void program::set_options() {
static std::atomic<uint32_t> id_gen{0};
prog_id = ++id_gen;
assert(prog_id != 0);
if ((options.get<build_option_type::tuning_config>()->config.mode == tuning_mode::tuning_tune_and_cache ||
options.get<build_option_type::tuning_config>()->config.mode == tuning_mode::tuning_retune_and_cache) &&
!_engine.configuration().enable_profiling) {
throw std::invalid_argument("Engine must be created with profiling enabled in tune_and_cache mode!");
}
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) {
options.set_option(cldnn::build_option::graph_dumps_dir(debug_config->dump_graphs));
}
if (!options.get<build_option_type::force_implementations>()->forcing.empty()) {
options.set_option(build_option::optimize_data(true));
if (!_config.get_property(ov::intel_gpu::force_implementations).empty()) {
_config.set_property(ov::intel_gpu::optimize_data(true));
}
}
@ -502,7 +536,7 @@ void program::query_local_block_io_supported() {
kernel_string->batch_compilation = true;
try {
auto _kernels_cache_device_query = std::unique_ptr<kernels_cache>(new kernels_cache(_engine, prog_id,
auto _kernels_cache_device_query = std::unique_ptr<kernels_cache>(new kernels_cache(_engine, _config, prog_id, nullptr,
kernel_selector::KernelBase::get_db().get_batch_header_str()));
auto id = _kernels_cache_device_query->set_kernel_source(kernel_string, false);
_kernels_cache_device_query->build_all();
@ -533,7 +567,7 @@ void program::build_program(bool is_internal) {
#endif
prepare_memory_dependencies();
if (options.get<build_option_type::partial_build_program>()->enabled()) {
if (_config.get_property(ov::intel_gpu::partial_build_program)) {
return;
}
@ -582,7 +616,8 @@ void program::pre_optimize_graph(bool is_internal) {
node->get_output_layouts();
}
if (options.get<build_option_type::optimize_data>()->enabled()) {
bool optimize_data = _config.get_property(ov::intel_gpu::optimize_data);
if (optimize_data) {
apply_opt_pass<prepare_quantization>();
}
@ -590,7 +625,7 @@ void program::pre_optimize_graph(bool is_internal) {
set_layout_optimizer_attributes(lo);
reorder_factory rf;
if (options.get<build_option_type::optimize_data>()->enabled()) {
if (optimize_data) {
apply_opt_pass<prepare_primitive_fusing_through>();
apply_opt_pass<pre_replace_deconv>(lo);
@ -623,7 +658,7 @@ void program::pre_optimize_graph(bool is_internal) {
apply_opt_pass<prepare_padding>(output_size_handling_enabled);
apply_opt_pass<remove_redundant_reorders>(lo, options.get<build_option_type::optimize_data>()->enabled());
apply_opt_pass<remove_redundant_reorders>(lo, optimize_data);
if (!is_internal) {
// ToDo remove hidden dependencies from propagate_constants pass
@ -631,7 +666,7 @@ void program::pre_optimize_graph(bool is_internal) {
}
// try to fuse buffers (i.e. depth_concat in bfyx format) after padding calculations
if (options.get<build_option_type::optimize_data>()->enabled()) {
if (optimize_data) {
apply_opt_pass<prepare_buffer_fusing>();
}
@ -653,17 +688,18 @@ void program::post_optimize_graph(bool is_internal) {
apply_opt_pass<remove_redundant_reorders>(lo, false, true); // TODO: do we need it at this place also?
auto partial_build = _config.get_property(ov::intel_gpu::partial_build_program);
#ifdef GPU_DEBUG_CONFIG
GPU_DEBUG_GET_INSTANCE(debug_config);
if (!is_internal && (!options.get<build_option_type::partial_build_program>()->enabled() || !debug_config->dry_run_path.empty())) {
if (!is_internal && (!partial_build || !debug_config->dry_run_path.empty())) {
#else
if (!is_internal && !options.get<build_option_type::partial_build_program>()->enabled()) {
if (!is_internal && !partial_build) {
#endif
// ToDo remove hidden dependencies from propagate_constants pass
apply_opt_pass<propagate_constants>();
}
if (options.get<build_option_type::optimize_data>()->enabled())
if (_config.get_property(ov::intel_gpu::optimize_data))
apply_opt_pass<remove_redundant_reorders>(lo, false, true, true); // pass to remove output reorders while all others graph optimizations were done
// update loop input/output primitive mappings
@ -743,17 +779,6 @@ void program::cleanup() {
for (auto& node : processing_order)
node->get_output_layout();
// in debug build, at the end, mark all nodes as outputs so user can query for buffers of all not-optimized nodes,
// including internal ones etc.
if (is_debug_build()) {
for (auto& node : processing_order) {
if (!node->is_output()) {
node->set_output(true);
outputs.push_back(node);
}
}
}
_kernels_cache->reset();
}
@ -786,7 +811,7 @@ program::nodes_ordering& program::get_processing_order() { return processing_ord
const program::nodes_ordering& program::get_processing_order() const { return processing_order; }
void program::prepare_memory_dependencies() {
if (!get_engine().configuration().use_memory_pool)
if (!_config.get_property(ov::intel_gpu::enable_memory_pool))
return;
apply_opt_pass<basic_memory_dependencies>();
@ -1046,7 +1071,7 @@ bool program::remove_if_dangling(program_node& node) {
if (!node.dependencies.empty())
return false;
if (!node.is_output() || is_debug_build()) {
if (!node.is_output()) {
if (node.is_input())
inputs.remove(&node);
@ -1062,7 +1087,7 @@ bool program::extract(program_node& node) {
if (node.get_dependencies().size() != 1)
return false;
if (node.is_output() && !is_debug_build()) {
if (node.is_output()) {
auto& prev = node.get_dependency(0);
auto node_id = node.id();
@ -1248,7 +1273,7 @@ void program::remove_nodes(std::vector<program_node*>& to_remove) {
void program::dump_program(const char* stage,
bool with_full_info,
std::function<bool(program_node const&)> const& filter) const {
std::string path = get_dir_path(options);
std::string path = get_dir_path(_config);
if (path.empty() || !with_full_info) {
return;
}
@ -1372,7 +1397,7 @@ program::primitives_info program::get_current_stage_info() const {
void program::save_pass_info(std::string pass_name) {
// TODO: Directory path here can be probably changed to some bool flag
if (!options.get<build_option_type::graph_dumps_dir>()->directory_path.empty())
if (!_config.get_property(ov::intel_gpu::dump_graphs).empty())
optimizer_passes_info.emplace_back(pass_name, get_current_stage_info());
}
@ -1400,7 +1425,8 @@ const program::primitives_info& program::get_primitives_info() const { return pr
void program::apply_opt_pass(base_pass& pass) { pm->run(*this, pass); }
void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
lo.set_implementation_forcing(options.get<build_option_type::force_implementations>()->forcing);
lo.set_implementation_forcing(_config.get_property(ov::intel_gpu::force_implementations));
// first pass to set layout optimization_attributes for topology
bool can_use_fsv16 = true;
@ -1625,7 +1651,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
auto& engine = get_engine();
if (engine.get_device_info().supports_immad &&
engine.get_device_info().vendor_id == INTEL_VENDOR_ID &&
engine.configuration().queue_type == queue_types::in_order)
get_config().get_property(ov::intel_gpu::queue_type) == QueueTypes::in_order)
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, 1);
#endif
}

View File

@ -139,8 +139,8 @@ std::string get_node_id(const program_node* ptr) { return "node_" + std::to_stri
void dump_full_node(std::ofstream& out, const program_node* node) { out << node->type()->to_string(*node); }
} // namespace
std::string get_dir_path(build_options opts) {
auto path = opts.get<build_option_type::graph_dumps_dir>()->directory_path;
std::string get_dir_path(const ExecutionConfig& config) {
auto path = config.get_property(ov::intel_gpu::dump_graphs);
if (path.empty()) {
return {};
}
@ -151,15 +151,6 @@ std::string get_dir_path(build_options opts) {
return path;
}
/// Returns given name for serialization process.
inline std::string get_serialization_network_name(build_options opts) {
return opts.get<build_option_type::serialize_network>()->serialization_network_name;
}
inline std::string get_load_program_name(build_options opts) {
return opts.get<build_option_type::load_program>()->load_program_name;
}
void dump_graph_init(std::ofstream& graph,
const program& program,
std::function<bool(program_node const&)> const& filter) {

View File

@ -245,8 +245,7 @@ bool program_node::is_detached(bool whole_branch) {
}
layout program_node::calc_output_layout() const {
bool allow_new_shape_infer =
get_program().get_options().get<build_option_type::allow_new_shape_infer>()->enabled();
bool allow_new_shape_infer = get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
if (allow_new_shape_infer) {
auto out_layouts = type()->calc_output_layouts(*this, *get_kernel_impl_params());
if (!out_layouts.empty()) {
@ -262,8 +261,7 @@ layout program_node::calc_output_layout() const {
}
std::vector<layout> program_node::calc_output_layouts() const {
bool allow_new_shape_infer =
get_program().get_options().get<build_option_type::allow_new_shape_infer>()->enabled();
bool allow_new_shape_infer = get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
if (allow_new_shape_infer) {
auto out_layouts = type()->calc_output_layouts(*this, *get_kernel_impl_params());
if (!out_layouts.empty())
@ -802,7 +800,7 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const
memory::ptr cur_bin_mem_ptr = cur_node.as<data>().get_attached_memory_ptr();
if (cur_bin_mem_ptr == nullptr)
throw std::runtime_error("OneDNN post-ops optimization error: nonexistent node for bin + eltw");
auto& stream = cur_bin_mem_ptr->get_engine()->get_program_stream();
auto& stream = cur_bin_mem_ptr->get_engine()->get_service_stream();
mem_lock<float, mem_lock_type::read_write> bin_and_eltw_lock(cur_bin_mem_ptr, stream);
size_t cur_bin_mem_size = cur_node.get_output_layout().count();
@ -844,7 +842,7 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const
memory::ptr prev_bin_mem_ptr = prev_node.as<data>().get_attached_memory_ptr();
if (prev_bin_mem_ptr == nullptr)
throw std::runtime_error("OneDNN post-ops optimization error: nonexistent node for eltw + bin");
auto& stream = prev_bin_mem_ptr->get_engine()->get_program_stream();
auto& stream = prev_bin_mem_ptr->get_engine()->get_service_stream();
mem_lock<float, mem_lock_type::read_write> eltw_and_bin_lock(prev_bin_mem_ptr, stream);
size_t prev_bin_mem_size = prev_node.get_output_layout().count();
@ -932,7 +930,7 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const
memory::ptr prev_scale_mem_ptr = prev_node.as<data>().get_attached_memory_ptr();
if (prev_scale_mem_ptr == nullptr)
throw std::runtime_error("OneDNN post-ops optimization error: nonexistent node for eltw + scale");
auto& stream = prev_scale_mem_ptr->get_engine()->get_program_stream();
auto& stream = prev_scale_mem_ptr->get_engine()->get_service_stream();
mem_lock<float, mem_lock_type::read_write> eltw_and_scale_lock(prev_scale_mem_ptr, stream);
size_t prev_scale_mem_size = prev_node.get_output_layout().count();

View File

@ -11,6 +11,7 @@
#include "intel_gpu/plugin/compiled_model.hpp"
#include "intel_gpu/plugin/async_infer_request.hpp"
#include "intel_gpu/plugin/async_infer_request_legacy.hpp"
#include "intel_gpu/plugin/legacy_api_helper.hpp"
#include "openvino/runtime/intel_gpu/properties.hpp"
#include <description_buffer.hpp>
@ -35,30 +36,27 @@ using namespace InferenceEngine::details;
namespace ov {
namespace intel_gpu {
CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config) :
CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network,
InferenceEngine::RemoteContext::Ptr context,
const ExecutionConfig& config) :
InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr {
if (config.exclusiveAsyncRequests) {
if (config.get_property(ov::intel_gpu::exclusive_async_requests)) {
//exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior
return executorManager()->getExecutor("GPU");
} else if (config.throughput_streams > 1) {
} else if (config.get_property(ov::num_streams) > 1) {
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(
IStreamsExecutor::Config{"Intel GPU plugin executor", config.throughput_streams});
IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_property(ov::num_streams)});
} else {
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(
IStreamsExecutor::Config{"Intel GPU plugin executor", 1});
}
}()},
m_context(context),
m_config(config),
m_taskExecutor{ _taskExecutor },
m_waitExecutor(executorManager()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor" })) {
auto casted_context = std::dynamic_pointer_cast<gpu::ClContext>(context);
OPENVINO_ASSERT((casted_context != nullptr), "Invalid remote context");
m_context = casted_context;
auto graph_base = std::make_shared<Graph>(network, m_context, m_config, 0);
for (uint16_t n = 0; n < m_config.throughput_streams; n++) {
auto graph_base = std::make_shared<Graph>(network, get_context_impl(m_context), m_config, 0);
for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) {
auto graph = n == 0 ? graph_base : std::make_shared<Graph>(graph_base, n);
m_graphs.push_back(graph);
}
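On the plugin side the same config drives both the number of Graph copies and the executor streams. A hedged sketch of the user-facing properties involved; the values are illustrative.

ExecutionConfig config;
config.set_property(ov::num_streams(2));           // -> two Graph instances and a 2-stream CPUStreamsExecutor above
config.set_property(ov::enable_profiling(true));   // -> EnableProfiling() on the created infer requests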
@ -87,29 +85,27 @@ static InferenceEngine::Layout layout_from_string(const std::string & name) {
IE_THROW(NetworkNotRead) << "Unknown layout with name '" << name << "'";
}
CompiledModel::CompiledModel(std::istream& networkModel, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config) :
CompiledModel::CompiledModel(std::istream& networkModel, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config) :
InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr {
if (config.exclusiveAsyncRequests) {
if (config.get_property(ov::intel_gpu::exclusive_async_requests)) {
//exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior
return executorManager()->getExecutor("GPU");
} else if (config.throughput_streams > 1) {
} else if (config.get_property(ov::num_streams) > 1) {
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(
IStreamsExecutor::Config{"Intel GPU plugin executor", config.throughput_streams});
IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_property(ov::num_streams)});
} else {
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(
IStreamsExecutor::Config{"Intel GPU plugin executor", 1});
}
}()},
m_context(context),
m_config(config),
m_taskExecutor{ _taskExecutor },
m_waitExecutor(executorManager()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor" })) {
auto casted_context = std::dynamic_pointer_cast<gpu::ClContext>(context);
auto context_impl = get_context_impl(m_context);
auto& engine = context_impl->get_engine();
OPENVINO_ASSERT((casted_context != nullptr), "Invalid remote context");
m_context = casted_context;
cldnn::BinaryInputBuffer ib(networkModel, *getContextImpl(m_context)->GetEngine());
cldnn::BinaryInputBuffer ib(networkModel, engine);
// InputsInfo and OutputsInfo for CNNNetwork
{
@ -255,8 +251,8 @@ CompiledModel::CompiledModel(std::istream& networkModel, std::shared_ptr<Inferen
setOutputs(new_results);
}
auto graph_base = std::make_shared<Graph>(ib, m_context, m_config, 0);
for (uint16_t n = 0; n < m_config.throughput_streams; n++) {
auto graph_base = std::make_shared<Graph>(ib, context_impl, m_config, 0);
for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) {
auto graph = n == 0 ? graph_base : std::make_shared<Graph>(graph_base, n);
m_graphs.push_back(graph);
}
@ -266,9 +262,9 @@ template <class T>
IInferRequestInternal::Ptr CompiledModel::GetInferRequestImpl(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs) {
auto ptr = std::make_shared<T>(inputs, outputs, std::static_pointer_cast<CompiledModel>(shared_from_this()));
if (m_config.throughput_streams > 1)
if (m_config.get_property(ov::num_streams) > 1)
ptr->EnableStreams();
if (m_config.useProfiling)
if (m_config.get_property(ov::enable_profiling))
ptr->EnableProfiling();
if (m_graphs.front()->use_external_queue())
ptr->enable_external_queue();
@ -282,9 +278,9 @@ IInferRequestInternal::Ptr CompiledModel::CreateInferRequestImpl(InputsDataMap n
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequestImpl");
auto ptr = std::make_shared<InferRequestLegacy>(networkInputs, networkOutputs,
std::static_pointer_cast<CompiledModel>(shared_from_this()));
if (m_config.throughput_streams > 1)
if (m_config.get_property(ov::num_streams) > 1)
ptr->EnableStreams();
if (m_config.useProfiling)
if (m_config.get_property(ov::enable_profiling))
ptr->EnableProfiling();
if (m_graphs.front()->use_external_queue())
ptr->enable_external_queue();
@ -469,50 +465,17 @@ std::shared_ptr<ngraph::Function> CompiledModel::GetExecGraphInfo() {
}
InferenceEngine::Parameter CompiledModel::GetConfig(const std::string &name) const {
const bool is_new_api = _plugin->IsNewAPI();
auto it = m_config.key_config_map.find(name);
if (it != m_config.key_config_map.end()) {
std::string val = it->second;
if (is_new_api) {
if (name == ov::enable_profiling) {
return val == PluginConfigParams::YES ? true : false;
} else if (name == ov::hint::model_priority) {
return ov::util::from_string(val, ov::hint::model_priority);
} else if (name == ov::intel_gpu::hint::host_task_priority) {
return ov::util::from_string(val, ov::intel_gpu::hint::host_task_priority);
} else if (name == ov::intel_gpu::hint::queue_priority) {
return ov::util::from_string(val, ov::intel_gpu::hint::queue_priority);
} else if (name == ov::intel_gpu::hint::queue_throttle) {
return ov::util::from_string(val, ov::intel_gpu::hint::queue_throttle);
} else if (name == ov::intel_gpu::enable_loop_unrolling) {
return val == PluginConfigParams::YES ? true : false;
} else if (name == ov::cache_dir) {
return ov::util::from_string(val, ov::cache_dir);
} else if (name == ov::hint::performance_mode) {
return ov::util::from_string(val, ov::hint::performance_mode);
} else if (name == ov::compilation_num_threads) {
return ov::util::from_string(val, ov::compilation_num_threads);
} else if (name == ov::num_streams) {
return ov::util::from_string(val, ov::num_streams);
} else if (name == ov::hint::num_requests) {
return ov::util::from_string(val, ov::hint::num_requests);
} else if (name == ov::hint::inference_precision) {
return ov::util::from_string(val, ov::hint::inference_precision);
} else if (name == ov::device::id) {
return ov::util::from_string(val, ov::device::id);
} else {
auto actual_name = name;
if (LegacyAPIHelper::is_legacy_property({name, nullptr}, _plugin->IsNewAPI())) {
actual_name = LegacyAPIHelper::convert_legacy_property({name, nullptr}).first;
}
auto val = m_config.get_property(actual_name);
if (LegacyAPIHelper::is_legacy_property({name, nullptr}, _plugin->IsNewAPI())) {
val = LegacyAPIHelper::convert_to_legacy_property({actual_name, val}).second;
}
return val;
}
} else {
if (name == PluginConfigParams::KEY_MODEL_PRIORITY ||
name == GPUConfigParams::KEY_GPU_HOST_TASK_PRIORITY)
return Config::ConvertPropertyToLegacy(name, val);
else
return val;
}
} else {
IE_THROW() << "Unsupported ExecutableNetwork config key: " << name;
}
}
InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) const {
@ -550,14 +513,28 @@ InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) con
metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS));
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
std::vector<std::string> configKeys;
for (auto && value : m_config.key_config_map)
if (!Config::isNewApiProperty(value.first))
configKeys.push_back(value.first);
static const std::vector<std::string> configKeys {
CONFIG_KEY(MODEL_PRIORITY),
CONFIG_KEY(PERFORMANCE_HINT),
CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS),
CONFIG_KEY(PERF_COUNT),
CONFIG_KEY(DYN_BATCH_ENABLED),
CONFIG_KEY(CONFIG_FILE),
CONFIG_KEY(DEVICE_ID),
CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS),
CONFIG_KEY(CACHE_DIR),
CONFIG_KEY(GPU_THROUGHPUT_STREAMS),
GPU_CONFIG_KEY(PLUGIN_PRIORITY),
GPU_CONFIG_KEY(PLUGIN_THROTTLE),
GPU_CONFIG_KEY(HOST_TASK_PRIORITY),
GPU_CONFIG_KEY(NV12_TWO_INPUTS),
GPU_CONFIG_KEY(MAX_NUM_THREADS),
GPU_CONFIG_KEY(ENABLE_LOOP_UNROLLING),
};
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
} else if (name == ov::optimal_number_of_infer_requests) {
unsigned int nr = m_config.throughput_streams;
if (m_config.perfHintsConfig.ovPerfHint != CONFIG_VALUE(LATENCY))
unsigned int nr = m_config.get_property(ov::num_streams);
if (m_config.get_property(ov::hint::performance_mode) != ov::hint::PerformanceMode::LATENCY)
nr *= 2;
return decltype(ov::optimal_number_of_infer_requests)::value_type {nr};
} else if (name == ov::execution_devices) {

View File

@ -1,499 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/plugin/device_config.hpp"
#include <ie_system_conf.h>
#include <sys/stat.h>
#include <gpu/gpu_config.hpp>
#include <thread>
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "file_utils.h"
#include "ie_api.h"
#include "intel_gpu/runtime/itt.hpp"
#include "openvino/runtime/intel_gpu/properties.hpp"
#include <openvino/util/common_util.hpp>
#ifdef _WIN32
# include <direct.h>
# ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
# define mkdir(dir, mode) _wmkdir(dir)
# else
# define mkdir(dir, mode) _mkdir(dir)
# endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
#endif // _WIN32
using namespace InferenceEngine;
namespace ov {
namespace intel_gpu {
static void createDirectory(std::string _path) {
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
std::wstring widepath = ov::util::string_to_wstring(_path.c_str());
const wchar_t* path = widepath.c_str();
#else
const char* path = _path.c_str();
#endif
auto err = mkdir(path, 0755);
if (err != 0 && errno != EEXIST) {
IE_THROW() << "Couldn't create directory! (err=" << err << "; errno=" << errno << ")";
}
}
static int getNumberOfCores(const IStreamsExecutor::Config::PreferredCoreType core_type) {
const auto total_num_cores = getNumberOfLogicalCPUCores();
const auto total_num_big_cores = getNumberOfLogicalCPUCores(true);
const auto total_num_little_cores = total_num_cores - total_num_big_cores;
int num_cores = total_num_cores;
if (core_type == IStreamsExecutor::Config::BIG) {
num_cores = total_num_big_cores;
} else if (core_type == IStreamsExecutor::Config::LITTLE) {
num_cores = total_num_little_cores;
}
return num_cores;
}
IE_SUPPRESS_DEPRECATED_START
void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap, const cldnn::device_info& info) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Config::UpdateFromMap");
for (auto& kvp : configMap) {
std::string key = kvp.first;
std::string val = kvp.second;
const auto hints = perfHintsConfig.SupportedKeys();
if (hints.end() != std::find(hints.begin(), hints.end(), key)) {
perfHintsConfig.SetConfig(key, val);
} else if (key == ov::hint::inference_precision) {
std::stringstream ss(val);
ss >> inference_precision;
OPENVINO_ASSERT(inference_precision == ov::element::f16 ||
inference_precision == ov::element::f32 ||
inference_precision == ov::element::undefined,
"Unexpected inference precision set: ", inference_precision);
} else if (key.compare(PluginConfigParams::KEY_PERF_COUNT) == 0 || key == ov::enable_profiling) {
if (val.compare(PluginConfigParams::YES) == 0) {
useProfiling = true;
} else if (val.compare(PluginConfigParams::NO) == 0) {
useProfiling = false;
} else {
IE_THROW(NotFound) << "Unsupported property value by plugin: " << val;
}
} else if (key.compare(PluginConfigParams::KEY_DYN_BATCH_ENABLED) == 0) {
if (val.compare(PluginConfigParams::YES) == 0) {
enableDynamicBatch = true;
} else if (val.compare(PluginConfigParams::NO) == 0) {
enableDynamicBatch = false;
} else {
IE_THROW(NotFound) << "Unsupported property value by plugin: " << val;
}
} else if (key.compare(GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY) == 0) {
std::stringstream ss(val);
uint32_t uVal(0);
ss >> uVal;
if (ss.fail()) {
IE_THROW(NotFound) << "Unsupported property value by plugin: " << val;
}
switch (uVal) {
case 0:
case 2:
queuePriority = cldnn::priority_mode_types::med;
break;
case 1:
queuePriority = cldnn::priority_mode_types::low;
break;
case 3:
queuePriority = cldnn::priority_mode_types::high;
break;
default:
IE_THROW(ParameterMismatch) << "Unsupported queue priority value: " << uVal;
}
} else if (key == ov::intel_gpu::hint::queue_priority) {
std::stringstream ss(val);
ov::hint::Priority priority;
ss >> priority;
if (priority == ov::hint::Priority::HIGH)
queuePriority = cldnn::priority_mode_types::high;
else if (priority == ov::hint::Priority::MEDIUM)
queuePriority = cldnn::priority_mode_types::med;
else
queuePriority = cldnn::priority_mode_types::low;
} else if (key.compare(PluginConfigParams::KEY_MODEL_PRIORITY) == 0 || key == ov::hint::model_priority) {
if (val.compare(PluginConfigParams::MODEL_PRIORITY_HIGH) == 0 ||
val.compare(ov::util::to_string(ov::hint::Priority::HIGH)) == 0) {
queuePriority = cldnn::priority_mode_types::high;
task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::BIG;
} else if (val.compare(PluginConfigParams::MODEL_PRIORITY_MED) == 0 ||
val.compare(ov::util::to_string(ov::hint::Priority::MEDIUM)) == 0) {
queuePriority = cldnn::priority_mode_types::med;
task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::ANY;
} else if (val.compare(PluginConfigParams::MODEL_PRIORITY_LOW) == 0 ||
val.compare(ov::util::to_string(ov::hint::Priority::LOW)) == 0) {
queuePriority = cldnn::priority_mode_types::low;
task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::LITTLE;
} else {
IE_THROW() << "Not found appropriate value for config key " << PluginConfigParams::KEY_MODEL_PRIORITY
<< ".\n";
}
if (getAvailableCoresTypes().size() > 1) {
if (task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::BIG ||
task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::LITTLE) {
task_exec_config._streams = std::min(task_exec_config._streams,
getNumberOfCores(task_exec_config._threadPreferredCoreType));
}
} else {
task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::ANY;
task_exec_config._streams =
std::min(task_exec_config._streams, static_cast<int>(std::thread::hardware_concurrency()));
}
} else if (key.compare(GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE) == 0) {
std::stringstream ss(val);
uint32_t uVal(0);
ss >> uVal;
if (ss.fail()) {
IE_THROW(NotFound) << "Unsupported property value by plugin: " << val;
}
switch (uVal) {
case 0:
case 2:
queueThrottle = cldnn::throttle_mode_types::med;
break;
case 1:
queueThrottle = cldnn::throttle_mode_types::low;
break;
case 3:
queueThrottle = cldnn::throttle_mode_types::high;
break;
default:
IE_THROW(ParameterMismatch) << "Unsupported queue throttle value: " << uVal;
}
} else if (key == ov::intel_gpu::hint::queue_throttle) {
std::stringstream ss(val);
ov::intel_gpu::hint::ThrottleLevel throttle;
ss >> throttle;
if (throttle == ov::intel_gpu::hint::ThrottleLevel::HIGH)
queueThrottle = cldnn::throttle_mode_types::high;
else if (throttle == ov::intel_gpu::hint::ThrottleLevel::MEDIUM)
queueThrottle = cldnn::throttle_mode_types::med;
else
queueThrottle = cldnn::throttle_mode_types::low;
} else if (key.compare(PluginConfigParams::KEY_CONFIG_FILE) == 0) {
std::stringstream ss(val);
std::istream_iterator<std::string> begin(ss);
std::istream_iterator<std::string> end;
std::vector<std::string> configFiles(begin, end);
for (auto& file : configFiles) {
CustomLayer::LoadFromFile(file, customLayers);
}
} else if (key.compare(PluginConfigParams::KEY_CACHE_DIR) == 0 || key == ov::cache_dir) {
if (!val.empty()) {
kernels_cache_dir = val;
createDirectory(kernels_cache_dir);
}
} else if (key.compare(PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS) == 0) {
if (val.compare(PluginConfigParams::YES) == 0) {
exclusiveAsyncRequests = true;
} else if (val.compare(PluginConfigParams::NO) == 0) {
exclusiveAsyncRequests = false;
} else {
IE_THROW(NotFound) << "Unsupported property value by plugin: " << val;
}
} else if (key.compare(PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS) == 0 || key == ov::num_streams) {
if (val.compare(PluginConfigParams::GPU_THROUGHPUT_AUTO) == 0 ||
val.compare(ov::util::to_string(ov::streams::AUTO)) == 0) {
throughput_streams = std::max(GetDefaultNStreamsForThroughputMode(), info.num_ccs);
} else {
int val_i;
try {
val_i = std::stoi(val);
} catch (const std::exception&) {
IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS
<< ". Expected only positive numbers (#streams) or "
<< "PluginConfigParams::GPU_THROUGHPUT_AUTO";
}
if (val_i > 0)
throughput_streams = static_cast<uint16_t>(val_i);
}
} else if (key.compare(PluginConfigParams::KEY_DEVICE_ID) == 0 || key == ov::device::id) {
// Validate that the passed value is a positive number.
try {
int val_i = std::stoi(val);
(void)val_i;
} catch (const std::exception&) {
IE_THROW() << "Wrong value for property key " << ov::device::id.name()
<< ". DeviceIDs are only represented by positive numbers";
}
// Set this value.
device_id = val;
} else if (key.compare(PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE) == 0) {
if (val.compare(PluginConfigParams::YES) == 0) {
enableInt8 = true;
} else if (val.compare(PluginConfigParams::NO) == 0) {
enableInt8 = false;
} else {
IE_THROW(NotFound) << "Unsupported property value by plugin: " << val;
}
} else if (key.compare(GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS) == 0) {
if (val.compare(PluginConfigParams::YES) == 0) {
nv12_two_inputs = true;
} else if (val.compare(PluginConfigParams::NO) == 0) {
nv12_two_inputs = false;
} else {
IE_THROW(NotFound) << "Unsupported NV12 flag value: " << val;
}
} else if (key.compare(GPUConfigParams::KEY_GPU_MAX_NUM_THREADS) == 0 || key == ov::compilation_num_threads) {
int max_threads = std::max(1, static_cast<int>(std::thread::hardware_concurrency()));
try {
int val_i = std::stoi(val);
if (val_i <= 0 || val_i > max_threads) {
val_i = max_threads;
}
task_exec_config._streams = std::min(task_exec_config._streams, val_i);
} catch (const std::exception&) {
IE_THROW() << "Wrong value for property key " << GPUConfigParams::KEY_GPU_MAX_NUM_THREADS << ": " << val
<< "\nSpecify the number of threads use for build as an integer."
<< "\nOut of range value will be set as a default value, maximum concurrent threads.";
}
} else if (key.compare(GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING) == 0 ||
key == ov::intel_gpu::enable_loop_unrolling) {
if (val.compare(PluginConfigParams::YES) == 0) {
enable_loop_unrolling = true;
} else if (val.compare(PluginConfigParams::NO) == 0) {
enable_loop_unrolling = false;
} else {
IE_THROW(ParameterMismatch) << "Unsupported KEY_GPU_ENABLE_LOOP_UNROLLING flag value: " << val;
}
} else if (key.compare(GPUConfigParams::KEY_GPU_HOST_TASK_PRIORITY) == 0 ||
key == ov::intel_gpu::hint::host_task_priority) {
if (val.compare(GPUConfigParams::GPU_HOST_TASK_PRIORITY_HIGH) == 0 ||
val.compare(ov::util::to_string(ov::hint::Priority::HIGH)) == 0) {
task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::BIG;
} else if (val.compare(GPUConfigParams::GPU_HOST_TASK_PRIORITY_MEDIUM) == 0 ||
val.compare(ov::util::to_string(ov::hint::Priority::MEDIUM)) == 0) {
task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::ANY;
} else if (val.compare(GPUConfigParams::GPU_HOST_TASK_PRIORITY_LOW) == 0 ||
val.compare(ov::util::to_string(ov::hint::Priority::LOW)) == 0) {
task_exec_config._threadPreferredCoreType = IStreamsExecutor::Config::LITTLE;
} else {
IE_THROW(NotFound) << "Unsupported host task priority by plugin: " << val;
}
} else {
IE_THROW(NotFound) << "Unsupported property key by plugin: " << key;
}
adjustKeyMapValues();
}
}
void Config::adjustKeyMapValues() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Config::AdjustKeyMapValues");
if (useProfiling) {
key_config_map[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES;
key_config_map[ov::enable_profiling.name()] = PluginConfigParams::YES;
} else {
key_config_map[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::NO;
key_config_map[ov::enable_profiling.name()] = PluginConfigParams::NO;
}
if (exclusiveAsyncRequests)
key_config_map[PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS] = PluginConfigParams::YES;
else
key_config_map[PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS] = PluginConfigParams::NO;
if (enableDynamicBatch)
key_config_map[PluginConfigParams::KEY_DYN_BATCH_ENABLED] = PluginConfigParams::YES;
else
key_config_map[PluginConfigParams::KEY_DYN_BATCH_ENABLED] = PluginConfigParams::NO;
if (nv12_two_inputs) {
key_config_map[GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS] = PluginConfigParams::YES;
} else {
key_config_map[GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS] = PluginConfigParams::NO;
}
key_config_map[ov::hint::inference_precision.name()] = inference_precision.get_type_name();
{
if (queuePriority == cldnn::priority_mode_types::high &&
(task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::BIG ||
getAvailableCoresTypes().size() == 1)) {
key_config_map[ov::hint::model_priority.name()] =
ov::util::to_string(ov::hint::Priority::HIGH);
} else if (queuePriority == cldnn::priority_mode_types::low &&
(task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::LITTLE ||
getAvailableCoresTypes().size() == 1)) {
key_config_map[ov::hint::model_priority.name()] =
ov::util::to_string(ov::hint::Priority::LOW);
} else if (queuePriority == cldnn::priority_mode_types::med &&
task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::ANY) {
key_config_map[ov::hint::model_priority.name()] =
ov::util::to_string(ov::hint::Priority::MEDIUM);
}
}
{
std::string qp = "0";
switch (queuePriority) {
case cldnn::priority_mode_types::low:
qp = "1";
break;
case cldnn::priority_mode_types::med:
qp = "2";
break;
case cldnn::priority_mode_types::high:
qp = "3";
break;
default:
break;
}
key_config_map[GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY] = qp;
}
{
std::string priority;
if (queuePriority == cldnn::priority_mode_types::high)
priority = ov::util::to_string(ov::hint::Priority::HIGH);
else if (queuePriority == cldnn::priority_mode_types::low)
priority = ov::util::to_string(ov::hint::Priority::LOW);
else
priority = ov::util::to_string(ov::hint::Priority::MEDIUM);
key_config_map[ov::intel_gpu::hint::queue_priority.name()] = priority;
}
{
std::string qt = "0";
switch (queueThrottle) {
case cldnn::throttle_mode_types::low:
qt = "1";
break;
case cldnn::throttle_mode_types::med:
qt = "2";
break;
case cldnn::throttle_mode_types::high:
qt = "3";
break;
default:
break;
}
key_config_map[GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE] = qt;
}
{
std::string throttleLevel;
if (queueThrottle == cldnn::throttle_mode_types::high)
throttleLevel = ov::util::to_string(ov::intel_gpu::hint::ThrottleLevel::HIGH);
else if (queueThrottle == cldnn::throttle_mode_types::low)
throttleLevel = ov::util::to_string(ov::intel_gpu::hint::ThrottleLevel::LOW);
else
throttleLevel = ov::util::to_string(ov::intel_gpu::hint::ThrottleLevel::MEDIUM);
key_config_map[ov::intel_gpu::hint::queue_throttle.name()] = throttleLevel;
}
{
std::string hostTaskPriority;
if (task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::LITTLE)
hostTaskPriority = ov::util::to_string(ov::hint::Priority::LOW);
else if (task_exec_config._threadPreferredCoreType == IStreamsExecutor::Config::BIG)
hostTaskPriority = ov::util::to_string(ov::hint::Priority::HIGH);
else
hostTaskPriority = ov::util::to_string(ov::hint::Priority::MEDIUM);
key_config_map[ov::intel_gpu::hint::host_task_priority.name()] = hostTaskPriority;
}
key_config_map[PluginConfigParams::KEY_CACHE_DIR] = kernels_cache_dir;
key_config_map[ov::cache_dir.name()] = kernels_cache_dir;
key_config_map[PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS] = std::to_string(throughput_streams);
key_config_map[ov::num_streams.name()] = std::to_string(throughput_streams);
key_config_map[PluginConfigParams::KEY_DEVICE_ID] = device_id;
key_config_map[ov::device::id.name()] = device_id;
key_config_map[PluginConfigParams::KEY_CONFIG_FILE] = "";
key_config_map[GPUConfigParams::KEY_GPU_MAX_NUM_THREADS] = std::to_string(task_exec_config._streams);
key_config_map[ov::compilation_num_threads.name()] = std::to_string(task_exec_config._streams);
if (enable_loop_unrolling) {
key_config_map[GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING] = PluginConfigParams::YES;
key_config_map[ov::intel_gpu::enable_loop_unrolling.name()] = PluginConfigParams::YES;
} else {
key_config_map[GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING] = PluginConfigParams::NO;
key_config_map[ov::intel_gpu::enable_loop_unrolling.name()] = PluginConfigParams::NO;
}
key_config_map[PluginConfigParams::KEY_PERFORMANCE_HINT] = perfHintsConfig.ovPerfHint;
key_config_map[ov::hint::performance_mode.name()] = perfHintsConfig.ovPerfHint;
key_config_map[PluginConfigParams::KEY_PERFORMANCE_HINT_NUM_REQUESTS] =
std::to_string(perfHintsConfig.ovPerfHintNumRequests);
}
bool Config::isNewApiProperty(std::string property) {
static const std::set<std::string> new_api_keys{
ov::intel_gpu::hint::queue_priority.name(),
ov::intel_gpu::hint::queue_throttle.name(),
ov::hint::inference_precision.name(),
ov::compilation_num_threads.name(),
ov::num_streams.name(),
};
return new_api_keys.find(property) != new_api_keys.end();
}
std::string Config::ConvertPropertyToLegacy(const std::string& key, const std::string& value) {
if (key == PluginConfigParams::KEY_MODEL_PRIORITY) {
auto priority = ov::util::from_string(value, ov::hint::model_priority);
if (priority == ov::hint::Priority::HIGH)
return PluginConfigParams::MODEL_PRIORITY_HIGH;
else if (priority == ov::hint::Priority::MEDIUM)
return PluginConfigParams::MODEL_PRIORITY_MED;
else if (priority == ov::hint::Priority::LOW)
return PluginConfigParams::MODEL_PRIORITY_LOW;
} else if (key == GPUConfigParams::KEY_GPU_HOST_TASK_PRIORITY) {
auto priority = ov::util::from_string(value, ov::intel_gpu::hint::host_task_priority);
if (priority == ov::hint::Priority::HIGH)
return GPUConfigParams::GPU_HOST_TASK_PRIORITY_HIGH;
else if (priority == ov::hint::Priority::MEDIUM)
return GPUConfigParams::GPU_HOST_TASK_PRIORITY_MEDIUM;
else if (priority == ov::hint::Priority::LOW)
return GPUConfigParams::GPU_HOST_TASK_PRIORITY_LOW;
}
IE_THROW() << "Unsupported value for legacy key : " << key;
}
bool Config::CanShareContextWith(const Config& other) const {
return this->throughput_streams == other.throughput_streams &&
this->useProfiling == other.useProfiling &&
this->dumpCustomKernels == other.dumpCustomKernels &&
this->queueThrottle == other.queueThrottle &&
this->queuePriority == other.queuePriority &&
this->kernels_cache_dir == other.kernels_cache_dir &&
this->device_id == other.device_id &&
this->task_exec_config._streams == other.task_exec_config._streams &&
this->task_exec_config._threadPreferredCoreType == other.task_exec_config._threadPreferredCoreType &&
this->enable_loop_unrolling == other.enable_loop_unrolling;
}
void Configs::CreateConfig(std::string device_id) {
if (configs.find(device_id) == configs.end()) {
configs.emplace(device_id, Config(device_id));
}
}
Config& Configs::GetConfig(std::string device_id) {
if (device_id.empty()) {
return GetDefaultDeviceConfig();
}
if (configs.find(device_id) == configs.end()) {
IE_THROW() << "Config for device with " << device_id << " ID is not registered in GPU plugin";
}
return configs.find(device_id)->second;
}
Config& Configs::GetDefaultDeviceConfig() {
return GetConfig(default_device_id);
}
IE_SUPPRESS_DEPRECATED_END
} // namespace intel_gpu
} // namespace ov
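The adjustKeyMapValues() logic above keeps the deprecated GPU config keys and the OpenVINO 2.0 properties in sync, so either form should end up in the same cldnn setting. Below is a minimal sketch of the two equivalent ways to request a low-priority GPU queue; the call site, the include paths, and the assumption that the deprecated key is still accepted through ov::Core::set_property are illustrative only, while the property names and the "1" -> low mapping come from the code above.
#include <openvino/runtime/core.hpp>
#include <openvino/runtime/intel_gpu/properties.hpp>
#include <gpu/gpu_config.hpp>          // deprecated GPUConfigParams keys (assumed include path)
void request_low_queue_priority(ov::Core& core) {
    // 2.0 property: handled by the ov::intel_gpu::hint::queue_priority branch above.
    core.set_property("GPU", ov::intel_gpu::hint::queue_priority(ov::hint::Priority::LOW));
    // Deprecated key: "1" corresponds to cldnn::priority_mode_types::low in the mappings above.
    core.set_property("GPU", ov::AnyMap{{InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY, "1"}});
}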

View File

@ -45,32 +45,32 @@ using namespace InferenceEngine::details;
namespace ov {
namespace intel_gpu {
Graph::Graph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context, Config config, uint16_t stream_id)
Graph::Graph(InferenceEngine::CNNNetwork& network, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id)
: m_context(context)
, m_networkName(network.getName())
, m_config(config)
, m_stream_id(stream_id)
, m_state(0) {
m_program = std::make_shared<Program>(network, GetEngine(), m_config);
m_program = std::make_shared<Program>(network, get_engine(), config);
if (m_program->m_max_batch > 1)
m_config.max_dynamic_batch = m_program->m_max_batch;
m_config.set_property(ov::intel_gpu::max_dynamic_batch(m_program->m_max_batch));
Build();
}
Graph::Graph(cldnn::BinaryInputBuffer &ib, gpu::ClContext::Ptr context, Config config, uint16_t stream_id)
Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id)
: m_context(context)
, m_config(config)
, m_stream_id(stream_id)
, m_state(0) {
m_program = std::make_shared<Program>(GetEngine(), m_config);
m_program = std::make_shared<Program>(get_engine(), config);
if (m_program->m_max_batch > 1)
m_config.max_dynamic_batch = m_program->m_max_batch;
m_config.set_property(ov::intel_gpu::max_dynamic_batch(m_program->m_max_batch));
ib >> m_program->inputLayouts;
ib >> primitiveIDs;
ib >> outputDims;
m_networks.emplace_back(std::make_shared<cldnn::network>(ib, GetEngine()->create_stream(), *GetEngine(), m_stream_id));
m_networks.emplace_back(std::make_shared<cldnn::network>(ib, get_engine().create_stream(config), get_engine(), m_stream_id));
}
Graph::Graph(std::shared_ptr<Graph> graph, uint16_t stream_id)
@ -130,21 +130,19 @@ void Graph::Build() {
}
bool Graph::use_external_queue() const {
auto impl = getContextImpl(m_context);
return impl->GetExternalQueue() != nullptr;
return m_context->get_external_queue() != nullptr;
}
std::shared_ptr<cldnn::network> Graph::BuildNetwork(std::shared_ptr<cldnn::program> program) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::BuildNetwork");
std::shared_ptr<cldnn::network> network = nullptr;
auto impl = getContextImpl(m_context);
auto externalQueue = impl->GetExternalQueue();
auto externalQueue = m_context->get_external_queue();
if (externalQueue) {
if (m_config.throughput_streams != 1)
if (m_config.get_property(ov::num_streams) != 1)
IE_THROW(ParameterMismatch) << "Throughput streams can't be used with shared queue!\n";
auto &engine = m_program->GetEngine();
network = std::make_shared<cldnn::network>(program, engine.create_stream(externalQueue), m_stream_id);
auto &engine = m_program->get_engine();
network = std::make_shared<cldnn::network>(program, engine.create_stream(m_config, externalQueue), m_stream_id);
} else {
network = std::make_shared<cldnn::network>(program, m_stream_id);
}
@ -164,7 +162,7 @@ Graph::variable_states_map Graph::AllocateVariablesMemories() {
std::vector<cldnn::network::VariableState::Ptr> memoryStates;
memoryStates.reserve(orderedLayouts.size());
for (const auto& layout : orderedLayouts)
memoryStates.push_back(std::make_shared<cldnn::network::VariableState>(GetEngine()->allocate_memory(layout, false)));
memoryStates.push_back(std::make_shared<cldnn::network::VariableState>(get_engine().allocate_memory(layout, false)));
states.insert({memStateInfo.first, memoryStates });
}
return states;
@ -173,7 +171,7 @@ Graph::variable_states_map Graph::AllocateVariablesMemories() {
std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& primitives_info,
bool filter_const_primitives) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::GetExecGraphInfoByPrimitivesInfo");
if (m_config.useProfiling) {
if (m_config.get_property(ov::enable_profiling)) {
try {
// Update may throw an exception for step-by-step runtime graph dump,
// since network->get_executed_primitives() method can't be called before network execution
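The BuildNetwork() change above rejects multiple throughput streams whenever an external command queue is attached to the remote context. A hedged application-side sketch of how such a queue would be supplied is shown below, assuming the public ov::intel_gpu::ocl::ClContext wrapper and a cl_command_queue created by the application; this is an illustration, not part of the commit.
#include <openvino/openvino.hpp>
#include <openvino/runtime/intel_gpu/ocl/ocl.hpp>
// `queue` is a valid cl_command_queue owned by the application (assumption).
ov::CompiledModel compile_on_shared_queue(ov::Core& core,
                                          const std::shared_ptr<ov::Model>& model,
                                          cl_command_queue queue) {
    ov::intel_gpu::ocl::ClContext remote_context(core, queue);   // remote context built on the user queue
    // The check above throws if more than one stream is combined with a shared queue,
    // so request a single stream explicitly.
    return core.compile_model(model, remote_context, ov::num_streams(1));
}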

View File

@ -10,6 +10,7 @@
#include <description_buffer.hpp>
#include "intel_gpu/plugin/infer_request.hpp"
#include "intel_gpu/plugin/remote_context.hpp"
#include "intel_gpu/plugin/remote_allocators.hpp"
#include "intel_gpu/plugin/compiled_model.hpp"
#include "intel_gpu/runtime/itt.hpp"
#include "intel_gpu/plugin/variable_state.hpp"
@ -394,6 +395,8 @@ InferRequest::InferRequest(InputsDataMap networkInputs, OutputsDataMap networkOu
: IInferRequestInternal(networkInputs, networkOutputs) {
IE_ASSERT(nullptr != execNetwork);
streamExecutor = dynamic_cast<InferenceEngine::IStreamsExecutor*>(execNetwork->m_taskExecutor.get());
m_context = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(execNetwork->GetContext());
OPENVINO_ASSERT(m_context != nullptr, "[GPU] Can't initialize context of InferRequest: wrong context type");
}
InferRequest::InferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
@ -402,6 +405,8 @@ InferRequest::InferRequest(const std::vector<std::shared_ptr<const ov::Node>>& i
: IInferRequestInternal(inputs, outputs) {
IE_ASSERT(nullptr != execNetwork);
streamExecutor = dynamic_cast<InferenceEngine::IStreamsExecutor*>(execNetwork->m_taskExecutor.get());
m_context = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(execNetwork->GetContext());
OPENVINO_ASSERT(m_context != nullptr, "[GPU] Can't initialize context of InferRequest: wrong context type");
}
// ----------------------------------------------------------------------------------------- //
@ -450,7 +455,7 @@ void InferRequest::enqueue() {
FormatFromTensorDesc(blobsDesc),
tensor_from_dims(blobsDesc.getDims()));
auto mergedBlobs = create_remote_blob<RemoteCLbuffer>(blobsDesc, layout, RemoteBlobImpl::BlobType::BT_BUF_INTERNAL);
auto mergedBlobs = create_remote_blob<RemoteCLbuffer>(blobsDesc, layout, BlobType::BT_BUF_INTERNAL);
dst = mergedBlobs->buffer().as<uint8_t*>();
_inputs[name] = mergedBlobs;
@ -591,8 +596,8 @@ Blob::Ptr InferRequest::create_host_blob(const TensorDesc& desc, bool is_dynamic
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::create_host_blob");
// Disable USM usage as USMHostAllocator may fail when attempting to allocate 0 bytes
// If we add a WA for such a case to avoid the driver call, the deallocate method will return false and the Blob::setShape call will throw an exception
bool use_usm = m_graph->GetEngine()->use_unified_shared_memory() && !is_dynamic;
auto alloc = use_usm ? std::make_shared<USMHostAllocator>(m_graph->GetContext().get()) : CreateDefaultAllocator();
bool use_usm = m_graph->get_engine().use_unified_shared_memory() && !is_dynamic;
auto alloc = use_usm ? std::make_shared<USMHostAllocator>(m_context) : CreateDefaultAllocator();
auto blob = make_blob_with_precision(desc, alloc);
blob->allocate();
return blob;
@ -600,8 +605,8 @@ Blob::Ptr InferRequest::create_host_blob(const TensorDesc& desc, bool is_dynamic
template<typename RemoteBlobType, typename>
InferenceEngine::Blob::Ptr InferRequest::create_remote_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout,
const RemoteBlobImpl::BlobType mem_type, void* mem_ptr) {
auto blob = std::make_shared<RemoteBlobType>(m_graph->GetContext(),
const BlobType mem_type, void* mem_ptr) {
auto blob = std::make_shared<RemoteBlobType>(m_context,
m_graph->GetNetwork()->get_stream(),
desc,
layout,
@ -615,12 +620,12 @@ InferenceEngine::Blob::Ptr InferRequest::create_remote_blob(const InferenceEngin
}
template InferenceEngine::Blob::Ptr InferRequest::create_remote_blob<RemoteCLbuffer>(const InferenceEngine::TensorDesc&, const cldnn::layout&,
const RemoteBlobImpl::BlobType, void*);
const BlobType, void*);
template InferenceEngine::Blob::Ptr InferRequest::create_remote_blob<RemoteUSMbuffer>(const InferenceEngine::TensorDesc&, const cldnn::layout&,
const RemoteBlobImpl::BlobType, void*);
const BlobType, void*);
Blob::Ptr InferRequest::create_shared_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, void* usm_host_mem) {
auto blob = create_remote_blob<RemoteUSMbuffer>(desc, layout, RemoteBlobImpl::BlobType::BT_USM_SHARED, usm_host_mem);
auto blob = create_remote_blob<RemoteUSMbuffer>(desc, layout, BlobType::BT_USM_SHARED, usm_host_mem);
OPENVINO_ASSERT(blob, "[GPU] Failed to allocate shared host <-> device blob");
return blob;
}
@ -771,7 +776,7 @@ void InferRequest::allocate_inputs() {
_inputs[name] = create_host_blob(desc, input_layout.is_dynamic());
// Pre-allocate the device input only if USM is not supported; otherwise it will be allocated
// in the prepare_input() function later
if (input_layout.is_static() && !m_graph->GetEngine()->use_unified_shared_memory()) {
if (input_layout.is_static() && !m_graph->get_engine().use_unified_shared_memory()) {
_deviceInputs[name] = create_device_blob(desc);
}
}
@ -813,7 +818,7 @@ void InferRequest::allocate_outputs() {
_outputs[no.first] = create_host_blob(desc, output_layout.is_dynamic());
// Pre-allocate the device output only if USM is not supported; otherwise it will be allocated
// in the prepare_output() function later
if (output_layout.is_static() && !m_graph->GetEngine()->use_unified_shared_memory()) {
if (output_layout.is_static() && !m_graph->get_engine().use_unified_shared_memory()) {
_deviceOutputs[no.first] = create_device_blob(desc);
}
}
@ -840,7 +845,7 @@ std::map<std::string, InferenceEngineProfileInfo> InferRequest::GetPerformanceCo
void InferRequest::allocate_dev_mem_if_needed(InferenceEngine::BlobMap& device_mems, InferenceEngine::Blob::Ptr& user_blob,
const cldnn::primitive_id& blob_name, const cldnn::layout& layout, bool need_lockable_mem) {
const auto input_ptr = static_cast<const void*>(user_blob->cbuffer());
const auto alloc_type = m_graph->GetEngine()->detect_usm_allocation_type(input_ptr);
const auto alloc_type = m_graph->get_engine().detect_usm_allocation_type(input_ptr);
const auto is_usm_host = alloc_type == cldnn::allocation_type::usm_host;
const auto has_device_blob = device_mems.find(blob_name) != device_mems.end();
bool can_skip_allocation = false;
@ -851,7 +856,7 @@ void InferRequest::allocate_dev_mem_if_needed(InferenceEngine::BlobMap& device_m
OPENVINO_ASSERT(impl, str_device_output_unsupported_blob);
OPENVINO_ASSERT(impl->is_allocated(), str_input_not_allocated);
auto impl_mem = impl->getMemory();
auto impl_mem = impl->get_memory();
auto src_ptr = user_blob->cbuffer().as<uint8_t*>();
// If device mem already exists, we can reuse blob if buffer has usm_host type and points to the same memory,
// so we don't need to allocate new memory
@ -875,7 +880,7 @@ void InferRequest::allocate_dev_mem_if_needed(InferenceEngine::BlobMap& device_m
device_mems[blob_name] = create_shared_device_blob(user_blob->getTensorDesc(), layout, user_blob->buffer().as<void*>());
} else if (need_lockable_mem) {
device_mems[blob_name] =
create_remote_blob<RemoteUSMbuffer>(user_blob->getTensorDesc(), layout, RemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
create_remote_blob<RemoteUSMbuffer>(user_blob->getTensorDesc(), layout, BlobType::BT_USM_HOST_INTERNAL);
} else {
device_mems[blob_name] = create_device_blob(user_blob->getTensorDesc());
}
@ -894,7 +899,7 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
auto remote_ptr = inputBlob->as<gpu::ClBlob>();
auto& stream = m_graph->GetNetwork()->get_stream();
const bool is_dev_input = remote_ptr != nullptr;
const bool can_use_usm = m_graph->GetEngine()->use_unified_shared_memory();
const bool can_use_usm = m_graph->get_engine().use_unified_shared_memory();
auto conv_to_supported_prec = [](Precision::ePrecision prec) {
switch (prec) {
@ -951,7 +956,7 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
if (!impl->is_allocated()) {
IE_THROW() << str_input_not_allocated;
}
auto inputMem = impl->getMemory();
auto inputMem = impl->get_memory();
auto input_layout = m_graph->GetInputLayouts().find(inputName);
if (input_layout != m_graph->GetInputLayouts().end()) {
@ -1003,7 +1008,7 @@ void InferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::P
const auto output_id = outputsMap.at(outputName);
const auto output_layout = m_graph->GetNetwork()->get_node_output_layout(output_id);
const bool is_static = output_layout.is_static();
const bool can_use_usm = m_graph->GetEngine()->use_unified_shared_memory();
const bool can_use_usm = m_graph->get_engine().use_unified_shared_memory();
auto remote_ptr = outputBlob->as<gpu::ClBlob>();
const bool is_dev_input = remote_ptr != nullptr;
@ -1027,7 +1032,7 @@ void InferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::P
if (!impl->is_allocated()) {
IE_THROW(NotAllocated) << str_output_not_allocated;
}
auto outputMem = impl->getMemory();
auto outputMem = impl->get_memory();
_nw_ptr->set_output_memory(internalName, outputMem);
}
@ -1038,10 +1043,10 @@ InferenceEngine::Blob::Ptr InferRequest::create_device_blob(const InferenceEngin
auto l = cldnn::layout(shape, dt, format);
if (m_graph->GetEngine()->use_unified_shared_memory()) {
return create_remote_blob<RemoteUSMbuffer>(desc, l, RemoteBlobImpl::BlobType::BT_USM_DEVICE_INTERNAL);
if (m_graph->get_engine().use_unified_shared_memory()) {
return create_remote_blob<RemoteUSMbuffer>(desc, l, BlobType::BT_USM_DEVICE_INTERNAL);
} else {
return create_remote_blob<RemoteCLbuffer>(desc, l, RemoteBlobImpl::BlobType::BT_BUF_INTERNAL);
return create_remote_blob<RemoteCLbuffer>(desc, l, BlobType::BT_BUF_INTERNAL);
}
}
@ -1049,7 +1054,7 @@ std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> InferReque
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> ret{};
ret.reserve(variables_states_.size());
for (const auto& pair : variables_states_)
ret.push_back(std::make_shared<VariableState>(pair.first, pair.second, m_graph->GetEngine(), m_curBatch));
ret.push_back(std::make_shared<VariableState>(pair.first, pair.second, m_graph->get_engine(), m_curBatch));
return ret;
}

View File

@ -10,6 +10,8 @@
#include <description_buffer.hpp>
#include "intel_gpu/plugin/infer_request_legacy.hpp"
#include "intel_gpu/plugin/remote_context.hpp"
#include "intel_gpu/plugin/remote_blob.hpp"
#include "intel_gpu/plugin/remote_allocators.hpp"
#include "intel_gpu/plugin/compiled_model.hpp"
#include "intel_gpu/runtime/itt.hpp"
#include "intel_gpu/plugin/variable_state.hpp"
@ -289,7 +291,7 @@ void InferRequestLegacy::SetBlob(const std::string& name, const Blob::Ptr& data)
bool is_nv12 = nv12_ptr != nullptr;
int expected_batch = is_batched ? desc.getDims()[0] : 1;
if (ColorFormat::NV12 == foundInput->getPreProcess().getColorFormat() &&
m_graph->getConfig().nv12_two_inputs) {
m_graph->get_config().get_property(ov::intel_gpu::nv12_two_inputs)) {
// try extracting Y and UV remote blobs from it
// and put them into appropriate network inputs
// that should then go into biplanar NV12 reorder
@ -500,7 +502,7 @@ void InferRequestLegacy::checkBlobs() {
auto node = findInputByNodeName(input.first);
bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic());
if (!is_dynamic)
checkInputBlob(input.second, input.first, foundInput, m_graph->getConfig().nv12_two_inputs);
checkInputBlob(input.second, input.first, foundInput, m_graph->get_config().get_property(ov::intel_gpu::nv12_two_inputs));
}
for (auto const &output : _outputs) {
DataPtr foundOutput = nullptr;
@ -619,6 +621,8 @@ InferRequestLegacy::InferRequestLegacy(InputsDataMap networkInputs, OutputsDataM
: IInferRequestInternal(networkInputs, networkOutputs) {
IE_ASSERT(nullptr != execNetwork);
streamExecutor = dynamic_cast<InferenceEngine::IStreamsExecutor*>(execNetwork->m_taskExecutor.get());
m_context = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(execNetwork->GetContext());
OPENVINO_ASSERT(m_context != nullptr, "[GPU] Can't initialize context of InferRequestLegacy: wrong context type");
}
InferRequestLegacy::InferRequestLegacy(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
@ -627,6 +631,8 @@ InferRequestLegacy::InferRequestLegacy(const std::vector<std::shared_ptr<const o
: IInferRequestInternal(inputs, outputs) {
IE_ASSERT(nullptr != execNetwork);
streamExecutor = dynamic_cast<InferenceEngine::IStreamsExecutor*>(execNetwork->m_taskExecutor.get());
m_context = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(execNetwork->GetContext());
OPENVINO_ASSERT(m_context != nullptr, "[GPU] Can't initialize context of InferRequestLegacy: wrong context type");
}
// ----------------------------------------------------------------------------------------- //
@ -698,7 +704,7 @@ void InferRequestLegacy::enqueue() {
FormatFromTensorDesc(blobsDesc),
tensor_from_dims(blobsDesc.getDims()));
auto mergedBlobs = std::make_shared<RemoteCLbuffer>(m_graph->GetContext(),
auto mergedBlobs = std::make_shared<RemoteCLbuffer>(m_context,
m_graph->GetNetwork()->get_stream(),
blobsDesc,
layout);
@ -914,14 +920,14 @@ Blob::Ptr InferRequestLegacy::create_host_blob(const TensorDesc& desc, std::shar
}
Blob::Ptr InferRequestLegacy::create_shared_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, void* usm_host_mem) {
auto blob = std::make_shared<RemoteUSMbuffer>(m_graph->GetContext(),
auto blob = std::make_shared<RemoteUSMbuffer>(m_context,
m_graph->GetNetwork()->get_stream(),
desc,
layout,
usm_host_mem,
0,
0,
RemoteBlobImpl::BlobType::BT_USM_SHARED);
BlobType::BT_USM_SHARED);
if (!blob)
IE_THROW(NotAllocated) << "Failed to allocate shared host <-> device blob";
blob->allocate();
@ -1009,7 +1015,7 @@ void InferRequestLegacy::allocate_inputs() {
const TensorDesc& desc = ni.second->getTensorDesc();
bool is_nv12_input = ColorFormat::NV12 == ni.second->getPreProcess().getColorFormat() &&
m_graph->getConfig().nv12_two_inputs;
m_graph->get_config().get_property(ov::intel_gpu::nv12_two_inputs);
auto parameter = std::find_if(_parameters.begin(), _parameters.end(), [&](const std::shared_ptr<const ov::Node>& node) {
return node->get_friendly_name() == name;
@ -1040,10 +1046,10 @@ void InferRequestLegacy::allocate_inputs() {
Blob::Ptr inputBlob = create_host_blob(desc);
_inputs[name] = inputBlob;
} else {
if (m_graph->GetEngine()->use_unified_shared_memory()) {
if (m_graph->get_engine().use_unified_shared_memory()) {
// For USM case we create host blob using custom USM host allocator
// and then create shared device blob on top of this buffer
auto host_blob = create_host_blob(desc, std::make_shared<USMHostAllocator>(m_graph->GetContext().get()));
auto host_blob = create_host_blob(desc, std::make_shared<USMHostAllocator>(m_context));
_inputs[name] = host_blob;
_deviceInputs[name] = create_shared_device_blob(desc, litr->second, host_blob->buffer().as<void*>());
} else {
@ -1103,10 +1109,10 @@ void InferRequestLegacy::allocate_outputs() {
auto device_blob = create_device_blob(device_blob_desc, output_layout);
_deviceOutputs[no.first] = device_blob;
} else {
if (m_graph->GetEngine()->use_unified_shared_memory()) {
if (m_graph->get_engine().use_unified_shared_memory()) {
// For USM case we create host blob using custom USM host allocator
// and then create shared device blob on top of this buffer
auto host_blob = create_host_blob(desc, std::make_shared<USMHostAllocator>(m_graph->GetContext().get()));
auto host_blob = create_host_blob(desc, std::make_shared<USMHostAllocator>(m_context));
_outputs[no.first] = host_blob;
_deviceOutputs[no.first] = create_shared_device_blob(desc, output_layout, host_blob->buffer().as<void*>());
} else {
@ -1183,7 +1189,7 @@ void InferRequestLegacy::prepare_input(const cldnn::primitive_id& inputName, Blo
if (!impl->is_allocated()) {
IE_THROW() << str_input_not_allocated;
}
auto inputMem = impl->getMemory();
auto inputMem = impl->get_memory();
auto input_layout = m_graph->GetInputLayouts().find(inputName);
if (input_layout != m_graph->GetInputLayouts().end()) {
@ -1241,25 +1247,25 @@ void InferRequestLegacy::prepare_output(const cldnn::primitive_id& outputName, B
if (!impl->is_allocated()) {
IE_THROW(NotAllocated) << str_output_not_allocated;
}
auto outputMem = impl->getMemory();
auto outputMem = impl->get_memory();
_nw_ptr->set_output_memory(internalName, outputMem);
}
InferenceEngine::Blob::Ptr InferRequestLegacy::create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout) {
if (m_graph->GetEngine()->use_unified_shared_memory()) {
auto blobPtr = std::make_shared<RemoteUSMbuffer>(m_graph->GetContext(),
if (m_graph->get_engine().use_unified_shared_memory()) {
auto blobPtr = std::make_shared<RemoteUSMbuffer>(m_context,
m_graph->GetNetwork()->get_stream(),
desc,
layout,
nullptr,
0,
0,
RemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
BlobType::BT_USM_HOST_INTERNAL);
getBlobImpl(blobPtr.get())->allocate();
checkAlloc(blobPtr, str_device_mem_not_allocated);
return blobPtr;
} else {
auto blobPtr = std::make_shared<RemoteCLbuffer>(m_graph->GetContext(),
auto blobPtr = std::make_shared<RemoteCLbuffer>(m_context,
m_graph->GetNetwork()->get_stream(),
desc,
layout);
@ -1273,7 +1279,7 @@ std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> InferReque
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> ret{};
ret.reserve(variables_states_.size());
for (const auto& pair : variables_states_)
ret.push_back(std::make_shared<VariableState>(pair.first, pair.second, m_graph->GetEngine(), m_curBatch));
ret.push_back(std::make_shared<VariableState>(pair.first, pair.second, m_graph->get_engine(), m_curBatch));
return ret;
}

View File

@ -0,0 +1,272 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/plugin/legacy_api_helper.hpp"
#include "ie_plugin_config.hpp"
#include "gpu/gpu_config.hpp"
namespace ov {
namespace intel_gpu {
bool LegacyAPIHelper::is_new_api_property(const std::pair<std::string, ov::Any>& property) {
static const std::vector<std::string> new_properties_list = {
ov::intel_gpu::hint::queue_priority.name(),
ov::intel_gpu::hint::queue_throttle.name(),
ov::hint::inference_precision.name(),
ov::compilation_num_threads.name(),
ov::num_streams.name(),
};
return std::find(new_properties_list.begin(), new_properties_list.end(), property.first) != new_properties_list.end();
}
bool LegacyAPIHelper::is_legacy_property(const std::pair<std::string, ov::Any>& property, bool is_new_api) {
static const std::vector<std::string> legacy_properties_list = {
InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS,
InferenceEngine::GPUConfigParams::KEY_GPU_MAX_NUM_THREADS,
InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY,
InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE,
};
static const std::vector<std::string> legacy_property_values_list = {
InferenceEngine::PluginConfigParams::KEY_MODEL_PRIORITY,
InferenceEngine::GPUConfigParams::KEY_GPU_HOST_TASK_PRIORITY,
};
bool legacy_property = std::find(legacy_properties_list.begin(), legacy_properties_list.end(), property.first) != legacy_properties_list.end();
bool need_value_conversion = !is_new_api &&
std::find(legacy_property_values_list.begin(), legacy_property_values_list.end(), property.first) != legacy_property_values_list.end();
return legacy_property || need_value_conversion;
}
ov::AnyMap LegacyAPIHelper::convert_legacy_properties(const std::map<std::string, std::string>& properties, bool is_new_api) {
return convert_legacy_properties(ov::AnyMap(properties.begin(), properties.end()), is_new_api);
}
ov::AnyMap LegacyAPIHelper::convert_legacy_properties(const ov::AnyMap& properties, bool is_new_api) {
ov::AnyMap converted_properties;
for (auto& property : properties) {
if (is_legacy_property(property, is_new_api)) {
auto new_property = convert_legacy_property(property);
converted_properties[new_property.first] = new_property.second;
} else {
converted_properties[property.first] = property.second;
}
}
return converted_properties;
}
std::pair<std::string, ov::Any> LegacyAPIHelper::convert_legacy_property(const std::pair<std::string, ov::Any>& legacy_property) {
auto legacy_name = legacy_property.first;
if (legacy_name == InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS) {
ov::Any converted_val{legacy_property.second};
auto legacy_val = legacy_property.second.as<std::string>();
if (legacy_val == InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO)
converted_val = ov::streams::AUTO;
return { ov::num_streams.name(), converted_val };
} else if (legacy_name == InferenceEngine::PluginConfigParams::KEY_MODEL_PRIORITY) {
ov::Any converted_val{nullptr};
auto legacy_val = legacy_property.second.as<std::string>();
if (legacy_val == InferenceEngine::PluginConfigParams::MODEL_PRIORITY_HIGH) {
converted_val = ov::hint::Priority::HIGH;
} else if (legacy_val == InferenceEngine::PluginConfigParams::MODEL_PRIORITY_MED) {
converted_val = ov::hint::Priority::MEDIUM;
} else if (legacy_val == InferenceEngine::PluginConfigParams::MODEL_PRIORITY_LOW) {
converted_val = ov::hint::Priority::LOW;
} else {
converted_val = legacy_val;
}
return { ov::hint::model_priority.name(), converted_val };
} else if (legacy_name == InferenceEngine::GPUConfigParams::KEY_GPU_MAX_NUM_THREADS) {
return { ov::compilation_num_threads.name(), legacy_property.second };
} else if (legacy_name == InferenceEngine::GPUConfigParams::KEY_GPU_HOST_TASK_PRIORITY) {
ov::Any converted_val{nullptr};
auto legacy_val = legacy_property.second.as<std::string>();
if (legacy_val == InferenceEngine::GPUConfigParams::GPU_HOST_TASK_PRIORITY_HIGH) {
converted_val = ov::hint::Priority::HIGH;
} else if (legacy_val == InferenceEngine::GPUConfigParams::GPU_HOST_TASK_PRIORITY_MEDIUM) {
converted_val = ov::hint::Priority::MEDIUM;
} else if (legacy_val == InferenceEngine::GPUConfigParams::GPU_HOST_TASK_PRIORITY_LOW) {
converted_val = ov::hint::Priority::LOW;
} else {
converted_val = legacy_val;
}
return { ov::intel_gpu::hint::host_task_priority.name(), converted_val };
} else if (legacy_name == InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY) {
ov::Any converted_val{nullptr};
auto legacy_val = legacy_property.second.as<std::string>();
if (!legacy_val.empty()) {
std::stringstream ss(legacy_val);
uint32_t uVal(0);
ss >> uVal;
OPENVINO_ASSERT(!ss.fail(), "[GPU] Unsupported property value by plugin: ", legacy_val);
switch (uVal) {
case 0:
case 2:
converted_val = ov::hint::Priority::MEDIUM;
break;
case 1:
converted_val = ov::hint::Priority::LOW;
break;
case 3:
converted_val = ov::hint::Priority::HIGH;
break;
default:
OPENVINO_ASSERT(false, "[GPU] Unsupported queue priority value ", uVal);
}
}
return { ov::intel_gpu::hint::queue_priority.name(), converted_val };
} else if (legacy_name == InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE) {
ov::Any converted_val{nullptr};
auto legacy_val = legacy_property.second.as<std::string>();
if (!legacy_val.empty()) {
std::stringstream ss(legacy_val);
uint32_t uVal(0);
ss >> uVal;
OPENVINO_ASSERT(!ss.fail(), "[GPU] Unsupported property value by plugin: ", legacy_val);
switch (uVal) {
case 0:
case 2:
converted_val = ov::intel_gpu::hint::ThrottleLevel::MEDIUM;
break;
case 1:
converted_val = ov::intel_gpu::hint::ThrottleLevel::LOW;
break;
case 3:
converted_val = ov::intel_gpu::hint::ThrottleLevel::HIGH;
break;
default:
OPENVINO_ASSERT(false, "[GPU] Unsupported queue throttle value ", uVal);
}
}
return { ov::intel_gpu::hint::queue_throttle.name(), converted_val };
}
OPENVINO_ASSERT(false, "[GPU] Unhandled legacy property in convert_legacy_property method: ", legacy_property.first);
}
std::pair<std::string, ov::Any> LegacyAPIHelper::convert_to_legacy_property(const std::pair<std::string, ov::Any>& property) {
auto name = property.first;
if (name == ov::num_streams.name()) {
ov::Any legacy_val{property.second};
if (!property.second.empty()) {
if (property.second.as<ov::streams::Num>() == ov::streams::AUTO) {
legacy_val = InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO;
}
}
return { InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, legacy_val };
} else if (name == ov::hint::model_priority.name()) {
ov::Any legacy_val{nullptr};
if (!property.second.empty()) {
ov::hint::Priority val = property.second.as<ov::hint::Priority>();
switch (val) {
case ov::hint::Priority::LOW: legacy_val = InferenceEngine::PluginConfigParams::MODEL_PRIORITY_LOW; break;
case ov::hint::Priority::MEDIUM: legacy_val = InferenceEngine::PluginConfigParams::MODEL_PRIORITY_MED; break;
case ov::hint::Priority::HIGH: legacy_val = InferenceEngine::PluginConfigParams::MODEL_PRIORITY_HIGH; break;
default: OPENVINO_ASSERT(false, "[GPU] Unsupported model priority value ", val);
}
}
return { InferenceEngine::PluginConfigParams::KEY_MODEL_PRIORITY, legacy_val };
} else if (name == ov::compilation_num_threads.name()) {
return { InferenceEngine::GPUConfigParams::KEY_GPU_MAX_NUM_THREADS, property.second };
} else if (name == ov::intel_gpu::hint::host_task_priority.name()) {
ov::Any legacy_val{nullptr};
if (!property.second.empty()) {
ov::hint::Priority val = property.second.as<ov::hint::Priority>();
switch (val) {
case ov::hint::Priority::LOW: legacy_val = InferenceEngine::GPUConfigParams::GPU_HOST_TASK_PRIORITY_LOW; break;
case ov::hint::Priority::MEDIUM: legacy_val = InferenceEngine::GPUConfigParams::GPU_HOST_TASK_PRIORITY_MEDIUM; break;
case ov::hint::Priority::HIGH: legacy_val = InferenceEngine::GPUConfigParams::GPU_HOST_TASK_PRIORITY_HIGH; break;
default: OPENVINO_ASSERT(false, "[GPU] Unsupported host task priority value ", val);
}
}
return { InferenceEngine::GPUConfigParams::KEY_GPU_HOST_TASK_PRIORITY, legacy_val };
} else if (name == ov::intel_gpu::hint::queue_priority.name()) {
ov::Any legacy_val{nullptr};
if (!property.second.empty()) {
ov::hint::Priority val = property.second.as<ov::hint::Priority>();
switch (val) {
case ov::hint::Priority::LOW: legacy_val = "1"; break;
case ov::hint::Priority::MEDIUM: legacy_val = "2"; break;
case ov::hint::Priority::HIGH: legacy_val = "3"; break;
default: OPENVINO_ASSERT(false, "[GPU] Unsupported queue throttle value ", val);
}
}
return { InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY, legacy_val };
} else if (name == ov::intel_gpu::hint::queue_throttle.name()) {
ov::Any legacy_val{nullptr};
if (!property.second.empty()) {
ov::intel_gpu::hint::ThrottleLevel val = property.second.as<ov::intel_gpu::hint::ThrottleLevel>();
switch (val) {
case ov::intel_gpu::hint::ThrottleLevel::LOW: legacy_val = "1"; break;
case ov::intel_gpu::hint::ThrottleLevel::MEDIUM: legacy_val = "2"; break;
case ov::intel_gpu::hint::ThrottleLevel::HIGH: legacy_val = "3"; break;
default: OPENVINO_ASSERT(false, "[GPU] Unsupported queue throttle value ", val);
}
}
return { InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE, legacy_val };
}
OPENVINO_ASSERT(false, "[GPU] Unhandled legacy property in convert_to_legacy_property method: ", property.first);
}
std::vector<std::string> LegacyAPIHelper::get_supported_configs() {
static const std::vector<std::string> supported_config = {
CONFIG_KEY(MODEL_PRIORITY),
CONFIG_KEY(PERFORMANCE_HINT),
CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS),
CONFIG_KEY(PERF_COUNT),
CONFIG_KEY(DYN_BATCH_ENABLED),
CONFIG_KEY(CONFIG_FILE),
CONFIG_KEY(DEVICE_ID),
CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS),
CONFIG_KEY(CACHE_DIR),
CONFIG_KEY(GPU_THROUGHPUT_STREAMS),
GPU_CONFIG_KEY(PLUGIN_PRIORITY),
GPU_CONFIG_KEY(PLUGIN_THROTTLE),
GPU_CONFIG_KEY(HOST_TASK_PRIORITY),
GPU_CONFIG_KEY(NV12_TWO_INPUTS),
GPU_CONFIG_KEY(MAX_NUM_THREADS),
GPU_CONFIG_KEY(ENABLE_LOOP_UNROLLING),
};
return supported_config;
}
std::vector<std::string> LegacyAPIHelper::get_supported_metrics(bool model_caching_enabled) {
std::vector<std::string> supported_metrics = {
METRIC_KEY(AVAILABLE_DEVICES),
METRIC_KEY(SUPPORTED_METRICS),
METRIC_KEY(FULL_DEVICE_NAME),
METRIC_KEY(OPTIMIZATION_CAPABILITIES),
METRIC_KEY(SUPPORTED_CONFIG_KEYS),
METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS),
METRIC_KEY(RANGE_FOR_STREAMS),
METRIC_KEY(DEVICE_TYPE),
METRIC_KEY(DEVICE_GOPS),
METRIC_KEY(OPTIMAL_BATCH_SIZE),
METRIC_KEY(MAX_BATCH_SIZE),
GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE),
GPU_METRIC_KEY(UARCH_VERSION),
GPU_METRIC_KEY(EXECUTION_UNITS_COUNT),
GPU_METRIC_KEY(MEMORY_STATISTICS),
};
if (model_caching_enabled)
supported_metrics.push_back(METRIC_KEY(IMPORT_EXPORT_SUPPORT));
return supported_metrics;
}
} // namespace intel_gpu
} // namespace ov
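A brief sketch of how this helper might be called to normalize a mixed legacy configuration before it reaches ExecutionConfig; the call site is an assumption, while the helper API and the value mappings are taken from the file above.
#include <map>
#include <string>
#include "intel_gpu/plugin/legacy_api_helper.hpp"
#include "ie_plugin_config.hpp"
#include "gpu/gpu_config.hpp"
ov::AnyMap normalize_user_options() {
    std::map<std::string, std::string> user_options = {
        {InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS,
         InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO},        // -> ov::num_streams(ov::streams::AUTO)
        {InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE, "1"},  // -> queue_throttle(ThrottleLevel::LOW)
    };
    // Legacy keys are rewritten to 2.0 properties; anything already in the new form is passed through.
    return ov::intel_gpu::LegacyAPIHelper::convert_legacy_properties(user_options, /*is_new_api=*/false);
}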

View File

@ -38,7 +38,7 @@ static void CreateAdaptiveMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op
const cldnn::layout indices_layout{cldnn::element_type_to_data_type(indices_precision),
cldnn::format::get_default_format(indices_shape.size()),
tensor_from_dims(indices_shape)};
const auto indices_memory = p.GetEngine().allocate_memory(indices_layout);
const auto indices_memory = p.get_engine().allocate_memory(indices_layout);
const cldnn::primitive_id indices_id_w = layer_type_name + "_md_write";
const cldnn::mutable_data indices_mutable_prim_w{indices_id_w, indices_memory};

View File

@ -202,8 +202,8 @@ void createClDnnConstant(Program& p, const ngraph::Shape& constDims, const std::
p.profiling_ids.push_back(initialconstPrimID);
} else {
GPU_DEBUG_LOG << "[" << initialconstPrimID << ": constant]" << std::endl;
cldnn::memory::ptr mem = p.GetEngine().allocate_memory(constLayout, false);
auto& stream = p.GetEngine().get_program_stream();
cldnn::memory::ptr mem = p.get_engine().allocate_memory(constLayout, false);
auto& stream = p.get_engine().get_service_stream();
cldnn::mem_lock<char> lock{mem, stream};
auto buf = lock.data();
auto bufSize = constLayout.bytes_count();

View File

@ -314,7 +314,7 @@ static void DeformableConvolutionImpl(Program& p,
std::vector<cldnn::primitive_id> weights = {inputs[2].pid};
// Remove weights from inputs
inputs.erase(inputs.begin() + 2);
auto device_info = p.GetEngine().get_device_info();
auto device_info = p.get_engine().get_device_info();
bool supports_subgroups = device_info.supports_khr_subgroups || device_info.supports_intel_subgroups;
if (groups == 1 && supports_subgroups) {
std::string defConvLayerNameInterp = layerName + "_interp";

View File

@ -74,7 +74,7 @@ static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngr
tensor_from_dims(op->get_output_shape(1)));
GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayout));
shared_memory.emplace_back(p.get_engine().allocate_memory(mutableLayout));
cldnn::primitive_id ctc_gd_mutable_id_w = layer_type_name_ID(op) + "_md_write";
auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w,

View File

@ -33,7 +33,7 @@ static void CreateExperimentalDetectronDetectionOutputOp(
const cldnn::layout mutable_layout1{cldnn::element_type_to_data_type(mutable_precision1),
cldnn::format::get_default_format(output_shape1.size()),
tensor_from_dims(output_shape1)};
cldnn::memory::ptr shared_memory1{p.GetEngine().allocate_memory(mutable_layout1)};
cldnn::memory::ptr shared_memory1{p.get_engine().allocate_memory(mutable_layout1)};
const auto mutable_id_w1 = layer_type_name + "_md_write.1";
const cldnn::mutable_data mutable_prim_w{mutable_id_w1, shared_memory1};
@ -45,7 +45,7 @@ static void CreateExperimentalDetectronDetectionOutputOp(
const cldnn::layout mutable_layout2{cldnn::element_type_to_data_type(mutable_precision2),
cldnn::format::get_default_format(output_shape2.size()),
tensor_from_dims(output_shape2)};
cldnn::memory::ptr shared_memory2{p.GetEngine().allocate_memory(mutable_layout2)};
cldnn::memory::ptr shared_memory2{p.get_engine().allocate_memory(mutable_layout2)};
const auto mutable_id_w2 = layer_type_name + "_md_write.2";
const cldnn::mutable_data mutable_prim_w2{mutable_id_w2, shared_memory2};

View File

@ -33,7 +33,7 @@ static void CreateExperimentalDetectronGenerateProposalsSingleImageOp(
const cldnn::layout mutable_layout{cldnn::element_type_to_data_type(mutable_precision),
cldnn::format::get_default_format(output_shape.size()),
tensor_from_dims(output_shape)};
cldnn::memory::ptr shared_memory{p.GetEngine().allocate_memory(mutable_layout)};
cldnn::memory::ptr shared_memory{p.get_engine().allocate_memory(mutable_layout)};
const auto mutable_id_w = layer_type_name + "_md_write";
const cldnn::mutable_data mutable_prim_w{mutable_id_w, shared_memory};

View File

@ -22,7 +22,7 @@ static void CreateExperimentalDetectronROIFeatureExtractorOp(Program& p, const s
cldnn::format::get_default_format(op->get_output_shape(1).size()),
tensor_from_dims(op->get_output_shape(1)));
cldnn::memory::ptr shared_memory {p.GetEngine().allocate_memory(mutableLayout)};
cldnn::memory::ptr shared_memory {p.get_engine().allocate_memory(mutableLayout)};
cldnn::primitive_id experimental_detectron_mutable_id_w = layer_type_name_ID(op) + "_md_write";
cldnn::mutable_data experimenta_detectron_mutable_prim(experimental_detectron_mutable_id_w,

View File

@ -32,7 +32,7 @@ static void CreateGenerateProposalsIEInternalOp(
const cldnn::layout mutable_layout_1{cldnn::element_type_to_data_type(mutable_precision_1),
cldnn::format::get_default_format(output_shape_1.size()),
tensor_from_dims(output_shape_1)};
cldnn::memory::ptr shared_memory_1{p.GetEngine().allocate_memory(mutable_layout_1)};
cldnn::memory::ptr shared_memory_1{p.get_engine().allocate_memory(mutable_layout_1)};
const auto mutable_id_w_1 = layer_type_name + "_md_write.1";
const cldnn::mutable_data mutable_prim_w_1{mutable_id_w_1, shared_memory_1};
@ -45,7 +45,7 @@ static void CreateGenerateProposalsIEInternalOp(
const cldnn::layout mutable_layout_2{cldnn::element_type_to_data_type(mutable_precision_2),
cldnn::format::get_default_format(output_shape_2.size()),
tensor_from_dims(output_shape_2)};
cldnn::memory::ptr shared_memory_2{p.GetEngine().allocate_memory(mutable_layout_2)};
cldnn::memory::ptr shared_memory_2{p.get_engine().allocate_memory(mutable_layout_2)};
const auto mutable_id_w_2 = layer_type_name + "_md_write.2";
const cldnn::mutable_data mutable_prim_w_2{mutable_id_w_2, shared_memory_2};

View File

@ -29,8 +29,8 @@ namespace intel_gpu {
template<class DATA_TYPE>
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) {
auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
cldnn::mem_lock<int64_t> ptr{mem, p.GetEngine().get_program_stream()};
auto mem = p.get_engine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
cldnn::mem_lock<int64_t> ptr{mem, p.get_engine().get_service_stream()};
*ptr.begin() = num;
return {id, mem};
}
@ -42,7 +42,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha
const auto format = cldnn::format::get_default_format(op->get_output_shape(output_idx).size());
const auto tensor = tensor_from_dims(op->get_output_shape(output_idx));
cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
auto mem = p.GetEngine().allocate_memory(output_layout);
auto mem = p.get_engine().allocate_memory(output_layout);
auto md = cldnn::mutable_data(id, {cldnn::input_info(input)}, mem); // cldnn::data cannot set dependency
return md;
}
@ -82,7 +82,7 @@ static void CreateLoopOp(Program& p, const std::shared_ptr<Loop>& op) {
}
// get body topology from ngraph function
Program body_program(body_network, p.GetEnginePtr(), p.GetConfig(), true);
Program body_program(body_network, p.get_engine(), p.get_config(), true);
auto body_topology = *body_program.GetTopology();
// setup input_primitive_maps/ output_primitive_maps and back_edges
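The hunks above (and the matching ones in the TensorIterator op further down) move helper allocations from the removed program stream to the engine's service stream. A restated sketch of that allocate-and-fill pattern follows; it mirrors CreateScalarData rather than introducing any new plugin API.
// Allocate a 1x1x1x1 i64 buffer and write a scalar into it via the engine's service stream.
static cldnn::memory::ptr make_i64_scalar(cldnn::engine& engine, int64_t value) {
    auto mem = engine.allocate_memory({cldnn::data_types::i64, cldnn::format::bfyx, {1, 1, 1, 1}});
    cldnn::mem_lock<int64_t> lock{mem, engine.get_service_stream()};
    *lock.begin() = value;
    return mem;
}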

View File

@ -34,7 +34,7 @@ void CreateNmsStaticShapeIE8Op(Program& p, const std::shared_ptr<ngraph::op::int
cldnn::format::bfyx,
cldnn::tensor(static_cast<int32_t>(outputIndices), 1, 1, 1));
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutFirst));
shared_memory.emplace_back(p.get_engine().allocate_memory(mutableLayoutFirst));
cldnn::primitive_id matrix_nms_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first";
auto matrix_nms_mutable_prim_first = cldnn::mutable_data(matrix_nms_mutable_id_w_first, shared_memory.back());
@ -46,7 +46,7 @@ void CreateNmsStaticShapeIE8Op(Program& p, const std::shared_ptr<ngraph::op::int
cldnn::format::bfyx,
cldnn::tensor(static_cast<int32_t>(batches_num), 1, 1, 1));
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond));
shared_memory.emplace_back(p.get_engine().allocate_memory(mutableLayoutSecond));
cldnn::primitive_id matrix_nms_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second";
auto matrix_nms_mutable_prim_second = cldnn::mutable_data(matrix_nms_mutable_id_w_second, shared_memory.back());

View File

@ -32,7 +32,7 @@ static void CreateMulticlassNmsIEInternalOp(Program& p, const std::shared_ptr<ng
const cldnn::layout mutable_layout1{cldnn::element_type_to_data_type(mutable_precision1),
cldnn::format::get_default_format(output_shape1.size()),
tensor_from_dims(output_shape1)};
cldnn::memory::ptr shared_memory1{p.GetEngine().allocate_memory(mutable_layout1)};
cldnn::memory::ptr shared_memory1{p.get_engine().allocate_memory(mutable_layout1)};
const auto mutable_id_w1 = layer_type_name + "_md_write.1";
const cldnn::mutable_data mutable_prim_w{mutable_id_w1, shared_memory1};
@ -44,7 +44,7 @@ static void CreateMulticlassNmsIEInternalOp(Program& p, const std::shared_ptr<ng
const cldnn::layout mutable_layout2{cldnn::element_type_to_data_type(mutable_precision2),
cldnn::format::get_default_format(output_shape2.size()),
tensor_from_dims(output_shape2)};
cldnn::memory::ptr shared_memory2{p.GetEngine().allocate_memory(mutable_layout2)};
cldnn::memory::ptr shared_memory2{p.get_engine().allocate_memory(mutable_layout2)};
const auto mutable_id_w2 = layer_type_name + "_md_write.2";
const cldnn::mutable_data mutable_prim_w2{mutable_id_w2, shared_memory2};

View File

@ -105,7 +105,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt
tensor_from_dims(op->get_output_shape(2)));
GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond));
shared_memory.emplace_back(p.get_engine().allocate_memory(mutableLayoutSecond));
cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second";
auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second,
@ -121,7 +121,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt
cldnn::tensor(static_cast<int32_t>(outputIndices), 3, 1, 1));
GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutFirst));
shared_memory.emplace_back(p.get_engine().allocate_memory(mutableLayoutFirst));
cldnn::primitive_id non_max_supression_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first";
auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first,

View File

@ -36,8 +36,8 @@ static void CreateNormalizeL2Op(Program& p, const std::shared_ptr<ngraph::op::v0
// We create fake scale constant and fill it with ones to keep the same behavior as current primitive
auto scale = std::make_shared<ngraph::op::v0::Constant>(op->get_output_element_type(0), ngraph::Shape{1}, std::vector<float>{1.0});
cldnn::layout constLayout = cldnn::layout(cldnn::element_type_to_data_type(op->get_output_element_type(0)), cldnn::format::bfyx, cldnn::tensor{1});
auto mem = p.GetEngine().allocate_memory(constLayout, false);
cldnn::mem_lock<int8_t> tmpPointer{mem, p.GetEngine().get_program_stream()};
auto mem = p.get_engine().allocate_memory(constLayout, false);
cldnn::mem_lock<int8_t> tmpPointer{mem, p.get_engine().get_service_stream()};
auto buf = tmpPointer.data();
auto bufSize = scale->get_output_tensor(0).size();

View File

@ -108,8 +108,8 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
if (bufIter != p.blobMemCache.end()) {
meanBlobID = bufIter->second;
} else {
auto mem = p.GetEngine().allocate_memory(meanBlobLayout, false);
cldnn::mem_lock<int8_t> tmpPointer{ mem, p.GetEngine().get_program_stream() };
auto mem = p.get_engine().allocate_memory(meanBlobLayout, false);
cldnn::mem_lock<int8_t> tmpPointer{ mem, p.get_engine().get_service_stream() };
auto buf = tmpPointer.data();
auto bufSize = meanBlobLayout.bytes_count();
@ -197,7 +197,7 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
p.inputLayouts.insert({ inputInfo->name(), networkInputLayout });
p.add_primitive(*op, cldnn::input_layout(inputName, networkInputLayout));
} else {
if (ColorFormat::NV12 == preProcess.getColorFormat() && p.GetConfig().nv12_two_inputs) {
if (ColorFormat::NV12 == preProcess.getColorFormat() && p.get_config().get_property(ov::intel_gpu::nv12_two_inputs)) {
// for NV12, create two input layouts with a reorder instead of one,
// and then expect a compound blob in the InferRequest
if (InferenceEngine::Layout::NCHW != l &&

View File

@ -90,7 +90,7 @@ static void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v8::Ma
cldnn::layout mutableLayout = cldnn::layout(cldnn::element_type_to_data_type(mutable_precision),
cldnn::format::get_default_format(output_shape.size()),
tensor_from_dims(output_shape));
const auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);
const auto shared_memory = p.get_engine().allocate_memory(mutableLayout);
const cldnn::primitive_id maxpool_mutable_id_w = layer_type_name + "_md_write";
auto indices_mutable_prim = cldnn::mutable_data(maxpool_mutable_id_w,
shared_memory);

View File

@ -65,7 +65,7 @@ static void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::P
tensor_from_dims(op->get_output_shape(1)));
GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);
auto shared_memory = p.get_engine().allocate_memory(mutableLayout);
cldnn::primitive_id proposal_mutable_id_w = layer_type_name_ID(op) + "_md_write";
auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w,

View File

@ -28,8 +28,8 @@ namespace intel_gpu {
template<class DATA_TYPE>
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) {
auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
cldnn::mem_lock<int64_t> ptr{mem, p.GetEngine().get_program_stream()};
auto mem = p.get_engine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
cldnn::mem_lock<int64_t> ptr{mem, p.get_engine().get_service_stream()};
*ptr.begin() = num;
return {id, mem};
}
@ -41,7 +41,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha
const auto format = cldnn::format::get_default_format(op->get_output_shape(output_idx).size());
const auto tensor = tensor_from_dims(op->get_output_shape(output_idx));
cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
auto mem = p.GetEngine().allocate_memory(output_layout);
auto mem = p.get_engine().allocate_memory(output_layout);
auto md = cldnn::mutable_data(id, {cldnn::input_info(input)}, mem); // cldnn::data cannot set dependency
return md;
}
@ -51,7 +51,7 @@ static void CreateTensorIteratorOp(Program &p, const std::shared_ptr<TensorItera
// get body topology from ngraph function
InferenceEngine::CNNNetwork body_network(op->get_body());
Program body_program(body_network, p.GetEnginePtr(), p.GetConfig(), true);
Program body_program(body_network, p.get_engine(), p.get_config(), true);
auto body_topology = *body_program.GetTopology();
// setup input_primitive_maps/ output_primitive_maps and back_edges

View File

@ -66,7 +66,7 @@ static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>
tensor_from_dims(op->get_output_shape(1)));
GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);
auto shared_memory = p.get_engine().allocate_memory(mutableLayout);
cldnn::primitive_id argmax_mutable_id_w = layer_type_name_ID(op) + "_md_write";
auto argmax_mutable_prim = cldnn::mutable_data(argmax_mutable_id_w,

File diff suppressed because it is too large

View File

@ -16,6 +16,10 @@
#include "intel_gpu/primitives/mutable_data.hpp"
#include "intel_gpu/primitives/data.hpp"
#ifdef __linux__
# include <dlfcn.h>
#endif
using namespace InferenceEngine;
using namespace InferenceEngine::details;
@ -121,7 +125,7 @@ bool Program::IsDynBatchModel(const std::shared_ptr<ov::Model>& model,
return dyn_shape_batch_found;
}
Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::engine> engine, const Config& config,
Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
bool createTopologyOnly, bool partialBuild)
: m_curBatch(-1)
, m_config(config)
@ -136,30 +140,60 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::en
IE_THROW() << "Function pointer inside CNNNetwork is nullptr";
}
// locate global custom kernel config
// and auto-load kernels from it
#ifdef _WIN32
CHAR mpath[MAX_PATH + 1];
HMODULE nModule;
GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
(LPCSTR)CustomLayer::LoadFromFile,
&nModule);
GetModuleFileName(nModule, mpath, sizeof(mpath));
#elif __linux__
Dl_info dl_info;
dladdr(reinterpret_cast<void *>(CustomLayer::LoadFromFile), &dl_info);
const char* mpath = dl_info.dli_fname;
#endif
std::string configFile(mpath);
std::size_t dir_split_pos = configFile.find_last_of("/\\");
std::string config_path;
if (dir_split_pos != std::string::npos) {
// path contains directory
config_path = configFile.substr(0, dir_split_pos);
}
config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml";
CustomLayer::LoadFromFile(config_path, m_custom_layers, true);
auto custom_layers_config = m_config.get_property(ov::intel_gpu::config_file);
CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty());
auto ops = func->get_ordered_ops();
bool dyn_shape_batch_found = false;
std::map<std::string, ngraph::PartialShape> shapes;
std::map<std::string, std::pair<int64_t, int64_t>> batch_dim;
if (m_config.enableDynamicBatch) {
auto enable_dynamic_batch = m_config.get_property(ov::intel_gpu::enable_dynamic_batch);
if (enable_dynamic_batch) {
m_config.set_property(ov::intel_gpu::max_dynamic_batch(network.getBatchSize()));
// in case of legacy dynamic batch,
// we assume 4D input with 0 batch dim
auto param = func->get_parameters().front();
auto pname = getParamName(param);
shapes[pname] = param->get_output_partial_shape(0);
batch_dim[pname].first = 0;
batch_dim[pname].second = m_config.max_dynamic_batch;
batch_dim[pname].second = m_config.get_property(ov::intel_gpu::max_dynamic_batch);
} else {
dyn_shape_batch_found = IsDynBatchModel(func, shapes, batch_dim);
if (dyn_shape_batch_found) {
m_config.max_dynamic_batch = batch_dim.begin()->second.second;
m_config.set_property(ov::intel_gpu::max_dynamic_batch(batch_dim.begin()->second.second));
}
}
int m_bv_sz = GetMaxBatchSizeForSingleProgram();
m_max_batch = m_config.max_dynamic_batch;
m_max_batch = m_config.get_property(ov::intel_gpu::max_dynamic_batch);
if (dyn_shape_batch_found || config.max_dynamic_batch > 1) {
if (dyn_shape_batch_found || m_max_batch > 1) {
// compile log2 networks to serve dynamic batch requests
for (int b = m_bv_sz - 1; b >= 0; b--) {
inputLayouts.clear();
@ -188,8 +222,8 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::en
}
new_func->reshape(new_shapes);
{
auto deviceInfo = engine->get_device_info();
TransformationsPipeline transformations(config, deviceInfo);
auto deviceInfo = engine.get_device_info();
TransformationsPipeline transformations(m_config, deviceInfo);
transformations.apply(new_func);
}
@ -275,9 +309,10 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::en
}
int Program::GetMaxBatchSizeForSingleProgram() {
if (m_config.max_dynamic_batch > 1) {
auto max_dynamic_batch = m_config.get_property(ov::intel_gpu::max_dynamic_batch);
if (max_dynamic_batch > 1) {
// calculate number of networks necessary based on binary log
unsigned int tmp = m_config.max_dynamic_batch;
unsigned int tmp = max_dynamic_batch;
unsigned int mask = 1U << 31;
unsigned int ldigit = 31;
@ -324,7 +359,6 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
InferenceEngine::OutputsDataMap networkOutputs,
bool createTopologyOnly, bool partialBuild) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::BuildProgram");
cldnn::build_options options;
for (const auto& op : ops) {
if (op->is_dynamic()) {
@ -333,11 +367,10 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
}
}
options.set_option(cldnn::build_option::allow_new_shape_infer(allow_new_shape_infer));
options.set_option(cldnn::build_option::optimize_data(true));
if (partialBuild) {
options.set_option(cldnn::build_option::partial_build_program(true));
}
m_config.set_property(ov::intel_gpu::partial_build_program(partialBuild));
m_config.set_property(ov::intel_gpu::optimize_data(true));
m_config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer));
PrepareBuild(networkInputs, networkOutputs);
{
GPU_DEBUG_DEFINE_MEM_LOGGER("CreateSingleLayerPrimitives");
@ -351,7 +384,7 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::CreateProgram");
cldnn::program::ptr program;
try {
program = cldnn::program::build_program(*m_engine, *m_topology, options);
program = cldnn::program::build_program(m_engine, *m_topology, m_config);
} catch (std::exception& e) {
IE_THROW() << "cldnn program build failed! " << e.what();
}
@ -396,8 +429,8 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::s
bool is_created = false;
const ngraph::NodeTypeInfo* op_type_info = &op->get_type_info();
while (op_type_info != nullptr) {
auto customLayer = m_config.customLayers.find(op->get_type_name());
if (customLayer != m_config.customLayers.end()) {
auto customLayer = m_custom_layers.find(op->get_type_name());
if (customLayer != m_custom_layers.end()) {
CreateCustomOp(*this, op, customLayer->second);
return;
}
@ -488,7 +521,7 @@ void Program::add_primitive(const ngraph::Node& op, std::shared_ptr<cldnn::primi
prim->origin_op_type_name = prim->type_string();
}
if (this->m_config.useProfiling && should_profile) {
if (this->m_config.get_property(ov::enable_profiling) && should_profile) {
profiling_ids.push_back(prim_id);
init_profile_info(*prim);
}
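
The hunks above replace cldnn::build_options with properties set directly on the ExecutionConfig that is later handed to build_program. A minimal sketch of the resulting flow, assuming an already created engine and topology (the helper name and the availability of the graph headers are illustrative assumptions, not part of this commit):

#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/execution_config.hpp"
// headers declaring cldnn::topology / cldnn::program are assumed to be available as well

cldnn::program::ptr build_with_config(cldnn::engine& engine, cldnn::topology& topology) {
    ov::intel_gpu::ExecutionConfig config;                              // starts from the set_default() values
    config.set_property(ov::intel_gpu::optimize_data(true));           // formerly cldnn::build_option::optimize_data
    config.set_property(ov::intel_gpu::allow_new_shape_infer(false));  // formerly cldnn::build_option::allow_new_shape_infer
    config.set_property(ov::intel_gpu::partial_build_program(false));  // formerly cldnn::build_option::partial_build_program
    return cldnn::program::build_program(engine, topology, config);    // config replaces the old build_options argument
}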

View File

@ -0,0 +1,68 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <memory>
#include "intel_gpu/plugin/remote_allocators.hpp"
#include "intel_gpu/plugin/remote_blob.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::gpu;
using namespace InferenceEngine::details;
namespace ov {
namespace intel_gpu {
void RemoteAllocator::regLockedBlob(void* handle, const RemoteBlobImpl* blob) {
std::lock_guard<RemoteAllocator> locker(*this);
auto iter = m_lockedBlobs.find(handle);
if (iter == m_lockedBlobs.end()) {
m_lockedBlobs.emplace(handle, blob);
}
}
void RemoteAllocator::unlock(void* handle) noexcept {
std::lock_guard<RemoteAllocator> locker(*this);
auto iter = m_lockedBlobs.find(handle);
if (iter != m_lockedBlobs.end()) {
iter->second->unlock();
m_lockedBlobs.erase(iter);
}
}
void* USMHostAllocator::lock(void* handle, InferenceEngine::LockOp) noexcept {
if (!_usm_host_blob)
return nullptr;
try {
return _usm_host_blob->get();
} catch (...) {
return nullptr;
}
};
void USMHostAllocator::unlock(void* handle) noexcept {}
void* USMHostAllocator::alloc(size_t size) noexcept {
try {
auto td = TensorDesc(Precision::U8, SizeVector{size}, InferenceEngine::Layout::C);
ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_HOST_BUFFER)}};
_usm_host_blob = std::dynamic_pointer_cast<USMBlob>(_context->CreateBlob(td, params));
_usm_host_blob->allocate();
if (!getBlobImpl(_usm_host_blob.get())->is_allocated()) {
return nullptr;
}
return _usm_host_blob->get();
} catch (...) {
return nullptr;
}
}
bool USMHostAllocator::free(void* handle) noexcept {
try {
_usm_host_blob = nullptr;
} catch(...) { }
return true;
}
} // namespace intel_gpu
} // namespace ov
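
For context, the allocator above is what backs host blobs created through the remote context. A hedged usage sketch; `public_context` and `desc` are placeholders supplied by the caller, and the blob-factory include is assumed to be present:

using namespace InferenceEngine;

// public_context: gpu::ClContext::Ptr, desc: TensorDesc describing the tensor to allocate
Blob::Ptr make_usm_host_blob(gpu::ClContext::Ptr public_context, const TensorDesc& desc) {
    auto allocator = std::make_shared<ov::intel_gpu::USMHostAllocator>(public_context);
    auto blob = make_blob_with_precision(desc, allocator);  // alloc() above requests a USM_HOST_BUFFER from the context
    blob->allocate();
    return blob;
}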

View File

@ -0,0 +1,285 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <memory>
#include "intel_gpu/plugin/remote_context.hpp"
#include "intel_gpu/plugin/remote_blob.hpp"
#include "intel_gpu/plugin/remote_allocators.hpp"
#include "intel_gpu/plugin/plugin.hpp"
#include "intel_gpu/runtime/itt.hpp"
#include "intel_gpu/runtime/device_query.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::gpu;
using namespace InferenceEngine::details;
namespace ov {
namespace intel_gpu {
RemoteBlobImpl::RemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
cldnn::stream& stream,
const cldnn::layout& layout,
cldnn::shared_handle mem,
cldnn::shared_surface surf,
uint32_t plane,
BlobType mem_type)
: m_allocator(std::make_shared<RemoteAllocator>())
, m_context(context)
, m_stream(stream)
, m_mem(mem)
, m_surf(surf)
, m_plane(plane)
, m_layout(layout)
, m_mem_type(mem_type)
, m_memory_object(nullptr)
, lockedCounter(0)
, lockedHolder(nullptr)
, _handle(nullptr) {
if (supports_caching()) {
m_hash = cldnn::hash_combine(0, m_mem);
m_hash = cldnn::hash_combine(m_hash, m_surf);
m_hash = cldnn::hash_combine(m_hash, plane);
m_hash = cldnn::hash_combine(m_hash, static_cast<std::underlying_type<cldnn::format::type>::type>(layout.format));
m_hash = cldnn::hash_combine(m_hash, static_cast<std::underlying_type<cldnn::data_types>::type>(layout.data_type));
for (auto& d : layout.get_shape()) {
m_hash = cldnn::hash_combine(m_hash, d);
}
}
}
AnyMap RemoteBlobImpl::getParams() const {
OPENVINO_ASSERT(is_allocated(), "[GPU] Can't get RemoteBlob params as blob wasn't allocated properly");
auto params = m_memory_object->get_internal_params();
switch (m_mem_type) {
case BlobType::BT_BUF_INTERNAL:
case BlobType::BT_BUF_SHARED:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem }
};
case BlobType::BT_USM_SHARED:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_USER_BUFFER) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem }
};
case BlobType::BT_USM_HOST_INTERNAL:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_HOST_BUFFER) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem }
};
case BlobType::BT_USM_DEVICE_INTERNAL:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_DEVICE_BUFFER) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem }
};
#ifdef _WIN32
case BlobType::BT_DX_BUF_SHARED:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(VA_DEVICE), params.user_device },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem },
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), params.surface }
};
#endif
case BlobType::BT_IMG_SHARED:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem }
};
case BlobType::BT_SURF_SHARED:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(VA_DEVICE), params.user_device },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem },
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), params.surface },
{ GPU_PARAM_KEY(VA_PLANE), params.plane }
};
default:
IE_THROW() << "Unsupported shared object type " << static_cast<int>(m_mem_type);
}
}
bool RemoteBlobImpl::deallocate() noexcept {
m_memory_object.reset();
return m_memory_object == nullptr;
}
bool RemoteBlobImpl::is_allocated() const noexcept {
return m_memory_object != nullptr;
}
bool RemoteBlobImpl::is_locked() const noexcept {
return lockedHolder != nullptr;
}
void RemoteBlobImpl::allocate() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "RemoteBlobImpl::Allocate");
auto context = get_context_impl(m_context);
auto enable_caching = supports_caching();
if (enable_caching) {
m_memory_object = context->try_get_cached_memory(m_hash);
if (m_memory_object)
return;
}
auto& engine = context->get_engine();
switch (m_mem_type) {
case BlobType::BT_BUF_INTERNAL: {
m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::cl_mem);
break;
}
case BlobType::BT_USM_HOST_INTERNAL: {
m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_host);
break;
}
case BlobType::BT_USM_DEVICE_INTERNAL: {
m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_device);
break;
}
case BlobType::BT_BUF_SHARED: {
m_memory_object = engine.share_buffer(m_layout, m_mem);
break;
}
case BlobType::BT_USM_SHARED: {
m_memory_object = engine.share_usm(m_layout, m_mem);
break;
}
#ifdef _WIN32
case BlobType::BT_SURF_SHARED: {
m_memory_object = engine.share_surface(m_layout, m_mem, m_plane);
break;
}
case BlobType::BT_DX_BUF_SHARED: {
m_memory_object = engine.share_dx_buffer(m_layout, m_mem);
break;
}
#else
case BlobType::BT_SURF_SHARED: {
m_memory_object = engine.share_surface(m_layout, m_surf, m_plane);
break;
}
#endif
case BlobType::BT_IMG_SHARED: {
m_memory_object = engine.share_image(m_layout, m_mem);
break;
}
default:
m_memory_object.reset();
}
if (enable_caching)
context->add_to_cache(m_hash, m_memory_object);
}
const std::shared_ptr<IAllocator>& RemoteBlobImpl::getAllocator() const noexcept {
return m_allocator;
};
std::string RemoteBlobImpl::getDeviceName() const noexcept {
return m_context->getDeviceName();
};
std::shared_ptr<InferenceEngine::RemoteContext> RemoteBlobImpl::getContext() const noexcept {
return m_context;
}
void RemoteBlobImpl::reinterpret(cldnn::layout new_layout) {
OPENVINO_ASSERT(m_layout.bytes_count() >= new_layout.bytes_count(),
"[GPU] Can't reinterpret blob to the size bigger than allocated memory buffer");
m_layout = new_layout;
auto engine = m_memory_object->get_engine();
m_memory_object = engine->reinterpret_buffer(*m_memory_object, new_layout);
}
void RemoteBlobImpl::lock() const {
if (!is_allocated()) {
IE_THROW(NotAllocated) << "[GPU] Remote blob can't be locked as it's not allocated";
}
std::lock_guard<std::mutex> locker(lockedMutex);
if (lockedCounter == 0) {
lockedHolder = std::unique_ptr<cldnn::mem_lock<uint8_t>>(new cldnn::mem_lock<uint8_t>(m_memory_object, m_stream));
auto ptr = lockedHolder->data();
_handle = reinterpret_cast<void*>(ptr);
auto casted_allocator = std::dynamic_pointer_cast<RemoteAllocator>(m_allocator);
OPENVINO_ASSERT(casted_allocator, "[GPU] Invalid remote allocator type");
casted_allocator->regLockedBlob(_handle, this);
}
lockedCounter++;
}
void RemoteBlobImpl::unlock() const {
std::lock_guard<std::mutex> locker(lockedMutex);
lockedCounter--;
if (lockedCounter == 0)
lockedHolder.reset();
}
LockedMemory<void> RemoteBlobImpl::buffer() noexcept {
try {
lock();
return LockedMemory<void>(m_allocator.get(), _handle, 0);
} catch (...) {
return LockedMemory<void>(nullptr, nullptr, 0);
}
}
LockedMemory<const void> RemoteBlobImpl::cbuffer() const noexcept {
try {
lock();
return LockedMemory<const void>(m_allocator.get(), _handle, 0);
} catch (...) {
return LockedMemory<const void>(nullptr, nullptr, 0);
}
}
LockedMemory<void> RemoteBlobImpl::rwmap() noexcept {
try {
lock();
return LockedMemory<void>(m_allocator.get(), _handle, 0);
} catch (...) {
return LockedMemory<void>(nullptr, nullptr, 0);
}
}
LockedMemory<const void> RemoteBlobImpl::rmap() const noexcept {
try {
lock();
return LockedMemory<const void>(m_allocator.get(), _handle, 0);
} catch (...) {
return LockedMemory<const void>(nullptr, nullptr, 0);
}
}
LockedMemory<void> RemoteBlobImpl::wmap() noexcept {
try {
lock();
return LockedMemory<void>(m_allocator.get(), _handle, 0);
} catch (...) {
return LockedMemory<void>(nullptr, nullptr, 0);
}
}
bool RemoteBlobImpl::supports_caching() const {
return m_mem_type == BlobType::BT_BUF_SHARED ||
m_mem_type == BlobType::BT_USM_SHARED ||
m_mem_type == BlobType::BT_IMG_SHARED ||
m_mem_type == BlobType::BT_SURF_SHARED ||
m_mem_type == BlobType::BT_DX_BUF_SHARED;
}
} // namespace intel_gpu
} // namespace ov
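
The allocate() path above cooperates with the per-context memory cache. The handshake roughly follows the sketch below, assuming a RemoteContextImpl pointer `ctx` plus `hash`, `layout` and shared `handle` values computed as in the constructor (names are placeholders):

cldnn::memory::ptr mem = ctx->try_get_cached_memory(hash);   // reuse a previously imported object if parameters match
if (!mem) {
    mem = ctx->get_engine().share_buffer(layout, handle);    // import the user-provided buffer into the engine
    ctx->add_to_cache(hash, mem);                            // the next blob with the same hash will pick it up
}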

View File

@ -4,7 +4,8 @@
#include <memory>
#include "intel_gpu/plugin/remote_context.hpp"
#include "intel_gpu/plugin/plugin.hpp"
#include "intel_gpu/plugin/remote_blob.hpp"
#include "intel_gpu/plugin/remote_allocators.hpp"
#include "intel_gpu/runtime/itt.hpp"
#include "intel_gpu/runtime/device_query.hpp"
@ -14,285 +15,28 @@ using namespace InferenceEngine::details;
namespace ov {
namespace intel_gpu {
RemoteAllocator RemoteBlobImpl::m_allocator;
RemoteBlobImpl::RemoteBlobImpl(ClContext::Ptr context,
cldnn::stream& stream,
const cldnn::layout& layout,
cldnn::shared_handle mem,
cldnn::shared_surface surf,
uint32_t plane,
BlobType mem_type)
: m_context(context)
, m_stream(stream)
, m_mem(mem)
, m_surf(surf)
, m_plane(plane)
, m_layout(layout)
, m_mem_type(mem_type)
, m_memObject(nullptr)
, lockedCounter(0)
, lockedHolder(nullptr)
, _handle(nullptr)
, _allocator(nullptr) {
auto _impl = getContextImpl(m_context.lock());
m_engine = _impl->GetEngine();
// Verify shared buffer/usm memory and ensure that requested byte size is not greater than allocated one
switch (m_mem_type) {
case BlobType::BT_BUF_SHARED: {
m_engine->share_buffer(m_layout, m_mem);
break;
}
case BlobType::BT_USM_SHARED: {
m_engine->share_usm(m_layout, m_mem);
break;
}
default: break;
}
}
AnyMap RemoteBlobImpl::getParams() const {
assert(m_memObject != nullptr);
auto params = m_memObject->get_internal_params();
switch (m_mem_type) {
case BT_BUF_INTERNAL:
case BT_BUF_SHARED:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem }
};
case BT_USM_SHARED:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_USER_BUFFER) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem }
};
case BT_USM_HOST_INTERNAL:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_HOST_BUFFER) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem }
};
case BT_USM_DEVICE_INTERNAL:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_DEVICE_BUFFER) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem }
};
#ifdef _WIN32
case BT_DX_BUF_SHARED:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(VA_DEVICE), params.user_device },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem },
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), params.surface }
};
#endif
case BT_IMG_SHARED:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem }
};
case BT_SURF_SHARED:
return{
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) },
{ GPU_PARAM_KEY(OCL_CONTEXT), params.context },
{ GPU_PARAM_KEY(VA_DEVICE), params.user_device },
{ GPU_PARAM_KEY(MEM_HANDLE), params.mem },
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), params.surface },
{ GPU_PARAM_KEY(VA_PLANE), params.plane }
};
default:
IE_THROW() << "Unsupported shared object type " << m_mem_type;
}
}
bool RemoteBlobImpl::deallocate() noexcept {
m_memObject.reset();
return m_memObject == nullptr;
}
bool RemoteBlobImpl::is_allocated() const noexcept {
return m_memObject != nullptr;
}
bool RemoteBlobImpl::is_locked() const noexcept {
return lockedHolder != nullptr;
}
void RemoteBlobImpl::allocate() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "RemoteBlobImpl::Allocate");
assert(m_memObject == nullptr);
auto _impl = getContextImpl(m_context.lock());
std::lock_guard<ExecutionContextImpl> locker(*_impl);
switch (m_mem_type) {
case BlobType::BT_BUF_INTERNAL: {
m_memObject = m_engine->allocate_memory(m_layout, cldnn::allocation_type::cl_mem);
break;
}
case BlobType::BT_USM_HOST_INTERNAL: {
m_memObject = m_engine->allocate_memory(m_layout, cldnn::allocation_type::usm_host);
break;
}
case BlobType::BT_USM_DEVICE_INTERNAL: {
m_memObject = m_engine->allocate_memory(m_layout, cldnn::allocation_type::usm_device);
break;
}
case BlobType::BT_BUF_SHARED: {
m_memObject = m_engine->share_buffer(m_layout, m_mem);
break;
}
case BlobType::BT_USM_SHARED: {
m_memObject = m_engine->share_usm(m_layout, m_mem);
break;
}
#ifdef _WIN32
case BlobType::BT_SURF_SHARED: {
m_memObject = m_engine->share_surface(m_layout, m_mem, m_plane);
break;
}
case BlobType::BT_DX_BUF_SHARED: {
m_memObject = m_engine->share_dx_buffer(m_layout, m_mem);
break;
}
#else
case BlobType::BT_SURF_SHARED: {
m_memObject = m_engine->share_surface(m_layout, m_surf, m_plane);
break;
}
#endif
case BlobType::BT_IMG_SHARED: {
m_memObject = m_engine->share_image(m_layout, m_mem);
break;
}
default:
m_memObject.reset();
}
}
const std::shared_ptr<IAllocator>& RemoteBlobImpl::getAllocator() const noexcept {
if (!_allocator) {
_allocator = std::shared_ptr<IAllocator>(&m_allocator, [] (IAllocator*) {});
}
return _allocator;
};
std::string RemoteBlobImpl::getDeviceName() const noexcept {
return getContextImpl(m_context.lock())->getDeviceName();
};
std::shared_ptr<InferenceEngine::RemoteContext> RemoteBlobImpl::getContext() const noexcept {
return m_context.lock();
}
void RemoteBlobImpl::reinterpret(cldnn::layout new_layout) {
OPENVINO_ASSERT(m_layout.bytes_count() >= new_layout.bytes_count(),
"[GPU] Can't reinterpret blob to the size bigger than allocated memory buffer");
m_layout = new_layout;
auto engine = m_memObject->get_engine();
m_memObject = engine->reinterpret_buffer(*m_memObject, new_layout);
}
void RemoteBlobImpl::lock() const {
if (!is_allocated()) {
IE_THROW(NotAllocated) << "[GPU] Remote blob can't be locked as it's not allocated";
}
std::lock_guard<std::mutex> locker(lockedMutex);
if (lockedCounter == 0) {
lockedHolder = std::unique_ptr<cldnn::mem_lock<uint8_t>>(new cldnn::mem_lock<uint8_t>(m_memObject, m_stream));
auto ptr = lockedHolder->data();
_handle = reinterpret_cast<void*>(ptr);
m_allocator.regLockedBlob(_handle, this);
}
lockedCounter++;
}
void RemoteBlobImpl::unlock() const {
std::lock_guard<std::mutex> locker(lockedMutex);
lockedCounter--;
if (lockedCounter == 0)
lockedHolder.reset();
}
LockedMemory<void> RemoteBlobImpl::buffer() noexcept {
try {
lock();
return LockedMemory<void>(reinterpret_cast<IAllocator*>(&m_allocator), _handle, 0);
} catch (...) {
return LockedMemory<void>(nullptr, nullptr, 0);
}
}
LockedMemory<const void> RemoteBlobImpl::cbuffer() const noexcept {
try {
lock();
return LockedMemory<const void>(reinterpret_cast<IAllocator*>(&m_allocator), _handle, 0);
} catch (...) {
return LockedMemory<const void>(nullptr, nullptr, 0);
}
}
LockedMemory<void> RemoteBlobImpl::rwmap() noexcept {
try {
lock();
return LockedMemory<void>(reinterpret_cast<IAllocator *>(&m_allocator), _handle, 0);
} catch (...) {
return LockedMemory<void>(nullptr, nullptr, 0);
}
}
LockedMemory<const void> RemoteBlobImpl::rmap() const noexcept {
try {
lock();
return LockedMemory<const void>(reinterpret_cast<IAllocator *>(&m_allocator), _handle, 0);
} catch (...) {
return LockedMemory<const void>(nullptr, nullptr, 0);
}
}
LockedMemory<void> RemoteBlobImpl::wmap() noexcept {
try {
lock();
return LockedMemory<void>(reinterpret_cast<IAllocator *>(&m_allocator), _handle, 0);
} catch (...) {
return LockedMemory<void>(nullptr, nullptr, 0);
}
}
void RemoteAllocator::regLockedBlob(void* handle, const RemoteBlobImpl* blob) {
std::lock_guard<RemoteAllocator> locker(*this);
auto iter = m_lockedBlobs.find(handle);
if (iter == m_lockedBlobs.end()) {
m_lockedBlobs.emplace(handle, blob);
}
}
void RemoteAllocator::unlock(void* handle) noexcept {
std::lock_guard<RemoteAllocator> locker(*this);
auto iter = m_lockedBlobs.find(handle);
if (iter != m_lockedBlobs.end()) {
iter->second->unlock();
m_lockedBlobs.erase(iter);
}
}
ExecutionContextImpl::ExecutionContextImpl(const std::shared_ptr<IInferencePlugin> plugin,
const AnyMap& params,
const Config& config)
RemoteContextImpl::RemoteContextImpl(std::string device_name, std::vector<cldnn::device::ptr> devices)
: m_va_display(nullptr)
, m_external_queue(nullptr)
, m_config(config)
, m_type(ContextType::OCL)
, m_plugin(plugin) {
m_lock.clear(std::memory_order_relaxed);
, m_device_name(device_name)
, m_memory_cache(cache_capacity) {
OPENVINO_ASSERT(devices.size() == 1, "[GPU] Currently context can be created for a single device only");
// TODO: Parameterize this based on plugin config and compilation options
auto engine_type = cldnn::engine_types::ocl;
auto runtime_type = cldnn::runtime_types::ocl;
m_engine = cldnn::engine::create(engine_type, runtime_type, devices.front());
GPU_DEBUG_LOG << "Initialize RemoteContext for " << m_device_name << " (" << m_engine->get_device_info().dev_name << ")" << std::endl;
}
RemoteContextImpl::RemoteContextImpl(const std::vector<RemoteContextImpl::Ptr>& known_contexts, const AnyMap& params)
: m_va_display(nullptr)
, m_external_queue(nullptr)
, m_type(ContextType::OCL)
, m_memory_cache(cache_capacity) {
gpu_handle_param _context_id = nullptr;
gpu_handle_param _va_device = nullptr;
int ctx_device_id = 0;
@ -300,18 +44,18 @@ ExecutionContextImpl::ExecutionContextImpl(const std::shared_ptr<IInferencePlugi
if (params.size()) {
// parameter map is non-empty
std::string contextTypeStr = _StrFromParams(params, GPU_PARAM_KEY(CONTEXT_TYPE));
std::string contextTypeStr = extract_object<std::string>(params, GPU_PARAM_KEY(CONTEXT_TYPE));
if (GPU_PARAM_VALUE(OCL) == contextTypeStr) {
_context_id = _ObjFromParamSimple<gpu_handle_param>(params, GPU_PARAM_KEY(OCL_CONTEXT));
_context_id = extract_object<gpu_handle_param>(params, GPU_PARAM_KEY(OCL_CONTEXT));
if (params.find(GPU_PARAM_KEY(OCL_QUEUE)) != params.end())
m_external_queue = _ObjFromParamSimple<gpu_handle_param>(params, GPU_PARAM_KEY(OCL_QUEUE));
m_external_queue = extract_object<gpu_handle_param>(params, GPU_PARAM_KEY(OCL_QUEUE));
if (params.find(GPU_PARAM_KEY(OCL_CONTEXT_DEVICE_ID)) != params.end())
ctx_device_id = _ObjFromParamSimple<int>(params, GPU_PARAM_KEY(OCL_CONTEXT_DEVICE_ID));
ctx_device_id = extract_object<int>(params, GPU_PARAM_KEY(OCL_CONTEXT_DEVICE_ID));
} else if (GPU_PARAM_VALUE(VA_SHARED) == contextTypeStr) {
m_va_display = _va_device = _ObjFromParamSimple<gpu_handle_param>(params, GPU_PARAM_KEY(VA_DEVICE));
m_va_display = _va_device = extract_object<gpu_handle_param>(params, GPU_PARAM_KEY(VA_DEVICE));
m_type = ContextType::DEV_SHARED;
} else {
IE_THROW() << "Invalid execution context type" << contextTypeStr;
@ -329,29 +73,15 @@ ExecutionContextImpl::ExecutionContextImpl(const std::shared_ptr<IInferencePlugi
cldnn::device_query device_query(engine_type, runtime_type, _context_id, _va_device, ctx_device_id, target_tile_id);
auto device_map = device_query.get_available_devices();
auto iter = device_map.find(std::to_string(cldnn::device_query::device_id));
if (iter == device_map.end())
iter = device_map.find(m_config.device_id);
if (iter == device_map.end())
iter = device_map.begin();
auto& dev = iter->second;
OPENVINO_ASSERT(device_map.size() == 1, "[GPU] Only one device expected in case of context sharing");
auto engine_params = Plugin::GetParams(m_config, dev, m_external_queue);
m_engine = cldnn::engine::create(engine_params.engine_type,
engine_params.runtime_type, dev,
cldnn::engine_configuration(m_config.useProfiling,
engine_params.queue_type,
std::string(),
m_config.queuePriority,
m_config.queueThrottle,
true,
engine_params.use_unified_shared_memory,
m_config.kernels_cache_dir,
m_config.throughput_streams),
engine_params.task_executor);
m_engine = cldnn::engine::create(engine_type, runtime_type, device_map.begin()->second);
m_device_name = get_device_name(known_contexts, m_engine->get_device());
GPU_DEBUG_LOG << "Initialize RemoteContext for " << m_device_name << " (" << m_engine->get_device_info().dev_name << ")" << std::endl;
}
AnyMap ExecutionContextImpl::getParams() const {
AnyMap RemoteContextImpl::get_params() const {
AnyMap ret = { { GPU_PARAM_KEY(OCL_CONTEXT), m_engine->get_user_context() } };
switch (m_type) {
@ -370,26 +100,191 @@ AnyMap ExecutionContextImpl::getParams() const {
return ret;
}
std::string ExecutionContextImpl::getDeviceName() const noexcept {
auto devName = m_plugin.lock()->GetName();
auto engine_type = cldnn::engine_types::ocl;
auto runtime_type = cldnn::runtime_types::ocl;
try {
// Use actual runtime and engine types
cldnn::device_query device_query(engine_type, runtime_type);
auto all_devices = device_query.get_available_devices();
auto current_device = m_engine->get_device();
for (auto& kv : all_devices) {
if (current_device->is_same(kv.second))
return devName + "." + kv.first;
// For external contexts we try to match underlying handles with default contexts created by plugin to find device name
std::string RemoteContextImpl::get_device_name(const std::vector<RemoteContextImpl::Ptr>& known_contexts,
const cldnn::device::ptr current_device) {
std::string device_name = "GPU";
for (auto& c : known_contexts) {
if (c->get_engine().get_device()->is_same(current_device)) {
device_name = c->get_device_name();
break;
}
} catch (...) { }
}
return device_name;
}
if (!m_config.device_id.empty())
devName += "." + m_config.device_id;
return devName;
std::string RemoteContextImpl::get_device_name() const noexcept {
return m_device_name;
}
cldnn::memory::ptr RemoteContextImpl::try_get_cached_memory(size_t hash) {
std::lock_guard<std::mutex> lock(m_cache_mutex);
if (m_memory_cache.has(hash))
return m_memory_cache.get(hash);
return nullptr;
}
void RemoteContextImpl::add_to_cache(size_t hash, cldnn::memory::ptr memory) {
std::lock_guard<std::mutex> lock(m_cache_mutex);
m_memory_cache.add(hash, memory);
}
InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::reuse_surface(InferenceEngine::gpu::ClContext::Ptr public_context,
const InferenceEngine::TensorDesc& desc,
const InferenceEngine::ParamMap& params) {
using namespace InferenceEngine;
auto& stream = m_engine->get_service_stream();
uint32_t plane = extract_object<uint32_t>(params, GPU_PARAM_KEY(VA_PLANE));
#ifdef _WIN32
cldnn::shared_handle surf = extract_object<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
#else
cldnn::shared_surface surf = extract_object<cldnn::shared_surface>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
#endif
cldnn::layout layout(DataTypeFromPrecision(desc.getPrecision()),
ImageFormatFromLayout(desc.getLayout()),
tensor_from_dims(desc.getDims()));
#ifdef _WIN32
auto blob = std::make_shared<RemoteD3DSurface>(public_context, stream,
desc, layout, surf, 0, plane,
BlobType::BT_SURF_SHARED);
#else
auto blob = std::make_shared<RemoteVASurface>(public_context, stream,
desc, layout, nullptr, surf, plane,
BlobType::BT_SURF_SHARED);
#endif
return blob;
}
InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::reuse_memory(InferenceEngine::gpu::ClContext::Ptr public_context,
const InferenceEngine::TensorDesc& desc,
cldnn::shared_handle mem,
BlobType blob_type) {
auto& stream = m_engine->get_service_stream();
cldnn::layout layout(DataTypeFromPrecision(desc.getPrecision()),
FormatFromLayout(desc.getLayout()),
tensor_from_dims(desc.getDims()));
switch (blob_type) {
case BlobType::BT_BUF_SHARED: {
return std::make_shared<RemoteCLbuffer>(public_context, stream, desc, layout, mem, 0, 0, blob_type);
}
case BlobType::BT_USM_SHARED: {
return std::make_shared<RemoteUSMbuffer>(public_context, stream, desc, layout, mem, 0, 0, blob_type);
}
case BlobType::BT_IMG_SHARED: {
layout.format = ImageFormatFromLayout(desc.getLayout());
return std::make_shared<RemoteCLImage2D>(public_context, stream, desc, layout, mem, 0, 0, blob_type);
}
#ifdef _WIN32
case BlobType::BT_DX_BUF_SHARED: {
return std::make_shared<RemoteD3DBuffer>(public_context, stream, desc, layout, mem, 0, 0, blob_type);
}
#endif
default:
break;
}
return nullptr;
}
InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::create_buffer(InferenceEngine::gpu::ClContext::Ptr public_context,
const InferenceEngine::TensorDesc& desc) {
cldnn::layout layout(DataTypeFromPrecision(desc.getPrecision()),
FormatFromLayout(desc.getLayout()),
tensor_from_dims(desc.getDims()));
auto& stream = m_engine->get_service_stream();
return std::make_shared<RemoteCLbuffer>(public_context,
stream,
desc,
layout,
nullptr, 0, 0,
BlobType::BT_BUF_INTERNAL);
}
InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::create_usm(InferenceEngine::gpu::ClContext::Ptr public_context,
const InferenceEngine::TensorDesc& desc,
BlobType alloc_type) {
cldnn::layout layout(DataTypeFromPrecision(desc.getPrecision()),
FormatFromLayout(desc.getLayout()),
tensor_from_dims(desc.getDims()));
auto& stream = m_engine->get_service_stream();
return std::make_shared<RemoteUSMbuffer>(public_context,
stream,
desc,
layout,
nullptr, 0, 0,
alloc_type);
}
void RemoteContextImpl::check_if_shared() {
OPENVINO_ASSERT(m_type == RemoteContextImpl::ContextType::DEV_SHARED, "[GPU] Shared context is required to share this type of memory");
}
InferenceEngine::MemoryBlob::Ptr RemoteContextImpl::create_host_blob(InferenceEngine::gpu::ClContext::Ptr public_context,
const InferenceEngine::TensorDesc& desc) {
if (m_engine->use_unified_shared_memory())
return std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(make_blob_with_precision(desc, std::make_shared<USMHostAllocator>(public_context)));
else
return std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(make_blob_with_precision(desc));
}
InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::create_blob(InferenceEngine::gpu::ClContext::Ptr public_context,
const InferenceEngine::TensorDesc& desc,
const InferenceEngine::ParamMap& params) {
using namespace InferenceEngine;
if (params.empty()) {
// user wants plugin to allocate blob by itself and return handle
return create_buffer(public_context, desc);
} else {
// user will supply shared object handle
std::string mem_type = extract_object<std::string>(params, GPU_PARAM_KEY(SHARED_MEM_TYPE));
bool is_usm = mem_type == GPU_PARAM_VALUE(USM_HOST_BUFFER) ||
mem_type == GPU_PARAM_VALUE(USM_DEVICE_BUFFER) ||
mem_type == GPU_PARAM_VALUE(USM_USER_BUFFER);
OPENVINO_ASSERT(!is_usm || m_engine->use_unified_shared_memory(),
"[GPU] Can't create USM tensor as USM is not supported (or manually disabled) on current device");
if (GPU_PARAM_VALUE(VA_SURFACE) == mem_type) {
check_if_shared();
return reuse_surface(public_context, desc, params);
} else if (GPU_PARAM_VALUE(USM_HOST_BUFFER) == mem_type) {
return create_usm(public_context, desc, BlobType::BT_USM_HOST_INTERNAL);
} else if (GPU_PARAM_VALUE(USM_DEVICE_BUFFER) == mem_type) {
return create_usm(public_context, desc, BlobType::BT_USM_DEVICE_INTERNAL);
} else {
BlobType blob_type;
cldnn::shared_handle mem = nullptr;
if (GPU_PARAM_VALUE(OCL_BUFFER) == mem_type) {
blob_type = BlobType::BT_BUF_SHARED;
mem = extract_object<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
} else if (GPU_PARAM_VALUE(USM_USER_BUFFER) == mem_type) {
blob_type = BlobType::BT_USM_SHARED;
mem = extract_object<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
} else if (GPU_PARAM_VALUE(OCL_IMAGE2D) == mem_type) {
blob_type = BlobType::BT_IMG_SHARED;
mem = extract_object<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
#ifdef _WIN32
} else if (GPU_PARAM_VALUE(DX_BUFFER) == mem_type) {
blob_type = BlobType::BT_DX_BUF_SHARED;
mem = extract_object<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
check_if_shared();
#endif
} else {
OPENVINO_ASSERT(false, "[GPU] Unsupported shared object type ", mem_type);
}
return reuse_memory(public_context, desc, mem, blob_type);
}
}
}
} // namespace intel_gpu
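
As a usage reference for create_blob() above, sharing a user USM pointer through the public ClContext boils down to the sketch below; `context`, `desc` and `usm_ptr` are placeholders for caller-provided objects:

using namespace InferenceEngine;

// context: gpu::ClContext::Ptr, desc: TensorDesc, usm_ptr: user-allocated USM pointer (all provided by the caller)
ParamMap params = {
    { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_USER_BUFFER) },
    { GPU_PARAM_KEY(MEM_HANDLE),      usm_ptr }
};
auto remote_blob = context->CreateBlob(desc, params);   // dispatched to reuse_memory(..., BlobType::BT_USM_SHARED) above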

View File

@ -127,11 +127,12 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
const auto defaultPrecisions = ngraph::pass::low_precision::precision_set::int8_support;
bool enableInt8;
bool enable_loop_unrolling = config.get_property(ov::intel_gpu::enable_loop_unrolling);
{
ngraph::pass::Manager manager;
manager.set_per_pass_validation(false);
enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(func);
enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(func);
if (enableInt8) {
manager.register_pass<ov::pass::MarkDequantizationSubgraph>(
std::vector<ngraph::element::Type>{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 });
@ -144,7 +145,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
manager.register_pass<ngraph::pass::WrapInterpolateIntoTransposes>();
manager.register_pass<ngraph::pass::TransposeSinking>();
if (!config.enable_loop_unrolling) {
if (!enable_loop_unrolling) {
manager.register_pass<ngraph::pass::BidirectionalLSTMSequenceDecomposition>();
manager.register_pass<ngraph::pass::BidirectionalGRUSequenceDecomposition>();
manager.register_pass<ngraph::pass::BidirectionalRNNSequenceDecomposition>();
@ -158,7 +159,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
manager.register_pass<ngraph::pass::GRUCellDecomposition>();
manager.register_pass<ngraph::pass::RNNCellDecomposition>();
if (config.enable_loop_unrolling) {
if (enable_loop_unrolling) {
manager.register_pass<ngraph::pass::BidirectionalLSTMSequenceDecomposition>();
manager.register_pass<ngraph::pass::BidirectionalGRUSequenceDecomposition>();
manager.register_pass<ngraph::pass::BidirectionalRNNSequenceDecomposition>();
@ -205,14 +206,14 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
};
// Add conversion from FP data types to infer precision if it's specified
if (config.inference_precision != ov::element::undefined) {
auto inference_precision = config.inference_precision;
if (!fp_precision_supported(inference_precision))
inference_precision = fallback_precision;
auto infer_precision = config.get_property(ov::hint::inference_precision);
if (infer_precision != ov::element::undefined) {
if (!fp_precision_supported(infer_precision))
infer_precision = fallback_precision;
for (auto& et : fp_element_types) {
if (et != inference_precision) {
convert_precision_list.push_back({et, inference_precision});
if (et != infer_precision) {
convert_precision_list.push_back({et, infer_precision});
}
}
}
@ -330,7 +331,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
return isCellPrimitiveSupported(node);
});
if (config.enable_loop_unrolling) {
if (enable_loop_unrolling) {
pass_config->set_callback<ngraph::pass::ConvertRNNSequenceToTensorIterator,
ngraph::pass::ConvertGRUSequenceToTensorIterator,
ngraph::pass::ConvertLSTMSequenceToTensorIterator>(
@ -550,10 +551,10 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
auto pass_config = manager.get_pass_config();
pass_config->set_callback<ngraph::pass::UnrollTensorIterator>(
[this](const std::shared_ptr<const ngraph::Node> &node) -> bool {
[enable_loop_unrolling](const std::shared_ptr<const ngraph::Node> &node) -> bool {
auto sub_graph_op = std::dynamic_pointer_cast<const ngraph::op::util::SubGraphOp>(node);
int64_t num_iter = sub_graph_op->get_num_iterations();
if (!config.enable_loop_unrolling)
if (!enable_loop_unrolling)
return num_iter != 1;
return num_iter >= 16;
});

View File

@ -9,17 +9,16 @@ namespace intel_gpu {
VariableState::VariableState(const std::string &name,
const std::vector<cldnn::network::VariableState::Ptr> &states,
std::shared_ptr<cldnn::engine> engine, int currentBatch) :
InferenceEngine::IVariableStateInternal {name},
currentBatch_ {currentBatch},
states_ {states},
desc_{
cldnn::engine& engine, int currentBatch)
: InferenceEngine::IVariableStateInternal {name}
, currentBatch_ {currentBatch}
, states_ {states}
, desc_ {
PrecisionFromDataType(states.front()->memory->get_layout().data_type),
AggregateShape(states.front()->memory->get_layout()),
InferenceEngine::Layout::ANY
},
engine_ {std::move(engine)} {
}
}
, engine_(engine) { }
void VariableState::Reset() {
IterateOverStates([this](cldnn::network::VariableState &state) {
@ -31,11 +30,11 @@ void VariableState::SetState(const InferenceEngine::Blob::Ptr &newState) {
auto lock = std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(newState)->rmap();
auto data = lock.as<char*>();
IterateOverStates([&data, this](cldnn::network::VariableState &state) {
state.memory->copy_from(engine_->get_program_stream(), data);
state.memory->copy_from(engine_.get_service_stream(), data);
data += state.memory->get_layout().bytes_count();
state.is_set = true;
});
engine_->get_program_stream().enqueue_barrier();
engine_.get_service_stream().enqueue_barrier();
}
InferenceEngine::Blob::CPtr VariableState::GetState() const {
@ -44,7 +43,7 @@ InferenceEngine::Blob::CPtr VariableState::GetState() const {
auto blobLock = std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(blob)->wmap();
auto data = blobLock.as<char*>();
IterateOverStates([&data, this](cldnn::network::VariableState &state) {
cldnn::mem_lock<char, cldnn::mem_lock_type::read> lock { state.memory, engine_->get_program_stream() };
cldnn::mem_lock<char, cldnn::mem_lock_type::read> lock { state.memory, engine_.get_service_stream() };
std::copy(lock.begin(), lock.end(), data);
data += state.memory->get_layout().bytes_count();
});

View File

@ -56,10 +56,8 @@ static size_t get_cpu_ram_size() {
namespace cldnn {
engine::engine(const device::ptr device, const engine_configuration& configuration, const InferenceEngine::ITaskExecutor::Ptr task_executor)
: _task_executor(task_executor)
, _device(device)
, _configuration(configuration) {}
engine::engine(const device::ptr device)
: _device(device) {}
device_info engine::get_device_info() const {
return _device->get_info();
@ -74,7 +72,7 @@ bool engine::use_unified_shared_memory() const {
GPU_DEBUG_IF(debug_config->disable_usm) {
return false;
}
if (_device->get_mem_caps().supports_usm() && _configuration.use_unified_shared_memory) {
if (_device->get_mem_caps().supports_usm()) {
return true;
}
return false;
@ -248,19 +246,11 @@ void engine::subtract_memory_used(uint64_t bytes, allocation_type type) {
}
}
const InferenceEngine::ITaskExecutor::Ptr engine::get_task_executor() {
return _task_executor;
}
std::shared_ptr<cldnn::engine> engine::create(engine_types engine_type,
runtime_types runtime_type,
const device::ptr device,
const engine_configuration& configuration,
const InferenceEngine::ITaskExecutor::Ptr task_executor) {
std::shared_ptr<cldnn::engine> engine::create(engine_types engine_type, runtime_types runtime_type, const device::ptr device) {
std::shared_ptr<cldnn::engine> ret;
switch (engine_type) {
case engine_types::ocl:
ret = ocl::create_ocl_engine(device, runtime_type, configuration, task_executor);
ret = ocl::create_ocl_engine(device, runtime_type);
break;
default:
throw std::runtime_error("Invalid engine type");
@ -270,17 +260,14 @@ std::shared_ptr<cldnn::engine> engine::create(engine_types engine_type,
return ret;
}
std::shared_ptr<cldnn::engine> engine::create(engine_types engine_type,
runtime_types runtime_type,
const engine_configuration& configuration,
const InferenceEngine::ITaskExecutor::Ptr task_executor) {
std::shared_ptr<cldnn::engine> engine::create(engine_types engine_type, runtime_types runtime_type) {
device_query query(engine_type, runtime_type);
auto devices = query.get_available_devices();
auto iter = devices.find(std::to_string(device_query::device_id));
auto& device = iter != devices.end() ? iter->second : devices.begin()->second;
return engine::create(engine_type, runtime_type, device, configuration, task_executor);
return engine::create(engine_type, runtime_type, device);
}
} // namespace cldnn
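
With engine_configuration and the task executor removed from the factory, engine creation reduces to the calls below (a sketch of the two overloads kept above):

// Default device: device_query is run internally and a suitable device is picked.
std::shared_ptr<cldnn::engine> engine = cldnn::engine::create(cldnn::engine_types::ocl, cldnn::runtime_types::ocl);

// Explicit device: enumerate devices first and pass the chosen one.
cldnn::device_query query(cldnn::engine_types::ocl, cldnn::runtime_types::ocl);
auto devices = query.get_available_devices();             // std::map<std::string, cldnn::device::ptr>
auto engine_for_dev = cldnn::engine::create(cldnn::engine_types::ocl, cldnn::runtime_types::ocl, devices.begin()->second);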

View File

@ -0,0 +1,196 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/runtime/execution_config.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
#include <thread>
namespace ov {
namespace intel_gpu {
ExecutionConfig::ExecutionConfig() {
set_default();
}
class InferencePrecisionValidator : public BaseValidator {
public:
bool is_valid(const ov::Any& v) const override {
auto precision = v.as<ov::element::Type>();
return precision == ov::element::f16 || precision == ov::element::f32;
}
};
class PerformanceModeValidator : public BaseValidator {
public:
bool is_valid(const ov::Any& v) const override {
auto mode = v.as<ov::hint::PerformanceMode>();
return mode == ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT ||
mode == ov::hint::PerformanceMode::THROUGHPUT ||
mode == ov::hint::PerformanceMode::LATENCY ||
mode == ov::hint::PerformanceMode::UNDEFINED;
}
};
void ExecutionConfig::set_default() {
register_property<PropertyVisibility::PUBLIC>(
std::make_tuple(ov::device::id, "0"),
std::make_tuple(ov::enable_profiling, false),
std::make_tuple(ov::cache_dir, ""),
std::make_tuple(ov::num_streams, 1),
std::make_tuple(ov::compilation_num_threads, std::max(1, static_cast<int>(std::thread::hardware_concurrency()))),
std::make_tuple(ov::hint::inference_precision, ov::element::f16, InferencePrecisionValidator()),
std::make_tuple(ov::hint::model_priority, ov::hint::Priority::MEDIUM),
std::make_tuple(ov::hint::performance_mode, ov::hint::PerformanceMode::LATENCY, PerformanceModeValidator()),
std::make_tuple(ov::hint::num_requests, 0),
std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM),
std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM),
std::make_tuple(ov::intel_gpu::hint::queue_priority, ov::hint::Priority::MEDIUM),
std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true),
// Legacy API properties
std::make_tuple(ov::intel_gpu::enable_dynamic_batch, false),
std::make_tuple(ov::intel_gpu::exclusive_async_requests, false),
std::make_tuple(ov::intel_gpu::nv12_two_inputs, false),
std::make_tuple(ov::intel_gpu::config_file, ""),
std::make_tuple(ov::intel_gpu::enable_lp_transformations, false));
register_property<PropertyVisibility::INTERNAL>(
std::make_tuple(ov::intel_gpu::max_dynamic_batch, 1),
std::make_tuple(ov::intel_gpu::queue_type, QueueTypes::out_of_order),
std::make_tuple(ov::intel_gpu::optimize_data, false),
std::make_tuple(ov::intel_gpu::enable_memory_pool, true),
std::make_tuple(ov::intel_gpu::allow_static_input_reorder, false),
std::make_tuple(ov::intel_gpu::custom_outputs, std::vector<std::string>{}),
std::make_tuple(ov::intel_gpu::tuning_config, ov::intel_gpu::TuningConfig{}),
std::make_tuple(ov::intel_gpu::dump_graphs, ""),
std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}),
std::make_tuple(ov::intel_gpu::partial_build_program, false),
std::make_tuple(ov::intel_gpu::allow_new_shape_infer, false));
}
void ExecutionConfig::register_property_impl(const std::pair<std::string, ov::Any>& property, PropertyVisibility visibility, BaseValidator::Ptr validator) {
property_validators[property.first] = validator;
supported_properties[property.first] = visibility;
internal_properties[property.first] = property.second;
}
void ExecutionConfig::set_property(const AnyMap& config) {
for (auto& kv : config) {
auto& name = kv.first;
auto& val = kv.second;
OPENVINO_ASSERT(is_supported(kv.first), "[GPU] Attempt to set property ", name, " (", val.as<std::string>(), ") which was not registered!\n");
OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": ", val.as<std::string>());
internal_properties[name] = val;
}
}
bool ExecutionConfig::is_supported(const std::string& name) const {
bool supported = supported_properties.find(name) != supported_properties.end();
bool has_validator = property_validators.find(name) != property_validators.end();
return supported && has_validator;
}
bool ExecutionConfig::is_set_by_user(const std::string& name) const {
return user_properties.find(name) != user_properties.end();
}
void ExecutionConfig::set_user_property(const AnyMap& config) {
for (auto& kv : config) {
auto& name = kv.first;
auto& val = kv.second;
bool supported = is_supported(name) && supported_properties.at(name) == PropertyVisibility::PUBLIC;
OPENVINO_ASSERT(supported, "[GPU] Attempt to set user property ", name, " (", val.as<std::string>(), ") which was not registered or is internal!\n");
OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": `", val.as<std::string>(), "`");
user_properties[kv.first] = kv.second;
}
}
Any ExecutionConfig::get_property(const std::string& name) const {
if (user_properties.find(name) != user_properties.end()) {
return user_properties.at(name);
}
OPENVINO_ASSERT(internal_properties.find(name) != internal_properties.end(), "[GPU] Can't get internal property with name ", name);
return internal_properties.at(name);
}
void ExecutionConfig::apply_performance_hints(const cldnn::device_info& info) {
if (is_set_by_user(ov::hint::performance_mode)) {
const auto mode = get_property(ov::hint::performance_mode);
if (!is_set_by_user(ov::num_streams)) {
if (mode == ov::hint::PerformanceMode::LATENCY) {
set_property(ov::num_streams(1));
} else if (mode == ov::hint::PerformanceMode::THROUGHPUT) {
set_property(ov::num_streams(ov::streams::AUTO));
}
}
}
if (get_property(ov::num_streams) == ov::streams::AUTO) {
int32_t n_streams = std::max<int32_t>(info.num_ccs, 2);
set_property(ov::num_streams(n_streams));
}
}
void ExecutionConfig::apply_priority_hints(const cldnn::device_info& info) {
if (is_set_by_user(ov::hint::model_priority)) {
const auto priority = get_property(ov::hint::model_priority);
if (!is_set_by_user(ov::intel_gpu::hint::queue_priority)) {
set_property(ov::intel_gpu::hint::queue_priority(priority));
}
}
}
void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) {
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) {
set_property(ov::intel_gpu::dump_graphs(debug_config->dump_graphs));
}
GPU_DEBUG_IF(debug_config->serialize_compile == 1) {
set_property(ov::compilation_num_threads(1));
}
}
void ExecutionConfig::apply_hints(const cldnn::device_info& info) {
apply_performance_hints(info);
apply_priority_hints(info);
apply_debug_options(info);
}
void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
// Copy internal properties before applying hints to ensure that
// a property set by a hint won't be overridden by a value in the user config.
// E.g. num_streams=AUTO && hint=THROUGHPUT
// If we applied hints first and then copied all values from the user config to the internal one,
// we'd end up with num_streams=AUTO in the final config while a concrete integer is expected.
for (auto& kv : user_properties) {
internal_properties[kv.first] = kv.second;
}
apply_hints(info);
if (!is_set_by_user(ov::intel_gpu::enable_lp_transformations)) {
set_property(ov::intel_gpu::enable_lp_transformations(info.supports_imad || info.supports_immad));
}
user_properties.clear();
}
std::string ExecutionConfig::to_string() const {
std::stringstream s;
s << "internal properties:\n";
for (auto& kv : internal_properties) {
s << "\t" << kv.first << ": " << kv.second.as<std::string>() << std::endl;
}
s << "user properties:\n";
for (auto& kv : user_properties) {
s << "\t" << kv.first << ": " << kv.second.as<std::string>() << std::endl;
}
return s.str();
}
} // namespace intel_gpu
} // namespace ov
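
To illustrate how the pieces above fit together (user properties layered over the registered defaults, then hints resolved against the device), a small sketch, assuming `device_info` was taken from an existing engine:

ov::intel_gpu::ExecutionConfig cfg;                                       // defaults registered in set_default()
cfg.set_user_property(ov::AnyMap{ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)});
cfg.apply_user_properties(device_info);      // copies user values over internal ones, then apply_hints() resolves them
auto n_streams = cfg.get_property(ov::num_streams);                      // ov::streams::AUTO replaced by a real number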

View File

@ -58,7 +58,7 @@ namespace cldnn {
std::mutex kernels_cache::_mutex;
std::string kernels_cache::get_cache_path() const {
auto path = _engine.configuration().kernels_cache_path;
auto path = _config.get_property(ov::cache_dir);
if (path.empty()) {
return {};
}
@ -76,7 +76,7 @@ bool kernels_cache::is_cache_enabled() const {
}
}
return !_engine.configuration().kernels_cache_path.empty();
return !_config.get_property(ov::cache_dir).empty();
}
size_t kernels_cache::get_max_kernels_per_batch() const {
@ -156,8 +156,16 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code,
}
}
kernels_cache::kernels_cache(engine& engine, uint32_t prog_id, const std::vector<std::string>& batch_header_str)
: _engine(engine), _prog_id(prog_id), batch_header_str(std::move(batch_header_str)) { }
kernels_cache::kernels_cache(engine& engine,
const ExecutionConfig& config,
uint32_t prog_id,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
const std::vector<std::string>& batch_header_str)
: _engine(engine)
, _task_executor(task_executor)
, _config(config)
, _prog_id(prog_id)
, batch_header_str(std::move(batch_header_str)) { }
kernel_id kernels_cache::set_kernel_source(
const std::shared_ptr<kernel_string>& kernel_string,
@ -188,8 +196,8 @@ void kernels_cache::build_batch(const engine& build_engine, const batch_program&
auto& cl_build_engine = dynamic_cast<const ocl::ocl_engine&>(build_engine);
bool dump_sources = !_engine.configuration().sources_dumps_dir.empty() || batch.dump_custom_program;
std::string dump_sources_dir = _engine.configuration().sources_dumps_dir;
bool dump_sources = batch.dump_custom_program;
std::string dump_sources_dir = "";
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(!debug_config->dump_sources.empty()) {
dump_sources = true;
@ -371,25 +379,21 @@ void kernels_cache::build_all() {
if (!_pending_compilation)
return;
std::unique_ptr<ocl::ocl_engine> _build_engine = nullptr;
if (_engine.type() == engine_types::ocl) {
_build_engine = std::unique_ptr<ocl::ocl_engine>(new ocl::ocl_engine(_engine.get_device(), runtime_types::ocl,
_engine.configuration(), _engine.get_task_executor()));
}
ocl::ocl_engine& _build_engine = downcast<ocl::ocl_engine>(_engine);
std::vector<batch_program> batches;
{
std::lock_guard<std::mutex> lock(_mutex);
get_program_source(_kernels_code, &batches);
}
auto _task_executor = _engine.get_task_executor();
if (_task_executor) {
std::exception_ptr exception;
std::vector<InferenceEngine::Task> tasks;
for (size_t idx = 0; idx < batches.size(); idx++) {
auto& batch = batches[idx];
tasks.push_back([this, &_build_engine, &batch, &exception] {
try {
build_batch(*_build_engine, batch);
build_batch(_build_engine, batch);
} catch(...) {
exception = std::current_exception();
}
@ -401,6 +405,11 @@ void kernels_cache::build_all() {
if (exception) {
std::rethrow_exception(exception);
}
} else {
for (size_t idx = 0; idx < batches.size(); idx++) {
build_batch(_build_engine, batches[idx]);
}
}
{
std::lock_guard<std::mutex> lock(_mutex);
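The reworked build_all() above keeps the parallel compilation path when a task executor was injected through the constructor and adds a serial fallback otherwise. Reduced to a schematic (the standalone function, the Job alias and the runAndWait call are illustrative assumptions, and the locking and exception details of the real code are simplified):

#include <threading/ie_cpu_streams_executor.hpp>
#include <exception>
#include <functional>
#include <vector>

using Job = std::function<void()>;

void run_jobs_sketch(InferenceEngine::CPUStreamsExecutor::Ptr executor, const std::vector<Job>& jobs) {
    if (executor) {
        std::exception_ptr first_error;             // captured by reference, as in build_all()
        std::vector<InferenceEngine::Task> tasks;
        for (auto& job : jobs)
            tasks.push_back([&first_error, &job] {
                try { job(); } catch (...) { first_error = std::current_exception(); }
            });
        executor->runAndWait(tasks);                // assumed executor helper; blocks until every batch is processed
        if (first_error)
            std::rethrow_exception(first_error);
    } else {
        for (auto& job : jobs)                      // serial fallback, new in this patch
            job();
    }
}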
@ -458,8 +467,7 @@ void kernels_cache::compile() {
std::unique_ptr<ocl::ocl_engine> _build_engine = nullptr;
if (_engine.type() == engine_types::ocl) {
_build_engine = std::unique_ptr<ocl::ocl_engine>(new ocl::ocl_engine(_engine.get_device(), runtime_types::ocl,
_engine.configuration(), _engine.get_task_executor()));
_build_engine = std::unique_ptr<ocl::ocl_engine>(new ocl::ocl_engine(_engine.get_device(), runtime_types::ocl));
}
// create batches
@ -497,8 +505,7 @@ void kernels_cache::save(BinaryOutputBuffer& ob) const {
}
ob << entry_point_to_id;
std::unique_ptr<ocl::ocl_engine> build_engine =
cldnn::make_unique<ocl::ocl_engine>(_engine.get_device(), runtime_types::ocl, _engine.configuration(), _engine.get_task_executor());
std::unique_ptr<ocl::ocl_engine> build_engine = cldnn::make_unique<ocl::ocl_engine>(_engine.get_device(), runtime_types::ocl);
std::vector<std::vector<unsigned char>> precompiled_kernels;
@ -540,7 +547,7 @@ void kernels_cache::load(BinaryInputBuffer& ib) {
OPENVINO_ASSERT(_engine.type() == engine_types::ocl, "[GPU] Not supported engine type");
std::unique_ptr<ocl::ocl_engine> build_engine =
cldnn::make_unique<ocl::ocl_engine>(_engine.get_device(), runtime_types::ocl, _engine.configuration(), _engine.get_task_executor());
cldnn::make_unique<ocl::ocl_engine>(_engine.get_device(), runtime_types::ocl);
std::map<std::string, std::string> entry_point_to_id;
std::vector<std::vector<unsigned char>> precompiled_kernels;


@ -7,6 +7,7 @@
#include "intel_gpu/graph/serialization/binary_buffer.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/kernel.hpp"
#include "intel_gpu/runtime/execution_config.hpp"
#include <map>
#include <mutex>
@ -76,6 +77,8 @@ public:
private:
static std::mutex _mutex;
engine& _engine;
InferenceEngine::CPUStreamsExecutor::Ptr _task_executor;
ExecutionConfig _config;
uint32_t _prog_id = 0;
kernels_code _kernels_code;
size_t _kernel_idx = 0;
@ -91,7 +94,11 @@ private:
size_t get_max_kernels_per_batch() const;
public:
explicit kernels_cache(engine& engine, uint32_t prog_id, const std::vector<std::string>& batch_header_str = {});
explicit kernels_cache(engine& engine,
const ExecutionConfig& config,
uint32_t prog_id,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor = nullptr,
const std::vector<std::string>& batch_header_str = {});
kernel_id set_kernel_source(const std::shared_ptr<kernel_string>& kernel_string,
bool dump_custom_program);
kernel::ptr get_kernel(kernel_id id) const;


@ -14,20 +14,20 @@ command_queues_builder::command_queues_builder()
: _profiling(false),
_out_of_order(false),
_supports_queue_families(false),
_priority_mode(priority_mode_types::disabled),
_throttle_mode(throttle_mode_types::disabled) {}
_priority_mode(),
_throttle_mode() {}
#if CL_TARGET_OPENCL_VERSION >= 200
std::vector<cl_queue_properties> command_queues_builder::get_properties(const cl::Device& device, uint16_t stream_id) {
std::vector<cl_queue_properties> properties;
if (_priority_mode != priority_mode_types::disabled) {
if (_priority_mode.has_value()) {
unsigned cl_queue_priority_value = CL_QUEUE_PRIORITY_MED_KHR;
switch (_priority_mode) {
case priority_mode_types::high:
switch (_priority_mode.value()) {
case ov::hint::Priority::HIGH:
cl_queue_priority_value = CL_QUEUE_PRIORITY_HIGH_KHR;
break;
case priority_mode_types::low:
case ov::hint::Priority::LOW:
cl_queue_priority_value = CL_QUEUE_PRIORITY_LOW_KHR;
break;
default:
@ -37,13 +37,13 @@ std::vector<cl_queue_properties> command_queues_builder::get_properties(const cl
properties.insert(properties.end(), {CL_QUEUE_PRIORITY_KHR, cl_queue_priority_value});
}
if (_throttle_mode != throttle_mode_types::disabled) {
if (_throttle_mode.has_value()) {
unsigned cl_queue_throttle_value = CL_QUEUE_THROTTLE_MED_KHR;
switch (_throttle_mode) {
case throttle_mode_types::high:
switch (_throttle_mode.value()) {
case ov::intel_gpu::hint::ThrottleLevel::HIGH:
cl_queue_throttle_value = CL_QUEUE_THROTTLE_HIGH_KHR;
break;
case throttle_mode_types::low:
case ov::intel_gpu::hint::ThrottleLevel::LOW:
cl_queue_throttle_value = CL_QUEUE_THROTTLE_LOW_KHR;
break;
default:
@ -107,27 +107,19 @@ ocl_queue_type command_queues_builder::build(const cl::Context& context, const c
#else
queue = clCreateCommandQueue(context.get(), device.get(), properties, &error_code);
#endif
if (error_code != CL_SUCCESS) {
CLDNN_ERROR_MESSAGE("Command queues builders",
"clCreateCommandQueueWithPropertiesINTEL error " + std::to_string(error_code));
}
OPENVINO_ASSERT(error_code == CL_SUCCESS, "[GPU] Command queues builder returned ", error_code, " error code");
return queue;
}
void command_queues_builder::set_priority_mode(priority_mode_types priority, bool extension_support) {
void command_queues_builder::set_priority_mode(ov::hint::Priority priority, bool extension_support) {
if (extension_support) {
_priority_mode = priority;
} else {
_priority_mode = priority_mode_types::disabled;
}
}
void command_queues_builder::set_throttle_mode(throttle_mode_types throttle, bool extension_support) {
void command_queues_builder::set_throttle_mode(ov::intel_gpu::hint::ThrottleLevel throttle, bool extension_support) {
if (extension_support) {
_throttle_mode = throttle;
} else {
_throttle_mode = throttle_mode_types::disabled;
}
}
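Since priority and throttle are now optionals rather than enums with a disabled sentinel, a queue property is only emitted when a mode was actually set and the extension is supported. A reduced sketch of the priority mapping, using std::optional as a stand-in for the in-tree optional_value:

// Mirrors the mapping above; the free function and std::optional are illustrative.
#define CL_TARGET_OPENCL_VERSION 300    // cl_queue_properties needs OpenCL >= 2.0, as guarded above
#include <CL/cl_ext.h>
#include <optional>
#include <vector>
#include "openvino/runtime/properties.hpp"

std::vector<cl_queue_properties> priority_properties_sketch(std::optional<ov::hint::Priority> mode) {
    std::vector<cl_queue_properties> props;
    if (!mode.has_value())
        return props;                               // nothing requested -> no hint is passed to the queue
    cl_queue_properties value = CL_QUEUE_PRIORITY_MED_KHR;    // default branch above
    if (*mode == ov::hint::Priority::HIGH) value = CL_QUEUE_PRIORITY_HIGH_KHR;
    if (*mode == ov::hint::Priority::LOW)  value = CL_QUEUE_PRIORITY_LOW_KHR;
    props.insert(props.end(), {CL_QUEUE_PRIORITY_KHR, value});
    return props;
}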


@ -6,6 +6,7 @@
#include "ocl_common.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/optionals.hpp"
namespace cldnn {
namespace ocl {
@ -14,8 +15,8 @@ class command_queues_builder {
public:
command_queues_builder();
ocl_queue_type build(const cl::Context& context, const cl::Device& device);
void set_throttle_mode(throttle_mode_types throttle, bool extension_support);
void set_priority_mode(priority_mode_types priority, bool extension_support);
void set_throttle_mode(ov::intel_gpu::hint::ThrottleLevel throttle, bool extension_support);
void set_priority_mode(ov::hint::Priority priority, bool extension_support);
void set_profiling(bool flag) { _profiling = flag; }
void set_out_of_order(bool flag) { _out_of_order = flag; }
void set_supports_queue_families(bool extension_support);
@ -24,8 +25,8 @@ private:
bool _profiling;
bool _out_of_order;
bool _supports_queue_families;
priority_mode_types _priority_mode;
throttle_mode_types _throttle_mode;
optional_value<ov::hint::Priority> _priority_mode;
optional_value<ov::intel_gpu::hint::ThrottleLevel> _throttle_mode;
#if CL_TARGET_OPENCL_VERSION >= 200
std::vector<cl_queue_properties> get_properties(const cl::Device& device, uint16_t stream_id = 0);
#else
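A typical call site for the builder with the new property types, as a sketch. Only the builder methods and the engine accessors appear in this diff; the config lookups, the extension names and the wrapper function are assumptions about the surrounding stream code, and the in-tree includes are omitted because the header paths are not shown in this excerpt:

cldnn::ocl::ocl_queue_type make_queue_sketch(const cldnn::ocl::ocl_engine& engine,
                                             const ov::intel_gpu::ExecutionConfig& config) {
    cldnn::ocl::command_queues_builder builder;
    builder.set_profiling(config.get_property(ov::enable_profiling));
    builder.set_priority_mode(config.get_property(ov::intel_gpu::hint::queue_priority),
                              engine.extension_supported("cl_khr_priority_hints"));
    builder.set_throttle_mode(config.get_property(ov::intel_gpu::hint::queue_throttle),
                              engine.extension_supported("cl_khr_throttle_hints"));
    return builder.build(engine.get_cl_context(), engine.get_cl_device());
}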


@ -288,7 +288,7 @@ bool ocl_device::is_same(const device::ptr other) {
if (!casted)
return false;
return _context == casted->get_context() && _device == casted->get_device() && _platform == casted->get_platform();
return _device == casted->get_device() && _platform == casted->get_platform();
}
} // namespace ocl


@ -41,9 +41,8 @@ namespace ocl {
ocl_error::ocl_error(cl::Error const& err)
: ov::Exception("[GPU] " + std::string(err.what()) + std::string(", error code: ") + std::to_string(err.err())) {}
ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type,
const engine_configuration& conf, const InferenceEngine::ITaskExecutor::Ptr task_executor)
: engine(dev, conf, task_executor) {
ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type)
: engine(dev) {
OPENVINO_ASSERT(runtime_type == runtime_types::ocl, "[GPU] Invalid runtime type specified for OCL engine. Only OCL runtime is supported");
auto casted = dynamic_cast<ocl_device*>(dev.get());
@ -52,12 +51,11 @@ ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type,
casted->get_device().getInfo(CL_DEVICE_EXTENSIONS, &_extensions);
_usm_helper.reset(new cl::UsmHelper(get_cl_context(), get_cl_device(), use_unified_shared_memory()));
_program_stream.reset(new ocl_stream(*this));
_service_stream.reset(new ocl_stream(*this, ExecutionConfig()));
}
#ifdef ENABLE_ONEDNN_FOR_GPU
dnnl::engine& ocl_engine::get_onednn_engine() const {
void ocl_engine::create_onednn_engine(const ExecutionConfig& config) {
const std::lock_guard<std::mutex> lock(onednn_mutex);
OPENVINO_ASSERT(_device->get_info().vendor_id == INTEL_VENDOR_ID, "[GPU] OneDNN engine can be used for Intel GPUs only");
if (!_onednn_engine) {
@ -65,12 +63,12 @@ dnnl::engine& ocl_engine::get_onednn_engine() const {
if (!casted)
throw ov::Exception("[GPU] Invalid device type stored in ocl_engine");
auto config = this->configuration();
if (config.kernels_cache_path.empty()) {
std::string cache_dir = config.get_property(ov::cache_dir);
if (cache_dir.empty()) {
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
} else {
// Use cached blob
auto path = config.kernels_cache_path;
auto path = cache_dir;
if (path.back() != '/' && path.back() != '\\') {
path += "/";
}
@ -79,7 +77,7 @@ dnnl::engine& ocl_engine::get_onednn_engine() const {
if (blob_id.empty()) {
// Create engine without cache_blob
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
return *_onednn_engine;
return;
}
std::string id_str(blob_id.begin(), blob_id.end());
@ -98,7 +96,10 @@ dnnl::engine& ocl_engine::get_onednn_engine() const {
}
}
}
}
dnnl::engine& ocl_engine::get_onednn_engine() const {
OPENVINO_ASSERT(_onednn_engine, "[GPU] Can't get onednn engine handle as it was not initialized. Please check that create_onednn_engine() was called");
return *_onednn_engine;
}
#endif
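get_onednn_engine() no longer builds the engine lazily from the removed engine configuration; creation is an explicit step that receives the ExecutionConfig (and honours ov::cache_dir for oneDNN kernel blobs). The expected call order, sketched with a hypothetical wrapper:

#ifdef ENABLE_ONEDNN_FOR_GPU
void init_onednn_sketch(cldnn::ocl::ocl_engine& engine, const ov::intel_gpu::ExecutionConfig& config) {
    engine.create_onednn_engine(config);                      // must happen first, e.g. during program compilation
    dnnl::engine& dnnl_engine = engine.get_onednn_engine();   // asserts above if creation was skipped
    (void)dnnl_engine;
}
#endif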
@ -154,7 +155,7 @@ memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type ty
}
if (reset || res->is_memory_reset_needed(layout)) {
res->fill(get_program_stream());
res->fill(get_service_stream());
}
return res;
@ -266,26 +267,24 @@ bool ocl_engine::extension_supported(std::string extension) const {
return _extensions.find(extension) != std::string::npos;
}
stream::ptr ocl_engine::create_stream() const {
return std::make_shared<ocl_stream>(*this);
stream::ptr ocl_engine::create_stream(const ExecutionConfig& config) const {
return std::make_shared<ocl_stream>(*this, config);
}
stream::ptr ocl_engine::create_stream(void* handle) const {
return std::make_shared<ocl_stream>(*this, handle);
stream::ptr ocl_engine::create_stream(const ExecutionConfig& config, void* handle) const {
return std::make_shared<ocl_stream>(*this, config, handle);
}
stream& ocl_engine::get_program_stream() const {
return *_program_stream;
stream& ocl_engine::get_service_stream() const {
return *_service_stream;
}
std::shared_ptr<cldnn::engine> ocl_engine::create(const device::ptr device, runtime_types runtime_type,
const engine_configuration& configuration, const InferenceEngine::ITaskExecutor::Ptr task_executor) {
return std::make_shared<ocl::ocl_engine>(device, runtime_type, configuration, task_executor);
std::shared_ptr<cldnn::engine> ocl_engine::create(const device::ptr device, runtime_types runtime_type) {
return std::make_shared<ocl::ocl_engine>(device, runtime_type);
}
std::shared_ptr<cldnn::engine> create_ocl_engine(const device::ptr device, runtime_types runtime_type,
const engine_configuration& configuration, const InferenceEngine::ITaskExecutor::Ptr task_executor) {
return ocl_engine::create(device, runtime_type, configuration, task_executor);
std::shared_ptr<cldnn::engine> create_ocl_engine(const device::ptr device, runtime_types runtime_type) {
return ocl_engine::create(device, runtime_type);
}
} // namespace ocl
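Taken together, engines are now built without an engine_configuration or task executor, and streams receive the ExecutionConfig instead. A combined sketch of the factory and stream calls shown above; how the device pointer is obtained, and the dispatch through the base engine interface, are assumptions:

void engine_and_streams_sketch(cldnn::device::ptr device, const ov::intel_gpu::ExecutionConfig& config) {
    auto engine = cldnn::ocl::ocl_engine::create(device, cldnn::runtime_types::ocl);
    auto stream = engine->create_stream(config);             // per-request queue, configured via ExecutionConfig
    cldnn::stream& service = engine->get_service_stream();   // internal queue, e.g. for allocate_memory() fills
    (void)stream; (void)service;
}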
