[GPU] Model caching unit tests (#15413)

* gpu model caching unit tests

* added serialization unit tests

* added save and load for quantize primitive_inst

* reduced the range of inputs for Gemm tests

* updated the copyright year
Eddy Kim 2023-02-22 14:53:43 +09:00 committed by GitHub
parent d464f38788
commit a6ff809ad7
121 changed files with 8511 additions and 6665 deletions

View File

@ -43,6 +43,9 @@ public:
void setKernlImplParams(void* impl_params) { _impl_params = impl_params; }
void* getKernlImplParams() const { return _impl_params; }
std::streampos tellg() { return stream.tellg(); }
void seekg(std::streampos pos) { stream.seekg(pos); }
private:
std::istream& stream;
void* _impl_params;

View File

@ -0,0 +1,31 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <type_traits>
#include "buffer.hpp"
namespace cldnn {
struct input_info;
template <typename BufferType>
class Serializer<BufferType, input_info, typename std::enable_if<std::is_base_of<OutputBuffer<BufferType>, BufferType>::value>::type> {
public:
static void save(BufferType& buffer, const input_info& input) {
buffer << input.pid;
buffer << input.idx;
}
};
template <typename BufferType>
class Serializer<BufferType, input_info, typename std::enable_if<std::is_base_of<InputBuffer<BufferType>, BufferType>::value>::type> {
public:
static void load(BufferType& buffer, input_info& input) {
buffer >> input.pid;
buffer >> input.idx;
}
};
} // namespace cldnn
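
This header shows the dispatch pattern used across the new serialization code: the same Serializer class template is partially specialized twice, and std::enable_if selects the save-side specialization for buffers derived from OutputBuffer and the load-side one for buffers derived from InputBuffer, so buffer << value and buffer >> value resolve at compile time. A self-contained sketch of the same SFINAE split, with toy buffer and payload types standing in for the cldnn ones:

#include <iostream>
#include <type_traits>

// Toy CRTP bases standing in for cldnn's OutputBuffer/InputBuffer.
template <typename T> struct OutputBufferBase {};
template <typename T> struct InputBufferBase {};

struct TextWriter : OutputBufferBase<TextWriter> {
    explicit TextWriter(std::ostream& s) : os(s) {}
    std::ostream& os;
};
struct TextReader : InputBufferBase<TextReader> {
    explicit TextReader(std::istream& s) : is(s) {}
    std::istream& is;
};

// Primary template is only declared; exactly one specialization matches.
template <typename Buffer, typename T, typename Enable = void>
struct Serializer;

struct point { int x; int y; };  // toy payload playing the role of input_info

// Enabled only for writer-side buffers: provides save().
template <typename Buffer>
struct Serializer<Buffer, point,
    typename std::enable_if<std::is_base_of<OutputBufferBase<Buffer>, Buffer>::value>::type> {
    static void save(Buffer& b, const point& p) { b.os << p.x << ' ' << p.y << ' '; }
};

// Enabled only for reader-side buffers: provides load().
template <typename Buffer>
struct Serializer<Buffer, point,
    typename std::enable_if<std::is_base_of<InputBufferBase<Buffer>, Buffer>::value>::type> {
    static void load(Buffer& b, point& p) { b.is >> p.x >> p.y; }
};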

View File

@ -5,6 +5,7 @@
#pragma once
#include "primitive.hpp"
#include <vector>
#include "intel_gpu/graph/serialization/string_serializer.hpp"
namespace cldnn {
@ -74,6 +75,10 @@ struct activation_additional_params {
struct activation : public primitive_base<activation> {
CLDNN_DECLARE_PRIMITIVE(activation)
activation() : primitive_base("", {}) {}
DECLARE_OBJECT_TYPE_SERIALIZATION
/// @brief Constructs Relu primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
@ -137,6 +142,18 @@ struct activation : public primitive_base<activation> {
additional_params_input.empty() == rhs_casted.additional_params_input.empty();
}
void save(BinaryOutputBuffer& ob) const override {
ob << make_data(&activation_function, sizeof(activation_func));
ob << make_data(&additional_params, sizeof(activation_additional_params));
ob << additional_params_input;
}
void load(BinaryInputBuffer& ib) override {
ib >> make_data(&activation_function, sizeof(activation_func));
ib >> make_data(&additional_params, sizeof(activation_additional_params));
ib >> additional_params_input;
}
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
if (additional_params_input.empty())

View File

@ -5,6 +5,9 @@
#pragma once
#include "primitive.hpp"
#include "openvino/op/util/attr_types.hpp"
#include "intel_gpu/graph/serialization/input_info_serializer.hpp"
#include "intel_gpu/graph/serialization/string_serializer.hpp"
#include "intel_gpu/graph/serialization/vector_serializer.hpp"
#include <algorithm>
#include <vector>
@ -19,6 +22,10 @@ namespace cldnn {
struct arg_max_min : public primitive_base<arg_max_min> {
CLDNN_DECLARE_PRIMITIVE(arg_max_min)
arg_max_min() : primitive_base("", {}) {}
DECLARE_OBJECT_TYPE_SERIALIZATION
/// @brief Constructs arg_max_min primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
@ -95,5 +102,29 @@ struct arg_max_min : public primitive_base<arg_max_min> {
sort == rhs_casted.sort &&
values_first == rhs_casted.values_first;
}
uint32_t get_output_nums() const { return (input_size() == 3 ? 2 : output_size()); }
bool has_second_output() const { return get_output_nums() == 2; }
bool use_multiple_outputs() const { return input_size() != 3; }
void save(BinaryOutputBuffer& ob) const override {
ob << input;
ob << num_outputs;
ob << make_data(&mode, sizeof(ov::op::TopKMode));
ob << top_k;
ob << axis;
ob << make_data(&sort, sizeof(ov::op::TopKSortType));
ob << values_first;
}
void load(BinaryInputBuffer& ib) override {
ib >> input;
ib >> num_outputs;
ib >> make_data(&mode, sizeof(ov::op::TopKMode));
ib >> top_k;
ib >> axis;
ib >> make_data(&sort, sizeof(ov::op::TopKSortType));
ib >> values_first;
}
};
} // namespace cldnn
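
Two serializer styles meet in this save/load pair: the input vector and its pid strings go through the dedicated string/vector/input_info serializers included at the top of the file, while plain enums such as mode and sort are wrapped with make_data, which pairs a raw pointer with a byte count so the binary buffer can copy the value verbatim. A minimal sketch of such a pointer-plus-size wrapper; the real cldnn helper may differ in detail:

#include <cstddef>
#include <ostream>

// Sketch: expose a trivially-copyable value as (pointer, size) so a binary
// buffer can stream its bytes without knowing the concrete type.
struct data_view {
    const void* ptr;
    std::size_t size;
};

inline data_view make_data(const void* p, std::size_t n) { return {p, n}; }

struct byte_writer {
    std::ostream& os;
    byte_writer& operator<<(const data_view& d) {
        os.write(static_cast<const char*>(d.ptr), static_cast<std::streamsize>(d.size));
        return *this;
    }
};

// Usage: enum class topk_mode { max, min };
//        topk_mode m = topk_mode::max;
//        writer << make_data(&m, sizeof(topk_mode));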

View File

@ -7,6 +7,7 @@
#include "activation.hpp"
#include <vector>
#include <algorithm>
#include "intel_gpu/graph/serialization/string_serializer.hpp"
namespace cldnn {
@ -189,6 +190,11 @@ protected:
struct lstm_gemm : public primitive_base<lstm_gemm> {
CLDNN_DECLARE_PRIMITIVE(lstm_gemm)
lstm_gemm() : primitive_base("", {}) {}
DECLARE_OBJECT_TYPE_SERIALIZATION
/// @brief Constructs lstm layer.
/// @param id This primitive id.
/// @param input input primitive id.
@ -242,6 +248,22 @@ struct lstm_gemm : public primitive_base<lstm_gemm> {
hidden.empty() == rhs_casted.hidden.empty();
}
void save(BinaryOutputBuffer& ob) const override {
ob << weights;
ob << recurrent;
ob << bias;
ob << hidden;
ob << direction;
}
void load(BinaryInputBuffer& ib) override {
ib >> weights;
ib >> recurrent;
ib >> bias;
ib >> hidden;
ib >> direction;
}
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
@ -257,6 +279,11 @@ protected:
struct lstm_elt : public primitive_base<lstm_elt> {
CLDNN_DECLARE_PRIMITIVE(lstm_elt)
lstm_elt() : primitive_base("", {}) {}
DECLARE_OBJECT_TYPE_SERIALIZATION
using vec_activation = std::vector<activation_func>;
using vec_activation_param = std::vector<activation_additional_params>;
@ -342,6 +369,22 @@ struct lstm_elt : public primitive_base<lstm_elt> {
#undef cmp_fields
}
void save(BinaryOutputBuffer& ob) const override {
ob << cell;
ob << clip;
ob << input_forget;
ob << make_data(&offset_order, sizeof(lstm_weights_order));
ob << direction;
}
void load(BinaryInputBuffer& ib) override {
ib >> cell;
ib >> clip;
ib >> input_forget;
ib >> make_data(&offset_order, sizeof(lstm_weights_order));
ib >> direction;
}
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;

View File

@ -6,6 +6,8 @@
#include "primitive.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include <vector>
#include "intel_gpu/graph/serialization/string_serializer.hpp"
#include "intel_gpu/graph/serialization/vector_serializer.hpp"
namespace cldnn {
@ -24,6 +26,10 @@ enum class reorder_mean_mode {
struct reorder : public primitive_base<reorder> {
CLDNN_DECLARE_PRIMITIVE(reorder)
reorder() : primitive_base("", {}), output_format(format::any) {}
DECLARE_OBJECT_TYPE_SERIALIZATION
/// @brief reorder memory types
enum class memory_type {
buffer,
@ -178,6 +184,24 @@ struct reorder : public primitive_base<reorder> {
mean.empty() == rhs_casted.mean.empty();
}
void save(BinaryOutputBuffer& ob) const override {
ob << make_data(&output_format, sizeof(format));
ob << mean;
ob << subtract_per_feature;
ob << make_data(&mean_mode, sizeof(reorder_mean_mode));
ob << make_data(&input_mem_type, sizeof(memory_type));
ob << truncate;
}
void load(BinaryInputBuffer& ib) override {
ib >> make_data(&output_format, sizeof(format));
ib >> mean;
ib >> subtract_per_feature;
ib >> make_data(&mean_mode, sizeof(reorder_mean_mode));
ib >> make_data(&input_mem_type, sizeof(memory_type));
ib >> truncate;
}
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
if (mean.empty())

View File

@ -13,6 +13,10 @@ namespace cldnn {
struct roi_pooling : public primitive_base<roi_pooling> {
CLDNN_DECLARE_PRIMITIVE(roi_pooling)
roi_pooling() : primitive_base("", {}) {}
DECLARE_OBJECT_TYPE_SERIALIZATION
roi_pooling(const primitive_id& id,
const input_info& input_data,
const input_info& input_rois,
@ -118,6 +122,36 @@ struct roi_pooling : public primitive_base<roi_pooling> {
cmp_fields(spatial_bins_y);
#undef cmp_fields
}
void save(BinaryOutputBuffer& ob) const override {
ob << make_data(&mode, sizeof(pooling_mode));
ob << position_sensitive;
ob << pooled_width;
ob << pooled_height;
ob << spatial_scale;
ob << trans_std;
ob << no_trans;
ob << output_dim;
ob << part_size;
ob << group_size;
ob << spatial_bins_x;
ob << spatial_bins_y;
}
void load(BinaryInputBuffer& ib) override {
ib >> make_data(&mode, sizeof(pooling_mode));
ib >> position_sensitive;
ib >> pooled_width;
ib >> pooled_height;
ib >> spatial_scale;
ib >> trans_std;
ib >> no_trans;
ib >> output_dim;
ib >> part_size;
ib >> group_size;
ib >> spatial_bins_x;
ib >> spatial_bins_y;
}
};
} // namespace cldnn
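
Every save/load pair added in this commit streams its fields in the same order on both sides, and that symmetry is exactly what the new serialization unit tests exercise: write a primitive out, read it back, compare. A minimal round-trip sketch of that test shape, using a toy type rather than the actual fixtures:

#include <cassert>
#include <sstream>

// Toy struct mirroring the save/load discipline above: load must read
// fields in the exact order save wrote them.
struct pool_params {
    int pooled_width = 0;
    float spatial_scale = 0.f;
    void save(std::ostream& os) const { os << pooled_width << ' ' << spatial_scale; }
    void load(std::istream& is) { is >> pooled_width >> spatial_scale; }
};

int main() {
    pool_params a{7, 0.5f}, b;
    std::stringstream buf;
    a.save(buf);
    b.load(buf);
    assert(b.pooled_width == a.pooled_width && b.spatial_scale == a.spatial_scale);
    return 0;
}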

View File

@ -114,58 +114,6 @@ struct kernel_arguments_data {
const scalars_desc* scalars = nullptr;
};
struct kernel_arguments_data_idx {
std::vector<int32_t> inputs;
int32_t weights;
int32_t recurrent;
int32_t hidden;
int32_t cell;
int32_t bias;
int32_t weights_zero_points;
int32_t activations_zero_points;
int32_t compensation;
int32_t lookup_table;
int32_t scale_table;
int32_t slope;
std::vector<int32_t> fused_op_inputs;
scalars_desc scalars;
template <typename BufferType>
void save(BufferType& ob) const {
ob << inputs;
ob << weights;
ob << recurrent;
ob << hidden;
ob << cell;
ob << bias;
ob << weights_zero_points;
ob << activations_zero_points;
ob << compensation;
ob << lookup_table;
ob << scale_table;
ob << slope;
ob << fused_op_inputs;
}
template <typename BufferType>
void load(BufferType& ib) {
ib >> inputs;
ib >> weights;
ib >> recurrent;
ib >> hidden;
ib >> cell;
ib >> bias;
ib >> weights_zero_points;
ib >> activations_zero_points;
ib >> compensation;
ib >> lookup_table;
ib >> scale_table;
ib >> slope;
ib >> fused_op_inputs;
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// KernelString
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -31,7 +31,6 @@ public:
void init_kernels(const kernels_cache&) override {}
void set_arguments(primitive_inst& /*instance*/) override {}
void set_arguments(kernel_arguments_data_idx& /*instance*/) override {}
kernel_arguments_data get_arguments(const primitive_inst& /*instance*/) const override {
kernel_arguments_data args;
return args;

View File

@ -163,3 +163,4 @@ attach_activation_impl::attach_activation_impl() {
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::activation_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::activation)
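
Each type that can round-trip through the model cache gets a BIND_BINARY_BUFFER_WITH_TYPE registration like the pair above; the same lines are added for arg_max_min, lstm_elt, lstm_gemm, reorder, and roi_pooling below. A macro of this kind typically expands to a static registrar that maps a type tag to a factory, so the loader can reconstruct the right concrete object from a tag stored in the blob. A hedged sketch of that general idea, not the actual cldnn macro:

#include <functional>
#include <map>
#include <memory>
#include <string>

// Sketch: a tag-to-factory registry in the spirit of a binding macro.
struct saveable {
    virtual ~saveable() = default;
};

using factory = std::function<std::unique_ptr<saveable>()>;

inline std::map<std::string, factory>& type_registry() {
    static std::map<std::string, factory> r;
    return r;
}

// One static instance per registered type wires its factory in at startup.
template <typename T>
struct registrar {
    explicit registrar(const std::string& name) {
        type_registry()[name] = [] { return std::unique_ptr<saveable>(new T()); };
    }
};

#define BIND_TYPE(T) static registrar<T> bind_##T(#T);

struct my_impl : saveable {};
BIND_TYPE(my_impl)  // the loader can now look up "my_impl" and construct one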

View File

@ -51,7 +51,7 @@ protected:
kernel_arguments_data get_arguments(const typed_primitive_inst<arg_max_min>& instance) const override {
kernel_arguments_data args = parent::get_arguments(instance);
if (instance.node->has_second_output()) {
if (instance.get_typed_desc<arg_max_min>()->has_second_output()) {
if (args.inputs.size() > 1) {
args.inputs.erase(args.inputs.begin() + 1); // erase constant input in case of TOP_K
}
@ -138,3 +138,4 @@ attach_arg_max_min_impl::attach_arg_max_min_impl() {
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::arg_max_min_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::arg_max_min)

View File

@ -102,3 +102,4 @@ attach_lstm_elt_impl::attach_lstm_elt_impl() {
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::lstm_elt_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::lstm_elt)

View File

@ -103,3 +103,4 @@ attach_lstm_gemm_impl::attach_lstm_gemm_impl() {
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::lstm_gemm_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::lstm_gemm)

View File

@ -31,19 +31,19 @@ protected:
args.inputs.push_back(instance.input_memory_ptr(i));
}
if (instance.has_num_select_per_class() && !instance.node->num_select_per_class_node().is_constant()) {
if (instance.has_num_select_per_class() && !instance.num_select_per_class_inst()->is_constant()) {
args.inputs.push_back(instance.num_select_per_class_mem());
}
if (instance.has_iou_threshold() && !instance.node->iou_threshold_node().is_constant()) {
if (instance.has_iou_threshold() && !instance.iou_threshold_inst()->is_constant()) {
args.inputs.push_back(instance.iou_threshold_mem());
}
if (instance.has_score_threshold() && !instance.node->score_threshold_node().is_constant()) {
if (instance.has_score_threshold() && !instance.score_threshold_inst()->is_constant()) {
args.inputs.push_back(instance.score_threshold_mem());
}
if (instance.has_soft_nms_sigma() && !instance.node->soft_nms_sigma_node().is_constant()) {
if (instance.has_soft_nms_sigma() && !instance.soft_nms_sigma_inst()->is_constant()) {
args.inputs.push_back(instance.soft_nms_sigma_mem());
}

View File

@ -33,7 +33,6 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
kernel_selector::kernel_data _kernel_data;
std::vector<kernel_id> _kernel_ids;
std::vector<kernel::ptr> _kernels;
kernel_arguments_data_idx _kernel_args;
typed_primitive_impl_ocl() : _kernel_data({}), _kernel_ids({}), _kernels({}) {
_kernel_data.weightsReorderParams.engine = kernel_selector::generic_kernel_params::Engine::NONE;
@ -75,7 +74,6 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
ob << _kernel_data.internalBufferSizes;
ob << _kernel_data.kernels;
ob << _kernel_ids;
ob << _kernel_args;
}
void load(BinaryInputBuffer& ib) override {
@ -83,7 +81,6 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
ib >> _kernel_data.internalBufferSizes;
ib >> _kernel_data.kernels;
ib >> _kernel_ids;
ib >> _kernel_args;
}
template<typename ImplType>
@ -126,38 +123,6 @@ protected:
return args;
}
kernel_arguments_data get_arguments_by_idx(const typed_primitive_inst<PType>& instance) const {
kernel_arguments_data args;
for (uint32_t i = 0; i < _kernel_args.inputs.size(); i++) {
args.inputs.push_back(instance.dep_memory_ptr(_kernel_args.inputs[i]));
}
args.weights = (_kernel_args.weights >= 0) ? instance.dep_memory_ptr(_kernel_args.weights) : args.weights;
args.recurrent = (_kernel_args.recurrent >= 0) ? instance.dep_memory_ptr(_kernel_args.recurrent) : args.recurrent;
args.hidden = (_kernel_args.hidden >= 0) ? instance.dep_memory_ptr(_kernel_args.hidden) : args.hidden;
args.cell = (_kernel_args.cell >= 0) ? instance.dep_memory_ptr(_kernel_args.cell) : args.cell;
args.bias = (_kernel_args.bias >= 0) ? instance.dep_memory_ptr(_kernel_args.bias) : args.bias;
args.weights_zero_points = (_kernel_args.weights_zero_points >= 0) ?
instance.dep_memory_ptr(_kernel_args.weights_zero_points) : args.weights_zero_points;
args.activations_zero_points = (_kernel_args.activations_zero_points >= 0) ?
instance.dep_memory_ptr(_kernel_args.activations_zero_points) : args.activations_zero_points;
args.compensation = (_kernel_args.compensation >= 0) ? instance.dep_memory_ptr(_kernel_args.compensation) : args.compensation;
args.lookup_table = (_kernel_args.lookup_table >= 0) ? instance.dep_memory_ptr(_kernel_args.lookup_table) : args.lookup_table;
args.scale_table = (_kernel_args.scale_table >= 0) ? instance.dep_memory_ptr(_kernel_args.scale_table) : args.scale_table;
args.slope = (_kernel_args.slope >= 0) ? instance.dep_memory_ptr(_kernel_args.slope) : args.slope;
for (size_t i = 0; i < _kernel_args.fused_op_inputs.size(); i++) {
args.fused_op_inputs.push_back(instance.dep_memory_ptr(_kernel_args.fused_op_inputs[i]));
}
for (size_t i = 0; i < instance.outputs_memory_count(); i++) {
args.outputs.push_back(instance.output_memory_ptr(i));
}
return args;
}
event::ptr aggregate_events(const std::vector<event::ptr>& events, stream& stream, bool group = false, bool is_output = false) const {
if (events.size() == 1 && !is_output)
return events[0];
@ -211,31 +176,21 @@ protected:
stream& stream = instance.get_network().get_stream();
size_t k_idx = 0;
for (size_t kd_idx = 0; kd_idx < _kernel_data.kernels.size(); ++kd_idx) {
kernel_arguments_data args;
if (_kernel_data.kernels[kd_idx].skip_execution) {
continue;
}
if (_kernel_args.inputs.size() > 0) {
args = get_arguments_by_idx(instance);
} else {
args = get_arguments(instance);
}
auto args = get_arguments(instance);
args.scalars = &_kernel_data.kernels[kd_idx].params.scalars;
for (const auto& m : instance.get_intermediates_memories()) {
args.intermediates.push_back(m);
}
args.scalars = &_kernel_data.kernels[kd_idx].params.scalars;
stream.set_arguments(*_kernels[k_idx++], _kernel_data.kernels[kd_idx].params, args);
}
}
void set_arguments_impl(kernel_arguments_data_idx& args_idx) override {
this->_kernel_args = args_idx;
}
kernel_arguments_data get_arguments_impl(const typed_primitive_inst<PType>& instance) const override {
for (size_t k = 0; k < _kernels.size(); ++k) {
auto args = get_arguments(instance);
@ -274,20 +229,13 @@ protected:
is_output_event = instance.is_output_event();
}
kernel_arguments_data args;
if (_kernel_args.inputs.size() > 0) {
args = get_arguments_by_idx(instance);
} else {
args = get_arguments(instance);
for (const auto& m : instance.get_intermediates_memories()) {
args.intermediates.push_back(m);
}
}
auto args = get_arguments(instance);
args.scalars = &_kernel_data.kernels[kd_idx].params.scalars;
for (const auto& m : instance.get_intermediates_memories()) {
args.intermediates.push_back(m);
}
auto ev = stream.enqueue_kernel(*_kernels[k_idx++], _kernel_data.kernels[kd_idx].params, args, tmp_events, is_output_event);
new_events.push_back(ev);
all_events.push_back(ev);

View File

@ -34,8 +34,8 @@ protected:
for (size_t i = 0; i < instance.inputs_memory_count(); i++) {
args.inputs.push_back(instance.input_memory_ptr(i));
}
if (instance.node->get_scale_shift_opt()) {
if (instance.node->get_dependencies().size() == 9) {
if (instance.scale_shift_opt) {
if (instance.dependencies().size() == 9) {
args.inputs.push_back(instance.dep_memory_ptr(5));
args.inputs.push_back(instance.dep_memory_ptr(6));
args.inputs.push_back(instance.dep_memory_ptr(7));

View File

@ -140,3 +140,4 @@ attach_reorder_impl::attach_reorder_impl() {
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::reorder_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::reorder)

View File

@ -49,7 +49,7 @@ protected:
kernel_arguments_data get_arguments(const typed_primitive_inst<roi_pooling>& instance) const override {
kernel_arguments_data args;
if (instance.argument->mode == pooling_mode::deformable_bilinear && !instance.argument->no_trans)
if (instance.get_typed_desc<roi_pooling>()->mode == pooling_mode::deformable_bilinear && !instance.get_typed_desc<roi_pooling>()->no_trans)
args.inputs = {
instance.input_memory_ptr(),
instance.rois_memory(),
@ -109,3 +109,4 @@ attach_roi_pooling_impl::attach_roi_pooling_impl() {
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::roi_pooling_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::roi_pooling)

View File

@ -58,7 +58,7 @@ public:
memory::ptr slope_memory() const { return dep_memory_ptr(1); }
bool is_parameterized() const { return !argument->additional_params_input.empty(); }
bool is_parameterized() const { return !get_typed_desc<activation>()->additional_params_input.empty(); }
};
using activation_inst = typed_primitive_inst<activation>;
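
This one-line change is a motif that repeats through the instance headers below (lstm_elt, lstm_gemm, multiclass_nms, non_max_suppression, reorder): accessors that used to reach through argument-> or instance.node-> now go through get_typed_desc<PType>() or dependencies(). Once a network is imported from the cache there is no program_node behind the instance, so anything an impl needs at execution time has to come from the deserialized primitive descriptor or the instance's own dependency list; the arg_max_min helpers below likewise move from the program node onto the primitive itself.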

View File

@ -19,12 +19,6 @@ public:
typed_program_node(std::shared_ptr<primitive> prim, program& prog) : parent(prim, prog) {}
program_node& input() const { return get_dependency(0); }
uint32_t get_output_nums() const {
return (get_primitive()->input_size() == 3 ? 2 : get_primitive()->output_size());
}
bool has_second_output() const { return get_output_nums() == 2; }
bool use_multiple_outputs() const { return get_primitive()->input_size() != 3; }
std::vector<size_t> get_shape_infer_dependencies() const override { return {1}; }
};

View File

@ -45,16 +45,16 @@ public:
typed_primitive_inst(network& network, lstm_elt_node const& node);
memory::ptr cell_memory() const { return dep_memory_ptr(1); }
bool cell_term() const { return !argument->cell.empty(); }
lstm_weights_order offset_order() const { return argument->offset_order; }
bool cell_term() const { return !get_typed_desc<lstm_elt>()->cell.empty(); }
lstm_weights_order offset_order() const { return get_typed_desc<lstm_elt>()->offset_order; }
float clip() const {
float clip_val = argument->clip;
float clip_val = get_typed_desc<lstm_elt>()->clip;
if (clip_val < 0)
throw std::range_error("Clip value < 0");
return clip_val;
}
bool input_forget() const { return argument->input_forget; }
uint32_t direction() const { return argument->direction; }
bool input_forget() const { return get_typed_desc<lstm_elt>()->input_forget; }
uint32_t direction() const { return get_typed_desc<lstm_elt>()->direction; }
};
using lstm_elt_inst = typed_primitive_inst<lstm_elt>;

View File

@ -44,9 +44,9 @@ public:
memory::ptr recurrent_memory() const { return dep_memory_ptr(2); }
memory::ptr bias_memory() const { return dep_memory_ptr(3); }
memory::ptr hidden_memory() const { return bias_term() ? dep_memory_ptr(4) : dep_memory_ptr(3); }
bool bias_term() const { return !argument->bias.empty(); }
bool hidden_term() const { return !argument->hidden.empty(); }
uint32_t direction() const { return argument->direction; }
bool bias_term() const { return !get_typed_desc<lstm_gemm>()->bias.empty(); }
bool hidden_term() const { return !get_typed_desc<lstm_gemm>()->hidden.empty(); }
uint32_t direction() const { return get_typed_desc<lstm_gemm>()->direction; }
};
using lstm_gemm_inst = typed_primitive_inst<lstm_gemm>;

View File

@ -64,10 +64,10 @@ public:
typed_primitive_inst(network& network, const multiclass_nms_node& node) : parent(network, node) {}
memory::ptr output_indices_memory() const {
return dep_memory_ptr(node->get_dependencies().size() - 2);
return dep_memory_ptr(dependencies().size() - 2);
}
memory::ptr output_num_memory() const {
return dep_memory_ptr(node->get_dependencies().size() - 1);
return dep_memory_ptr(dependencies().size() - 1);
}
};

View File

@ -91,6 +91,27 @@ class typed_primitive_inst<non_max_suppression> : public typed_primitive_inst_ba
using parent = typed_primitive_inst_base<non_max_suppression>;
using parent::parent;
size_t get_iou_threshold_offset() const {
size_t offset = 2;
offset += has_num_select_per_class();
return offset;
}
size_t get_score_threshold_offset() const {
size_t offset = 2;
offset += has_num_select_per_class();
offset += has_iou_threshold();
return offset;
}
size_t get_soft_nms_sigma_offset() const {
size_t offset = 2;
offset += has_num_select_per_class();
offset += has_iou_threshold();
offset += has_score_threshold();
return offset;
}
public:
typed_primitive_inst(network& network, non_max_suppression_node const& node)
: parent(network, node)
@ -113,29 +134,32 @@ public:
memory::ptr num_select_per_class_mem() const {
return dep_memory_ptr(2);
}
std::shared_ptr<const primitive_inst> num_select_per_class_inst() const {
return dependencies().at(2).first;
}
bool has_iou_threshold() const { return !get_typed_desc<non_max_suppression>()->iou_threshold.empty(); }
memory::ptr iou_threshold_mem() const {
size_t offset = 2;
offset += has_num_select_per_class();
return dep_memory_ptr(offset);
return dep_memory_ptr(get_iou_threshold_offset());
}
std::shared_ptr<const primitive_inst> iou_threshold_inst() const {
return dependencies().at(get_iou_threshold_offset()).first;
}
bool has_score_threshold() const { return !get_typed_desc<non_max_suppression>()->score_threshold.empty(); }
memory::ptr score_threshold_mem() const {
size_t offset = 2;
offset += has_num_select_per_class();
offset += has_iou_threshold();
return dep_memory_ptr(offset);
return dep_memory_ptr(get_score_threshold_offset());
}
std::shared_ptr<const primitive_inst> score_threshold_inst() const {
return dependencies().at(get_score_threshold_offset()).first;
}
bool has_soft_nms_sigma() const { return !get_typed_desc<non_max_suppression>()->soft_nms_sigma.empty(); }
memory::ptr soft_nms_sigma_mem() const {
size_t offset = 2;
offset += has_num_select_per_class();
offset += has_iou_threshold();
offset += has_score_threshold();
return dep_memory_ptr(offset);
return dep_memory_ptr(get_soft_nms_sigma_offset());
}
std::shared_ptr<const primitive_inst> soft_nms_sigma_inst() const {
return dependencies().at(get_soft_nms_sigma_offset()).first;
}
bool has_second_output() const { return !get_typed_desc<non_max_suppression>()->second_output.empty(); }
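
The refactor above folds three copies of the same offset arithmetic into the private get_*_offset() helpers, shared by the memory accessors and the new *_inst() dependency accessors. The arithmetic works because the optional inputs are appended after the two mandatory ones in a fixed order, and each has_*() bool contributes 0 or 1 to the offsets behind it; a small standalone sketch of that accumulation (toy struct, not the cldnn class):

#include <cassert>
#include <cstddef>

// Sketch: optional inputs sit after the two mandatory ones in a fixed
// order; every present flag shifts the offsets of the inputs behind it.
struct nms_layout {
    bool has_num_select_per_class;
    bool has_iou_threshold;
    bool has_score_threshold;

    std::size_t iou_threshold_offset() const {
        return 2 + static_cast<std::size_t>(has_num_select_per_class);
    }
    std::size_t score_threshold_offset() const {
        return iou_threshold_offset() + static_cast<std::size_t>(has_iou_threshold);
    }
    std::size_t soft_nms_sigma_offset() const {
        return score_threshold_offset() + static_cast<std::size_t>(has_score_threshold);
    }
};

int main() {
    nms_layout l{true, true, false};
    assert(l.iou_threshold_offset() == 3);
    assert(l.score_threshold_offset() == 4);
    assert(l.soft_nms_sigma_offset() == 4);  // score_threshold input absent
    return 0;
}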

View File

@ -49,7 +49,6 @@ struct primitive_impl {
virtual void set_node_params(const program_node&) {}
virtual std::string get_type() const = 0;
virtual void set_arguments(primitive_inst& instance) = 0;
virtual void set_arguments(kernel_arguments_data_idx& args_idx) = 0;
virtual kernel_arguments_data get_arguments(const primitive_inst& instance) const = 0;
virtual event::ptr execute(const std::vector<event::ptr>& events, primitive_inst& instance) = 0;
std::string get_kernel_name() const { return _kernel_name; }
@ -288,7 +287,6 @@ protected:
memory::ptr allocate_internal_buffer(size_t idx);
static std::vector<std::shared_ptr<primitive_inst>> build_exec_deps(
std::vector<std::pair<std::shared_ptr<primitive_inst>, int32_t>> const& mem_deps);
void convert_args(const kernel_arguments_data& args, kernel_arguments_data_idx& args_idx) const;
int32_t get_index_in_deps(memory::cptr arg) const;
// event function called by primitive_inst::execute after checking if primitive should rerun and before calling
@ -382,16 +380,11 @@ private:
return set_arguments_impl(reinterpret_cast<typed_primitive_inst<PType>&>(instance));
}
void set_arguments(kernel_arguments_data_idx& args_idx) override {
return set_arguments_impl(args_idx);
}
kernel_arguments_data get_arguments(const primitive_inst& instance) const override {
return get_arguments_impl(reinterpret_cast<const typed_primitive_inst<PType>&>(instance));
}
virtual void set_arguments_impl(typed_primitive_inst<PType>& /*instance*/) {}
virtual void set_arguments_impl(kernel_arguments_data_idx& /*args_idx*/) {}
virtual kernel_arguments_data get_arguments_impl(const typed_primitive_inst<PType>& /*instance*/) const {
kernel_arguments_data args;
return args;

View File

@ -167,8 +167,12 @@ public:
static std::vector<layout> calc_output_layouts(quantize_node const& node, kernel_impl_params const& impl_param);
static layout calc_output_layout(quantize_node const& node, kernel_impl_params const& impl_param);
static std::string to_string(quantize_node const& node);
void save(BinaryOutputBuffer& ob) const override;
void load(BinaryInputBuffer& ib) override;
typed_primitive_inst(network& network, quantize_node const& desc);
bool scale_shift_opt; // This is for serialization. Please do not remove it.
};
using quantize_inst = typed_primitive_inst<quantize>;

View File

@ -65,7 +65,7 @@ public:
memory::ptr mean_nv12_memory() const { return dep_memory_ptr(2); }
memory::ptr mean_memory() const { return dep_memory_ptr(1); }
bool has_mean() const { return !argument->mean.empty(); }
bool has_mean() const { return !get_typed_desc<reorder>()->mean.empty(); }
void update_output_memory() override;
bool requires_reinterpret() const { return _req_reinterpr; }

View File

@ -409,7 +409,8 @@ network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, st
for (const auto& p_inst : _exec_order) {
ib >> *p_inst;
_primitives[p_inst->id()] = p_inst;
p_inst->init_kernels(kernels_cache);
if (p_inst->get_impl() != nullptr)
p_inst->init_kernels(kernels_cache);
}
for (auto& item : _primitives) {

View File

@ -1142,44 +1142,12 @@ void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const {
if (_impl != nullptr) {
ob << true;
kernel_arguments_data args = _impl->get_arguments(*this);
kernel_arguments_data_idx args_idx;
convert_args(args, args_idx);
_impl->set_arguments(args_idx);
ob << _impl;
} else {
ob << false;
}
}
void primitive_inst::convert_args(const kernel_arguments_data& args, kernel_arguments_data_idx& args_idx) const {
if (args.inputs.size() > 0) {
args_idx.inputs.resize(args.inputs.size());
for (uint32_t idx = 0; idx < args.inputs.size(); ++idx) {
args_idx.inputs[idx] = get_index_in_deps(args.inputs[idx]);
}
}
args_idx.weights = (args.weights == nullptr) ? -1 : get_index_in_deps(args.weights);
args_idx.recurrent = (args.recurrent == nullptr) ? -1 : get_index_in_deps(args.recurrent);
args_idx.hidden = (args.hidden == nullptr) ? -1 : get_index_in_deps(args.hidden);
args_idx.cell = (args.cell == nullptr) ? -1 : get_index_in_deps(args.cell);
args_idx.bias = (args.bias == nullptr) ? -1 : get_index_in_deps(args.bias);
args_idx.weights_zero_points = (args.weights_zero_points == nullptr) ? -1 : get_index_in_deps(args.weights_zero_points);
args_idx.activations_zero_points = (args.activations_zero_points == nullptr) ? -1 : get_index_in_deps(args.activations_zero_points);
args_idx.compensation = (args.compensation == nullptr) ? -1 : get_index_in_deps(args.compensation);
args_idx.lookup_table = (args.lookup_table == nullptr) ? -1 : get_index_in_deps(args.lookup_table);
args_idx.scale_table = (args.scale_table == nullptr) ? -1 : get_index_in_deps(args.scale_table);
args_idx.slope = (args.slope == nullptr) ? -1 : get_index_in_deps(args.slope);
if (args.fused_op_inputs.size() > 0) {
args_idx.fused_op_inputs.resize(args.fused_op_inputs.size());
for (uint32_t idx = 0; idx < args.fused_op_inputs.size(); ++idx) {
args_idx.fused_op_inputs[idx] = get_index_in_deps(args.fused_op_inputs[idx]);
}
}
}
int32_t primitive_inst::get_index_in_deps(memory::cptr arg) const {
for (uint32_t idx = 0; idx < _deps.size(); ++idx) {
if (arg == dep_memory_ptr(idx))

View File

@ -74,6 +74,17 @@ std::string quantize_inst::to_string(quantize_node const& node) {
return primitive_description.str();
}
quantize_inst::typed_primitive_inst(network& network, quantize_node const& node) : parent(network, node) {}
quantize_inst::typed_primitive_inst(network& network, quantize_node const& node) : parent(network, node) {
scale_shift_opt = node.get_scale_shift_opt();
}
void quantize_inst::save(cldnn::BinaryOutputBuffer& ob) const {
parent::save(ob);
ob << scale_shift_opt;
}
void quantize_inst::load(BinaryInputBuffer& ib) {
parent::load(ib);
ib >> scale_shift_opt;
}
} // namespace cldnn
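
This is the implementation side of the new scale_shift_opt member declared in the quantize_inst header above: the flag is copied out of the program node at construction time, and save/load chain to the parent before streaming the extra field, because the node is gone by the time a cached network is imported. A minimal sketch of that chain-to-parent discipline:

#include <istream>
#include <ostream>

// Sketch: derived serialization calls the parent first, then appends its
// own fields; load reads them back in exactly the same order.
struct base_inst {
    int id = 0;
    virtual ~base_inst() = default;
    virtual void save(std::ostream& os) const { os << id << ' '; }
    virtual void load(std::istream& is) { is >> id; }
};

struct quantize_like_inst : base_inst {
    bool scale_shift_opt = false;  // captured from the build-time node
    void save(std::ostream& os) const override {
        base_inst::save(os);       // parent fields first
        os << scale_shift_opt << ' ';
    }
    void load(std::istream& is) override {
        base_inst::load(is);
        is >> scale_shift_opt;
    }
};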

View File

@ -229,9 +229,10 @@ CompiledModel::CompiledModel(std::istream& networkModel, InferenceEngine::Remote
setOutputs(new_results);
}
auto graph_base = std::make_shared<Graph>(ib, context_impl, m_config, 0);
auto pos = ib.tellg();
for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) {
auto graph = n == 0 ? graph_base : std::make_shared<Graph>(graph_base, n);
ib.seekg(pos);
auto graph = std::make_shared<Graph>(ib, context_impl, m_config, n);
m_graphs.push_back(graph);
}
}
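
This is where the tellg/seekg pass-throughs added to BinaryInputBuffer in the first hunk pay off: instead of deserializing one graph and copy-constructing the remaining streams from it, the import records the blob position once and rewinds before building each stream's Graph, so every stream deserializes from the same bytes. It is also why the multiple-graphs early-out disappears from is_serializable just below. The same idiom on a bare std::istream, with a hypothetical read_graph standing in for the Graph constructor:

#include <sstream>
#include <string>
#include <vector>

// Hypothetical stand-in for deserializing one Graph from the buffer.
std::string read_graph(std::istream& is) {
    std::string blob;
    is >> blob;
    return blob;
}

int main() {
    std::istringstream ib("serialized_graph_bytes");
    std::vector<std::string> graphs;
    const auto pos = ib.tellg();       // remember where the graph blob starts
    for (int n = 0; n < 2; ++n) {
        ib.seekg(pos);                 // rewind so each stream re-reads it
        graphs.push_back(read_graph(ib));
    }
    return 0;
}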
@ -317,10 +318,6 @@ IInferRequestInternal::Ptr CompiledModel::CreateInferRequest() {
}
bool CompiledModel::is_serializable() {
// Model with multiple graphs is not yet supported.
if (m_graphs.size() != 1)
return false;
// Dynamic model serialization is not yet supported.
if (m_graphs[0]->GetNetwork()->is_dynamic())
return false;

View File

@ -84,7 +84,11 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const
ib >> primitiveIDs;
ib >> outputDims;
m_networks.emplace_back(std::make_shared<cldnn::network>(ib, get_engine().create_stream(config), get_engine(), m_stream_id));
size_t num_networks;
ib >> num_networks;
for (size_t i = 0; i < num_networks; ++i) {
m_networks.emplace_back(std::make_shared<cldnn::network>(ib, get_engine().create_stream(config), get_engine(), m_stream_id));
}
}
Graph::Graph(std::shared_ptr<Graph> graph, uint16_t stream_id)
@ -500,9 +504,10 @@ void Graph::Export(cldnn::BinaryOutputBuffer &ob) {
ob << primitiveIDs;
ob << outputDims;
auto m_network = m_networks.back();
m_network->save(ob);
ob << m_networks.size();
for (auto net : m_networks) {
net->save(ob);
}
}
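
Export and the import constructor above now agree on a count-prefixed layout: the writer emits m_networks.size() followed by each network, and the reader pulls num_networks back and loops instead of assuming a single network. The generic shape of that writer/reader contract, sketched with plain iostreams:

#include <cstdint>
#include <sstream>
#include <string>
#include <vector>

// Sketch: size-prefixed sequence serialization. Both sides must agree
// that the element count comes first.
void save_all(std::ostream& os, const std::vector<std::string>& items) {
    os << static_cast<std::uint64_t>(items.size()) << ' ';
    for (const auto& item : items) os << item << ' ';
}

std::vector<std::string> load_all(std::istream& is) {
    std::uint64_t n = 0;
    is >> n;
    std::vector<std::string> items(n);
    for (auto& item : items) is >> item;
    return items;
}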
std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfo() {

View File

@ -1695,24 +1695,7 @@ struct activation_random_test : testing::TestWithParam<activation_random_test_pa
ExecutionConfig config{ov::intel_gpu::custom_outputs(std::vector<std::string>{"activation"})};
std::shared_ptr<cldnn::network> net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
net->set_input_data("in", in_mem);
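
All of the test churn from here down is one refactor applied file by file: the hand-rolled save-to-membuf-and-reload block is replaced with a shared get_network helper. Reconstructed from the removed blocks, the helper plausibly looks like the following; the exact signature and the config handling are assumptions, since the helper itself is not shown in this diff:

// Hedged reconstruction of the shared test helper, inferred from the
// removed blocks above; the real utility may differ in detail.
cldnn::network::ptr get_network(cldnn::engine& engine,
                                cldnn::topology& topology,
                                const ExecutionConfig& config,
                                cldnn::stream::ptr stream,
                                const bool is_caching_test) {
    cldnn::network::ptr network;
    if (is_caching_test) {
        membuf mem_buf;
        {
            cldnn::network _network(engine, topology, config);
            std::ostream out_mem(&mem_buf);
            BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
            _network.save(ob);                      // serialize the built network
        }
        {
            std::istream in_mem(&mem_buf);
            BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
            network = std::make_shared<cldnn::network>(ib, config, stream, engine);
        }
    } else {
        network = std::make_shared<cldnn::network>(engine, topology, config);
    }
    return network;
}

Every subsequent hunk in this commit makes the same substitution, so the remaining files below all follow this pattern.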

View File

@ -135,24 +135,7 @@ public:
topology.add(adaptive_pooling("adaptive_avg_pooling_blocked", input_info("input_reordered"), params.outputTensor));
topology.add(reorder("adaptive_avg_pooling", input_info("adaptive_avg_pooling_blocked"), plain_layout, data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);

View File

@ -162,25 +162,7 @@ public:
result_id = reorder_result_id;
}
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(input_data_id, input_mem);

View File

@ -123,24 +123,7 @@ void test_add_reorders_gpu_basic_reshape_and_tile(bool is_caching_test) {
set_values(input, input_vec);
tile_ref<T>(input, output_ref, 2, 4);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);

View File

@ -701,24 +701,7 @@ void test_top_k_layer_tests_sort_probabilities_by_indices(bool is_caching_test)
set_values(input, input_vec);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
@ -868,24 +851,7 @@ void test_top_k_layer_md_sync(bool is_caching_test) {
true));
topology.add(mutable_data("arg_max.1", { input_info("arg_max.0") }, shared_memory));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input1", input1);
auto outputs = network->execute();

View File

@ -677,24 +677,7 @@ void test_batch_to_space_fp32_gpu_i41021_bs1221_cb0201_ce0810_b_fs_yx_fsv16(bool
tensor(format::bfyx, {1,8,3,1}, 1)));
topology.add(reorder("bts_to_bfyx", input_info("batch_to_space"), format::bfyx, data_types::f32));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("Input", input);

View File

@ -230,24 +230,7 @@ TEST_P(binary_convolution_test, conv) {
topology_bin.add(binary_convolution(output_name, input_info(input_name), {output_name + weights_suffix},
stride, pad, dilation, os_size, 1, p.pad_value, p.dt));
cldnn::network::ptr network_bin;
if (p.is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology_bin, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network_bin = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network_bin = std::make_shared<cldnn::network>(engine, topology_bin, config);
}
cldnn::network::ptr network_bin = get_network(engine, topology_bin, config, get_test_stream_ptr(), p.is_caching_test);
network_bin->set_input_data(input_name, input);

View File

@ -85,25 +85,7 @@ public:
pad_mode,
pad_value),
reorder("output", input_info("border"), cldnn::format::bfyx, T_dt));
std::shared_ptr<cldnn::network> target_network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, target_topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
target_network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
target_network = std::make_shared<cldnn::network>(engine, target_topology);
}
cldnn::network::ptr target_network = get_network(engine, target_topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
target_network->set_input_data("input", input);
auto target_output = target_network->execute().at("output").get_memory();
cldnn::mem_lock<T> target_output_ptr(target_output, get_test_stream());

View File

@ -212,24 +212,7 @@ void start_broadcast_test_5d(format cldnn_format, data_types cldnn_data_type, st
set_values(input, input_data);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();

View File

@ -59,24 +59,7 @@ struct bucketize_test : testing::TestWithParam<bucketize_test_params<I, B, O>> {
topology.add(
reorder("plane_bucketize_left_bound", input_info("bucketize_left_bound"), format::bfyx, type_to_data_type<O>::value));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
network->set_input_data("buckets", buckets);

View File

@ -127,24 +127,7 @@ void start_cl_mem_check_2_inputs(bool is_caching_test) {
topology.add(input2);
topology.add(reorder("reorder", input_info("input"), input_info("input2"), output_layout));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(*engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, *engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), *engine);
}
} else {
network = std::make_shared<cldnn::network>(*engine, topology);
}
cldnn::network::ptr network = get_network(*engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_memory);
network->set_input_data("input2", input_memory2);

View File

@ -36,24 +36,7 @@ void exexute_network(cldnn::engine& engine, const ExecutionConfig& cfg, bool is_
};
set_values(input, input_vec);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, cfg);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, cfg, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, cfg);
}
cldnn::network::ptr network = get_network(engine, topology, cfg, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();

View File

@ -981,24 +981,7 @@ public:
topology.add(pooling("pool_final", input_info("conv"), pooling_mode::max, {1, 1}, {1, 1}));
topology.add(reorder("reorder", input_info("pool_final"), layout(data_type, format::bfyx, {(int32_t)batch_num, (int32_t)output_f, (int32_t)input_y, (int32_t)input_x})));
std::shared_ptr<cldnn::network> concat_network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
concat_network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
concat_network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr concat_network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
for (size_t i = 0; i < in_features.size(); i++) {
concat_network->set_input_data(input_ids[i], in_memory[i]);

View File

@ -593,24 +593,7 @@ void test_convert_color_i420_to_rgb_three_planes_surface_u8(bool is_caching_test
topology.add(convert_color("convert_color", { input_info("input"), input_info("input2"), input_info("input3") }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB,
cldnn::convert_color::memory_type::image, output_layout));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(*engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, *engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), *engine);
}
} else {
network = std::make_shared<cldnn::network>(*engine, topology);
}
cldnn::network::ptr network = get_network(*engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_memory);
network->set_input_data("input2", input_memory2);

View File

@ -9422,24 +9422,7 @@ void test_convolution_f32_gpu_convolution_gpu_bfyx_f16_depthwise_x_bloxk_size_1(
config.set_property(ov::intel_gpu::optimize_data(true));
ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16_depthwise" };
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } }));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_mem);

View File

@ -1213,24 +1213,7 @@ TEST_P(crop_gpu, pad_test) {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();

View File

@ -104,24 +104,7 @@ public:
topology.add(ctc_loss("ctc_loss", inputs_ids, p.preprocess_collapse_repeated, p.ctc_merge_repeated, p.unique));
topology.add(reorder("reordered_ctc_loss", input_info("ctc_loss"), plane_format, float_data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
for (auto& input : inputs) {
network->set_input_data(std::get<0>(input), std::get<1>(input));

View File

@ -185,24 +185,7 @@ public:
topology.add(input_layout("Input0", input->get_layout()));
topology.add(cum_sum("cum_sum", input_info("Input0"), axis, exclusive, reverse));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("Input0", input);

View File

@ -547,24 +547,7 @@ void test_custom_gpu_primitive_u8_add_basic_in2x2x2x2(bool is_caching_test) {
2, 60, 0, 20
});
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
network->set_input_data("input2", input2);

View File

@ -2360,24 +2360,7 @@ void test_deconvolution_f16_fw_gpu_basic_wsiz2x2_in1x2x2x2_fs_b_yx_fsv32_stride1
ov::intel_gpu::ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);

View File

@ -992,24 +992,7 @@ void test_depth_concatenate_f32_gpu_basic_bfwzyx_along_w(bool is_caching_test) {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input1", input1);

View File

@ -393,24 +393,7 @@ void test_depth_to_space_fp32_gpu_d1822_bs2_depth_first(bool is_caching_test) {
depth_to_space("depth_to_space", input_info("Input0"), block_size, depth_to_space_mode::depth_first)
);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("Input0", input1);

View File

@ -147,25 +147,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -200,25 +182,7 @@ public:
topology.add(detection_output("detection_output_1", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k));
topology.add(detection_output("detection_output_2", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -260,25 +224,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -326,25 +272,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -386,25 +314,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -457,25 +367,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -538,25 +430,7 @@ public:
prior_coordinates_offset, prior_is_normalized, input_width, input_height, decrease_label_id
));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -606,25 +480,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -685,25 +541,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -751,25 +589,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -820,25 +640,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -884,25 +686,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location_padded"), input_info("input_confidence_padded"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -965,25 +749,7 @@ public:
prior_is_normalized, this->img_size, this->img_size
));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);

View File

@ -118,24 +118,7 @@ public:
// It's simpler to use "bfwzyx" format for all cases, as input and output can have different ranks
topology.add(reorder("out", input_info("dft"), format::bfwzyx, data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
const auto outputs = network->execute();

View File

@ -4155,24 +4155,7 @@ struct eltwise_random_test : testing::TestWithParam<eltwise_random_test_params>
ExecutionConfig config_opt;
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"eltwise_opt"}));
std::shared_ptr<cldnn::network> net_opt;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo_opt, config_opt);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
net_opt = std::make_shared<cldnn::network>(ib, config_opt, get_test_stream_ptr(), engine);
}
} else {
net_opt = std::make_shared<cldnn::network>(engine, topo_opt, config_opt);
}
cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test);
net_opt->set_input_data("input1", input1);
net_opt->set_input_data("input2", input2);

View File

@ -1394,24 +1394,7 @@ void test_embedding_bag_fp32_gpu_extended5_6(bool is_caching_test) {
embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape)
);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("Input0", emb_table);
network->set_input_data("Input1", indices);

View File

@ -143,24 +143,7 @@ public:
const primitive_id eddo_id = "experimental_detectron_detection_output";
topology.add(reorder(eddo_id, input_info(b_eddo_primitive) /*b_eddo_id*/, format::bfyx, data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(input_boxes_id, input_boxes);
network->set_input_data(input_deltas_id, input_deltas);

View File

@ -241,24 +241,7 @@ public:
const primitive_id reorder_result_id = edgpsi_id + "Reordered";
topology.add(reorder(reorder_result_id, input_info(edgpsi_primitive), format::bfyx, data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(input_im_info_id, input_im_info);
network->set_input_data(input_anchors_id, input_anchors);

View File

@ -62,24 +62,7 @@ public:
params.imageShape.first,
params.imageShape.second));
cldnn::network::ptr network;
if (params.is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), params.is_caching_test);
network->set_input_data(priors_id, prior_input);

View File

@ -53,24 +53,7 @@ void test_experimental_detectron_roi_feature_extractor_gpu_fp32_one_level(bool i
topology.add(activation(activation_abs_id, feature_extractor_id, activation_func::abs));
topology.add(mutable_data(second_output_r_id, {feature_extractor_id}, second_output));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(input_rois_id, roi_input);
network->set_input_data(input_level_1_id, level_1);

View File

@ -159,22 +159,7 @@ TEST(experimental_detectron_topk_rois_gpu_test, export_import) {
rois_num));
topology.add(reorder("plane_output", input_info(experimental_detectron_topk_rois_id), format::bfyx, test_data_type));
cldnn::network::ptr network;
{
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), true);
network->set_input_data(input_rois_id, roi_input);
network->set_input_data(input_indices_id, roi_indices);

View File

@ -518,24 +518,7 @@ void test_extract_image_patches_gpu_basic5(bool is_caching_test) {
topology.add(input_layout("Input0", input->get_layout()));
topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("Input0", input);
auto outputs = network->execute();

View File

@ -85,24 +85,7 @@ public:
tp.add(reorder("output", input_info("eye"), oupput_fmt, type_to_data_type<OutputType>::value));
}
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine_, tp);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine_);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine_);
}
} else {
network = std::make_shared<cldnn::network>(engine_, tp);
}
cldnn::network::ptr network = get_network(engine_, tp, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
auto outputs = network->execute();

View File

@ -30,24 +30,7 @@ inline void DoTest(engine& engine,
gather_elements("gather_elements", input_info("InputData"), input_info("InputIndices"), input1->get_layout().format, output_tensor, axis)
);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("InputData", input0);
network->set_input_data("InputIndices", input1);

View File

@ -1938,24 +1938,7 @@ void test_gather_gpu_u8_322_axisF(bool is_caching_test) {
topology.add(
gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 2, 1}));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("InputDictionary", input1);
network->set_input_data("InputText", input2);

View File

@ -39,24 +39,7 @@ inline void DoTestBase(engine& engine,
topology.add(input_layout("InputIndices", input1->get_layout()));
topology.add(gather_nd_inst);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("InputData", input0);
network->set_input_data("InputIndices", input1);

View File

@ -213,24 +213,7 @@ public:
const primitive_id reorder_result_id = result_id + "_reordered";
topology.add(reorder(reorder_result_id, input_info(result_id), plain_layout, data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(step_id, step_input);
network->set_input_data(parent_id, parent_input);

View File

@ -95,7 +95,7 @@ protected:
public:
virtual ~GemmGPUTest() {}
void test() {
void test(bool is_caching_test = false) {
fill_gemm_params();
@ -124,12 +124,31 @@ public:
tp.add(g);
tp.add(reorder("output", input_info("gemm_output"), format::bfyx, data_types::f32));
network network(engine, tp);
for (auto &input : network_inputs) {
network.set_input_data(input.first, input.second);
cldnn::network::ptr network;
if (is_caching_test) {
std::cout << "cached" << std::endl;
membuf mem_buf;
{
cldnn::network _network(engine, tp);
process_program(_network.get_program());
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, tp);
process_program(network->get_program());
}
auto outputs = network.execute();
process_program(network.get_program());
for (auto &input : network_inputs) {
network->set_input_data(input.first, input.second);
}
auto outputs = network->execute();
auto output = outputs.at("output").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
@ -160,7 +179,7 @@ class GemmGPUTestRandom : public GemmGPUTest {
auto &v = input_data[i];
v.resize(size);
for(size_t i = 0; i < size; ++i) {
v[i] = generate_random_value() / 10.f;
v[i] = generate_random_value() / 20.f;
}
}
}
@ -233,12 +252,13 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(false), ::testing::Values(true),
::testing::Values(1.0f), ::testing::Values(0.0f)));
TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
template <typename T>
void test_basic_bfyx_t2_inplace_crop_with_pad(bool is_caching_test) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 4, 3 } });
auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 4, 1 } });
std::vector<float> input_data = {
std::vector<T> input_data = {
1.f, -2.f, 3.f, -4.f,
5.f, 6.f, 1.f, 2.f,
3.f, 3.f, 2.f, -1.f,
@ -248,13 +268,13 @@ TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
3.f, 3.f, 2.f, -1.f,
};
std::vector<float> input_data2 = {
std::vector<T> input_data2 = {
2.f, 5.f, -4.f, -7.f,
};
set_values(input, input_data);
set_values(input2, input_data2);
std::vector<float> out_data = {
std::vector<T> out_data = {
8.f, 22.f, 20.f
};
@ -274,13 +294,13 @@ TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);
network.set_input_data("input", input);
network.set_input_data("input2", input2);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
network->set_input_data("input2", input2);
auto outputs = network->execute();
auto output = outputs.at("output").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
ASSERT_EQ(output_ptr.size(), (uint32_t)3);
for (uint32_t i = 0; i < out_data.size(); ++i) {
@ -288,6 +308,10 @@ TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
}
}
TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
test_basic_bfyx_t2_inplace_crop_with_pad<float>(false);
}
TEST(gemm_gpu, dynamic) {
auto& engine = get_test_engine();
ov::Shape in1_shape = { 1, 1, 3, 4 };
@ -745,7 +769,7 @@ INSTANTIATE_TEST_SUITE_P(
GemmGPUTestRandom,
::testing::Combine(
::testing::Values(std::vector<std::vector<int32_t>>{{ 5, 1, 500, 9 }, { 5, 1, 1, 500 }}),
::testing::Values(std::vector<std::vector<float>>{{}, {}}),
::testing::Values(std::vector<std::vector<float>>{{}, {}}),
::testing::ValuesIn(planar_formats),
::testing::ValuesIn(float_types),
::testing::Values(std::vector<float>{}),
@ -1182,7 +1206,7 @@ public:
return (x % x_size) * x_pitch + (y % y_size) * y_pitch + (f % f_num) * f_pitch + (b % b_num) * b_pitch;
}
void execute(gemm_params& p) {
void execute(gemm_params& p, bool is_caching_test = false) {
auto& engine = get_test_engine();
if (!engine.get_device_info().supports_immad)
return;
@ -1294,13 +1318,13 @@ public:
#endif
cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_bfyx", gemm_impl} }));
network network(engine, topology, cfg);
network.set_input_data("input0", input0_mem);
network.set_input_data("input1", input1_mem);
cldnn::network::ptr network = get_network(engine, topology, cfg, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input0", input0_mem);
network->set_input_data("input1", input1_mem);
if (p.beta != 0) {
network.set_input_data("input2", input2_mem);
network->set_input_data("input2", input2_mem);
}
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("reorder_bfyx").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
@ -1649,4 +1673,50 @@ INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_fp16_tiled_nn_broadcast_tests, ::testing
#endif // ENABLE_ONEDNN_FOR_GPU
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(GemmGPUTest, basic_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(GemmGPUTestRandom, basic_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
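ASSERT_NO_FATAL_FAILURE is what makes a fatal ASSERT_* inside the shared test() body abort the cached variant as well; on its own, a fatal failure only aborts the helper function it occurs in. A standalone gtest illustration (names are hypothetical):
#include <gtest/gtest.h>
// A fatal ASSERT_* aborts only check_positive itself...
static void check_positive(int v) {
    ASSERT_GT(v, 0);
}
TEST(WrapperDemo, PropagatesFatalFailure) {
    // ...but the wrapper promotes it to a fatal failure of this test, so
    // nothing after this line runs when check_positive fails.
    ASSERT_NO_FATAL_FAILURE(check_positive(-1));
}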
#ifdef ENABLE_ONEDNN_FOR_GPU
TEST_P(gemm_int8_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_uint8_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_uint8_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_broadcasting_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_broadcasting_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_broadcasting_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_combo_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_uint8_combo_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_combo_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_combo_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
#else
TEST_P(gemm_int8_transposition_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_broadcast_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_leftovers_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_combo_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_slm_combo_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_tiled_nn_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_tiled_nt_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_tiled_tn_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_tiled_tt_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_tiled_nn_broadcast_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_tiled_nn_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_tiled_nt_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_tiled_tn_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_tiled_tt_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_tiled_nn_broadcast_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
#endif // ENABLE_ONEDNN_FOR_GPU
#endif // RUN_ALL_MODEL_CACHING_TESTS
TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad_cached) {
test_basic_bfyx_t2_inplace_crop_with_pad<float>(true);
}
} // namespace

View File

@ -355,24 +355,7 @@ public:
const primitive_id reorder_result_id = generate_proposals_id + "Reordered";
topology.add(reorder(reorder_result_id, input_info(generate_proposals_id), format::bfyx, data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(input_im_info_id, input_im_info);
network->set_input_data(input_anchors_id, input_anchors);

View File

@ -7,6 +7,12 @@
#include "intel_gpu/primitives/grid_sample.hpp"
#include "test_utils/test_utils.h"
#ifdef RUN_ALL_MODEL_CACHING_TESTS
#define RUN_CACHING_TEST false, true
#else
#define RUN_CACHING_TEST false
#endif
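The macro folds the caching mode into each test's parameter Combine; it expands roughly as:
// With RUN_ALL_MODEL_CACHING_TESTS defined, the bool axis covers both modes:
//   testing::Values(RUN_CACHING_TEST)  ==>  testing::Values(false, true)
// Otherwise only the non-cached variant is generated:
//   testing::Values(RUN_CACHING_TEST)  ==>  testing::Values(false)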
using namespace cldnn;
using namespace tests;
@ -24,7 +30,7 @@ struct grid_sample_test_inputs {
};
template <class TD, class TG>
using grid_sample_test_params = std::tuple<grid_sample_test_inputs<TD, TG>, format::type>;
using grid_sample_test_params = std::tuple<grid_sample_test_inputs<TD, TG>, format::type, bool>;
template <class T>
float getError();
@ -45,7 +51,8 @@ public:
void test() {
format::type fmt;
grid_sample_test_inputs<TD, TG> p;
std::tie(p, fmt) = testing::TestWithParam<grid_sample_test_params<TD, TG>>::GetParam();
bool is_caching_test;
std::tie(p, fmt, is_caching_test) = testing::TestWithParam<grid_sample_test_params<TD, TG>>::GetParam();
auto& engine = get_test_engine();
const auto data_data_type = type_to_data_type<TD>::value;
@ -68,10 +75,10 @@ public:
topology.add(grid_sample("grid_sample", { input_info("reordered_data"), input_info("reordered_grid") }, p.attributes));
topology.add(reorder("plane_grid_sample", input_info("grid_sample"), plane_format, data_data_type));
network network(engine, topology);
network.set_input_data("data", data);
network.set_input_data("grid", grid);
const auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("data", data);
network->set_input_data("grid", grid);
const auto outputs = network->execute();
ASSERT_EQ(outputs.size(), std::size_t(1));
ASSERT_EQ(outputs.begin()->first, "plane_grid_sample");
@ -88,11 +95,13 @@ public:
static std::string PrintToStringParamName(const testing::TestParamInfo<grid_sample_test_params<TD, TG>>& info) {
format::type fmt;
grid_sample_test_inputs<TD, TG> p;
std::tie(p, fmt) = info.param;
bool is_caching_test;
std::tie(p, fmt, is_caching_test) = info.param;
std::ostringstream result;
result << "TestName=" << p.test_name << ";";
result << "Format=" << fmt_to_str(fmt);
result << "Format=" << fmt_to_str(fmt) << ";";
result << "Cached=" << bool_to_str(is_caching_test) << ";";
return result.str();
}
};
@ -673,13 +682,23 @@ TEST_P(grid_sample_gpu_test_FLOAT16_FLOAT16, test) {
INSTANTIATE_TEST_SUITE_P(smoke_grid_sample_gpu_test_float_float,
grid_sample_gpu_test_float_float,
testing::Combine(testing::ValuesIn(getParamsToCheckLayouts<float, float>()),
testing::ValuesIn(layout_formats)),
testing::ValuesIn(layout_formats),
testing::Values(RUN_CACHING_TEST)),
grid_sample_gpu_test_float_float::PrintToStringParamName);
INSTANTIATE_TEST_SUITE_P(smoke_grid_sample_gpu_test_FLOAT16_FLOAT16,
grid_sample_gpu_test_FLOAT16_FLOAT16,
testing::Combine(testing::ValuesIn(getParamsToCheckLogic<FLOAT16, FLOAT16>()),
testing::Values(format::bfyx)),
testing::Values(format::bfyx),
testing::Values(RUN_CACHING_TEST)),
grid_sample_gpu_test_FLOAT16_FLOAT16::PrintToStringParamName);
#ifndef RUN_ALL_MODEL_CACHING_TESTS
INSTANTIATE_TEST_SUITE_P(smoke_grid_sample_gpu_test_FLOAT16_FLOAT16_cached,
grid_sample_gpu_test_FLOAT16_FLOAT16,
testing::Combine(testing::ValuesIn(getNearestParamsOddDimensionsOuterGrids<FLOAT16, FLOAT16>()),
testing::Values(format::bfyx),
testing::Values(true)),
grid_sample_gpu_test_FLOAT16_FLOAT16::PrintToStringParamName);
#endif
} // namespace

View File

@ -73,24 +73,7 @@ void test_loop_gpu_basic_no_concat(bool is_caching_test)
input_primitive_maps, output_primitive_maps, back_edges, 8)
);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_mem);
network->set_input_data("trip_count", trip_count_mem);
@ -191,25 +174,7 @@ void test_loop_gpu_basic_concat(bool is_caching_test)
input_primitive_maps, output_primitive_maps, back_edges, trip_count)
);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_mem);
network->set_input_data("trip_count", trip_count_mem);
network->set_input_data("initial_condition", initial_condition_mem);
@ -349,25 +314,7 @@ void test_loop_gpu_basic_concat_nested(bool is_caching_test)
/////////////////////////////////
// network execution
/////////////////////////////////
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, main_topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, main_topology);
}
cldnn::network::ptr network = get_network(engine, main_topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_mem);
network->set_input_data("trip_count", trip_count_mem);
network->set_input_data("initial_condition", initial_condition_mem);

View File

@ -11,7 +11,8 @@
using namespace cldnn;
using namespace ::tests;
TEST(lrn_fp32_gpu, basic) {
template <typename T>
void test_fp32_basic(bool is_caching_test) {
// input : 1x16x1x1
// Output : 1x16x1x1
auto& engine = get_test_engine();
@ -22,11 +23,9 @@ TEST(lrn_fp32_gpu, basic) {
const size_t x = 1;
auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } });
std::vector<float> inputVals(b * f * y * x);
std::generate(inputVals.begin(), inputVals.end(), []() {
static float n = 0;
return n++;
});
std::vector<T> inputVals(b * f * y * x);
T n = 0;
std::generate(inputVals.begin(), inputVals.end(), [n]() mutable { return n++; });
set_values(input, inputVals);
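The move from a static local to a mutable by-value capture matters once the same test body can run twice (plain and cached): a static counter keeps incrementing across invocations, so a second run would generate different input data. A standalone sketch, assuming nothing beyond the standard library:
#include <algorithm>
#include <vector>
// Static local: state is shared across calls, so a second call continues
// counting. First call yields {0,1,2,3}, second call {4,5,6,7}.
std::vector<float> fill_static() {
    std::vector<float> v(4);
    std::generate(v.begin(), v.end(), []() {
        static float n = 0;
        return n++;
    });
    return v;
}
// Mutable by-value capture: the counter is recreated per call, so every
// call yields {0,1,2,3}.
std::vector<float> fill_mutable() {
    std::vector<float> v(4);
    float n = 0;
    std::generate(v.begin(), v.end(), [n]() mutable { return n++; });
    return v;
}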
@ -38,11 +37,11 @@ TEST(lrn_fp32_gpu, basic) {
float beta = 1.f;
topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("lrn").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
@ -60,7 +59,12 @@ TEST(lrn_fp32_gpu, basic) {
}
}
TEST(lrn_fp32_gpu, basic2) {
TEST(lrn_fp32_gpu, basic) {
test_fp32_basic<float>(false);
}
template <typename T>
void test_fp32_basic2(bool is_caching_test) {
// input : 1x16x1x1
// Output : 1x16x1x1
auto& engine = get_test_engine();
@ -71,11 +75,9 @@ TEST(lrn_fp32_gpu, basic2) {
const size_t x = 1;
auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } });
std::vector<float> inputVals(b * f * y * x);
std::generate(inputVals.begin(), inputVals.end(), []() {
static float n = 0;
return n++;
});
std::vector<T> inputVals(b * f * y * x);
T n = 0;
std::generate(inputVals.begin(), inputVals.end(), [n]() mutable { return n++; });
set_values(input, inputVals);
@ -87,11 +89,11 @@ TEST(lrn_fp32_gpu, basic2) {
float beta = 1.f;
topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("lrn").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
@ -109,7 +111,12 @@ TEST(lrn_fp32_gpu, basic2) {
}
}
TEST(lrn_fp16_gpu, basic1) {
TEST(lrn_fp32_gpu, basic2) {
test_fp32_basic2<float>(false);
}
template <typename T>
void test_fp16_basic1(bool is_caching_test) {
// input : 1x16x1x1
// Output : 1x16x1x1
auto& engine = get_test_engine();
@ -120,11 +127,9 @@ TEST(lrn_fp16_gpu, basic1) {
const size_t x = 1;
auto input = engine.allocate_memory({ data_types::f16, format::b_fs_yx_fsv16, { b, f, x, y } });
std::vector<half_t> inputVals(b * f * y * x);
std::generate(inputVals.begin(), inputVals.end(), []() {
static float n = 0;
return half_t(n++);
});
std::vector<T> inputVals(b * f * y * x);
float n = 0;
std::generate(inputVals.begin(), inputVals.end(), [n]() mutable { return T(n++); });
set_values(input, inputVals);
@ -136,11 +141,11 @@ TEST(lrn_fp16_gpu, basic1) {
float beta = 1.f;
topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("lrn").get_memory();
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
@ -158,7 +163,12 @@ TEST(lrn_fp16_gpu, basic1) {
}
}
TEST(lrn_fp32_gpu, basic3) {
TEST(lrn_fp16_gpu, basic1) {
test_fp16_basic1<half_t>(false);
}
template <typename T>
void test_fp32_basic3(bool is_caching_test) {
// input : 2x16x4x4
// Output : 2x16x4x4
auto& engine = get_test_engine();
@ -169,11 +179,9 @@ TEST(lrn_fp32_gpu, basic3) {
const size_t x = 4;
auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } });
std::vector<float> inputVals(b * f * y * x);
std::generate(inputVals.begin(), inputVals.end(), []() {
static float n = 0;
return n++;
});
std::vector<T> inputVals(b * f * y * x);
T n = 0;
std::generate(inputVals.begin(), inputVals.end(), [n]() mutable { return n++; });
set_values(input, inputVals);
@ -185,11 +193,11 @@ TEST(lrn_fp32_gpu, basic3) {
float beta = 0.75f;
topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("lrn").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
@ -249,3 +257,24 @@ TEST(lrn_fp32_gpu, basic3) {
ASSERT_TRUE(are_equal(expected_results[i], output_ptr[i])) << i;
}
}
TEST(lrn_fp32_gpu, basic3) {
test_fp32_basic3<float>(false);
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(lrn_fp32_gpu, basic_cached) {
test_fp32_basic<float>(true);
}
TEST(lrn_fp32_gpu, basic2_cached) {
test_fp32_basic2<float>(true);
}
TEST(lrn_fp16_gpu, basic1_cached) {
test_fp16_basic1<half_t>(true);
}
#endif
TEST(lrn_fp32_gpu, basic3_cached) {
test_fp32_basic3<float>(true);
}

View File

@ -191,7 +191,7 @@ void lstm_reference(VVVVF<T>& input, VVVVF<T>& hidden, VVVVF<T>& cell,
template<typename T>
void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size, int input_size, int hidden_size,
bool hasBias = true, bool hasHidden = true) {
bool hasBias, bool hasHidden, bool is_caching_test = false) {
int min_random = -2, max_random = 2;
VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
@ -244,13 +244,13 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size,
topology.add(lstm_gemm("lstm_gemm", input_info("input"), "weights", "recurrent", hasBias ? "biases" : "", hasHidden ? "hidden" : ""));
network network(engine, topology);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
if (hasHidden) {
network.set_input_data("hidden", hidden);
network->set_input_data("hidden", hidden);
}
auto outputs = network.execute();
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
auto output = outputs.begin()->second.get_memory();
@ -264,8 +264,8 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size,
template<typename T>
void generic_lstm_elt_gpu_test(int /* sequence_len */, int direction, int batch_size,
int /* input_size */, int hidden_size, bool hasCell = true,
T clip_threshold = (T)0.f, bool input_forget = false) {
int /* input_size */, int hidden_size, bool hasCell,
T clip_threshold, bool input_forget, bool is_caching_test = false) {
// tempGEMM = [ 1, direction, batch, 4 * hidden_size ] input
// cell = [ 1, direction, batch, hidden_size ] optional
// output = [ 2, direction, batch, hidden_size ] output concat[hidden, cell]
@ -307,13 +307,13 @@ void generic_lstm_elt_gpu_test(int /* sequence_len */, int direction, int batch_
}
topology.add(lstm_elt("lstm_elt", input_info("tempGEMM"), hasCell ? "cell" : "", clip_threshold, input_forget));
network network(engine, topology);
network.set_input_data("tempGEMM", tempGEMM);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("tempGEMM", tempGEMM);
if (hasCell) {
network.set_input_data("cell", cell);
network->set_input_data("cell", cell);
}
auto outputs = network.execute();
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
auto output = outputs.begin()->second.get_memory();
@ -390,7 +390,7 @@ void generate_lstm_topology(topology& t, memory::ptr input, memory::ptr hidden,
template<typename T>
void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_size, int input_size, int hidden_size,
bool hasBias = true, bool hasInitialHidden = true, bool hasInitialCell = true) {
bool hasBias, bool hasInitialHidden, bool hasInitialCell, bool is_caching_test = false) {
std::cout << "Input Size = " << input_size << " Hidden Size = " << hidden_size << " Sequence Len = " << sequence_len << " Batch Size = " << batch_size << std::endl;
int min_random = -2, max_random = 2;
VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
@ -430,11 +430,11 @@ void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_siz
generate_lstm_topology(topology, input, hidden, cell, weights, recurrent, biases, sequence_len,
hasBias, hasInitialHidden, hasInitialCell);
network network(engine, topology);
network.set_input_data("input", input);
if (hasInitialHidden) network.set_input_data("hidden", hidden);
if (hasInitialCell) network.set_input_data("cell", cell);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
if (hasInitialHidden) network->set_input_data("hidden", hidden);
if (hasInitialCell) network->set_input_data("cell", cell);
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
size_t output_size = outputs.begin()->second.get_memory()->size() / sizeof(T);
@ -457,8 +457,8 @@ void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_siz
// -------------------------------------------------------
template<typename T>
void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batch_size, int input_size, int hidden_size,
bool hasBias = true, bool hasInitialHidden = true, bool hasInitialCell = true,
T clip_threshold = 0, bool input_forget = false) {
bool hasBias, bool hasInitialHidden, bool hasInitialCell,
T clip_threshold, bool input_forget, bool is_caching_test = false) {
std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
<< " Sequence Len = " << sequence_len << " Direction = " << direction << " Batch Size = " << batch_size << std::endl;
int min_random = -2, max_random = 2;
@ -596,14 +596,14 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
prev_lstm_id = lstm_id;
}
network network(engine, topology);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
for (int i = 0; i < layers; ++i) {
std::string sid = get_string_id(i);
if (hasInitialHidden) network.set_input_data("hidden" + sid, hidden[i]);
if (hasInitialCell) network.set_input_data("cell" + sid, cell[i]);
if (hasInitialHidden) network->set_input_data("hidden" + sid, hidden[i]);
if (hasInitialCell) network->set_input_data("cell" + sid, cell[i]);
}
auto outputs = network.execute();
auto outputs = network->execute();
{
ASSERT_EQ(outputs.size(), size_t(1));
size_t output_size = outputs.begin()->second.get_memory()->size() / sizeof(T);
@ -637,7 +637,7 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
// -------------------------------------------------------
template<typename T>
void lstm_gpu_output_test(const lstm_output_selection& output_selection, int directions) {
void lstm_gpu_output_test(const lstm_output_selection& output_selection, int directions, bool is_caching_test = false) {
int layers = 1;
int sequence_len = 4;
int batch_size = 3;
@ -722,12 +722,12 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir
topology.add(crop("crop:last_cell", input_info("lstm"), cell_tensor, tensor{0, concatenation_len - 1, 0, 0}));
}
network network(engine, topology);
network.set_input_data("input", input);
network.set_input_data("hidden", hidden);
network.set_input_data("cell", cell);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
network->set_input_data("hidden", hidden);
network->set_input_data("cell", cell);
auto outputs = network.execute();
auto outputs = network->execute();
uint32_t ref_num_output_primitives = 1; // Output will return at least 1 primitive
if (emit_last_cell) {
@ -798,7 +798,7 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir
// -------------------------------------------------------
template<typename T>
void lstm_gpu_format_test(const cldnn::format& format, int directions) {
void lstm_gpu_format_test(const cldnn::format& format, int directions, bool is_caching_test = false) {
int layers = 1;
int sequence_len = 6;
int batch_size = 3;
@ -886,13 +886,14 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) {
topology.add(crop("crop:last_cell", input_info("lstm"), cell_tensor, tensor{0, concatenation_len - 1, 0, 0}));
}
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
std::map<primitive_id, network_output> outputs;
network.set_input_data("input", input);
network.set_input_data("hidden", hidden);
network.set_input_data("cell", cell);
outputs = network.execute();
network->set_input_data("input", input);
network->set_input_data("hidden", hidden);
network->set_input_data("cell", cell);
outputs = network->execute();
uint32_t ref_num_output_primitives = 1; // Output will return at least 1 primitive
@ -979,7 +980,7 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) {
// -------------------------------------------------------
template<typename T>
void lstm_gpu_users_test() {
void lstm_gpu_users_test(bool is_caching_test = false) {
int sequence_len = 2;
int batch_size = 1;
int input_size = 1;
@ -1052,13 +1053,14 @@ void lstm_gpu_users_test() {
std::vector<input_info> output_ids_offsets { input_info("lstm"), input_info("hidden") };
topology.add(concatenation("concatenation", output_ids_offsets, 1));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
std::map<primitive_id, network_output> outputs;
network.set_input_data("input", input);
network.set_input_data("hidden", hidden);
network.set_input_data("cell", cell);
outputs = network.execute();
network->set_input_data("input", input);
network->set_input_data("hidden", hidden);
network->set_input_data("cell", cell);
outputs = network->execute();
// check if the number of returned primitives matches the expected number of output primitives
ASSERT_EQ(size_t(1), outputs.size());
@ -1081,9 +1083,9 @@ void lstm_gpu_users_test() {
template<typename T>
void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int direction,
int batch_size, int input_size, int hidden_size,
bool has_bias = true, bool has_initial_hidden = true,
bool has_initial_cell = true, float clip_threshold = 0,
bool input_forget = false)
bool has_bias, bool has_initial_hidden,
bool has_initial_cell, float clip_threshold,
bool input_forget, bool is_caching_test = false)
{
std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
<< " Sequence Len = " << sequence_len << " Direction = " << direction << " Batch Size = " << batch_size << std::endl;
@ -1210,14 +1212,14 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio
prev_node_id = output_crop_id;
}
network network(engine, topology);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
for (int i = 0; i < layers; ++i) {
std::string sid = get_string_id(i);
if (has_initial_hidden) network.set_input_data("hidden" + sid, hidden[i]);
if (has_initial_cell) network.set_input_data("cell" + sid, cell[i]);
if (has_initial_hidden) network->set_input_data("hidden" + sid, hidden[i]);
if (has_initial_cell) network->set_input_data("cell" + sid, cell[i]);
}
auto outputs = network.execute();
auto outputs = network->execute();
{
ASSERT_EQ(outputs.size(), size_t(1));
size_t output_size = outputs.begin()->second.get_memory()->size() / sizeof(T);
@ -1254,7 +1256,7 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio
template<typename T>
void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size,
int directions, size_t layers, size_t chains, int sequence_len,
const lstm_output_selection& output_selection)
const lstm_output_selection& output_selection, bool is_caching_test = false)
{
int min_random = -2, max_random = 2;
bool has_bias = false;
@ -1553,15 +1555,15 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size,
}
// Creating the network from the topology designed above
cldnn::network network(engine, topology);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
for (size_t layer = 0; layer < layers; layer++) {
std::string sid = get_string_id(layer);
if (has_initial_hidden) network.set_input_data("hidden:000:" + sid, hidden[0][layer]); // 0 is the chain link index
if (has_initial_cell) network.set_input_data("cell:000:" + sid, cell[0][layer]); // 0 is the chain link index
if (has_initial_hidden) network->set_input_data("hidden:000:" + sid, hidden[0][layer]); // 0 is the chain link index
if (has_initial_cell) network->set_input_data("cell:000:" + sid, cell[0][layer]); // 0 is the chain link index
}
auto outputs = network.execute();
auto outputs = network->execute();
for (auto itr = outputs.begin(); itr != outputs.end(); itr++)
{
auto output_layout = itr->second.get_memory()->get_layout();
@ -1666,23 +1668,23 @@ TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_hidden_bias_f32) {
// LSTM ELT Tests
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_f32) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.3f);
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.3f, false);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_input_forget_f32) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, 1);
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, true);
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_input_forget_f32) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.5f, 1);
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.5f, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_f32) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true);
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, false);
}
TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f32) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, false);
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, false, 0.f, false);
}
TEST(lstm_custom_gpu, generic_lstm_custom_f32) {
@ -1720,35 +1722,35 @@ TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_hidden_cell_f32) {
// generic_lstm_gpu_test parameters:
// layers, sequence, dir, batch, input, hidden, bias, initial_h, initial_cell, threshold, coupled_input_forget
TEST(lstm_gpu, generic_lstm_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, false, true, true);
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, false, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_hidden_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, true);
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, true);
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_cell_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, true, false);
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, true, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_cell_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, true, false);
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, true, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_hidden_cell_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, false);
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, false);
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, false, 0, false);
}
TEST(DISABLED_lstm_gpu, generic_lstm_clip_f32) {
@ -1765,46 +1767,46 @@ TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f32) {
TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f32) {
default_offset_type = lstm_weights_order::ifoz;
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0, false);
default_offset_type = lstm_weights_order::iofz;
}
TEST(lstm_gpu, generic_lstm_canonical_f32) {
generic_lstm_gpu_test<float>(1, 1, 1, 1, 1, 1, true, true, true);
generic_lstm_gpu_test<float>(1, 1, 1, 1, 1, 1, true, true, true, 0, false);
}
// bidirectional support
TEST(lstm_gpu, generic_lstm_bi_f32) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, false, false, false);
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, false, false, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_bi_bias_f32) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, false, false);
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, false, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f32) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, false);
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f32) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, true);
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, true, 0, false);
}
// multi-layer support
TEST(lstm_gpu, generic_lstm_stacked_no_seq_f32) {
generic_lstm_gpu_test<float>(4, 1, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<float>(4, 1, 1, 3, 3, 2, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_stacked_seq_f32) {
generic_lstm_gpu_test<float>(4, 7, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<float>(4, 7, 1, 3, 3, 2, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_stacked_bi_f32) {
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_stacked_seq_bi_f32) {
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true, 0, false);
}
// optional outputs support
@ -1864,11 +1866,11 @@ TEST(lstm_gpu, lstm_users_f32) {
// Test for LSTM with concatenated input
TEST(lstm_gpu, generic_lstm_concatenated_input) {
lstm_gpu_concatenated_input_test<float>(1, 2, 2, 1, 1, 1, true, true, true);
lstm_gpu_concatenated_input_test<float>(1, 2, 2, 1, 1, 1, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_concatenated_input_multi_layer) {
lstm_gpu_concatenated_input_test<float>(5, 5, 2, 1, 1, 4, true, true, true);
lstm_gpu_concatenated_input_test<float>(5, 5, 2, 1, 1, 4, true, true, true, 0, false);
}
// test for LSTM with chain and stack (multilayer)
@ -1938,55 +1940,55 @@ TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_bias_f16) {
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_f16) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.3f);
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.3f, false);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_input_forget_f16) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, 1);
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, true);
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_input_forget_f16) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.5f, 1);
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.5f, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_f16) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true);
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, false);
}
TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f16) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, false);
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, false, 0.f, false);
}
TEST(lstm_gpu, generic_lstm_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, false, true, true);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, false, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_hidden_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, true);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, true);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_cell_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, true, false);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, true, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_cell_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, true, false);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, true, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_hidden_cell_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, false);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, false);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, false, 0, false);
}
TEST(DISABLED_lstm_gpu, generic_lstm_clip_f16) {
@ -2003,37 +2005,396 @@ TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f16) {
TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f16) {
default_offset_type = lstm_weights_order::ifoz;
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0, false);
default_offset_type = lstm_weights_order::iofz;
}
TEST(lstm_gpu, generic_lstm_canonical_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 1, 1, 1, 1, 1, true, true, true);
generic_lstm_gpu_test<FLOAT16>(1, 1, 1, 1, 1, 1, true, true, true, 0, false);
}
// bidirectional support
TEST(lstm_gpu, generic_lstm_bi_bias_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, false, false);
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, false, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, false);
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, true);
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, true, 0, false);
}
// multi-layer support
TEST(lstm_gpu, generic_lstm_stacked_seq_f16) {
generic_lstm_gpu_test<FLOAT16>(4, 7, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<FLOAT16>(4, 7, 1, 3, 3, 2, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_stacked_bi_f16) {
generic_lstm_gpu_test<FLOAT16>(4, 7, 2, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<FLOAT16>(4, 7, 2, 3, 3, 2, true, true, true, 0, false);
}
// TODO: Add tests for the following:
// integration testing using multi-layer and chained LSTMs
// LSTMs single input
// optional activation list
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(lstm_gemm_gpu, generic_lstm_gemm_test_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, true, true, true);
}
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_bias_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, false, true, true);
}
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, true, false, true);
}
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_bias_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, false, false, true);
}
TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_test_f32_cached) {
generic_lstm_gemm_gpu_test<float>(5, 1, 1, 1024, 1024, true, true, true);
}
TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_bias_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 1, 256, 2, false, true, true);
}
TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_hidden_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 1, 64, 2, true, false, true);
}
TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_hidden_bias_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 1, 64, 2, false, false, true);
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_f32_cached) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.3f, false, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_input_forget_f32_cached) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, true, true);
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_input_forget_f32_cached) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.5f, true, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_f32_cached) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, false, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f32_cached) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, false, 0.f, false, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, true, true, true, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, false, true, true, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_hidden_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, true, false, true, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_hidden_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, false, false, true, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_cell_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, true, true, false, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_cell_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, false, true, false, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_hidden_cell_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, true, false, false, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_hidden_cell_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, false, false, false, true);
}
TEST(lstm_gpu, generic_lstm_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, false, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_hidden_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_cell_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, true, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_cell_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, true, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_hidden_cell_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, false, 0, false, true);
}
TEST(DISABLED_lstm_gpu, generic_lstm_clip_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 0, true);
}
TEST(lstm_gpu, generic_lstm_input_forget_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0.f, 1, true);
}
TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 1, true);
}
TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f32_cached) {
default_offset_type = lstm_weights_order::ifoz;
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
default_offset_type = lstm_weights_order::iofz;
}
TEST(lstm_gpu, generic_lstm_canonical_f32_cached) {
generic_lstm_gpu_test<float>(1, 1, 1, 1, 1, 1, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_bi_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, false, false, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_bi_bias_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, false, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_stacked_no_seq_f32_cached) {
generic_lstm_gpu_test<float>(4, 1, 1, 3, 3, 2, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_stacked_seq_f32_cached) {
generic_lstm_gpu_test<float>(4, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_stacked_bi_f32_cached) {
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_stacked_seq_bi_f32_cached) {
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true, 0, false, true);
}
TEST(lstm_gpu, output_test_sequence_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::sequence, 1, true);
}
TEST(lstm_gpu, output_test_hidden_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::hidden, 1, true);
}
TEST(lstm_gpu, output_test_hidden_cell_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::hidden_cell, 1, true);
}
TEST(lstm_gpu, output_test_sequence_cell_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::sequence_cell, 1, true);
}
TEST(lstm_gpu, output_test_sequence_bi_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::sequence, 2, true);
}
TEST(lstm_gpu, output_test_hidden_bi_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::hidden, 2, true);
}
TEST(lstm_gpu, output_test_hidden_cell_bi_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::hidden_cell, 2, true);
}
TEST(lstm_gpu, output_test_sequence_cell_bi_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::sequence_cell, 2, true);
}
TEST(lstm_gpu, lstm_gpu_format_bfyx_f32_cached) {
lstm_gpu_format_test<float>(cldnn::format::bfyx, 1, true);
}
TEST(lstm_gpu, lstm_gpu_format_bfyx_bi_f32_cached) {
lstm_gpu_format_test<float>(cldnn::format::bfyx, 2, true);
}
TEST(lstm_gpu, lstm_gpu_format_fyxb_f32_cached) {
lstm_gpu_format_test<float>(cldnn::format::fyxb, 1, true);
}
TEST(lstm_gpu, lstm_gpu_format_fyxb_bi_f32_cached) {
lstm_gpu_format_test<float>(cldnn::format::fyxb, 2, true);
}
TEST(lstm_gpu, lstm_users_f32_cached) {
lstm_gpu_users_test<float>(true);
}
TEST(lstm_gpu, generic_lstm_concatenated_input_cached) {
lstm_gpu_concatenated_input_test<float>(1, 2, 2, 1, 1, 1, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_concatenated_input_multi_layer_cached) {
lstm_gpu_concatenated_input_test<float>(5, 5, 2, 1, 1, 4, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_chained_unidirectional_f32_cached) {
lstm_gpu_chain_test<float>(1, 2, 4, 1, 1, 2, 1, lstm_output_selection::sequence_cell, true);
}
TEST(lstm_gpu, generic_lstm_chained_bidirectional_f32_cached) {
lstm_gpu_chain_test<float>(1, 2, 4, 2, 1, 1, 1, lstm_output_selection::sequence_cell, true);
}
TEST(lstm_gpu, generic_lstm_chained_no_stack_bidirectional_f32_cached) {
lstm_gpu_chain_test<float>(2, 2, 4, 2, 1, 2, 5, lstm_output_selection::sequence_cell, true);
}
TEST(lstm_gpu, generic_lstm_chained_stacked_bidirectional_f32_cached) {
lstm_gpu_chain_test<float>(2, 2, 4, 2, 4, 2, 5, lstm_output_selection::sequence_cell, true);
}
// FP16 half-precision tests
TEST(lstm_gemm_gpu, generic_lstm_gemm_test_f16_cached) {
generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, true, true, true);
}
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_bias_f16_cached) {
generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, false, true, true);
}
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_f16_cached) {
generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, true, false, true);
}
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_bias_f16_cached) {
generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, false, false, true);
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_f16_cached) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.3f, false, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_input_forget_f16_cached) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, true, true);
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_input_forget_f16_cached) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.5f, true, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_f16_cached) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, false, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f16_cached) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, false, 0.f, false, true);
}
TEST(lstm_gpu, generic_lstm_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, false, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_hidden_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_cell_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, true, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_cell_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, true, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_hidden_cell_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, false, 0, false, true);
}
TEST(DISABLED_lstm_gpu, generic_lstm_clip_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 0, true);
}
TEST(lstm_gpu, generic_lstm_input_forget_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0.f, 1, true);
}
TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 1, true);
}
TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f16_cached) {
default_offset_type = lstm_weights_order::ifoz;
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
default_offset_type = lstm_weights_order::iofz;
}
TEST(lstm_gpu, generic_lstm_canonical_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 1, 1, 1, 1, 1, true, true, true, 0, false, true);
}
// bidirectional support
TEST(lstm_gpu, generic_lstm_bi_bias_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, false, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_stacked_seq_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(4, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
}
#endif
TEST(lstm_gpu, generic_lstm_stacked_bi_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(4, 7, 2, 3, 3, 2, true, true, true, 0, false, true);
}
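The churn in the hunks above is mechanical: the LSTM helpers gained a trailing caching flag, so the plain variants now spell out the previously implicit 0, false for threshold and coupled_input_forget, and the _cached variants append true. The updated declaration itself sits outside this diff; inferred from the call sites and the parameter-list comment above, it plausibly reads:

// Presumed shape of the extended helper. The declaration is not part of this
// diff, so parameter names follow the "layers, sequence, dir, batch, input,
// hidden, bias, initial_h, initial_cell, threshold, coupled_input_forget"
// comment above and the exact types may differ.
template <typename T>
void generic_lstm_gpu_test(int layers, int sequence, int dir, int batch,
                           int input_size, int hidden_size,
                           bool has_bias, bool has_initial_hidden, bool has_initial_cell,
                           float clip_threshold, bool input_forget,
                           bool is_caching_test = false);

Keeping generic_lstm_stacked_bi_f16_cached outside the RUN_ALL_MODEL_CACHING_TESTS guard looks deliberate: one representative cached LSTM test still runs in default builds.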

View File

@ -48,7 +48,7 @@ struct matrix_nms_test_inputs {
std::string test_name;
};
using matrix_nms_test_params = std::tuple<matrix_nms_test_inputs, format::type>;
using matrix_nms_test_params = std::tuple<matrix_nms_test_inputs, format::type, bool>;
template <class T>
struct matrix_nms_gpu_test : public testing::TestWithParam<matrix_nms_test_params> {
@ -56,7 +56,8 @@ public:
void test() {
format::type blocked_format;
matrix_nms_test_inputs test_inputs;
std::tie(test_inputs, blocked_format) = testing::TestWithParam<matrix_nms_test_params>::GetParam();
bool is_caching_test;
std::tie(test_inputs, blocked_format, is_caching_test) = testing::TestWithParam<matrix_nms_test_params>::GetParam();
const auto data_type = type_to_data_type<T>::value;
const auto plain_format = format::bfyx;
@ -106,11 +107,12 @@ public:
attrs));
topology.add(reorder("matrix_nms", input_info("reordered_matrix_nms"), plain_format, data_type));
network network(engine, topology);
network.set_input_data("boxes", boxes);
network.set_input_data("scores", scores);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
auto outputs = network.execute();
network->set_input_data("boxes", boxes);
network->set_input_data("scores", scores);
auto outputs = network->execute();
auto output = outputs.at("matrix_nms").get_memory();
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
@ -124,14 +126,16 @@ public:
ASSERT_NEAR(expected_output[i], output_ptr[i], THRESHOLD);
}
ASSERT_EQ(test_inputs.expected_selected_boxes.size(), selected_boxes_ptr.size());
for (size_t i = 0; i < test_inputs.expected_selected_boxes.size(); ++i) {
ASSERT_EQ(test_inputs.expected_selected_boxes[i], selected_boxes_ptr[i]);
}
if (!is_caching_test) {
ASSERT_EQ(test_inputs.expected_selected_boxes.size(), selected_boxes_ptr.size());
for (size_t i = 0; i < test_inputs.expected_selected_boxes.size(); ++i) {
ASSERT_EQ(test_inputs.expected_selected_boxes[i], selected_boxes_ptr[i]);
}
ASSERT_EQ(test_inputs.expected_valid_outputs.size(), valid_outputs_ptr.size());
for (size_t i = 0; i < test_inputs.expected_valid_outputs.size(); ++i) {
ASSERT_EQ(test_inputs.expected_valid_outputs[i], valid_outputs_ptr[i]);
ASSERT_EQ(test_inputs.expected_valid_outputs.size(), valid_outputs_ptr.size());
for (size_t i = 0; i < test_inputs.expected_valid_outputs.size(); ++i) {
ASSERT_EQ(test_inputs.expected_valid_outputs[i], valid_outputs_ptr[i]);
}
}
}
@ -158,7 +162,8 @@ public:
result << "Normalized=" << bool_to_str(test_inputs.normalized) << "_";
result << "sort_result_type=" << sort_res_type_str << "_";
result << "decay_function=" << decay_function_str << "_";
result << "Format=" << fmt_to_str(std::get<1>(info.param));
result << "Format=" << fmt_to_str(std::get<1>(info.param)) << "_";
result << "Cached=" << bool_to_str(std::get<2>(info.param));
if (!test_inputs.test_name.empty())
result << "_TN=" << test_inputs.test_name;
@ -630,6 +635,12 @@ const std::vector<format::type> layout_formats = {format::bfyx,
format::bs_fs_yx_bsv32_fsv32,
format::bs_fs_yx_bsv32_fsv16};
#ifdef RUN_ALL_MODEL_CACHING_TESTS
const std::vector<bool> run_caching_test = {false, true};
#else
const std::vector<bool> run_caching_test = {false};
#endif
#define INSTANTIATE_MATRIX_NMS_TEST_SUITE(input_type, func) \
using matrix_nms_gpu_test_##input_type##func = matrix_nms_gpu_test<input_type>; \
TEST_P(matrix_nms_gpu_test_##input_type##func, test) { \
@ -637,7 +648,8 @@ const std::vector<format::type> layout_formats = {format::bfyx,
} \
INSTANTIATE_TEST_SUITE_P(matrix_nms_test_##input_type##func, \
matrix_nms_gpu_test_##input_type##func, \
testing::Combine(testing::Values(func()), testing::ValuesIn(layout_formats)), \
testing::Combine(testing::Values(func()), testing::ValuesIn(layout_formats), \
testing::ValuesIn(run_caching_test)), \
matrix_nms_gpu_test_##input_type##func::PrintToStringParamName);
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_smoke_inputs)
@ -668,6 +680,14 @@ INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_top_k_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_single_box_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_no_output_inputs)
#ifndef RUN_ALL_MODEL_CACHING_TESTS
INSTANTIATE_TEST_SUITE_P(matrix_nms_test_FLOAT16get_matrix_nms_smoke_inputs_cached,
matrix_nms_gpu_test_FLOAT16get_matrix_nms_smoke_inputs,
testing::Combine(testing::Values(get_matrix_nms_smoke_inputs()), testing::ValuesIn(layout_formats),
testing::Values(true)),
matrix_nms_gpu_test_FLOAT16get_matrix_nms_smoke_inputs::PrintToStringParamName);
#endif
#undef INSTANTIATE_MATRIX_NMS_TEST_SUITE
} // namespace
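Pieced together, the matrix_nms hunks follow one recipe: the caching flag becomes a third axis of the value-parameterized suite, the name generator appends a Cached= token, and the full {false, true} cross product is built only when RUN_ALL_MODEL_CACHING_TESTS is defined, with a single FLOAT16 smoke instantiation kept as cached coverage otherwise. A consolidated sketch of that recipe, with placeholder names (my_suite, my_fixture, my_inputs):

// Consolidated sketch of the pattern used above; placeholder names only.
#ifdef RUN_ALL_MODEL_CACHING_TESTS
const std::vector<bool> run_caching_test = {false, true};  // every case runs twice
#else
const std::vector<bool> run_caching_test = {false};        // caching axis collapses
#endif

INSTANTIATE_TEST_SUITE_P(my_suite,
                         my_fixture,
                         testing::Combine(testing::ValuesIn(my_inputs),
                                          testing::ValuesIn(layout_formats),
                                          testing::ValuesIn(run_caching_test)));

Each test body then receives the flag through std::tie on the widened tuple, exactly as the fixture change at the top of this file shows.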

View File

@ -57,6 +57,8 @@ struct MulticlassNmsParams {
std::vector<T> expected_selected_outputs;
std::vector<T_IND> expected_selected_indices;
std::vector<T_IND> expected_selected_num;
bool is_caching_test;
};
template<typename T, typename T_IND>
@ -170,15 +172,16 @@ public:
topology.add(reorder("multiclass_nms", input_info("multiclass_nms_reordered"), plain_format, data_type));
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(false));
network network(engine, topology, config);
network.set_input_data("input_boxes", input_boxes);
network.set_input_data("input_scores", input_scores);
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), param.is_caching_test);
network->set_input_data("input_boxes", input_boxes);
network->set_input_data("input_scores", input_scores);
if (param.has_roisnum) {
network.set_input_data("input_roisnum", input_roisnum);
network->set_input_data("input_roisnum", input_roisnum);
}
const auto outputs = network.execute();
const auto outputs = network->execute();
const auto output_boxes = outputs.at("multiclass_nms").get_memory();
const cldnn::mem_lock<T> output_boxes_ptr(output_boxes, get_test_stream());
@ -209,13 +212,17 @@ public:
get_test_stream());
ASSERT_EQ(output_selected_num_ptr.size(), param.num_batches) << "format=" << fmt_to_str(target_format);
for (size_t i = 0; i < param.num_batches; ++i) {
ASSERT_EQ(param.expected_selected_num[i], output_selected_num_ptr[i])
<< "format=" << fmt_to_str(target_format) << " i=" << i;
if (!param.is_caching_test) {
for (size_t i = 0; i < param.num_batches; ++i) {
ASSERT_EQ(param.expected_selected_num[i], output_selected_num_ptr[i])
<< "format=" << fmt_to_str(target_format) << " i=" << i;
}
}
for (size_t box = 0; box < dim; ++box) {
ASSERT_EQ(param.expected_selected_indices[box], output_selected_indices_ptr[box]) << "box=" << box;
if (!param.is_caching_test) {
ASSERT_EQ(param.expected_selected_indices[box], output_selected_indices_ptr[box]) << "box=" << box;
}
for (size_t j = 0; j < 6; ++j) {
const auto idx = box * 6 + j;
@ -266,7 +273,7 @@ TEST_P(multiclass_nms_test_blocked, basic) {
}
template<typename T, typename T_IND>
std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams(bool is_caching_test = false) {
std::vector<MulticlassNmsParams<T, T_IND>> params = {
{"by_score",
cldnn::multiclass_nms::sort_result_type::score,
@ -292,7 +299,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
0.00, 0.90, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{3, 0, 0, 3, -1, -1},
std::vector<T_IND>{4}},
std::vector<T_IND>{4},
is_caching_test},
{"by_class_id",
cldnn::multiclass_nms::sort_result_type::classid,
@ -306,7 +314,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{3, 0, 0, 3, -1, -1},
std::vector<T_IND>{4}},
std::vector<T_IND>{4},
is_caching_test},
{"three_inputs",
cldnn::multiclass_nms::sort_result_type::score,
@ -346,7 +355,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{1, 0, -1, -1, -1, -1,
2, 3, -1, -1, -1, -1},
std::vector<T_IND>{2, 2}},
std::vector<T_IND>{2, 2},
is_caching_test},
{"across_batches_by_score",
cldnn::multiclass_nms::sort_result_type::score,
@ -384,7 +394,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{3, 0, 6, 0, -1, -1, 3, 9, 4, 5, -1, -1},
std::vector<T_IND>{4, 4}},
std::vector<T_IND>{4, 4},
is_caching_test},
{"across_batches_by_class_id",
cldnn::multiclass_nms::sort_result_type::classid,
@ -423,7 +434,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{3, 0, 0, 3, -1, -1, 4, 5, 6, 9, -1, -1},
std::vector<T_IND>{4, 4}},
std::vector<T_IND>{4, 4},
is_caching_test},
{"normalized",
cldnn::multiclass_nms::sort_result_type::score,
@ -449,7 +461,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
getValues<T>({0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 1.00,
1.00, 0.00, 0.00, 0.00, 0.75, 0.00, 0.10, 1.00, 1.10}),
std::vector<T_IND>{3, 0, 1},
std::vector<T_IND>{3}},
std::vector<T_IND>{3},
is_caching_test},
{"identical_boxes",
cldnn::multiclass_nms::sort_result_type::score,
@ -477,7 +490,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{0, -1, -1},
std::vector<T_IND>{1}},
std::vector<T_IND>{1},
is_caching_test},
{"limit_output_size",
cldnn::multiclass_nms::sort_result_type::score,
@ -501,7 +515,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
std::vector<T_IND>{},
getValues<T>({0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00, 0.00, 1.00, 1.00}),
std::vector<T_IND>{3, 0},
std::vector<T_IND>{2}},
std::vector<T_IND>{2},
is_caching_test},
{"single_box",
cldnn::multiclass_nms::sort_result_type::score,
@ -525,7 +540,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
getValues<T>({0.00, 0.90, 0.00, 0.00, 1.00, 1.00}),
std::vector<T_IND>{0},
std::vector<T_IND>{1}},
std::vector<T_IND>{1},
is_caching_test},
{"iou_threshold",
cldnn::multiclass_nms::sort_result_type::score,
@ -551,7 +567,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
getValues<T>({0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00,
0.00, 1.00, 1.00, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{3, 0, -1},
std::vector<T_IND>{2}},
std::vector<T_IND>{2},
is_caching_test},
{"iou_and_score_thresholds",
cldnn::multiclass_nms::sort_result_type::score,
@ -577,7 +594,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
getValues<T>({0.00, 0.96, 0.00, 10.00, 1.00, 11.00, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{3, -1, -1},
std::vector<T_IND>{1}},
std::vector<T_IND>{1},
is_caching_test},
{"no_output",
cldnn::multiclass_nms::sort_result_type::score,
@ -607,7 +625,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{-1, -1, -1, -1, -1, -1},
std::vector<T_IND>{0}},
std::vector<T_IND>{0},
is_caching_test},
{"background_class",
cldnn::multiclass_nms::sort_result_type::classid,
@ -648,7 +667,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{0, 3, -1, 6, 9, -1},
std::vector<T_IND>{2, 2}},
std::vector<T_IND>{2, 2},
is_caching_test},
{"keep_top_k",
cldnn::multiclass_nms::sort_result_type::classid,
@ -681,7 +701,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
1.00, 0.80, 0.00, 10.00, 1.00, 11.00}),
std::vector<T_IND>{3, 0, 0, 4, 6, 9},
std::vector<T_IND>{3, 3}},
std::vector<T_IND>{3, 3},
is_caching_test},
{"normalized_by_classid",
cldnn::multiclass_nms::sort_result_type::classid,
@ -735,14 +756,15 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
-1, -1, -1, -1, -1, -1,
2, 4, 5, 6, 9, 11,
-1, -1, -1, -1, -1, -1},
std::vector<T_IND>{6, 6}},
std::vector<T_IND>{6, 6},
is_caching_test},
};
return params;
}
template<typename T, typename T_IND>
std::vector<MulticlassNmsParams<T, T_IND>> getParamsForBlockedLayout() {
std::vector<MulticlassNmsParams<T, T_IND>> getParamsForBlockedLayout(bool is_caching_test = false) {
MulticlassNmsParams<T, T_IND> param = {
"blocked_format_three_inputs",
cldnn::multiclass_nms::sort_result_type::score,
@ -798,7 +820,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getParamsForBlockedLayout() {
std::vector<T_IND>{1, 0, -1, -1, -1, -1,
2, 3, -1, -1, -1, -1},
std::vector<T_IND>{2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
is_caching_test
};
const auto indices_size = param.num_batches * param.num_boxes;
@ -829,8 +852,24 @@ INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test,
PrintToStringParamName());
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_blocked,
multiclass_nms_test_f32_i32,
multiclass_nms_test_blocked,
::testing::ValuesIn(getParamsForBlockedLayout<float, int32_t>()),
PrintToStringParamName());
#ifdef RUN_ALL_MODEL_CACHING_TESTS
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_cached,
multiclass_nms_test_f32_i32,
::testing::ValuesIn(getMulticlassNmsParams<float, int32_t>(true)),
PrintToStringParamName());
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_cached,
multiclass_nms_test_f16_i64,
::testing::ValuesIn(getMulticlassNmsParams<half_t, int64_t>(true)),
PrintToStringParamName());
#endif
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_blocked_cached,
multiclass_nms_test_blocked,
::testing::ValuesIn(getParamsForBlockedLayout<float, int32_t>(true)),
PrintToStringParamName());
}; // namespace
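multiclass_nms reaches the same end by a different route: instead of widening the gtest tuple, the flag travels inside each MulticlassNmsParams entry. The parameter factories take it with a default of false and stamp it into every case, so a cached instantiation is just a second ValuesIn over the same table. In miniature (signatures as in the diff, bodies elided):

// The factory stamps the caching flag into every case it returns.
template <typename T, typename T_IND>
std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams(bool is_caching_test = false);

// Uncached and cached instantiations then share one parameter table.
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_cached,
                         multiclass_nms_test_f32_i32,
                         ::testing::ValuesIn(getMulticlassNmsParams<float, int32_t>(true)),
                         PrintToStringParamName());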

View File

@ -104,112 +104,80 @@ void mvn_compute_mean_within_channels(cldnn::memory::ptr output, bool normalize_
}
}
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx) {
template <typename T>
void test_mvn_test_across_channels_outside_sqrt_bfyx(bool is_caching_test) {
// mvn across channels fp32 test with normalize_variance set to false
using namespace cldnn;
using namespace ::tests;
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}});
cldnn::data_types input_data_type = std::is_same<T, FLOAT16>::value ? data_types::f16 : data_types::f32;
tests::set_random_values<float>(input, true, 8, 100);
auto input = engine.allocate_memory({input_data_type, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<T>(input, true, 8, 100);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, false, true));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "mvn");
auto output = outputs.begin()->second.get_memory();
mvn_compute_mean_across_channels<float>(output, false);
mvn_compute_mean_across_channels<T>(output, false);
}
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx) {
test_mvn_test_across_channels_outside_sqrt_bfyx<float>(false);
}
template <typename T>
void test_mvn_test_across_channels_inside_sqrt_bfyx(bool is_caching_test) {
// mvn across channels fp32 test with normalize_variance set to false
using namespace cldnn;
using namespace tests;
auto& engine = get_test_engine();
cldnn::data_types input_data_type = std::is_same<T, FLOAT16>::value ? data_types::f16 : data_types::f32;
auto input = engine.allocate_memory({input_data_type, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<T>(input, true, 8, 100);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, true));
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "mvn");
auto output = outputs.begin()->second.get_memory();
mvn_compute_mean_across_channels<T>(output, false);
}
TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx) {
// mvn across channels fp32 test with normalize_variance set to false
using namespace cldnn;
using namespace tests;
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<float>(input, true, 8, 100);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, true));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "mvn");
auto output = outputs.begin()->second.get_memory();
mvn_compute_mean_across_channels<float>(output, false);
test_mvn_test_across_channels_inside_sqrt_bfyx<float>(false);
}
TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_outside_sqrt_fp16) {
// mvn across channels fp16 test with normalize_variance set to false
using namespace cldnn;
using namespace ::tests;
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<FLOAT16>(input, true, 8, 100);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, false, true));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "mvn");
auto output = outputs.begin()->second.get_memory();
mvn_compute_mean_across_channels<FLOAT16>(output, false);
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_fp16) {
test_mvn_test_across_channels_outside_sqrt_bfyx<FLOAT16>(false);
}
TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_fp16) {
// mvn across channels fp16 test with normalize_variance set to false
using namespace cldnn;
using namespace tests;
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<FLOAT16>(input, true, 8, 100);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, true));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "mvn");
auto output = outputs.begin()->second.get_memory();
mvn_compute_mean_across_channels<FLOAT16>(output, false);
test_mvn_test_across_channels_inside_sqrt_bfyx<FLOAT16>(false);
}
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_normalize_variance) {
@ -666,7 +634,7 @@ struct mvn_random_test : ::testing::TestWithParam<mvn_basic_test_params> {
}
}
void execute(const mvn_basic_test_params& params, engine& eng) {
void execute(const mvn_basic_test_params& params, engine& eng, bool is_caching_test) {
auto& size = params.input_size;
auto& output_pad = params.output_pad;
@ -695,11 +663,11 @@ struct mvn_random_test : ::testing::TestWithParam<mvn_basic_test_params> {
prim.output_paddings = {output_pad};
topo.add(prim);
network net(eng, topo);
cldnn::network::ptr net = get_network(eng, topo, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
net.set_input_data("input", input);
net->set_input_data("input", input);
auto outputs = net.execute();
auto outputs = net->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "mvn");
@ -710,7 +678,7 @@ struct mvn_random_test : ::testing::TestWithParam<mvn_basic_test_params> {
TEST_P(mvn_random_test, random) {
auto& engine = tests::get_test_engine();
this->execute(GetParam(), engine);
this->execute(GetParam(), engine, false);
}
struct mvn_test_case_generator : std::vector<mvn_basic_test_params> {
@ -857,7 +825,7 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam<mvn_basic_test_params> {
}
}
void execute(const mvn_basic_test_params& params) {
void execute(const mvn_basic_test_params& params, bool is_caching_test) {
auto& size = params.input_size;
auto& output_pad = params.output_pad;
auto& engine = get_test_engine();
@ -888,10 +856,11 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam<mvn_basic_test_params> {
config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"mvn"}));
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"mvn", {format::type::bfyx, "mvn_gpu_bfyx_opt"}} }));
network net(engine, topo, config);
net.set_input_data("input", input);
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto outputs = net.execute();
net->set_input_data("input", input);
auto outputs = net->execute();
auto output = outputs.at("mvn").get_memory();
topology topo_opt;
@ -904,10 +873,11 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam<mvn_basic_test_params> {
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"mvn_opt", "input_to_target_layout"}));
config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"mvn_opt", {params.input_format, "mvn_gpu_b_fs_yx_fsv16_imad"}} }));
network net_opt(engine, topo_opt, config_opt);
net_opt.set_input_data("input", input);
cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test);
auto outputs_opt = net_opt.execute();
net_opt->set_input_data("input", input);
auto outputs_opt = net_opt->execute();
auto output_opt = outputs_opt.at("mvn_opt").get_memory();
auto output_dtype = output->get_layout().data_type;
@ -933,7 +903,7 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam<mvn_basic_test_params> {
};
TEST_P(mvn_random_test_bsv32, random) {
this->execute(GetParam());
this->execute(GetParam(), false);
}
struct mvn_test_case_generator_bsv32 : std::vector<mvn_basic_test_params> {
@ -964,3 +934,29 @@ INSTANTIATE_TEST_SUITE_P(mvn_fsv16,
mvn_random_test_bsv32,
testing::ValuesIn(mvn_test_case_generator_bsv32()
.bsv32_tests(format::b_fs_yx_fsv16, data_types::i8)));
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_cached) {
test_mvn_test_across_channels_outside_sqrt_bfyx<float>(true);
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_cached) {
test_mvn_test_across_channels_inside_sqrt_bfyx<float>(true);
}
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_fp16_cached) {
test_mvn_test_across_channels_outside_sqrt_bfyx<FLOAT16>(true);
}
TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_fp16_cached) {
test_mvn_test_across_channels_inside_sqrt_bfyx<FLOAT16>(true);
}
TEST_P(mvn_random_test, random_cached) {
auto& engine = tests::get_test_engine();
this->execute(GetParam(), engine, true);
}
TEST_P(mvn_random_test_bsv32, random_cached) {
this->execute(GetParam(), true);
}
#endif

View File

@ -128,24 +128,7 @@ struct non_max_suppression_basic : public testing::Test {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
@ -206,24 +189,7 @@ struct non_max_suppression_basic : public testing::Test {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
@ -294,24 +260,7 @@ struct non_max_suppression_basic : public testing::Test {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
@ -430,24 +379,7 @@ struct non_max_suppression_basic : public testing::Test {
config.set_property(ov::intel_gpu::optimize_data(true));
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
@ -556,24 +488,7 @@ struct non_max_suppression_basic : public testing::Test {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
@ -630,24 +545,7 @@ struct non_max_suppression_basic : public testing::Test {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
@ -708,24 +606,7 @@ struct non_max_suppression_basic : public testing::Test {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
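The seven hunks above all collapse the same copy-pasted save/load block into one get_network call. The helper itself lives in the shared test utilities and is not part of this diff; reconstructed from the inline code it replaces, it plausibly looks like this:

// Plausible reconstruction of the shared helper. The signature is inferred
// from the call sites and the body from the inline block removed above; the
// real implementation may differ in details.
cldnn::network::ptr get_network(cldnn::engine& engine,
                                cldnn::topology& topology,
                                const ExecutionConfig& config,
                                std::shared_ptr<cldnn::stream> stream,
                                const bool is_caching_test) {
    if (!is_caching_test)
        return std::make_shared<cldnn::network>(engine, topology, config);

    // Round-trip the built network through an in-memory buffer so the test
    // exercises the same serialize/deserialize path as a cached model load.
    membuf mem_buf;
    {
        cldnn::network _network(engine, topology, config);
        std::ostream out_mem(&mem_buf);
        BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
        _network.save(ob);
    }
    std::istream in_mem(&mem_buf);
    BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
    return std::make_shared<cldnn::network>(ib, config, stream, engine);
}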

View File

@ -54,6 +54,57 @@ struct normalize_basic : public testing::Test {
return inputVals;
}
void execute(bool is_caching_test) {
// Input : 1x2x3x3
// Output : 1x2x3x3
auto& engine = get_test_engine();
const unsigned b = 1;
const unsigned f = 2;
const unsigned y = 3;
const unsigned x = 3;
auto input = engine.allocate_memory({this->data_type, format::bfyx, {b, f, y, x}});
auto weights = engine.allocate_memory({data_types::f32, format::bfyx, {1, f, 1, 1}});
auto inputVals = this->get_input_values(b, f, y, x);
std::vector<float> weightVals(f);
for (auto& it : weightVals) {
it = 1.f;
}
set_values(input, inputVals);
set_values(weights, weightVals);
topology topology;
topology.add(input_layout("Input0", input->get_layout()));
topology.add(data("Input1", weights));
topology.add(reorder("reordered_Input0", input_info("Input0"), this->format, this->data_type));
topology.add(reorder("reordered_Input1", input_info("Input1"), this->format, data_types::f32));
topology.add(normalize("normalize2", input_info("reordered_Input0"), "reordered_Input1", this->across_spatial));
topology.add(reorder("plane_normalize2", input_info("normalize2"), format::bfyx, this->output_data_type));
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("Input0", input);
auto outputs = network->execute();
auto output = outputs.at("plane_normalize2").get_memory();
if (this->data_type == data_types::f16) {
cldnn::mem_lock<half_t> output_ptr(output, get_test_stream());
auto expected_results = this->get_expected_result();
for (size_t i = 0; i < expected_results.size(); ++i) {
ASSERT_NEAR(expected_results[i], output_ptr[i], 0.001);
}
} else {
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto expected_results = this->get_expected_result();
for (size_t i = 0; i < expected_results.size(); ++i) {
ASSERT_TRUE(are_equal(expected_results[i], output_ptr[i]));
}
}
}
private:
static const std::vector<output_type> get_expected_result(std::true_type) {
static const std::vector<float> result = {0.f,
@ -144,52 +195,23 @@ using format_types = testing::Types<normalize_input_types<format::bfyx, float, f
TYPED_TEST_SUITE(normalize_basic, format_types);
TYPED_TEST(normalize_basic, basic) {
// Input : 1x2x3x3
// Output : 1x2x3x3
auto& engine = get_test_engine();
const unsigned b = 1;
const unsigned f = 2;
const unsigned y = 3;
const unsigned x = 3;
auto input = engine.allocate_memory({this->data_type, format::bfyx, {b, f, y, x}});
auto weights = engine.allocate_memory({data_types::f32, format::bfyx, {1, f, 1, 1}});
auto inputVals = this->get_input_values(b, f, y, x);
std::vector<float> weightVals(f);
for (auto& it : weightVals) {
it = 1.f;
}
set_values(input, inputVals);
set_values(weights, weightVals);
topology topology;
topology.add(input_layout("Input0", input->get_layout()));
topology.add(data("Input1", weights));
topology.add(reorder("reordered_Input0", input_info("Input0"), this->format, this->data_type));
topology.add(reorder("reordered_Input1", input_info("Input1"), this->format, data_types::f32));
topology.add(normalize("normalize2", input_info("reordered_Input0"), "reordered_Input1", this->across_spatial));
topology.add(reorder("plane_normalize2", input_info("normalize2"), format::bfyx, this->output_data_type));
network network(engine, topology);
network.set_input_data("Input0", input);
auto outputs = network.execute();
auto output = outputs.at("plane_normalize2").get_memory();
if (this->data_type == data_types::f16) {
cldnn::mem_lock<half_t> output_ptr(output, get_test_stream());
auto expected_results = this->get_expected_result();
for (size_t i = 0; i < expected_results.size(); ++i) {
ASSERT_NEAR(expected_results[i], output_ptr[i], 0.001);
}
} else {
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto expected_results = this->get_expected_result();
for (size_t i = 0; i < expected_results.size(); ++i) {
ASSERT_TRUE(are_equal(expected_results[i], output_ptr[i]));
}
}
this->execute(false);
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TYPED_TEST(normalize_basic, basic_cached) {
this->execute(true);
}
#else
template <typename NormalizeInput>
struct normalize_basic_cached : public normalize_basic<NormalizeInput> {
};
using format_types_cached = testing::Types<normalize_input_types<format::bfyx, float, false>>;
TYPED_TEST_SUITE(normalize_basic_cached, format_types_cached);
TYPED_TEST(normalize_basic_cached, basic) {
this->execute(true);
}
#endif

View File

@ -66,7 +66,7 @@ VVVVF<T> one_hot_cpu(VVVVF<T> &input, uint16_t axis,
template <typename T>
void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int input_f, int input_y, int input_x, tensor shape,
uint16_t one_hot_axis, int input_padding_y = 0, int input_padding_x = 0, int output_padding_y = 0, int output_padding_x = 0) {
uint16_t one_hot_axis, int input_padding_y, int input_padding_x, int output_padding_y, int output_padding_x, bool is_caching_test) {
std::vector<tensor::value_type> output_dims = { shape.batch[0], shape.feature[0],
shape.spatial[1], shape.spatial[0] };
int32_t one_hot_limit = output_dims[one_hot_axis];
@ -84,9 +84,9 @@ void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int inp
topology.add(input_layout("input", input->get_layout()));
topology.add(one_hot("output", input_info("input"), shape, one_hot_axis, one_hot_limit));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "output");
@ -130,17 +130,33 @@ void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int inp
}
TEST(one_hot_gpu_i32, generic) {
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0);
generic_one_hot_test_int<int32_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0, 0, 0, 0, 0, false);
generic_one_hot_test_int<int32_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1, 0, 0, 0, 0, false);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2, 0, 0, 0, 0, false);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3, 0, 0, 0, 0, false);
}
TEST(one_hot_gpu_i64, generic) {
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0);
generic_one_hot_test_int<int64_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0, 0, 0, 0, 0, false);
generic_one_hot_test_int<int64_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1, 0, 0, 0, 0, false);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2, 0, 0, 0, 0, false);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3, 0, 0, 0, 0, false);
}
TEST(one_hot_gpu_i32, generic_cached) {
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0, 0, 0, 0, 0, true);
#ifdef RUN_ALL_MODEL_CACHING_TESTS
generic_one_hot_test_int<int32_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1, 0, 0, 0, 0, true);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2, 0, 0, 0, 0, true);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3, 0, 0, 0, 0, true);
}
TEST(one_hot_gpu_i64, generic_cached) {
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0, 0, 0, 0, 0, true);
generic_one_hot_test_int<int64_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1, 0, 0, 0, 0, true);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2, 0, 0, 0, 0, true);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3, 0, 0, 0, 0, true);
#endif
}
TEST(one_hot_gpu_i32, bfzyx_ax4) {


@ -1627,7 +1627,7 @@ public:
template<data_types Data_Type>
void run_test(const std::vector<cldnn::tensor::value_type>& sizes, cldnn::format format_fsv,
const std::string & permute_opt = "permute_tile_8x8_4x4_fsv",
std::vector<uint16_t> permute_order = {});
std::vector<uint16_t> permute_order = {}, bool is_caching_test = false);
};
template<>
@ -1654,7 +1654,7 @@ void TiledPermuteTest::set_random_values<int8_t>(const cldnn::memory::ptr mem) c
template<data_types Data_Type>
void TiledPermuteTest::run_test(const std::vector<cldnn::tensor::value_type>& sizes, cldnn::format format_fsv,
const std::string & permute_opt, std::vector<uint16_t> permute_order)
const std::string & permute_opt, std::vector<uint16_t> permute_order, bool is_caching_test)
{
// convert half_t to FLOAT16
using type_ = typename data_type_to_type<Data_Type>::type;
@ -1690,9 +1690,9 @@ void TiledPermuteTest::run_test(const std::vector<cldnn::tensor::value_type>& si
ov::intel_gpu::ImplementationDesc permute_ref = { format_fsv, "permute_ref" };
config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"output", permute_ref} }));
cldnn::network network_ref(engine, topology_ref, config_ref);
network_ref.set_input_data("input", input);
auto outputs_ref = network_ref.execute();
cldnn::network::ptr network_ref = get_network(engine, topology_ref, config_ref, get_test_stream_ptr(), is_caching_test);
network_ref->set_input_data("input", input);
auto outputs_ref = network_ref->execute();
auto output_ref = outputs_ref.begin()->second.get_memory();
cldnn::mem_lock<type> output_ref_ptr(output_ref, get_test_stream());
@ -1701,9 +1701,9 @@ void TiledPermuteTest::run_test(const std::vector<cldnn::tensor::value_type>& si
ov::intel_gpu::ImplementationDesc permute_tile_opt = { format_fsv, permute_opt };
config_tile.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"output", permute_tile_opt} }));
cldnn::network network_tile(engine, topology_ref, config_tile);
network_tile.set_input_data("input", input);
auto outputs_tile = network_tile.execute();
cldnn::network::ptr network_tile = get_network(engine, topology_ref, config_tile, get_test_stream_ptr(), is_caching_test);
network_tile->set_input_data("input", input);
auto outputs_tile = network_tile->execute();
auto output_tile = outputs_tile.begin()->second.get_memory();
cldnn::mem_lock<type> output_tile_ptr(output_tile, get_test_stream());
@ -1920,3 +1920,59 @@ TEST_P(permute_bfzyx_to_bfyxz, combined) {
auto p = GetParam();
run_test<cldnn::data_types::f32>(p.sizes, p.format_fsv, "permute_bfzyx_to_bfyxz", {0, 1, 3, 4, 2});
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(permute_tile_fsv_4d, f16_cached) {
auto p = GetParam();
run_test<cldnn::data_types::f16>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_4d, f32_cached) {
auto p = GetParam();
run_test<cldnn::data_types::f32>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_4d, i8_cached) {
auto p = GetParam();
run_test<cldnn::data_types::i8>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_4d, i32_cached) {
auto p = GetParam();
run_test<cldnn::data_types::i32>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_4d, i64_cached) {
auto p = GetParam();
run_test<cldnn::data_types::i64>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_5d, f16_cached) {
auto p = GetParam();
run_test<cldnn::data_types::f16>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_5d, f32_cached) {
auto p = GetParam();
run_test<cldnn::data_types::f32>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_5d, i8_cached) {
auto p = GetParam();
run_test<cldnn::data_types::i8>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_5d, i32_cached) {
auto p = GetParam();
run_test<cldnn::data_types::i32>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_bfzyx_to_bfyxz, combined_cached) {
auto p = GetParam();
run_test<cldnn::data_types::f32>(p.sizes, p.format_fsv, "permute_bfzyx_to_bfyxz", {0, 1, 3, 4, 2}, true);
}
#endif
TEST_P(permute_tile_fsv_5d, i64_cached) {
auto p = GetParam();
run_test<cldnn::data_types::i64>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
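The caching path of the helper sketched after the one_hot hunks only needs an in-memory stream to round-trip the serialized network. A self-contained sketch of such a `membuf`; the actual test utility may differ:
#include <streambuf>
#include <vector>

class membuf : public std::streambuf {
public:
    membuf() : _pos(0) {}

protected:
    // std::ostream writes arrive here one character at a time.
    int_type overflow(int_type ch) override {
        if (ch != traits_type::eof())
            _buf.push_back(static_cast<char>(ch));
        return ch;
    }
    // std::istream peeks at the current character here...
    int_type underflow() override {
        return _pos < _buf.size() ? traits_type::to_int_type(_buf[_pos])
                                  : traits_type::eof();
    }
    // ...and consumes it here.
    int_type uflow() override {
        return _pos < _buf.size() ? traits_type::to_int_type(_buf[_pos++])
                                  : traits_type::eof();
    }

private:
    std::vector<char> _buf;
    std::size_t _pos;
};
// Usage: std::ostream out(&buf); /* save */  then  std::istream in(&buf); /* load */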


@ -1933,11 +1933,12 @@ public:
return "pool";
}
virtual void run_expect(const VVVVVF<output_t>& expected) {
virtual void run_expect(const VVVVVF<output_t>& expected, bool is_caching_test) {
auto& eng = get_test_engine();
auto topo = build_topology(eng);
ExecutionConfig config(ov::intel_gpu::optimize_data(true));
cldnn::network net(eng, topo, config);
cldnn::network::ptr net = get_network(eng, topo, config, get_test_stream_ptr(), is_caching_test);
auto input_size = tensor(batch(batch_num()), feature(input_features()), spatial(input_x(), input_y(), input_z()));
auto input_lay = layout(input_type(),
@ -1956,20 +1957,22 @@ public:
}
set_values(input_mem, input_flat);
net.set_input_data("input", input_mem);
auto result = net.execute();
net->set_input_data("input", input_mem);
auto result = net->execute();
auto out_mem = result.at(output_id()).get_memory();
auto out_lay = out_mem->get_layout();
cldnn::mem_lock<output_t> out_ptr(out_mem, get_test_stream());
std::string kernel;
for (auto i : net.get_primitives_info()) {
if (i.original_id == "pool") {
kernel = i.kernel_id;
if (!is_caching_test) {
std::string kernel;
for (auto i : net->get_primitives_info()) {
if (i.original_id == "pool") {
kernel = i.kernel_id;
}
}
std::cout << kernel << std::endl;
SCOPED_TRACE("\nkernel: " + kernel);
}
std::cout << kernel << std::endl;
SCOPED_TRACE("\nkernel: " + kernel);
ASSERT_EQ(out_lay.data_type, output_type());
ASSERT_EQ(out_lay.batch(), expected.size());
@ -2115,10 +2118,10 @@ public:
this->set_offsets(o_x, o_y, o_z);
}
void run_random(const pooling_random_test_params& params) {
void run_random(const pooling_random_test_params& params, bool is_caching_test) {
param_set_up(params);
auto reference = calculate_reference();
ASSERT_NO_FATAL_FAILURE(this->run_expect(reference));
ASSERT_NO_FATAL_FAILURE(this->run_expect(reference, is_caching_test));
}
};
@ -2131,22 +2134,22 @@ struct pooling_random_test : public testing::TestWithParam<pooling_random_test_p
TEST_P(pooling_random_test, max_i8) {
auto test_case = max_pooling_i8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
TEST_P(pooling_random_test, max_u8) {
auto test_case = max_pooling_u8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
TEST_P(pooling_random_test, avg_i8) {
auto test_case = avg_pooling_i8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
TEST_P(pooling_random_test, avg_u8) {
auto test_case = avg_pooling_u8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
INSTANTIATE_TEST_SUITE_P(
@ -2243,22 +2246,22 @@ using pooling_random_test_fp16_fp32 = pooling_random_test;
TEST_P(pooling_random_test_fp16_fp32, avg_fp16) {
auto test_case = pooling_random_test_base<FLOAT16, pooling_mode::average>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
TEST_P(pooling_random_test_fp16_fp32, max_fp16) {
auto test_case = pooling_random_test_base<FLOAT16, pooling_mode::max>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
TEST_P(pooling_random_test_fp16_fp32, avg_fp32) {
auto test_case = pooling_random_test_base<float, pooling_mode::average>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
TEST_P(pooling_random_test_fp16_fp32, max_fp32) {
auto test_case = pooling_random_test_base<float, pooling_mode::max>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
INSTANTIATE_TEST_SUITE_P(
@ -3249,3 +3252,44 @@ TEST(pooling_forward_gpu_onednn, basic_max_pooling_int8) {
}
#endif // ENABLE_ONEDNN_FOR_GPU
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(pooling_random_test, max_i8_cached) {
auto test_case = max_pooling_i8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
TEST_P(pooling_random_test, max_u8_cached) {
auto test_case = max_pooling_u8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
TEST_P(pooling_random_test, avg_i8_cached) {
auto test_case = avg_pooling_i8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
TEST_P(pooling_random_test, avg_u8_cached) {
auto test_case = avg_pooling_u8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
TEST_P(pooling_random_test_fp16_fp32, avg_fp16_cached) {
auto test_case = pooling_random_test_base<FLOAT16, pooling_mode::average>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
TEST_P(pooling_random_test_fp16_fp32, max_fp16_cached) {
auto test_case = pooling_random_test_base<FLOAT16, pooling_mode::max>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
TEST_P(pooling_random_test_fp16_fp32, avg_fp32_cached) {
auto test_case = pooling_random_test_base<float, pooling_mode::average>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
#endif // RUN_ALL_MODEL_CACHING_TESTS
TEST_P(pooling_random_test_fp16_fp32, max_fp32_cached) {
auto test_case = pooling_random_test_base<float, pooling_mode::max>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}


@ -40,7 +40,7 @@ using prior_box_param = std::tuple<format, // Input and ou
template <class InputType, class OutputType>
class PriorBoxGPUTest : public ::testing::TestWithParam<prior_box_param<InputType, OutputType>> {
public:
void SetUp() override {
void execute(bool is_caching_test) {
const auto input_data_type = type_to_data_type<InputType>::value;
const auto output_data_type = type_to_data_type<OutputType>::value;
const auto plain_format = format::bfyx;
@ -92,8 +92,10 @@ public:
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(false));
network network(engine, topo, config);
const auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
const auto outputs = network->execute();
const auto output = outputs.at("prior_box").get_memory();
cldnn::mem_lock<OutputType> output_ptr(output, get_test_stream());
@ -107,7 +109,9 @@ public:
};
using prior_box_test_i32_f32 = PriorBoxGPUTest<int32_t, float>;
TEST_P(prior_box_test_i32_f32, prior_box_test_i32_f32) {}
TEST_P(prior_box_test_i32_f32, prior_box_test_i32_f32) {
this->execute(false);
}
INSTANTIATE_TEST_SUITE_P(
prior_box_test_all_formats,
@ -261,4 +265,37 @@ INSTANTIATE_TEST_SUITE_P(
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1
})));
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(prior_box_test_i32_f32, prior_box_test_i32_f32_cached) {
this->execute(true);
}
#else
using prior_box_test_i32_f32_cached = PriorBoxGPUTest<int32_t, float>;
TEST_P(prior_box_test_i32_f32_cached, prior_box_test_i32_f32) {
this->execute(true);
}
INSTANTIATE_TEST_SUITE_P(
prior_box_test_four_variances,
prior_box_test_i32_f32_cached,
testing::Combine(
testing::Values(format::bfyx),
testing::Values(std::vector<int32_t>{2, 2}),
testing::Values(std::vector<int32_t>{10, 10}),
testing::Values(
prior_box_attributes{{2.0f}, {5.0f}, {1.5f}, {}, {}, {}, false, false, 0.0f, 0.0f, {0.1, 0.2, 0.3, 0.4}, true, true}),
testing::Values(std::vector<float>{
0.15, 0.15, 0.35, 0.35, 0.0918861, 0.0918861, 0.408114, 0.408114, 0.127526, 0.16835, 0.372474, 0.33165,
0.65, 0.15, 0.85, 0.35,
0.591886, 0.0918861, 0.908114, 0.408114, 0.627526, 0.16835, 0.872474, 0.33165, 0.15, 0.65, 0.35, 0.85,
0.0918861, 0.591886, 0.408114, 0.908114,
0.127526, 0.66835, 0.372474, 0.83165, 0.65, 0.65, 0.85, 0.85, 0.591886, 0.591886, 0.908114, 0.908114,
0.627526, 0.66835, 0.872474, 0.83165,
0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4,
0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4,
0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4,
0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4
})));
#endif
} // namespace


@ -15,7 +15,8 @@ using namespace ::tests;
//We expect additional reorder to be added in between "weights1" and "reshape1".
//This situation should be handled properly by propagate constants optimization phase
TEST(propagate_constants, copy_dependecies_from_nodes) {
template <typename T>
void test_copy_dependecies_from_nodes(bool is_caching_test) {
auto& engine = get_test_engine();
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
@ -24,8 +25,8 @@ TEST(propagate_constants, copy_dependecies_from_nodes) {
auto weights1 = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 1 } });
auto weights2 = engine.allocate_memory({ data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
set_values(input, { FLOAT16(1.1f), FLOAT16(1.2f), FLOAT16(1.3f), FLOAT16(1.4f) });
set_values(weights1, { FLOAT16(2.1f), FLOAT16(3.1f) });
set_values(input, { T(1.1f), T(1.2f), T(1.3f), T(1.4f) });
set_values(weights1, { T(2.1f), T(3.1f) });
set_values(weights2, { 1.1f, 0.1f });
topology topology;
@ -37,10 +38,10 @@ TEST(propagate_constants, copy_dependecies_from_nodes) {
topology.add(reorder("reorder1", input_info("reshape1"), layout(data_types::f32, format::byxf, tensor(4))));
topology.add(concatenation("concat", { input_info("reorder1"), input_info("weights2") }, 3));
topology.add(convolution("conv2", { input_info("reorder2") }, { "concat" }));
network network(engine, topology, config);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
float epsilon = 1e-2f;
for (auto& it : outputs) {
@ -48,3 +49,11 @@ TEST(propagate_constants, copy_dependecies_from_nodes) {
ASSERT_NEAR(7.8f, output[0], epsilon);
}
}
TEST(propagate_constants, copy_dependecies_from_nodes) {
test_copy_dependecies_from_nodes<FLOAT16>(false);
}
TEST(propagate_constants, copy_dependecies_from_nodes_cached) {
test_copy_dependecies_from_nodes<FLOAT16>(true);
}

View File

@ -15,116 +15,123 @@ template <typename T>
struct pyramid_roi_align_typed_test : testing::Test {
static const data_types data_type = type_to_data_type<T>::value;
using Type = T;
void execute(bool is_caching_test) {
auto& engine = get_test_engine();
const int rois_num = 3;
const int output_size = 2;
const int sampling_points = 2;
const int starting_level = 2;
const int P2_scale = 1;
const int P3_scale = 2;
const int P4_scale = 4;
const int P5_scale = 8;
const int P2_size = 8;
const int P3_size = P2_size * P2_scale / P3_scale;
const int P4_size = P2_size * P2_scale / P4_scale;
const int P5_size = P2_size * P2_scale / P5_scale;
std::vector<Type> rois_data = {
Type(0.f), Type(0.f), Type(1.f), Type(1.f),
Type(0.f), Type(0.f), Type(0.5f), Type(0.5f),
Type(0.5f), Type(0.5f), Type(0.75f), Type(0.75f)
};
std::vector<Type> P2_data = {
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
};
std::vector<Type> P3_data = {
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
};
std::vector<Type> P4_data = {
Type(11.f), Type(19.f),
Type(11.f), Type(19.f),
};
std::vector<Type> P5_data = {
Type(15.f)
};
auto rois_lay = layout(this->data_type, format::bfyx, tensor(batch(rois_num), feature(4)));
auto P2_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P2_size, P2_size));
auto P3_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P3_size, P3_size));
auto P4_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P4_size, P4_size));
auto P5_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P5_size, P5_size));
auto rois_mem = engine.allocate_memory(rois_lay);
auto P2_mem = engine.allocate_memory(P2_lay);
auto P3_mem = engine.allocate_memory(P3_lay);
auto P4_mem = engine.allocate_memory(P4_lay);
auto P5_mem = engine.allocate_memory(P5_lay);
tests::set_values(rois_mem, rois_data);
tests::set_values(P2_mem, P2_data);
tests::set_values(P3_mem, P3_data);
tests::set_values(P4_mem, P4_data);
tests::set_values(P5_mem, P5_data);
topology topo;
topo.add(data("P2", P2_mem));
topo.add(data("P3", P3_mem));
topo.add(data("P4", P4_mem));
topo.add(data("P5", P5_mem));
topo.add(input_layout("rois", rois_lay));
topo.add(pyramid_roi_align("pyramid",
input_info("rois"),
input_info("P2"),
input_info("P3"),
input_info("P4"),
input_info("P5"),
output_size,
sampling_points,
{ P2_scale, P3_scale, P4_scale, P5_scale },
starting_level));
cldnn::network::ptr net = get_network(engine, topo, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
net->set_input_data("rois", rois_mem);
std::vector<float> expected_out = {
// RoI 0,0 - 1,1 from P4
14.f, 18.f, 14.f, 18.f,
// RoI 0,0 - 0.5,0.5 from P3
11.25f, 14.25f, 11.25f, 14.25f,
// RoI 0.5,0.5 - 0.75,0.75 from P2
12.15625f, 13.03125f, 7.40625f, 8.28125f,
};
auto result = net->execute();
auto out_mem = result.at("pyramid").get_memory();
cldnn::mem_lock<Type> out_ptr(out_mem, get_test_stream());
ASSERT_EQ(expected_out.size(), out_ptr.size());
for (size_t i = 0; i < expected_out.size(); ++i) {
ASSERT_EQ(expected_out[i], static_cast<float>(out_ptr[i])) << "at i = " << i;
}
}
};
using pyramid_roi_align_types = testing::Types<float, half_t>;
TYPED_TEST_SUITE(pyramid_roi_align_typed_test, pyramid_roi_align_types);
TYPED_TEST(pyramid_roi_align_typed_test, smoke_4levels) {
using Type = typename pyramid_roi_align_typed_test<TypeParam>::Type;
auto& engine = get_test_engine();
const int rois_num = 3;
const int output_size = 2;
const int sampling_points = 2;
const int starting_level = 2;
const int P2_scale = 1;
const int P3_scale = 2;
const int P4_scale = 4;
const int P5_scale = 8;
const int P2_size = 8;
const int P3_size = P2_size * P2_scale / P3_scale;
const int P4_size = P2_size * P2_scale / P4_scale;
const int P5_size = P2_size * P2_scale / P5_scale;
std::vector<Type> rois_data = {
Type(0.f), Type(0.f), Type(1.f), Type(1.f),
Type(0.f), Type(0.f), Type(0.5f), Type(0.5f),
Type(0.5f), Type(0.5f), Type(0.75f), Type(0.75f)
};
std::vector<Type> P2_data = {
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
};
std::vector<Type> P3_data = {
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
};
std::vector<Type> P4_data = {
Type(11.f), Type(19.f),
Type(11.f), Type(19.f),
};
std::vector<Type> P5_data = {
Type(15.f)
};
auto rois_lay = layout(this->data_type, format::bfyx, tensor(batch(rois_num), feature(4)));
auto P2_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P2_size, P2_size));
auto P3_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P3_size, P3_size));
auto P4_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P4_size, P4_size));
auto P5_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P5_size, P5_size));
auto rois_mem = engine.allocate_memory(rois_lay);
auto P2_mem = engine.allocate_memory(P2_lay);
auto P3_mem = engine.allocate_memory(P3_lay);
auto P4_mem = engine.allocate_memory(P4_lay);
auto P5_mem = engine.allocate_memory(P5_lay);
tests::set_values(rois_mem, rois_data);
tests::set_values(P2_mem, P2_data);
tests::set_values(P3_mem, P3_data);
tests::set_values(P4_mem, P4_data);
tests::set_values(P5_mem, P5_data);
topology topo;
topo.add(data("P2", P2_mem));
topo.add(data("P3", P3_mem));
topo.add(data("P4", P4_mem));
topo.add(data("P5", P5_mem));
topo.add(input_layout("rois", rois_lay));
topo.add(pyramid_roi_align("pyramid",
input_info("rois"),
input_info("P2"),
input_info("P3"),
input_info("P4"),
input_info("P5"),
output_size,
sampling_points,
{ P2_scale, P3_scale, P4_scale, P5_scale },
starting_level));
cldnn::network net(engine, topo);
net.set_input_data("rois", rois_mem);
std::vector<float> expected_out = {
// RoI 0,0 - 1,1 from P4
14.f, 18.f, 14.f, 18.f,
// RoI 0,0 - 0.5,0.5 from P3
11.25f, 14.25f, 11.25f, 14.25f,
// RoI 0.5,0.5 - 0.75,0.75 from P2
12.15625f, 13.03125f, 7.40625f, 8.28125f,
};
auto result = net.execute();
auto out_mem = result.at("pyramid").get_memory();
cldnn::mem_lock<Type> out_ptr(out_mem, get_test_stream());
ASSERT_EQ(expected_out.size(), out_ptr.size());
for (size_t i = 0; i < expected_out.size(); ++i) {
ASSERT_EQ(expected_out[i], static_cast<float>(out_ptr[i])) << "at i = " << i;
}
this->execute(false);
}
TYPED_TEST(pyramid_roi_align_typed_test, smoke_4levels_cached) {
this->execute(true);
}
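The pyramid_roi_align rewrite above is the commit's recurring refactor in its purest form: the entire test body moves into an `execute(bool is_caching_test)` member so the plain and `_cached` typed tests share it. Reduced to a skeleton (identifiers here are illustrative, not from the diff):
template <typename T>
struct example_typed_test : public testing::Test {
    void execute(bool is_caching_test) {
        // Build the topology, then run it through the caching-aware helper:
        //   cldnn::network::ptr net = get_network(engine, topo, config,
        //                                         get_test_stream_ptr(), is_caching_test);
        // Set inputs, execute, and compare outputs against expected values.
    }
};
using example_types = testing::Types<float, half_t>;
TYPED_TEST_SUITE(example_typed_test, example_types);
TYPED_TEST(example_typed_test, smoke)        { this->execute(false); }
TYPED_TEST(example_typed_test, smoke_cached) { this->execute(true); }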

View File

@ -794,7 +794,7 @@ struct quantize_random_test : testing::TestWithParam<quantize_random_test_params
}
}
void execute_compare(const quantize_random_test_params& params, bool check_result) {
void execute_compare(const quantize_random_test_params& params, bool check_result, bool is_caching_test) {
auto& engine = get_test_engine();
auto in_layout = layout(params.input_type, params.in_format, params.input_size);
@ -840,10 +840,11 @@ struct quantize_random_test : testing::TestWithParam<quantize_random_test_params
ExecutionConfig config;
config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"quantize"}));
network net(engine, topo, config);
net.set_input_data("input", input);
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto result = net.execute();
net->set_input_data("input", input);
auto result = net->execute();
auto output = result.at("quantize").get_memory();
auto input_opt = engine.allocate_memory(in_layout);
@ -909,7 +910,7 @@ struct quantize_random_test_param_generator : std::vector<quantize_random_test_p
TEST_P(quantize_random_test, random) {
auto param = GetParam();
execute_compare(param, true);
execute_compare(param, true, false);
}
INSTANTIATE_TEST_SUITE_P(quantize_smoke,
@ -919,3 +920,23 @@ INSTANTIATE_TEST_SUITE_P(quantize_smoke,
.simple_params(data_types::f32, data_types::u8, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, 5)
.simple_params(data_types::f32, data_types::u8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16, 5)
));
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(quantize_random_test, random_cached) {
auto param = GetParam();
execute_compare(param, true, true);
}
#else
using quantize_random_test_cached = quantize_random_test;
TEST_P(quantize_random_test_cached, random) {
auto param = GetParam();
execute_compare(param, true, true);
}
INSTANTIATE_TEST_SUITE_P(quantize_smoke,
quantize_random_test_cached,
testing::Values(
quantize_random_test_params{ data_types::f32, data_types::u8, {1, 16, 10, 10}, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, 5}
));
#endif

View File

@ -30,7 +30,7 @@ struct RandomUniformParams {
template<typename T>
struct random_uniform_gpu_test : public ::testing::TestWithParam<RandomUniformParams<T> > {
public:
void test() {
void test(bool is_caching_test) {
auto data_type = type_to_data_type<T>::value;
RandomUniformParams<T> params = testing::TestWithParam<RandomUniformParams<T> >::GetParam();
@ -56,13 +56,13 @@ public:
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network net{engine, topology, config};
cldnn::network::ptr net = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
net.set_input_data("shape", shape);
net.set_input_data("min_val", min_val);
net.set_input_data("max_val", max_val);
net->set_input_data("shape", shape);
net->set_input_data("min_val", min_val);
net->set_input_data("max_val", max_val);
auto result = net.execute();
auto result = net->execute();
auto out_mem = result.at("random_uniform").get_memory();
cldnn::mem_lock<T> out_ptr(out_mem, get_test_stream());
@ -105,20 +105,20 @@ using random_uniform_gpu_test_f32 = random_uniform_gpu_test<float>;
using random_uniform_gpu_test_f16 = random_uniform_gpu_test<half_t>;
TEST_P(random_uniform_gpu_test_i32, random_int32) {
ASSERT_NO_FATAL_FAILURE(test());
ASSERT_NO_FATAL_FAILURE(test(false));
}
TEST_P(random_uniform_gpu_test_i64, random_int64) {
ASSERT_NO_FATAL_FAILURE(test());
ASSERT_NO_FATAL_FAILURE(test(false));
}
TEST_P(random_uniform_gpu_test_f32, random_f32) {
ASSERT_NO_FATAL_FAILURE(test());
ASSERT_NO_FATAL_FAILURE(test(false));
}
TEST_P(random_uniform_gpu_test_f16, random_f16) {
ASSERT_NO_FATAL_FAILURE(test());
ASSERT_NO_FATAL_FAILURE(test(false));
}
INSTANTIATE_TEST_SUITE_P(smoke_random_uniform_int32,
@ -186,3 +186,20 @@ INSTANTIATE_TEST_SUITE_P(smoke_random_uniform_f16,
}
),
PrintToStringParamName());
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(random_uniform_gpu_test_i32, random_int32_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(random_uniform_gpu_test_i64, random_int64_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(random_uniform_gpu_test_f32, random_f32_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
#endif
TEST_P(random_uniform_gpu_test_f16, random_f16_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}

View File

@ -482,7 +482,7 @@ protected:
}
public:
void execute() {
void execute(bool is_caching_test) {
int input_dim = static_cast<int>(input_format.dimension());
cldnn::format layout_format = input_format;
@ -530,12 +530,11 @@ public:
config.set_property(ov::intel_gpu::optimize_data(true));
ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name};
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}}));
network network(engine, topology, config);
network.set_input_data("input", input_mem);
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_mem);
network->execute();
network.execute();
auto out_mem = network.get_output("reduce").get_memory();
auto out_mem = network->get_output("reduce").get_memory();
cldnn::mem_lock<output_t> out_ptr(out_mem, get_test_stream());
auto out_lay = out_mem->get_layout();
@ -573,13 +572,13 @@ public:
};
class general_reduce_gpu_i8_i8 : public ReduceTestBase<data_types::i8, data_types::i8> {};
TEST_P(general_reduce_gpu_i8_i8, base) { execute(); }
TEST_P(general_reduce_gpu_i8_i8, base) { execute(false); }
class general_reduce_gpu_i8_f32 : public ReduceTestBase<data_types::i8, data_types::f32> {};
TEST_P(general_reduce_gpu_i8_f32, base) { execute(); }
TEST_P(general_reduce_gpu_i8_f32, base) { execute(false); }
class general_reduce_gpu_f32_f32 : public ReduceTestBase<data_types::f32, data_types::f32> {};
TEST_P(general_reduce_gpu_f32_f32, base) { execute(); }
TEST_P(general_reduce_gpu_f32_f32, base) { execute(false); }
INSTANTIATE_TEST_SUITE_P(reduce_gpu_b_fs_yx_fsv16_i8_i8,
general_reduce_gpu_i8_i8,
@ -770,7 +769,8 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_reduce_gpu_ref_f32_f32,
),
general_reduce_gpu::PrintToStringParamName);
TEST(reduce_gpu, common_bfyx) {
template <typename T>
void test_common_bfyx(bool is_caching_test) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 1, 1, 1}});
@ -780,26 +780,30 @@ TEST(reduce_gpu, common_bfyx) {
topology.add(input_layout("input", input->get_layout()));
topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {0}, 0));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "reduce");
auto output = outputs.at("reduce").get_memory();
std::vector<float> ref_data = {1.0f};
std::vector<T> ref_data = {1.0f};
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
for (size_t i = 0; i < ref_data.size(); ++i) {
ASSERT_TRUE(are_equal(ref_data[i], output_ptr[i]));
}
}
TEST(reduce_gpu, common_bfyx) {
test_common_bfyx<float>(false);
}
TEST(reduce_gpu, common_bfyx_keepdims) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 3, 4, 1}});
@ -1823,7 +1827,7 @@ protected:
}
public:
void execute() {
void execute(bool is_caching_test) {
int input_dim = static_cast<int>(input_format.dimension());
cldnn::format layout_format = input_format;
@ -1891,12 +1895,12 @@ public:
config.set_property(ov::intel_gpu::optimize_data(true));
ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name};
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}}));
network network(engine, topology, config);
network.set_input_data("input", input_mem);
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_mem);
network.execute();
network->execute();
auto out_mem = network.get_output("reduce").get_memory();
auto out_mem = network->get_output("reduce").get_memory();
cldnn::mem_lock<output_t> out_ptr(out_mem, get_test_stream());
auto out_lay = out_mem->get_layout();
@ -1939,10 +1943,10 @@ public:
class general_reduce_gpu_xy_f32 : public ReduceXYWithBigTensorTestBase<data_types::f32, data_types::f32> {};
TEST_P(general_reduce_gpu_xy_f32, base) { execute(); }
TEST_P(general_reduce_gpu_xy_f32, base) { execute(false); }
class general_reduce_gpu_xy_i8 : public ReduceXYWithBigTensorTestBase<data_types::i8, data_types::i8> {};
TEST_P(general_reduce_gpu_xy_i8, base) { execute(); }
TEST_P(general_reduce_gpu_xy_i8, base) { execute(false); }
INSTANTIATE_TEST_SUITE_P(reduce_gpu_b_fs_yx_fsv16_xy_f32,
general_reduce_gpu_xy_f32,
@ -2111,7 +2115,7 @@ INSTANTIATE_TEST_SUITE_P(onednn_reduce_gpu_b_fs_yx_fsv16_i8_f32,
TestParamType_general_reduce_gpu(17, 3, 1, 1, 14, 11, format::b_fs_yx_fsv16, reduce_mode::mean, {1}, "reduce_gpu_b_fs_yx_fsv16", true, data_types::i8, false, data_types::f32)
), general_reduce_gpu::PrintToStringParamName);
INSTANTIATE_TEST_SUITE_P(onednn_reduce_gpu_b_fs_yx_fsv16_f16_f16,
onednn_reduce_gpu_f16_f16,
::testing::Values(
TestParamType_general_reduce_gpu(3, 3, 1, 1, 3, 2, format::b_fs_yx_fsv16, reduce_mode::sum, {3, 2, 1, 0}, "reduce_gpu_b_fs_yx_fsv16", false, data_types::f16, false, data_types::f16),
@ -2134,3 +2138,19 @@ INSTANTIATE_TEST_SUITE_P(onednn_reduce_gpu_b_fs_yx_fsv16_i8_f32,
TestParamType_general_reduce_gpu(17, 3, 1, 1, 14, 11, format::b_fs_yx_fsv16, reduce_mode::mean, {1}, "reduce_gpu_b_fs_yx_fsv16", true, data_types::f16, false, data_types::f16)
), general_reduce_gpu::PrintToStringParamName);
#endif // ENABLE_ONEDNN_FOR_GPU
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(general_reduce_gpu_i8_i8, base_cached) { execute(true); }
TEST_P(general_reduce_gpu_i8_f32, base_cached) { execute(true); }
TEST_P(general_reduce_gpu_f32_f32, base_cached) { execute(true); }
TEST_P(general_reduce_gpu_xy_f32, base_cached) { execute(true); }
TEST_P(general_reduce_gpu_xy_i8, base_cached) { execute(true); }
#endif // RUN_ALL_MODEL_CACHING_TESTS
TEST(reduce_gpu, common_bfyx_cached) {
test_common_bfyx<float>(true);
}

View File

@ -165,7 +165,7 @@ struct region_yolo_test_params {
};
template <typename T>
void runRegionTest(region_yolo_test_params& params) {
void runRegionTest(region_yolo_test_params& params, bool is_caching_test = false) {
auto& engine = get_test_engine();
const tensor kInputTensor(params.tensor[0], params.tensor[1], params.tensor[2], params.tensor[3]);
auto inputData = generate_random_1d<T>(params.tensor[0] * params.tensor[1] * params.tensor[2] * params.tensor[3], -1, 1);
@ -180,10 +180,11 @@ void runRegionTest(region_yolo_test_params& params) {
params.regionNum, static_cast<uint32_t>(params.mask.size()), params.softMax));
topology.add(reorder("reorder_post", input_info("region_yolo"), format::bfyx, params.dataType));
network network(engine, topology);
network.set_input_data("InputData", inputPrim);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
auto outputs = network.execute();
network->set_input_data("InputData", inputPrim);
auto outputs = network->execute();
auto output = outputs.at("reorder_post").get_memory();
cldnn::mem_lock<T> outputData(output, get_test_stream());
@ -239,3 +240,44 @@ TEST(region_yolo_gpu_fp16, byxf_softmax) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::byxf, true};
runRegionTest<FLOAT16>(params);
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(region_yolo_gpu_fp32, bfyx_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f32, format::bfyx, false};
runRegionTest<float>(params, true);
}
TEST(region_yolo_gpu_fp32, bfyx_softmax_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f32, format::bfyx, true};
runRegionTest<float>(params, true);
}
TEST(region_yolo_gpu_fp32, byxf_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f32, format::byxf, false};
runRegionTest<float>(params, true);
}
TEST(region_yolo_gpu_fp32, byxf_softmax_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f32, format::byxf, true};
runRegionTest<float>(params, true);
}
TEST(region_yolo_gpu_fp16, bfyx_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::bfyx, false};
runRegionTest<FLOAT16>(params, true);
}
TEST(region_yolo_gpu_fp16, bfyx_softmax_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::bfyx, true};
runRegionTest<FLOAT16>(params, true);
}
TEST(region_yolo_gpu_fp16, byxf_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::byxf, false};
runRegionTest<FLOAT16>(params, true);
}
#endif // RUN_ALL_MODEL_CACHING_TESTS
TEST(region_yolo_gpu_fp16, byxf_softmax_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::byxf, true};
runRegionTest<FLOAT16>(params, true);
}

View File

@ -14,7 +14,8 @@ using namespace cldnn;
using namespace ::tests;
using namespace testing;
TEST(removing_output_node, multiple_outputs) {
template <typename T>
void test_multiple_outputs(bool is_caching_test) {
// Tests split with crop implementation
// _ strided_slice(bfyx)
// |
@ -58,19 +59,19 @@ TEST(removing_output_node, multiple_outputs) {
topology.add(data("input4", strides));
topology.add(strided_slice("strided_slice", input_info("shuffle_channels"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1 }, {}, {}, {6, 1, 1, 1}));
std::vector<float> input_vec = { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
std::vector<float> out_vec = { 0.0f, 3.0f, 1.0f, 4.0f, 2.0f, 5.0f };
std::vector<T> input_vec = { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
std::vector<T> out_vec = { 0.0f, 3.0f, 1.0f, 4.0f, 2.0f, 5.0f };
set_values(input, input_vec);
ExecutionConfig config;
config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{ "shuffle_channels", "reshape", "strided_slice" }));
network network(engine, topology, config);
network.set_input_data("input", input);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
auto output = outputs.at("reshape").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
ASSERT_TRUE(output->get_layout().get_tensor() == after_reshape);
@ -80,7 +81,7 @@ TEST(removing_output_node, multiple_outputs) {
// check that the output node keeps the same name after the original output node is removed due to the StridedSlice optimization
ASSERT_TRUE(outputs.find("strided_slice") != outputs.end());
auto output2 = outputs.at("strided_slice").get_memory();
cldnn::mem_lock<float> output_ptr2(output, get_test_stream());
cldnn::mem_lock<T> output_ptr2(output, get_test_stream());
ASSERT_TRUE(output2->get_layout().get_tensor() == after_strided_slice);
@ -88,7 +89,12 @@ TEST(removing_output_node, multiple_outputs) {
ASSERT_EQ(output_ptr2[i], out_vec[i]);
}
TEST(removing_output_node, output_node_optimization) {
TEST(removing_output_node, multiple_outputs) {
test_multiple_outputs<float>(false);
}
template <typename T>
void test_output_node_optimization(bool is_caching_test) {
// Filter : 2x3
// Stride : 2x1
// Input : 4x5
@ -115,7 +121,7 @@ TEST(removing_output_node, output_node_optimization) {
set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f });
set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f });
VVF<float> output_vec = {
VVF<T> output_vec = {
{ 20.0f, 27.0f, 38.0f },
{ 17.0f, 19.0f, 19.0f } };
@ -125,17 +131,17 @@ TEST(removing_output_node, output_node_optimization) {
topology.add(convolution("conv", input_info("input"), { "weights" }, { 2, 1 }));
topology.add(activation("relu", input_info("conv"), activation_func::relu));
network network(engine, topology);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
// check that the output node keeps the same name after the original output node is removed due to the ReLU optimization
auto outputs = network.execute();
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "relu");
auto output_memory = outputs.at("relu").get_memory();
auto output_layout = output_memory->get_layout();
cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());
cldnn::mem_lock<T> output_ptr(output_memory, get_test_stream());
int y_size = output_layout.spatial(1);
int x_size = output_layout.spatial(0);
@ -152,3 +158,16 @@ TEST(removing_output_node, output_node_optimization) {
}
}
}
TEST(removing_output_node, output_node_optimization) {
test_output_node_optimization<float>(false);
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(removing_output_node, multiple_outputs_cached) {
test_multiple_outputs<float>(true);
}
#endif
TEST(removing_output_node, output_node_optimization_cached) {
test_output_node_optimization<float>(true);
}

View File

@ -41,7 +41,8 @@ static void compare_result(std::map<cldnn::primitive_id, cldnn::network_output>
static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
const data_types input_data_type, const data_types output_data_type,
cldnn::format input_format, cldnn::format output_format,
int32_t b_in, int32_t f_in, int32_t x_in, int32_t y_in, int32_t z_in = 0, int32_t w_in = 0) {
int32_t b_in, int32_t f_in, int32_t x_in, int32_t y_in, int32_t z_in, int32_t w_in,
bool is_caching_test) {
auto& engine = get_test_engine();
tensor ts;
@ -87,10 +88,11 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
ov::intel_gpu::ImplementationDesc reorder_ref = { output_format, "reorder_data" };
config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", reorder_ref} }));
network network_ref(engine, topology, config_ref);
network_ref.set_input_data("input", input);
cldnn::network::ptr network_ref = get_network(engine, topology, config_ref, get_test_stream_ptr(), is_caching_test);
auto outputs_ref = network_ref.execute();
network_ref->set_input_data("input", input);
auto outputs_ref = network_ref->execute();
cldnn::event::ptr e1 = outputs_ref.at("reorder").get_event();
e1->wait();
@ -99,10 +101,11 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
ov::intel_gpu::ImplementationDesc reorder_optimized = { output_format, kernel_name };
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", reorder_optimized} }));
network network(engine, topology, config);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
auto outputs = network.execute();
network->set_input_data("input", input);
auto outputs = network->execute();
cldnn::event::ptr e2 = outputs.at("reorder").get_event();
e2->wait();
@ -123,124 +126,124 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv32_to_bfyx_f32) {
// b_fs_yx_fsv32 -> bfyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 64 + 5, 16 + 11, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 96 - 12, 16 + 4, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 64 + 5, 16 + 11, 3, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 96 - 12, 16 + 4, 3, 0, 0, false);
// b_fs_zyx_fsv32 -> bfzyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 3, 64 + 9, 16 - 1, 2, 8);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 30, 16 + 1, 3, 4);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 3, 64 + 9, 16 - 1, 2, 8, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 30, 16 + 1, 3, 4, 0, false);
// incremental dims
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 4, 24 - 1, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfwzyx, 2, 64 + 2, 32 - 3, 4);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv32, format::bfwzyx, 1, 96 + 10, 32 - 3, 4, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 4, 24 - 1, 3, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfwzyx, 2, 64 + 2, 32 - 3, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv32, format::bfwzyx, 1, 96 + 10, 32 - 3, 4, 3, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv32_to_bfyx_different_datatype) {
// f32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2, 0, 0, false);
// i32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2, 0, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv16_to_bfyx_f32) {
// u-net
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 1, 64, 388, 388);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 1, 64, 388, 388, 0, 0, false);
// b_fs_yx_fsv16 -> bfyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 3, 48 + 1, 16, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32 - 1, 24 - 1, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 3, 48 + 1, 16, 3, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32 - 1, 24 - 1, 3, 0, 0, false);
// b_fs_zyx_fsv16 -> bfzyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 5, 48 - 1, 16, 3, 8);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 2, 32 + 1, 24 - 1, 3, 17);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 5, 48 - 1, 16, 3, 8, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 2, 32 + 1, 24 - 1, 3, 17, 0, false);
// incremental dims
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfzyx, 3, 32 - 1, 24 - 1, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfwzyx, 4, 16 + 1, 32 - 3, 4);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfwzyx, 3, 16 + 2, 32 - 3, 4, 9);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfzyx, 3, 32 - 1, 24 - 1, 3, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfwzyx, 4, 16 + 1, 32 - 3, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfwzyx, 3, 16 + 2, 32 - 3, 4, 9, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv16_to_bfyx_different_datatype) {
// f32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
// i32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_f32) {
// bfyx_to_b_fs_yx_fsv4
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 4, 32, 16, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 3, 32 + 2, 32 + 3, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 4, 32, 16, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 3, 32 + 2, 32 + 3, 4, 0, 0, false);
// bfyx_to_b_fs_yx_fsv16
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 2, 48, 8, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 2, 48, 8, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, false);
// bfyx to b_fs_yx_fsv32
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 2, 64, 64, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 4, 32 + 6, 96 - 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 2, 64, 64, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 4, 32 + 6, 96 - 4, 2, 0, 0, false);
// bfyx to fs_b_yx_fsv32
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 2, 64, 8, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 3, 64 + 5, 8 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 2, 64, 8, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 3, 64 + 5, 8 + 7, 2, 0, 0, false);
// bfzyx to b_fs_zyx_fsv16
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 2, 48, 8, 4, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 3, 32 + 5, 16 + 7, 2, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 2, 48, 8, 4, 4, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 3, 32 + 5, 16 + 7, 2, 2, 0, false);
// bfzyx to b_fs_zyx_fsv32
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 2, 64, 8, 4, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 3, 64 + 5, 8 + 7, 2, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 2, 64, 8, 4, 4, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 3, 64 + 5, 8 + 7, 2, 2, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32) {
// bfyx to double blocked format (bs_fs_yx_bsv16_fsv16)
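// The trailing comments in this test mark which axes carry a remainder over the 16-wide block (b, f, x); "no" means every dimension divides the block size evenly.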
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 8, 4); // no
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48, 16, 4); // b
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48 + 5, 16, 4); // f
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 48 + 3, 4); // x
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4); // b-f-x
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 8, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48, 16, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48 + 5, 16, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 48 + 3, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 0, 0, false);
// bfzyx to double blocked format (bs_fs_zyx_bsv16_fsv16)
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 8, 4, 16); // no
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48, 16, 4, 2); // b
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48 + 5, 16, 4, 3); // f
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 48 + 3, 4, 4); // x
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2); // b-f-x
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 8, 4, 16, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48, 16, 4, 2, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48 + 5, 16, 4, 3, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 48 + 3, 4, 4, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv16_fsv32) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 3, 16, 4, 5, 7);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 1, 1, 1, 1, 1);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48, 16, 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 1, 1, 1, 1, 1);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48 + 5, 16, 4, 3);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48, 48 + 3, 4, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 3, 16, 4, 5, 7, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 1, 1, 1, 1, 1, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48, 16, 4, 2, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 1, 1, 1, 1, 1, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48 + 5, 16, 4, 3, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48, 48 + 3, 4, 4, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv32_fsv16) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 1, 1, 1, 1, 1);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48, 16, 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48 + 5, 16, 4, 3);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48, 48 + 3, 4, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 1, 1, 1, 1, 1, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48, 16, 4, 2, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48 + 5, 16, 4, 3, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48, 48 + 3, 4, 4, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv32_fsv32) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 1, 1, 1, 1, 1);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48, 16, 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48 + 5, 16, 4, 3);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48, 48 + 3, 4, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 1, 1, 1, 1, 1, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48, 16, 4, 2, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48 + 5, 16, 4, 3, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48, 48 + 3, 4, 4, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_format_different_datatype) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f16, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i8, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i64, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f16, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i8, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i64, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, false);
}
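For orientation, the long argument lists above decode as follows. This prototype is reconstructed from the call sites; the parameter names are assumptions, not the declaration actually used by these tests:
// Reconstructed signature (illustrative only).
static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
                                          data_types input_data_type,
                                          data_types output_data_type,
                                          format input_format, format output_format,
                                          int32_t b, int32_t f, int32_t x, int32_t y,
                                          int32_t z = 0, int32_t w = 0,   // used by 5-D/6-D formats
                                          bool is_caching_test = false);  // true: run via save/load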
TEST(reorder_gpu_optimization, bfyx_to_fsv16_without_f_remainder) {
@ -2575,12 +2578,14 @@ public:
static const int max_random = 200;
std::vector<primitive_id> executed_prims;
void execute(T& p) {
void execute(T& p, bool is_caching_test) {
auto input_prim = this->get_mem(get_input_layout(p));
network network_test(this->engine, this->topology_test, this->config);
network_test.set_input_data("input", input_prim);
executed_prims = network_test.get_executed_primitive_ids();
cldnn::network::ptr network_test = get_network(this->engine, this->topology_test, this->config, get_test_stream_ptr(), is_caching_test);
network_test->set_input_data("input", input_prim);
executed_prims = network_test->get_executed_primitive_ids();
}
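The new is_caching_test flag routes every test through a shared get_network helper instead of constructing cldnn::network directly. A minimal sketch of what such a helper does, assuming the BinaryOutputBuffer/BinaryInputBuffer stream wrappers introduced by this commit (the exact network save/load signatures are assumptions):
cldnn::network::ptr get_network(cldnn::engine& engine,
                                cldnn::topology& topology,
                                const ExecutionConfig& config,
                                cldnn::stream::ptr stream,
                                bool is_caching_test) {
    if (!is_caching_test)
        return std::make_shared<cldnn::network>(engine, topology, config);
    // Caching path: compile once, serialize to an in-memory blob standing in
    // for the on-disk model cache, then hand back the deserialized copy.
    std::stringstream blob;
    {
        cldnn::network source(engine, topology, config);
        BinaryOutputBuffer ob(blob);
        source.save(ob);
    }
    BinaryInputBuffer ib(blob, engine);
    return std::make_shared<cldnn::network>(ib, config, stream, engine);
}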
bool check_optimized_out(T& p, primitive_id target_id) {
@ -2659,7 +2664,7 @@ TEST_P(testing_removal_reorder, removal_reorder_1d_along_f) {
reorder("reorder_bfyx", input_info("add_bias1"), p.default_format, data_types::f16)
);
execute(p);
execute(p, false);
ASSERT_EQ(check_optimized_out(p, "reorder_bias1"), true);
}
@ -2682,7 +2687,7 @@ TEST_P(testing_removal_reorder, only_remove_reorder_shallow_depth_input) {
reorder("reorder_output", input_info("resample"), p.default_format, data_types::f32)
);
execute(p);
execute(p, false);
ASSERT_EQ(check_optimized_out(p, "reorder_conv"), false);
}
@ -2713,7 +2718,7 @@ TEST_P(testing_removal_reorder, removal_no_padded_reorder) {
setup_with_build_ops(config);
execute(p);
execute(p, false);
ASSERT_EQ(check_optimized_out(p, "reorder_conv"), true);
}
@ -2743,7 +2748,7 @@ TEST_P(testing_removal_reorder, removal_padded_reorder) {
setup_with_build_ops(config);
execute(p);
execute(p, false);
ASSERT_EQ(check_optimized_out(p, "reorder_conv"), false);
}
@ -2810,3 +2815,166 @@ TEST(reorder_onednn_gpu, basic_convert_int8) {
}
}
#endif // ENABLE_ONEDNN_FOR_GPU
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv32_to_bfyx_f32_cached) {
// b_fs_yx_fsv32 -> bfyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 64 + 5, 16 + 11, 3, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 96 - 12, 16 + 4, 3, 0, 0, true);
// b_fs_yx_fsv32 -> bfzyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 3, 64 + 9, 16 - 1, 2, 8, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 30, 16 + 1, 3, 4, 0, true);
// incremental dims
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 4, 24 - 1, 3, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfwzyx, 2, 64 + 2, 32 - 3, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv32, format::bfwzyx, 1, 96 + 10, 32 - 3, 4, 3, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv32_to_bfyx_different_datatype_cached) {
// f32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2, 0, 0, true);
// i32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2, 0, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv16_to_bfyx_f32_cached) {
// U-Net-like spatial size (1x64x388x388)
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 1, 64, 388, 388, 0, 0, true);
// b_fs_yx_fsv16 -> bfyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 3, 48 + 1, 16, 3, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32 - 1, 24 - 1, 3, 0, 0, true);
// b_fs_zyx_fsv16 -> bfzyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 5, 48 - 1, 16, 3, 8, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 2, 32 + 1, 24 - 1, 3, 17, 0, true);
// incremental dims
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfzyx, 3, 32 - 1, 24 - 1, 3, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfwzyx, 4, 16 + 1, 32 - 3, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfwzyx, 3, 16 + 2, 32 - 3, 4, 9, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv16_to_bfyx_different_datatype_cached) {
// f32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
// i32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_f32_cached) {
// bfyx_to_b_fs_yx_fsv4
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 4, 32, 16, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 3, 32 + 2, 32 + 3, 4, 0, 0, true);
// bfyx_to_b_fs_yx_fsv16
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 2, 48, 8, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, true);
// bfyx to b_fs_yx_fsv32
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 2, 64, 64, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 4, 32 + 6, 96 - 4, 2, 0, 0, true);
// bfyx to fs_b_yx_fsv32
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 2, 64, 8, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 3, 64 + 5, 8 + 7, 2, 0, 0, true);
// bfzyx to b_fs_zyx_fsv16
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 2, 48, 8, 4, 4, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 3, 32 + 5, 16 + 7, 2, 2, 0, true);
// bfzyx to b_fs_zyx_fsv32
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 2, 64, 8, 4, 4, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 3, 64 + 5, 8 + 7, 2, 2, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_cached) {
// bfyx to double blocked format (bs_fs_yx_bsv16_fsv16)
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 8, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48, 16, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48 + 5, 16, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 48 + 3, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 0, 0, true);
// bfzyx to double blocked format (bs_fs_zyx_bsv16_fsv16)
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 8, 4, 16, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48, 16, 4, 2, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48 + 5, 16, 4, 3, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 48 + 3, 4, 4, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv16_fsv32_cached) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 3, 16, 4, 5, 7, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 1, 1, 1, 1, 1, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48, 16, 4, 2, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 1, 1, 1, 1, 1, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48 + 5, 16, 4, 3, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48, 48 + 3, 4, 4, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv32_fsv16_cached) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 1, 1, 1, 1, 1, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48, 16, 4, 2, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48 + 5, 16, 4, 3, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48, 48 + 3, 4, 4, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv32_fsv32_cached) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 1, 1, 1, 1, 1, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48, 16, 4, 2, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48 + 5, 16, 4, 3, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48, 48 + 3, 4, 4, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_format_different_datatype_cached) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f16, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i8, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i64, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, true);
}
TEST_P(testing_removal_reorder, removal_reorder_1d_along_f_cached) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
reorder("reorder_input", input_info("input"), format::b_fs_yx_fsv16, data_types::f16),
data("weights", get_mem(get_weights_layout(p))),
data("bias1", get_mem(get_bias_layout(p))),
reorder("reorder_bias1", input_info("bias1"), format::b_fs_yx_fsv16, data_types::f16),
convolution("conv_prim", input_info("reorder_input"), {"weights"}, std::vector<primitive_id>{}, 1, p.stride, p.pad),
reorder("reorder_conv", input_info("conv_prim"), format::b_fs_yx_fsv16, data_types::f16),
eltwise("add_bias1", { input_info("reorder_conv"), input_info("reorder_bias1") }, eltwise_mode::sum),
reorder("reorder_bfyx", input_info("add_bias1"), p.default_format, data_types::f16)
);
execute(p, true);
ASSERT_EQ(check_optimized_out(p, "reorder_bias1"), true);
}
#endif
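RUN_ALL_MODEL_CACHING_TESTS gates the bulk of the cached variants so that default test runs stay fast; note that one cached test per suite (such as the one directly below, and the *_cached tests after each later #endif) is left outside the guard, so the serialization path always gets at least smoke coverage. Opting in is an ordinary compile definition, e.g. (hypothetical wiring; the real build setup may differ):
// Enable the full cached test set for this translation unit,
// or pass -DRUN_ALL_MODEL_CACHING_TESTS via the build system.
#define RUN_ALL_MODEL_CACHING_TESTS 1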
TEST_P(testing_removal_reorder, only_remove_reorder_shallow_depth_input_cached) {
auto p = GetParam();
layout reorder_layout(data_types::u8, format::b_fs_yx_fsv32, p.in_shape, padding({0, }, 0));
create_topologies(input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("weights_sec", get_mem(get_weights_layout(p))),
reorder("reorder_fp32", input_info("input"), format::bfyx, data_types::f32),
convolution("conv_prim", input_info("reorder_fp32"), { "weights" }, { "bias" }, 1, p.stride, p.pad, {1, 1}, p.in_shape, data_types::u8, false),
reorder("reorder_conv", input_info("conv_prim"), reorder_layout),
convolution("conv_output", input_info("reorder_conv"), { "weights_sec" }, 1, p.stride, p.pad),
reorder("reorder_bfyx", input_info("conv_output"), format::b_fs_yx_fsv32, data_types::f32),
resample("resample", input_info("reorder_bfyx"), p.out_shape, 1),
reorder("reorder_output", input_info("resample"), p.default_format, data_types::f32)
);
execute(p, true);
ASSERT_EQ(check_optimized_out(p, "reorder_conv"), false);
}
View File
@ -290,21 +290,21 @@ template<typename T>
struct reorg_yolo_test
: public ::testing::TestWithParam<ReorgYoloParamsWithLayout<T> > {
public:
void test() {
void test(bool is_caching_test) {
ReorgYoloParams<T> params;
format::type target_format;
bool should_fail;
std::tie(params, target_format, should_fail) = this->GetParam();
if (should_fail) {
ASSERT_ANY_THROW(run_test(params, target_format));
ASSERT_ANY_THROW(run_test(params, target_format, is_caching_test));
} else {
ASSERT_NO_FATAL_FAILURE(run_test(params, target_format));
ASSERT_NO_FATAL_FAILURE(run_test(params, target_format, is_caching_test));
}
}
private:
void run_test(const ReorgYoloParams<T>& params, const format::type target_format) {
void run_test(const ReorgYoloParams<T>& params, const format::type target_format, bool is_caching_test) {
const auto data_type = type_to_data_type<T>::value;
const format::type plain_format = format::bfyx;
@ -320,9 +320,9 @@ private:
topology.add(reorg_yolo("reorg_yolo", input_info("input_reordered"), params.stride));
topology.add(reorder("reorg_yolo_reordered", input_info("reorg_yolo"), plain_format, data_type));
network network(engine, topology);
network.set_input_data("input", input);
const auto result = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
const auto result = network->execute();
auto out_mem = result.at("reorg_yolo_reordered").get_memory();
cldnn::mem_lock<T> out_ptr(out_mem, get_test_stream());
@ -339,11 +339,11 @@ using test_f32 = reorg_yolo_test<float>;
using test_f16 = reorg_yolo_test<half_t>;
TEST_P(test_f32, basic) {
test();
test(false);
}
TEST_P(test_f16, basic) {
test();
test(false);
}
@ -371,3 +371,12 @@ INSTANTIATE_TEST_SUITE_P(reorg_yolo_invalid_input,
::testing::Values(format::bfyx),
::testing::Values(true)),
PrintToStringParamName());
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(test_f32, basic_cached) {
test(true);
}
#endif
TEST_P(test_f16, basic_cached) {
test(true);
}
View File
@ -12,7 +12,8 @@
using namespace cldnn;
using namespace ::tests;
TEST(resample_gpu, basic_in2x3x2x2_nearest) {
template <typename T>
void test_basic_in2x3x2x2_nearest(bool is_caching_test) {
// Input : 2x2x3x2
// Output : 2x2x6x4
// Sample Type: Nearest
@ -46,16 +47,16 @@ TEST(resample_gpu, basic_in2x3x2x2_nearest) {
12.f, 9.f, -17.f,
});
cldnn::network net{ engine, topology };
cldnn::network::ptr net = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
net.set_input_data("input", input);
net->set_input_data("input", input);
auto outputs = net.execute();
auto outputs = net->execute();
auto output = outputs.at("upsampling").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
float answers[96] = {
T answers[96] = {
1.f, 1.f, 2.f, 2.f, -10.f, -10.f,
1.f, 1.f, 2.f, 2.f, -10.f, -10.f,
3.f, 3.f, 4.f, 4.f, -14.f, -14.f,
@ -86,6 +87,10 @@ TEST(resample_gpu, basic_in2x3x2x2_nearest) {
}
}
TEST(resample_gpu, basic_in2x3x2x2_nearest) {
test_basic_in2x3x2x2_nearest<float>(false);
}
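The change above shows the refactoring pattern applied throughout these files: each TEST body moves into a templated helper that takes is_caching_test, and thin TEST wrappers pick the path. Schematically (names illustrative):
template <typename T>
void test_case_body(bool is_caching_test) {
    // build topology, then:
    //   cldnn::network::ptr net = get_network(engine, topology, config,
    //                                         get_test_stream_ptr(), is_caching_test);
    // run and validate exactly as before
}

TEST(some_gpu_suite, some_case)        { test_case_body<float>(false); }  // direct build
TEST(some_gpu_suite, some_case_cached) { test_case_body<float>(true);  }  // via save/load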
TEST(resample_gpu, basic_in2x3x2x2_bilinear) {
// Input : 1x1x2x2
// Output : 1x1x4x4
@ -456,7 +461,7 @@ struct resample_random_test : testing::TestWithParam<resample_random_test_params
}
}
void execute(const resample_random_test_params& params) {
void execute(const resample_random_test_params& params, bool is_caching_test) {
auto& engine = get_test_engine();
auto in_layout = layout(params.input_type, params.in_format, params.input_size);
@ -467,26 +472,27 @@ struct resample_random_test : testing::TestWithParam<resample_random_test_params
topo.add(prim);
ExecutionConfig config(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample", {params.out_format, ""}} }));
cldnn::network net(engine, topo, config);
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto in_mem = engine.allocate_memory(in_layout);
fill_random(in_mem);
net.set_input_data("in", in_mem);
net->set_input_data("in", in_mem);
auto result = net.execute();
auto result = net->execute();
auto output = result.at("resample").get_memory();
std::string kernel = "";
for (auto& info : net.get_primitives_info()) {
if (info.original_id == "resample")
kernel = info.kernel_id;
if (!is_caching_test) {
for (auto& info : net->get_primitives_info()) {
if (info.original_id == "resample")
kernel = info.kernel_id;
}
}
}
};
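Note the is_caching_test guard above: the kernel-id lookup through get_primitives_info() is skipped on the caching path, presumably because the implementation metadata reported for a deserialized network need not match what the original compilation selected.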
TEST_P(resample_random_test, random) {
execute(GetParam());
execute(GetParam(), false);
}
struct resample_random_test_param_generator : std::vector<resample_random_test_params> {
@ -611,7 +617,7 @@ struct caffe_resample_random_test : testing::TestWithParam<caffe_resample_random
}
}
void execute_compare(const caffe_resample_random_test_params& params, bool check_result) {
void execute_compare(const caffe_resample_random_test_params& params, bool check_result, bool is_caching_test) {
auto& engine = get_test_engine();
auto in_layout = layout(params.input_type, params.in_format, params.input_size);
@ -647,12 +653,12 @@ struct caffe_resample_random_test : testing::TestWithParam<caffe_resample_random
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"resample_opt"}));
config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {params.in_format, "resample_opt"}} }));
cldnn::network net_opt(engine, topo_opt, config_opt);
cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test);
// Use in_mem from ref network
net_opt.set_input_data("in", in_mem);
net_opt->set_input_data("in", in_mem);
auto result_opt = net_opt.execute();
auto result_opt = net_opt->execute();
auto output_opt = result_opt.at("resample_opt").get_memory();
if (check_result == true) {
@ -695,7 +701,7 @@ struct caffe_resample_random_test_param_generator : std::vector<caffe_resample_r
TEST_P(caffe_resample_random_test, random) {
auto param = GetParam();
execute_compare(param, true);
execute_compare(param, true, false);
}
INSTANTIATE_TEST_SUITE_P(caffe_smoke_caffe_fsv16,
@ -2004,7 +2010,8 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
}
}
void execute_compare(const resample_opt_random_test_params& params, bool check_result, const std::string& kernel = "resample_opt") {
void execute_compare(const resample_opt_random_test_params& params, bool check_result,
bool is_caching_test, const std::string& kernel = "resample_opt") {
auto& engine = get_test_engine();
const format origin_format = format::dimension(params.in_format) == 4 ? format::bfyx : format::bfzyx;
@ -2042,13 +2049,13 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
ExecutionConfig config_opt;
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"resample_opt", "res_to_bfyx"}));
network net_opt(engine, topo_opt, config_opt);
cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test);
// Use in_mem from ref network
net_opt.set_input_data("in", in_mem);
net_opt->set_input_data("in", in_mem);
// first execution of opt
auto result_opt = net_opt.execute();
auto result_opt = net_opt->execute();
auto output_opt = result_opt.at("res_to_bfyx").get_memory();
if (!format::is_simple_data_format(params.in_format)) {
ASSERT_FALSE(format::is_simple_data_format(result_opt.at("resample_opt").get_memory()->get_layout().format));
@ -2176,7 +2183,7 @@ struct resample_opt_random_test_ext : resample_opt_random_test
TEST_P(resample_opt_random_test, random) {
auto param = GetParam();
execute_compare(param, true);
execute_compare(param, true, false);
}
TEST_P(resample_opt_random_test_ext, DISABLED_random) {
@ -2329,3 +2336,22 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_5d_3axes_simple,
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32, {}, {}},
}
));
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(resample_random_test, random_cached) {
execute(GetParam(), true);
}
TEST_P(caffe_resample_random_test, random_cached) {
auto param = GetParam();
execute_compare(param, true, true);
}
TEST_P(resample_opt_random_test, random_cached) {
auto param = GetParam();
execute_compare(param, true, true);
}
#endif
TEST(resample_gpu, basic_in2x3x2x2_nearest_cached) {
test_basic_in2x3x2x2_nearest<float>(true);
}
View File
@ -26,7 +26,7 @@ void verify_int(const int32_t& output_value, const int32_t& value) {
template <class ElemType>
void generic_reshape_test(format fmt, tensor const& input_size, tensor const& reshape_size,
bool /* in_place */, padding const& input_padd = padding(),
padding const& output_padd = padding()) {
padding const& output_padd = padding(), bool is_caching_test = false) {
auto& engine = get_test_engine();
//allocate input memory
@ -68,9 +68,9 @@ void generic_reshape_test(format fmt, tensor const& input_size, tensor const& re
ExecutionConfig config;
config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{reshape_input, "reshape"}));
network net(engine, tpl, config);
net.set_input_data("input", input);
auto outputs = net.execute();
cldnn::network::ptr net = get_network(engine, tpl, config, get_test_stream_ptr(), is_caching_test);
net->set_input_data("input", input);
auto outputs = net->execute();
ASSERT_TRUE(outputs.size() == 2 && outputs.count("reshape") == 1 && outputs.count(reshape_input) == 1);
auto net_input = outputs.at(reshape_input).get_memory();
@ -411,7 +411,8 @@ TEST(reshape_gpu_f32, basic_5dim_in_place) {
true);
}
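Giving the new is_caching_test parameter of generic_reshape_test a default of false (see the signature change above) keeps the many existing call sites source-compatible; only the *_cached tests added at the end of this file pass true explicitly.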
TEST(reshape_gpu_f32, multiple_users_with_reorder) {
template <typename T>
void test_multiple_users_with_reorder(bool is_caching_test) {
// Tests split with crop implementation
// _ REORDER(yxfb) --> RELU(yxfb)
// |
@ -452,29 +453,34 @@ TEST(reshape_gpu_f32, multiple_users_with_reorder) {
topology.add(activation("relu1", input_info("reorder1"), activation_func::relu));
topology.add(activation("relu2", input_info("reshape"), activation_func::relu));
std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<float> out1 = {0.f, 2.f, 0.f, 4.0f};
std::vector<float> out2 = {0.f, 2.f, 0.f, 4.0f};
std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<T> out1 = {0.f, 2.f, 0.f, 4.0f};
std::vector<T> out2 = {0.f, 2.f, 0.f, 4.0f};
set_values(input, input_vec);
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
auto output = outputs.at("relu1").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
for (size_t i = 0; i < out1.size(); i++)
ASSERT_EQ(output_ptr[i], out1[i]);
auto output_2 = outputs.at("relu2").get_memory();
cldnn::mem_lock<float> output_ptr_2(output_2, get_test_stream());
cldnn::mem_lock<T> output_ptr_2(output_2, get_test_stream());
for (size_t i = 0; i < out2.size(); i++)
ASSERT_EQ(output_ptr_2[i], out2[i]);
}
TEST(reshape_gpu_f32, calc_output_shape) {
TEST(reshape_gpu_f32, multiple_users_with_reorder) {
test_multiple_users_with_reorder<float>(false);
}
template <typename T>
void test_calc_output_shape(bool is_caching_test) {
// INPUT(bfyx,2x2x1x1) -- RESHAPE(1, 1, 0, -1)
// Input:
@ -495,9 +501,9 @@ TEST(reshape_gpu_f32, calc_output_shape) {
set_values(input, {-1.f, 2.f, -3.f, 4.f});
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "reshape");
@ -509,15 +515,20 @@ TEST(reshape_gpu_f32, calc_output_shape) {
ASSERT_TRUE(output->get_layout().get_tensor() == tensor(1, 1, 1, 4));
float answers[4] = {-1.f, 2.f, -3.f, 4.f};
T answers[4] = {-1.f, 2.f, -3.f, 4.f};
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
for (int i = 0; i < 4; i++) {
ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
}
}
TEST(reshape_gpu_f32, basic_bfwzyx) {
TEST(reshape_gpu_f32, calc_output_shape) {
test_calc_output_shape<float>(false);
}
template <typename T>
void test_basic_bfwzyx(bool is_caching_test) {
// input: bfwzyx, (3, 3, 2, 2, 1, 1)
// reshape: (1, 1, 2, 2, 3, 3), pad (0, 0, 0, 0, 0, 1)
@ -562,9 +573,9 @@ TEST(reshape_gpu_f32, basic_bfwzyx) {
set_values(input, input_data);
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "reshape");
@ -582,7 +593,12 @@ TEST(reshape_gpu_f32, basic_bfwzyx) {
}
}
TEST(reshape_gpu_f32, shrink_chain_partial) {
TEST(reshape_gpu_f32, basic_bfwzyx) {
test_basic_bfwzyx<float>(false);
}
template <typename T>
void test_shrink_chain_partial(bool is_caching_test) {
auto& engine = get_test_engine();
auto batch_num = 2;
auto feature_num = 2;
@ -592,8 +608,8 @@ TEST(reshape_gpu_f32, shrink_chain_partial) {
auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
std::vector<float> scale_vals = {0.f, 1.f, 2.f, 3.f};
std::vector<float> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
std::vector<T> scale_vals = {0.f, 1.f, 2.f, 3.f};
std::vector<T> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
set_values(scale_in, scale_vals);
set_values(shift_in, scale_shifts);
@ -609,8 +625,53 @@ TEST(reshape_gpu_f32, shrink_chain_partial) {
topology.add(eltwise("shift", { input_info("scale"), input_info("shift_in") }, eltwise_mode::sum));
topology.add(reorder("out_reorder", input_info("shift"), format::yxfb, data_types::f32));
std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<float> out = {5.f, 12.f, 15.f, 32.0f};
std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<T> out = {5.f, 12.f, 15.f, 32.0f};
set_values(input, input_vec);
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
auto output = outputs.at("out_reorder").get_memory();
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
for (size_t i = 0; i < out.size(); i++)
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
}
TEST(reshape_gpu_f32, shrink_chain_partial) {
test_shrink_chain_partial<float>(false);
}
template <typename T>
void test_shrink_chain_full(bool is_caching_test) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
std::vector<T> scale_vals = {0.f, 1.f, 2.f, 3.f};
std::vector<T> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
set_values(scale_in, scale_vals);
set_values(shift_in, scale_shifts);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("scale_in", scale_in));
topology.add(data("shift_in", shift_in));
topology.add(activation("relu", input_info("input"), activation_func::relu));
topology.add(reshape("reshape", input_info("relu"), tensor(spatial(2, 2))));
topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f32));
topology.add(reshape("reshape1", input_info("reorder"), tensor(feature(4))));
topology.add(eltwise("scale", { input_info("reshape1"), input_info("scale_in") }, eltwise_mode::prod));
topology.add(eltwise("shift", { input_info("scale"), input_info("shift_in") }, eltwise_mode::sum));
topology.add(reorder("out_reorder", input_info("shift"), format::yxfb, data_types::f32));
std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<T> out = {5.f, 12.f, 15.f, 32.0f};
set_values(input, input_vec);
ExecutionConfig config;
@ -620,85 +681,54 @@ TEST(reshape_gpu_f32, shrink_chain_partial) {
auto outputs = network.execute();
auto output = outputs.at("out_reorder").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
for (size_t i = 0; i < out.size(); i++)
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
}
TEST(reshape_gpu_f32, shrink_chain_full) {
test_shrink_chain_full<float>(false);
}
template <typename T>
void test_shrink_chain_out(bool is_caching_test) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
std::vector<float> scale_vals = {0.f, 1.f, 2.f, 3.f};
std::vector<float> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
std::vector<T> scale_vals = {0.f, 1.f, 2.f, 3.f};
std::vector<T> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
set_values(scale_in, scale_vals);
set_values(shift_in, scale_shifts);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("scale_in", scale_in));
topology.add(data("shift_in", shift_in));
topology.add(activation("relu", input_info("input"), activation_func::relu));
topology.add(reshape("reshape", input_info("relu"), tensor(spatial(2, 2))));
topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f32));
topology.add(reshape("reshape1", input_info("reorder"), tensor(feature(4))));
topology.add(eltwise("scale", { input_info("reshape1"), input_info("scale_in") }, eltwise_mode::prod));
topology.add(eltwise("shift", { input_info("scale"), input_info("shift_in") }, eltwise_mode::sum));
topology.add(reorder("out_reorder", input_info("shift"), format::yxfb, data_types::f32));
std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<float> out = {5.f, 12.f, 15.f, 32.0f};
std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<T> out = {0.f, 2.f, 0.f, 4.0f};
set_values(input, input_vec);
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);
network.set_input_data("input", input);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
auto output = outputs.at("out_reorder").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto output = outputs.at("reshape1").get_memory();
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
for (size_t i = 0; i < out.size(); i++)
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
}
TEST(reshape_gpu_f32, shrink_chain_out) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
std::vector<float> scale_vals = {0.f, 1.f, 2.f, 3.f};
std::vector<float> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
set_values(scale_in, scale_vals);
set_values(shift_in, scale_shifts);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(activation("relu", input_info("input"), activation_func::relu));
topology.add(reshape("reshape", input_info("relu"), tensor(spatial(2, 2))));
topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f32));
topology.add(reshape("reshape1", input_info("reorder"), tensor(feature(4))));
std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<float> out = {0.f, 2.f, 0.f, 4.0f};
set_values(input, input_vec);
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);
network.set_input_data("input", input);
auto outputs = network.execute();
auto output = outputs.at("reshape1").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
for (size_t i = 0; i < out.size(); i++)
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
test_shrink_chain_out<float>(false);
}
TEST(reshape_gpu_f32, basic_runtime_static_shape) {
@ -910,3 +940,369 @@ TEST(reshape_gpu_f32, basic_runtime_dynamic_shape_with_const_optimized_out) {
ASSERT_TRUE(are_equal(input_data[i], output_ptr[i]));
}
}
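The cached block below mirrors the earlier reshape cases one-for-one, re-running each through the serialization path by passing true as the trailing argument of generic_reshape_test or of the templated helpers introduced above.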
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(reshape_gpu_f32, basic_2dim_in_place_cached) {
generic_reshape_test<float>(
format::bfyx,
tensor(1, 1, 2, 2),
tensor(1, 1, 4, 1),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_f16, basic_2dim_in_place_cached) {
generic_reshape_test<FLOAT16>(
format::bfyx,
tensor(1, 1, 2, 2),
tensor(1, 1, 1, 4),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_i8, basic_2dim_in_place_cached) {
generic_reshape_test<int8_t>(
format::bfyx,
tensor(1, 1, 2, 2),
tensor(1, 1, 1, 4),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_i32, basic_2dim_in_place_cached) {
generic_reshape_test<int32_t>(
format::bfyx,
tensor(1, 1, 2, 2),
tensor(1, 1, 1, 4),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_i64, basic_2dim_in_place_cached) {
generic_reshape_test<int64_t>(
format::bfyx,
tensor(1, 1, 2, 2),
tensor(1, 1, 1, 4),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_f32, basic_4dim_in_place_cached) {
generic_reshape_test<float>(
format::yxfb,
tensor(9, 9, 2, 4),
tensor(27, 2, 3, 4),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_f16, basic_4dim_in_place_cached) {
generic_reshape_test<FLOAT16>(
format::yxfb,
tensor(9, 9, 2, 4),
tensor(3, 4, 27, 2),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_i32, basic_4dim_in_place_cached) {
generic_reshape_test<int32_t>(
format::yxfb,
tensor(9, 9, 2, 4),
tensor(3, 4, 27, 2),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_i64, basic_4dim_in_place_cached) {
generic_reshape_test<int64_t>(
format::yxfb,
tensor(9, 9, 2, 4),
tensor(3, 4, 27, 2),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_f32, basic_2dim_output_padd_cached) {
generic_reshape_test<float>(
format::byxf,
tensor(1, 1, 4, 2),
tensor(1, 1, 8, 1),
false,
padding(),
padding(std::vector<int>{0, 0, 1, 1}),
true);
}
TEST(reshape_gpu_f16, basic_2dim_output_padd_cached) {
generic_reshape_test<FLOAT16>(
format::byxf,
tensor(1, 1, 3, 4),
tensor(1, 1, 2, 6),
false,
padding(),
padding(std::vector<int>{0, 0, 2, 2}),
true);
}
TEST(reshape_gpu_i8, basic_2dim_output_padd_cached) {
generic_reshape_test<int8_t>(
format::byxf,
tensor(1, 1, 3, 4),
tensor(1, 1, 2, 6),
false,
padding(),
padding(std::vector<int>{0, 0, 2, 2}),
true);
}
TEST(reshape_gpu_i32, basic_2dim_output_padd_cached) {
generic_reshape_test<int32_t>(
format::byxf,
tensor(1, 1, 3, 4),
tensor(1, 1, 2, 6),
false,
padding(),
padding(std::vector<int>{0, 0, 2, 2}),
true);
}
TEST(reshape_gpu_i64, basic_2dim_output_padd_cached) {
generic_reshape_test<int64_t>(
format::byxf,
tensor(1, 1, 3, 4),
tensor(1, 1, 2, 6),
false,
padding(),
padding(std::vector<int>{0, 0, 2, 2}),
true);
}
TEST(reshape_gpu_f32, basic_2dim_input_padd_cached) {
generic_reshape_test<float>(
format::fyxb,
tensor(1, 1, 2, 5),
tensor(1, 1, 5, 2),
false,
padding({0, 0, 3, 2}, {0, 0, 1, 4}),
padding(),
true);
}
TEST(reshape_gpu_f16, basic_2dim_input_padd_cached) {
generic_reshape_test<FLOAT16>(
format::fyxb,
tensor(1, 1, 3, 3),
tensor(1, 1, 1, 9),
false,
padding({0, 0, 4, 1}, {0, 0, 2, 3}),
padding(),
true);
}
TEST(reshape_gpu_i8, basic_2dim_input_padd_cached) {
generic_reshape_test<int8_t>(
format::fyxb,
tensor(1, 1, 3, 3),
tensor(1, 1, 1, 9),
false,
padding({0, 0, 4, 1}, {0, 0, 2, 3}),
padding(),
true);
}
TEST(reshape_gpu_i32, basic_2dim_input_padd_cached) {
generic_reshape_test<int32_t>(
format::fyxb,
tensor(1, 1, 3, 3),
tensor(1, 1, 1, 9),
false,
padding({0, 0, 4, 1}, {0, 0, 2, 3}),
padding(),
true);
}
TEST(reshape_gpu_i64, basic_2dim_input_padd_cached) {
generic_reshape_test<int64_t>(
format::fyxb,
tensor(1, 1, 3, 3),
tensor(1, 1, 1, 9),
false,
padding({0, 0, 4, 1}, {0, 0, 2, 3}),
padding(),
true);
}
TEST(reshape_gpu_f32, basic_2dim_input_output_padd_cached) {
generic_reshape_test<float>(
format::byxf,
tensor(1, 1, 5, 7),
tensor(1, 1, 7, 5),
false,
padding({0, 0, 4, 4}, {0, 0, 1, 1}),
padding({0, 0, 0, 0}, {0, 0, 3, 0}),
true);
}
TEST(reshape_gpu_f16, basic_2dim_input_output_padd_cached) {
generic_reshape_test<FLOAT16>(
format::byxf,
tensor(1, 1, 6, 6),
tensor(1, 1, 3, 12),
false,
padding({0, 0, 1, 1}, {0, 0, 0, 0}),
padding({0, 0, 2, 1}, {0, 0, 1, 2}),
true);
}
TEST(reshape_gpu_i8, basic_2dim_input_output_padd_cached) {
generic_reshape_test<int8_t>(
format::byxf,
tensor(1, 1, 5, 7),
tensor(1, 1, 7, 5),
false,
padding({0, 0, 4, 4}, {0, 0, 1, 1}),
padding({0, 0, 0, 0}, {0, 0, 3, 0}),
true);
}
TEST(reshape_gpu_i32, basic_2dim_input_output_padd_cached) {
generic_reshape_test<int32_t>(
format::byxf,
tensor(1, 1, 5, 7),
tensor(1, 1, 7, 5),
false,
padding({0, 0, 4, 4}, {0, 0, 1, 1}),
padding({0, 0, 0, 0}, {0, 0, 3, 0}),
true);
}
TEST(reshape_gpu_i64, basic_2dim_input_output_padd_cached) {
generic_reshape_test<int64_t>(
format::byxf,
tensor(1, 1, 5, 7),
tensor(1, 1, 7, 5),
false,
padding({0, 0, 4, 4}, {0, 0, 1, 1}),
padding({0, 0, 0, 0}, {0, 0, 3, 0}),
true);
}
TEST(reshape_gpu_f32, basic_4dim_output_padd_cached) {
generic_reshape_test<float>(
format::bfyx,
tensor(2, 5, 7, 3),
tensor(1, 14, 15, 1),
false,
padding(),
padding({1, 0, 0, 1}, {0, 2, 3, 0}),
true);
}
TEST(reshape_gpu_f16, basic_4dim_output_padd_cached) {
generic_reshape_test<FLOAT16>(
format::bfyx,
tensor(5, 4, 2, 2),
tensor(40, 2, 1, 1),
false,
padding(),
padding({0, 2, 0, 1}, {0, 2, 3, 0}),
true);
}
TEST(reshape_gpu_f32, basic_4dim_input_padd_cached) {
generic_reshape_test<float>(
format::yxfb,
tensor(8, 128, 3, 3),
tensor(16, 8, 8, 9),
false,
padding({0, 1, 3, 3}, {0, 1, 1, 1}),
padding(),
true);
}
TEST(reshape_gpu_f16, basic_4dim_input_padd_cached) {
generic_reshape_test<FLOAT16>(
format::yxfb,
tensor(2, 32, 8, 8),
tensor(8, 128, 1, 4),
false,
padding({2, 2, 1, 0}, {1, 2, 2, 0}),
padding(),
true);
}
TEST(reshape_gpu_f32, basic_4dim_input_output_padd_cached) {
generic_reshape_test<float>(
format::fyxb,
tensor(8, 1024, 25, 25),
tensor(8, 64, 100, 100),
false,
padding({2, 0, 2, 1}, {0, 1, 4, 0}),
padding({1, 2, 3, 4}, {0, 4, 1, 1}),
true);
}
TEST(reshape_gpu_f16, basic_4dim_input_output_padd_cached) {
generic_reshape_test<FLOAT16>(
format::byxf,
tensor(32, 3, 227, 227),
tensor(8, 12, 227, 227),
false,
padding({0, 1, 4, 4}, {0, 1, 1, 1}),
padding({0, 29, 29, 0}, {0, 0, 0, 0}),
true);
}
TEST(reshape_gpu_f32, basic_5dim_in_place_cached) {
generic_reshape_test<float>(
format::bfzyx,
tensor(9, 9, 2, 4, 2),
tensor(27, 2, 1, 4, 6),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_f32, multiple_users_with_reorder_cached) {
test_multiple_users_with_reorder<float>(true);
}
TEST(reshape_gpu_f32, calc_output_shape_cached) {
test_calc_output_shape<float>(true);
}
TEST(reshape_gpu_f32, basic_bfwzyx_cached) {
test_basic_bfwzyx<float>(true);
}
TEST(reshape_gpu_f32, shrink_chain_partial_cached) {
test_shrink_chain_partial<float>(true);
}
TEST(reshape_gpu_f32, shrink_chain_full_cached) {
test_shrink_chain_full<float>(true);
}
#endif
TEST(reshape_gpu_f32, shrink_chain_out_cached) {
test_shrink_chain_out<float>(true);
}
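
All of the `_cached` reshape variants above funnel into the same `generic_reshape_test` helper, whose definition is outside this excerpt. A hypothetical reconstruction of its signature, inferred purely from the call sites, is sketched below; the parameter names are guesses (the fourth argument is presumably an in-place flag, since the `*_in_place_*` tests pass `true` and the padded tests pass `false`), and the trailing `true` added by this commit is taken to be the new `is_caching_test` switch. Note also the guard pattern used throughout the commit: most cached variants sit behind `#ifdef RUN_ALL_MODEL_CACHING_TESTS`, while one test per file (here `shrink_chain_out_cached`) stays outside the guard so the caching path is always exercised at least once.

// Hypothetical declaration reconstructed from the call sites above;
// the real helper lives in this test file and may name things differently.
template <typename ElemType>
void generic_reshape_test(cldnn::format fmt,            // input layout: bfyx, yxfb, byxf, fyxb, bfzyx, ...
                          cldnn::tensor input_size,     // shape of the input buffer
                          cldnn::tensor output_size,    // reshape target (same element count)
                          bool in_place,                // assumed meaning: expect an in-place reshape
                          cldnn::padding input_padd = cldnn::padding(),
                          cldnn::padding output_padd = cldnn::padding(),
                          bool is_caching_test = false); // assumed: round-trip the network through the model cache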

View File

@ -43,7 +43,7 @@ struct ReverseParams {
template <typename T, reverse_mode mode>
struct reverse_gpu_test : public ::testing::TestWithParam<ReverseParams<T, mode>> {
public:
void test() {
void test(bool is_caching_test = false) {
auto data_type = type_to_data_type<T>::value;
ReverseParams<T, mode> params = testing::TestWithParam<ReverseParams<T, mode>>::GetParam();
auto& engine = get_test_engine();
@ -76,10 +76,10 @@ public:
tp.add(reverse(reverse_id, input_info(reverse_input_id), input_info(axes_id), mode));
}
network network(engine, tp);
network.set_input_data(reverse_input_id, reverse_input);
network.set_input_data(axes_id, reverse_axes);
auto result = network.execute();
cldnn::network::ptr network = get_network(engine, tp, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(reverse_input_id, reverse_input);
network->set_input_data(axes_id, reverse_axes);
auto result = network->execute();
auto out_mem = result.at(ouput_op_name).get_memory();
cldnn::mem_lock<T> out_ptr(out_mem, get_test_stream());
@ -422,3 +422,52 @@ INSTANTIATE_TEST_SUITE_P(smoke_reverse_f16_index,
reverse_gpu_test_f16_index,
::testing::ValuesIn(generateIndexParams<half_t>()),
PrintToStringParamName());
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(reverse_gpu_test_int32_mask, reverse_i32_mask_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_int32_index, reverse_i32_index_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_int64_mask, reverse_i64_mask_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_int64_index, reverse_i64_index_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_float_mask, reverse_float_mask_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_float_index, reverse_float_index_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_int8_mask, reverse_int8_mask_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_int8_index, reverse_int8_index_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_uint8_mask, reverse_uint8_mask_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_uint8_index, reverse_uint8_index_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_f16_mask, reverse_f16_mask_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
#endif
TEST_P(reverse_gpu_test_f16_index, reverse_f16_index_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
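
The conversion pattern in this hunk repeats across the whole commit: the direct `network network(engine, tp);` construction becomes a call to the shared `get_network(engine, tp, ExecutionConfig(), get_test_stream_ptr(), is_caching_test)` helper, and the remaining lines only change from `.` to `->`. The helper's definition is not part of this excerpt; the sketch below is a minimal guess at its behavior, assuming the usual cache-test pattern of serializing the compiled network into an in-memory blob and deserializing it back, so that the unchanged assertions then validate the reloaded network. Everything here except the call signature is an assumption, including the buffer types, the constructor forms, and the save/load entry points.

#include <memory>
#include <sstream>

// Minimal sketch under the assumptions stated above -- not the real implementation.
cldnn::network::ptr get_network(cldnn::engine& engine,
                                cldnn::topology& topology,
                                const ExecutionConfig& config,
                                cldnn::stream::ptr stream,
                                bool is_caching_test) {
    if (!is_caching_test) {
        // Mirrors the old direct-construction path.
        return std::make_shared<cldnn::network>(engine, topology, config);
    }
    // Caching path: build once, serialize to memory, rebuild from the blob.
    std::stringstream blob;                        // stands in for the on-disk model cache
    {
        cldnn::network compiled(engine, topology, config);
        cldnn::BinaryOutputBuffer ob(blob);        // hypothetical buffer type
        compiled.save(ob);                         // hypothetical serialize entry point
    }
    cldnn::BinaryInputBuffer ib(blob, engine);     // hypothetical buffer type
    auto reloaded = std::make_shared<cldnn::network>(engine, config, stream);
    reloaded->load(ib);                            // hypothetical deserialize entry point
    return reloaded;
}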

View File

@ -12,7 +12,8 @@
using namespace cldnn;
using namespace ::tests;
TEST(reverse_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
template <typename T>
void test_fp32_d2_2_ba1_sa0(bool is_caching_test) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
@ -35,17 +36,17 @@ TEST(reverse_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis)
);
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network.set_input_data("seq_lengths", seq_lengths);
network->set_input_data("input", input);
network->set_input_data("seq_lengths", seq_lengths);
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("reverse_sequence").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
std::vector<float> expected_results = {
std::vector<T> expected_results = {
0.0f, 3.0f, 2.0f, 1.0f
};
@ -54,7 +55,12 @@ TEST(reverse_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
}
}
TEST(reverse_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
TEST(reverse_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
test_fp32_d2_2_ba1_sa0<float>(false);
}
template <typename T>
void test_fp32_d3_3_3_ba0_sa1(bool is_caching_test) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 3, 1, 3 } });
@ -79,17 +85,17 @@ TEST(reverse_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis)
);
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network.set_input_data("seq_lengths", seq_lengths);
network->set_input_data("input", input);
network->set_input_data("seq_lengths", seq_lengths);
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("reverse_sequence").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
std::vector<float> expected_results = {
std::vector<T> expected_results = {
3.0f, 4.0f, 5.0f, 0.0f, 1.0f, 2.0f, 6.0f, 7.0f, 8.0f,
12.0f, 13.0f, 14.0f, 9.0f, 10.0f, 11.0f, 15.0f, 16.0f, 17.0f,
21.0f, 22.0f, 23.0f, 18.0f, 19.0f, 20.0f, 24.0f, 25.0f, 26.0f
@ -100,6 +106,10 @@ TEST(reverse_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
}
}
TEST(reverse_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
test_fp32_d3_3_3_ba0_sa1<float>(false);
}
TEST(reverse_sequence_gpu_test, fp32_d3_3_3_ba2_sa0) {
auto& engine = get_test_engine();
@ -603,3 +613,12 @@ TEST(reverse_sequence_gpu_test, fp16_d2_2_3_2ba2_sa0) {
ASSERT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(reverse_sequence_gpu_test, fp32_d2_2_ba1_sa0_cached) {
test_fp32_d2_2_ba1_sa0<float>(true);
}
#endif
TEST(reverse_sequence_gpu_test, fp32_d3_3_3_ba0_sa1_cached) {
test_fp32_d3_3_3_ba0_sa1<float>(true);
}
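
Two things are worth noting about this hunk. First, the extracted bodies are templated on `T` even though only `float` instantiations appear here; the test logic is written against `T` (`mem_lock<T>`, `std::vector<T>`), presumably so that other precisions can reuse the same body later. Second, because every caching variant in this commit carries the `_cached` suffix, the serialization path can be run in isolation with GoogleTest's standard name filter, e.g. `--gtest_filter=*_cached*`, or excluded with `--gtest_filter=-*_cached*`.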

View File

@ -66,7 +66,8 @@ struct roi_align_test : public testing::Test {
void execute(const std::vector<TD>& expected_output,
roi_align::PoolingMode pooling_mode,
roi_align::AlignedMode aligned_mode) const {
roi_align::AlignedMode aligned_mode,
bool is_caching_test) const {
auto& engine = get_test_engine();
auto input = get_memory(engine, input_lt, input_data);
@ -90,12 +91,13 @@ struct roi_align_test : public testing::Test {
aligned_mode));
topology.add(reorder("out", input_info("roi_align"), plain_format, device_data_type));
network network(engine, topology);
network.set_input_data("input", input);
network.set_input_data("coords", coords);
network.set_input_data("roi_ind", roi_ind);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
auto outputs = network.execute();
network->set_input_data("input", input);
network->set_input_data("coords", coords);
network->set_input_data("roi_ind", roi_ind);
auto outputs = network->execute();
auto output = outputs.at("out").get_memory();
cldnn::mem_lock<TD> output_ptr(output, get_test_stream());
@ -158,19 +160,41 @@ TYPED_TEST(roi_align_test, avg_asymmetric) {
using TD = typename TypeParam::DataType;
const std::vector<TD>
expected_output{TD(3.f), TD(3.75f), TD(4.75f), TD(5.f), TD(3.f), TD(5.5f), TD(2.75f), TD(3.75f)};
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::asymmetric);
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::asymmetric, false);
}
TYPED_TEST(roi_align_test, avg_half_pixel_for_nn) {
using TD = typename TypeParam::DataType;
const std::vector<TD> expected_output =
{TD(3.14f), TD(2.16f), TD(2.86f), TD(5.03f), TD(1.83f), TD(5.84f), TD(2.77f), TD(3.44f)};
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::half_pixel_for_nn);
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::half_pixel_for_nn, false);
}
TYPED_TEST(roi_align_test, max_half_pixel) {
using TD = typename TypeParam::DataType;
const std::vector<TD> expected_output =
{TD(4.375f), TD(4.9375f), TD(5.6875f), TD(5.625f), TD(4.625f), TD(7.125f), TD(3.3125f), TD(4.3125f)};
this->execute(expected_output, roi_align::PoolingMode::max, roi_align::AlignedMode::half_pixel);
this->execute(expected_output, roi_align::PoolingMode::max, roi_align::AlignedMode::half_pixel, false);
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TYPED_TEST(roi_align_test, avg_asymmetric_cached) {
using TD = typename TypeParam::DataType;
const std::vector<TD>
expected_output{TD(3.f), TD(3.75f), TD(4.75f), TD(5.f), TD(3.f), TD(5.5f), TD(2.75f), TD(3.75f)};
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::asymmetric, true);
}
TYPED_TEST(roi_align_test, avg_half_pixel_for_nn_cached) {
using TD = typename TypeParam::DataType;
const std::vector<TD> expected_output =
{TD(3.14f), TD(2.16f), TD(2.86f), TD(5.03f), TD(1.83f), TD(5.84f), TD(2.77f), TD(3.44f)};
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::half_pixel_for_nn, true);
}
#endif
TYPED_TEST(roi_align_test, max_half_pixel_cached) {
using TD = typename TypeParam::DataType;
const std::vector<TD> expected_output =
{TD(4.375f), TD(4.9375f), TD(5.6875f), TD(5.625f), TD(4.625f), TD(7.125f), TD(3.3125f), TD(4.3125f)};
this->execute(expected_output, roi_align::PoolingMode::max, roi_align::AlignedMode::half_pixel, true);
}
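
The cached roi_align variants duplicate the expected-output vectors of the direct-execution tests verbatim, which encodes an implicit requirement: the model-cache round trip is expected to be numerically transparent, since it presumably reloads already-compiled kernels rather than recompiling under different settings. Any divergence between a test and its `_cached` twin would therefore point at the serialization path rather than at the kernels themselves.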

View File

@ -116,7 +116,7 @@ using roi_pooling_test_params = std::tuple<roi_pooling_test_inputs<T>,
template <class T>
struct roi_pooling_gpu_test : public testing::TestWithParam<roi_pooling_test_params<T>> {
public:
void test() {
void test(bool is_caching_test) {
format::type fmt;
pooling_mode mode;
bool position_sensitive;
@ -185,11 +185,12 @@ public:
topology.add(reorder("reordered_roi_pooling", input_info("roi_pooling"), plane_format, type_to_data_type<T>::value));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
for (auto& input : inputs) {
network.set_input_data(input.first, input.second);
network->set_input_data(input.first, input.second);
}
const auto outputs = network.execute();
const auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "reordered_roi_pooling");
@ -236,7 +237,11 @@ public:
using roi_pooling_gpu_test_float = roi_pooling_gpu_test<float>;
TEST_P(roi_pooling_gpu_test_float, test) {
ASSERT_NO_FATAL_FAILURE(test());
ASSERT_NO_FATAL_FAILURE(test(false));
}
TEST_P(roi_pooling_gpu_test_float, test_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
const std::vector<roi_pooling_test_inputs<float>> roi_pooling_max_inputs = {

View File

@ -37,7 +37,7 @@ using roll_test_params = std::tuple<roll_test_input<T>, format::type>;
template <class T>
struct roll_test : testing::TestWithParam<roll_test_params<T>> {
void test() {
void test(bool is_caching_test) {
roll_test_input<T> p;
format::type input_format;
std::tie(p, input_format) = testing::TestWithParam<roll_test_params<T>>::GetParam();
@ -54,9 +54,9 @@ struct roll_test : testing::TestWithParam<roll_test_params<T>> {
topology.add(roll("roll", input_info("reordered_input"), tensor(input_format, p.shift)));
topology.add(reorder("reordered_roll", input_info("roll"), plane_format, type_to_data_type<T>::value));
network network(engine, topology);
network.set_input_data("input", input);
const auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
const auto outputs = network->execute();
auto output = outputs.at("reordered_roll").get_memory();
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
@ -226,7 +226,7 @@ std::vector<format::type> formats6d = {format::bfwzyx};
#define INSTANTIATE_ROLL_TEST_SUITE(type, func, formats) \
class roll_test_##type##func : public roll_test<type> {}; \
TEST_P(roll_test_##type##func, roll_##type##func) { \
test(); \
test(false); \
} \
INSTANTIATE_TEST_SUITE_P(roll_smoke_##type##func, \
roll_test_##type##func, \
@ -257,4 +257,33 @@ INSTANTIATE_ROLL_TEST_SUITE(float, getRollFloatingPointAdditionalLogic, {format:
#undef INSTANTIATE_ROLL_TEST_SUITE
#define INSTANTIATE_ROLL_TEST_SUITE_CACHED(type, func) \
TEST_P(roll_test_##type##func, roll_##type##func##_cached) { \
test(true); \
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int8_t, getRollParamsToCheckLogic)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(uint8_t, getRollParamsToCheckLogic)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int32_t, getRollParamsToCheckLogic)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int64_t, getRollParamsToCheckLogic)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int8_t, getRollParamsToCheckLayouts)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(uint8_t, getRollParamsToCheckLayouts)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int32_t, getRollParamsToCheckLayouts)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int64_t, getRollParamsToCheckLayouts)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int8_t, getRollParams5D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(uint8_t, getRollParams5D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int32_t, getRollParams5D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int64_t, getRollParams5D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int8_t, getRollParams6D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(uint8_t, getRollParams6D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int32_t, getRollParams6D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int64_t, getRollParams6D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(FLOAT16, getRollFloatingPointParams)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(float, getRollFloatingPointParams)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(FLOAT16, getRollFloatingPointAdditionalLogic)
#endif
INSTANTIATE_ROLL_TEST_SUITE_CACHED(float, getRollFloatingPointAdditionalLogic)
#undef INSTANTIATE_ROLL_TEST_SUITE_CACHED
} // namespace
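
Since the cached roll tests are generated by token pasting, one expansion is worth spelling out. `INSTANTIATE_ROLL_TEST_SUITE_CACHED(float, getRollFloatingPointAdditionalLogic)` expands, exactly, to:

TEST_P(roll_test_floatgetRollFloatingPointAdditionalLogic,
       roll_floatgetRollFloatingPointAdditionalLogic_cached) {
    test(true);
}

This reuses the `roll_test_floatgetRollFloatingPointAdditionalLogic` fixture that the earlier `INSTANTIATE_ROLL_TEST_SUITE` macro declared and registered, so the cached run shares the exact parameter sets of the non-cached suite.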

Some files were not shown because too many files have changed in this diff.