[GPU] Impl cldnn::condition to support dynamic shape (#18051)

* [GPU] Impl cldnn::condition to support dynamic shape (#18051)
* Impl CreateIfOp
* Update calc_output_layouts and execute_impl
* Enable gpu unit test
* Create gpu functional test

* [GPU] Follow-up code review (#18051)
* remove redundant code
* create custom execute method for condition_inst
* change name from update_loop_primitive_map to update_inner_program_io_map

* [GPU] Fix gpu func test failures for fp16

* Add more test cases to cover fp16 and the nested If case

* [GPU] remove redundant code
* refactor variable names
* fix Windows build error

* [GPU] Fix Windows build issue

* [GPU] update calc_output_layouts

* [GPU] remove custom condition_inst::execute

* Remove virtual keyword from primitive_inst::execute()

* [GPU] Share single task executor between main program and inner program

* [GPU] Fix input rank issue for const inner network in condition op

* [GPU] apply calc_output_layouts for roi_align

Co-authored-by: Vladimir Paramuzov <vladimir.paramuzov@intel.com>

* [GPU] avoid checking allow_new_shape_infer for inner program

---------

Co-authored-by: Vladimir Paramuzov <vladimir.paramuzov@intel.com>
Paul Youngsoo Ahn 2023-06-28 00:05:26 +09:00 committed by GitHub
parent c2afa2aefc
commit 50897e86e6
25 changed files with 1658 additions and 648 deletions

View File

@ -22,6 +22,7 @@
namespace cldnn {
struct program;
struct network;
struct kernel_impl_params {
@ -54,6 +55,9 @@ struct kernel_impl_params {
std::map<size_t, memory::ptr> memory_deps = {};
size_t primary_input_idx = 0;
std::vector<std::shared_ptr<program>> inner_progs = {};
std::vector<std::shared_ptr<network>> inner_nets = {};
std::vector<std::map<size_t, primitive_id>> io_output_maps = {};
kernel_impl_params() : prog(nullptr), strm(nullptr), desc(nullptr), unique_id(0) {}
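For a cldnn::condition node, the new fields are expected to hold exactly two entries each, one per branch (a sketch with hypothetical primitive ids; see condition_inst.cpp below):

// inner_progs    = { branch_true_program, branch_false_program };
// io_output_maps = { {{0, "then_body:result"}},   // output idx -> inner primitive id
//                    {{0, "else_body:result"}} };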

View File

@ -79,7 +79,9 @@ public:
network(engine& engine,
const topology& topo,
const ExecutionConfig& config = {},
bool is_internal = false);
bool is_internal = false,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor = nullptr);
network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const ExecutionConfig& config,
@ -100,7 +102,9 @@ public:
static ptr build_network(engine& engine,
const topology& topology,
const ExecutionConfig& config = {},
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor = nullptr,
bool is_internal = false);
static ptr build_network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const ExecutionConfig& config,

View File

@ -125,6 +125,7 @@ public:
program(engine& engine_ref,
topology const& topology,
const ExecutionConfig& config,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
bool is_internal = false,
bool no_optimizations = false,
bool is_body_program = false);
@ -236,6 +237,13 @@ public:
bool is_internal = false,
bool no_optimizations = false,
bool is_body_program = false);
static ptr build_program(engine& engine,
const topology& topology,
const ExecutionConfig& config,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
bool is_internal = false,
bool no_optimizations = false,
bool is_body_program = false);
static ptr build_program(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const ExecutionConfig& config,
@ -253,6 +261,8 @@ public:
ICompilationContext& get_compilation_context() const { return *_compilation_context; }
void cancel_compilation_context();
static std::shared_ptr<InferenceEngine::CPUStreamsExecutor> make_task_executor(const ExecutionConfig& config);
private:
uint32_t prog_id = 0;
engine& _engine;
@ -307,9 +317,6 @@ private:
void post_optimize_graph(bool is_internal);
void transfer_memory_to_device();
InferenceEngine::CPUStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags = "") const;
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> make_task_executor(const ExecutionConfig& config) const;
/*
** Analysis functions
*/

View File

@ -235,6 +235,7 @@ REGISTER_FACTORY(v8, AdaptiveAvgPool);
REGISTER_FACTORY(v8, AdaptiveMaxPool);
REGISTER_FACTORY(v8, Softmax);
REGISTER_FACTORY(v8, PriorBox);
REGISTER_FACTORY(v8, If);
// ------------------------------ Supported v9 ops ------------------------------ //
REGISTER_FACTORY(v9, GridSample)

View File

@ -83,7 +83,8 @@ class Program {
public:
Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
bool createTopologyOnly = false, bool partialBuild = false,
InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor = nullptr, bool innerProgram = false);
Program(cldnn::engine& engine, const ExecutionConfig& config,
InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
@ -158,6 +159,8 @@ public:
bool use_new_shape_infer() const { return allow_new_shape_infer; }
bool requires_new_shape_infer(const ngraph::Node& op) const;
InferenceEngine::CPUStreamsExecutor::Ptr get_task_executor() { return m_task_executor; }
private:
static factories_map_t factories_map;
std::vector<std::shared_ptr<cldnn::program>> m_programs;
@ -173,6 +176,8 @@ private:
bool queryMode;
InferenceEngine::CPUStreamsExecutor::Ptr m_task_executor;
void EnableQueryMode() { queryMode = true; }
void DisableQueryMode() { queryMode = false; }
@ -183,7 +188,7 @@ private:
std::shared_ptr<cldnn::program> BuildProgram(const std::vector<std::shared_ptr<ngraph::Node>>& ops,
InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs,
bool createTopologyOnly = false, bool partialBuild = false);
bool createTopologyOnly = false, bool partialBuild = false, bool innerProgram = false);
void CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr<ngraph::Node>& op);
void ChangeInputBatch(int batch);

View File

@ -5,64 +5,73 @@
#pragma once
#include "primitive.hpp"
#include "intel_gpu/graph/topology.hpp"
#include "intel_gpu/graph/program.hpp"
#include <vector>
namespace cldnn {
/// @brief Function, which will be used during comparison.
enum cond_functions : int32_t { EQUAL, GREATER, LESS };
/// @brief Adds primitive, which works like "if".
///
/// @details
/// @n Applies comparison between 2 inputs.
/// @n Compare data - sizes of that input specify the range of the comparison.
/// @n Offset - offset in memory, when comparing values.
/// @n Applies comparison using the pred primitive, which holds a 1D tensor or scalar value
struct condition : public primitive_base<condition> {
CLDNN_DECLARE_PRIMITIVE(condition)
/// @brief A branch holds a compiled program, an input_map, and an output_map
///
struct branch {
std::map<primitive_id, primitive_id> input_map;
std::map<size_t, primitive_id> output_map;
program::ptr inner_program;
std::string str() {
std::stringstream ss;
ss << "branch: { " << std::endl;
ss << "* input_map : [(outer_id,inner_id),";
for (auto& in_iter : input_map) {
ss << "(" << in_iter.first << "," << in_iter.second << "),";
}
ss << "]," << std::endl;
ss << "* output_map : [(outer_idx,inner_id),";
for (auto& out_iter : output_map) {
ss << "(" << out_iter.first << ","<< out_iter.second << "),";
}
ss << "]" << std::endl;
ss << "}" << std::endl;
return ss.str();
}
};
/// @brief Constructs condition primitive / layer.
///
/// @param id An identifier of new primitive.
/// @param input An identifier of primitive which is an input for newly created
/// condition primitive.
/// @param topology_true Topology containing primitives, which will be executed when the comparison
/// returns true.
/// @param topology_false Topology containing primitives, which will be executed when the comparison
/// returns false.
/// @param compare_data An identifier of the primitive which contains the compare values.
/// @param func Function used during the comparison.
/// @param offset Offset for compare data.
/// @param inputs A list of input primitive ids (pred, inputs (optional)).
/// pred is the condition's predicate primitive, whose scalar value determines whether to execute branch_true or branch_false.
/// @param branch_true Branch containing primitives to be executed when pred is true (the then body in ngraph).
/// @param branch_false Branch containing primitives to be executed when pred is false (the else body in ngraph).
/// @param output_padding Optional padding for output from primitive.
condition(const primitive_id& id,
const input_info& input,
const topology& topology_true,
const topology& topology_false,
const primitive_id& compare_data,
const cond_functions& func,
const tensor& offset = {0, 0, 0, 0, 0},
const padding& output_padding = padding())
: primitive_base(id, {input}, {output_padding}),
topology_true(topology_true),
topology_false(topology_false),
compare_data(compare_data),
function(func),
offset(offset) {}
const std::vector<input_info>& inputs,
const branch& branch_true,
const branch& branch_false,
const padding& output_padding = padding())
: primitive_base(id, inputs, {output_padding}),
branch_true(branch_true),
branch_false(branch_false) {}
/// @brief An identifier of topology, which will be executed when comparison returns true.
topology topology_true;
/// @brief An identifier of topology, which will be executed when comparison returns false.
topology topology_false;
/// @brief An identifier of primitive which contains compare values.
primitive_id compare_data;
/// @brief Used function during comparison.
cond_functions function;
/// @brief Offset for compare data.
tensor offset;
branch branch_true;
branch branch_false;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {compare_data}; }
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {}; }
};
static inline std::ostream& operator<< (std::ostream& os, condition::branch& info) {
os << info.str();
return os;
}
} // namespace cldnn
/// @}
/// @}
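A minimal sketch of how the reworked primitive is populated (ids, programs, and inputs are hypothetical; in practice the plugin fills them from the ngraph If op, see ops/condition.cpp below):

cldnn::condition::branch branch_true;
branch_true.input_map.insert({"outer_input", "inner_param"}); // outer id -> inner id
branch_true.output_map.insert({0, "inner_result"});           // output idx -> inner id
branch_true.inner_program = compiled_then_body;               // program::ptr built elsewhere
// branch_false is built the same way from the else body.
cldnn::condition cond("if_0", {cldnn::input_info("pred"), cldnn::input_info("x")},
                      branch_true, branch_false);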

View File

@ -3,7 +3,7 @@
//
#include "condition_inst.h"
#include "program_node.h"
#include "intel_gpu/runtime/error_handler.hpp"
#include "json_object.h"
#include "primitive_type_base.h"
@ -12,6 +12,39 @@
namespace cldnn {
GPU_DEFINE_PRIMITIVE_TYPE_ID(condition)
const size_t idx_branch_true = 0;
const size_t idx_branch_false = 1;
static std::map<primitive_id, layout> get_out_layout_map(cldnn::program::ptr prog) {
std::map<primitive_id, layout> out_layout_map;
for (auto& o : prog->get_outputs()) {
out_layout_map.insert({o->id(), o->get_output_layout()});
}
return out_layout_map;
}
static std::map<primitive_id, layout> get_out_layout_map(cldnn::network::ptr net) {
std::map<primitive_id, layout> out_layout_map;
for (auto& o : net->get_outputs()) {
out_layout_map.insert({o->id(), o->get_output_layout()});
}
return out_layout_map;
}
static std::vector<layout> get_output_layouts(std::map<primitive_id, layout>&& outputs, const std::map<size_t, cldnn::primitive_id> &io_output_map) {
std::vector<layout> out_layouts;
for (auto out : outputs) {
for (auto& io_output : io_output_map) {
auto inner_prim_id = io_output.second;
if (out.first == inner_prim_id) {
out_layouts.push_back(out.second);
}
}
}
OPENVINO_ASSERT(out_layouts.size() > 0, "No matching output was found");
return out_layouts;
}
/*
The calc_output_layout method is called only when the output layout is invalidated.
That means it is called when:
@ -20,38 +53,155 @@ GPU_DEFINE_PRIMITIVE_TYPE_ID(condition)
In both of these cases, we need to recalculate branch_true and branch_false.
!* We can be sure that this method is called AT LEAST once during graph compilation. *!
*/
layout condition_inst::calc_output_layout(condition_node const& node, kernel_impl_params const& impl_param) {
assert(static_cast<bool>(impl_param.desc->output_data_types[0]) == false &&
"Output data type forcing is not supported for condition_node!");
node.set_branches();
layout condition_inst::calc_output_layout(condition_node const& /* node */, kernel_impl_params const& impl_param) {
OPENVINO_ASSERT(static_cast<bool>(impl_param.desc->output_data_types[0]) == false, "Output data type forcing is not supported for condition_node!");
OPENVINO_ASSERT(impl_param.get_input_layout(0).count() == 1, "layout of compare_data of condition should be {1,1,1,1}");
auto branch_true_output = node.get_branch_true()->get_outputs();
auto branch_false_output = node.get_branch_false()->get_outputs();
CLDNN_ERROR_NOT_EQUAL(impl_param.desc->id,
"Count of branch true outputs",
branch_true_output.size(),
"expected outputs size",
1,
"Branch true should have one output.");
CLDNN_ERROR_NOT_EQUAL(impl_param.desc->id,
"Count of branch false outputs",
branch_false_output.size(),
"expected outputs size",
1,
"Branch false should have one output.");
OPENVINO_ASSERT(impl_param.inner_progs.size() == 2, "If(Condition) contains incorrect number of inner programs ", impl_param.inner_progs.size());
OPENVINO_ASSERT(impl_param.io_output_maps.size() == 2, "If(Condition) contains incorrect number of io output maps ", impl_param.io_output_maps.size());
auto layouts_true = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_true]), impl_param.io_output_maps[idx_branch_true]);
auto layouts_false = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_false]), impl_param.io_output_maps[idx_branch_false]);
auto layout_true = branch_true_output.at(0)->get_output_layout();
auto layout_false = branch_false_output.at(0)->get_output_layout();
CLDNN_ERROR_LAYOUT_MISMATCH(impl_param.desc->id,
"Branch true output layout",
layout_true,
layouts_true[0],
"branch false output layout",
layout_false,
layouts_false[0],
"Layout of the branches should be the same.");
return layout_true;
return layouts_true[0];
}
template <class T>
static bool convert_data(memory::ptr mem, stream& stream) {
mem_lock<T, mem_lock_type::read> lock_data{mem, stream};
return (static_cast<float>(*lock_data.data()) != 0.f);
}
bool condition_inst::get_pred_from_memory(memory::ptr mem, stream& stream) {
auto mem_dt = mem->get_layout().data_type;
switch (mem_dt) {
case cldnn::data_types::f32:
return convert_data<float>(mem, stream);
case cldnn::data_types::f16:
return convert_data<half_t>(mem, stream);
case cldnn::data_types::i64:
return convert_data<int64_t>(mem, stream);
case cldnn::data_types::i32:
return convert_data<int32_t>(mem, stream);
case cldnn::data_types::i8:
return convert_data<int8_t>(mem, stream);
case cldnn::data_types::u8:
return convert_data<uint8_t>(mem, stream);
case cldnn::data_types::bin:
default:
return convert_data<uint32_t>(mem, stream);
}
}
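The same first-element-to-bool conversion shown standalone (an illustrative sketch, independent of cldnn memory locking):

// The predicate is whatever the first element of the buffer converts to,
// compared against zero:
template <class T>
static bool scalar_to_pred(const void* data) {
    return static_cast<float>(*static_cast<const T*>(data)) != 0.f;
}
// e.g. scalar_to_pred<int32_t>(buf) is true for any non-zero leading element.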
static ov::PartialShape resolve_shape(const ov::PartialShape& true_pshape, const ov::PartialShape& false_pshape) {
// true_pshape - shape of output from then_body
// false_pshape - shape of output from else_body
auto then_rank = true_pshape.rank();
auto else_rank = false_pshape.rank();
// if the ranks of the shapes are not equal, or the rank of one of them is dynamic,
// the function returns a shape with dynamic rank
if (then_rank.is_dynamic() || else_rank.is_dynamic()) {
return ov::PartialShape::dynamic();
}
if (then_rank.get_length() != else_rank.get_length()) {
// Union of scalar and 1D case
if (then_rank.get_length() <= 1 && else_rank.get_length() <= 1) {
return ov::PartialShape::dynamic(1);
} else {
return ov::PartialShape::dynamic();
}
}
std::vector<ov::Dimension> new_dims;
// If the ranks are equal, each dimension of the then_body output is unioned with the
// corresponding dimension of the else_body output
for (auto then_it = true_pshape.cbegin(), else_it = false_pshape.cbegin(); then_it != true_pshape.cend();
then_it++, else_it++) {
if ((*then_it).is_dynamic() || (*else_it).is_dynamic()) {
new_dims.push_back(ov::Dimension::dynamic());
} else if (*then_it == *else_it) {
new_dims.emplace_back(*then_it);
} else {
auto dim_min = std::min((*then_it).get_min_length(), (*else_it).get_min_length());
auto dim_max = std::max((*then_it).get_min_length(), (*else_it).get_min_length());
new_dims.emplace_back(dim_min, dim_max);
}
}
return ov::PartialShape(new_dims);
}
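Worked examples of the union rule above (illustrative values in ov::PartialShape notation, not cases from the commit):

// resolve_shape({1,3,16,16}, {1,3,16,16}) -> {1,3,16,16}         equal dims are kept
// resolve_shape({1,3,16,16}, {1,3,8,8})   -> {1,3,8..16,8..16}   static mismatch becomes an interval
// resolve_shape({1,3,?,?},   {1,3,16,16}) -> {1,3,?,?}           dynamic dims stay dynamic
// resolve_shape({2,2},       {2,2,2,2})   -> ?...                rank mismatch -> dynamic rank
// resolve_shape({},          {1})         -> {?}                 scalar vs 1D -> dynamic 1D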
static std::vector<layout> resolve_shape(std::vector<layout>& target_list, std::vector<layout>& other_list) {
std::vector<layout> resolved_layout;
for (size_t i = 0; i < target_list.size(); i++) {
auto target = target_list[i];
auto other = other_list[i];
auto target_pshape = target.get_partial_shape();
auto other_pshape = other.get_partial_shape();
auto target_rank = target_pshape.rank();
auto other_rank = other_pshape.rank();
if (target_rank.get_length() == 0 && other_rank.get_length() == 1) {
resolved_layout.push_back({ov::PartialShape{1}, target.data_type, target.format});
} else {
resolved_layout.push_back(target);
}
}
return resolved_layout;
}
template<typename ShapeType>
std::vector<layout> condition_inst::calc_output_layouts(condition_node const& /* node */, kernel_impl_params const& impl_param) {
if (impl_param.inner_nets.empty()) {
OPENVINO_ASSERT(impl_param.inner_progs.empty() == false, "The count of inner programs should not be zero");
auto layouts_true = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_true]), impl_param.io_output_maps[idx_branch_true]);
auto layouts_false = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_false]), impl_param.io_output_maps[idx_branch_false]);
const size_t num_outputs = impl_param.output_layouts.size();
OPENVINO_ASSERT((num_outputs == layouts_true.size() && num_outputs == layouts_false.size()),
"The number of outputs for each branch should be same!");
std::vector<layout> output_layouts;
for (size_t i = 0; i < num_outputs; i++) {
if (layouts_true[i] == layouts_false[i]) {
output_layouts.push_back(layouts_true[i]);
} else {
OPENVINO_ASSERT(layouts_true[i].data_type == layouts_false[i].data_type, "data types of both branches should be the same");
OPENVINO_ASSERT(layouts_true[i].format == layouts_false[i].format, "output formats of both branches should be the same");
auto out_layout = resolve_shape(layouts_true[i].get_partial_shape(), layouts_false[i].get_partial_shape());
output_layouts.push_back(layout{out_layout, layouts_true[i].data_type, layouts_true[i].format });
}
}
return output_layouts;
} else {
auto layouts_true = get_output_layouts(get_out_layout_map(impl_param.inner_nets[idx_branch_true]), impl_param.io_output_maps[idx_branch_true]);
auto layouts_false = get_output_layouts(get_out_layout_map(impl_param.inner_nets[idx_branch_false]), impl_param.io_output_maps[idx_branch_false]);
const size_t num_outputs = impl_param.output_layouts.size();
OPENVINO_ASSERT((num_outputs == layouts_true.size() && num_outputs == layouts_false.size()),
"The number of outputs for each branch should be same!");
auto& memory_deps = impl_param.memory_deps;
OPENVINO_ASSERT(memory_deps.count(0) > 0, "The count of memory deps should not be zero");
auto mem_ptr = memory_deps.at(0);
auto pred = condition_inst::get_pred_from_memory(mem_ptr, impl_param.get_stream());
if (pred) {
return resolve_shape(layouts_true, layouts_false);
} else {
return resolve_shape(layouts_false, layouts_true);
}
}
}
template std::vector<layout> condition_inst::calc_output_layouts<ov::PartialShape>(condition_node const& node, const kernel_impl_params& impl_param);
std::string condition_inst::to_string(condition_node const& node) {
auto desc = node.get_primitive();
auto node_info = node.desc_to_json();
@ -69,23 +219,35 @@ Condition primitive is reusing memory with the input.
*/
condition_inst::typed_primitive_inst(network& network, condition_node const& node)
: parent(network, node),
_net_true(network::allocate_network(node.get_program().get_engine(), node.get_branch_true(), true)),
_net_false(network::allocate_network(node.get_program().get_engine(), node.get_branch_false(), true)) {
auto compare_tensor = node.compare().get_output_layout().get_tensor();
auto input_tensor = node.input().get_output_layout().get_tensor();
CLDNN_ERROR_TENSOR_SIZES_GREATER_THAN(node.id(),
"Compare tensor",
compare_tensor,
"input tensor",
input_tensor,
"Compare primitive is too big.");
_net_true(network::allocate_network(node.get_program().get_engine(), node.get_branch_true().inner_program)),
_net_false(network::allocate_network(node.get_program().get_engine(), node.get_branch_false().inner_program)) {
this->set_inner_networks({_net_true, _net_false});
}
auto compare_with_offster_tensor = compare_tensor + node.offset();
CLDNN_ERROR_TENSOR_SIZES_GREATER_THAN(node.id(),
"Offset with compare tensor",
compare_with_offster_tensor,
"input tensor",
input_tensor,
"Offset is too big.");
void condition_inst::update_output_layout() {
auto memory_deps = _node->get_const_memory_deps();
for (auto& i : _node->get_shape_infer_dependencies()) {
if (memory_deps.count(i) > 0 || i >= _node->get_dependencies().size()) {
continue;
}
auto dep_id = _node->get_dependency(i).id();
auto dep_mem = _network.get_output_memory(dep_id);
memory_deps.insert({i, dep_mem});
}
_impl_params->memory_deps = memory_deps;
auto new_layouts = _node->type()->calc_output_layouts(*_node, *_impl_params);
if (new_layouts.empty()) {
auto new_layout = _node->type()->calc_output_layout(*_node, *_impl_params);
new_layout.data_padding = padding::max(_node->get_primitive()->output_paddings[0], new_layout.data_padding);
_impl_params->output_layouts[0] = new_layout;
} else {
for (size_t i = 0; i != new_layouts.size(); ++i) {
auto new_layout = new_layouts[i];
new_layout.data_padding = padding::max(_node->get_primitive()->output_paddings[i], new_layout.data_padding);
_impl_params->output_layouts[i] = new_layout;
}
}
}
} // namespace cldnn

View File

@ -12,6 +12,7 @@
#include "quantize_inst.h"
#include "arg_max_min_inst.h"
#include "fully_connected_inst.h"
#include "condition_inst.h"
#include "program_node.h"
#include <iostream>
@ -72,6 +73,9 @@ void compile_graph::run(program& p) {
if (node->is_dynamic() && !is_planar)
can_select_impl = false;
if (node->is_type<condition>())
can_select_impl = true;
if (can_select_impl) {
tasks.push_back([node, &exception, change_initial_impl, original_impl_type] {
try {

View File

@ -0,0 +1,41 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "pass_manager.h"
#include "program_helpers.h"
#include "loop_inst.h"
#include "condition_inst.h"
#include <iterator>
#include <vector>
#include <memory>
using namespace cldnn;
void update_inner_program_io_map::run(program& p) {
for (auto& node : p.get_processing_order()) {
if (node->is_type<loop>()) {
loop_node& node2 = node->as<loop>();
for (const auto& info : p.get_optimized()) {
if (info.second.size() != 1) {
continue;
}
const primitive_id& old_primitive_id = info.first;
const primitive_id& new_primitive_id = info.second.front();
node2.update_primitive_map(old_primitive_id, new_primitive_id);
node2.update_primitive_map(old_primitive_id, new_primitive_id, false); // update internal id
}
} else if (node->is_type<condition>()) {
condition_node& cond = node->as<condition>();
for (const auto& info : p.get_optimized()) {
if (info.second.size() != 1) {
continue;
}
const primitive_id& old_primitive_id = info.first;
const primitive_id& new_primitive_id = info.second.front();
cond.update_primitive_map(old_primitive_id, new_primitive_id);
}
}
}
}
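The condition-side remapping delegates to condition_node::update_primitive_map (defined in condition_inst.h below); with hypothetical ids, its effect on a branch input_map is:

//   before: input_map = { {"reorder_5", "then_param_0"} }  // "reorder_5" was optimized out
//   after : input_map = { {"conv_2",    "then_param_0"} }  // key follows the surviving outer id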

View File

@ -1,31 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "pass_manager.h"
#include "program_helpers.h"
#include "loop_inst.h"
#include <iterator>
#include <vector>
#include <memory>
using namespace cldnn;
void update_loop_primitive_map::run(program& p) {
for (auto& node : p.get_processing_order()) {
if (!node->is_type<loop>()) {
continue;
}
loop_node& node2 = node->as<loop>();
for (const auto& info : p.get_optimized()) {
if (info.second.size() != 1) {
continue;
}
const primitive_id& old_primitive_id = info.first;
const primitive_id& new_primitive_id = info.second.front();
node2.update_primitive_map(old_primitive_id, new_primitive_id);
node2.update_primitive_map(old_primitive_id, new_primitive_id, false); // update internal id
}
}
}

View File

@ -36,16 +36,34 @@ struct condition_impl : typed_primitive_impl<condition> {
auto ev = instance.get_network().get_stream().create_user_event(false);
set_node_params(instance.get_node());
bool exec_branch = choose_branch_to_exec(instance);
memory::ptr memory_to_copy;
if (exec_branch)
memory_to_copy = execute_branch(instance.get_net_true(), instance.result_id(), instance.input_memory_ptr());
else
memory_to_copy = execute_branch(instance.get_net_false(), instance.result_id(), instance.input_memory_ptr());
// just copy memory
mem_lock<float, mem_lock_type::read> inp_ptr{memory_to_copy, instance.get_network().get_stream()};
mem_lock<float, mem_lock_type::write> out_ptr{instance.output_memory_ptr(), instance.get_network().get_stream()};
std::copy(inp_ptr.begin(), inp_ptr.end(), out_ptr.begin());
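// New flow below: (1) read pred from dependency 0 and pick the branch network,
// (2) bind outer input memories to inner parameters via branch.input_map,
// (3) execute the inner network, refresh this instance's output layouts, and
// adopt the inner output memories via branch.output_map.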
auto pred = condition_inst::get_pred_from_memory(instance.pred_memory_ptr(), instance.get_network().get_stream());
network::ptr executed_net = pred ? instance.get_net_true() : instance.get_net_false();
auto branch = pred ? instance.get_branch_true() : instance.get_branch_false();
// Set input memory of inner network before its execution
for (size_t mem_idx = 0; mem_idx < instance.inputs_memory_count(); mem_idx++) {
const primitive_id& input_external_id = instance.dependencies().at(mem_idx).first->id();
auto iter = branch.input_map.find(input_external_id);
if (iter != branch.input_map.end()) {
const primitive_id& input_internal_id = iter->second;
auto mem_ptr = instance.input_memory_ptr(mem_idx);
executed_net->set_input_data(input_internal_id, mem_ptr);
}
}
executed_net->execute({});
// Update output layout of impl_param in condition_inst
instance.update_output_layout();
// Set output memory of condition_inst to inner network output memory after inner network execution
for (auto out_mem_map : branch.output_map) {
auto out_mem_idx = out_mem_map.first;
auto inner_out_id = out_mem_map.second;
auto mem_ptr = executed_net->get_output(inner_out_id).get_memory();
instance.set_output_memory(mem_ptr, false, out_mem_idx);
}
ev->set();
return ev;
}
@ -58,85 +76,22 @@ struct condition_impl : typed_primitive_impl<condition> {
private:
primitive_id _node_id;
/*
Add functions here.
*/
bool check_condition(const float value_1, const float value_2, const cond_functions& func) const {
switch (func) {
case cond_functions::EQUAL:
return value_1 == value_2;
break;
case cond_functions::GREATER:
return value_1 > value_2;
break;
case cond_functions::LESS:
return value_1 < value_2;
break;
default:
throw("Unknown comparision function for: " + _node_id);
break;
}
}
/*
Loop over memory and check condition.
Returns boolean flag, which says what branch should be executed.
*/
bool choose_branch_to_exec(condition_inst& instance) const {
mem_lock<float, mem_lock_type::read> lock_compare_data{instance.compare_memory_ptr(), instance.get_network().get_stream()};
auto compare_layout = instance.compare_memory().get_layout();
auto compare_ptr = lock_compare_data.begin();
mem_lock<float, mem_lock_type::read> lock_input{instance.input_memory_ptr(), instance.get_network().get_stream()};
auto input_layout = instance.input_memory().get_layout();
auto input_ptr = lock_input.begin();
auto function = instance.argument->function;
auto& offset = instance.argument->offset;
for (auto b = 0; b < compare_layout.batch(); b++) {
for (auto f = 0; f < compare_layout.feature(); f++) {
for (auto z = 0; z < compare_layout.spatial(2); z++) {
for (auto y = 0; y < compare_layout.spatial(1); y++) {
for (auto x = 0; x < compare_layout.spatial(0); x++) {
tensor input_tensor{
batch(b + offset.batch[0]),
feature(f + offset.feature[0]),
spatial(x + offset.spatial[0], y + offset.spatial[1], z + offset.spatial[2], 0) };
auto input_idx = input_layout.get_linear_offset(input_tensor);
tensor compare_tensor{ batch(b), feature(f), spatial(x, y, z, 0) };
auto compare_idx = compare_layout.get_linear_offset(compare_tensor);
if (!check_condition(input_ptr[input_idx], compare_ptr[compare_idx], function))
return false;
}
}
}
}
}
return true;
}
memory::ptr execute_branch(network::ptr branch,
const primitive_id& input_id,
memory::ptr input_memory) const {
branch->set_input_data(input_id, input_memory);
branch->execute({});
return branch->get_outputs().at(0)->output_memory_ptr();
}
};
namespace detail {
attach_condition_common::attach_condition_common() {
implementation_map<condition>::add(impl_types::common, condition_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f32, format::yxfb),
});
implementation_map<condition>::add(impl_types::common,
shape_types::dynamic_shape,
condition_impl::create,
{},
{});
implementation_map<condition>::add(impl_types::common, condition_impl::create, {});
}
} // namespace detail
} // namespace common
} // namespace cldnn
// TODO: Restructure this code along the lines of cldnn::loop
ASSIGN_TYPE_NAME(cldnn::common::condition_impl)

View File

@ -18,63 +18,43 @@ struct typed_program_node<condition> : public typed_program_node_base<condition>
private:
using parent = typed_program_node_base<condition>;
class branch {
public:
explicit branch(const topology& tpl) : _topology(tpl) {}
void set(const program_node& node) {
add_or_change_input_layout(node);
_program = program::build_program(node.get_program().get_engine(),
_topology,
node.get_program().get_config(),
true); // rebuild program
}
program::ptr get() const { return _program; }
private:
topology _topology;
program::ptr _program = nullptr;
void add_or_change_input_layout(const program_node& node) {
auto layout = node.get_input_layout(0);
auto input_id = node.as<condition>().result_id();
if (_topology.get_primitives().count(input_id) == 0) {
_topology.add_primitive(std::make_shared<input_layout>(input_id, layout));
for (auto& prim : _topology.get_primitives()) {
for (auto& inp : prim.second->input) {
if (inp.pid == node.id())
inp.pid = input_id;
}
}
} else {
_topology.change_input_layout(input_id, layout);
}
}
};
public:
using parent::parent;
typed_program_node(std::shared_ptr<primitive> prim, program& prog)
typed_program_node(std::shared_ptr<condition> prim, program& prog)
: parent(prim, prog),
_branch_true(this->get_primitive()->topology_true),
_branch_false(this->get_primitive()->topology_false) {}
_branch_true(prim->branch_true),
_branch_false(prim->branch_false) {}
program_node& input() const { return get_dependency(0); }
program_node& compare() const { return get_dependency(1); }
cond_functions func() const { return get_primitive()->function; }
tensor offset() const { return get_primitive()->offset; }
void set_branches() const {
_branch_true.set(*this);
_branch_false.set(*this);
condition::branch get_branch_true() const { return _branch_true; }
condition::branch get_branch_false() const { return _branch_false; }
using parent::get_kernel_impl_params;
std::unique_ptr<kernel_impl_params> get_kernel_impl_params(const std::vector<layout>& in_layouts, const std::vector<layout>& out_layouts) const override {
auto params = parent::get_kernel_impl_params(in_layouts, out_layouts);
params->inner_progs = { _branch_true.inner_program, _branch_false.inner_program };
params->io_output_maps = { _branch_true.output_map, _branch_false.output_map };
return params;
}
void update_primitive_map(const primitive_id& prevID, const primitive_id& newID) {
auto replace_external_id = [&](std::map<primitive_id, primitive_id>& input_map, const primitive_id& prevID, const primitive_id& newID) {
auto iter = input_map.find(prevID);
if (iter != input_map.end()) {
primitive_id new_external_id = newID;
primitive_id internal_id = iter->second;
input_map.erase(iter);
input_map.insert({new_external_id, internal_id});
}
};
replace_external_id(_branch_true.input_map, prevID, newID);
replace_external_id(_branch_false.input_map, prevID, newID);
}
program::ptr get_branch_true() const { return _branch_true.get(); }
program::ptr get_branch_false() const { return _branch_false.get(); }
primitive_id result_id() const { return id() + ":result"; }
private:
mutable branch _branch_true;
mutable branch _branch_false;
condition::branch& _branch_true;
condition::branch& _branch_false;
};
using condition_node = typed_program_node<condition>;
@ -85,17 +65,20 @@ class typed_primitive_inst<condition> : public typed_primitive_inst_base<conditi
using parent::parent;
public:
static layout calc_output_layout(condition_node const& node, kernel_impl_params const& impl_param);
template<typename ShapeType>
static std::vector<layout> calc_output_layouts(condition_node const& /*node*/, kernel_impl_params const& impl_param);
static layout calc_output_layout(condition_node const& /* node */, kernel_impl_params const& impl_param);
static std::string to_string(condition_node const& node);
static bool get_pred_from_memory(memory::ptr mem, stream& stream);
typed_primitive_inst(network& network, condition_node const& node);
memory::ptr input_memory_ptr() const { return dep_memory_ptr(0); }
memory::ptr compare_memory_ptr() const { return dep_memory_ptr(1); }
memory& input_memory() const { return dep_memory(0); }
memory& compare_memory() const { return dep_memory(1); }
memory::ptr pred_memory_ptr() const { return dep_memory_ptr(0); }
network::ptr get_net_true() const { return _net_true; }
network::ptr get_net_false() const { return _net_false; }
primitive_id result_id() const { return node->result_id(); }
condition::branch get_branch_true() const { return node->get_branch_true(); }
condition::branch get_branch_false() const { return node->get_branch_false(); }
void update_output_layout();
private:
network::ptr _net_true;

View File

@ -308,7 +308,7 @@ public:
std::vector<primitive_id> output_names_vec(output_names.begin(), output_names.end());
auto config = get_program().get_config();
config.set_property(ov::intel_gpu::custom_outputs(output_names_vec));
body_program = program::build_program(get_program().get_engine(), body, config, false, false, true);
body_program = program::build_program(get_program().get_engine(), body, config, get_program().get_task_executor(), false, false, true);
}
const primitive_id& get_trip_count_id() const { return get_primitive()->trip_count_id; }

View File

@ -378,9 +378,9 @@ public:
void run(program& p) override;
};
class update_loop_primitive_map : public base_pass {
class update_inner_program_io_map : public base_pass {
public:
update_loop_primitive_map() : base_pass("update_loop_primitive_map") {}
update_inner_program_io_map() : base_pass("update_inner_program_io_map") {}
private:
void run(program& p) override;

View File

@ -232,7 +232,7 @@ public:
bool is_constant() const { return _is_constant; }
bool needs_completion_event() const { return _needs_completion_event; }
bool has_unfused_subgraph() const { return (_unfused_subgraph != nullptr); }
bool has_inner_networks() const;
void allocate_internal_buffers();
static memory::ptr allocate_output(engine& engine, memory_pool& pool, const program_node& _node,
const kernel_impl_params& impl_params, uint32_t net_id, bool is_internal, size_t idx = 0, bool reset_mem = true, bool is_output_buffer = false);
@ -257,6 +257,9 @@ public:
void set_output_layout(const layout& new_out_lay, size_t idx = 0) {
_impl_params->output_layouts[idx] = new_out_lay;
}
void set_inner_networks(const std::vector<network::ptr> inner_nets) {
_impl_params->inner_nets = inner_nets;
}
#ifdef ENABLE_ONEDNN_FOR_GPU
std::vector<cldnn::fused_primitive_desc_onednn>& get_fused_primitives_onednn() const { return _impl_params->fused_desc_onednn; }
#endif // ENABLE_ONEDNN_FOR_GPU

View File

@ -16,6 +16,8 @@ class typed_primitive_inst<roi_align> : public typed_primitive_inst_base<roi_ali
using parent::parent;
public:
template<typename ShapeType>
static std::vector<layout> calc_output_layouts(roi_align_node const& node, kernel_impl_params const& impl_param);
static layout calc_output_layout(roi_align_node const& node, kernel_impl_params const& impl_param);
static std::string to_string(roi_align_node const& node);

View File

@ -13,6 +13,7 @@
#include "reshape_inst.h"
#include "arg_max_min_inst.h"
#include "shape_of_inst.h"
#include "condition_inst.h"
#include <sstream>
#include "gemm_inst.h"
@ -1410,6 +1411,8 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {
preferred_impl = _forcing_map.at(node.id()).second;
} else if (node.is_type<condition>()) {
preferred_impl = impl_types::common;
} else if (node.is_type<detection_output>()) {
const auto& program = node.get_program();
const auto& device_info = program.get_engine().get_device_info();

View File

@ -342,8 +342,9 @@ network::network(program::ptr program, const ExecutionConfig& config, stream::pt
network::network(engine& engine,
const topology& topo,
const ExecutionConfig& config,
bool is_internal)
: network(program::build_program(engine, topo, config, is_internal), config, engine.create_stream(config), is_internal) {}
bool is_internal,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor)
: network(program::build_program(engine, topo, config, task_executor, is_internal), config, engine.create_stream(config), is_internal) {}
network::network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
@ -653,8 +654,9 @@ network::ptr network::allocate_network(engine& engine, program::ptr program, boo
network::ptr network::build_network(engine& engine,
const topology& topology,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
bool is_internal) {
return std::make_shared<network>(engine, topology, config, is_internal);
return std::make_shared<network>(engine, topology, config, is_internal, task_executor);
}
network::ptr network::build_network(engine& engine,

View File

@ -21,6 +21,7 @@
#include "gemm_inst.h"
#include "assign_inst.h"
#include "read_value_inst.h"
#include "condition_inst.h"
#include "experimental_detectron_roi_feature_extractor_inst.hpp"
#include "compilation_context.hpp"
#include "implementation_map.hpp"
@ -619,6 +620,10 @@ void primitive_inst::do_runtime_in_place_concat() {
GPU_DEBUG_TRACE_DETAIL << "[In place concat] " << concat_inst->id() << ": can_be_optimized " << std::endl;
}
bool primitive_inst::has_inner_networks() const {
return (_impl_params->inner_nets.size() > 0);
}
event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
const auto primitive_id = id();
OPENVINO_ASSERT(_has_valid_input, primitive_id, " has invalid/unset input");
@ -626,7 +631,7 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
bool need_args_update = false;
std::vector<event::ptr> dependencies;
if (is_dynamic()) {
if (is_dynamic() && !has_inner_networks()) {
do_runtime_in_place_concat();
OPENVINO_ASSERT(_node != nullptr, "[GPU] Invalid primitive_inst object for dynamic shapes case: program_node can't be null");
update_shape();
@ -679,11 +684,11 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
dependencies.push_back(ev_reset);
}
}
OPENVINO_ASSERT(_impl_params->get_output_layout().is_static(),
"[GPU] Can't execute ", primitive_id, " primitive as output layout is dynamic in runtime");
}
update_shape_done_by_other = false; // reset
OPENVINO_ASSERT(_impl_params->get_output_layout().is_static(),
"[GPU] Can't execute ", primitive_id, " primitive as output layout is dynamic in runtime");
OPENVINO_ASSERT(_impl != nullptr, "[GPU] Implementation is nullptr for ", primitive_id, " primitive");
// Output buffer may be changed under the following conditions, so we need to set args to kernel on each iteration
@ -1253,7 +1258,7 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() {
ov::intel_gpu::allow_static_input_reorder(true),
ov::intel_gpu::allow_new_shape_infer(true)
};
auto prog = program::build_program(get_network().get_engine(), t, subgraph_config, true, false);
auto prog = program::build_program(get_network().get_engine(), t, subgraph_config, get_network().get_program()->get_task_executor(), true, false);
_unfused_subgraph = network::allocate_network(get_network().get_stream_ptr(), prog, true, get_network().is_primary_stream());
}

View File

@ -66,6 +66,7 @@
#include "loop_inst.h"
#include "reverse_inst.h"
#include "unique_inst.hpp"
#include "condition_inst.h"
#include "to_string_utils.h"
// TODO: Remove once we have interface for kernels cache
@ -103,15 +104,58 @@
using namespace cldnn;
using namespace ov::intel_gpu;
static void adjust_num_cores(InferenceEngine::CPUStreamsExecutor::Config& config) {
if (InferenceEngine::getAvailableCoresTypes().size() == 1) {
return;
}
const auto total_num_cores = InferenceEngine::getNumberOfLogicalCPUCores();
const auto total_num_big_cores = InferenceEngine::getNumberOfLogicalCPUCores(true);
const auto total_num_little_cores = total_num_cores - total_num_big_cores;
auto core_type = config._threadPreferredCoreType;
int num_cores = total_num_cores;
if (core_type == InferenceEngine::IStreamsExecutor::Config::BIG) {
num_cores = total_num_big_cores;
} else if (core_type == InferenceEngine::IStreamsExecutor::Config::LITTLE) {
num_cores = total_num_little_cores;
}
config._streams = std::min(config._streams, num_cores);
}
static InferenceEngine::CPUStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags) {
InferenceEngine::CPUStreamsExecutor::Config task_executor_config(tags, 1);
task_executor_config._streams = config.get_property(ov::compilation_num_threads);
auto priority = config.get_property(ov::intel_gpu::hint::host_task_priority);
switch (priority) {
case ov::hint::Priority::LOW: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::LITTLE; break;
case ov::hint::Priority::MEDIUM: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::ANY; break;
case ov::hint::Priority::HIGH: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::BIG; break;
default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority);
}
adjust_num_cores(task_executor_config);
return task_executor_config;
}
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> program::make_task_executor(const ExecutionConfig& config) {
InferenceEngine::CPUStreamsExecutor::Config task_executor_config = make_task_executor_config(config, "CPU Tasks executor for GPU plugin");
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(task_executor_config);
}
program::program(engine& engine_ref,
topology const& topology,
const ExecutionConfig& config,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
bool is_internal,
bool no_optimizations,
bool is_body_program)
: _engine(engine_ref),
_stream(_engine.create_stream(config)),
_config(config),
_task_executor(task_executor),
processing_order(),
is_body_program(is_body_program) {
_config.apply_user_properties(_engine.get_device_info());
@ -162,7 +206,8 @@ void program::init_program() {
pm = std::unique_ptr<pass_manager>(new pass_manager(*this));
_task_executor = make_task_executor(_config);
if (_task_executor == nullptr)
_task_executor = program::make_task_executor(_config);
_kernels_cache = std::unique_ptr<kernels_cache>(new kernels_cache(_engine, _config, prog_id, _task_executor,
kernel_selector::KernelBase::get_db().get_batch_header_str()));
@ -194,58 +239,27 @@ void program::init_primitives() {
}
}
static void adjust_num_cores(InferenceEngine::CPUStreamsExecutor::Config& config) {
if (InferenceEngine::getAvailableCoresTypes().size() == 1) {
return;
}
const auto total_num_cores = InferenceEngine::getNumberOfLogicalCPUCores();
const auto total_num_big_cores = InferenceEngine::getNumberOfLogicalCPUCores(true);
const auto total_num_little_cores = total_num_cores - total_num_big_cores;
auto core_type = config._threadPreferredCoreType;
int num_cores = total_num_cores;
if (core_type == InferenceEngine::IStreamsExecutor::Config::BIG) {
num_cores = total_num_big_cores;
} else if (core_type == InferenceEngine::IStreamsExecutor::Config::LITTLE) {
num_cores = total_num_little_cores;
}
config._streams = std::min(config._streams, num_cores);
}
InferenceEngine::CPUStreamsExecutor::Config program::make_task_executor_config(const ExecutionConfig& config, std::string tags) const {
InferenceEngine::CPUStreamsExecutor::Config task_executor_config(tags, 1);
task_executor_config._streams = config.get_property(ov::compilation_num_threads);
auto priority = config.get_property(ov::intel_gpu::hint::host_task_priority);
switch (priority) {
case ov::hint::Priority::LOW: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::LITTLE; break;
case ov::hint::Priority::MEDIUM: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::ANY; break;
case ov::hint::Priority::HIGH: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::BIG; break;
default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority);
}
adjust_num_cores(task_executor_config);
return task_executor_config;
}
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> program::make_task_executor(const ExecutionConfig& config) const {
InferenceEngine::CPUStreamsExecutor::Config task_executor_config = make_task_executor_config(config, "CPU Tasks executor for GPU plugin");
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(task_executor_config);
}
kernels_cache& program::get_kernels_cache() const {
return *_kernels_cache;
}
program::ptr program::build_program(engine& engine,
const topology& topology,
const ExecutionConfig& config,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
bool is_internal,
bool no_optimizations,
bool is_body_program) {
return std::make_shared<program>(engine, topology, config, task_executor, is_internal, no_optimizations, is_body_program);
}
program::ptr program::build_program(engine& engine,
const topology& topology,
const ExecutionConfig& config,
bool is_internal,
bool no_optimizations,
bool is_body_program) {
return std::make_shared<program>(engine, topology, config, is_internal, no_optimizations, is_body_program);
return std::make_shared<program>(engine, topology, config, nullptr, is_internal, no_optimizations, is_body_program);
}
program::ptr program::build_program(engine& engine,
@ -597,8 +611,8 @@ void program::post_optimize_graph(bool is_internal) {
if (_config.get_property(ov::intel_gpu::optimize_data))
apply_opt_pass<remove_redundant_reorders>(lo, false, true, true); // pass to remove output reorders after all other graph optimizations are done
// update loop input/output primitive mappings
apply_opt_pass<update_loop_primitive_map>();
// update inner program input/output primitive mappings
apply_opt_pass<update_inner_program_io_map>();
// Recalculate processing order after all graph transformation to keep optimal primitives ordering
// for OOO queue
@ -1007,12 +1021,18 @@ bool program::extract(program_node& node) {
if (user->is_type<loop>()) {
loop_node& loop = *user;
loop.update_primitive_map(node.id(), input.id());
} else if (user->is_type<condition>()) {
condition_node& cond = *user;
cond.update_primitive_map(node.id(), input.id());
}
for (auto& dep : node.dependencies) {
if (dep.first->is_type<loop>()) {
loop_node& loop = *dep.first;
loop.update_primitive_map(node.id(), user->id());
} else if (dep.first->is_type<condition>()) {
condition_node& cond = *dep.first;
cond.update_primitive_map(node.id(), user->id());
}
}
}
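With make_task_executor now public and static, a single executor can be created once and shared by the main program and any inner (branch/body) programs. A usage sketch against this commit's signatures (topology names are hypothetical):

auto executor = cldnn::program::make_task_executor(config);
auto outer = cldnn::program::build_program(engine, outer_topology, config, executor);
auto inner = cldnn::program::build_program(engine, body_topology, config, executor,
                                           true /*is_internal*/);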

View File

@ -25,6 +25,19 @@ layout roi_align_inst::calc_output_layout(roi_align_node const& node, kernel_imp
{num_rois, num_channels, primitive->pooled_h, primitive->pooled_w});
}
template<typename ShapeType>
std::vector<layout> roi_align_inst::calc_output_layouts(roi_align_node const& node, kernel_impl_params const& impl_param) {
auto primitive = impl_param.typed_desc<roi_align>();
auto input_layout = impl_param.get_input_layout(0);
auto rois_layout = impl_param.get_input_layout(1);
auto num_rois = rois_layout.get_partial_shape()[0];
auto num_channels = input_layout.get_partial_shape()[1];
return {layout({num_rois, num_channels, primitive->pooled_h, primitive->pooled_w}, input_layout.data_type, input_layout.format) };
}
template
std::vector<layout> roi_align_inst::calc_output_layouts<ov::PartialShape>(roi_align_node const& node, const kernel_impl_params& impl_param);
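// Worked example (sample values, not from the commit): input {1,256,200,272},
// rois {300,4}, pooled_h = pooled_w = 7 -> output layout {300,256,7,7}.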
std::string roi_align_inst::to_string(roi_align_node const& node) {
auto node_info = node.desc_to_json();
json_composite roi_align_info;

View File

@ -0,0 +1,91 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/plugin/program.hpp"
#include "ngraph/op/if.hpp"
#include "ie_ngraph_utils.hpp"
#include "intel_gpu/primitives/condition.hpp"
namespace ov {
namespace intel_gpu {
const size_t idx_true = 0;
const size_t idx_false = 1;
static cldnn::condition::branch gen_branch(Program& p, const std::shared_ptr<ngraph::op::v8::If>& op, size_t idx) {
cldnn::condition::branch branch;
const auto& internal_body = (idx == idx_true)? op->get_then_body() : op->get_else_body();
InferenceEngine::CNNNetwork body_network(internal_body);
{
// CNNNetwork changes the input/output data type to fp32 when the original type is fp16.
// To run the internal body, roll back the input/output data types to the original ones.
size_t tidx = 0;
auto& model_inputs = internal_body->get_parameters();
for (auto& in : body_network.getInputsInfo()) {
auto input_data_type = InferenceEngine::details::convertPrecision(model_inputs[tidx++]->get_output_tensor(0).get_element_type());
if (in.second->getPrecision() != input_data_type)
in.second->setPrecision(input_data_type);
}
tidx = 0;
for (auto& out : body_network.getOutputsInfo()) {
const auto& model_output = internal_body->get_output_op(tidx++);
auto output_data_type = InferenceEngine::details::convertPrecision(model_output->get_output_tensor(0).get_element_type());
if (out.second->getPrecision() != output_data_type)
out.second->setPrecision(output_data_type);
}
}
auto config = p.get_config();
config.set_property(ov::intel_gpu::max_dynamic_batch(1));
config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic()));
Program prog(body_network, p.get_engine(), config, false, false, nullptr, nullptr, p.get_task_executor(), true);
branch.inner_program = prog.GetCompiledProgram();
auto& input_map = branch.input_map;
auto external_inputs = p.GetInputInfo(op);
auto internal_inputs = internal_body->get_parameters();
auto input_desc_vec = op->get_input_descriptions(static_cast<int>(idx));
for (auto& in_desc : input_desc_vec) {
const auto& external_id = external_inputs.at(in_desc->m_input_index).pid;
const auto& internal_id = layer_type_name_ID(internal_inputs.at(in_desc->m_body_parameter_index));
input_map.insert({external_id, internal_id});
}
auto& output_map = branch.output_map;
auto internal_outputs = internal_body->get_results();
auto output_desc_vec = op->get_output_descriptions(static_cast<int>(idx));
for (auto& out_desc : output_desc_vec) {
const auto& internal_id = layer_type_name_ID(internal_outputs.at(out_desc->m_body_value_index));
output_map.insert({out_desc->m_output_index, internal_id});
}
return branch;
}
static void CreateIfOp(Program& p, const std::shared_ptr<ngraph::op::v8::If>& op) {
auto inputs = p.GetInputInfo(op);
OPENVINO_ASSERT(inputs.size() >= 1, "Invalid input count (at least one input is required)");
auto compare_node_pshape = op->get_input_partial_shape(0);
auto p_input_name = inputs[0].pid;
std::string type_name_str = op->get_input_node_ptr(0)->get_type_name();
const std::string layerName = layer_type_name_ID(op);
auto branch_true = gen_branch(p, op, idx_true);
auto branch_false = gen_branch(p, op, idx_false);
const cldnn::condition conditionPrimitive(layerName,
inputs,
branch_true,
branch_false);
p.add_primitive(*op, conditionPrimitive);
}
REGISTER_FACTORY_IMPL(v8, If);
} // namespace intel_gpu
} // namespace ov
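For reference, a minimal ngraph-side If that this factory translates (a hedged sketch; pred, x, and the two single-output bodies with their Parameters/Results are assumed to be built elsewhere):

auto if_op = std::make_shared<ngraph::op::v8::If>(pred); // pred: scalar boolean output
if_op->set_then_body(then_body);
if_op->set_else_body(else_body);
if_op->set_input(x, then_param, else_param); // bind outer x to each body's Parameter
auto out = if_op->set_output(then_result, else_result); // merge the two Results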

View File

@ -16,6 +16,8 @@
#include "intel_gpu/primitives/mutable_data.hpp"
#include "intel_gpu/primitives/data.hpp"
#include <ie_system_conf.h>
#ifdef __linux__
# include <dlfcn.h>
#endif
@ -121,11 +123,15 @@ bool Program::IsDynBatchModel(const std::shared_ptr<ov::Model>& model,
Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
bool createTopologyOnly, bool partialBuild,
InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs)
InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor, bool innerProgram)
: m_curBatch(-1)
, m_config(config)
, m_engine(engine)
, queryMode(false) {
, queryMode(false)
, m_task_executor(task_executor) {
if (m_task_executor == nullptr)
m_task_executor = cldnn::program::make_task_executor(m_config);
// Extract inputs/outputs info from CNNNetwork
auto networkInputs = (inputs != nullptr) ? *inputs : network.getInputsInfo();
auto networkOutputs = (outputs != nullptr) ? *outputs : network.getOutputsInfo();
@ -179,7 +185,8 @@ Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, co
int m_bv_sz = GetMaxBatchSizeForSingleProgram();
m_max_batch = static_cast<int>(m_config.get_property(ov::intel_gpu::max_dynamic_batch));
if (dyn_shape_batch_found || m_max_batch > 1) {
// Do not apply dynamic batch for inner program (only single batch is allowed)
if (!innerProgram && (dyn_shape_batch_found || m_max_batch > 1)) {
// compile log2 networks to serve dynamic batch requests
for (int b = m_bv_sz - 1; b >= 0; b--) {
inputLayouts.clear();
@ -290,7 +297,7 @@ Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, co
m_input_batch_dim = batch_dim;
}
} else {
m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild));
m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild, innerProgram));
}
}
@ -301,6 +308,7 @@ Program::Program(cldnn::engine& engine, const ExecutionConfig& config,
, m_config(config)
, m_engine(engine)
, queryMode(false) {
m_task_executor = cldnn::program::make_task_executor(m_config);
if (inputs != nullptr)
m_networkInputs = *inputs;
if (outputs != nullptr)
@ -356,9 +364,11 @@ void Program::CleanupBuild() {
std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::shared_ptr<ngraph::Node>>& ops,
InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs,
bool createTopologyOnly, bool partialBuild) {
bool createTopologyOnly, bool partialBuild, bool innerProgram) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::BuildProgram");
// In the case of an inner program, the allow_new_shape_infer flag is set from outside the program,
// so do not check allow_new_shape_infer when building an inner program.
for (const auto& op : ops) {
if (requires_new_shape_infer(*op)) {
allow_new_shape_infer = true;
@ -366,6 +376,10 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
}
}
if (innerProgram) {
allow_new_shape_infer = (m_config.get_property(ov::intel_gpu::allow_new_shape_infer) || allow_new_shape_infer);
}
m_config.set_property(ov::intel_gpu::partial_build_program(partialBuild));
m_config.set_property(ov::intel_gpu::optimize_data(true));
m_config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer));
@ -383,7 +397,7 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::CreateProgram");
cldnn::program::ptr program;
try {
program = cldnn::program::build_program(m_engine, *m_topology, m_config);
program = cldnn::program::build_program(m_engine, *m_topology, m_config, get_task_executor());
} catch (std::exception& e) {
OPENVINO_ASSERT(false, "GPU program build failed!\n", e.what());
}

View File

@ -0,0 +1,696 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "common_test_utils/test_constants.hpp"
#include "shared_test_classes/base/utils/ranges.hpp"
#include <common_test_utils/ov_tensor_utils.hpp>
using namespace InferenceEngine;
using namespace ov::test;
namespace GPULayerTestsDefinitions {
class InnerBodyGenerator {
public:
using ptr = std::shared_ptr<InnerBodyGenerator>;
enum InnerBodyType {
/**
* Simple inner body with single constant value
*/
Type01 = 1,
/**
* Inner body with eltwise multiply
*/
Type02 = 2,
/**
* Inner body with eltwise sum
*/
Type03 = 3,
/**
* Inner body with eltwise multiply followed by pooling;
* for the same input shape, its output shape differs from Type02 and Type03
* (e.g. a {1, 1, 4, 4} input becomes {1, 1, 2, 2} after the 2x2 stride-2 pooling)
*/
Type04 = 4,
/**
* Inner body with nested condition case
*/
Type05 = 5
};
public:
InnerBodyGenerator() { }
virtual std::shared_ptr<ngraph::Function> get_function() { return _func; }
virtual std::shared_ptr<ngraph::opset9::Parameter> get_input() { return _param; }
virtual std::shared_ptr<ngraph::opset1::Result> get_result() { return _result; }
virtual void create_body(ov::PartialShape& input_shape, ngraph::element::Type prc) {
_func = generate(input_shape, prc);
_param = (_func->get_parameters().size() > 0) ? _func->get_parameters().front() : nullptr;
_result = _func->get_results().front();
}
protected:
virtual std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) = 0;
std::shared_ptr<ngraph::Function> _func;
std::shared_ptr<ngraph::opset9::Parameter> _param;
std::shared_ptr<ngraph::opset1::Result> _result;
};
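Each concrete generator below only overrides generate(). As an illustration of the contract, a hypothetical extra body type (not part of this suite) would look like:

class InnerBodyTypeRelu : public InnerBodyGenerator {          // hypothetical, for illustration only
protected:
    std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
        auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
        auto relu = std::make_shared<ngraph::opset9::Relu>(data);
        auto result = std::make_shared<ngraph::opset1::Result>(relu);
        return std::make_shared<ngraph::Function>(ngraph::OutputVector{result},
                                                  ngraph::ParameterVector{data},
                                                  "relu_body");
    }
};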
class InnerBodyType01 : public InnerBodyGenerator {
protected:
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
auto constantA = ngraph::opset9::Constant::create(prc, ov::Shape(input_shape.rank().get_length(), 2), {2.0f});
constantA->set_friendly_name("body1_constantA");
auto constantB = ngraph::opset9::Constant::create(prc, ov::Shape(input_shape.rank().get_length(), 2), {12.0f});
constantB->set_friendly_name("body1_constantB");
auto add = std::make_shared<ngraph::opset9::Add>(constantA, constantB);
add->set_friendly_name("body1_add");
auto result = std::make_shared<ngraph::opset1::Result>(add);
result->set_friendly_name("body1_result");
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {result},
ngraph::ParameterVector{},
"constant");
return body;
}
};
class InnerBodyType02 : public InnerBodyGenerator {
protected:
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
auto constant = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 10.0f);
constant->set_friendly_name("body2_const");
auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
data->set_friendly_name("body2_data");
auto mul = std::make_shared<ngraph::opset9::Multiply>(data, constant);
mul->set_friendly_name("body2_mul");
auto result = std::make_shared<ngraph::opset1::Result>(mul);
result->set_friendly_name("body2_result");
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {result},
ngraph::ParameterVector{data},
"eltwise_mul");
return body;
}
};
class InnerBodyType03 : public InnerBodyGenerator {
protected:
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
auto constant = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 2.0f);
constant->set_friendly_name("body3_constant");
auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
data->set_friendly_name("body3_data");
auto add = std::make_shared<ngraph::opset9::Add>(data, constant);
add->set_friendly_name("body3_add");
auto result = std::make_shared<ngraph::opset1::Result>(add);
result->set_friendly_name("body3_result");
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {result},
ngraph::ParameterVector{data},
"eltwise_sum");
return body;
}
};
class InnerBodyType04 : public InnerBodyGenerator {
protected:
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
auto scale = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 2.0f);
scale->set_friendly_name("body4_scale");
auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
data->set_friendly_name("body4_data");
auto mul = std::make_shared<ngraph::opset9::Multiply>(data, scale);
mul->set_friendly_name("body4_mul");
auto pooling = generate_pooling(mul, input_shape);
pooling->set_friendly_name("body4_pool");
auto result = std::make_shared<ngraph::opset1::Result>(pooling);
result->set_friendly_name("body4_result");
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {result},
ngraph::ParameterVector{data},
"eltwise_mul_pooling");
return body;
}
struct poolSpecificParams {
ngraph::helpers::PoolingTypes pooling_type; // Pooling type, max or avg
std::vector<size_t> kernel_size; // Kernel size
std::vector<size_t> stride; // Stride
std::vector<size_t> pad_begin; // Pad begin
std::vector<size_t> pad_end; // Pad end
ngraph::op::RoundingType rounding_type; // Rounding type
ngraph::op::PadType pad_type; // Pad type
bool excluded_pad;                      // Exclude pad
};
std::shared_ptr<ov::Node> generate_pooling(const ngraph::Output<ov::Node> &in, ov::PartialShape& input_shape) {
poolSpecificParams params;
switch (input_shape.rank().get_length()) {
case 5:
{
params = poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX,
{2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0},
ngraph::op::RoundingType::CEIL,
ngraph::op::PadType::SAME_LOWER, true };
break;
}
case 4:
{
params = poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX,
{2, 2}, {2, 2}, {0, 0}, {0, 0},
ngraph::op::RoundingType::CEIL,
ngraph::op::PadType::SAME_LOWER, true };
break;
}
case 3:
{
params = poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX,
{2}, {2}, {0}, {0},
ngraph::op::RoundingType::CEIL,
ngraph::op::PadType::SAME_LOWER, true };
break;
}
default:
{
OPENVINO_ASSERT(false, "Not allowed other rank");
}
}
return ngraph::builder::makePooling(in, params.stride, params.pad_begin,
params.pad_end, params.kernel_size, params.rounding_type,
params.pad_type, params.excluded_pad, params.pooling_type);
}
};
class InnerBodyType05 : public InnerBodyGenerator {
protected:
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
auto constant = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 2.0f);
constant->set_friendly_name("body5_constant");
auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
data->set_friendly_name("body5_data");
auto add = std::make_shared<ngraph::opset9::Add>(data, constant);
add->set_friendly_name("body5_add");
std::vector<int> axes;
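// Collect one axis per dimension, counting down from 0 (0, -1, -2, ...): reduce over all axes.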
for (int i = 0, r = 0; i < input_shape.rank().get_length(); i++) {
axes.push_back(r--);
}
std::vector<size_t> shapeAxes;
shapeAxes.push_back(axes.size());
auto reductionAxesNode = std::dynamic_pointer_cast<ngraph::Node>(
std::make_shared<ngraph::opset3::Constant>(ngraph::element::Type_t::i64, ngraph::Shape(shapeAxes), axes));
const auto reduce = ngraph::builder::makeReduce(add, reductionAxesNode, false, ngraph::helpers::ReductionType::Min);
reduce->set_friendly_name("body5_reduce");
auto constant_ref = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 10.0f);
constant_ref->set_friendly_name("body5_ref_constant");
auto pred = std::make_shared<ngraph::opset3::GreaterEqual>(reduce, constant_ref);
pred->set_friendly_name("nested_pred");
auto nested_body_then_generator = std::make_shared<InnerBodyType03>();
auto nested_body_else_generator = std::make_shared<InnerBodyType04>();
auto nested_input_shape = add->get_output_partial_shape(0);
nested_body_then_generator->create_body(nested_input_shape, prc);
nested_body_else_generator->create_body(nested_input_shape, prc);
nested_body_then_generator->get_function()->set_friendly_name("nested_then_inner_body");
nested_body_else_generator->get_function()->set_friendly_name("nested_else_inner_body");
auto cond_nested = std::make_shared<ngraph::opset8::If>(pred);
cond_nested->set_friendly_name("if_operator_nested");
cond_nested->set_else_body(nested_body_else_generator->get_function());
cond_nested->set_then_body(nested_body_then_generator->get_function());
cond_nested->set_input(add, nested_body_then_generator->get_input(), nested_body_else_generator->get_input());
cond_nested->set_output(nested_body_then_generator->get_result(), nested_body_else_generator->get_result());
auto result = std::make_shared<ngraph::opset1::Result>(cond_nested);
result->set_friendly_name("body5_result");
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {result},
ngraph::ParameterVector{data},
"eltwise_sum");
return body;
}
};
static std::shared_ptr<InnerBodyGenerator> get_inner_body_generator(InnerBodyGenerator::InnerBodyType type) {
switch (type) {
case InnerBodyGenerator::InnerBodyType::Type01:
{
return std::make_shared<InnerBodyType01>();
}
case InnerBodyGenerator::InnerBodyType::Type02:
{
return std::make_shared<InnerBodyType02>();
}
case InnerBodyGenerator::InnerBodyType::Type03:
{
return std::make_shared<InnerBodyType03>();
}
case InnerBodyGenerator::InnerBodyType::Type04:
{
return std::make_shared<InnerBodyType04>();
}
case InnerBodyGenerator::InnerBodyType::Type05:
{
return std::make_shared<InnerBodyType05>();
}
default:
{
OPENVINO_ASSERT(false, "Not supported type");
}
}
}
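A minimal usage sketch of this factory (shape and precision chosen arbitrarily):

    auto body_gen = get_inner_body_generator(InnerBodyGenerator::InnerBodyType::Type03);
    ov::PartialShape body_shape{1, 1, 4, 4};
    body_gen->create_body(body_shape, ngraph::element::f32);
    auto body_func   = body_gen->get_function();   // branch ngraph::Function
    auto body_param  = body_gen->get_input();      // nullptr for parameterless bodies such as Type01
    auto body_result = body_gen->get_result();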
class TestModelGenerator {
public:
enum PredicateTypes {
PARAM,
NODE
};
public:
TestModelGenerator(InnerBodyGenerator::InnerBodyType then_body_type,
InnerBodyGenerator::InnerBodyType else_body_type,
PredicateTypes pred_type,
ngraph::element::Type prc,
ov::PartialShape input_shape,
bool cond_execution_value = false) {
body_then_generator = get_inner_body_generator(then_body_type);
body_else_generator = get_inner_body_generator(else_body_type);
body_then_generator->create_body(input_shape, prc);
body_else_generator->create_body(input_shape, prc);
body_else_generator->get_function()->set_friendly_name("else_inner_body");
body_then_generator->get_function()->set_friendly_name("then_inner_body");
ngraph::ParameterVector params{};
auto predicate = create_cond_execution(pred_type, params, ngraph::element::boolean, ngraph::Shape{});
predicate->set_friendly_name("if_predicate");
auto data = create_condition_input(params, prc, input_shape);
data->set_friendly_name("input_data");
auto cond = std::make_shared<ngraph::opset8::If>(predicate);
cond->set_friendly_name("if_operator");
cond->set_else_body(body_else_generator->get_function());
cond->set_then_body(body_then_generator->get_function());
cond->set_input(data, body_then_generator->get_input(), body_else_generator->get_input());
cond->set_output(body_then_generator->get_result(), body_else_generator->get_result());
auto result = std::make_shared<ngraph::opset1::Result>(cond);
result->set_friendly_name("outer_result");
function = std::make_shared<ngraph::Function>(ngraph::OutputVector {result}, params);
}
std::shared_ptr<ngraph::Function> get_function() { return function; }
private:
std::shared_ptr<ngraph::Node> create_condition_input(ngraph::ParameterVector& params,
const ngraph::element::Type prc, const ov::PartialShape& shape,
int value = 0, bool is_static = false) {
if (is_static)
return std::make_shared<ngraph::opset9::Constant>(prc, shape.to_shape(), value);
auto input = std::make_shared<ngraph::opset9::Parameter>(prc, shape);
params.push_back(input);
return input;
}
std::shared_ptr<ngraph::Node> create_cond_execution(PredicateTypes pred_type,
ngraph::ParameterVector& params,
const ngraph::element::Type prc = ngraph::element::u8,
const ngraph::Shape shape = ngraph::Shape{}) {
std::shared_ptr<ngraph::Node> pred;
switch (pred_type) {
case PredicateTypes::PARAM:
{
pred = create_condition_input(params, prc, shape);
break;
}
case PredicateTypes::NODE:
{
auto param_cond = create_condition_input(params, prc, shape);
param_cond->set_friendly_name("param_cond");
auto const_cond = create_condition_input(params, prc, ngraph::Shape{}, 1, true);
const_cond->set_friendly_name("const_cond");
pred = std::make_shared<ngraph::opset3::GreaterEqual>(param_cond, const_cond);
pred->set_friendly_name("pred");
break;
}
default:
{
OPENVINO_ASSERT(false, "Not supported type");
}
}
return pred;
}
private:
std::shared_ptr<ngraph::Function> function;
InnerBodyGenerator::ptr body_then_generator;
InnerBodyGenerator::ptr body_else_generator;
};
static std::ostream& operator<<(std::ostream& os, const InnerBodyGenerator::InnerBodyType type) {
switch (type) {
case InnerBodyGenerator::InnerBodyType::Type01:
{
os << "Type01";
break;
}
case InnerBodyGenerator::InnerBodyType::Type02:
{
os << "Type02";
break;
}
case InnerBodyGenerator::InnerBodyType::Type03:
{
os << "Type03";
break;
}
case InnerBodyGenerator::InnerBodyType::Type04:
{
os << "Type04";
break;
}
case InnerBodyGenerator::InnerBodyType::Type05:
{
os << "Type05";
break;
}
default:
{
os << "NONE";
break;
}
}
return os;
}
static std::ostream& operator<<(std::ostream& os, const TestModelGenerator::PredicateTypes type) {
switch (type) {
case TestModelGenerator::PredicateTypes::PARAM:
{
os << "PARAM";
break;
}
case TestModelGenerator::PredicateTypes::NODE:
{
os << "NODE";
break;
}
default:
{
os << "NONE";
break;
}
}
return os;
}
using ConditionParams = typename std::tuple<
InferenceEngine::SizeVector, // Shape
InferenceEngine::Precision, // Precision
TestModelGenerator::PredicateTypes, // if predicate type
LayerTestsUtils::TargetDevice // Device name
>;
class StaticConditionLayerGPUTest : public testing::WithParamInterface<ConditionParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<ConditionParams>& obj) {
InferenceEngine::SizeVector data_shape;
InferenceEngine::Precision data_prc;
TestModelGenerator::PredicateTypes pred;
std::string targetDevice;
std::tie(data_shape, data_prc, pred, targetDevice) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(data_shape) << "_";
result << "netPRC=" << std::to_string(data_prc) << "_";
result << "ifCond=" << pred << "_";
result << "targetDevice=" << targetDevice << "_";
auto res_str = result.str();
std::replace(res_str.begin(), res_str.end(), '-', '_');
return res_str;
}
protected:
void SetUp() override {
targetDevice = CommonTestUtils::DEVICE_GPU;
TestModelGenerator::PredicateTypes pred;
std::tie(data_shape, data_prc, pred, targetDevice) = GetParam();
const auto ngShape = ov::PartialShape{data_shape};
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(data_prc);
TestModelGenerator model_generator(InnerBodyGenerator::InnerBodyType::Type02,
InnerBodyGenerator::InnerBodyType::Type03,
pred,
prc,
ngShape);
function = model_generator.get_function();
}
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override {
auto tensor_desc = info.getTensorDesc();
auto blob = make_blob_with_precision(tensor_desc);
blob->allocate();
if (tensor_desc.getLayout() == InferenceEngine::SCALAR) {
auto prc = tensor_desc.getPrecision();
auto scalar_1d = CommonTestUtils::make_reshape_view(blob, {1});
if (prc == InferenceEngine::Precision::BOOL) {
auto mem_blob = dynamic_cast<InferenceEngine::MemoryBlob*>(blob.get());
auto mem = mem_blob->rwmap();
auto data_ptr = mem.as<bool*>();
*data_ptr = false;
} else {
CommonTestUtils::fill_data_with_broadcast(scalar_1d, 0, {20.f});
}
} else {
CommonTestUtils::fill_data_with_broadcast(blob, 0, {20.f});
}
return blob;
}
InferenceEngine::SizeVector data_shape;
InferenceEngine::Precision data_prc;
};
TEST_P(StaticConditionLayerGPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED();
Run();
}
std::vector<InferenceEngine::Precision> netPrecisions_static = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16,
InferenceEngine::Precision::I8
};
std::vector<InferenceEngine::SizeVector> inputs_shape = {
{3, 6}
};
std::vector<GPULayerTestsDefinitions::TestModelGenerator::PredicateTypes> if_cond_types = {
GPULayerTestsDefinitions::TestModelGenerator::PredicateTypes::PARAM
};
INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_static, StaticConditionLayerGPUTest,
testing::Combine(
testing::ValuesIn(inputs_shape),
testing::ValuesIn(netPrecisions_static),
testing::ValuesIn(if_cond_types),
testing::Values<std::string>(CommonTestUtils::DEVICE_GPU)),
StaticConditionLayerGPUTest::getTestCaseName);
/// Dynamic shape test
struct InnerBodyTypeParams {
InnerBodyGenerator::InnerBodyType then_body_type;
InnerBodyGenerator::InnerBodyType else_body_type;
};
using ConditionGPUParams = typename std::tuple<
InputShape, // Input Shapes
InnerBodyTypeParams, // Inner body type
InferenceEngine::Precision, // Precision
TestModelGenerator::PredicateTypes, // if predicate type
LayerTestsUtils::TargetDevice // Device name
>;
class DynamicConditionLayerGPUTest : public testing::WithParamInterface<ConditionGPUParams>,
virtual public SubgraphBaseTest {
public:
static std::string getTestCaseName(const testing::TestParamInfo<ConditionGPUParams>& obj) {
InputShape inputShapes;
InnerBodyTypeParams bodyParams;
InferenceEngine::Precision dataPrc;
TestModelGenerator::PredicateTypes condType;
std::string targetDevice;
std::tie(inputShapes, bodyParams, dataPrc, condType, targetDevice) = obj.param;
std::ostringstream result;
result << "IS=(";
result << CommonTestUtils::partialShape2str({inputShapes.first}) << "_";
for (size_t i = 0lu; i < inputShapes.second.size(); i++) {
result << "{";
result << CommonTestUtils::vec2str(inputShapes.second[i]) << "_";
result << "}_";
}
result << ")_";
result << "innerBody={" << bodyParams.then_body_type << ", " << bodyParams.else_body_type << "}_";
result << "netPRC=" << dataPrc << "_";
result << "ifCond=" << condType << "_";
result << "targetDevice=" << targetDevice << "_";
auto res_str = result.str();
std::replace(res_str.begin(), res_str.end(), '-', '_');
return res_str;
}
protected:
void SetUp() override {
InputShape inputShapes;
InnerBodyTypeParams bodyParams;
InferenceEngine::Precision dataPrc;
TestModelGenerator::PredicateTypes condType;
std::tie(inputShapes, bodyParams, dataPrc, condType, targetDevice) = GetParam();
auto num_second = inputShapes.second.size();
std::vector<ov::Shape> condSecondVec;
for (size_t i = 0; i < num_second; i++) {
condSecondVec.push_back({});
}
auto condShapes = ov::test::InputShape(ov::PartialShape({}), condSecondVec);
init_input_shapes({condShapes, inputShapes});
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(dataPrc);
TestModelGenerator model_generator(bodyParams.then_body_type,
bodyParams.else_body_type,
condType,
prc,
inputShapes.first);
function = model_generator.get_function();
function->set_friendly_name("if_operator_outer");
}
/**
* @brief Overrides generate_inputs to supply a boolean scalar tensor for the If operator's predicate.
*
* @param targetInputStaticShapes static shapes resolved for the current inference iteration
*/
void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
ov::Shape input_shape;
for (auto& shape : targetInputStaticShapes) {
if (shape.size() > 1) {
input_shape = shape;
break;
}
}
inputs.clear();
for (const auto &param : function->get_parameters()) {
if (param->get_output_element_type(0) == ov::element::boolean) {
auto tensor = ov::Tensor{ov::element::boolean, {}};
auto p_data = tensor.data<ov::element_type_traits<ov::element::boolean>::value_type>();
p_data[0] = (niter++ % 2);
inputs.insert({param, tensor});
} else {
ov::test::utils::InputGenerateData inGenData;
inGenData.range = 10;
inGenData.start_from = 0;
inGenData.resolution = 128;
inGenData.seed = 1;
auto tensor = ov::test::utils::create_and_fill_tensor(param->get_element_type(), input_shape, inGenData.range,
inGenData.start_from, inGenData.resolution, inGenData.seed);
inputs.insert({param, tensor});
}
}
}
size_t niter = 0;
};
TEST_P(DynamicConditionLayerGPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
run();
}
const std::vector<InferenceEngine::Precision> netPrecisions_f32 = {
InferenceEngine::Precision::FP32
};
const std::vector<InferenceEngine::Precision> netPrecisions_f16 = {
InferenceEngine::Precision::FP16
};
const std::vector<ov::test::InputShape> dynamicInputShapes_f32 = {
ov::test::InputShape(ov::PartialShape({-1, -1, -1, -1, -1}), {{4, 1, 1, 64, 32}, {6, 1, 1, 8, 4}, {8, 1, 1, 24, 16}}),
ov::test::InputShape(ov::PartialShape({1, 1, -1, -1}), {{1, 1, 64, 32}, {1, 1, 8, 4}, {1, 1, 24, 16}})
};
const std::vector<ov::test::InputShape> dynamicInputShapes_f16 = {
ov::test::InputShape(ov::PartialShape({1, 1, -1, -1}), {{1, 1, 64, 32}, {1, 1, 8, 4}, {1, 1, 24, 16}}),
ov::test::InputShape(ov::PartialShape({-1, -1, -1}), {{2, 24, 16}, {2, 64, 32}, {2, 8, 4}})
};
const std::vector<InnerBodyTypeParams> innerBodyTypes_f32 = {
{
InnerBodyGenerator::InnerBodyType::Type01,
InnerBodyGenerator::InnerBodyType::Type02
},
{
InnerBodyGenerator::InnerBodyType::Type02,
InnerBodyGenerator::InnerBodyType::Type03
}
};
const std::vector<InnerBodyTypeParams> innerBodyTypes_f16 = {
{
InnerBodyGenerator::InnerBodyType::Type04,
InnerBodyGenerator::InnerBodyType::Type03
},
{
InnerBodyGenerator::InnerBodyType::Type02,
InnerBodyGenerator::InnerBodyType::Type05
}
};
const std::vector<TestModelGenerator::PredicateTypes> condTypes = {
TestModelGenerator::PredicateTypes::PARAM,
TestModelGenerator::PredicateTypes::NODE
};
INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_dynamic_f32, DynamicConditionLayerGPUTest,
testing::Combine(
testing::ValuesIn(dynamicInputShapes_f32), // input shapes
testing::ValuesIn(innerBodyTypes_f32), // inner body type
testing::ValuesIn(netPrecisions_f32), // network precision
testing::ValuesIn(condTypes), // cond type
testing::Values<std::string>(CommonTestUtils::DEVICE_GPU)), // device type
DynamicConditionLayerGPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_dynamic_f16, DynamicConditionLayerGPUTest,
testing::Combine(
testing::ValuesIn(dynamicInputShapes_f16), // input shapes
testing::ValuesIn(innerBodyTypes_f16), // inner body type
testing::ValuesIn(netPrecisions_f16), // network precision
testing::ValuesIn(condTypes), // cond type
testing::Values<std::string>(CommonTestUtils::DEVICE_GPU)), // device type
DynamicConditionLayerGPUTest::getTestCaseName);
} // namespace GPULayerTestsDefinitions

View File

@ -18,138 +18,151 @@ using namespace cldnn;
using namespace ::tests;
namespace {
bool is_output_equal(const cldnn::memory::ptr mem, const std::vector<float>& ref)
template <class T>
bool is_output_equal(const cldnn::memory::ptr mem, const std::vector<T>& ref)
{
cldnn::mem_lock<float> ptr(mem, get_test_stream());
cldnn::mem_lock<T> ptr(mem, get_test_stream());
for (size_t i = 0; i < mem->get_layout().count(); i++) {
if (!are_equal(ptr[i], ref[i])) return false;
}
return true;
}
topology generate_simple_branch (bool branch_true_false, const primitive_id& input_id)
topology generate_simple_branch(bool branch_true_false, const primitive_id& id, const primitive_id& input_id, const data_types dt = data_types::f32)
{
topology branch;
if (branch_true_false) {
branch.add(
pooling(input_id + "_when_true", input_id, cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
input_layout(input_id, { dt, format::bfyx,{ 1, 1, 4, 1 } }),
pooling(id + "_when_true", input_id, cldnn::pooling_mode::max, { 1, 2 }, { 1, 2 })
);
} else {
branch.add(
pooling(input_id + "_when_false", input_id, cldnn::pooling_mode::average, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
input_layout(input_id, { dt, format::bfyx,{ 1, 1, 4, 1 } }),
pooling(id + "_when_false", input_id, cldnn::pooling_mode::average, { 1, 2 }, { 1, 2 })
);
}
return branch;
}
std::pair<std::vector<float>, std::vector<float>> get_values_to_compare(const cldnn::tensor& offset,
const cldnn::tensor& range,
const std::vector<float>& values,
const cldnn::layout& input_lay,
const cond_functions& func) {
std::vector<float> ret_true;
std::vector<float> ret_false;
auto mem_desc = generic_test::get_linear_memory_desc(input_lay);
for (int32_t b = 0; b < range.batch[0]; b++) {
for (int32_t f = 0; f < range.feature[0]; f++) {
for (int32_t y = 0; y < range.spatial[1]; y++) {
for (int32_t x = 0; x < range.spatial[0]; x++) {
auto linear_idx = generic_test::get_linear_index(
input_lay,
offset.batch[0] + b,
offset.feature[0] + f,
offset.spatial[1] + y,
offset.spatial[0] + x,
mem_desc);
switch (func) {
case cond_functions::EQUAL:
ret_true.push_back(values.at(linear_idx));
ret_false.push_back(-1.0f);
break;
case cond_functions::GREATER:
ret_true.push_back(values.at(linear_idx) - 1.0f);
ret_false.push_back(99.0f);
break;
case cond_functions::LESS:
ret_true.push_back(values.at(linear_idx) + 1.0f);
ret_false.push_back(-1.0f);
break;
}
}
}
}
}
return { ret_true, ret_false };
}
} // namespace
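All of the reworked tests below wire branches the same way: a condition::branch holds a prebuilt internal program plus two maps, input_map (outer primitive id -> inner input_layout id) and output_map (output index -> inner primitive id). A condensed sketch of the pattern, with illustrative ids:

    condition::branch branch_true;
    branch_true.inner_program = program::build_program(engine, branch_topology, config, true);  // is_internal = true
    branch_true.input_map.insert({"input", "branch_input"});   // outer primitive id -> inner input_layout id
    branch_true.output_map.insert({0, "branch_output"});       // output index -> inner primitive id
    // ... build branch_false the same way, then:
    topology.add(
        condition("condi", { input_info("predicate"), input_info("input") }, branch_true, branch_false)
    );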
TEST(DISABLED_condition_gpu, basic_equal_comp) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
template < typename DataType>
struct condition_data_types {
using type = DataType;
static const data_types data_type = type_to_data_type<DataType>::value;
};
topology branch_true = generate_simple_branch(true, "condi");
topology branch_false = generate_simple_branch(false, "condi");
template <typename ConditionDataType>
class condition_gpu_basic_test : public ::testing::Test {
public:
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
);
topology.add(
input_layout("scale_data", scale_mem->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
);
topology.add(
eltwise("output", { input_info("condi"), input_info("scale_data") }, eltwise_mode::prod)
);
using input_type = typename ConditionDataType::type;
std::vector<input_type> convert_data(std::vector<int> in_vec) {
const size_t vec_size = in_vec.size();
std::vector<input_type> converted_data_vec(vec_size);
for (size_t i = 0; i < vec_size; i++) {
converted_data_vec[i] = (input_type)in_vec[i];
}
return converted_data_vec;
}
network net(engine, topology, config);
set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f });
set_values(scale_mem, { 10.0f });
net.set_input_data("input", input);
net.set_input_data("scale_data", scale_mem);
void run_test() {
auto& engine = get_test_engine();
decltype(net.execute()) out;
auto dat_dt = ConditionDataType::data_type;
//WHEN TRUE
set_values(compare, { 1.0f });
net.set_input_data("compare", compare);
out = net.execute();
auto out_data_true = out.at("output").get_memory();
ASSERT_TRUE(is_output_equal(out_data_true, {20.0f, 40.0f}));
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ dat_dt, format::bfyx,{ 1, 1, 4, 1 } });
auto predicate = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } });
auto scale_mem = engine.allocate_memory({ dat_dt, format::bfyx,{ 1, 1, 1, 1 } });
//WHEN FALSE
set_values(compare, { 4.0f });
net.set_input_data("compare", compare);
out = net.execute();
auto out_data_false = out.at("output").get_memory();
ASSERT_TRUE(is_output_equal(out_data_false, { 15.0f, 35.0f }));
primitive_id input_id = "input";
primitive_id pred_id = "predicate";
primitive_id branch_input_id = "branch_input";
primitive_id cond_id = "condi";
primitive_id scale_data_id = "scale_data";
primitive_id output_id = "output";
condition::branch branch_true;
{
cldnn::topology branch_true_topology = generate_simple_branch(true, cond_id, branch_input_id, dat_dt);
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
branch_true.input_map.insert({input_id, branch_input_id});
branch_true.output_map.insert({0, "condi_when_true"});
}
condition::branch branch_false;
{
cldnn::topology branch_false_topology = generate_simple_branch(false, cond_id, branch_input_id, dat_dt);
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
branch_false.input_map.insert({input_id, branch_input_id});
branch_false.output_map.insert({0, "condi_when_false"});
}
cldnn::topology topology;
topology.add(
input_layout(input_id, input->get_layout())
);
topology.add(
input_layout(pred_id, predicate->get_layout())
);
topology.add(
input_layout(scale_data_id, scale_mem->get_layout())
);
topology.add(
condition(cond_id, {input_info(pred_id), input_info(input_id)}, branch_true, branch_false)
);
topology.add(
eltwise(output_id, { input_info(cond_id), input_info(scale_data_id) }, eltwise_mode::prod)
);
network net(engine, topology, config);
set_values(input, convert_data({ 1, 2, 3, 4 }));
set_values(scale_mem, convert_data({ 10 }));
net.set_input_data(input_id, input);
net.set_input_data(scale_data_id, scale_mem);
decltype(net.execute()) out;
//WHEN TRUE
set_values(predicate, { 1 });
net.set_input_data(pred_id, predicate);
out = net.execute();
auto out_data_true = out.at(output_id).get_memory();
ASSERT_TRUE(is_output_equal(out_data_true, convert_data({ 20, 40 })));
//WHEN FALSE
set_values(predicate, { 0 });
net.set_input_data(pred_id, predicate);
out = net.execute();
auto out_data_false = out.at(output_id).get_memory();
ASSERT_TRUE(is_output_equal(out_data_false, convert_data({ 15, 35 })));
}
};
using test_data_types = testing::Types<condition_data_types<FLOAT16>,
condition_data_types<float>>;
TYPED_TEST_SUITE(condition_gpu_basic_test, test_data_types);
TYPED_TEST(condition_gpu_basic_test, simple_basic_test) {
this->run_test();
}
TEST(DISABLED_condition_gpu, basic_range_equal_comp) {
TEST(condition_gpu, basic_range_equal_comp) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
auto predicate = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } });
topology branch_true = generate_simple_branch(true, "condi");
topology branch_false = generate_simple_branch(false, "condi");
primitive_id condi_id = "condi";
primitive_id branch_input_id = "branch_input";
primitive_id concat_id = "concat";
topology topology;
cldnn::topology topology;
topology.add(
input_layout("input0", input0->get_layout())
);
@ -157,32 +170,48 @@ TEST(DISABLED_condition_gpu, basic_range_equal_comp) {
input_layout("input1", input1->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
input_layout("predicate", predicate->get_layout())
);
topology.add(
concatenation("concat", { input_info("input0"), input_info("input1") }, 3)
);
condition::branch branch_true;
{
cldnn::topology branch_true_topology = generate_simple_branch(true, condi_id, branch_input_id);
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
branch_true.input_map.insert({concat_id, branch_input_id});
branch_true.output_map.insert({0, "condi_when_true"});
}
condition::branch branch_false;
{
cldnn::topology branch_false_topology = generate_simple_branch(false, condi_id, branch_input_id);
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
branch_false.input_map.insert({concat_id, branch_input_id});
branch_false.output_map.insert({0, "condi_when_false"});
}
topology.add(
condition("condi", input_info("concat"), branch_true, branch_false, "compare", cond_functions::EQUAL)
condition("condi", {input_info("predicate"), input_info("concat")}, branch_true, branch_false)
);
std::vector<float> input0_data = {
1, 2, 3, 4
1, 2
};
std::vector<float> input1_data = {
5, 6, 7, 8
3, 4
};
std::vector<float> compare_data_true = {
1, 2, 3
std::vector<uint8_t> predicate_data_true = {
1
};
std::vector<float> pooling_when_true_data = {
2, 4, 6, 8
2, 4
};
std::vector<float> compare_data_false = {
1, 2, 10
std::vector<uint8_t> predicate_data_false = {
0
};
std::vector<float> pooling_when_false_data = {
1.5, 3.5, 5.5, 7.5
1.5, 3.5
};
set_values(input0, input0_data);
@ -194,121 +223,23 @@ TEST(DISABLED_condition_gpu, basic_range_equal_comp) {
decltype(net.execute()) outputs;
//CHECK TRUE
set_values(compare, compare_data_true);
net.set_input_data("compare", compare);
set_values(predicate, predicate_data_true);
net.set_input_data("predicate", predicate);
outputs = net.execute();
auto out_data_true = outputs.at("condi").get_memory();
ASSERT_TRUE(is_output_equal(out_data_true, pooling_when_true_data));
//CHECK FALSE
set_values(compare, compare_data_false);
net.set_input_data("compare", compare);
set_values(predicate, predicate_data_false);
net.set_input_data("predicate", predicate);
outputs = net.execute();
auto out_data_false = outputs.at("condi").get_memory();
ASSERT_TRUE(is_output_equal(out_data_false, pooling_when_false_data));
}
TEST(DISABLED_condition_gpu, generic_test_true_false) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 5, 2, 5, 1 } });
std::vector<float> input_data(50);
std::iota(input_data.begin(), input_data.end(), 0.0f);
std::vector<cond_functions> functions = {
cond_functions::EQUAL,
cond_functions::GREATER,
cond_functions::LESS,
};
// ranges, with data when condition is true or false
std::vector<cldnn::tensor> ranges = {
{1, 1, 1, 1},
{1, 1, 3, 1},
{2, 1, 1, 1},
{2, 1, 1, 1}
};
std::vector<cldnn::tensor> offsets = {
{ 0, 0, 0, 0},
{ 0, 0, 1, 0},
{ 0, 0, 2, 0},
{ 2, 0, 0, 0},
{ 2, 1, 1, 0}
};
std::vector<float> pooling_when_true_data = {
2, 4, 7, 9, 12, 14, 17,
19, 22, 24, 27, 29, 32,
34, 37, 39, 42, 44, 47, 49
};
std::vector<float> pooling_when_false_data = {
1, 3, 6, 8, 11, 13, 16,
18, 21, 23, 26, 28, 31,
33, 36, 38, 41, 43, 46, 48
};
for (auto const& func : functions) {
for (auto const& range : ranges) {
for (auto const& offset : offsets) {
auto comp_values = get_values_to_compare(offset, range, input_data, input->get_layout(), func);
auto comp_values_true = comp_values.first;
auto comp_values_false = comp_values.second;
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx, range });
topology branch_true;
topology branch_false;
branch_true.add(
pooling("pooling_when_true", input_info("condi"), cldnn::pooling_mode::max, { 1, 1, 3, 1 }, { 1, 1, 2, 1 })
);
branch_false.add(
pooling("pooling_when_false", input_info("condi"), cldnn::pooling_mode::average, { 1, 1, 3, 1 }, { 1, 1, 2, 1 })
);
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", func, offset)
);
set_values(input, input_data);
network net(engine, topology, config);
net.set_input_data("input", input);
decltype(net.execute()) outputs;
//CHECK TRUE
set_values(compare, comp_values_true);
net.set_input_data("compare", compare);
outputs = net.execute();
auto out_data_true = outputs.at("condi").get_memory();
ASSERT_TRUE(is_output_equal(out_data_true, pooling_when_true_data));
//CHECK FALSE
set_values(compare, comp_values_false);
net.set_input_data("compare", compare);
outputs = net.execute();
auto out_data_false = outputs.at("condi").get_memory();
ASSERT_TRUE(is_output_equal(out_data_false, pooling_when_false_data));
}
}
}
}
TEST(DISABLED_condition_gpu, basic_stacked_ifs) {
TEST(condition_gpu, basic_stacked_ifs) {
/*
<prims...>
<if>
@ -324,61 +255,95 @@ TEST(DISABLED_condition_gpu, basic_stacked_ifs) {
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto compare2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
auto predicate = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto predicate2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
topology condi_1_true = generate_simple_branch(true, "condi");
topology condi_1_false = generate_simple_branch(false, "condi");
primitive_id input_id = "input";
primitive_id pred_id = "predicate";
primitive_id predicate2_id = "predicate2";
primitive_id branch_input_id = "branch_input";
primitive_id cond_id = "condi";
primitive_id cond2_id = "condi2";
primitive_id scale_data_id = "scale_data";
primitive_id output_id = "output";
topology condi_1_true = generate_simple_branch(true, cond_id, branch_input_id);
topology condi_1_false = generate_simple_branch(false, cond_id, branch_input_id);
topology condi_2_true;
condi_2_true.add(
activation("activ_when_true", input_info("condi2"), activation_func::log2)
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }),
activation("activ_when_true", input_info(branch_input_id), activation_func::log2)
);
topology condi_2_false;
condi_2_false.add(
activation("activ_when_false", input_info("condi2"), activation_func::relu)
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }),
activation("activ_when_false", input_info(branch_input_id), activation_func::relu)
);
condition::branch branch_condi_1_true;
branch_condi_1_true.inner_program = program::build_program(engine, condi_1_true, config, true);
branch_condi_1_true.input_map.insert({input_id, branch_input_id});
branch_condi_1_true.output_map.insert({0, "condi_when_true"});
condition::branch branch_condi_1_false;
branch_condi_1_false.inner_program = program::build_program(engine, condi_1_false, config, true);
branch_condi_1_false.input_map.insert({input_id, branch_input_id});
branch_condi_1_false.output_map.insert({0, "condi_when_false"});
condition::branch branch_condi_2_true;
branch_condi_2_true.inner_program = program::build_program(engine, condi_2_true, config, true);
branch_condi_2_true.input_map.insert({cond_id, branch_input_id});
branch_condi_2_true.output_map.insert({0, "activ_when_true"});
condition::branch branch_condi_2_false;
branch_condi_2_false.inner_program = program::build_program(engine, condi_2_false, config, true);
branch_condi_2_false.input_map.insert({cond_id, branch_input_id});
branch_condi_2_false.output_map.insert({0, "activ_when_false"});
topology topology;
topology.add(
input_layout("input", input->get_layout())
input_layout(input_id, input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
input_layout(pred_id, predicate->get_layout())
);
topology.add(
condition("condi", input_info("input"), condi_1_true, condi_1_false, "compare", cond_functions::EQUAL)
condition(cond_id, { input_info(pred_id), input_info(input_id) }, branch_condi_1_true, branch_condi_1_false)
);
topology.add(
input_layout("compare2", compare2->get_layout())
input_layout(predicate2_id, predicate2->get_layout())
);
topology.add(
condition("condi2", input_info("condi"), condi_2_true, condi_2_false, "compare2", cond_functions::GREATER)
condition(cond2_id, { input_info(predicate2_id), input_info(cond_id) }, branch_condi_2_true, branch_condi_2_false)
);
std::vector<float> input_data = {
1, 2, 3, 4
};
std::vector<float> compare_data = {
std::vector<uint8_t> predicate_data = {
1
};
std::vector<float> compare_2_data = {
0.0f, 0.0f
std::vector<uint8_t> predicate_2_data = {
0
};
set_values(input, input_data);
set_values(compare, compare_data);
set_values(compare2, compare_2_data);
set_values(predicate, predicate_data);
set_values(predicate2, predicate_2_data);
network net(engine, topology, config);
net.set_input_data("input", input);
net.set_input_data("compare", compare);
net.set_input_data("compare2", compare2);
net.set_input_data(input_id, input);
net.set_input_data(pred_id, predicate);
net.set_input_data(predicate2_id, predicate2);
auto outputs = net.execute();
auto out_data = outputs.at("condi2").get_memory();
ASSERT_TRUE(is_output_equal(out_data, {1.0f, 2.0f}));
std::vector<float> ref_data = {
2.0f, 4.0f
};
auto out_data = outputs.at(cond2_id).get_memory();
ASSERT_TRUE(is_output_equal(out_data, ref_data));
}
TEST(DISABLED_condition_gpu, basic_nested_ifs) {
TEST(condition_gpu, basic_nested_ifs) {
/*
<prims...>
<if 0>
@ -394,191 +359,243 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) {
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto compare2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
auto predicate = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto predicate2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto scale_5_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
set_values(scale_5_mem, { 5.0f });
auto scale_10_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
set_values(scale_10_mem, { 10.0f });
topology nested_true;
condition::branch nested_true;
{
nested_true.add(eltwise("scale_5", { input_info("condi_nested"), input_info("scale_5_data") }, eltwise_mode::prod),
data("scale_5_data", scale_5_mem));
cldnn::topology nested_true_topology;
nested_true_topology.add(
input_layout("branch_input1", { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }),
data("scale_5_data", scale_5_mem),
eltwise("scale_5", { input_info("branch_input1"), input_info("scale_5_data") }, eltwise_mode::prod)
);
nested_true.inner_program = program::build_program(engine, nested_true_topology, config, true);
nested_true.input_map.insert({"pooling_when_true", "branch_input1"});
nested_true.output_map.insert({0, "scale_5"});
}
topology nested_false;
condition::branch nested_false;
{
nested_false.add(eltwise("scale_10", { input_info("condi_nested"), input_info("scale_10_data") }, eltwise_mode::prod),
data("scale_10_data", scale_10_mem));
cldnn::topology nested_false_topology;
nested_false_topology.add(
input_layout("branch_input2", { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }),
data("scale_10_data", scale_10_mem),
eltwise("scale_10", { input_info("branch_input2"), input_info("scale_10_data") }, eltwise_mode::prod)
);
nested_false.inner_program = program::build_program(engine, nested_false_topology, config, true);
nested_false.input_map.insert({"pooling_when_true", "branch_input2"});
nested_false.output_map.insert({0, "scale_10"});
}
topology branch_true;
branch_true.add(
pooling("pooling_when_true", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
);
branch_true.add(
input_layout("compare2", compare2->get_layout())
);
condition::branch branch_true;
{
cldnn::topology branch_true_topology;
branch_true_topology.add(
input_layout("branch_input3", { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
pooling("pooling_when_true", input_info("branch_input3"), cldnn::pooling_mode::max, { 1, 2 }, { 1, 2 }),
input_layout("predicate2", predicate2->get_layout()),
condition( "condi_nested", {input_info("predicate2"), input_info("pooling_when_true")}, nested_true, nested_false)
);
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
branch_true.input_map.insert({"input", "branch_input3"});
branch_true.output_map.insert({0, "condi_nested"});
}
branch_true.add(
condition(
"condi_nested",
input_info("pooling_when_true"),
nested_true,
nested_false,
"compare2",
cond_functions::EQUAL)
);
condition::branch branch_false;
{
cldnn::topology branch_false_topology;
branch_false_topology.add(
input_layout("branch_input4", { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
pooling("pooling_when_false", input_info("branch_input4"), cldnn::pooling_mode::average, { 1, 2 }, { 1, 2 })
);
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
branch_false.input_map.insert({"input", "branch_input4"});
branch_false.output_map.insert({0, "pooling_when_false"});
}
topology branch_false;
branch_false.add(
pooling("pooling_when_false", input_info("condi"), cldnn::pooling_mode::average, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
);
topology topology;
cldnn::topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
input_layout("predicate", predicate->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
condition("condi", {input_info("predicate"), input_info("input")}, branch_true, branch_false)
);
std::vector<float> input_data = {
1.0f, 2.0f, 3.0f, 4.0f
};
std::vector<float> compare_data = {
std::vector<float> predicate_data = {
1.0f
};
std::vector<float> compare_2_data = {
std::vector<float> predicate_2_data = {
2.0f, 4.0f
};
set_values(input, input_data);
set_values(compare, compare_data);
set_values(compare2, compare_2_data);
set_values(predicate, predicate_data);
set_values(predicate2, predicate_2_data);
network net(engine, topology, config);
net.set_input_data("input", input);
net.set_input_data("compare", compare);
net.set_input_data("compare2", compare2);
net.set_input_data("predicate", predicate);
net.set_input_data("predicate2", predicate2);
auto outputs = net.execute();
auto out_data = outputs.at("condi").get_memory();
ASSERT_TRUE(is_output_equal(out_data, { 10.0f, 20.0f }));
ASSERT_TRUE(is_output_equal(out_data, std::vector<float>({ 10.0f, 20.0f })));
}
TEST(DISABLED_condition_gpu, negative_compare_wrong_layout) {
TEST(condition_gpu, negative_predicate_wrong_layout) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 5, 1 } });
auto predicate = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 5, 1 } });
topology branch_true = generate_simple_branch(true, "condi");
topology branch_false = generate_simple_branch(false, "condi");
primitive_id input_id = "input";
primitive_id pred_id = "predicate";
primitive_id branch_input_id = "branch_input";
primitive_id cond_id = "condi";
condition::branch branch_true;
{
cldnn::topology branch_true_topology = generate_simple_branch(true, cond_id, branch_input_id, data_types::f32);
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
branch_true.input_map.insert({input_id, branch_input_id});
branch_true.output_map.insert({0, "condi_when_true"});
}
condition::branch branch_false;
{
cldnn::topology branch_false_topology = generate_simple_branch(false, cond_id, branch_input_id, data_types::f32);
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
branch_false.input_map.insert({input_id, branch_input_id});
branch_false.output_map.insert({0, "condi_when_false"});
}
topology topology;
topology.add(
input_layout("input", input->get_layout())
input_layout(input_id, input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
input_layout(pred_id, predicate->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
condition(cond_id, {input_info(pred_id), input_info(input_id)}, branch_true, branch_false)
);
EXPECT_ANY_THROW(network net(engine, topology, config););
}
TEST(DISABLED_condition_gpu, negative_too_big_offset) {
TEST(condition_gpu, negative_not_same_layouts) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
auto predicate = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } });
primitive_id input_id = "input";
primitive_id pred_id = "predicate";
primitive_id branch_input_id = "branch_input";
primitive_id cond_id = "condi";
condition::branch branch_true;
{
primitive_id pool_id = "pooling_when_true";
topology branch_true_topology;
branch_true_topology.add(
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
pooling(pool_id, input_info(branch_input_id), cldnn::pooling_mode::max, { 1, 2 }, { 1, 2 })
);
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
branch_true.input_map.insert({input_id, branch_input_id});
branch_true.output_map.insert({0, pool_id});
}
condition::branch branch_false;
{
primitive_id pool_id = "pooling_when_false";
topology branch_false_topology;
branch_false_topology.add(
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
pooling(pool_id, input_info(branch_input_id), cldnn::pooling_mode::max, { 1, 4 }, { 1, 4 })
);
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
branch_false.input_map.insert({input_id, branch_input_id});
branch_false.output_map.insert({0, pool_id});
}
topology branch_true = generate_simple_branch(true, "condi");
topology branch_false = generate_simple_branch(false, "condi");
topology topology;
topology.add(
input_layout("input", input->get_layout())
input_layout(input_id, input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
input_layout(pred_id, predicate->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL, {1, 1, 2, 1})
condition(cond_id, {input_info(pred_id), input_info(input_id)}, branch_true, branch_false)
);
EXPECT_ANY_THROW(network net(engine, topology, config););
}
TEST(DISABLED_condition_gpu, negative_not_same_layouts) {
TEST(condition_gpu, negative_same_names_within_different_networks) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto predicate = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } });
topology branch_true;
branch_true.add(
pooling("pooling_when_true", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
);
primitive_id input_id = "input";
primitive_id pred_id = "predicate";
primitive_id branch_input_id = "branch_input";
primitive_id cond_id = "condi";
primitive_id duplicated_id = "pooling_check_name";
topology branch_false;
branch_false.add(
pooling("pooling_when_false", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 4, 1 }, { 0, 0, 4, 1 })
);
condition::branch branch_true;
{
topology branch_true_topology;
branch_true_topology.add(
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
pooling(duplicated_id, input_info(branch_input_id), cldnn::pooling_mode::max, { 2, 1 }, { 2, 1 })
);
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
branch_true.input_map.insert({input_id, branch_input_id});
branch_true.output_map.insert({0, duplicated_id});
}
condition::branch branch_false;
{
topology branch_false_topology;
branch_false_topology.add(
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
pooling("pooling_when_false", input_info(branch_input_id), cldnn::pooling_mode::max, { 2, 1 }, { 2, 1 })
);
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
branch_false.input_map.insert({input_id, branch_input_id});
branch_false.output_map.insert({0, "pooling_when_false"});
}
topology topology;
topology.add(
input_layout("input", input->get_layout())
input_layout(input_id, input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
input_layout(pred_id, predicate->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
);
EXPECT_ANY_THROW(network net(engine, topology, config););
}
TEST(DISABLED_condition_gpu, negative_same_names_within_different_networks) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
topology branch_true;
branch_true.add(
pooling("pooling_check_name", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
);
topology branch_false;
branch_false.add(
pooling("pooling_when_false", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
);
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
);
topology.add(
pooling("pooling_check_name", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
condition(cond_id, {input_info(pred_id), input_info(input_id)}, branch_true, branch_false)
);
topology.add(
pooling(duplicated_id, input_info(cond_id), cldnn::pooling_mode::max, { 2, 1 }, { 2, 1 })
);
EXPECT_ANY_THROW(network net(engine, topology, config););