[GPU] Impl cldnn::condition to support dynamic shape (#18051)

* [GPU] Impl cldnn::condition to support dynamic shape (#18051)
* Impl CreateIfOp
* Update calc_output_layouts and execute_impl
* Enable gpu unit test
* Create gpu functional test

* [GPU] Follow-up code review (#18051)
* remove redundant code
* create custom execute method for condition_inst
* change name from update_loop_primitive_map to update_inner_program_io_map

* [GPU] Fix gpu func test failures for fp16

* Add more test cases to cover fp16 and the nested If case

* [GPU] remove redundant code
* refactor variable names
* fix Windows build error

* [GPU] Fix Windows build issue

* [GPU] update calc_output_layouts

* [GPU] remove custom condition_inst::execute

* Remove virtual keyword from primitive_inst::execute()

* [GPU] Share single task executor between main program and inner program

* [GPU] Fix input rank issue for const inner network in condition op

* [GPU] apply calc_output_layouts for roi_align

Co-authored-by: Vladimir Paramuzov <vladimir.paramuzov@intel.com>

* [GPU] avoid checking allow_new_shape_infer for inner program

---------

Co-authored-by: Vladimir Paramuzov <vladimir.paramuzov@intel.com>
Paul Youngsoo Ahn 2023-06-28 00:05:26 +09:00 committed by GitHub
parent c2afa2aefc
commit 50897e86e6
25 changed files with 1658 additions and 648 deletions

View File

@ -22,6 +22,7 @@
namespace cldnn {
struct program;
struct network;
struct kernel_impl_params {
@ -54,6 +55,9 @@ struct kernel_impl_params {
std::map<size_t, memory::ptr> memory_deps = {};
size_t primary_input_idx = 0;
std::vector<std::shared_ptr<program>> inner_progs = {};
std::vector<std::shared_ptr<network>> inner_nets = {};
std::vector<std::map<size_t, primitive_id>> io_output_maps = {};
kernel_impl_params() : prog(nullptr), strm(nullptr), desc(nullptr), unique_id(0) {}
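For a cldnn::condition node, the new fields are expected to hold exactly two entries each, one per branch (a sketch with hypothetical primitive ids; see condition_inst.cpp below):

// inner_progs    = { branch_true_program, branch_false_program };
// io_output_maps = { {{0, "then_body:result"}},   // output idx -> inner primitive id
//                    {{0, "else_body:result"}} };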

View File

@ -79,7 +79,9 @@ public:
network(engine& engine,
const topology& topo,
const ExecutionConfig& config = {},
bool is_internal = false);
bool is_internal = false,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor = nullptr);
network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const ExecutionConfig& config,
@ -100,7 +102,9 @@ public:
static ptr build_network(engine& engine,
const topology& topology,
const ExecutionConfig& config = {},
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor = nullptr,
bool is_internal = false);
static ptr build_network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const ExecutionConfig& config,

View File

@ -125,6 +125,7 @@ public:
program(engine& engine_ref,
topology const& topology,
const ExecutionConfig& config,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
bool is_internal = false,
bool no_optimizations = false,
bool is_body_program = false);
@ -236,6 +237,13 @@ public:
bool is_internal = false,
bool no_optimizations = false,
bool is_body_program = false);
static ptr build_program(engine& engine,
const topology& topology,
const ExecutionConfig& config,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
bool is_internal = false,
bool no_optimizations = false,
bool is_body_program = false);
static ptr build_program(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const ExecutionConfig& config,
@ -253,6 +261,8 @@ public:
ICompilationContext& get_compilation_context() const { return *_compilation_context; }
void cancel_compilation_context();
static std::shared_ptr<InferenceEngine::CPUStreamsExecutor> make_task_executor(const ExecutionConfig& config);
private:
uint32_t prog_id = 0;
engine& _engine;
@ -307,9 +317,6 @@ private:
void post_optimize_graph(bool is_internal);
void transfer_memory_to_device();
InferenceEngine::CPUStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags = "") const;
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> make_task_executor(const ExecutionConfig& config) const;
/*
** Analysis functions
*/

View File

@ -235,6 +235,7 @@ REGISTER_FACTORY(v8, AdaptiveAvgPool);
REGISTER_FACTORY(v8, AdaptiveMaxPool);
REGISTER_FACTORY(v8, Softmax);
REGISTER_FACTORY(v8, PriorBox);
REGISTER_FACTORY(v8, If);
// ------------------------------ Supported v9 ops ------------------------------ //
REGISTER_FACTORY(v9, GridSample)

View File

@ -83,7 +83,8 @@ class Program {
public:
Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
bool createTopologyOnly = false, bool partialBuild = false,
InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor = nullptr, bool innerProgram = false);
Program(cldnn::engine& engine, const ExecutionConfig& config,
InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
@ -158,6 +159,8 @@ public:
bool use_new_shape_infer() const { return allow_new_shape_infer; }
bool requires_new_shape_infer(const ngraph::Node& op) const;
InferenceEngine::CPUStreamsExecutor::Ptr get_task_executor() { return m_task_executor; }
private:
static factories_map_t factories_map;
std::vector<std::shared_ptr<cldnn::program>> m_programs;
@ -173,6 +176,8 @@ private:
bool queryMode;
InferenceEngine::CPUStreamsExecutor::Ptr m_task_executor;
void EnableQueryMode() { queryMode = true; }
void DisableQueryMode() { queryMode = false; }
@ -183,7 +188,7 @@ private:
std::shared_ptr<cldnn::program> BuildProgram(const std::vector<std::shared_ptr<ngraph::Node>>& ops,
InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs,
bool createTopologyOnly = false, bool partialBuild = false);
bool createTopologyOnly = false, bool partialBuild = false, bool innerProgram = false);
void CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr<ngraph::Node>& op);
void ChangeInputBatch(int batch);

View File

@ -5,64 +5,73 @@
#pragma once
#include "primitive.hpp"
#include "intel_gpu/graph/topology.hpp"
#include "intel_gpu/graph/program.hpp"
#include <vector>
namespace cldnn {
/// @brief Function, which will be used during comparison.
enum cond_functions : int32_t { EQUAL, GREATER, LESS };
/// @brief Adds primitive, which works like "if".
///
/// @details
/// @n Applies comparison between 2 inputs.
/// @n Compare data - sizes of that input specify the range of the comparison.
/// @n Offset - offset in memory, when comparing values.
/// @n Applies comparison using the pred primitive, which holds a 1D tensor or scalar value
struct condition : public primitive_base<condition> {
CLDNN_DECLARE_PRIMITIVE(condition)
/// @brief A branch holds a compiled program, an input_map, and an output_map
///
struct branch {
std::map<primitive_id, primitive_id> input_map;
std::map<size_t, primitive_id> output_map;
program::ptr inner_program;
std::string str() {
std::stringstream ss;
ss << "branch: { " << std::endl;
ss << "* input_map : [(outer_id,inner_id),";
for (auto& in_iter : input_map) {
ss << "(" << in_iter.first << "," << in_iter.second << "),";
}
ss << "]," << std::endl;
ss << "* output_map : [(outer_idx,inner_id),";
for (auto& out_iter : output_map) {
ss << "(" << out_iter.first << ","<< out_iter.second << "),";
}
ss << "]" << std::endl;
ss << "}" << std::endl;
return ss.str();
}
};
/// @brief Constructs condition primitive / layer.
///
/// @param id An identifier of new primitive.
/// @param input An identifier of primitive which is an input for newly created
/// condition primitive.
/// @param topology_true Topology containing primitives, which will be executed when the comparison
/// returns true.
/// @param topology_false Topology containing primitives, which will be executed when the comparison
/// returns false.
/// @param compare_data An identifier of the primitive which contains the compare values.
/// @param func Function used during the comparison.
/// @param offset Offset for compare data.
/// @param inputs A list of input primitive ids (pred, inputs (optional)).
/// pred is the condition's predicate primitive, whose scalar value determines whether to execute branch_true or branch_false.
/// @param branch_true Branch containing primitives to be executed when pred is true (the then body in ngraph).
/// @param branch_false Branch containing primitives to be executed when pred is false (the else body in ngraph).
/// @param output_padding Optional padding for output from primitive.
condition(const primitive_id& id,
const input_info& input,
const topology& topology_true,
const topology& topology_false,
const primitive_id& compare_data,
const cond_functions& func,
const tensor& offset = {0, 0, 0, 0, 0},
const padding& output_padding = padding())
: primitive_base(id, {input}, {output_padding}),
topology_true(topology_true),
topology_false(topology_false),
compare_data(compare_data),
function(func),
offset(offset) {}
const std::vector<input_info>& inputs,
const branch& branch_true,
const branch& branch_false,
const padding& output_padding = padding())
: primitive_base(id, inputs, {output_padding}),
branch_true(branch_true),
branch_false(branch_false) {}
/// @brief An identifier of topology, which will be executed when comparison returns true.
topology topology_true;
/// @brief An identifier of topology, which will be executed when comparison returns false.
topology topology_false;
/// @brief An identifier of primitive which contains compare values.
primitive_id compare_data;
/// @brief Used function during comparison.
cond_functions function;
/// @brief Offset for compare data.
tensor offset;
branch branch_true;
branch branch_false;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {compare_data}; }
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {}; }
};
static inline std::ostream& operator<< (std::ostream& os, condition::branch& info) {
os << info.str();
return os;
}
} // namespace cldnn
/// @}
/// @}
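A minimal sketch of how the reworked primitive is populated (ids, programs, and inputs are hypothetical; in practice the plugin fills them from the ngraph If op, see ops/condition.cpp below):

cldnn::condition::branch branch_true;
branch_true.input_map.insert({"outer_input", "inner_param"}); // outer id -> inner id
branch_true.output_map.insert({0, "inner_result"});           // output idx -> inner id
branch_true.inner_program = compiled_then_body;               // program::ptr built elsewhere
// branch_false is built the same way from the else body.
cldnn::condition cond("if_0", {cldnn::input_info("pred"), cldnn::input_info("x")},
                      branch_true, branch_false);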

View File

@ -3,7 +3,7 @@
//
#include "condition_inst.h"
#include "program_node.h"
#include "intel_gpu/runtime/error_handler.hpp"
#include "json_object.h"
#include "primitive_type_base.h"
@ -12,6 +12,39 @@
namespace cldnn {
GPU_DEFINE_PRIMITIVE_TYPE_ID(condition)
const size_t idx_branch_true = 0;
const size_t idx_branch_false = 1;
static std::map<primitive_id, layout> get_out_layout_map(cldnn::program::ptr prog) {
std::map<primitive_id, layout> out_layout_map;
for (auto& o : prog->get_outputs()) {
out_layout_map.insert({o->id(), o->get_output_layout()});
}
return out_layout_map;
}
static std::map<primitive_id, layout> get_out_layout_map(cldnn::network::ptr net) {
std::map<primitive_id, layout> out_layout_map;
for (auto& o : net->get_outputs()) {
out_layout_map.insert({o->id(), o->get_output_layout()});
}
return out_layout_map;
}
static std::vector<layout> get_output_layouts(std::map<primitive_id, layout>&& outputs, const std::map<size_t, cldnn::primitive_id> &io_output_map) {
std::vector<layout> out_layouts;
for (auto out : outputs) {
for (auto& io_output : io_output_map) {
auto inner_prim_id = io_output.second;
if (out.first == inner_prim_id) {
out_layouts.push_back(out.second);
}
}
}
OPENVINO_ASSERT(out_layouts.size() > 0, "No matching output was found");
return out_layouts;
}
/*
The calc_output_layout method is called only when the output layout is invalidated.
That means it is called when:
@ -20,38 +53,155 @@ GPU_DEFINE_PRIMITIVE_TYPE_ID(condition)
In both of these cases, we need to recalculate branch_true and branch_false.
!* We can be sure that this method is called AT LEAST once during graph compilation. *!
*/
layout condition_inst::calc_output_layout(condition_node const& node, kernel_impl_params const& impl_param) {
assert(static_cast<bool>(impl_param.desc->output_data_types[0]) == false &&
"Output data type forcing is not supported for condition_node!");
node.set_branches();
layout condition_inst::calc_output_layout(condition_node const& /* node */, kernel_impl_params const& impl_param) {
OPENVINO_ASSERT(static_cast<bool>(impl_param.desc->output_data_types[0]) == false, "Output data type forcing is not supported for condition_node!");
OPENVINO_ASSERT(impl_param.get_input_layout(0).count() == 1, "layout of compare_data of condition should be {1,1,1,1}");
auto branch_true_output = node.get_branch_true()->get_outputs();
auto branch_false_output = node.get_branch_false()->get_outputs();
CLDNN_ERROR_NOT_EQUAL(impl_param.desc->id,
"Count of branch true outputs",
branch_true_output.size(),
"expected outputs size",
1,
"Branch true should have one output.");
CLDNN_ERROR_NOT_EQUAL(impl_param.desc->id,
"Count of branch false outputs",
branch_false_output.size(),
"expected outputs size",
1,
"Branch false should have one output.");
OPENVINO_ASSERT(impl_param.inner_progs.size() == 2, "If(Condition) contains incorrect number of inner programs ", impl_param.inner_progs.size());
OPENVINO_ASSERT(impl_param.io_output_maps.size() == 2, "If(Condition) contains incorrect number of io output maps ", impl_param.io_output_maps.size());
auto layouts_true = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_true]), impl_param.io_output_maps[idx_branch_true]);
auto layouts_false = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_false]), impl_param.io_output_maps[idx_branch_false]);
auto layout_true = branch_true_output.at(0)->get_output_layout();
auto layout_false = branch_false_output.at(0)->get_output_layout();
CLDNN_ERROR_LAYOUT_MISMATCH(impl_param.desc->id,
"Branch true output layout",
layout_true,
layouts_true[0],
"branch false output layout",
layout_false,
layouts_false[0],
"Layout of the branches should be the same.");
return layout_true;
return layouts_true[0];
}
template <class T>
static bool convert_data(memory::ptr mem, stream& stream) {
mem_lock<T, mem_lock_type::read> lock_data{mem, stream};
return (static_cast<float>(*lock_data.data()) != 0.f);
}
bool condition_inst::get_pred_from_memory(memory::ptr mem, stream& stream) {
auto mem_dt = mem->get_layout().data_type;
switch (mem_dt) {
case cldnn::data_types::f32:
return convert_data<float>(mem, stream);
case cldnn::data_types::f16:
return convert_data<half_t>(mem, stream);
case cldnn::data_types::i64:
return convert_data<int64_t>(mem, stream);
case cldnn::data_types::i32:
return convert_data<int32_t>(mem, stream);
case cldnn::data_types::i8:
return convert_data<int8_t>(mem, stream);
case cldnn::data_types::u8:
return convert_data<uint8_t>(mem, stream);
case cldnn::data_types::bin:
default:
return convert_data<uint32_t>(mem, stream);
}
}
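The same first-element-to-bool conversion shown standalone (an illustrative sketch, independent of cldnn memory locking):

// The predicate is whatever the first element of the buffer converts to,
// compared against zero:
template <class T>
static bool scalar_to_pred(const void* data) {
    return static_cast<float>(*static_cast<const T*>(data)) != 0.f;
}
// e.g. scalar_to_pred<int32_t>(buf) is true for any non-zero leading element.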
static ov::PartialShape resolve_shape(const ov::PartialShape& true_pshape, const ov::PartialShape& false_pshape) {
// true_pshape - shape of output from then_body
// false_pshape - shape of output from else_body
auto then_rank = true_pshape.rank();
auto else_rank = false_pshape.rank();
// if the ranks of the shapes are not equal, or the rank of one of them is dynamic,
// the function returns a shape with dynamic rank
if (then_rank.is_dynamic() || else_rank.is_dynamic()) {
return ov::PartialShape::dynamic();
}
if (then_rank.get_length() != else_rank.get_length()) {
// Union of scalar and 1D case
if (then_rank.get_length() <= 1 && else_rank.get_length() <= 1) {
return ov::PartialShape::dynamic(1);
} else {
return ov::PartialShape::dynamic();
}
}
std::vector<ov::Dimension> new_dims;
// If the ranks are equal, each dimension of the then_body output is unioned with the
// corresponding dimension of the else_body output
for (auto then_it = true_pshape.cbegin(), else_it = false_pshape.cbegin(); then_it != true_pshape.cend();
then_it++, else_it++) {
if ((*then_it).is_dynamic() || (*else_it).is_dynamic()) {
new_dims.push_back(ov::Dimension::dynamic());
} else if (*then_it == *else_it) {
new_dims.emplace_back(*then_it);
} else {
auto dim_min = std::min((*then_it).get_min_length(), (*else_it).get_min_length());
auto dim_max = std::max((*then_it).get_min_length(), (*else_it).get_min_length());
new_dims.emplace_back(dim_min, dim_max);
}
}
return ov::PartialShape(new_dims);
}
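Worked examples of the union rule above (illustrative values in ov::PartialShape notation, not cases from the commit):

// resolve_shape({1,3,16,16}, {1,3,16,16}) -> {1,3,16,16}         equal dims are kept
// resolve_shape({1,3,16,16}, {1,3,8,8})   -> {1,3,8..16,8..16}   static mismatch becomes an interval
// resolve_shape({1,3,?,?},   {1,3,16,16}) -> {1,3,?,?}           dynamic dims stay dynamic
// resolve_shape({2,2},       {2,2,2,2})   -> ?...                rank mismatch -> dynamic rank
// resolve_shape({},          {1})         -> {?}                 scalar vs 1D -> dynamic 1D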
static std::vector<layout> resolve_shape(std::vector<layout>& target_list, std::vector<layout>& other_list) {
std::vector<layout> resolved_layout;
for (size_t i = 0; i < target_list.size(); i++) {
auto target = target_list[i];
auto other = other_list[i];
auto target_pshape = target.get_partial_shape();
auto other_pshape = other.get_partial_shape();
auto target_rank = target_pshape.rank();
auto other_rank = other_pshape.rank();
if (target_rank.get_length() == 0 && other_rank.get_length() == 1) {
resolved_layout.push_back({ov::PartialShape{1}, target.data_type, target.format});
} else {
resolved_layout.push_back(target);
}
}
return resolved_layout;
}
template<typename ShapeType>
std::vector<layout> condition_inst::calc_output_layouts(condition_node const& /* node */, kernel_impl_params const& impl_param) {
if (impl_param.inner_nets.empty()) {
OPENVINO_ASSERT(impl_param.inner_progs.empty() == false, "The count of inner programs should not be zero");
auto layouts_true = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_true]), impl_param.io_output_maps[idx_branch_true]);
auto layouts_false = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_false]), impl_param.io_output_maps[idx_branch_false]);
const size_t num_outputs = impl_param.output_layouts.size();
OPENVINO_ASSERT((num_outputs == layouts_true.size() && num_outputs == layouts_false.size()),
"The number of outputs for each branch should be same!");
std::vector<layout> output_layouts;
for (size_t i = 0; i < num_outputs; i++) {
if (layouts_true[i] == layouts_false[i]) {
output_layouts.push_back(layouts_true[i]);
} else {
OPENVINO_ASSERT(layouts_true[i].data_type == layouts_false[i].data_type, "data types of both branches should be the same");
OPENVINO_ASSERT(layouts_true[i].format == layouts_false[i].format, "output formats of both branches should be the same");
auto out_layout = resolve_shape(layouts_true[i].get_partial_shape(), layouts_false[i].get_partial_shape());
output_layouts.push_back(layout{out_layout, layouts_true[i].data_type, layouts_true[i].format });
}
}
return output_layouts;
} else {
auto layouts_true = get_output_layouts(get_out_layout_map(impl_param.inner_nets[idx_branch_true]), impl_param.io_output_maps[idx_branch_true]);
auto layouts_false = get_output_layouts(get_out_layout_map(impl_param.inner_nets[idx_branch_false]), impl_param.io_output_maps[idx_branch_false]);
const size_t num_outputs = impl_param.output_layouts.size();
OPENVINO_ASSERT((num_outputs == layouts_true.size() && num_outputs == layouts_false.size()),
"The number of outputs for each branch should be same!");
auto& memory_deps = impl_param.memory_deps;
OPENVINO_ASSERT(memory_deps.count(0) > 0, "The count of memory deps should not be zero");
auto mem_ptr = memory_deps.at(0);
auto pred = condition_inst::get_pred_from_memory(mem_ptr, impl_param.get_stream());
if (pred) {
return resolve_shape(layouts_true, layouts_false);
} else {
return resolve_shape(layouts_false, layouts_true);
}
}
}
template std::vector<layout> condition_inst::calc_output_layouts<ov::PartialShape>(condition_node const& node, const kernel_impl_params& impl_param);
std::string condition_inst::to_string(condition_node const& node) {
auto desc = node.get_primitive();
auto node_info = node.desc_to_json();
@ -69,23 +219,35 @@ Condition primitive is reusing memory with the input.
*/
condition_inst::typed_primitive_inst(network& network, condition_node const& node)
: parent(network, node),
_net_true(network::allocate_network(node.get_program().get_engine(), node.get_branch_true(), true)),
_net_false(network::allocate_network(node.get_program().get_engine(), node.get_branch_false(), true)) {
auto compare_tensor = node.compare().get_output_layout().get_tensor();
auto input_tensor = node.input().get_output_layout().get_tensor();
CLDNN_ERROR_TENSOR_SIZES_GREATER_THAN(node.id(),
"Compare tensor",
compare_tensor,
"input tensor",
input_tensor,
"Compare primitive is too big.");
_net_true(network::allocate_network(node.get_program().get_engine(), node.get_branch_true().inner_program)),
_net_false(network::allocate_network(node.get_program().get_engine(), node.get_branch_false().inner_program)) {
this->set_inner_networks({_net_true, _net_false});
}
auto compare_with_offster_tensor = compare_tensor + node.offset();
CLDNN_ERROR_TENSOR_SIZES_GREATER_THAN(node.id(),
"Offset with compare tensor",
compare_with_offster_tensor,
"input tensor",
input_tensor,
"Offset is too big.");
void condition_inst::update_output_layout() {
auto memory_deps = _node->get_const_memory_deps();
for (auto& i : _node->get_shape_infer_dependencies()) {
if (memory_deps.count(i) > 0 || i >= _node->get_dependencies().size()) {
continue;
}
auto dep_id = _node->get_dependency(i).id();
auto dep_mem = _network.get_output_memory(dep_id);
memory_deps.insert({i, dep_mem});
}
_impl_params->memory_deps = memory_deps;
auto new_layouts = _node->type()->calc_output_layouts(*_node, *_impl_params);
if (new_layouts.empty()) {
auto new_layout = _node->type()->calc_output_layout(*_node, *_impl_params);
new_layout.data_padding = padding::max(_node->get_primitive()->output_paddings[0], new_layout.data_padding);
_impl_params->output_layouts[0] = new_layout;
} else {
for (size_t i = 0; i != new_layouts.size(); ++i) {
auto new_layout = new_layouts[i];
new_layout.data_padding = padding::max(_node->get_primitive()->output_paddings[i], new_layout.data_padding);
_impl_params->output_layouts[i] = new_layout;
}
}
}
} // namespace cldnn

View File

@ -12,6 +12,7 @@
#include "quantize_inst.h"
#include "arg_max_min_inst.h"
#include "fully_connected_inst.h"
#include "condition_inst.h"
#include "program_node.h"
#include <iostream>
@ -72,6 +73,9 @@ void compile_graph::run(program& p) {
if (node->is_dynamic() && !is_planar)
can_select_impl = false;
if (node->is_type<condition>())
can_select_impl = true;
if (can_select_impl) {
tasks.push_back([node, &exception, change_initial_impl, original_impl_type] {
try {

View File

@ -0,0 +1,41 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "pass_manager.h"
#include "program_helpers.h"
#include "loop_inst.h"
#include "condition_inst.h"
#include <iterator>
#include <vector>
#include <memory>
using namespace cldnn;
void update_inner_program_io_map::run(program& p) {
for (auto& node : p.get_processing_order()) {
if (node->is_type<loop>()) {
loop_node& node2 = node->as<loop>();
for (const auto& info : p.get_optimized()) {
if (info.second.size() != 1) {
continue;
}
const primitive_id& old_primitive_id = info.first;
const primitive_id& new_primitive_id = info.second.front();
node2.update_primitive_map(old_primitive_id, new_primitive_id);
node2.update_primitive_map(old_primitive_id, new_primitive_id, false); // update internal id
}
} else if (node->is_type<condition>()) {
condition_node& cond = node->as<condition>();
for (const auto& info : p.get_optimized()) {
if (info.second.size() != 1) {
continue;
}
const primitive_id& old_primitive_id = info.first;
const primitive_id& new_primitive_id = info.second.front();
cond.update_primitive_map(old_primitive_id, new_primitive_id);
}
}
}
}
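The condition-side remapping delegates to condition_node::update_primitive_map (defined in condition_inst.h below); with hypothetical ids, its effect on a branch input_map is:

//   before: input_map = { {"reorder_5", "then_param_0"} }  // "reorder_5" was optimized out
//   after : input_map = { {"conv_2",    "then_param_0"} }  // key follows the surviving outer id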

View File

@ -1,31 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "pass_manager.h"
#include "program_helpers.h"
#include "loop_inst.h"
#include <iterator>
#include <vector>
#include <memory>
using namespace cldnn;
void update_loop_primitive_map::run(program& p) {
for (auto& node : p.get_processing_order()) {
if (!node->is_type<loop>()) {
continue;
}
loop_node& node2 = node->as<loop>();
for (const auto& info : p.get_optimized()) {
if (info.second.size() != 1) {
continue;
}
const primitive_id& old_primitive_id = info.first;
const primitive_id& new_primitive_id = info.second.front();
node2.update_primitive_map(old_primitive_id, new_primitive_id);
node2.update_primitive_map(old_primitive_id, new_primitive_id, false); // update internal id
}
}
}

View File

@ -36,16 +36,34 @@ struct condition_impl : typed_primitive_impl<condition> {
auto ev = instance.get_network().get_stream().create_user_event(false);
set_node_params(instance.get_node());
bool exec_branch = choose_branch_to_exec(instance);
memory::ptr memory_to_copy;
if (exec_branch)
memory_to_copy = execute_branch(instance.get_net_true(), instance.result_id(), instance.input_memory_ptr());
else
memory_to_copy = execute_branch(instance.get_net_false(), instance.result_id(), instance.input_memory_ptr());
// just copy memory
mem_lock<float, mem_lock_type::read> inp_ptr{memory_to_copy, instance.get_network().get_stream()};
mem_lock<float, mem_lock_type::write> out_ptr{instance.output_memory_ptr(), instance.get_network().get_stream()};
std::copy(inp_ptr.begin(), inp_ptr.end(), out_ptr.begin());
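// New flow below: (1) read pred from dependency 0 and pick the branch network,
// (2) bind outer input memories to inner parameters via branch.input_map,
// (3) execute the inner network, refresh this instance's output layouts, and
// adopt the inner output memories via branch.output_map.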
auto pred = condition_inst::get_pred_from_memory(instance.pred_memory_ptr(), instance.get_network().get_stream());
network::ptr executed_net = pred ? instance.get_net_true() : instance.get_net_false();
auto branch = pred ? instance.get_branch_true() : instance.get_branch_false();
// Set input memory of inner network before its execution
for (size_t mem_idx = 0; mem_idx < instance.inputs_memory_count(); mem_idx++) {
const primitive_id& input_external_id = instance.dependencies().at(mem_idx).first->id();
auto iter = branch.input_map.find(input_external_id);
if (iter != branch.input_map.end()) {
const primitive_id& input_internal_id = iter->second;
auto mem_ptr = instance.input_memory_ptr(mem_idx);
executed_net->set_input_data(input_internal_id, mem_ptr);
}
}
executed_net->execute({});
// Update output layout of impl_param in condition_inst
instance.update_output_layout();
// Set output memory of condition_inst to inner network output memory after inner network execution
for (auto out_mem_map : branch.output_map) {
auto out_mem_idx = out_mem_map.first;
auto inner_out_id = out_mem_map.second;
auto mem_ptr = executed_net->get_output(inner_out_id).get_memory();
instance.set_output_memory(mem_ptr, false, out_mem_idx);
}
ev->set();
return ev;
}
@ -58,85 +76,22 @@ struct condition_impl : typed_primitive_impl<condition> {
private:
primitive_id _node_id;
/*
Add functions here.
*/
bool check_condition(const float value_1, const float value_2, const cond_functions& func) const {
switch (func) {
case cond_functions::EQUAL:
return value_1 == value_2;
break;
case cond_functions::GREATER:
return value_1 > value_2;
break;
case cond_functions::LESS:
return value_1 < value_2;
break;
default:
throw("Unknown comparision function for: " + _node_id);
break;
}
}
/*
Loop over memory and check condition.
Returns boolean flag, which says what branch should be executed.
*/
bool choose_branch_to_exec(condition_inst& instance) const {
mem_lock<float, mem_lock_type::read> lock_compare_data{instance.compare_memory_ptr(), instance.get_network().get_stream()};
auto compare_layout = instance.compare_memory().get_layout();
auto compare_ptr = lock_compare_data.begin();
mem_lock<float, mem_lock_type::read> lock_input{instance.input_memory_ptr(), instance.get_network().get_stream()};
auto input_layout = instance.input_memory().get_layout();
auto input_ptr = lock_input.begin();
auto function = instance.argument->function;
auto& offset = instance.argument->offset;
for (auto b = 0; b < compare_layout.batch(); b++) {
for (auto f = 0; f < compare_layout.feature(); f++) {
for (auto z = 0; z < compare_layout.spatial(2); z++) {
for (auto y = 0; y < compare_layout.spatial(1); y++) {
for (auto x = 0; x < compare_layout.spatial(0); x++) {
tensor input_tensor{
batch(b + offset.batch[0]),
feature(f + offset.feature[0]),
spatial(x + offset.spatial[0], y + offset.spatial[1], z + offset.spatial[2], 0) };
auto input_idx = input_layout.get_linear_offset(input_tensor);
tensor compare_tensor{ batch(b), feature(f), spatial(x, y, z, 0) };
auto compare_idx = compare_layout.get_linear_offset(compare_tensor);
if (!check_condition(input_ptr[input_idx], compare_ptr[compare_idx], function))
return false;
}
}
}
}
}
return true;
}
memory::ptr execute_branch(network::ptr branch,
const primitive_id& input_id,
memory::ptr input_memory) const {
branch->set_input_data(input_id, input_memory);
branch->execute({});
return branch->get_outputs().at(0)->output_memory_ptr();
}
};
namespace detail {
attach_condition_common::attach_condition_common() {
implementation_map<condition>::add(impl_types::common, condition_impl::create, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f32, format::yxfb),
});
implementation_map<condition>::add(impl_types::common,
shape_types::dynamic_shape,
condition_impl::create,
{},
{});
implementation_map<condition>::add(impl_types::common, condition_impl::create, {});
}
} // namespace detail
} // namespace common
} // namespace cldnn
// TODO: Restructure this code along the lines of cldnn::loop
ASSIGN_TYPE_NAME(cldnn::common::condition_impl)

View File

@ -18,63 +18,43 @@ struct typed_program_node<condition> : public typed_program_node_base<condition>
private:
using parent = typed_program_node_base<condition>;
class branch {
public:
explicit branch(const topology& tpl) : _topology(tpl) {}
void set(const program_node& node) {
add_or_change_input_layout(node);
_program = program::build_program(node.get_program().get_engine(),
_topology,
node.get_program().get_config(),
true); // rebuild program
}
program::ptr get() const { return _program; }
private:
topology _topology;
program::ptr _program = nullptr;
void add_or_change_input_layout(const program_node& node) {
auto layout = node.get_input_layout(0);
auto input_id = node.as<condition>().result_id();
if (_topology.get_primitives().count(input_id) == 0) {
_topology.add_primitive(std::make_shared<input_layout>(input_id, layout));
for (auto& prim : _topology.get_primitives()) {
for (auto& inp : prim.second->input) {
if (inp.pid == node.id())
inp.pid = input_id;
}
}
} else {
_topology.change_input_layout(input_id, layout);
}
}
};
public:
using parent::parent;
typed_program_node(std::shared_ptr<primitive> prim, program& prog)
typed_program_node(std::shared_ptr<condition> prim, program& prog)
: parent(prim, prog),
_branch_true(this->get_primitive()->topology_true),
_branch_false(this->get_primitive()->topology_false) {}
_branch_true(prim->branch_true),
_branch_false(prim->branch_false) {}
program_node& input() const { return get_dependency(0); }
program_node& compare() const { return get_dependency(1); }
cond_functions func() const { return get_primitive()->function; }
tensor offset() const { return get_primitive()->offset; }
void set_branches() const {
_branch_true.set(*this);
_branch_false.set(*this);
condition::branch get_branch_true() const { return _branch_true; }
condition::branch get_branch_false() const { return _branch_false; }
using parent::get_kernel_impl_params;
std::unique_ptr<kernel_impl_params> get_kernel_impl_params(const std::vector<layout>& in_layouts, const std::vector<layout>& out_layouts) const override {
auto params = parent::get_kernel_impl_params(in_layouts, out_layouts);
params->inner_progs = { _branch_true.inner_program, _branch_false.inner_program };
params->io_output_maps = { _branch_true.output_map, _branch_false.output_map };
return params;
}
void update_primitive_map(const primitive_id& prevID, const primitive_id& newID) {
auto replace_external_id = [&](std::map<primitive_id, primitive_id>& input_map, const primitive_id& prevID, const primitive_id& newID) {
auto iter = input_map.find(prevID);
if (iter != input_map.end()) {
primitive_id new_external_id = newID;
primitive_id internal_id = iter->second;
input_map.erase(iter);
input_map.insert({new_external_id, internal_id});
}
};
replace_external_id(_branch_true.input_map, prevID, newID);
replace_external_id(_branch_false.input_map, prevID, newID);
}
program::ptr get_branch_true() const { return _branch_true.get(); }
program::ptr get_branch_false() const { return _branch_false.get(); }
primitive_id result_id() const { return id() + ":result"; }
private:
mutable branch _branch_true;
mutable branch _branch_false;
condition::branch& _branch_true;
condition::branch& _branch_false;
};
using condition_node = typed_program_node<condition>;
@ -85,17 +65,20 @@ class typed_primitive_inst<condition> : public typed_primitive_inst_base<conditi
using parent::parent;
public:
static layout calc_output_layout(condition_node const& node, kernel_impl_params const& impl_param);
template<typename ShapeType>
static std::vector<layout> calc_output_layouts(condition_node const& /*node*/, kernel_impl_params const& impl_param);
static layout calc_output_layout(condition_node const& /* node */, kernel_impl_params const& impl_param);
static std::string to_string(condition_node const& node);
static bool get_pred_from_memory(memory::ptr mem, stream& stream);
typed_primitive_inst(network& network, condition_node const& node);
memory::ptr input_memory_ptr() const { return dep_memory_ptr(0); }
memory::ptr compare_memory_ptr() const { return dep_memory_ptr(1); }
memory& input_memory() const { return dep_memory(0); }
memory& compare_memory() const { return dep_memory(1); }
memory::ptr pred_memory_ptr() const { return dep_memory_ptr(0); }
network::ptr get_net_true() const { return _net_true; }
network::ptr get_net_false() const { return _net_false; }
primitive_id result_id() const { return node->result_id(); }
condition::branch get_branch_true() const { return node->get_branch_true(); }
condition::branch get_branch_false() const { return node->get_branch_false(); }
void update_output_layout();
private:
network::ptr _net_true;

View File

@ -308,7 +308,7 @@ public:
std::vector<primitive_id> output_names_vec(output_names.begin(), output_names.end());
auto config = get_program().get_config();
config.set_property(ov::intel_gpu::custom_outputs(output_names_vec));
body_program = program::build_program(get_program().get_engine(), body, config, false, false, true);
body_program = program::build_program(get_program().get_engine(), body, config, get_program().get_task_executor(), false, false, true);
}
const primitive_id& get_trip_count_id() const { return get_primitive()->trip_count_id; }

View File

@ -378,9 +378,9 @@ public:
void run(program& p) override;
};
class update_loop_primitive_map : public base_pass {
class update_inner_program_io_map : public base_pass {
public:
update_loop_primitive_map() : base_pass("update_loop_primitive_map") {}
update_inner_program_io_map() : base_pass("update_inner_program_io_map") {}
private:
void run(program& p) override;

View File

@ -232,7 +232,7 @@ public:
bool is_constant() const { return _is_constant; }
bool needs_completion_event() const { return _needs_completion_event; }
bool has_unfused_subgraph() const { return (_unfused_subgraph != nullptr); }
bool has_inner_networks() const;
void allocate_internal_buffers();
static memory::ptr allocate_output(engine& engine, memory_pool& pool, const program_node& _node,
const kernel_impl_params& impl_params, uint32_t net_id, bool is_internal, size_t idx = 0, bool reset_mem = true, bool is_output_buffer = false);
@ -257,6 +257,9 @@ public:
void set_output_layout(const layout& new_out_lay, size_t idx = 0) {
_impl_params->output_layouts[idx] = new_out_lay;
}
void set_inner_networks(const std::vector<network::ptr> inner_nets) {
_impl_params->inner_nets = inner_nets;
}
#ifdef ENABLE_ONEDNN_FOR_GPU
std::vector<cldnn::fused_primitive_desc_onednn>& get_fused_primitives_onednn() const { return _impl_params->fused_desc_onednn; }
#endif // ENABLE_ONEDNN_FOR_GPU

View File

@ -16,6 +16,8 @@ class typed_primitive_inst<roi_align> : public typed_primitive_inst_base<roi_ali
using parent::parent;
public:
template<typename ShapeType>
static std::vector<layout> calc_output_layouts(roi_align_node const& node, kernel_impl_params const& impl_param);
static layout calc_output_layout(roi_align_node const& node, kernel_impl_params const& impl_param);
static std::string to_string(roi_align_node const& node);

View File

@ -13,6 +13,7 @@
#include "reshape_inst.h"
#include "arg_max_min_inst.h"
#include "shape_of_inst.h"
#include "condition_inst.h"
#include <sstream>
#include "gemm_inst.h"
@ -1410,6 +1411,8 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {
preferred_impl = _forcing_map.at(node.id()).second;
} else if (node.is_type<condition>()) {
preferred_impl = impl_types::common;
} else if (node.is_type<detection_output>()) {
const auto& program = node.get_program();
const auto& device_info = program.get_engine().get_device_info();

View File

@ -342,8 +342,9 @@ network::network(program::ptr program, const ExecutionConfig& config, stream::pt
network::network(engine& engine,
const topology& topo,
const ExecutionConfig& config,
bool is_internal)
: network(program::build_program(engine, topo, config, is_internal), config, engine.create_stream(config), is_internal) {}
bool is_internal,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor)
: network(program::build_program(engine, topo, config, task_executor, is_internal), config, engine.create_stream(config), is_internal) {}
network::network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
@ -653,8 +654,9 @@ network::ptr network::allocate_network(engine& engine, program::ptr program, boo
network::ptr network::build_network(engine& engine,
const topology& topology,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
bool is_internal) {
return std::make_shared<network>(engine, topology, config, is_internal);
return std::make_shared<network>(engine, topology, config, is_internal, task_executor);
}
network::ptr network::build_network(engine& engine,

View File

@ -21,6 +21,7 @@
#include "gemm_inst.h"
#include "assign_inst.h"
#include "read_value_inst.h"
#include "condition_inst.h"
#include "experimental_detectron_roi_feature_extractor_inst.hpp"
#include "compilation_context.hpp"
#include "implementation_map.hpp"
@ -619,6 +620,10 @@ void primitive_inst::do_runtime_in_place_concat() {
GPU_DEBUG_TRACE_DETAIL << "[In place concat] " << concat_inst->id() << ": can_be_optimized " << std::endl;
}
bool primitive_inst::has_inner_networks() const {
return (_impl_params->inner_nets.size() > 0);
}
event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
const auto primitive_id = id();
OPENVINO_ASSERT(_has_valid_input, primitive_id, " has invalid/unset input");
@ -626,7 +631,7 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
bool need_args_update = false;
std::vector<event::ptr> dependencies;
if (is_dynamic()) {
if (is_dynamic() && !has_inner_networks()) {
do_runtime_in_place_concat();
OPENVINO_ASSERT(_node != nullptr, "[GPU] Invalid primitive_inst object for dynamic shapes case: program_node can't be null");
update_shape();
@ -679,11 +684,11 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
dependencies.push_back(ev_reset);
}
}
OPENVINO_ASSERT(_impl_params->get_output_layout().is_static(),
"[GPU] Can't execute ", primitive_id, " primitive as output layout is dynamic in runtime");
}
update_shape_done_by_other = false; // reset
OPENVINO_ASSERT(_impl_params->get_output_layout().is_static(),
"[GPU] Can't execute ", primitive_id, " primitive as output layout is dynamic in runtime");
OPENVINO_ASSERT(_impl != nullptr, "[GPU] Implementation is nullptr for ", primitive_id, " primitive");
// Output buffer may be changed under the following conditions, so we need to set args to kernel on each iteration
@ -1253,7 +1258,7 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() {
ov::intel_gpu::allow_static_input_reorder(true),
ov::intel_gpu::allow_new_shape_infer(true)
};
auto prog = program::build_program(get_network().get_engine(), t, subgraph_config, true, false);
auto prog = program::build_program(get_network().get_engine(), t, subgraph_config, get_network().get_program()->get_task_executor(), true, false);
_unfused_subgraph = network::allocate_network(get_network().get_stream_ptr(), prog, true, get_network().is_primary_stream());
}

View File

@ -66,6 +66,7 @@
#include "loop_inst.h"
#include "reverse_inst.h"
#include "unique_inst.hpp"
#include "condition_inst.h"
#include "to_string_utils.h"
// TODO: Remove once we have interface for kernels cache
@ -103,15 +104,58 @@
using namespace cldnn;
using namespace ov::intel_gpu;
static void adjust_num_cores(InferenceEngine::CPUStreamsExecutor::Config& config) {
if (InferenceEngine::getAvailableCoresTypes().size() == 1) {
return;
}
const auto total_num_cores = InferenceEngine::getNumberOfLogicalCPUCores();
const auto total_num_big_cores = InferenceEngine::getNumberOfLogicalCPUCores(true);
const auto total_num_little_cores = total_num_cores - total_num_big_cores;
auto core_type = config._threadPreferredCoreType;
int num_cores = total_num_cores;
if (core_type == InferenceEngine::IStreamsExecutor::Config::BIG) {
num_cores = total_num_big_cores;
} else if (core_type == InferenceEngine::IStreamsExecutor::Config::LITTLE) {
num_cores = total_num_little_cores;
}
config._streams = std::min(config._streams, num_cores);
}
static InferenceEngine::CPUStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags) {
InferenceEngine::CPUStreamsExecutor::Config task_executor_config(tags, 1);
task_executor_config._streams = config.get_property(ov::compilation_num_threads);
auto priority = config.get_property(ov::intel_gpu::hint::host_task_priority);
switch (priority) {
case ov::hint::Priority::LOW: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::LITTLE; break;
case ov::hint::Priority::MEDIUM: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::ANY; break;
case ov::hint::Priority::HIGH: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::BIG; break;
default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority);
}
adjust_num_cores(task_executor_config);
return task_executor_config;
}
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> program::make_task_executor(const ExecutionConfig& config) {
InferenceEngine::CPUStreamsExecutor::Config task_executor_config = make_task_executor_config(config, "CPU Tasks executor for GPU plugin");
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(task_executor_config);
}
program::program(engine& engine_ref,
topology const& topology,
const ExecutionConfig& config,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
bool is_internal,
bool no_optimizations,
bool is_body_program)
: _engine(engine_ref),
_stream(_engine.create_stream(config)),
_config(config),
_task_executor(task_executor),
processing_order(),
is_body_program(is_body_program) {
_config.apply_user_properties(_engine.get_device_info());
@ -162,7 +206,8 @@ void program::init_program() {
pm = std::unique_ptr<pass_manager>(new pass_manager(*this));
_task_executor = make_task_executor(_config);
if (_task_executor == nullptr)
_task_executor = program::make_task_executor(_config);
_kernels_cache = std::unique_ptr<kernels_cache>(new kernels_cache(_engine, _config, prog_id, _task_executor,
kernel_selector::KernelBase::get_db().get_batch_header_str()));
@ -194,58 +239,27 @@ void program::init_primitives() {
}
}
static void adjust_num_cores(InferenceEngine::CPUStreamsExecutor::Config& config) {
if (InferenceEngine::getAvailableCoresTypes().size() == 1) {
return;
}
const auto total_num_cores = InferenceEngine::getNumberOfLogicalCPUCores();
const auto total_num_big_cores = InferenceEngine::getNumberOfLogicalCPUCores(true);
const auto total_num_little_cores = total_num_cores - total_num_big_cores;
auto core_type = config._threadPreferredCoreType;
int num_cores = total_num_cores;
if (core_type == InferenceEngine::IStreamsExecutor::Config::BIG) {
num_cores = total_num_big_cores;
} else if (core_type == InferenceEngine::IStreamsExecutor::Config::LITTLE) {
num_cores = total_num_little_cores;
}
config._streams = std::min(config._streams, num_cores);
}
InferenceEngine::CPUStreamsExecutor::Config program::make_task_executor_config(const ExecutionConfig& config, std::string tags) const {
InferenceEngine::CPUStreamsExecutor::Config task_executor_config(tags, 1);
task_executor_config._streams = config.get_property(ov::compilation_num_threads);
auto priority = config.get_property(ov::intel_gpu::hint::host_task_priority);
switch (priority) {
case ov::hint::Priority::LOW: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::LITTLE; break;
case ov::hint::Priority::MEDIUM: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::ANY; break;
case ov::hint::Priority::HIGH: task_executor_config._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::BIG; break;
default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority);
}
adjust_num_cores(task_executor_config);
return task_executor_config;
}
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> program::make_task_executor(const ExecutionConfig& config) const {
InferenceEngine::CPUStreamsExecutor::Config task_executor_config = make_task_executor_config(config, "CPU Tasks executor for GPU plugin");
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(task_executor_config);
}
kernels_cache& program::get_kernels_cache() const {
return *_kernels_cache;
}
program::ptr program::build_program(engine& engine,
const topology& topology,
const ExecutionConfig& config,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
bool is_internal,
bool no_optimizations,
bool is_body_program) {
return std::make_shared<program>(engine, topology, config, task_executor, is_internal, no_optimizations, is_body_program);
}
program::ptr program::build_program(engine& engine,
const topology& topology,
const ExecutionConfig& config,
bool is_internal,
bool no_optimizations,
bool is_body_program) {
return std::make_shared<program>(engine, topology, config, is_internal, no_optimizations, is_body_program);
return std::make_shared<program>(engine, topology, config, nullptr, is_internal, no_optimizations, is_body_program);
}
program::ptr program::build_program(engine& engine,
@ -597,8 +611,8 @@ void program::post_optimize_graph(bool is_internal) {
if (_config.get_property(ov::intel_gpu::optimize_data))
apply_opt_pass<remove_redundant_reorders>(lo, false, true, true); // pass to remove output reorders after all other graph optimizations are done
// update loop input/output primitive mappings
apply_opt_pass<update_loop_primitive_map>();
// update inner program input/output primitive mappings
apply_opt_pass<update_inner_program_io_map>();
// Recalculate processing order after all graph transformation to keep optimal primitives ordering
// for OOO queue
@ -1007,12 +1021,18 @@ bool program::extract(program_node& node) {
if (user->is_type<loop>()) {
loop_node& loop = *user;
loop.update_primitive_map(node.id(), input.id());
} else if (user->is_type<condition>()) {
condition_node& cond = *user;
cond.update_primitive_map(node.id(), input.id());
}
for (auto& dep : node.dependencies) {
if (dep.first->is_type<loop>()) {
loop_node& loop = *dep.first;
loop.update_primitive_map(node.id(), user->id());
} else if (dep.first->is_type<condition>()) {
condition_node& cond = *dep.first;
cond.update_primitive_map(node.id(), user->id());
}
}
}
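With make_task_executor now public and static, a single executor can be created once and shared by the main program and any inner (branch/body) programs. A usage sketch against this commit's signatures (topology names are hypothetical):

auto executor = cldnn::program::make_task_executor(config);
auto outer = cldnn::program::build_program(engine, outer_topology, config, executor);
auto inner = cldnn::program::build_program(engine, body_topology, config, executor,
                                           true /*is_internal*/);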

View File

@ -25,6 +25,19 @@ layout roi_align_inst::calc_output_layout(roi_align_node const& node, kernel_imp
{num_rois, num_channels, primitive->pooled_h, primitive->pooled_w});
}
template<typename ShapeType>
std::vector<layout> roi_align_inst::calc_output_layouts(roi_align_node const& node, kernel_impl_params const& impl_param) {
auto primitive = impl_param.typed_desc<roi_align>();
auto input_layout = impl_param.get_input_layout(0);
auto rois_layout = impl_param.get_input_layout(1);
auto num_rois = rois_layout.get_partial_shape()[0];
auto num_channels = input_layout.get_partial_shape()[1];
return {layout({num_rois, num_channels, primitive->pooled_h, primitive->pooled_w}, input_layout.data_type, input_layout.format) };
}
template
std::vector<layout> roi_align_inst::calc_output_layouts<ov::PartialShape>(roi_align_node const& node, const kernel_impl_params& impl_param);
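// Worked example (sample values, not from the commit): input {1,256,200,272},
// rois {300,4}, pooled_h = pooled_w = 7 -> output layout {300,256,7,7}.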
std::string roi_align_inst::to_string(roi_align_node const& node) {
auto node_info = node.desc_to_json();
json_composite roi_align_info;

View File

@ -0,0 +1,91 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/plugin/program.hpp"
#include "ngraph/op/if.hpp"
#include "ie_ngraph_utils.hpp"
#include "intel_gpu/primitives/condition.hpp"
namespace ov {
namespace intel_gpu {
const size_t idx_true = 0;
const size_t idx_false = 1;
static cldnn::condition::branch gen_branch(Program& p, const std::shared_ptr<ngraph::op::v8::If>& op, size_t idx) {
cldnn::condition::branch branch;
const auto& internal_body = (idx == idx_true)? op->get_then_body() : op->get_else_body();
InferenceEngine::CNNNetwork body_network(internal_body);
{
// CNNNetwork changes the input/output data type to fp32 when the original type is fp16.
// To run the internal body, roll back the input/output data types to the original ones.
size_t tidx = 0;
auto& model_inputs = internal_body->get_parameters();
for (auto& in : body_network.getInputsInfo()) {
auto input_data_type = InferenceEngine::details::convertPrecision(model_inputs[tidx++]->get_output_tensor(0).get_element_type());
if (in.second->getPrecision() != input_data_type)
in.second->setPrecision(input_data_type);
}
tidx = 0;
for (auto& out : body_network.getOutputsInfo()) {
const auto& model_output = internal_body->get_output_op(tidx++);
auto output_data_type = InferenceEngine::details::convertPrecision(model_output->get_output_tensor(0).get_element_type());
if (out.second->getPrecision() != output_data_type)
out.second->setPrecision(output_data_type);
}
}
auto config = p.get_config();
config.set_property(ov::intel_gpu::max_dynamic_batch(1));
config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic()));
Program prog(body_network, p.get_engine(), config, false, false, nullptr, nullptr, p.get_task_executor(), true);
branch.inner_program = prog.GetCompiledProgram();
auto& input_map = branch.input_map;
auto external_inputs = p.GetInputInfo(op);
auto internal_inputs = internal_body->get_parameters();
auto input_desc_vec = op->get_input_descriptions(static_cast<int>(idx));
for (auto& in_desc : input_desc_vec) {
const auto& external_id = external_inputs.at(in_desc->m_input_index).pid;
const auto& internal_id = layer_type_name_ID(internal_inputs.at(in_desc->m_body_parameter_index));
input_map.insert({external_id, internal_id});
}
auto& output_map = branch.output_map;
auto internal_outputs = internal_body->get_results();
auto output_desc_vec = op->get_output_descriptions(static_cast<int>(idx));
for (auto& out_desc : output_desc_vec) {
const auto& internal_id = layer_type_name_ID(internal_outputs.at(out_desc->m_body_value_index));
output_map.insert({out_desc->m_output_index, internal_id});
}
return branch;
}
static void CreateIfOp(Program& p, const std::shared_ptr<ngraph::op::v8::If>& op) {
auto inputs = p.GetInputInfo(op);
OPENVINO_ASSERT(inputs.size() >= 1, "Invalid input count (at least one input is required)");
auto compare_node_pshape = op->get_input_partial_shape(0);
auto p_input_name = inputs[0].pid;
std::string type_name_str = op->get_input_node_ptr(0)->get_type_name();
const std::string layerName = layer_type_name_ID(op);
auto branch_true = gen_branch(p, op, idx_true);
auto branch_false = gen_branch(p, op, idx_false);
const cldnn::condition conditionPrimitive(layerName,
inputs,
branch_true,
branch_false);
p.add_primitive(*op, conditionPrimitive);
}
REGISTER_FACTORY_IMPL(v8, If);
} // namespace intel_gpu
} // namespace ov
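For reference, a minimal ngraph-side If that this factory translates (a hedged sketch; pred, x, and the two single-output bodies with their Parameters/Results are assumed to be built elsewhere):

auto if_op = std::make_shared<ngraph::op::v8::If>(pred); // pred: scalar boolean output
if_op->set_then_body(then_body);
if_op->set_else_body(else_body);
if_op->set_input(x, then_param, else_param); // bind outer x to each body's Parameter
auto out = if_op->set_output(then_result, else_result); // merge the two Results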

View File

@ -16,6 +16,8 @@
#include "intel_gpu/primitives/mutable_data.hpp"
#include "intel_gpu/primitives/data.hpp"
#include <ie_system_conf.h>
#ifdef __linux__
# include <dlfcn.h>
#endif
@ -121,11 +123,15 @@ bool Program::IsDynBatchModel(const std::shared_ptr<ov::Model>& model,
Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
bool createTopologyOnly, bool partialBuild,
InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs)
InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor, bool innerProgram)
: m_curBatch(-1)
, m_config(config)
, m_engine(engine)
, queryMode(false) {
, queryMode(false)
, m_task_executor(task_executor) {
if (m_task_executor == nullptr)
m_task_executor = cldnn::program::make_task_executor(m_config);
// Extract inputs/outputs info from CNNNetwork
auto networkInputs = (inputs != nullptr) ? *inputs : network.getInputsInfo();
auto networkOutputs = (outputs != nullptr) ? *outputs : network.getOutputsInfo();
@ -179,7 +185,8 @@ Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, co
int m_bv_sz = GetMaxBatchSizeForSingleProgram();
m_max_batch = static_cast<int>(m_config.get_property(ov::intel_gpu::max_dynamic_batch));
if (dyn_shape_batch_found || m_max_batch > 1) {
// Do not apply dynamic batch for inner program (only single batch is allowed)
if (!innerProgram && (dyn_shape_batch_found || m_max_batch > 1)) {
// compile log2 networks to serve dynamic batch requests
for (int b = m_bv_sz - 1; b >= 0; b--) {
inputLayouts.clear();
@ -290,7 +297,7 @@ Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, co
m_input_batch_dim = batch_dim;
}
} else {
m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild));
m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild, innerProgram));
}
}
@ -301,6 +308,7 @@ Program::Program(cldnn::engine& engine, const ExecutionConfig& config,
, m_config(config)
, m_engine(engine)
, queryMode(false) {
m_task_executor = cldnn::program::make_task_executor(m_config);
if (inputs != nullptr)
m_networkInputs = *inputs;
if (outputs != nullptr)
@ -356,9 +364,11 @@ void Program::CleanupBuild() {
std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::shared_ptr<ngraph::Node>>& ops,
InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs,
bool createTopologyOnly, bool partialBuild) {
bool createTopologyOnly, bool partialBuild, bool innerProgram) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::BuildProgram");
// In the case of an inner program, the allow_new_shape_infer flag is set from outside the program,
// so do not check allow_new_shape_infer when building an inner program.
for (const auto& op : ops) {
if (requires_new_shape_infer(*op)) {
allow_new_shape_infer = true;
@ -366,6 +376,10 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
}
}
if (innerProgram) {
allow_new_shape_infer = (m_config.get_property(ov::intel_gpu::allow_new_shape_infer) || allow_new_shape_infer);
}
m_config.set_property(ov::intel_gpu::partial_build_program(partialBuild));
m_config.set_property(ov::intel_gpu::optimize_data(true));
m_config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer));
@ -383,7 +397,7 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::CreateProgram");
cldnn::program::ptr program;
try {
program = cldnn::program::build_program(m_engine, *m_topology, m_config);
program = cldnn::program::build_program(m_engine, *m_topology, m_config, get_task_executor());
} catch (std::exception& e) {
OPENVINO_ASSERT(false, "GPU program build failed!\n", e.what());
}

View File

@ -0,0 +1,696 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "common_test_utils/test_constants.hpp"
#include "shared_test_classes/base/utils/ranges.hpp"
#include <common_test_utils/ov_tensor_utils.hpp>
using namespace InferenceEngine;
using namespace ov::test;
namespace GPULayerTestsDefinitions {
class InnerBodyGenerator {
public:
using ptr = std::shared_ptr<InnerBodyGenerator>;
enum InnerBodyType {
/**
* Simple inner body with single constant value
*/
Type01 = 1,
/**
* Inner body with eltwise multiply
*/
Type02 = 2,
/**
* Inner body with eltwise sum
*/
Type03 = 3,
/**
* Inner body with eltwise multiply followed by pooling;
* for the same input shape, its output shape differs from Type02 and Type03
* (e.g. a {1, 1, 4, 4} input becomes {1, 1, 2, 2} after the 2x2 stride-2 pooling)
*/
Type04 = 4,
/**
* Inner body with nested condition case
*/
Type05 = 5
};
public:
InnerBodyGenerator() { }
virtual std::shared_ptr<ngraph::Function> get_function() { return _func; }
virtual std::shared_ptr<ngraph::opset9::Parameter> get_input() { return _param; }
virtual std::shared_ptr<ngraph::opset1::Result> get_result() { return _result; }
virtual void create_body(ov::PartialShape& input_shape, ngraph::element::Type prc) {
_func = generate(input_shape, prc);
_param = (_func->get_parameters().size() > 0) ? _func->get_parameters().front() : nullptr;
_result = _func->get_results().front();
}
protected:
virtual std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) = 0;
std::shared_ptr<ngraph::Function> _func;
std::shared_ptr<ngraph::opset9::Parameter> _param;
std::shared_ptr<ngraph::opset1::Result> _result;
};
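Each concrete generator below only overrides generate(). As an illustration of the contract, a hypothetical extra body type (not part of this suite) would look like:

class InnerBodyTypeRelu : public InnerBodyGenerator {          // hypothetical, for illustration only
protected:
    std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
        auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
        auto relu = std::make_shared<ngraph::opset9::Relu>(data);
        auto result = std::make_shared<ngraph::opset1::Result>(relu);
        return std::make_shared<ngraph::Function>(ngraph::OutputVector{result},
                                                  ngraph::ParameterVector{data},
                                                  "relu_body");
    }
};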
class InnerBodyType01 : public InnerBodyGenerator {
protected:
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
auto constantA = ngraph::opset9::Constant::create(prc, ov::Shape(input_shape.rank().get_length(), 2), {2.0f});
constantA->set_friendly_name("body1_constantA");
auto constantB = ngraph::opset9::Constant::create(prc, ov::Shape(input_shape.rank().get_length(), 2), {12.0f});
constantB->set_friendly_name("body1_constantB");
auto add = std::make_shared<ngraph::opset9::Add>(constantA, constantB);
add->set_friendly_name("body1_add");
auto result = std::make_shared<ngraph::opset1::Result>(add);
result->set_friendly_name("body1_result");
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {result},
ngraph::ParameterVector{},
"constant");
return body;
}
};
class InnerBodyType02 : public InnerBodyGenerator {
protected:
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
auto constant = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 10.0f);
constant->set_friendly_name("body2_const");
auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
data->set_friendly_name("body2_data");
auto mul = std::make_shared<ngraph::opset9::Multiply>(data, constant);
mul->set_friendly_name("body2_mul");
auto result = std::make_shared<ngraph::opset1::Result>(mul);
result->set_friendly_name("body2_result");
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {result},
ngraph::ParameterVector{data},
"eltwise_mul");
return body;
}
};
class InnerBodyType03 : public InnerBodyGenerator {
protected:
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
auto constant = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 2.0f);
constant->set_friendly_name("body3_constant");
auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
data->set_friendly_name("body3_data");
auto add = std::make_shared<ngraph::opset9::Add>(data, constant);
add->set_friendly_name("body3_add");
auto result = std::make_shared<ngraph::opset1::Result>(add);
result->set_friendly_name("body3_result");
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {result},
ngraph::ParameterVector{data},
"eltwise_sum");
return body;
}
};
class InnerBodyType04 : public InnerBodyGenerator {
protected:
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
auto scale = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 2.0f);
scale->set_friendly_name("body4_scale");
auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
data->set_friendly_name("body4_data");
auto mul = std::make_shared<ngraph::opset9::Multiply>(data, scale);
mul->set_friendly_name("body4_mul");
auto pooling = generate_pooling(mul, input_shape);
pooling->set_friendly_name("body4_pool");
auto result = std::make_shared<ngraph::opset1::Result>(pooling);
result->set_friendly_name("body4_result");
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {result},
ngraph::ParameterVector{data},
"eltwise_mul_pooling");
return body;
}
struct poolSpecificParams {
ngraph::helpers::PoolingTypes pooling_type; // Pooling type, max or avg
std::vector<size_t> kernel_size; // Kernel size
std::vector<size_t> stride; // Stride
std::vector<size_t> pad_begin; // Pad begin
std::vector<size_t> pad_end; // Pad end
ngraph::op::RoundingType rounding_type; // Rounding type
ngraph::op::PadType pad_type; // Pad type
bool excluded_pad;                      // Exclude pad
};
std::shared_ptr<ov::Node> generate_pooling(const ngraph::Output<ov::Node> &in, ov::PartialShape& input_shape) {
poolSpecificParams params;
switch (input_shape.rank().get_length()) {
case 5:
{
params = poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX,
{2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0},
ngraph::op::RoundingType::CEIL,
ngraph::op::PadType::SAME_LOWER, true };
break;
}
case 4:
{
params = poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX,
{2, 2}, {2, 2}, {0, 0}, {0, 0},
ngraph::op::RoundingType::CEIL,
ngraph::op::PadType::SAME_LOWER, true };
break;
}
case 3:
{
params = poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX,
{2}, {2}, {0}, {0},
ngraph::op::RoundingType::CEIL,
ngraph::op::PadType::SAME_LOWER, true };
break;
}
default:
{
OPENVINO_ASSERT(false, "Not allowed other rank");
}
}
return ngraph::builder::makePooling(in, params.stride, params.pad_begin,
params.pad_end, params.kernel_size, params.rounding_type,
params.pad_type, params.excluded_pad, params.pooling_type);
}
};
class InnerBodyType05 : public InnerBodyGenerator {
protected:
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
auto constant = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 2.0f);
constant->set_friendly_name("body5_constant");
auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
data->set_friendly_name("body5_data");
auto add = std::make_shared<ngraph::opset9::Add>(data, constant);
add->set_friendly_name("body5_add");
std::vector<int> axes;
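// Collect one axis per dimension, counting down from 0 (0, -1, -2, ...): reduce over all axes.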
for (int i = 0, r = 0; i < input_shape.rank().get_length(); i++) {
axes.push_back(r--);
}
std::vector<size_t> shapeAxes;
shapeAxes.push_back(axes.size());
auto reductionAxesNode = std::dynamic_pointer_cast<ngraph::Node>(
std::make_shared<ngraph::opset3::Constant>(ngraph::element::Type_t::i64, ngraph::Shape(shapeAxes), axes));
const auto reduce = ngraph::builder::makeReduce(add, reductionAxesNode, false, ngraph::helpers::ReductionType::Min);
reduce->set_friendly_name("body5_reduce");
auto constant_ref = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 10.0f);
constant_ref->set_friendly_name("body5_ref_constant");
auto pred = std::make_shared<ngraph::opset3::GreaterEqual>(reduce, constant_ref);
pred->set_friendly_name("nested_pred");
auto nested_body_then_generator = std::make_shared<InnerBodyType03>();
auto nested_body_else_generator = std::make_shared<InnerBodyType04>();
auto nested_input_shape = add->get_output_partial_shape(0);
nested_body_then_generator->create_body(nested_input_shape, prc);
nested_body_else_generator->create_body(nested_input_shape, prc);
nested_body_then_generator->get_function()->set_friendly_name("nested_then_inner_body");
nested_body_else_generator->get_function()->set_friendly_name("nested_else_inner_body");
auto cond_nested = std::make_shared<ngraph::opset8::If>(pred);
cond_nested->set_friendly_name("if_operator_nested");
cond_nested->set_else_body(nested_body_else_generator->get_function());
cond_nested->set_then_body(nested_body_then_generator->get_function());
cond_nested->set_input(add, nested_body_then_generator->get_input(), nested_body_else_generator->get_input());
cond_nested->set_output(nested_body_then_generator->get_result(), nested_body_else_generator->get_result());
auto result = std::make_shared<ngraph::opset1::Result>(cond_nested);
result->set_friendly_name("body5_result");
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {result},
ngraph::ParameterVector{data},
"eltwise_sum");
return body;
}
};
static std::shared_ptr<InnerBodyGenerator> get_inner_body_generator(InnerBodyGenerator::InnerBodyType type) {
switch (type) {
case InnerBodyGenerator::InnerBodyType::Type01:
{
return std::make_shared<InnerBodyType01>();
}
case InnerBodyGenerator::InnerBodyType::Type02:
{
return std::make_shared<InnerBodyType02>();
}
case InnerBodyGenerator::InnerBodyType::Type03:
{
return std::make_shared<InnerBodyType03>();
}
case InnerBodyGenerator::InnerBodyType::Type04:
{
return std::make_shared<InnerBodyType04>();
}
case InnerBodyGenerator::InnerBodyType::Type05:
{
return std::make_shared<InnerBodyType05>();
}
default:
{
OPENVINO_ASSERT(false, "Not supported type");
}
}
}
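A minimal usage sketch of this factory (shape and precision chosen arbitrarily):

    auto body_gen = get_inner_body_generator(InnerBodyGenerator::InnerBodyType::Type03);
    ov::PartialShape body_shape{1, 1, 4, 4};
    body_gen->create_body(body_shape, ngraph::element::f32);
    auto body_func   = body_gen->get_function();   // branch ngraph::Function
    auto body_param  = body_gen->get_input();      // nullptr for parameterless bodies such as Type01
    auto body_result = body_gen->get_result();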
class TestModelGenerator {
public:
enum PredicateTypes {
PARAM,
NODE
};
public:
TestModelGenerator(InnerBodyGenerator::InnerBodyType then_body_type,
InnerBodyGenerator::InnerBodyType else_body_type,
PredicateTypes pred_type,
ngraph::element::Type prc,
ov::PartialShape input_shape,
bool cond_execution_value = false) {
body_then_generator = get_inner_body_generator(then_body_type);
body_else_generator = get_inner_body_generator(else_body_type);
body_then_generator->create_body(input_shape, prc);
body_else_generator->create_body(input_shape, prc);
body_else_generator->get_function()->set_friendly_name("else_inner_body");
body_then_generator->get_function()->set_friendly_name("then_inner_body");
ngraph::ParameterVector params{};
auto predicate = create_cond_execution(pred_type, params, ngraph::element::boolean, ngraph::Shape{});
predicate->set_friendly_name("if_predicate");
auto data = create_condition_input(params, prc, input_shape);
data->set_friendly_name("input_data");
auto cond = std::make_shared<ngraph::opset8::If>(predicate);
cond->set_friendly_name("if_operator");
cond->set_else_body(body_else_generator->get_function());
cond->set_then_body(body_then_generator->get_function());
cond->set_input(data, body_then_generator->get_input(), body_else_generator->get_input());
cond->set_output(body_then_generator->get_result(), body_else_generator->get_result());
auto result = std::make_shared<ngraph::opset1::Result>(cond);
result->set_friendly_name("outer_result");
function = std::make_shared<ngraph::Function>(ngraph::OutputVector {result}, params);
}
std::shared_ptr<ngraph::Function> get_function() { return function; }
private:
std::shared_ptr<ngraph::Node> create_condition_input(ngraph::ParameterVector& params,
const ngraph::element::Type prc, const ov::PartialShape& shape,
int value = 0, bool is_static = false) {
if (is_static)
return std::make_shared<ngraph::opset9::Constant>(prc, shape.to_shape(), value);
auto input = std::make_shared<ngraph::opset9::Parameter>(prc, shape);
params.push_back(input);
return input;
}
std::shared_ptr<ngraph::Node> create_cond_execution(PredicateTypes pred_type,
ngraph::ParameterVector& params,
const ngraph::element::Type prc = ngraph::element::u8,
const ngraph::Shape shape = ngraph::Shape{}) {
std::shared_ptr<ngraph::Node> pred;
switch (pred_type) {
case PredicateTypes::PARAM:
{
pred = create_condition_input(params, prc, shape);
break;
}
case PredicateTypes::NODE:
{
auto param_cond = create_condition_input(params, prc, shape);
param_cond->set_friendly_name("param_cond");
auto const_cond = create_condition_input(params, prc, ngraph::Shape{}, 1, true);
const_cond->set_friendly_name("const_cond");
pred = std::make_shared<ngraph::opset3::GreaterEqual>(param_cond, const_cond);
pred->set_friendly_name("pred");
break;
}
default:
{
OPENVINO_ASSERT(false, "Not supported type");
}
}
return pred;
}
private:
std::shared_ptr<ngraph::Function> function;
InnerBodyGenerator::ptr body_then_generator;
InnerBodyGenerator::ptr body_else_generator;
};
static std::ostream& operator<<(std::ostream& os, const InnerBodyGenerator::InnerBodyType type) {
switch (type) {
case InnerBodyGenerator::InnerBodyType::Type01:
{
os << "Type01";
break;
}
case InnerBodyGenerator::InnerBodyType::Type02:
{
os << "Type02";
break;
}
case InnerBodyGenerator::InnerBodyType::Type03:
{
os << "Type03";
break;
}
case InnerBodyGenerator::InnerBodyType::Type04:
{
os << "Type04";
break;
}
case InnerBodyGenerator::InnerBodyType::Type05:
{
os << "Type05";
break;
}
default:
{
os << "NONE";
break;
}
}
return os;
}
static std::ostream& operator<<(std::ostream& os, const TestModelGenerator::PredicateTypes type) {
switch (type) {
case TestModelGenerator::PredicateTypes::PARAM:
{
os << "PARAM";
break;
}
case TestModelGenerator::PredicateTypes::NODE:
{
os << "NODE";
break;
}
default:
{
os << "NONE";
break;
}
}
return os;
}
using ConditionParams = typename std::tuple<
InferenceEngine::SizeVector, // Shape
InferenceEngine::Precision, // Precision
TestModelGenerator::PredicateTypes, // if predicate type
LayerTestsUtils::TargetDevice // Device name
>;
class StaticConditionLayerGPUTest : public testing::WithParamInterface<ConditionParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<ConditionParams>& obj) {
InferenceEngine::SizeVector data_shape;
InferenceEngine::Precision data_prc;
TestModelGenerator::PredicateTypes pred;
std::string targetDevice;
std::tie(data_shape, data_prc, pred, targetDevice) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(data_shape) << "_";
result << "netPRC=" << std::to_string(data_prc) << "_";
result << "ifCond=" << pred << "_";
result << "targetDevice=" << targetDevice << "_";
auto res_str = result.str();
std::replace(res_str.begin(), res_str.end(), '-', '_');
return res_str;
}
protected:
void SetUp() override {
targetDevice = CommonTestUtils::DEVICE_GPU;
TestModelGenerator::PredicateTypes pred;
std::tie(data_shape, data_prc, pred, targetDevice) = GetParam();
const auto ngShape = ov::PartialShape{data_shape};
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(data_prc);
TestModelGenerator model_generator(InnerBodyGenerator::InnerBodyType::Type02,
InnerBodyGenerator::InnerBodyType::Type03,
pred,
prc,
ngShape);
function = model_generator.get_function();
}
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override {
auto tensor_desc = info.getTensorDesc();
auto blob = make_blob_with_precision(tensor_desc);
blob->allocate();
if (tensor_desc.getLayout() == InferenceEngine::SCALAR) {
auto prc = tensor_desc.getPrecision();
auto scalar_1d = CommonTestUtils::make_reshape_view(blob, {1});
if (prc == InferenceEngine::Precision::BOOL) {
auto mem_blob = dynamic_cast<InferenceEngine::MemoryBlob*>(blob.get());
auto mem = mem_blob->rwmap();
auto data_ptr = mem.as<bool*>();
*data_ptr = false;
} else {
CommonTestUtils::fill_data_with_broadcast(scalar_1d, 0, {20.f});
}
} else {
CommonTestUtils::fill_data_with_broadcast(blob, 0, {20.f});
}
return blob;
}
InferenceEngine::SizeVector data_shape;
InferenceEngine::Precision data_prc;
};
TEST_P(StaticConditionLayerGPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED();
Run();
}
std::vector<InferenceEngine::Precision> netPrecisions_static = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16,
InferenceEngine::Precision::I8
};
std::vector<InferenceEngine::SizeVector> inputs_shape = {
{3, 6}
};
std::vector<GPULayerTestsDefinitions::TestModelGenerator::PredicateTypes> if_cond_types = {
GPULayerTestsDefinitions::TestModelGenerator::PredicateTypes::PARAM
};
INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_static, StaticConditionLayerGPUTest,
testing::Combine(
testing::ValuesIn(inputs_shape),
testing::ValuesIn(netPrecisions_static),
testing::ValuesIn(if_cond_types),
testing::Values<std::string>(CommonTestUtils::DEVICE_GPU)),
StaticConditionLayerGPUTest::getTestCaseName);
/// Dynamic shape test
struct InnerBodyTypeParams {
InnerBodyGenerator::InnerBodyType then_body_type;
InnerBodyGenerator::InnerBodyType else_body_type;
};
using ConditionGPUParams = typename std::tuple<
InputShape, // Input Shapes
InnerBodyTypeParams, // Inner body type
InferenceEngine::Precision, // Precision
TestModelGenerator::PredicateTypes, // if predicate type
LayerTestsUtils::TargetDevice // Device name
>;
class DynamicConditionLayerGPUTest : public testing::WithParamInterface<ConditionGPUParams>,
virtual public SubgraphBaseTest {
public:
static std::string getTestCaseName(const testing::TestParamInfo<ConditionGPUParams>& obj) {
InputShape inputShapes;
InnerBodyTypeParams bodyParams;
InferenceEngine::Precision dataPrc;
TestModelGenerator::PredicateTypes condType;
std::string targetDevice;
std::tie(inputShapes, bodyParams, dataPrc, condType, targetDevice) = obj.param;
std::ostringstream result;
result << "IS=(";
result << CommonTestUtils::partialShape2str({inputShapes.first}) << "_";
for (size_t i = 0lu; i < inputShapes.second.size(); i++) {
result << "{";
result << CommonTestUtils::vec2str(inputShapes.second[i]) << "_";
result << "}_";
}
result << ")_";
result << "innerBody={" << bodyParams.then_body_type << ", " << bodyParams.else_body_type << "}_";
result << "netPRC=" << dataPrc << "_";
result << "ifCond=" << condType << "_";
result << "targetDevice=" << targetDevice << "_";
auto res_str = result.str();
std::replace(res_str.begin(), res_str.end(), '-', '_');
return res_str;
}
protected:
void SetUp() override {
InputShape inputShapes;
InnerBodyTypeParams bodyParams;
InferenceEngine::Precision dataPrc;
TestModelGenerator::PredicateTypes condType;
std::tie(inputShapes, bodyParams, dataPrc, condType, targetDevice) = GetParam();
auto num_second = inputShapes.second.size();
std::vector<ov::Shape> condSecondVec;
for (size_t i = 0; i < num_second; i++) {
condSecondVec.push_back({});
}
auto condShapes = ov::test::InputShape(ov::PartialShape({}), condSecondVec);
init_input_shapes({condShapes, inputShapes});
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(dataPrc);
TestModelGenerator model_generator(bodyParams.then_body_type,
bodyParams.else_body_type,
condType,
prc,
inputShapes.first);
function = model_generator.get_function();
function->set_friendly_name("if_operator_outer");
}
/**
* @brief Overrides generate_inputs to supply a boolean scalar tensor for the If operator's predicate.
*
* @param targetInputStaticShapes static shapes resolved for the current inference iteration
*/
void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
ov::Shape input_shape;
for (auto& shape : targetInputStaticShapes) {
if (shape.size() > 1) {
input_shape = shape;
break;
}
}
inputs.clear();
for (const auto &param : function->get_parameters()) {
if (param->get_output_element_type(0) == ov::element::boolean) {
auto tensor = ov::Tensor{ov::element::boolean, {}};
auto p_data = tensor.data<ov::element_type_traits<ov::element::boolean>::value_type>();
p_data[0] = (niter++ % 2);
inputs.insert({param, tensor});
} else {
ov::test::utils::InputGenerateData inGenData;
inGenData.range = 10;
inGenData.start_from = 0;
inGenData.resolution = 128;
inGenData.seed = 1;
auto tensor = ov::test::utils::create_and_fill_tensor(param->get_element_type(), input_shape, inGenData.range,
inGenData.start_from, inGenData.resolution, inGenData.seed);
inputs.insert({param, tensor});
}
}
}
size_t niter = 0;
};
TEST_P(DynamicConditionLayerGPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
run();
}
const std::vector<InferenceEngine::Precision> netPrecisions_f32 = {
InferenceEngine::Precision::FP32
};
const std::vector<InferenceEngine::Precision> netPrecisions_f16 = {
InferenceEngine::Precision::FP16
};
const std::vector<ov::test::InputShape> dynamicInputShapes_f32 = {
ov::test::InputShape(ov::PartialShape({-1, -1, -1, -1, -1}), {{4, 1, 1, 64, 32}, {6, 1, 1, 8, 4}, {8, 1, 1, 24, 16}}),
ov::test::InputShape(ov::PartialShape({1, 1, -1, -1}), {{1, 1, 64, 32}, {1, 1, 8, 4}, {1, 1, 24, 16}})
};
const std::vector<ov::test::InputShape> dynamicInputShapes_f16 = {
ov::test::InputShape(ov::PartialShape({1, 1, -1, -1}), {{1, 1, 64, 32}, {1, 1, 8, 4}, {1, 1, 24, 16}}),
ov::test::InputShape(ov::PartialShape({-1, -1, -1}), {{2, 24, 16}, {2, 64, 32}, {2, 8, 4}})
};
const std::vector<InnerBodyTypeParams> innerBodyTypes_f32 = {
{
InnerBodyGenerator::InnerBodyType::Type01,
InnerBodyGenerator::InnerBodyType::Type02
},
{
InnerBodyGenerator::InnerBodyType::Type02,
InnerBodyGenerator::InnerBodyType::Type03
}
};
const std::vector<InnerBodyTypeParams> innerBodyTypes_f16 = {
{
InnerBodyGenerator::InnerBodyType::Type04,
InnerBodyGenerator::InnerBodyType::Type03
},
{
InnerBodyGenerator::InnerBodyType::Type02,
InnerBodyGenerator::InnerBodyType::Type05
}
};
const std::vector<TestModelGenerator::PredicateTypes> condTypes = {
TestModelGenerator::PredicateTypes::PARAM,
TestModelGenerator::PredicateTypes::NODE
};
INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_dynamic_f32, DynamicConditionLayerGPUTest,
testing::Combine(
testing::ValuesIn(dynamicInputShapes_f32), // input shapes
testing::ValuesIn(innerBodyTypes_f32), // inner body type
testing::ValuesIn(netPrecisions_f32), // network precision
testing::ValuesIn(condTypes), // cond type
testing::Values<std::string>(CommonTestUtils::DEVICE_GPU)), // device type
DynamicConditionLayerGPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_dynamic_f16, DynamicConditionLayerGPUTest,
testing::Combine(
testing::ValuesIn(dynamicInputShapes_f16), // input shapes
testing::ValuesIn(innerBodyTypes_f16), // inner body type
testing::ValuesIn(netPrecisions_f16), // network precision
testing::ValuesIn(condTypes), // cond type
testing::Values<std::string>(CommonTestUtils::DEVICE_GPU)), // device type
DynamicConditionLayerGPUTest::getTestCaseName);
} // namespace GPULayerTestsDefinitions

View File

@ -18,138 +18,151 @@ using namespace cldnn;
using namespace ::tests;
namespace {
bool is_output_equal(const cldnn::memory::ptr mem, const std::vector<float>& ref)
template <class T>
bool is_output_equal(const cldnn::memory::ptr mem, const std::vector<T>& ref)
{
cldnn::mem_lock<float> ptr(mem, get_test_stream());
cldnn::mem_lock<T> ptr(mem, get_test_stream());
for (size_t i = 0; i < mem->get_layout().count(); i++) {
if (!are_equal(ptr[i], ref[i])) return false;
}
return true;
}
topology generate_simple_branch (bool branch_true_false, const primitive_id& input_id)
topology generate_simple_branch(bool branch_true_false, const primitive_id& id, const primitive_id& input_id, const data_types dt = data_types::f32)
{
topology branch;
if (branch_true_false) {
branch.add(
pooling(input_id + "_when_true", input_id, cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
input_layout(input_id, { dt, format::bfyx,{ 1, 1, 4, 1 } }),
pooling(id + "_when_true", input_id, cldnn::pooling_mode::max, { 1, 2 }, { 1, 2 })
);
} else {
branch.add(
pooling(input_id + "_when_false", input_id, cldnn::pooling_mode::average, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
input_layout(input_id, { dt, format::bfyx,{ 1, 1, 4, 1 } }),
pooling(id + "_when_false", input_id, cldnn::pooling_mode::average, { 1, 2 }, { 1, 2 })
);
}
return branch;
}
std::pair<std::vector<float>, std::vector<float>> get_values_to_compare(const cldnn::tensor& offset,
const cldnn::tensor& range,
const std::vector<float>& values,
const cldnn::layout& input_lay,
const cond_functions& func) {
std::vector<float> ret_true;
std::vector<float> ret_false;
auto mem_desc = generic_test::get_linear_memory_desc(input_lay);
for (int32_t b = 0; b < range.batch[0]; b++) {
for (int32_t f = 0; f < range.feature[0]; f++) {
for (int32_t y = 0; y < range.spatial[1]; y++) {
for (int32_t x = 0; x < range.spatial[0]; x++) {
auto linear_idx = generic_test::get_linear_index(
input_lay,
offset.batch[0] + b,
offset.feature[0] + f,
offset.spatial[1] + y,
offset.spatial[0] + x,
mem_desc);
switch (func) {
case cond_functions::EQUAL:
ret_true.push_back(values.at(linear_idx));
ret_false.push_back(-1.0f);
break;
case cond_functions::GREATER:
ret_true.push_back(values.at(linear_idx) - 1.0f);
ret_false.push_back(99.0f);
break;
case cond_functions::LESS:
ret_true.push_back(values.at(linear_idx) + 1.0f);
ret_false.push_back(-1.0f);
break;
}
}
}
}
}
return { ret_true, ret_false };
}
} // namespace
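All of the reworked tests below wire branches the same way: a condition::branch holds a prebuilt internal program plus two maps, input_map (outer primitive id -> inner input_layout id) and output_map (output index -> inner primitive id). A condensed sketch of the pattern, with illustrative ids:

    condition::branch branch_true;
    branch_true.inner_program = program::build_program(engine, branch_topology, config, true);  // is_internal = true
    branch_true.input_map.insert({"input", "branch_input"});   // outer primitive id -> inner input_layout id
    branch_true.output_map.insert({0, "branch_output"});       // output index -> inner primitive id
    // ... build branch_false the same way, then:
    topology.add(
        condition("condi", { input_info("predicate"), input_info("input") }, branch_true, branch_false)
    );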
TEST(DISABLED_condition_gpu, basic_equal_comp) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
template < typename DataType>
struct condition_data_types {
using type = DataType;
static const data_types data_type = type_to_data_type<DataType>::value;
};
topology branch_true = generate_simple_branch(true, "condi");
topology branch_false = generate_simple_branch(false, "condi");
template <typename ConditionDataType>
class condition_gpu_basic_test : public ::testing::Test {
public:
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
);
topology.add(
input_layout("scale_data", scale_mem->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
);
topology.add(
eltwise("output", { input_info("condi"), input_info("scale_data") }, eltwise_mode::prod)
);
using input_type = typename ConditionDataType::type;
std::vector<input_type> convert_data(std::vector<int> in_vec) {
const size_t vec_size = in_vec.size();
std::vector<input_type> converted_data_vec(vec_size);
for (size_t i = 0; i < vec_size; i++) {
converted_data_vec[i] = (input_type)in_vec[i];
}
return converted_data_vec;
}
network net(engine, topology, config);
set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f });
set_values(scale_mem, { 10.0f });
net.set_input_data("input", input);
net.set_input_data("scale_data", scale_mem);
void run_test() {
auto& engine = get_test_engine();
decltype(net.execute()) out;
auto dat_dt = ConditionDataType::data_type;
//WHEN TRUE
set_values(compare, { 1.0f });
net.set_input_data("compare", compare);
out = net.execute();
auto out_data_true = out.at("output").get_memory();
ASSERT_TRUE(is_output_equal(out_data_true, {20.0f, 40.0f}));
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ dat_dt, format::bfyx,{ 1, 1, 4, 1 } });
auto predicate = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } });
auto scale_mem = engine.allocate_memory({ dat_dt, format::bfyx,{ 1, 1, 1, 1 } });
//WHEN FALSE
set_values(compare, { 4.0f });
net.set_input_data("compare", compare);
out = net.execute();
auto out_data_false = out.at("output").get_memory();
ASSERT_TRUE(is_output_equal(out_data_false, { 15.0f, 35.0f }));
primitive_id input_id = "input";
primitive_id pred_id = "predicate";
primitive_id branch_input_id = "branch_input";
primitive_id cond_id = "condi";
primitive_id scale_data_id = "scale_data";
primitive_id output_id = "output";
condition::branch branch_true;
{
cldnn::topology branch_true_topology = generate_simple_branch(true, cond_id, branch_input_id, dat_dt);
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
branch_true.input_map.insert({input_id, branch_input_id});
branch_true.output_map.insert({0, "condi_when_true"});
}
condition::branch branch_false;
{
cldnn::topology branch_false_topology = generate_simple_branch(false, cond_id, branch_input_id, dat_dt);
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
branch_false.input_map.insert({input_id, branch_input_id});
branch_false.output_map.insert({0, "condi_when_false"});
}
cldnn::topology topology;
topology.add(
input_layout(input_id, input->get_layout())
);
topology.add(
input_layout(pred_id, predicate->get_layout())
);
topology.add(
input_layout(scale_data_id, scale_mem->get_layout())
);
topology.add(
condition(cond_id, {input_info(pred_id), input_info(input_id)}, branch_true, branch_false)
);
topology.add(
eltwise(output_id, { input_info(cond_id), input_info(scale_data_id) }, eltwise_mode::prod)
);
network net(engine, topology, config);
set_values(input, convert_data({ 1, 2, 3, 4 }));
set_values(scale_mem, convert_data({ 10 }));
net.set_input_data(input_id, input);
net.set_input_data(scale_data_id, scale_mem);
decltype(net.execute()) out;
//WHEN TRUE
set_values(predicate, { 1 });
net.set_input_data(pred_id, predicate);
out = net.execute();
auto out_data_true = out.at(output_id).get_memory();
ASSERT_TRUE(is_output_equal(out_data_true, convert_data({ 20, 40 })));
//WHEN FALSE
set_values(predicate, { 0 });
net.set_input_data(pred_id, predicate);
out = net.execute();
auto out_data_false = out.at(output_id).get_memory();
ASSERT_TRUE(is_output_equal(out_data_false, convert_data({ 15, 35 })));
}
};
using test_data_types = testing::Types<condition_data_types<FLOAT16>,
condition_data_types<float>>;
TYPED_TEST_SUITE(condition_gpu_basic_test, test_data_types);
TYPED_TEST(condition_gpu_basic_test, simple_basic_test) {
this->run_test();
}
TEST(DISABLED_condition_gpu, basic_range_equal_comp) {
TEST(condition_gpu, basic_range_equal_comp) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
auto predicate = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } });
topology branch_true = generate_simple_branch(true, "condi");
topology branch_false = generate_simple_branch(false, "condi");
primitive_id condi_id = "condi";
primitive_id branch_input_id = "branch_input";
primitive_id concat_id = "concat";
topology topology;
cldnn::topology topology;
topology.add(
input_layout("input0", input0->get_layout())
);
@ -157,32 +170,48 @@ TEST(DISABLED_condition_gpu, basic_range_equal_comp) {
input_layout("input1", input1->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
input_layout("predicate", predicate->get_layout())
);
topology.add(
concatenation("concat", { input_info("input0"), input_info("input1") }, 3)
);
condition::branch branch_true;
{
cldnn::topology branch_true_topology = generate_simple_branch(true, condi_id, branch_input_id);
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
branch_true.input_map.insert({concat_id, branch_input_id});
branch_true.output_map.insert({0, "condi_when_true"});
}
condition::branch branch_false;
{
cldnn::topology branch_false_topology = generate_simple_branch(false, condi_id, branch_input_id);
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
branch_false.input_map.insert({concat_id, branch_input_id});
branch_false.output_map.insert({0, "condi_when_false"});
}
topology.add(
condition("condi", input_info("concat"), branch_true, branch_false, "compare", cond_functions::EQUAL)
condition("condi", {input_info("predicate"), input_info("concat")}, branch_true, branch_false)
);
std::vector<float> input0_data = {
1, 2, 3, 4
1, 2
};
std::vector<float> input1_data = {
5, 6, 7, 8
3, 4
};
std::vector<float> compare_data_true = {
1, 2, 3
std::vector<uint8_t> predicate_data_true = {
1
};
std::vector<float> pooling_when_true_data = {
2, 4, 6, 8
2, 4
};
std::vector<float> compare_data_false = {
1, 2, 10
std::vector<uint8_t> predicate_data_false = {
0
};
std::vector<float> pooling_when_false_data = {
1.5, 3.5, 5.5, 7.5
1.5, 3.5
};
set_values(input0, input0_data);
@ -194,121 +223,23 @@ TEST(DISABLED_condition_gpu, basic_range_equal_comp) {
decltype(net.execute()) outputs;
//CHECK TRUE
set_values(compare, compare_data_true);
net.set_input_data("compare", compare);
set_values(predicate, predicate_data_true);
net.set_input_data("predicate", predicate);
outputs = net.execute();
auto out_data_true = outputs.at("condi").get_memory();
ASSERT_TRUE(is_output_equal(out_data_true, pooling_when_true_data));
//CHECK FALSE
set_values(compare, compare_data_false);
net.set_input_data("compare", compare);
set_values(predicate, predicate_data_false);
net.set_input_data("predicate", predicate);
outputs = net.execute();
auto out_data_false = outputs.at("condi").get_memory();
ASSERT_TRUE(is_output_equal(out_data_false, pooling_when_false_data));
}
TEST(DISABLED_condition_gpu, generic_test_true_false) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 5, 2, 5, 1 } });
std::vector<float> input_data(50);
std::iota(input_data.begin(), input_data.end(), 0.0f);
std::vector<cond_functions> functions = {
cond_functions::EQUAL,
cond_functions::GREATER,
cond_functions::LESS,
};
// ranges, with data when condition is true or false
std::vector<cldnn::tensor> ranges = {
{1, 1, 1, 1},
{1, 1, 3, 1},
{2, 1, 1, 1},
{2, 1, 1, 1}
};
std::vector<cldnn::tensor> offsets = {
{ 0, 0, 0, 0},
{ 0, 0, 1, 0},
{ 0, 0, 2, 0},
{ 2, 0, 0, 0},
{ 2, 1, 1, 0}
};
std::vector<float> pooling_when_true_data = {
2, 4, 7, 9, 12, 14, 17,
19, 22, 24, 27, 29, 32,
34, 37, 39, 42, 44, 47, 49
};
std::vector<float> pooling_when_false_data = {
1, 3, 6, 8, 11, 13, 16,
18, 21, 23, 26, 28, 31,
33, 36, 38, 41, 43, 46, 48
};
for (auto const& func : functions) {
for (auto const& range : ranges) {
for (auto const& offset : offsets) {
auto comp_values = get_values_to_compare(offset, range, input_data, input->get_layout(), func);
auto comp_values_true = comp_values.first;
auto comp_values_false = comp_values.second;
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx, range });
topology branch_true;
topology branch_false;
branch_true.add(
pooling("pooling_when_true", input_info("condi"), cldnn::pooling_mode::max, { 1, 1, 3, 1 }, { 1, 1, 2, 1 })
);
branch_false.add(
pooling("pooling_when_false", input_info("condi"), cldnn::pooling_mode::average, { 1, 1, 3, 1 }, { 1, 1, 2, 1 })
);
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", func, offset)
);
set_values(input, input_data);
network net(engine, topology, config);
net.set_input_data("input", input);
decltype(net.execute()) outputs;
//CHECK TRUE
set_values(compare, comp_values_true);
net.set_input_data("compare", compare);
outputs = net.execute();
auto out_data_true = outputs.at("condi").get_memory();
ASSERT_TRUE(is_output_equal(out_data_true, pooling_when_true_data));
//CHECK FALSE
set_values(compare, comp_values_false);
net.set_input_data("compare", compare);
outputs = net.execute();
auto out_data_false = outputs.at("condi").get_memory();
ASSERT_TRUE(is_output_equal(out_data_false, pooling_when_false_data));
}
}
}
}
TEST(DISABLED_condition_gpu, basic_stacked_ifs) {
TEST(condition_gpu, basic_stacked_ifs) {
/*
<prims...>
<if>
@ -324,61 +255,95 @@ TEST(DISABLED_condition_gpu, basic_stacked_ifs) {
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto compare2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
auto predicate = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto predicate2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
topology condi_1_true = generate_simple_branch(true, "condi");
topology condi_1_false = generate_simple_branch(false, "condi");
primitive_id input_id = "input";
primitive_id pred_id = "predicate";
primitive_id predicate2_id = "predicate2";
primitive_id branch_input_id = "branch_input";
primitive_id cond_id = "condi";
primitive_id cond2_id = "condi2";
primitive_id scale_data_id = "scale_data";
primitive_id output_id = "output";
topology condi_1_true = generate_simple_branch(true, cond_id, branch_input_id);
topology condi_1_false = generate_simple_branch(false, cond_id, branch_input_id);
topology condi_2_true;
condi_2_true.add(
activation("activ_when_true", input_info("condi2"), activation_func::log2)
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }),
activation("activ_when_true", input_info(branch_input_id), activation_func::log2)
);
topology condi_2_false;
condi_2_false.add(
activation("activ_when_false", input_info("condi2"), activation_func::relu)
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }),
activation("activ_when_false", input_info(branch_input_id), activation_func::relu)
);
condition::branch branch_condi_1_true;
branch_condi_1_true.inner_program = program::build_program(engine, condi_1_true, config, true);
branch_condi_1_true.input_map.insert({input_id, branch_input_id});
branch_condi_1_true.output_map.insert({0, "condi_when_true"});
condition::branch branch_condi_1_false;
branch_condi_1_false.inner_program = program::build_program(engine, condi_1_false, config, true);
branch_condi_1_false.input_map.insert({input_id, branch_input_id});
branch_condi_1_false.output_map.insert({0, "condi_when_false"});
condition::branch branch_condi_2_true;
branch_condi_2_true.inner_program = program::build_program(engine, condi_2_true, config, true);
branch_condi_2_true.input_map.insert({cond_id, branch_input_id});
branch_condi_2_true.output_map.insert({0, "activ_when_true"});
condition::branch branch_condi_2_false;
branch_condi_2_false.inner_program = program::build_program(engine, condi_2_false, config, true);
branch_condi_2_false.input_map.insert({cond_id, branch_input_id});
branch_condi_2_false.output_map.insert({0, "activ_when_false"});
topology topology;
topology.add(
input_layout("input", input->get_layout())
input_layout(input_id, input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
input_layout(pred_id, predicate->get_layout())
);
topology.add(
condition("condi", input_info("input"), condi_1_true, condi_1_false, "compare", cond_functions::EQUAL)
condition(cond_id, { input_info(pred_id), input_info(input_id) }, branch_condi_1_true, branch_condi_1_false)
);
topology.add(
input_layout("compare2", compare2->get_layout())
input_layout(predicate2_id, predicate2->get_layout())
);
topology.add(
condition("condi2", input_info("condi"), condi_2_true, condi_2_false, "compare2", cond_functions::GREATER)
condition(cond2_id, { input_info(predicate2_id), input_info(cond_id) }, branch_condi_2_true, branch_condi_2_false)
);
std::vector<float> input_data = {
1, 2, 3, 4
};
std::vector<float> compare_data = {
std::vector<uint8_t> predicate_data = {
1
};
std::vector<float> compare_2_data = {
0.0f, 0.0f
std::vector<uint8_t> predicate_2_data = {
0
};
set_values(input, input_data);
set_values(compare, compare_data);
set_values(compare2, compare_2_data);
set_values(predicate, predicate_data);
set_values(predicate2, predicate_2_data);
network net(engine, topology, config);
net.set_input_data("input", input);
net.set_input_data("compare", compare);
net.set_input_data("compare2", compare2);
net.set_input_data(input_id, input);
net.set_input_data(pred_id, predicate);
net.set_input_data(predicate2_id, predicate2);
auto outputs = net.execute();
auto out_data = outputs.at("condi2").get_memory();
ASSERT_TRUE(is_output_equal(out_data, {1.0f, 2.0f}));
std::vector<float> ref_data = {
2.0f, 4.0f
};
auto out_data = outputs.at(cond2_id).get_memory();
ASSERT_TRUE(is_output_equal(out_data, ref_data));
}
TEST(DISABLED_condition_gpu, basic_nested_ifs) {
TEST(condition_gpu, basic_nested_ifs) {
/*
<prims...>
<if 0>
@ -394,191 +359,243 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) {
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto compare2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
auto predicate = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto predicate2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto scale_5_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
set_values(scale_5_mem, { 5.0f });
auto scale_10_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
set_values(scale_10_mem, { 10.0f });
topology nested_true;
condition::branch nested_true;
{
nested_true.add(eltwise("scale_5", { input_info("condi_nested"), input_info("scale_5_data") }, eltwise_mode::prod),
data("scale_5_data", scale_5_mem));
cldnn::topology nested_true_topology;
nested_true_topology.add(
input_layout("branch_input1", { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }),
data("scale_5_data", scale_5_mem),
eltwise("scale_5", { input_info("branch_input1"), input_info("scale_5_data") }, eltwise_mode::prod)
);
nested_true.inner_program = program::build_program(engine, nested_true_topology, config, true);
nested_true.input_map.insert({"pooling_when_true", "branch_input1"});
nested_true.output_map.insert({0, "scale_5"});
}
topology nested_false;
condition::branch nested_false;
{
nested_false.add(eltwise("scale_10", { input_info("condi_nested"), input_info("scale_10_data") }, eltwise_mode::prod),
data("scale_10_data", scale_10_mem));
cldnn::topology nested_false_topology;
nested_false_topology.add(
input_layout("branch_input2", { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }),
data("scale_10_data", scale_10_mem),
eltwise("scale_10", { input_info("branch_input2"), input_info("scale_10_data") }, eltwise_mode::prod)
);
nested_false.inner_program = program::build_program(engine, nested_false_topology, config, true);
nested_false.input_map.insert({"pooling_when_true", "branch_input2"});
nested_false.output_map.insert({0, "scale_10"});
}
topology branch_true;
branch_true.add(
pooling("pooling_when_true", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
);
branch_true.add(
input_layout("compare2", compare2->get_layout())
);
condition::branch branch_true;
{
cldnn::topology branch_true_topology;
branch_true_topology.add(
input_layout("branch_input3", { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
pooling("pooling_when_true", input_info("branch_input3"), cldnn::pooling_mode::max, { 1, 2 }, { 1, 2 }),
input_layout("predicate2", predicate2->get_layout()),
condition( "condi_nested", {input_info("predicate2"), input_info("pooling_when_true")}, nested_true, nested_false)
);
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
branch_true.input_map.insert({"input", "branch_input3"});
branch_true.output_map.insert({0, "condi_nested"});
}
branch_true.add(
condition(
"condi_nested",
input_info("pooling_when_true"),
nested_true,
nested_false,
"compare2",
cond_functions::EQUAL)
);
condition::branch branch_false;
{
cldnn::topology branch_false_topology;
branch_false_topology.add(
input_layout("branch_input4", { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
pooling("pooling_when_false", input_info("branch_input4"), cldnn::pooling_mode::average, { 1, 2 }, { 1, 2 })
);
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
branch_false.input_map.insert({"input", "branch_input4"});
branch_false.output_map.insert({0, "pooling_when_false"});
}
topology branch_false;
branch_false.add(
pooling("pooling_when_false", input_info("condi"), cldnn::pooling_mode::average, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
);
topology topology;
cldnn::topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
input_layout("predicate", predicate->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
condition("condi", {input_info("predicate"), input_info("input")}, branch_true, branch_false)
);
std::vector<float> input_data = {
1.0f, 2.0f, 3.0f, 4.0f
};
std::vector<float> compare_data = {
std::vector<float> predicate_data = {
1.0f
};
std::vector<float> compare_2_data = {
std::vector<float> predicate_2_data = {
2.0f, 4.0f
};
set_values(input, input_data);
set_values(compare, compare_data);
set_values(compare2, compare_2_data);
set_values(predicate, predicate_data);
set_values(predicate2, predicate_2_data);
network net(engine, topology, config);
net.set_input_data("input", input);
net.set_input_data("compare", compare);
net.set_input_data("compare2", compare2);
net.set_input_data("predicate", predicate);
net.set_input_data("predicate2", predicate2);
auto outputs = net.execute();
auto out_data = outputs.at("condi").get_memory();
ASSERT_TRUE(is_output_equal(out_data, { 10.0f, 20.0f }));
ASSERT_TRUE(is_output_equal(out_data, std::vector<float>({ 10.0f, 20.0f })));
}
TEST(DISABLED_condition_gpu, negative_compare_wrong_layout) {
TEST(condition_gpu, negative_predicate_wrong_layout) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 5, 1 } });
auto predicate = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 5, 1 } });
topology branch_true = generate_simple_branch(true, "condi");
topology branch_false = generate_simple_branch(false, "condi");
primitive_id input_id = "input";
primitive_id pred_id = "predicate";
primitive_id branch_input_id = "branch_input";
primitive_id cond_id = "condi";
condition::branch branch_true;
{
cldnn::topology branch_true_topology = generate_simple_branch(true, cond_id, branch_input_id, data_types::f32);
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
branch_true.input_map.insert({input_id, branch_input_id});
branch_true.output_map.insert({0, "condi_when_true"});
}
condition::branch branch_false;
{
cldnn::topology branch_false_topology = generate_simple_branch(false, cond_id, branch_input_id, data_types::f32);
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
branch_false.input_map.insert({input_id, branch_input_id});
branch_false.output_map.insert({0, "condi_when_false"});
}
topology topology;
topology.add(
input_layout("input", input->get_layout())
input_layout(input_id, input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
input_layout(pred_id, predicate->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
condition(cond_id, {input_info(pred_id), input_info(input_id)}, branch_true, branch_false)
);
EXPECT_ANY_THROW(network net(engine, topology, config););
}
TEST(DISABLED_condition_gpu, negative_too_big_offset) {
TEST(condition_gpu, negative_not_same_layouts) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
auto predicate = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } });
primitive_id input_id = "input";
primitive_id pred_id = "predicate";
primitive_id branch_input_id = "branch_input";
primitive_id cond_id = "condi";
condition::branch branch_true;
{
primitive_id pool_id = "pooling_when_true";
topology branch_true_topology;
branch_true_topology.add(
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
pooling(pool_id, input_info(branch_input_id), cldnn::pooling_mode::max, { 1, 2 }, { 1, 2 })
);
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
branch_true.input_map.insert({input_id, branch_input_id});
branch_true.output_map.insert({0, pool_id});
}
condition::branch branch_false;
{
primitive_id pool_id = "pooling_when_false";
topology branch_false_topology;
branch_false_topology.add(
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
pooling(pool_id, input_info(branch_input_id), cldnn::pooling_mode::max, { 1, 4 }, { 1, 4 })
);
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
branch_false.input_map.insert({input_id, branch_input_id});
branch_false.output_map.insert({0, pool_id});
}
topology branch_true = generate_simple_branch(true, "condi");
topology branch_false = generate_simple_branch(false, "condi");
topology topology;
topology.add(
input_layout("input", input->get_layout())
input_layout(input_id, input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
input_layout(pred_id, predicate->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL, {1, 1, 2, 1})
condition(cond_id, {input_info(pred_id), input_info(input_id)}, branch_true, branch_false)
);
EXPECT_ANY_THROW(network net(engine, topology, config););
}
TEST(DISABLED_condition_gpu, negative_not_same_layouts) {
TEST(condition_gpu, negative_same_names_within_different_networks) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
auto predicate = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } });
topology branch_true;
branch_true.add(
pooling("pooling_when_true", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
);
primitive_id input_id = "input";
primitive_id pred_id = "predicate";
primitive_id branch_input_id = "branch_input";
primitive_id cond_id = "condi";
primitive_id duplicated_id = "pooling_check_name";
topology branch_false;
branch_false.add(
pooling("pooling_when_false", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 4, 1 }, { 0, 0, 4, 1 })
);
condition::branch branch_true;
{
topology branch_true_topology;
branch_true_topology.add(
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
pooling(duplicated_id, input_info(branch_input_id), cldnn::pooling_mode::max, { 2, 1 }, { 2, 1 })
);
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
branch_true.input_map.insert({input_id, branch_input_id});
branch_true.output_map.insert({0, duplicated_id});
}
condition::branch branch_false;
{
topology branch_false_topology;
branch_false_topology.add(
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
pooling("pooling_when_false", input_info(branch_input_id), cldnn::pooling_mode::max, { 2, 1 }, { 2, 1 })
);
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
branch_false.input_map.insert({input_id, branch_input_id});
branch_false.output_map.insert({0, "pooling_when_false"});
}
topology topology;
topology.add(
input_layout("input", input->get_layout())
input_layout(input_id, input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
input_layout(pred_id, predicate->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
);
EXPECT_ANY_THROW(network net(engine, topology, config););
}
TEST(DISABLED_condition_gpu, negative_same_names_within_different_networks) {
auto& engine = get_test_engine();
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
topology branch_true;
branch_true.add(
pooling("pooling_check_name", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
);
topology branch_false;
branch_false.add(
pooling("pooling_when_false", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
);
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
input_layout("compare", compare->get_layout())
);
topology.add(
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
);
topology.add(
pooling("pooling_check_name", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
condition(cond_id, {input_info(pred_id), input_info(input_id)}, branch_true, branch_false)
);
topology.add(
pooling(duplicated_id, input_info(cond_id), cldnn::pooling_mode::max, { 2, 1 }, { 2, 1 })
);
EXPECT_ANY_THROW(network net(engine, topology, config););