[GPU] Impl cldnn::condition to support dynamic shape (#18051)
* [GPU] Impl cldnn::condition to support dynamic shape (#18051) * Impl CreateIfOp * Update calc_output_layouts and execute_impl * Enable gpu unit test * Create gpu functional test * [GPU] Follow-up code review (#18051) * remove redundant codes * create custom execute method for condition_inst * change name from update_loop_primitive_map to update_inner_program_io_map * [GPU] Fix gpu func test failures for fp16 * Add more test-cases to support fp16 and nested if case * [GPU] remove redundant codes * refactoring var names * fix windows build error * [GPU] Fix windows build issue * [GPU] update calc_output_layouts * [GPU] remove custom condition_inst::execute * Remove virtual keyword from primitive_inst::execute() * [GPU] Share single task executor between main program and inner program * [GPU] Fix input rank issue for const inner network in condition op * [GPU] apply calc_output_layouts for roi_align Co-authored-by: Vladimir Paramuzov <vladimir.paramuzov@intel.com> * [GPU] avoid checking allow_new_shape_infer for inner program --------- Co-authored-by: Vladimir Paramuzov <vladimir.paramuzov@intel.com>
This commit is contained in:
parent
c2afa2aefc
commit
50897e86e6
@ -22,6 +22,7 @@
|
||||
namespace cldnn {
|
||||
|
||||
struct program;
|
||||
struct network;
|
||||
|
||||
|
||||
struct kernel_impl_params {
|
||||
@ -54,6 +55,9 @@ struct kernel_impl_params {
|
||||
|
||||
std::map<size_t, memory::ptr> memory_deps = {};
|
||||
size_t primary_input_idx = 0;
|
||||
std::vector<std::shared_ptr<program>> inner_progs = {};
|
||||
std::vector<std::shared_ptr<network>> inner_nets = {};
|
||||
std::vector<std::map<size_t, primitive_id>> io_output_maps = {};
|
||||
|
||||
kernel_impl_params() : prog(nullptr), strm(nullptr), desc(nullptr), unique_id(0) {}
|
||||
|
||||
|
@ -79,7 +79,9 @@ public:
|
||||
network(engine& engine,
|
||||
const topology& topo,
|
||||
const ExecutionConfig& config = {},
|
||||
bool is_internal = false);
|
||||
bool is_internal = false,
|
||||
InferenceEngine::CPUStreamsExecutor::Ptr task_executor = nullptr);
|
||||
|
||||
network(engine& engine,
|
||||
const std::set<std::shared_ptr<program_node>>& nodes,
|
||||
const ExecutionConfig& config,
|
||||
@ -100,7 +102,9 @@ public:
|
||||
static ptr build_network(engine& engine,
|
||||
const topology& topology,
|
||||
const ExecutionConfig& config = {},
|
||||
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor = nullptr,
|
||||
bool is_internal = false);
|
||||
|
||||
static ptr build_network(engine& engine,
|
||||
const std::set<std::shared_ptr<program_node>>& nodes,
|
||||
const ExecutionConfig& config,
|
||||
|
@ -125,6 +125,7 @@ public:
|
||||
program(engine& engine_ref,
|
||||
topology const& topology,
|
||||
const ExecutionConfig& config,
|
||||
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
|
||||
bool is_internal = false,
|
||||
bool no_optimizations = false,
|
||||
bool is_body_program = false);
|
||||
@ -236,6 +237,13 @@ public:
|
||||
bool is_internal = false,
|
||||
bool no_optimizations = false,
|
||||
bool is_body_program = false);
|
||||
static ptr build_program(engine& engine,
|
||||
const topology& topology,
|
||||
const ExecutionConfig& config,
|
||||
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
|
||||
bool is_internal = false,
|
||||
bool no_optimizations = false,
|
||||
bool is_body_program = false);
|
||||
static ptr build_program(engine& engine,
|
||||
const std::set<std::shared_ptr<program_node>>& nodes,
|
||||
const ExecutionConfig& config,
|
||||
@ -253,6 +261,8 @@ public:
|
||||
ICompilationContext& get_compilation_context() const { return *_compilation_context; }
|
||||
void cancel_compilation_context();
|
||||
|
||||
static std::shared_ptr<InferenceEngine::CPUStreamsExecutor> make_task_executor(const ExecutionConfig& config);
|
||||
|
||||
private:
|
||||
uint32_t prog_id = 0;
|
||||
engine& _engine;
|
||||
@ -307,9 +317,6 @@ private:
|
||||
void post_optimize_graph(bool is_internal);
|
||||
void transfer_memory_to_device();
|
||||
|
||||
InferenceEngine::CPUStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags = "") const;
|
||||
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> make_task_executor(const ExecutionConfig& config) const;
|
||||
|
||||
/*
|
||||
** Analysis functions
|
||||
*/
|
||||
|
@ -235,6 +235,7 @@ REGISTER_FACTORY(v8, AdaptiveAvgPool);
|
||||
REGISTER_FACTORY(v8, AdaptiveMaxPool);
|
||||
REGISTER_FACTORY(v8, Softmax);
|
||||
REGISTER_FACTORY(v8, PriorBox);
|
||||
REGISTER_FACTORY(v8, If);
|
||||
|
||||
// ------------------------------ Supported v9 ops ------------------------------ //
|
||||
REGISTER_FACTORY(v9, GridSample)
|
||||
|
@ -83,7 +83,8 @@ class Program {
|
||||
public:
|
||||
Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
|
||||
bool createTopologyOnly = false, bool partialBuild = false,
|
||||
InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
|
||||
InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr,
|
||||
InferenceEngine::CPUStreamsExecutor::Ptr task_executor = nullptr, bool innerProgram = false);
|
||||
Program(cldnn::engine& engine, const ExecutionConfig& config,
|
||||
InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
|
||||
|
||||
@ -158,6 +159,8 @@ public:
|
||||
bool use_new_shape_infer() const { return allow_new_shape_infer; }
|
||||
bool requires_new_shape_infer(const ngraph::Node& op) const;
|
||||
|
||||
InferenceEngine::CPUStreamsExecutor::Ptr get_task_executor() { return m_task_executor; }
|
||||
|
||||
private:
|
||||
static factories_map_t factories_map;
|
||||
std::vector<std::shared_ptr<cldnn::program>> m_programs;
|
||||
@ -173,6 +176,8 @@ private:
|
||||
|
||||
bool queryMode;
|
||||
|
||||
InferenceEngine::CPUStreamsExecutor::Ptr m_task_executor;
|
||||
|
||||
void EnableQueryMode() { queryMode = true; }
|
||||
void DisableQueryMode() { queryMode = false; }
|
||||
|
||||
@ -183,7 +188,7 @@ private:
|
||||
std::shared_ptr<cldnn::program> BuildProgram(const std::vector<std::shared_ptr<ngraph::Node>>& ops,
|
||||
InferenceEngine::InputsDataMap networkInputs,
|
||||
InferenceEngine::OutputsDataMap networkOutputs,
|
||||
bool createTopologyOnly = false, bool partialBuild = false);
|
||||
bool createTopologyOnly = false, bool partialBuild = false, bool innerProgram = false);
|
||||
|
||||
void CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr<ngraph::Node>& op);
|
||||
void ChangeInputBatch(int batch);
|
||||
|
@ -5,64 +5,73 @@
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include "intel_gpu/graph/topology.hpp"
|
||||
#include "intel_gpu/graph/program.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
/// @brief Function, which will be used during comparison.
|
||||
enum cond_functions : int32_t { EQUAL, GREATER, LESS };
|
||||
|
||||
/// @brief Adds primitive, which works like "if".
|
||||
///
|
||||
/// @details
|
||||
/// @n Applies comparison between 2 inputs.
|
||||
/// @n Compare data - sizes of that input specify the range of the comparison.
|
||||
/// @n Offset - offset in memory, when comparing values.
|
||||
/// @n Applies comparison using the pred primitive, which holds a 1D tensor or scalar value.
|
||||
struct condition : public primitive_base<condition> {
|
||||
CLDNN_DECLARE_PRIMITIVE(condition)
|
||||
|
||||
/// @brief branch has compiled program, input_map and output_map
|
||||
///
|
||||
struct branch {
|
||||
std::map<primitive_id, primitive_id> input_map;
|
||||
std::map<size_t, primitive_id> output_map;
|
||||
program::ptr inner_program;
|
||||
|
||||
std::string str() {
|
||||
std::stringstream ss;
|
||||
ss << "branch: { " << std::endl;
|
||||
ss<< "* input_map : [(outer_id,inner_id),";
|
||||
for (auto& in_iter : input_map) {
|
||||
ss << "(" << in_iter.first << "," << in_iter.second << "),";
|
||||
}
|
||||
ss << "]," << std::endl;
|
||||
|
||||
ss << "* output_map : [(outer_idx,inner_id),";
|
||||
for (auto& out_iter : output_map) {
|
||||
ss << "(" << out_iter.first << ","<< out_iter.second << "),";
|
||||
}
|
||||
ss << "]" << std::endl;
|
||||
ss << "}" << std::endl;
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
|
||||
/// @brief Constructs condition primitive / layer.
|
||||
///
|
||||
/// @param id An identifier of new primitive.
|
||||
/// @param input An identifier of primitive which is an input for newly created
|
||||
/// condition primitive.
|
||||
/// @param topology_true Topology containing primitives, which will be executed when comparison results
|
||||
/// true.
|
||||
/// @param topology_false Topology containing primitives, which will be executed when comparison results
|
||||
/// false..
|
||||
/// @param compare_Data An identifier of primitive which contains compare values
|
||||
/// @param func Used function during comparison.
|
||||
/// @param offset Offset for compare data.
|
||||
/// @param inputs A list of Input primitive ids (pred, inputs(optional)).
|
||||
/// pred is condition's predicate primitive which has scalar value determining whether to execute branch_true or branch_false.
|
||||
/// sometimes, if
|
||||
/// @param branch_true Branch containing primitives, which will be executed when pred is true (the then-body in ngraph).
|
||||
/// @param branch_false Branch containing primitives, which will be executed when pred is false (the else-body in ngraph).
|
||||
/// @param output_padding Optional padding for output from primitive.
|
||||
condition(const primitive_id& id,
|
||||
const input_info& input,
|
||||
const topology& topology_true,
|
||||
const topology& topology_false,
|
||||
const primitive_id& compare_data,
|
||||
const cond_functions& func,
|
||||
const tensor& offset = {0, 0, 0, 0, 0},
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input}, {output_padding}),
|
||||
topology_true(topology_true),
|
||||
topology_false(topology_false),
|
||||
compare_data(compare_data),
|
||||
function(func),
|
||||
offset(offset) {}
|
||||
const std::vector<input_info>& inputs,
|
||||
const branch& branch_true,
|
||||
const branch& branch_false,
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, inputs, {output_padding}),
|
||||
branch_true(branch_true),
|
||||
branch_false(branch_false) {}
|
||||
|
||||
/// @brief An identifier of topology, which will be executed when comparison returns true.
|
||||
topology topology_true;
|
||||
/// @brief An identifier of topology, which will be executed when comparison returns false.
|
||||
topology topology_false;
|
||||
/// @brief An identifier of primitive which contains compare values.
|
||||
primitive_id compare_data;
|
||||
/// @brief Used function during comparison.
|
||||
cond_functions function;
|
||||
/// @brief Offset for compare data.
|
||||
tensor offset;
|
||||
branch branch_true;
|
||||
branch branch_false;
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {compare_data}; }
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {}; }
|
||||
};
|
||||
|
||||
/// @brief Writes the textual dump produced by condition::branch::str() to @p os.
/// @return The same stream, to allow chaining.
static inline std::ostream& operator<< (std::ostream& os, condition::branch& info) {
    return os << info.str();
}
|
||||
} // namespace cldnn
|
||||
/// @}
|
||||
/// @}
|
||||
|
@ -3,7 +3,7 @@
|
||||
//
|
||||
|
||||
#include "condition_inst.h"
|
||||
|
||||
#include "program_node.h"
|
||||
#include "intel_gpu/runtime/error_handler.hpp"
|
||||
#include "json_object.h"
|
||||
#include "primitive_type_base.h"
|
||||
@ -12,6 +12,39 @@
|
||||
namespace cldnn {
|
||||
GPU_DEFINE_PRIMITIVE_TYPE_ID(condition)
|
||||
|
||||
const size_t idx_branch_true = 0;
|
||||
const size_t idx_branch_false = 1;
|
||||
|
||||
// Builds an id -> output layout table from every entry returned by prog->get_outputs().
// If two entries share an id, the first one inserted is kept.
static std::map<primitive_id, layout> get_out_layout_map(cldnn::program::ptr prog) {
    std::map<primitive_id, layout> layouts_by_id;
    for (auto& out_node : prog->get_outputs())
        layouts_by_id.emplace(out_node->id(), out_node->get_output_layout());
    return layouts_by_id;
}
|
||||
|
||||
// Builds an id -> output layout table from every entry returned by net->get_outputs().
// If two entries share an id, the first one inserted is kept.
static std::map<primitive_id, layout> get_out_layout_map(cldnn::network::ptr net) {
    std::map<primitive_id, layout> layouts_by_id;
    for (auto& out_inst : net->get_outputs())
        layouts_by_id.emplace(out_inst->id(), out_inst->get_output_layout());
    return layouts_by_id;
}
|
||||
|
||||
static std::vector<layout> get_output_layouts(std::map<primitive_id, layout>&& outputs, const std::map<size_t, cldnn::primitive_id> &io_output_map) {
|
||||
std::vector<layout> out_layouts;
|
||||
for (auto out : outputs) {
|
||||
for (auto& io_output : io_output_map) {
|
||||
auto inner_prim_id = io_output.second;
|
||||
if (out.first == inner_prim_id) {
|
||||
out_layouts.push_back(out.second);
|
||||
}
|
||||
}
|
||||
}
|
||||
OPENVINO_ASSERT(out_layouts.size() > 0, "Not found any matched output");
|
||||
return out_layouts;
|
||||
}
|
||||
|
||||
/*
|
||||
Calc_output_layout method is called only when output layout is invalidated.
|
||||
It means, that it is called when:
|
||||
@ -20,38 +53,155 @@ GPU_DEFINE_PRIMITIVE_TYPE_ID(condition)
|
||||
In this both cases, we need to recalc branch_true and branch_false.
|
||||
!* We can be sure, that this method was called AT LEAST once during graph compilation.*!
|
||||
*/
|
||||
layout condition_inst::calc_output_layout(condition_node const& node, kernel_impl_params const& impl_param) {
|
||||
assert(static_cast<bool>(impl_param.desc->output_data_types[0]) == false &&
|
||||
"Output data type forcing is not supported for condition_node!");
|
||||
node.set_branches();
|
||||
layout condition_inst::calc_output_layout(condition_node const& /* node */, kernel_impl_params const& impl_param) {
|
||||
OPENVINO_ASSERT(static_cast<bool>(impl_param.desc->output_data_types[0]) == false, "Output data type forcing is not supported for condition_node!");
|
||||
OPENVINO_ASSERT(impl_param.get_input_layout(0).count() == 1, "layout of compare_data of condition should be {1,1,1,1}");
|
||||
|
||||
auto branch_true_output = node.get_branch_true()->get_outputs();
|
||||
auto branch_false_output = node.get_branch_false()->get_outputs();
|
||||
CLDNN_ERROR_NOT_EQUAL(impl_param.desc->id,
|
||||
"Count of branch true outputs",
|
||||
branch_true_output.size(),
|
||||
"expected outputs size",
|
||||
1,
|
||||
"Branch true should have one output.");
|
||||
CLDNN_ERROR_NOT_EQUAL(impl_param.desc->id,
|
||||
"Count of branch false outputs",
|
||||
branch_false_output.size(),
|
||||
"expected outputs size",
|
||||
1,
|
||||
"Branch false should have one output.");
|
||||
OPENVINO_ASSERT(impl_param.inner_progs.size() == 2, "If(Condition) contains incorrect number of inner programs ", impl_param.inner_progs.size());
|
||||
OPENVINO_ASSERT(impl_param.io_output_maps.size() == 2, "If(Condition) contains incorrect number of io output maps ", impl_param.io_output_maps.size());
|
||||
|
||||
auto layouts_true = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_true]), impl_param.io_output_maps[idx_branch_true]);
|
||||
auto layouts_false = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_false]), impl_param.io_output_maps[idx_branch_false]);
|
||||
|
||||
auto layout_true = branch_true_output.at(0)->get_output_layout();
|
||||
auto layout_false = branch_false_output.at(0)->get_output_layout();
|
||||
CLDNN_ERROR_LAYOUT_MISMATCH(impl_param.desc->id,
|
||||
"Branch true output layout",
|
||||
layout_true,
|
||||
layouts_true[0],
|
||||
"branch false output layout",
|
||||
layout_false,
|
||||
layouts_false[0],
|
||||
"Layout of the branches should be the same.");
|
||||
|
||||
return layout_true;
|
||||
return layouts_true[0];
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static bool convert_data(memory::ptr mem, stream& stream) {
|
||||
mem_lock<T, mem_lock_type::read> lock_data{mem, stream};
|
||||
return (static_cast<float>(*lock_data.data()) != 0.f);
|
||||
}
|
||||
|
||||
// Reads the predicate from `mem`: dispatches on the memory's data type and returns
// true when the first stored element is non-zero. `bin` and any type not listed
// explicitly are read as uint32_t.
bool condition_inst::get_pred_from_memory(memory::ptr mem, stream& stream) {
    switch (mem->get_layout().data_type) {
        case cldnn::data_types::f32: return convert_data<float>(mem, stream);
        case cldnn::data_types::f16: return convert_data<half_t>(mem, stream);
        case cldnn::data_types::i64: return convert_data<int64_t>(mem, stream);
        case cldnn::data_types::i32: return convert_data<int32_t>(mem, stream);
        case cldnn::data_types::i8:  return convert_data<int8_t>(mem, stream);
        case cldnn::data_types::u8:  return convert_data<uint8_t>(mem, stream);
        case cldnn::data_types::bin:
        default:
            return convert_data<uint32_t>(mem, stream);
    }
}
|
||||
|
||||
// Merges the output shapes of the two branches into one partial shape.
//   true_pshape  - shape of the output from then_body
//   false_pshape - shape of the output from else_body
// Rules:
//   * either rank dynamic                -> fully dynamic shape
//   * ranks differ, both are <= 1        -> dynamic shape of rank 1 (scalar / 1D union)
//   * ranks differ otherwise             -> fully dynamic shape
//   * equal ranks                        -> per-dimension union
static ov::PartialShape resolve_shape(const ov::PartialShape& true_pshape, const ov::PartialShape& false_pshape) {
    const auto then_rank = true_pshape.rank();
    const auto else_rank = false_pshape.rank();

    if (then_rank.is_dynamic() || else_rank.is_dynamic()) {
        return ov::PartialShape::dynamic();
    }

    if (then_rank.get_length() != else_rank.get_length()) {
        const bool scalar_or_1d = then_rank.get_length() <= 1 && else_rank.get_length() <= 1;
        if (scalar_or_1d) {
            return ov::PartialShape::dynamic(1);
        }
        return ov::PartialShape::dynamic();
    }

    // Ranks are equal: walk both shapes in lock-step and union each pair of dimensions.
    std::vector<ov::Dimension> merged_dims;
    auto else_dim = false_pshape.cbegin();
    for (auto then_dim = true_pshape.cbegin(); then_dim != true_pshape.cend(); ++then_dim, ++else_dim) {
        if (then_dim->is_dynamic() || else_dim->is_dynamic()) {
            merged_dims.push_back(ov::Dimension::dynamic());
            continue;
        }
        if (*then_dim == *else_dim) {
            merged_dims.emplace_back(*then_dim);
            continue;
        }
        // Neither dimension is dynamic here and they differ: produce an interval covering both.
        const auto lower = std::min(then_dim->get_min_length(), else_dim->get_min_length());
        const auto upper = std::max(then_dim->get_min_length(), else_dim->get_min_length());
        merged_dims.emplace_back(lower, upper);
    }

    return ov::PartialShape(merged_dims);
}
|
||||
|
||||
static std::vector<layout> resolve_shape(std::vector<layout>& target_list, std::vector<layout>& other_list) {
|
||||
std::vector<layout> resolved_layout;
|
||||
for (size_t i = 0; i < target_list.size(); i++) {
|
||||
auto target = target_list[i];
|
||||
auto other = other_list[i];
|
||||
auto target_pshape = target.get_partial_shape();
|
||||
auto other_pshape = other.get_partial_shape();
|
||||
auto target_rank = target_pshape.rank();
|
||||
auto other_rank = other_pshape.rank();
|
||||
if (target_rank.get_length() == 0 && other_rank.get_length() == 1) {
|
||||
resolved_layout.push_back({ov::PartialShape{1}, target.data_type, target.format});
|
||||
} else {
|
||||
resolved_layout.push_back(target);
|
||||
}
|
||||
}
|
||||
return resolved_layout;
|
||||
}
|
||||
|
||||
template<typename ShapeType>
|
||||
std::vector<layout> condition_inst::calc_output_layouts(condition_node const& /* node */, kernel_impl_params const& impl_param) {
|
||||
if (impl_param.inner_nets.empty()) {
|
||||
OPENVINO_ASSERT(impl_param.inner_progs.empty() == false, "The count of inner programs should not be zero");
|
||||
auto layouts_true = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_true]), impl_param.io_output_maps[idx_branch_true]);
|
||||
auto layouts_false = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_false]), impl_param.io_output_maps[idx_branch_false]);
|
||||
|
||||
const size_t num_outputs = impl_param.output_layouts.size();
|
||||
OPENVINO_ASSERT((num_outputs == layouts_true.size() && num_outputs == layouts_false.size()),
|
||||
"The number of outputs for each branch should be same!");
|
||||
std::vector<layout> output_layouts;
|
||||
|
||||
for (size_t i = 0; i < num_outputs; i++) {
|
||||
if (layouts_true[i] == layouts_false[i]) {
|
||||
output_layouts.push_back(layouts_true[i]);
|
||||
} else {
|
||||
OPENVINO_ASSERT(layouts_true[i].data_type == layouts_false[i].data_type, "data type of each branches should be same");
|
||||
OPENVINO_ASSERT(layouts_true[i].format == layouts_false[i].format, "output format of each branches should be same");
|
||||
auto out_layout = resolve_shape(layouts_true[i].get_partial_shape(), layouts_false[i].get_partial_shape());
|
||||
output_layouts.push_back(layout{out_layout, layouts_true[i].data_type, layouts_true[i].format });
|
||||
}
|
||||
}
|
||||
|
||||
return output_layouts;
|
||||
} else {
|
||||
auto layouts_true = get_output_layouts(get_out_layout_map(impl_param.inner_nets[idx_branch_true]), impl_param.io_output_maps[idx_branch_true]);
|
||||
auto layouts_false = get_output_layouts(get_out_layout_map(impl_param.inner_nets[idx_branch_false]), impl_param.io_output_maps[idx_branch_false]);
|
||||
const size_t num_outputs = impl_param.output_layouts.size();
|
||||
OPENVINO_ASSERT((num_outputs == layouts_true.size() && num_outputs == layouts_false.size()),
|
||||
"The number of outputs for each branch should be same!");
|
||||
|
||||
auto& memory_deps = impl_param.memory_deps;
|
||||
OPENVINO_ASSERT(memory_deps.count(0) > 0, "The count of memory deps should not be zero");
|
||||
auto mem_ptr = memory_deps.at(0);
|
||||
auto pred = condition_inst::get_pred_from_memory(mem_ptr, impl_param.get_stream());
|
||||
if (pred) {
|
||||
return resolve_shape(layouts_true, layouts_false);
|
||||
} else {
|
||||
return resolve_shape(layouts_false, layouts_true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template std::vector<layout> condition_inst::calc_output_layouts<ov::PartialShape>(condition_node const& node, const kernel_impl_params& impl_param);
|
||||
|
||||
std::string condition_inst::to_string(condition_node const& node) {
|
||||
auto desc = node.get_primitive();
|
||||
auto node_info = node.desc_to_json();
|
||||
@ -69,23 +219,35 @@ Condition primitive is resuing memory with the input.
|
||||
*/
|
||||
condition_inst::typed_primitive_inst(network& network, condition_node const& node)
|
||||
: parent(network, node),
|
||||
_net_true(network::allocate_network(node.get_program().get_engine(), node.get_branch_true(), true)),
|
||||
_net_false(network::allocate_network(node.get_program().get_engine(), node.get_branch_false(), true)) {
|
||||
auto compare_tensor = node.compare().get_output_layout().get_tensor();
|
||||
auto input_tensor = node.input().get_output_layout().get_tensor();
|
||||
CLDNN_ERROR_TENSOR_SIZES_GREATER_THAN(node.id(),
|
||||
"Compare tensor",
|
||||
compare_tensor,
|
||||
"input tensor",
|
||||
input_tensor,
|
||||
"Compare primitive is too big.");
|
||||
_net_true(network::allocate_network(node.get_program().get_engine(), node.get_branch_true().inner_program)),
|
||||
_net_false(network::allocate_network(node.get_program().get_engine(), node.get_branch_false().inner_program)) {
|
||||
this->set_inner_networks({_net_true, _net_false});
|
||||
}
|
||||
|
||||
auto compare_with_offster_tensor = compare_tensor + node.offset();
|
||||
CLDNN_ERROR_TENSOR_SIZES_GREATER_THAN(node.id(),
|
||||
"Offset with compare tensor",
|
||||
compare_with_offster_tensor,
|
||||
"input tensor",
|
||||
input_tensor,
|
||||
"Offset is too big.");
|
||||
void condition_inst::update_output_layout() {
|
||||
auto memory_deps = _node->get_const_memory_deps();
|
||||
for (auto& i : _node->get_shape_infer_dependencies()) {
|
||||
if (memory_deps.count(i) > 0 || i >= _node->get_dependencies().size()) {
|
||||
continue;
|
||||
}
|
||||
auto dep_id = _node->get_dependency(i).id();
|
||||
|
||||
auto dep_mem = _network.get_output_memory(dep_id);
|
||||
memory_deps.insert({i, dep_mem});
|
||||
}
|
||||
_impl_params->memory_deps = memory_deps;
|
||||
|
||||
auto new_layouts = _node->type()->calc_output_layouts(*_node, *_impl_params);
|
||||
if (new_layouts.empty()) {
|
||||
auto new_layout = _node->type()->calc_output_layout(*_node, *_impl_params);
|
||||
new_layout.data_padding = padding::max(_node->get_primitive()->output_paddings[0], new_layout.data_padding);
|
||||
_impl_params->output_layouts[0] = new_layout;
|
||||
} else {
|
||||
for (size_t i = 0; i != new_layouts.size(); ++i) {
|
||||
auto new_layout = new_layouts[i];
|
||||
new_layout.data_padding = padding::max(_node->get_primitive()->output_paddings[i], new_layout.data_padding);
|
||||
_impl_params->output_layouts[i] = new_layout;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace cldnn
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "quantize_inst.h"
|
||||
#include "arg_max_min_inst.h"
|
||||
#include "fully_connected_inst.h"
|
||||
#include "condition_inst.h"
|
||||
#include "program_node.h"
|
||||
|
||||
#include <iostream>
|
||||
@ -72,6 +73,9 @@ void compile_graph::run(program& p) {
|
||||
if (node->is_dynamic() && !is_planar)
|
||||
can_select_impl = false;
|
||||
|
||||
if (node->is_type<condition>())
|
||||
can_select_impl = true;
|
||||
|
||||
if (can_select_impl) {
|
||||
tasks.push_back([node, &exception, change_initial_impl, original_impl_type] {
|
||||
try {
|
||||
|
@ -0,0 +1,41 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "pass_manager.h"
|
||||
#include "program_helpers.h"
|
||||
#include "loop_inst.h"
|
||||
#include "condition_inst.h"
|
||||
|
||||
#include <iterator>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
// For every loop and condition node in processing order, forwards each one-to-one entry of
// p.get_optimized() (old primitive id -> single new primitive id) to the node's
// update_primitive_map(). Entries that do not map to exactly one id are ignored.
// Loop nodes receive a second call with `false` as third argument (internal id update).
void update_inner_program_io_map::run(program& p) {
    for (auto& node : p.get_processing_order()) {
        if (node->is_type<loop>()) {
            loop_node& loop_ref = node->as<loop>();
            for (const auto& optimized : p.get_optimized()) {
                if (optimized.second.size() == 1) {
                    const primitive_id& old_primitive_id = optimized.first;
                    const primitive_id& new_primitive_id = optimized.second.front();
                    loop_ref.update_primitive_map(old_primitive_id, new_primitive_id);
                    loop_ref.update_primitive_map(old_primitive_id, new_primitive_id, false);  // update internal id
                }
            }
            continue;
        }

        if (node->is_type<condition>()) {
            condition_node& cond_ref = node->as<condition>();
            for (const auto& optimized : p.get_optimized()) {
                if (optimized.second.size() == 1) {
                    const primitive_id& old_primitive_id = optimized.first;
                    const primitive_id& new_primitive_id = optimized.second.front();
                    cond_ref.update_primitive_map(old_primitive_id, new_primitive_id);
                }
            }
        }
    }
}
|
@ -1,31 +0,0 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "pass_manager.h"
|
||||
#include "program_helpers.h"
|
||||
#include "loop_inst.h"
|
||||
|
||||
#include <iterator>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
// For every loop node in processing order, forwards each one-to-one entry of
// p.get_optimized() (old primitive id -> single new primitive id) to the node's
// update_primitive_map(), once with the default arguments and once with `false`
// as third argument (internal id update). Non-loop nodes are left alone.
void update_loop_primitive_map::run(program& p) {
    for (auto& node : p.get_processing_order()) {
        if (node->is_type<loop>()) {
            loop_node& loop_ref = node->as<loop>();
            for (const auto& optimized : p.get_optimized()) {
                if (optimized.second.size() == 1) {
                    const primitive_id& old_primitive_id = optimized.first;
                    const primitive_id& new_primitive_id = optimized.second.front();
                    loop_ref.update_primitive_map(old_primitive_id, new_primitive_id);
                    loop_ref.update_primitive_map(old_primitive_id, new_primitive_id, false);  // update internal id
                }
            }
        }
    }
}
|
@ -36,16 +36,34 @@ struct condition_impl : typed_primitive_impl<condition> {
|
||||
auto ev = instance.get_network().get_stream().create_user_event(false);
|
||||
set_node_params(instance.get_node());
|
||||
|
||||
bool exec_branch = choose_branch_to_exec(instance);
|
||||
memory::ptr memory_to_copy;
|
||||
if (exec_branch)
|
||||
memory_to_copy = execute_branch(instance.get_net_true(), instance.result_id(), instance.input_memory_ptr());
|
||||
else
|
||||
memory_to_copy = execute_branch(instance.get_net_false(), instance.result_id(), instance.input_memory_ptr());
|
||||
// just copy memory
|
||||
mem_lock<float, mem_lock_type::read> inp_ptr{memory_to_copy, instance.get_network().get_stream()};
|
||||
mem_lock<float, mem_lock_type::write> out_ptr{instance.output_memory_ptr(), instance.get_network().get_stream()};
|
||||
std::copy(inp_ptr.begin(), inp_ptr.end(), out_ptr.begin());
|
||||
auto pred = condition_inst::get_pred_from_memory(instance.pred_memory_ptr(), instance.get_network().get_stream());
|
||||
network::ptr executed_net = pred? instance.get_net_true() : instance.get_net_false();
|
||||
auto branch = pred? instance.get_branch_true() : instance.get_branch_false();
|
||||
|
||||
// Set input memory of inner network before its execution
|
||||
for (size_t mem_idx = 0; mem_idx < instance.inputs_memory_count(); mem_idx++) {
|
||||
const primitive_id& input_external_id = instance.dependencies().at(mem_idx).first->id();
|
||||
auto iter = branch.input_map.find(input_external_id);
|
||||
if (iter != branch.input_map.end()) {
|
||||
const primitive_id& input_internal_id = iter->second;
|
||||
auto mem_ptr = instance.input_memory_ptr(mem_idx);
|
||||
executed_net->set_input_data(input_internal_id, mem_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
executed_net->execute({});
|
||||
|
||||
// Update output layout of impl_param in condition_inst
|
||||
instance.update_output_layout();
|
||||
|
||||
// Set output memory of condition_inst to inner network output memory after inner network execution
|
||||
for (auto out_mem_map : branch.output_map) {
|
||||
auto out_mem_idx = out_mem_map.first;
|
||||
auto inner_out_id = out_mem_map.second;
|
||||
auto mem_ptr = executed_net->get_output(inner_out_id).get_memory();
|
||||
instance.set_output_memory(mem_ptr, false, out_mem_idx);
|
||||
}
|
||||
|
||||
ev->set();
|
||||
return ev;
|
||||
}
|
||||
@ -58,85 +76,22 @@ struct condition_impl : typed_primitive_impl<condition> {
|
||||
|
||||
private:
|
||||
primitive_id _node_id;
|
||||
|
||||
/*
|
||||
Add functions here.
|
||||
*/
|
||||
bool check_condition(const float value_1, const float value_2, const cond_functions& func) const {
|
||||
switch (func) {
|
||||
case cond_functions::EQUAL:
|
||||
return value_1 == value_2;
|
||||
break;
|
||||
case cond_functions::GREATER:
|
||||
return value_1 > value_2;
|
||||
break;
|
||||
case cond_functions::LESS:
|
||||
return value_1 < value_2;
|
||||
break;
|
||||
default:
|
||||
throw("Unknown comparision function for: " + _node_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Loop over memory and check condition.
|
||||
Returns boolean flag, which says what branch should be executed.
|
||||
*/
|
||||
// Walks every (batch, feature, z, y, x) coordinate of the compare memory and checks the
// configured comparison between the input element at that coordinate shifted by the
// primitive's offset and the compare element at the coordinate itself.
// Returns false at the first coordinate where the comparison fails, true when all pass.
// Both memories are locked for reading as float.
bool choose_branch_to_exec(condition_inst& instance) const {
    mem_lock<float, mem_lock_type::read> compare_lock{instance.compare_memory_ptr(), instance.get_network().get_stream()};
    auto cmp_layout = instance.compare_memory().get_layout();
    auto cmp_values = compare_lock.begin();

    mem_lock<float, mem_lock_type::read> input_lock{instance.input_memory_ptr(), instance.get_network().get_stream()};
    auto in_layout = instance.input_memory().get_layout();
    auto in_values = input_lock.begin();

    auto cmp_func = instance.argument->function;
    auto& shift = instance.argument->offset;

    for (auto b = 0; b < cmp_layout.batch(); b++) {
        for (auto f = 0; f < cmp_layout.feature(); f++) {
            for (auto z = 0; z < cmp_layout.spatial(2); z++) {
                for (auto y = 0; y < cmp_layout.spatial(1); y++) {
                    for (auto x = 0; x < cmp_layout.spatial(0); x++) {
                        // Position inside the input: compare coordinate moved by the offset.
                        tensor in_pos{
                            batch(b + shift.batch[0]),
                            feature(f + shift.feature[0]),
                            spatial(x + shift.spatial[0], y + shift.spatial[1], z + shift.spatial[2], 0) };
                        auto in_idx = in_layout.get_linear_offset(in_pos);

                        tensor cmp_pos{ batch(b), feature(f), spatial(x, y, z, 0) };
                        auto cmp_idx = cmp_layout.get_linear_offset(cmp_pos);

                        const bool holds = check_condition(in_values[in_idx], cmp_values[cmp_idx], cmp_func);
                        if (!holds) {
                            return false;
                        }
                    }
                }
            }
        }
    }
    return true;
}
|
||||
|
||||
// Binds `input_memory` to the branch network input named `input_id`, executes the
// network with an empty dependency list and returns the memory of its output at index 0.
memory::ptr execute_branch(network::ptr branch,
                           const primitive_id& input_id,
                           memory::ptr input_memory) const {
    branch->set_input_data(input_id, input_memory);
    branch->execute({});

    const auto& branch_outputs = branch->get_outputs();
    return branch_outputs.at(0)->output_memory_ptr();
}
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_condition_common::attach_condition_common() {
|
||||
implementation_map<condition>::add(impl_types::common, condition_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::yxfb),
|
||||
});
|
||||
implementation_map<condition>::add(impl_types::common,
|
||||
shape_types::dynamic_shape,
|
||||
condition_impl::create,
|
||||
{},
|
||||
{});
|
||||
implementation_map<condition>::add(impl_types::common, condition_impl::create, {});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace common
|
||||
} // namespace cldnn
|
||||
|
||||
// TODO: Change code like cldnn::loop
|
||||
ASSIGN_TYPE_NAME(cldnn::common::condition_impl)
|
||||
|
@ -18,63 +18,43 @@ struct typed_program_node<condition> : public typed_program_node_base<condition>
|
||||
private:
|
||||
using parent = typed_program_node_base<condition>;
|
||||
|
||||
class branch {
|
||||
public:
|
||||
explicit branch(const topology& tpl) : _topology(tpl) {}
|
||||
|
||||
void set(const program_node& node) {
|
||||
add_or_change_input_layout(node);
|
||||
_program = program::build_program(node.get_program().get_engine(),
|
||||
_topology,
|
||||
node.get_program().get_config(),
|
||||
true); // rebuild program
|
||||
}
|
||||
program::ptr get() const { return _program; }
|
||||
|
||||
private:
|
||||
topology _topology;
|
||||
program::ptr _program = nullptr;
|
||||
|
||||
void add_or_change_input_layout(const program_node& node) {
|
||||
auto layout = node.get_input_layout(0);
|
||||
auto input_id = node.as<condition>().result_id();
|
||||
if (_topology.get_primitives().count(input_id) == 0) {
|
||||
_topology.add_primitive(std::make_shared<input_layout>(input_id, layout));
|
||||
for (auto& prim : _topology.get_primitives()) {
|
||||
for (auto& inp : prim.second->input) {
|
||||
if (inp.pid == node.id())
|
||||
inp.pid = input_id;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
_topology.change_input_layout(input_id, layout);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
using parent::parent;
|
||||
|
||||
typed_program_node(std::shared_ptr<primitive> prim, program& prog)
|
||||
typed_program_node(std::shared_ptr<condition> prim, program& prog)
|
||||
: parent(prim, prog),
|
||||
_branch_true(this->get_primitive()->topology_true),
|
||||
_branch_false(this->get_primitive()->topology_false) {}
|
||||
_branch_true(prim->branch_true),
|
||||
_branch_false(prim->branch_false) {}
|
||||
|
||||
program_node& input() const { return get_dependency(0); }
|
||||
program_node& compare() const { return get_dependency(1); }
|
||||
cond_functions func() const { return get_primitive()->function; }
|
||||
tensor offset() const { return get_primitive()->offset; }
|
||||
void set_branches() const {
|
||||
_branch_true.set(*this);
|
||||
_branch_false.set(*this);
|
||||
condition::branch get_branch_true() const { return _branch_true; }
|
||||
condition::branch get_branch_false() const { return _branch_false; }
|
||||
|
||||
using parent::get_kernel_impl_params;
|
||||
std::unique_ptr<kernel_impl_params> get_kernel_impl_params(const std::vector<layout>& in_layouts, const std::vector<layout>& out_layouts) const override {
|
||||
auto params = parent::get_kernel_impl_params(in_layouts, out_layouts);
|
||||
params->inner_progs = { _branch_true.inner_program, _branch_false.inner_program };
|
||||
params->io_output_maps = { _branch_true.output_map, _branch_false.output_map };
|
||||
return params;
|
||||
}
|
||||
|
||||
void update_primitive_map(const primitive_id& prevID, const primitive_id& newID) {
|
||||
auto replace_external_id = [&](std::map<primitive_id, primitive_id>& input_map, const primitive_id& prevID, const primitive_id& newID) {
|
||||
auto iter = input_map.find(prevID);
|
||||
if (iter != input_map.end()) {
|
||||
primitive_id new_external_id = newID;
|
||||
primitive_id internal_id = iter->second;
|
||||
input_map.erase(iter);
|
||||
input_map.insert({new_external_id, internal_id});
|
||||
}
|
||||
};
|
||||
|
||||
replace_external_id(_branch_true.input_map, prevID, newID);
|
||||
replace_external_id(_branch_false.input_map, prevID, newID);
|
||||
}
|
||||
program::ptr get_branch_true() const { return _branch_true.get(); }
|
||||
program::ptr get_branch_false() const { return _branch_false.get(); }
|
||||
primitive_id result_id() const { return id() + ":result"; }
|
||||
|
||||
private:
|
||||
mutable branch _branch_true;
|
||||
mutable branch _branch_false;
|
||||
condition::branch& _branch_true;
|
||||
condition::branch& _branch_false;
|
||||
};
|
||||
|
||||
using condition_node = typed_program_node<condition>;
|
||||
@ -85,17 +65,20 @@ class typed_primitive_inst<condition> : public typed_primitive_inst_base<conditi
|
||||
using parent::parent;
|
||||
|
||||
public:
|
||||
static layout calc_output_layout(condition_node const& node, kernel_impl_params const& impl_param);
|
||||
template<typename ShapeType>
|
||||
static std::vector<layout> calc_output_layouts(condition_node const& /*node*/, kernel_impl_params const& impl_param);
|
||||
static layout calc_output_layout(condition_node const& /* node */, kernel_impl_params const& impl_param);
|
||||
static std::string to_string(condition_node const& node);
|
||||
static bool get_pred_from_memory(memory::ptr mem, stream& stream);
|
||||
typed_primitive_inst(network& network, condition_node const& node);
|
||||
|
||||
memory::ptr input_memory_ptr() const { return dep_memory_ptr(0); }
|
||||
memory::ptr compare_memory_ptr() const { return dep_memory_ptr(1); }
|
||||
memory& input_memory() const { return dep_memory(0); }
|
||||
memory& compare_memory() const { return dep_memory(1); }
|
||||
memory::ptr pred_memory_ptr() const { return dep_memory_ptr(0); }
|
||||
network::ptr get_net_true() const { return _net_true; }
|
||||
network::ptr get_net_false() const { return _net_false; }
|
||||
primitive_id result_id() const { return node->result_id(); }
|
||||
condition::branch get_branch_true() const { return node->get_branch_true(); }
|
||||
condition::branch get_branch_false() const { return node->get_branch_false(); }
|
||||
|
||||
void update_output_layout();
|
||||
|
||||
private:
|
||||
network::ptr _net_true;
|
||||
|
@ -308,7 +308,7 @@ public:
|
||||
std::vector<primitive_id> output_names_vec(output_names.begin(), output_names.end());
|
||||
auto config = get_program().get_config();
|
||||
config.set_property(ov::intel_gpu::custom_outputs(output_names_vec));
|
||||
body_program = program::build_program(get_program().get_engine(), body, config, false, false, true);
|
||||
body_program = program::build_program(get_program().get_engine(), body, config, get_program().get_task_executor(), false, false, true);
|
||||
}
|
||||
|
||||
const primitive_id& get_trip_count_id() const { return get_primitive()->trip_count_id; }
|
||||
|
@ -378,9 +378,9 @@ public:
|
||||
void run(program& p) override;
|
||||
};
|
||||
|
||||
class update_loop_primitive_map : public base_pass {
|
||||
class update_inner_program_io_map : public base_pass {
|
||||
public:
|
||||
update_loop_primitive_map() : base_pass("update_loop_primitive_map") {}
|
||||
update_inner_program_io_map() : base_pass("update_inner_program_io_map") {}
|
||||
|
||||
private:
|
||||
void run(program& p) override;
|
||||
|
@ -232,7 +232,7 @@ public:
|
||||
bool is_constant() const { return _is_constant; }
|
||||
bool needs_completion_event() const { return _needs_completion_event; }
|
||||
bool has_unfused_subgraph() const { return (_unfused_subgraph != nullptr); }
|
||||
|
||||
bool has_inner_networks() const;
|
||||
void allocate_internal_buffers();
|
||||
static memory::ptr allocate_output(engine& engine, memory_pool& pool, const program_node& _node,
|
||||
const kernel_impl_params& impl_params, uint32_t net_id, bool is_internal, size_t idx = 0, bool reset_mem = true, bool is_output_buffer = false);
|
||||
@ -257,6 +257,9 @@ public:
|
||||
void set_output_layout(const layout& new_out_lay, size_t idx = 0) {
|
||||
_impl_params->output_layouts[idx] = new_out_lay;
|
||||
}
|
||||
// Stores the given inner networks in the kernel parameters (_impl_params->inner_nets).
// The vector is taken by const reference so the only copy made is the assignment itself.
void set_inner_networks(const std::vector<network::ptr>& inner_nets) {
    _impl_params->inner_nets = inner_nets;
}
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
std::vector<cldnn::fused_primitive_desc_onednn>& get_fused_primitives_onednn() const { return _impl_params->fused_desc_onednn; }
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
|
@ -16,6 +16,8 @@ class typed_primitive_inst<roi_align> : public typed_primitive_inst_base<roi_ali
|
||||
using parent::parent;
|
||||
|
||||
public:
|
||||
template<typename ShapeType>
|
||||
static std::vector<layout> calc_output_layouts(roi_align_node const& node, kernel_impl_params const& impl_param);
|
||||
static layout calc_output_layout(roi_align_node const& node, kernel_impl_params const& impl_param);
|
||||
static std::string to_string(roi_align_node const& node);
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "reshape_inst.h"
|
||||
#include "arg_max_min_inst.h"
|
||||
#include "shape_of_inst.h"
|
||||
#include "condition_inst.h"
|
||||
#include <sstream>
|
||||
|
||||
#include "gemm_inst.h"
|
||||
@ -1410,6 +1411,8 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
|
||||
|
||||
if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {
|
||||
preferred_impl = _forcing_map.at(node.id()).second;
|
||||
} else if (node.is_type<condition>()) {
|
||||
preferred_impl = impl_types::common;
|
||||
} else if (node.is_type<detection_output>()) {
|
||||
const auto& program = node.get_program();
|
||||
const auto& device_info = program.get_engine().get_device_info();
|
||||
|
@ -342,8 +342,9 @@ network::network(program::ptr program, const ExecutionConfig& config, stream::pt
|
||||
network::network(engine& engine,
|
||||
const topology& topo,
|
||||
const ExecutionConfig& config,
|
||||
bool is_internal)
|
||||
: network(program::build_program(engine, topo, config, is_internal), config, engine.create_stream(config), is_internal) {}
|
||||
bool is_internal,
|
||||
InferenceEngine::CPUStreamsExecutor::Ptr task_executor)
|
||||
: network(program::build_program(engine, topo, config, task_executor, is_internal), config, engine.create_stream(config), is_internal) {}
|
||||
|
||||
network::network(engine& engine,
|
||||
const std::set<std::shared_ptr<program_node>>& nodes,
|
||||
@ -653,8 +654,9 @@ network::ptr network::allocate_network(engine& engine, program::ptr program, boo
|
||||
network::ptr network::build_network(engine& engine,
|
||||
const topology& topology,
|
||||
const ExecutionConfig& config,
|
||||
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
|
||||
bool is_internal) {
|
||||
return std::make_shared<network>(engine, topology, config, is_internal);
|
||||
return std::make_shared<network>(engine, topology, config, is_internal, task_executor);
|
||||
}
|
||||
|
||||
network::ptr network::build_network(engine& engine,
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "gemm_inst.h"
|
||||
#include "assign_inst.h"
|
||||
#include "read_value_inst.h"
|
||||
#include "condition_inst.h"
|
||||
#include "experimental_detectron_roi_feature_extractor_inst.hpp"
|
||||
#include "compilation_context.hpp"
|
||||
#include "implementation_map.hpp"
|
||||
@ -619,6 +620,10 @@ void primitive_inst::do_runtime_in_place_concat() {
|
||||
GPU_DEBUG_TRACE_DETAIL << "[In place concat] " << concat_inst->id() << ": can_be_optimized " << std::endl;
|
||||
}
|
||||
|
||||
bool primitive_inst::has_inner_networks() const {
|
||||
return (_impl_params->inner_nets.size() > 0);
|
||||
}
|
||||
|
||||
event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
|
||||
const auto primitive_id = id();
|
||||
OPENVINO_ASSERT(_has_valid_input, primitive_id, " has invalid/unset input");
|
||||
@ -626,7 +631,7 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
|
||||
|
||||
bool need_args_update = false;
|
||||
std::vector<event::ptr> dependencies;
|
||||
if (is_dynamic()) {
|
||||
if (is_dynamic() && !has_inner_networks()) {
|
||||
do_runtime_in_place_concat();
|
||||
OPENVINO_ASSERT(_node != nullptr, "[GPU] Invalid primitive_inst object for dynamic shapes case: program_node can't be null");
|
||||
update_shape();
|
||||
@ -679,11 +684,11 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
|
||||
dependencies.push_back(ev_reset);
|
||||
}
|
||||
}
|
||||
|
||||
OPENVINO_ASSERT(_impl_params->get_output_layout().is_static(),
|
||||
"[GPU] Can't execute ", primitive_id, " primitive as output layout is dynamic in runtime");
|
||||
}
|
||||
update_shape_done_by_other = false; // reset
|
||||
OPENVINO_ASSERT(_impl_params->get_output_layout().is_static(),
|
||||
"[GPU] Can't execute ", primitive_id, " primitive as output layout is dynamic in runtime");
|
||||
|
||||
OPENVINO_ASSERT(_impl != nullptr, "[GPU] Implementation is nullptr for ", primitive_id, " primitive");
|
||||
|
||||
// Output buffer may be changed under the following conditions, so we need to set args to kernel on each iteration
|
||||
@ -1253,7 +1258,7 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() {
|
||||
ov::intel_gpu::allow_static_input_reorder(true),
|
||||
ov::intel_gpu::allow_new_shape_infer(true)
|
||||
};
|
||||
auto prog = program::build_program(get_network().get_engine(), t, subgraph_config, true, false);
|
||||
auto prog = program::build_program(get_network().get_engine(), t, subgraph_config, get_network().get_program()->get_task_executor(), true, false);
|
||||
|
||||
_unfused_subgraph = network::allocate_network(get_network().get_stream_ptr(), prog, true, get_network().is_primary_stream());
|
||||
}
|
||||
|
@ -66,6 +66,7 @@
|
||||
#include "loop_inst.h"
|
||||
#include "reverse_inst.h"
|
||||
#include "unique_inst.hpp"
|
||||
#include "condition_inst.h"
|
||||
#include "to_string_utils.h"
|
||||
|
||||
// TODO: Remove once we have interface for kernels cache
|
||||
@ -103,15 +104,58 @@
|
||||
using namespace cldnn;
|
||||
using namespace ov::intel_gpu;
|
||||
|
||||
// Limits config._streams to the number of logical cores matching the preferred core type
// (BIG, LITTLE = total - big, anything else = total). When exactly one core type is
// reported the config is left unchanged.
static void adjust_num_cores(InferenceEngine::CPUStreamsExecutor::Config& config) {
    if (InferenceEngine::getAvailableCoresTypes().size() == 1) {
        return;
    }

    const auto all_cores = InferenceEngine::getNumberOfLogicalCPUCores();
    const auto big_cores = InferenceEngine::getNumberOfLogicalCPUCores(true);
    const auto little_cores = all_cores - big_cores;
    auto preferred = config._threadPreferredCoreType;

    int core_limit = all_cores;
    if (preferred == InferenceEngine::IStreamsExecutor::Config::BIG) {
        core_limit = big_cores;
    } else if (preferred == InferenceEngine::IStreamsExecutor::Config::LITTLE) {
        core_limit = little_cores;
    }

    config._streams = std::min(config._streams, core_limit);
}
|
||||
|
||||
// Builds the CPU streams executor config used for host-side tasks:
//  - stream count comes from ov::compilation_num_threads,
//  - preferred core type comes from the host task priority (LOW->LITTLE, MEDIUM->ANY, HIGH->BIG),
//  - the stream count is finally clamped by adjust_num_cores().
// Any other priority value fails the OPENVINO_ASSERT below.
static InferenceEngine::CPUStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags) {
    InferenceEngine::CPUStreamsExecutor::Config executor_cfg(tags, 1);
    executor_cfg._streams = config.get_property(ov::compilation_num_threads);

    auto host_priority = config.get_property(ov::intel_gpu::hint::host_task_priority);
    if (host_priority == ov::hint::Priority::LOW) {
        executor_cfg._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::LITTLE;
    } else if (host_priority == ov::hint::Priority::MEDIUM) {
        executor_cfg._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::ANY;
    } else if (host_priority == ov::hint::Priority::HIGH) {
        executor_cfg._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::BIG;
    } else {
        OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", host_priority);
    }

    adjust_num_cores(executor_cfg);

    return executor_cfg;
}
|
||||
|
||||
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> program::make_task_executor(const ExecutionConfig& config) {
|
||||
InferenceEngine::CPUStreamsExecutor::Config task_executor_config = make_task_executor_config(config, "CPU Tasks executor for GPU plugin");
|
||||
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(task_executor_config);
|
||||
}
|
||||
|
||||
program::program(engine& engine_ref,
|
||||
topology const& topology,
|
||||
const ExecutionConfig& config,
|
||||
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
|
||||
bool is_internal,
|
||||
bool no_optimizations,
|
||||
bool is_body_program)
|
||||
: _engine(engine_ref),
|
||||
_stream(_engine.create_stream(config)),
|
||||
_config(config),
|
||||
_task_executor(task_executor),
|
||||
processing_order(),
|
||||
is_body_program(is_body_program) {
|
||||
_config.apply_user_properties(_engine.get_device_info());
|
||||
@ -162,7 +206,8 @@ void program::init_program() {
|
||||
|
||||
pm = std::unique_ptr<pass_manager>(new pass_manager(*this));
|
||||
|
||||
_task_executor = make_task_executor(_config);
|
||||
if (_task_executor == nullptr)
|
||||
_task_executor = program::make_task_executor(_config);
|
||||
_kernels_cache = std::unique_ptr<kernels_cache>(new kernels_cache(_engine, _config, prog_id, _task_executor,
|
||||
kernel_selector::KernelBase::get_db().get_batch_header_str()));
|
||||
|
||||
@ -194,58 +239,27 @@ void program::init_primitives() {
|
||||
}
|
||||
}
|
||||
|
||||
// Clamps config._streams to the logical core count of the preferred core type.
// Does nothing when only a single core type is available.
static void adjust_num_cores(InferenceEngine::CPUStreamsExecutor::Config& config) {
    const bool single_core_type = InferenceEngine::getAvailableCoresTypes().size() == 1;
    if (single_core_type) {
        return;
    }

    const auto logical_total = InferenceEngine::getNumberOfLogicalCPUCores();
    const auto logical_big = InferenceEngine::getNumberOfLogicalCPUCores(true);
    const auto logical_little = logical_total - logical_big;
    auto wanted_type = config._threadPreferredCoreType;

    // Default: every logical core may be used
    int usable = logical_total;
    if (wanted_type == InferenceEngine::IStreamsExecutor::Config::BIG) {
        usable = logical_big;
    } else if (wanted_type == InferenceEngine::IStreamsExecutor::Config::LITTLE) {
        usable = logical_little;
    }

    config._streams = std::min(config._streams, usable);
}
|
||||
|
||||
// Assembles the streams executor config for host tasks: stream count from
// ov::compilation_num_threads, core type from the host task priority
// (LOW->LITTLE, MEDIUM->ANY, HIGH->BIG), then clamped by adjust_num_cores().
// An unexpected priority value fails the OPENVINO_ASSERT.
InferenceEngine::CPUStreamsExecutor::Config program::make_task_executor_config(const ExecutionConfig& config, std::string tags) const {
    InferenceEngine::CPUStreamsExecutor::Config result(tags, 1);
    result._streams = config.get_property(ov::compilation_num_threads);

    auto task_priority = config.get_property(ov::intel_gpu::hint::host_task_priority);
    if (task_priority == ov::hint::Priority::LOW) {
        result._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::LITTLE;
    } else if (task_priority == ov::hint::Priority::MEDIUM) {
        result._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::ANY;
    } else if (task_priority == ov::hint::Priority::HIGH) {
        result._threadPreferredCoreType = InferenceEngine::IStreamsExecutor::Config::BIG;
    } else {
        OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", task_priority);
    }

    adjust_num_cores(result);

    return result;
}
|
||||
|
||||
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> program::make_task_executor(const ExecutionConfig& config) const {
|
||||
InferenceEngine::CPUStreamsExecutor::Config task_executor_config = make_task_executor_config(config, "CPU Tasks executor for GPU plugin");
|
||||
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(task_executor_config);
|
||||
}
|
||||
|
||||
// Returns a reference to the kernels cache held by this program (dereferences _kernels_cache).
kernels_cache& program::get_kernels_cache() const {
|
||||
return *_kernels_cache;
|
||||
}
|
||||
|
||||
// Factory overload that accepts an explicit task executor: forwards every argument,
// in the same order, to the program constructor and returns the result as a shared pointer.
program::ptr program::build_program(engine& engine,
|
||||
const topology& topology,
|
||||
const ExecutionConfig& config,
|
||||
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
|
||||
bool is_internal,
|
||||
bool no_optimizations,
|
||||
bool is_body_program) {
|
||||
return std::make_shared<program>(engine, topology, config, task_executor, is_internal, no_optimizations, is_body_program);
|
||||
}
|
||||
|
||||
program::ptr program::build_program(engine& engine,
|
||||
const topology& topology,
|
||||
const ExecutionConfig& config,
|
||||
bool is_internal,
|
||||
bool no_optimizations,
|
||||
bool is_body_program) {
|
||||
return std::make_shared<program>(engine, topology, config, is_internal, no_optimizations, is_body_program);
|
||||
return std::make_shared<program>(engine, topology, config, nullptr, is_internal, no_optimizations, is_body_program);
|
||||
}
|
||||
|
||||
program::ptr program::build_program(engine& engine,
|
||||
@ -597,8 +611,8 @@ void program::post_optimize_graph(bool is_internal) {
|
||||
if (_config.get_property(ov::intel_gpu::optimize_data))
|
||||
apply_opt_pass<remove_redundant_reorders>(lo, false, true, true); // pass to remove output reorders while all others graph optimizations were done
|
||||
|
||||
// update loop input/output primitive mappings
|
||||
apply_opt_pass<update_loop_primitive_map>();
|
||||
// update inner program input/output primitive mappings
|
||||
apply_opt_pass<update_inner_program_io_map>();
|
||||
|
||||
// Recalculate processing order after all graph transformation to keep optimal primitives ordering
|
||||
// for OOO queue
|
||||
@ -1007,12 +1021,18 @@ bool program::extract(program_node& node) {
|
||||
if (user->is_type<loop>()) {
|
||||
loop_node& loop = *user;
|
||||
loop.update_primitive_map(node.id(), input.id());
|
||||
} else if (user->is_type<condition>()) {
|
||||
condition_node& cond = *user;
|
||||
cond.update_primitive_map(node.id(), input.id());
|
||||
}
|
||||
|
||||
for (auto& dep : node.dependencies) {
|
||||
if (dep.first->is_type<loop>()) {
|
||||
loop_node& loop = *dep.first;
|
||||
loop.update_primitive_map(node.id(), user->id());
|
||||
} else if (dep.first->is_type<condition>()) {
|
||||
condition_node& cond = *dep.first;
|
||||
cond.update_primitive_map(node.id(), user->id());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -25,6 +25,19 @@ layout roi_align_inst::calc_output_layout(roi_align_node const& node, kernel_imp
|
||||
{num_rois, num_channels, primitive->pooled_h, primitive->pooled_w});
|
||||
}
|
||||
|
||||
template<typename ShapeType>
|
||||
std::vector<layout> roi_align_inst::calc_output_layouts(roi_align_node const& node, kernel_impl_params const& impl_param) {
|
||||
auto primitive = impl_param.typed_desc<roi_align>();
|
||||
auto input_layout = impl_param.get_input_layout(0);
|
||||
auto rois_layout = impl_param.get_input_layout(1);
|
||||
auto num_rois = rois_layout.get_partial_shape()[0];
|
||||
auto num_channels = input_layout.get_partial_shape()[1];
|
||||
return {layout({num_rois, num_channels, primitive->pooled_h, primitive->pooled_w}, input_layout.data_type, input_layout.format) };
|
||||
}
|
||||
|
||||
template
|
||||
std::vector<layout> roi_align_inst::calc_output_layouts<ov::PartialShape>(roi_align_node const& node, const kernel_impl_params& impl_param);
|
||||
|
||||
std::string roi_align_inst::to_string(roi_align_node const& node) {
|
||||
auto node_info = node.desc_to_json();
|
||||
json_composite roi_align_info;
|
||||
|
91
src/plugins/intel_gpu/src/plugin/ops/condition.cpp
Normal file
91
src/plugins/intel_gpu/src/plugin/ops/condition.cpp
Normal file
@ -0,0 +1,91 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "ngraph/op/if.hpp"
|
||||
#include "ie_ngraph_utils.hpp"
|
||||
|
||||
#include "intel_gpu/primitives/condition.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gpu {
|
||||
|
||||
const size_t idx_true = 0;
|
||||
const size_t idx_false = 1;
|
||||
|
||||
// Builds the cldnn::condition::branch for one body of an If op.
//   idx == idx_true  -> then-body, otherwise -> else-body.
// The body is compiled into an inner program and the external->internal input map and
// output-index->internal output map are filled from the op's input/output descriptions.
static cldnn::condition::branch gen_branch(Program& p, const std::shared_ptr<ngraph::op::v8::If>& op, size_t idx) {
    cldnn::condition::branch result;
    const auto& body_model = (idx == idx_true)? op->get_then_body() : op->get_else_body();

    InferenceEngine::CNNNetwork body_network(body_model);

    // CNNNetwork switches fp16 inputs/outputs to fp32; restore the precisions of the
    // original model so that the internal body runs with its own data types.
    // NOTE(review): network ports and model parameters/outputs are paired purely by
    // iteration position here - confirm both sequences are guaranteed to be in the same order.
    auto& body_params = body_model->get_parameters();
    size_t param_pos = 0;
    for (auto& input_info : body_network.getInputsInfo()) {
        auto wanted_precision = InferenceEngine::details::convertPrecision(body_params[param_pos++]->get_output_tensor(0).get_element_type());
        if (input_info.second->getPrecision() != wanted_precision)
            input_info.second->setPrecision(wanted_precision);
    }

    size_t output_pos = 0;
    for (auto& output_info : body_network.getOutputsInfo()) {
        const auto& body_output = body_model->get_output_op(output_pos++);
        auto wanted_precision = InferenceEngine::details::convertPrecision(body_output->get_output_tensor(0).get_element_type());
        if (output_info.second->getPrecision() != wanted_precision)
            output_info.second->setPrecision(wanted_precision);
    }

    // Inner program: dynamic batch limited to 1, new shape inference only for dynamic ops
    auto config = p.get_config();
    config.set_property(ov::intel_gpu::max_dynamic_batch(1));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic()));

    // Shares the outer program's task executor; the trailing `true` is the inner-program flag
    Program prog(body_network, p.get_engine(), config, false, false, nullptr, nullptr, p.get_task_executor(), true);
    result.inner_program = prog.GetCompiledProgram();

    // External input primitive id -> id of the matching body parameter
    auto outer_inputs = p.GetInputInfo(op);
    auto body_inputs = body_model->get_parameters();
    auto in_descriptions = op->get_input_descriptions(static_cast<int>(idx));
    for (auto& desc : in_descriptions) {
        const auto& external_id = outer_inputs.at(desc->m_input_index).pid;
        const auto& internal_id = layer_type_name_ID(body_inputs.at(desc->m_body_parameter_index));
        result.input_map.insert({external_id, internal_id});
    }

    // Output index of the If op -> id of the matching body result
    auto body_results = body_model->get_results();
    auto out_descriptions = op->get_output_descriptions(static_cast<int>(idx));
    for (auto& desc : out_descriptions) {
        const auto& internal_id = layer_type_name_ID(body_results.at(desc->m_body_value_index));
        result.output_map.insert({desc->m_output_index, internal_id});
    }

    return result;
}
|
||||
|
||||
// Translates an ngraph v8::If op into a cldnn::condition primitive.
// Both bodies are converted with gen_branch() (then-body as the true branch, else-body as
// the false branch) and the primitive is registered under the op's layer name.
// Asserts that the op has at least one input.
static void CreateIfOp(Program& p, const std::shared_ptr<ngraph::op::v8::If>& op) {
    auto inputs = p.GetInputInfo(op);
    OPENVINO_ASSERT(inputs.size() >= 1, "Invalid inputs count (Not allowed no input)");

    const std::string layerName = layer_type_name_ID(op);
    auto branch_true = gen_branch(p, op, idx_true);
    auto branch_false = gen_branch(p, op, idx_false);

    const cldnn::condition conditionPrimitive(layerName,
                                              inputs,
                                              branch_true,
                                              branch_false);

    p.add_primitive(*op, conditionPrimitive);
}
|
||||
|
||||
REGISTER_FACTORY_IMPL(v8, If);
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace ov
|
@ -16,6 +16,8 @@
|
||||
#include "intel_gpu/primitives/mutable_data.hpp"
|
||||
#include "intel_gpu/primitives/data.hpp"
|
||||
|
||||
#include <ie_system_conf.h>
|
||||
|
||||
#ifdef __linux__
|
||||
# include <dlfcn.h>
|
||||
#endif
|
||||
@ -121,11 +123,15 @@ bool Program::IsDynBatchModel(const std::shared_ptr<ov::Model>& model,
|
||||
|
||||
Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
|
||||
bool createTopologyOnly, bool partialBuild,
|
||||
InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs)
|
||||
InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs,
|
||||
InferenceEngine::CPUStreamsExecutor::Ptr task_executor, bool innerProgram)
|
||||
: m_curBatch(-1)
|
||||
, m_config(config)
|
||||
, m_engine(engine)
|
||||
, queryMode(false) {
|
||||
, queryMode(false)
|
||||
, m_task_executor(task_executor) {
|
||||
if (m_task_executor == nullptr)
|
||||
m_task_executor = cldnn::program::make_task_executor(m_config);
|
||||
// Extract inputs/outputs info from CNNNetwork
|
||||
auto networkInputs = (inputs != nullptr) ? *inputs : network.getInputsInfo();
|
||||
auto networkOutputs = (outputs != nullptr) ? *outputs : network.getOutputsInfo();
|
||||
@ -179,7 +185,8 @@ Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, co
|
||||
int m_bv_sz = GetMaxBatchSizeForSingleProgram();
|
||||
m_max_batch = static_cast<int>(m_config.get_property(ov::intel_gpu::max_dynamic_batch));
|
||||
|
||||
if (dyn_shape_batch_found || m_max_batch > 1) {
|
||||
// Do not apply dynamic batch for inner program (only single batch is allowed)
|
||||
if (!innerProgram && (dyn_shape_batch_found || m_max_batch > 1)) {
|
||||
// compile log2 networks to serve dynamic batch requests
|
||||
for (int b = m_bv_sz - 1; b >= 0; b--) {
|
||||
inputLayouts.clear();
|
||||
@ -290,7 +297,7 @@ Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, co
|
||||
m_input_batch_dim = batch_dim;
|
||||
}
|
||||
} else {
|
||||
m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild));
|
||||
m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild, innerProgram));
|
||||
}
|
||||
}
|
||||
|
||||
@ -301,6 +308,7 @@ Program::Program(cldnn::engine& engine, const ExecutionConfig& config,
|
||||
, m_config(config)
|
||||
, m_engine(engine)
|
||||
, queryMode(false) {
|
||||
m_task_executor = cldnn::program::make_task_executor(m_config);
|
||||
if (inputs != nullptr)
|
||||
m_networkInputs = *inputs;
|
||||
if (outputs != nullptr)
|
||||
@ -356,9 +364,11 @@ void Program::CleanupBuild() {
|
||||
std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::shared_ptr<ngraph::Node>>& ops,
|
||||
InferenceEngine::InputsDataMap networkInputs,
|
||||
InferenceEngine::OutputsDataMap networkOutputs,
|
||||
bool createTopologyOnly, bool partialBuild) {
|
||||
bool createTopologyOnly, bool partialBuild, bool innerProgram) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::BuildProgram");
|
||||
|
||||
// std::cout << "BuildProgram " << createTopologyOnly << ", " << partialBuild << ", " << innerProgram << std::endl;
|
||||
// For an inner program, the allow_new_shape_infer flag is set from outside of the program.
|
||||
// So, do not check allow_new_shape_infer for inner program build
|
||||
for (const auto& op : ops) {
|
||||
if (requires_new_shape_infer(*op)) {
|
||||
allow_new_shape_infer = true;
|
||||
@ -366,6 +376,10 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
|
||||
}
|
||||
}
|
||||
|
||||
if (innerProgram) {
|
||||
allow_new_shape_infer = (m_config.get_property(ov::intel_gpu::allow_new_shape_infer) || allow_new_shape_infer);
|
||||
}
|
||||
|
||||
m_config.set_property(ov::intel_gpu::partial_build_program(partialBuild));
|
||||
m_config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
m_config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer));
|
||||
@ -383,7 +397,7 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
|
||||
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::CreateProgram");
|
||||
cldnn::program::ptr program;
|
||||
try {
|
||||
program = cldnn::program::build_program(m_engine, *m_topology, m_config);
|
||||
program = cldnn::program::build_program(m_engine, *m_topology, m_config, get_task_executor());
|
||||
} catch (std::exception& e) {
|
||||
OPENVINO_ASSERT(false, "GPU program build failed!\n", e.what());
|
||||
}
|
||||
|
@ -0,0 +1,696 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
#include "shared_test_classes/base/utils/ranges.hpp"
|
||||
#include <common_test_utils/ov_tensor_utils.hpp>
|
||||
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace ov::test;
|
||||
|
||||
namespace GPULayerTestsDefinitions {
|
||||
|
||||
class InnerBodyGenerator {
|
||||
public:
|
||||
using ptr = std::shared_ptr<InnerBodyGenerator>;
|
||||
|
||||
enum InnerBodyType {
|
||||
/**
|
||||
* Simple inner body with single constant value
|
||||
*/
|
||||
Type01 = 1,
|
||||
/**
|
||||
* Inner body with eltwise sum
|
||||
*/
|
||||
Type02 = 2,
|
||||
/**
|
||||
* Inner body with eltwise multiply
|
||||
*/
|
||||
Type03 = 3,
|
||||
/**
|
||||
* Inner body with eltwise sum and pooling
|
||||
* output shape is different with type02 and type03 for same input shape
|
||||
*/
|
||||
Type04 = 4,
|
||||
/**
|
||||
* Inner body with nested condition case
|
||||
*/
|
||||
Type05 = 5
|
||||
};
|
||||
|
||||
public:
|
||||
InnerBodyGenerator() { }
|
||||
|
||||
virtual std::shared_ptr<ngraph::Function> get_function() { return _func; }
|
||||
virtual std::shared_ptr<ngraph::opset9::Parameter> get_input() { return _param; }
|
||||
virtual std::shared_ptr<ngraph::opset1::Result> get_result() { return _result; }
|
||||
|
||||
// virtual void create_body(ngraph::Shape input_shape, ngraph::element::Type prc) {
|
||||
virtual void create_body(ov::PartialShape& input_shape, ngraph::element::Type prc) {
|
||||
_func = generate(input_shape, prc);
|
||||
_param = (_func->get_parameters().size() > 0)? _func->get_parameters().front() : nullptr;
|
||||
_result = _func->get_results().front();
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) = 0;
|
||||
|
||||
std::shared_ptr<ngraph::Function> _func;
|
||||
std::shared_ptr<ngraph::opset9::Parameter> _param;
|
||||
std::shared_ptr<ngraph::opset1::Result> _result;
|
||||
};
|
||||
|
||||
class InnerBodyType01 : public InnerBodyGenerator {
|
||||
protected:
|
||||
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
|
||||
auto constantA = ngraph::opset9::Constant::create(prc, ov::Shape(input_shape.rank().get_length(), 2), {2.0f});
|
||||
constantA->set_friendly_name("body1_constantA");
|
||||
auto constantB = ngraph::opset9::Constant::create(prc, ov::Shape(input_shape.rank().get_length(), 2), {12.0f});
|
||||
constantB->set_friendly_name("body1_constantB");
|
||||
auto add = std::make_shared<ngraph::opset9::Add>(constantA, constantB);
|
||||
add->set_friendly_name("body1_add");
|
||||
auto result = std::make_shared<ngraph::opset1::Result>(add);
|
||||
auto o_layout = result->get_layout();
|
||||
result->set_friendly_name("body1_result");
|
||||
auto body = std::make_shared<ngraph::Function>(
|
||||
ngraph::OutputVector {result},
|
||||
ngraph::ParameterVector{},
|
||||
"constant");
|
||||
return body;
|
||||
}
|
||||
};
|
||||
|
||||
class InnerBodyType02 : public InnerBodyGenerator {
|
||||
protected:
|
||||
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
|
||||
auto constant = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 10.0f);
|
||||
constant->set_friendly_name("body2_const");
|
||||
auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
|
||||
data->set_friendly_name("body2_data");
|
||||
auto sum = std::make_shared<ngraph::opset9::Multiply>(data, constant);
|
||||
sum->set_friendly_name("body2_mul");
|
||||
auto result = std::make_shared<ngraph::opset1::Result>(sum);
|
||||
result->set_friendly_name("body2_result");
|
||||
auto body = std::make_shared<ngraph::Function>(
|
||||
ngraph::OutputVector {result},
|
||||
ngraph::ParameterVector{data},
|
||||
"eltwise_mul");
|
||||
return body;
|
||||
}
|
||||
};
|
||||
|
||||
class InnerBodyType03 : public InnerBodyGenerator {
|
||||
protected:
|
||||
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
|
||||
auto constant = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 2.0f);
|
||||
constant->set_friendly_name("body3_constant");
|
||||
auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
|
||||
data->set_friendly_name("body3_data");
|
||||
auto add = std::make_shared<ngraph::opset9::Add>(data, constant);
|
||||
add->set_friendly_name("body3_add");
|
||||
auto result = std::make_shared<ngraph::opset1::Result>(add);
|
||||
result->set_friendly_name("body3_result");
|
||||
auto body = std::make_shared<ngraph::Function>(
|
||||
ngraph::OutputVector {result},
|
||||
ngraph::ParameterVector{data},
|
||||
"eltwise_sum");
|
||||
return body;
|
||||
}
|
||||
};
|
||||
|
||||
class InnerBodyType04 : public InnerBodyGenerator {
|
||||
protected:
|
||||
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
|
||||
auto scale = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 2.0f);
|
||||
scale->set_friendly_name("body4_scale");
|
||||
auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
|
||||
data->set_friendly_name("body4_data");
|
||||
auto mul = std::make_shared<ngraph::opset9::Multiply>(data, scale);
|
||||
mul->set_friendly_name("body4_mul");
|
||||
auto pooling = generate_pooling(mul, input_shape);
|
||||
pooling->set_friendly_name("body4_pool");
|
||||
auto result = std::make_shared<ngraph::opset1::Result>(pooling);
|
||||
result->set_friendly_name("body4_result");
|
||||
auto body = std::make_shared<ngraph::Function>(
|
||||
ngraph::OutputVector {result},
|
||||
ngraph::ParameterVector{data},
|
||||
"eltwise_mul_pooling");
|
||||
return body;
|
||||
}
|
||||
|
||||
|
||||
struct poolSpecificParams {
|
||||
ngraph::helpers::PoolingTypes pooling_type; // Pooling type, max or avg
|
||||
std::vector<size_t> kernel_size; // Kernel size
|
||||
std::vector<size_t> stride; // Stride
|
||||
std::vector<size_t> pad_begin; // Pad begin
|
||||
std::vector<size_t> pad_end; // Pad end
|
||||
ngraph::op::RoundingType rounding_type; // Rounding type
|
||||
ngraph::op::PadType pad_type; // Pad type
|
||||
bool exclued_pad; // Exclude pad
|
||||
};
|
||||
|
||||
std::shared_ptr<ov::Node> generate_pooling(const ngraph::Output<ov::Node> &in, ov::PartialShape& input_shape) {
|
||||
poolSpecificParams params;
|
||||
switch (input_shape.rank().get_length()) {
|
||||
case 5:
|
||||
{
|
||||
params = poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX,
|
||||
{2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0},
|
||||
ngraph::op::RoundingType::CEIL,
|
||||
ngraph::op::PadType::SAME_LOWER, true };
|
||||
break;
|
||||
}
|
||||
case 4:
|
||||
{
|
||||
params = poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX,
|
||||
{2, 2}, {2, 2}, {0, 0}, {0, 0},
|
||||
ngraph::op::RoundingType::CEIL,
|
||||
ngraph::op::PadType::SAME_LOWER, true };
|
||||
break;
|
||||
}
|
||||
case 3:
|
||||
{
|
||||
params = poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX,
|
||||
{2}, {2}, {0}, {0},
|
||||
ngraph::op::RoundingType::CEIL,
|
||||
ngraph::op::PadType::SAME_LOWER, true };
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
OPENVINO_ASSERT(false, "Not allowed other rank");
|
||||
}
|
||||
}
|
||||
return ngraph::builder::makePooling(in, params.stride, params.pad_begin,
|
||||
params.pad_end, params.kernel_size, params.rounding_type,
|
||||
params.pad_type, params.exclued_pad, params.pooling_type);
|
||||
}
|
||||
};
|
||||
|
||||
class InnerBodyType05 : public InnerBodyGenerator {
|
||||
protected:
|
||||
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
|
||||
auto constant = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 2.0f);
|
||||
constant->set_friendly_name("body5_constant");
|
||||
auto data = std::make_shared<ngraph::opset9::Parameter>(prc, input_shape);
|
||||
data->set_friendly_name("body5_data");
|
||||
auto add = std::make_shared<ngraph::opset9::Add>(data, constant);
|
||||
add->set_friendly_name("body5_add");
|
||||
std::vector<int> axes;
|
||||
for (int i = 0, r = 0; i < input_shape.rank().get_length(); i++) {
|
||||
axes.push_back(r--);
|
||||
}
|
||||
std::vector<size_t> shapeAxes;
|
||||
shapeAxes.push_back(axes.size());
|
||||
|
||||
auto reductionAxesNode = std::dynamic_pointer_cast<ngraph::Node>(
|
||||
std::make_shared<ngraph::opset3::Constant>(ngraph::element::Type_t::i64, ngraph::Shape(shapeAxes), axes));
|
||||
|
||||
const auto reduce = ngraph::builder::makeReduce(add, reductionAxesNode, false, ngraph::helpers::ReductionType::Min);
|
||||
reduce->set_friendly_name("body5_reduce");
|
||||
auto constant_ref = std::make_shared<ngraph::opset9::Constant>(prc, ngraph::Shape{}, 10.0f);
|
||||
constant_ref->set_friendly_name("body5_ref_constant");
|
||||
|
||||
auto pred = std::make_shared<ngraph::opset3::GreaterEqual>(reduce, constant_ref);
|
||||
pred->set_friendly_name("nested_pred");
|
||||
|
||||
auto nested_body_then_generator = std::make_shared<InnerBodyType03>();
|
||||
auto nested_body_else_generator = std::make_shared<InnerBodyType04>();
|
||||
|
||||
auto nested_input_shape = add->get_output_partial_shape(0);
|
||||
nested_body_then_generator->create_body(nested_input_shape, prc);
|
||||
nested_body_else_generator->create_body(nested_input_shape, prc);
|
||||
nested_body_then_generator->get_function()->set_friendly_name("nested_then_inner_body");
|
||||
nested_body_else_generator->get_function()->set_friendly_name("nested_else_inner_body");
|
||||
|
||||
auto cond_nested = std::make_shared<ngraph::opset8::If>(pred);
|
||||
cond_nested->set_friendly_name("if_operator_nested");
|
||||
cond_nested->set_else_body(nested_body_else_generator->get_function());
|
||||
cond_nested->set_then_body(nested_body_then_generator->get_function());
|
||||
cond_nested->set_input(add, nested_body_then_generator->get_input(), nested_body_else_generator->get_input());
|
||||
cond_nested->set_output(nested_body_then_generator->get_result(), nested_body_else_generator->get_result());
|
||||
|
||||
auto result = std::make_shared<ngraph::opset1::Result>(cond_nested);
|
||||
result->set_friendly_name("body5_result");
|
||||
auto body = std::make_shared<ngraph::Function>(
|
||||
ngraph::OutputVector {result},
|
||||
ngraph::ParameterVector{data},
|
||||
"eltwise_sum");
|
||||
return body;
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * @brief Creates the inner body generator that corresponds to the requested body type.
 * @param type inner body type to instantiate
 * @return a newly created generator; an unknown type trips the assertion instead
 */
static std::shared_ptr<InnerBodyGenerator> get_inner_body_generator(InnerBodyGenerator::InnerBodyType type) {
    switch (type) {
        case InnerBodyGenerator::InnerBodyType::Type01:
        {
            return std::make_shared<InnerBodyType01>();
        }
        case InnerBodyGenerator::InnerBodyType::Type02:
        {
            return std::make_shared<InnerBodyType02>();
        }
        case InnerBodyGenerator::InnerBodyType::Type03:
        {
            return std::make_shared<InnerBodyType03>();
        }
        case InnerBodyGenerator::InnerBodyType::Type04:
        {
            return std::make_shared<InnerBodyType04>();
        }
        case InnerBodyGenerator::InnerBodyType::Type05:
        {
            return std::make_shared<InnerBodyType05>();
        }
        default:
        {
            OPENVINO_ASSERT(false, "Not supported type");
        }
    }
}
|
||||
|
||||
class TestModelGenerator {
|
||||
public:
|
||||
enum PredicateTypes {
|
||||
PARAM,
|
||||
NODE
|
||||
};
|
||||
|
||||
public:
|
||||
TestModelGenerator(InnerBodyGenerator::InnerBodyType then_body_type,
|
||||
InnerBodyGenerator::InnerBodyType else_body_type,
|
||||
PredicateTypes pred_type,
|
||||
ngraph::element::Type prc,
|
||||
ov::PartialShape input_shape,
|
||||
bool cond_execution_value = false) {
|
||||
body_then_generator = get_inner_body_generator(then_body_type);
|
||||
body_else_generator = get_inner_body_generator(else_body_type);
|
||||
|
||||
body_then_generator->create_body(input_shape, prc);
|
||||
body_else_generator->create_body(input_shape, prc);
|
||||
body_else_generator->get_function()->set_friendly_name("else_inner_body");
|
||||
body_then_generator->get_function()->set_friendly_name("then_inner_body");
|
||||
|
||||
ngraph::ParameterVector params{};
|
||||
auto predicate = create_cond_execution(pred_type, params, ngraph::element::boolean, ngraph::Shape{});
|
||||
predicate->set_friendly_name("if_predicate");
|
||||
auto data = create_condition_input(params, prc, input_shape);
|
||||
data->set_friendly_name("input_data");
|
||||
auto cond = std::make_shared<ngraph::opset8::If>(predicate);
|
||||
cond->set_friendly_name("if_operator");
|
||||
cond->set_else_body(body_else_generator->get_function());
|
||||
cond->set_then_body(body_then_generator->get_function());
|
||||
cond->set_input(data, body_then_generator->get_input(), body_else_generator->get_input());
|
||||
cond->set_output(body_then_generator->get_result(), body_else_generator->get_result());
|
||||
auto result = std::make_shared<ngraph::opset1::Result>(cond);
|
||||
result->set_friendly_name("outer_result");
|
||||
function = std::make_shared<ngraph::Function>(ngraph::OutputVector {result}, params);
|
||||
}
|
||||
std::shared_ptr<ngraph::Function> get_function() { return function; }
|
||||
|
||||
private:
|
||||
std::shared_ptr<ngraph::Node> create_condition_input(ngraph::ParameterVector& params,
|
||||
const ngraph::element::Type prc, const ov::PartialShape& shape,
|
||||
int value = 0, bool is_static = false) {
|
||||
if (is_static)
|
||||
return std::make_shared<ngraph::opset9::Constant>(prc, shape.to_shape(), value);
|
||||
|
||||
auto input = std::make_shared<ngraph::opset9::Parameter>(prc, shape);
|
||||
params.push_back(input);
|
||||
return input;
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::Node> create_cond_execution(PredicateTypes pred_type,
|
||||
ngraph::ParameterVector& params,
|
||||
const ngraph::element::Type prc = ngraph::element::u8,
|
||||
const ngraph::Shape shape = ngraph::Shape{}) {
|
||||
std::shared_ptr<ngraph::Node> pred;
|
||||
switch (pred_type) {
|
||||
case PredicateTypes::PARAM:
|
||||
{
|
||||
pred = create_condition_input(params, prc, shape);
|
||||
break;
|
||||
}
|
||||
case PredicateTypes::NODE:
|
||||
{
|
||||
auto param_cond = create_condition_input(params, prc, shape);
|
||||
param_cond->set_friendly_name("param_cond");
|
||||
auto const_cond = create_condition_input(params, prc, ngraph::Shape{}, 1, true);
|
||||
const_cond->set_friendly_name("const_cond");
|
||||
pred = std::make_shared<ngraph::opset3::GreaterEqual>(param_cond, const_cond);
|
||||
pred->set_friendly_name("pred");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
OPENVINO_ASSERT(false, "Not supported type");
|
||||
}
|
||||
}
|
||||
return pred;
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<ngraph::Function> function;
|
||||
InnerBodyGenerator::ptr body_then_generator;
|
||||
InnerBodyGenerator::ptr body_else_generator;
|
||||
};
|
||||
|
||||
/// Prints the name of an inner body type ("Type01" ... "Type05"); any other value prints "NONE".
static std::ostream& operator<<(std::ostream& os, const InnerBodyGenerator::InnerBodyType type) {
    const char* label = "NONE";
    switch (type) {
        case InnerBodyGenerator::InnerBodyType::Type01: label = "Type01"; break;
        case InnerBodyGenerator::InnerBodyType::Type02: label = "Type02"; break;
        case InnerBodyGenerator::InnerBodyType::Type03: label = "Type03"; break;
        case InnerBodyGenerator::InnerBodyType::Type04: label = "Type04"; break;
        case InnerBodyGenerator::InnerBodyType::Type05: label = "Type05"; break;
        default: break;
    }
    os << label;
    return os;
}
|
||||
|
||||
/// Prints the name of a predicate type ("PARAM" or "NODE"); any other value prints "NONE".
static std::ostream& operator<<(std::ostream& os, const TestModelGenerator::PredicateTypes type) {
    const char* label = "NONE";
    switch (type) {
        case TestModelGenerator::PredicateTypes::PARAM: label = "PARAM"; break;
        case TestModelGenerator::PredicateTypes::NODE:  label = "NODE";  break;
        default: break;
    }
    os << label;
    return os;
}
|
||||
|
||||
using ConditionParams = typename std::tuple<
|
||||
InferenceEngine::SizeVector, // Shape
|
||||
InferenceEngine::Precision, // Precision
|
||||
TestModelGenerator::PredicateTypes, // if predicate type
|
||||
LayerTestsUtils::TargetDevice // Device name
|
||||
>;
|
||||
|
||||
/**
 * @brief Static-shape test of the If operation.
 *
 * The model always uses InnerBodyType02 as the then-body and InnerBodyType03 as the else-body.
 */
class StaticConditionLayerGPUTest : public testing::WithParamInterface<ConditionParams>,
                                    virtual public LayerTestsUtils::LayerTestsCommon {
public:
    /// Builds the test case name from the test parameters; every '-' is replaced with '_'.
    static std::string getTestCaseName(const testing::TestParamInfo<ConditionParams>& obj) {
        InferenceEngine::SizeVector data_shape;
        InferenceEngine::Precision data_prc;
        TestModelGenerator::PredicateTypes pred;
        std::string targetDevice;

        std::tie(data_shape, data_prc, pred, targetDevice) = obj.param;
        std::ostringstream result;
        result << "IS=" << CommonTestUtils::vec2str(data_shape) << "_";
        result << "netPRC=" << std::to_string(data_prc) << "_";
        result << "ifCond=" << pred << "_";
        result << "targetDevice=" << targetDevice << "_";
        auto res_str = result.str();
        std::replace(res_str.begin(), res_str.end(), '-', '_');
        return res_str;
    }

protected:
    /// Reads the test parameters (including the target device) and builds the model under test.
    void SetUp() override {
        TestModelGenerator::PredicateTypes pred;
        std::tie(data_shape, data_prc, pred, targetDevice) = GetParam();
        const auto ngShape = ov::PartialShape{data_shape};
        const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(data_prc);
        TestModelGenerator model_generator(InnerBodyGenerator::InnerBodyType::Type02,
                                            InnerBodyGenerator::InnerBodyType::Type03,
                                            pred,
                                            prc,
                                            ngShape);
        function = model_generator.get_function();
    }

    /**
     * @brief Generates an input blob.
     *
     * A scalar boolean input (the If predicate) is set to false; every other input is filled with 20.
     */
    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override {
        auto tensor_desc = info.getTensorDesc();
        auto blob = make_blob_with_precision(tensor_desc);
        blob->allocate();

        if (tensor_desc.getLayout() == InferenceEngine::SCALAR) {
            auto prc = tensor_desc.getPrecision();
            if (prc == InferenceEngine::Precision::BOOL) {
                auto mem_blob = dynamic_cast<InferenceEngine::MemoryBlob*>(blob.get());
                // Fail with a clear message instead of dereferencing a null pointer.
                OPENVINO_ASSERT(mem_blob != nullptr, "Scalar boolean input blob is not a MemoryBlob");
                auto mem = mem_blob->rwmap();
                auto data_ptr = mem.as<bool*>();
                *data_ptr = false;
            } else {
                // The 1D view is only needed on this path.
                auto scalar_1d = CommonTestUtils::make_reshape_view(blob, {1});
                CommonTestUtils::fill_data_with_broadcast(scalar_1d, 0, {20.f});
            }
        } else {
            CommonTestUtils::fill_data_with_broadcast(blob, 0, {20.f});
        }
        return blob;
    }

    InferenceEngine::SizeVector data_shape;
    InferenceEngine::Precision data_prc;
};
|
||||
|
||||
TEST_P(StaticConditionLayerGPUTest, CompareWithRefs) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED();
|
||||
Run();
|
||||
}
|
||||
|
||||
std::vector<InferenceEngine::Precision> netPrecisions_static = {
|
||||
InferenceEngine::Precision::FP32,
|
||||
InferenceEngine::Precision::FP16,
|
||||
InferenceEngine::Precision::I8
|
||||
};
|
||||
|
||||
std::vector<InferenceEngine::SizeVector> inputs_shape = {
|
||||
{3, 6}
|
||||
};
|
||||
|
||||
std::vector<GPULayerTestsDefinitions::TestModelGenerator::PredicateTypes> if_cond_types = {
|
||||
GPULayerTestsDefinitions::TestModelGenerator::PredicateTypes::PARAM
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_static, StaticConditionLayerGPUTest,
|
||||
testing::Combine(
|
||||
testing::ValuesIn(inputs_shape),
|
||||
testing::ValuesIn(netPrecisions_static),
|
||||
testing::ValuesIn(if_cond_types),
|
||||
testing::Values<std::string>(CommonTestUtils::DEVICE_GPU)),
|
||||
StaticConditionLayerGPUTest::getTestCaseName);
|
||||
|
||||
|
||||
/// Dynamic shape test
|
||||
struct InnerBodyTypeParams {
|
||||
InnerBodyGenerator::InnerBodyType then_body_type;
|
||||
InnerBodyGenerator::InnerBodyType else_body_type;
|
||||
};
|
||||
|
||||
using ConditionGPUParams = typename std::tuple<
|
||||
InputShape, // Input Shapes
|
||||
InnerBodyTypeParams, // Inner body type
|
||||
InferenceEngine::Precision, // Precision
|
||||
TestModelGenerator::PredicateTypes, // if predicate type
|
||||
LayerTestsUtils::TargetDevice // Device name
|
||||
>;
|
||||
|
||||
/**
 * @brief Dynamic-shape test of the If operation.
 *
 * The model has two inputs: the scalar predicate input and the data input with a dynamic shape.
 */
class DynamicConditionLayerGPUTest : public testing::WithParamInterface<ConditionGPUParams>,
                                virtual public SubgraphBaseTest {
public:
    /// Builds the test case name from the test parameters; every '-' is replaced with '_'.
    static std::string getTestCaseName(const testing::TestParamInfo<ConditionGPUParams>& obj) {
        InputShape inputShapes;
        InnerBodyTypeParams bodyParams;
        InferenceEngine::Precision dataPrc;
        TestModelGenerator::PredicateTypes condType;
        std::string targetDevice;

        std::tie(inputShapes, bodyParams, dataPrc, condType, targetDevice) = obj.param;
        std::ostringstream result;
        result << "IS=(";
        result << CommonTestUtils::partialShape2str({inputShapes.first}) << "_";
        for (size_t i = 0lu; i < inputShapes.second.size(); i++) {
            result << "{";
            result << CommonTestUtils::vec2str(inputShapes.second[i]) << "_";
            result << "}_";
        }
        result << ")_";
        result << "innerBody={" << bodyParams.then_body_type << ", " << bodyParams.else_body_type << "}_";
        result << "netPRC=" << dataPrc << "_";
        result << "ifCond=" << condType << "_";
        result << "targetDevice=" << targetDevice << "_";
        auto res_str = result.str();
        std::replace(res_str.begin(), res_str.end(), '-', '_');
        return res_str;
    }

protected:
    /// Registers the input shapes (predicate first, then data) and builds the model under test.
    void SetUp() override {
        InputShape inputShapes;
        InnerBodyTypeParams bodyParams;
        InferenceEngine::Precision dataPrc;
        TestModelGenerator::PredicateTypes condType;
        std::tie(inputShapes, bodyParams, dataPrc, condType, targetDevice) = GetParam();

        // The predicate input is a scalar: one empty static shape per target data shape.
        const std::vector<ov::Shape> condSecondVec(inputShapes.second.size(), ov::Shape{});
        auto condShapes = ov::test::InputShape(ov::PartialShape({}), condSecondVec);
        init_input_shapes({condShapes, inputShapes});

        const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(dataPrc);
        TestModelGenerator model_generator(bodyParams.then_body_type,
                                            bodyParams.else_body_type,
                                            condType,
                                            prc,
                                            inputShapes.first);
        function = model_generator.get_function();
        function->set_friendly_name("if_operator_outer");
    }

    /**
     * @brief Override generate_inputs to support boolean param for if(condition) operator.
     *
     * A boolean parameter receives a scalar tensor whose value alternates (0, 1, 0, ...) each time
     * one is generated. Every other parameter is filled by create_and_fill_tensor using the first
     * target shape that has more than one dimension.
     *
     * @param targetInputStaticShapes static shapes of the model inputs for the current iteration
     */
    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
        ov::Shape input_shape;
        for (auto& shape : targetInputStaticShapes) {
            if (shape.size() > 1) {
                input_shape = shape;
                break;
            }
        }

        inputs.clear();
        for (const auto& param : function->get_parameters()) {
            if (param->get_output_element_type(0) == ov::element::boolean) {
                auto tensor = ov::Tensor{ov::element::boolean, {}};
                auto p_data = tensor.data<ov::element_type_traits<ov::element::boolean>::value_type>();
                p_data[0] = (niter++ % 2);

                inputs.insert({param, tensor});
            } else {
                ov::test::utils::InputGenerateData inGenData;
                inGenData.range         = 10;
                inGenData.start_from    = 0;
                inGenData.resolution    = 128;
                inGenData.seed          = 1;
                auto tensor = ov::test::utils::create_and_fill_tensor(param->get_element_type(), input_shape, inGenData.range,
                                                                        inGenData.start_from, inGenData.resolution, inGenData.seed);
                inputs.insert({param, tensor});
            }
        }
    }

    // Number of boolean predicate tensors generated so far; drives the alternating predicate value.
    size_t niter = 0;
};
|
||||
|
||||
TEST_P(DynamicConditionLayerGPUTest, CompareWithRefs) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
run();
|
||||
}
|
||||
|
||||
const std::vector<InferenceEngine::Precision> netPrecisions_f32 = {
|
||||
InferenceEngine::Precision::FP32
|
||||
};
|
||||
|
||||
const std::vector<InferenceEngine::Precision> netPrecisions_f16 = {
|
||||
InferenceEngine::Precision::FP16
|
||||
};
|
||||
|
||||
const std::vector<ov::test::InputShape> dynamicInputShapes_f32 = {
|
||||
ov::test::InputShape(ov::PartialShape({-1, -1, -1, -1, -1}), {{4, 1, 1, 64, 32}, {6, 1, 1, 8, 4}, {8, 1, 1, 24, 16}}),
|
||||
ov::test::InputShape(ov::PartialShape({1, 1, -1, -1}), {{1, 1, 64, 32}, {1, 1, 8, 4}, {1, 1, 24, 16}})
|
||||
};
|
||||
|
||||
const std::vector<ov::test::InputShape> dynamicInputShapes_f16 = {
|
||||
ov::test::InputShape(ov::PartialShape({1, 1, -1, -1}), {{1, 1, 64, 32}, {1, 1, 8, 4}, {1, 1, 24, 16}}),
|
||||
ov::test::InputShape(ov::PartialShape({-1, -1, -1}), {{2, 24, 16}, {2, 64, 32}, {2, 8, 4}})
|
||||
};
|
||||
|
||||
const std::vector<InnerBodyTypeParams> innerBodyTypes_f32 = {
|
||||
{
|
||||
InnerBodyGenerator::InnerBodyType::Type01,
|
||||
InnerBodyGenerator::InnerBodyType::Type02
|
||||
},
|
||||
{
|
||||
InnerBodyGenerator::InnerBodyType::Type02,
|
||||
InnerBodyGenerator::InnerBodyType::Type03
|
||||
}
|
||||
};
|
||||
|
||||
const std::vector<InnerBodyTypeParams> innerBodyTypes_f16 = {
|
||||
{
|
||||
InnerBodyGenerator::InnerBodyType::Type04,
|
||||
InnerBodyGenerator::InnerBodyType::Type03
|
||||
},
|
||||
{
|
||||
InnerBodyGenerator::InnerBodyType::Type02,
|
||||
InnerBodyGenerator::InnerBodyType::Type05
|
||||
}
|
||||
};
|
||||
|
||||
const std::vector<TestModelGenerator::PredicateTypes> condTypes = {
|
||||
TestModelGenerator::PredicateTypes::PARAM,
|
||||
TestModelGenerator::PredicateTypes::NODE
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_dynamic_f32, DynamicConditionLayerGPUTest,
|
||||
testing::Combine(
|
||||
testing::ValuesIn(dynamicInputShapes_f32), // input shapes
|
||||
testing::ValuesIn(innerBodyTypes_f32), // inner body type
|
||||
testing::ValuesIn(netPrecisions_f32), // network precision
|
||||
testing::ValuesIn(condTypes), // cond type
|
||||
testing::Values<std::string>(CommonTestUtils::DEVICE_GPU)), // device type
|
||||
DynamicConditionLayerGPUTest::getTestCaseName);
|
||||
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_dynamic_f16, DynamicConditionLayerGPUTest,
|
||||
testing::Combine(
|
||||
testing::ValuesIn(dynamicInputShapes_f16), // input shapes
|
||||
testing::ValuesIn(innerBodyTypes_f16), // inner body type
|
||||
testing::ValuesIn(netPrecisions_f16), // network precision
|
||||
testing::ValuesIn(condTypes), // cond type
|
||||
testing::Values<std::string>(CommonTestUtils::DEVICE_GPU)), // device type
|
||||
DynamicConditionLayerGPUTest::getTestCaseName);
|
||||
} // namespace GPULayerTestsDefinitions
|
@ -18,138 +18,151 @@ using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
namespace {
|
||||
bool is_output_equal(const cldnn::memory::ptr mem, const std::vector<float>& ref)
|
||||
template <class T>
|
||||
bool is_output_equal(const cldnn::memory::ptr mem, const std::vector<T>& ref)
|
||||
{
|
||||
cldnn::mem_lock<float> ptr(mem, get_test_stream());
|
||||
cldnn::mem_lock<T> ptr(mem, get_test_stream());
|
||||
for (size_t i = 0; i < mem->get_layout().count(); i++) {
|
||||
if (!are_equal(ptr[i], ref[i])) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
topology generate_simple_branch (bool branch_true_false, const primitive_id& input_id)
|
||||
topology generate_simple_branch (bool branch_true_false, const primitive_id& id, const primitive_id& input_id, const data_types dt = data_types::f32)
|
||||
{
|
||||
topology branch;
|
||||
if (branch_true_false) {
|
||||
branch.add(
|
||||
pooling(input_id + "_when_true", input_id, cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
|
||||
input_layout(input_id, { dt, format::bfyx,{ 1, 1, 4, 1 } }),
|
||||
pooling(id + "_when_true", input_id, cldnn::pooling_mode::max, { 1, 2 }, { 1, 2 })
|
||||
);
|
||||
} else {
|
||||
branch.add(
|
||||
pooling(input_id + "_when_false", input_id, cldnn::pooling_mode::average, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
|
||||
input_layout(input_id, { dt, format::bfyx,{ 1, 1, 4, 1 } }),
|
||||
pooling(id + "_when_false", input_id, cldnn::pooling_mode::average, { 1, 2 }, { 1, 2 })
|
||||
);
|
||||
}
|
||||
return branch;
|
||||
}
|
||||
|
||||
/**
 * @brief Builds two value lists for the window [offset, offset + range) of the input layout.
 *
 * For every position of the window (batch, feature, y, x order) one value is appended to each list,
 * depending on the compare function:
 *   EQUAL:   first list gets the input value,     second list gets -1
 *   GREATER: first list gets the input value - 1, second list gets 99
 *   LESS:    first list gets the input value + 1, second list gets -1
 *
 * @return the pair {first list, second list}
 */
std::pair<std::vector<float>, std::vector<float>> get_values_to_compare(const cldnn::tensor& offset,
                                                                        const cldnn::tensor& range,
                                                                        const std::vector<float>& values,
                                                                        const cldnn::layout& input_lay,
                                                                        const cond_functions& func) {
    std::vector<float> for_true;
    std::vector<float> for_false;
    auto desc = generic_test::get_linear_memory_desc(input_lay);

    for (int32_t b = 0; b < range.batch[0]; b++) {
        for (int32_t f = 0; f < range.feature[0]; f++) {
            for (int32_t y = 0; y < range.spatial[1]; y++) {
                for (int32_t x = 0; x < range.spatial[0]; x++) {
                    const auto idx = generic_test::get_linear_index(input_lay,
                                                                    offset.batch[0] + b,
                                                                    offset.feature[0] + f,
                                                                    offset.spatial[1] + y,
                                                                    offset.spatial[0] + x,
                                                                    desc);

                    // Any other compare function appends nothing.
                    if (func == cond_functions::EQUAL) {
                        for_true.push_back(values.at(idx));
                        for_false.push_back(-1.0f);
                    } else if (func == cond_functions::GREATER) {
                        for_true.push_back(values.at(idx) - 1.0f);
                        for_false.push_back(99.0f);
                    } else if (func == cond_functions::LESS) {
                        for_true.push_back(values.at(idx) + 1.0f);
                        for_false.push_back(-1.0f);
                    }
                }
            }
        }
    }
    return { for_true, for_false };
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TEST(DISABLED_condition_gpu, basic_equal_comp) {
|
||||
auto& engine = get_test_engine();
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
|
||||
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
template < typename DataType>
|
||||
struct condition_data_types {
|
||||
using type = DataType;
|
||||
static const data_types data_type = type_to_data_type<DataType>::value;
|
||||
};
|
||||
|
||||
topology branch_true = generate_simple_branch(true, "condi");
|
||||
topology branch_false = generate_simple_branch(false, "condi");
|
||||
template <typename ConditionDataType>
|
||||
class condition_gpu_basic_test : public ::testing::Test {
|
||||
public:
|
||||
|
||||
topology topology;
|
||||
topology.add(
|
||||
input_layout("input", input->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
input_layout("compare", compare->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
input_layout("scale_data", scale_mem->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
|
||||
);
|
||||
topology.add(
|
||||
eltwise("output", { input_info("condi"), input_info("scale_data") }, eltwise_mode::prod)
|
||||
);
|
||||
using input_type = typename ConditionDataType::type;
|
||||
std::vector<input_type> convert_data(std::vector<int> in_vec) {
|
||||
const size_t vec_size = in_vec.size();
|
||||
std::vector<input_type> converted_data_vec(vec_size);
|
||||
for (size_t i = 0; i < vec_size; i++) {
|
||||
converted_data_vec[i] = (input_type)in_vec[i];
|
||||
}
|
||||
return converted_data_vec;
|
||||
}
|
||||
|
||||
network net(engine, topology, config);
|
||||
set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f });
|
||||
set_values(scale_mem, { 10.0f });
|
||||
net.set_input_data("input", input);
|
||||
net.set_input_data("scale_data", scale_mem);
|
||||
void run_test() {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
decltype(net.execute()) out;
|
||||
auto dat_dt = ConditionDataType::data_type;
|
||||
|
||||
//WHEN TRUE
|
||||
set_values(compare, { 1.0f });
|
||||
net.set_input_data("compare", compare);
|
||||
out = net.execute();
|
||||
auto out_data_true = out.at("output").get_memory();
|
||||
ASSERT_TRUE(is_output_equal(out_data_true, {20.0f, 40.0f}));
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
auto input = engine.allocate_memory({ dat_dt, format::bfyx,{ 1, 1, 4, 1 } });
|
||||
auto predicate = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
auto scale_mem = engine.allocate_memory({ dat_dt, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
|
||||
//WHEN FALSE
|
||||
set_values(compare, { 4.0f });
|
||||
net.set_input_data("compare", compare);
|
||||
out = net.execute();
|
||||
auto out_data_false = out.at("output").get_memory();
|
||||
ASSERT_TRUE(is_output_equal(out_data_false, { 15.0f, 35.0f }));
|
||||
primitive_id input_id = "input";
|
||||
primitive_id pred_id = "predicate";
|
||||
primitive_id branch_input_id = "branch_input";
|
||||
primitive_id cond_id = "condi";
|
||||
primitive_id scale_data_id = "scale_data";
|
||||
primitive_id output_id = "output";
|
||||
|
||||
condition::branch branch_true;
|
||||
{
|
||||
cldnn::topology branch_true_topology = generate_simple_branch(true, cond_id, branch_input_id, dat_dt);
|
||||
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
|
||||
branch_true.input_map.insert({input_id, branch_input_id});
|
||||
branch_true.output_map.insert({0, "condi_when_true"});
|
||||
}
|
||||
condition::branch branch_false;
|
||||
{
|
||||
cldnn::topology branch_false_topology = generate_simple_branch(false, cond_id, branch_input_id, dat_dt);
|
||||
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
|
||||
branch_false.input_map.insert({input_id, branch_input_id});
|
||||
branch_false.output_map.insert({0, "condi_when_false"});
|
||||
}
|
||||
|
||||
cldnn::topology topology;
|
||||
topology.add(
|
||||
input_layout(input_id, input->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
input_layout(pred_id, predicate->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
input_layout(scale_data_id, scale_mem->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
condition(cond_id, {input_info(pred_id), input_info(input_id)}, branch_true, branch_false)
|
||||
);
|
||||
topology.add(
|
||||
eltwise(output_id, { input_info(cond_id), input_info(scale_data_id) }, eltwise_mode::prod)
|
||||
);
|
||||
|
||||
network net(engine, topology, config);
|
||||
set_values(input, convert_data({ 1, 2, 3, 4 }));
|
||||
set_values(scale_mem, convert_data({ 10 }));
|
||||
net.set_input_data(input_id, input);
|
||||
net.set_input_data(scale_data_id, scale_mem);
|
||||
|
||||
decltype(net.execute()) out;
|
||||
|
||||
//WHEN TRUE
|
||||
set_values(predicate, { 1 });
|
||||
net.set_input_data(pred_id, predicate);
|
||||
out = net.execute();
|
||||
auto out_data_true = out.at(output_id).get_memory();
|
||||
ASSERT_TRUE(is_output_equal(out_data_true, convert_data({ 20, 40 })));
|
||||
|
||||
//WHEN FALSE
|
||||
set_values(predicate, { 0 });
|
||||
net.set_input_data(pred_id, predicate);
|
||||
out = net.execute();
|
||||
auto out_data_false = out.at(output_id).get_memory();
|
||||
ASSERT_TRUE(is_output_equal(out_data_false, convert_data({ 15, 35 })));
|
||||
}
|
||||
};
|
||||
|
||||
using test_data_types = testing::Types<condition_data_types<FLOAT16>,
|
||||
condition_data_types<float>>;
|
||||
|
||||
TYPED_TEST_SUITE(condition_gpu_basic_test, test_data_types);
|
||||
|
||||
TYPED_TEST(condition_gpu_basic_test, simple_basic_test) {
|
||||
this->run_test();
|
||||
}
|
||||
|
||||
TEST(DISABLED_condition_gpu, basic_range_equal_comp) {
|
||||
TEST(condition_gpu, basic_range_equal_comp) {
|
||||
auto& engine = get_test_engine();
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
|
||||
auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
|
||||
auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
|
||||
auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
|
||||
|
||||
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
|
||||
auto predicate = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
|
||||
topology branch_true = generate_simple_branch(true, "condi");
|
||||
topology branch_false = generate_simple_branch(false, "condi");
|
||||
primitive_id condi_id = "condi";
|
||||
primitive_id branch_input_id = "branch_input";
|
||||
primitive_id concat_id = "concat";
|
||||
|
||||
topology topology;
|
||||
cldnn::topology topology;
|
||||
topology.add(
|
||||
input_layout("input0", input0->get_layout())
|
||||
);
|
||||
@ -157,32 +170,48 @@ TEST(DISABLED_condition_gpu, basic_range_equal_comp) {
|
||||
input_layout("input1", input1->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
input_layout("compare", compare->get_layout())
|
||||
input_layout("predicate", predicate->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
concatenation("concat", { input_info("input0"), input_info("input1") }, 3)
|
||||
);
|
||||
|
||||
condition::branch branch_true;
|
||||
{
|
||||
cldnn::topology branch_true_topology = generate_simple_branch(true, condi_id, branch_input_id);
|
||||
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
|
||||
branch_true.input_map.insert({concat_id, branch_input_id});
|
||||
branch_true.output_map.insert({0, "condi_when_true"});
|
||||
}
|
||||
condition::branch branch_false;
|
||||
{
|
||||
cldnn::topology branch_false_topology = generate_simple_branch(false, condi_id, branch_input_id);
|
||||
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
|
||||
branch_false.input_map.insert({concat_id, branch_input_id});
|
||||
branch_false.output_map.insert({0, "condi_when_false"});
|
||||
}
|
||||
|
||||
topology.add(
|
||||
condition("condi", input_info("concat"), branch_true, branch_false, "compare", cond_functions::EQUAL)
|
||||
condition("condi", {input_info("predicate"), input_info("concat")}, branch_true, branch_false)
|
||||
);
|
||||
|
||||
std::vector<float> input0_data = {
|
||||
1, 2, 3, 4
|
||||
1, 2
|
||||
};
|
||||
std::vector<float> input1_data = {
|
||||
5, 6, 7, 8
|
||||
3, 4
|
||||
};
|
||||
std::vector<float> compare_data_true = {
|
||||
1, 2, 3
|
||||
std::vector<uint8_t> predicate_data_true = {
|
||||
1
|
||||
};
|
||||
std::vector<float> pooling_when_true_data = {
|
||||
2, 4, 6, 8
|
||||
2, 4
|
||||
};
|
||||
std::vector<float> compare_data_false = {
|
||||
1, 2, 10
|
||||
std::vector<uint8_t> predicate_data_false = {
|
||||
0
|
||||
};
|
||||
std::vector<float> pooling_when_false_data = {
|
||||
1.5, 3.5, 5.5, 7.5
|
||||
1.5, 3.5
|
||||
};
|
||||
|
||||
set_values(input0, input0_data);
|
||||
@ -194,121 +223,23 @@ TEST(DISABLED_condition_gpu, basic_range_equal_comp) {
|
||||
decltype(net.execute()) outputs;
|
||||
|
||||
//CHECK TRUE
|
||||
set_values(compare, compare_data_true);
|
||||
net.set_input_data("compare", compare);
|
||||
set_values(predicate, predicate_data_true);
|
||||
net.set_input_data("predicate", predicate);
|
||||
outputs = net.execute();
|
||||
|
||||
auto out_data_true = outputs.at("condi").get_memory();
|
||||
ASSERT_TRUE(is_output_equal(out_data_true, pooling_when_true_data));
|
||||
|
||||
//CHECK FALSE
|
||||
set_values(compare, compare_data_false);
|
||||
net.set_input_data("compare", compare);
|
||||
set_values(predicate, predicate_data_false);
|
||||
net.set_input_data("predicate", predicate);
|
||||
outputs = net.execute();
|
||||
|
||||
auto out_data_false = outputs.at("condi").get_memory();
|
||||
ASSERT_TRUE(is_output_equal(out_data_false, pooling_when_false_data));
|
||||
}
|
||||
|
||||
// Disabled test: for every combination of comparison function, compare-range
// and offset, builds a network with a single `condition` primitive and checks
// its output once with compare data from `get_values_to_compare(...).first`
// and once with `.second`.
// NOTE(review): this uses the compare-input form of `condition`
// (compare id, function, offset); confirm that overload still exists before
// re-enabling.
TEST(DISABLED_condition_gpu, generic_test_true_false) {
    auto& engine = get_test_engine();
    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 5, 2, 5, 1 } });
    std::vector<float> input_data(50);
    std::iota(input_data.begin(), input_data.end(), 0.0f);
    // The input contents never change, so upload them once instead of per iteration.
    set_values(input, input_data);

    std::vector<cond_functions> functions = {
        cond_functions::EQUAL,
        cond_functions::GREATER,
        cond_functions::LESS,
    };

    // ranges, with data when condition is true or false
    std::vector<cldnn::tensor> ranges = {
        {1, 1, 1, 1},
        {1, 1, 3, 1},
        {2, 1, 1, 1},
        {2, 1, 1, 1}
    };

    std::vector<cldnn::tensor> offsets = {
        { 0, 0, 0, 0},
        { 0, 0, 1, 0},
        { 0, 0, 2, 0},
        { 2, 0, 0, 0},
        { 2, 1, 1, 0}
    };

    // Expected output when the max-pooling branch runs.
    std::vector<float> pooling_when_true_data = {
        2, 4, 7, 9, 12, 14, 17,
        19, 22, 24, 27, 29, 32,
        34, 37, 39, 42, 44, 47, 49
    };

    // Expected output when the average-pooling branch runs.
    std::vector<float> pooling_when_false_data = {
        1, 3, 6, 8, 11, 13, 16,
        18, 21, 23, 26, 28, 31,
        33, 36, 38, 41, 43, 46, 48
    };

    for (auto const& func : functions) {
        for (auto const& range : ranges) {
            for (auto const& offset : offsets) {
                auto comp_values = get_values_to_compare(offset, range, input_data, input->get_layout(), func);
                // Bind by reference: comp_values outlives both uses below.
                const auto& comp_values_true = comp_values.first;
                const auto& comp_values_false = comp_values.second;

                auto compare = engine.allocate_memory({ data_types::f32, format::bfyx, range });

                topology branch_true;
                topology branch_false;
                branch_true.add(
                    pooling("pooling_when_true", input_info("condi"), cldnn::pooling_mode::max, { 1, 1, 3, 1 }, { 1, 1, 2, 1 })
                );
                branch_false.add(
                    pooling("pooling_when_false", input_info("condi"), cldnn::pooling_mode::average, { 1, 1, 3, 1 }, { 1, 1, 2, 1 })
                );

                cldnn::topology topology;
                topology.add(
                    input_layout("input", input->get_layout())
                );
                topology.add(
                    input_layout("compare", compare->get_layout())
                );
                topology.add(
                    condition("condi", input_info("input"), branch_true, branch_false, "compare", func, offset)
                );

                network net(engine, topology, config);
                net.set_input_data("input", input);

                decltype(net.execute()) outputs;

                //CHECK TRUE
                set_values(compare, comp_values_true);
                net.set_input_data("compare", compare);
                outputs = net.execute();

                auto out_data_true = outputs.at("condi").get_memory();
                ASSERT_TRUE(is_output_equal(out_data_true, pooling_when_true_data));

                //CHECK FALSE
                set_values(compare, comp_values_false);
                net.set_input_data("compare", compare);
                outputs = net.execute();

                auto out_data_false = outputs.at("condi").get_memory();
                ASSERT_TRUE(is_output_equal(out_data_false, pooling_when_false_data));
            }
        }
    }
}
|
||||
|
||||
TEST(DISABLED_condition_gpu, basic_stacked_ifs) {
|
||||
TEST(condition_gpu, basic_stacked_ifs) {
|
||||
/*
|
||||
<prims...>
|
||||
<if>
|
||||
@ -324,61 +255,95 @@ TEST(DISABLED_condition_gpu, basic_stacked_ifs) {
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
|
||||
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
auto compare2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
|
||||
auto predicate = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
auto predicate2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
|
||||
topology condi_1_true = generate_simple_branch(true, "condi");
|
||||
topology condi_1_false = generate_simple_branch(false, "condi");
|
||||
primitive_id input_id = "input";
|
||||
primitive_id pred_id = "predicate";
|
||||
primitive_id predicate2_id = "predicate2";
|
||||
primitive_id branch_input_id = "branch_input";
|
||||
primitive_id cond_id = "condi";
|
||||
primitive_id cond2_id = "condi2";
|
||||
primitive_id scale_data_id = "scale_data";
|
||||
primitive_id output_id = "output";
|
||||
|
||||
topology condi_1_true = generate_simple_branch(true, cond_id, branch_input_id);
|
||||
topology condi_1_false = generate_simple_branch(false, cond_id, branch_input_id);
|
||||
topology condi_2_true;
|
||||
condi_2_true.add(
|
||||
activation("activ_when_true", input_info("condi2"), activation_func::log2)
|
||||
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }),
|
||||
activation("activ_when_true", input_info(branch_input_id), activation_func::log2)
|
||||
);
|
||||
topology condi_2_false;
|
||||
condi_2_false.add(
|
||||
activation("activ_when_false", input_info("condi2"), activation_func::relu)
|
||||
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }),
|
||||
activation("activ_when_false", input_info(branch_input_id), activation_func::relu)
|
||||
);
|
||||
|
||||
condition::branch branch_condi_1_true;
|
||||
branch_condi_1_true.inner_program = program::build_program(engine, condi_1_true, config, true);
|
||||
branch_condi_1_true.input_map.insert({input_id, branch_input_id});
|
||||
branch_condi_1_true.output_map.insert({0, "condi_when_true"});
|
||||
|
||||
condition::branch branch_condi_1_false;
|
||||
branch_condi_1_false.inner_program = program::build_program(engine, condi_1_false, config, true);
|
||||
branch_condi_1_false.input_map.insert({input_id, branch_input_id});
|
||||
branch_condi_1_false.output_map.insert({0, "condi_when_false"});
|
||||
|
||||
condition::branch branch_condi_2_true;
|
||||
branch_condi_2_true.inner_program = program::build_program(engine, condi_2_true, config, true);
|
||||
branch_condi_2_true.input_map.insert({cond_id, branch_input_id});
|
||||
branch_condi_2_true.output_map.insert({0, "activ_when_true"});
|
||||
|
||||
condition::branch branch_condi_2_false;
|
||||
branch_condi_2_false.inner_program = program::build_program(engine, condi_2_false, config, true);
|
||||
branch_condi_2_false.input_map.insert({cond_id, branch_input_id});
|
||||
branch_condi_2_false.output_map.insert({0, "activ_when_false"});
|
||||
|
||||
topology topology;
|
||||
topology.add(
|
||||
input_layout("input", input->get_layout())
|
||||
input_layout(input_id, input->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
input_layout("compare", compare->get_layout())
|
||||
input_layout(pred_id, predicate->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
condition("condi", input_info("input"), condi_1_true, condi_1_false, "compare", cond_functions::EQUAL)
|
||||
condition(cond_id, { input_info(pred_id), input_info(input_id) }, branch_condi_1_true, branch_condi_1_false)
|
||||
);
|
||||
topology.add(
|
||||
input_layout("compare2", compare2->get_layout())
|
||||
input_layout(predicate2_id, predicate2->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
condition("condi2", input_info("condi"), condi_2_true, condi_2_false, "compare2", cond_functions::GREATER)
|
||||
condition(cond2_id, { input_info(predicate2_id), input_info(cond_id) }, branch_condi_2_true, branch_condi_2_false)
|
||||
);
|
||||
|
||||
std::vector<float> input_data = {
|
||||
1, 2, 3, 4
|
||||
};
|
||||
std::vector<float> compare_data = {
|
||||
std::vector<uint8_t> predicate_data = {
|
||||
1
|
||||
};
|
||||
std::vector<float> compare_2_data = {
|
||||
0.0f, 0.0f
|
||||
std::vector<uint8_t> predicate_2_data = {
|
||||
0
|
||||
};
|
||||
set_values(input, input_data);
|
||||
set_values(compare, compare_data);
|
||||
set_values(compare2, compare_2_data);
|
||||
set_values(predicate, predicate_data);
|
||||
set_values(predicate2, predicate_2_data);
|
||||
|
||||
network net(engine, topology, config);
|
||||
net.set_input_data("input", input);
|
||||
net.set_input_data("compare", compare);
|
||||
net.set_input_data("compare2", compare2);
|
||||
net.set_input_data(input_id, input);
|
||||
net.set_input_data(pred_id, predicate);
|
||||
net.set_input_data(predicate2_id, predicate2);
|
||||
auto outputs = net.execute();
|
||||
|
||||
auto out_data = outputs.at("condi2").get_memory();
|
||||
ASSERT_TRUE(is_output_equal(out_data, {1.0f, 2.0f}));
|
||||
std::vector<float> ref_data = {
|
||||
2.0f, 4.0f
|
||||
};
|
||||
auto out_data = outputs.at(cond2_id).get_memory();
|
||||
ASSERT_TRUE(is_output_equal(out_data, ref_data));
|
||||
}
|
||||
|
||||
TEST(DISABLED_condition_gpu, basic_nested_ifs) {
|
||||
TEST(condition_gpu, basic_nested_ifs) {
|
||||
/*
|
||||
<prims...>
|
||||
<if 0>
|
||||
@ -394,191 +359,243 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) {
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
|
||||
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
auto compare2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
|
||||
auto predicate = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
auto predicate2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
auto scale_5_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
set_values(scale_5_mem, { 5.0f });
|
||||
auto scale_10_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
set_values(scale_10_mem, { 10.0f });
|
||||
|
||||
topology nested_true;
|
||||
condition::branch nested_true;
|
||||
{
|
||||
nested_true.add(eltwise("scale_5", { input_info("condi_nested"), input_info("scale_5_data") }, eltwise_mode::prod),
|
||||
data("scale_5_data", scale_5_mem));
|
||||
cldnn::topology nested_true_topology;
|
||||
nested_true_topology.add(
|
||||
input_layout("branch_input1", { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }),
|
||||
data("scale_5_data", scale_5_mem),
|
||||
eltwise("scale_5", { input_info("branch_input1"), input_info("scale_5_data") }, eltwise_mode::prod)
|
||||
);
|
||||
nested_true.inner_program = program::build_program(engine, nested_true_topology, config, true);
|
||||
nested_true.input_map.insert({"pooling_when_true", "branch_input1"});
|
||||
nested_true.output_map.insert({0, "scale_5"});
|
||||
}
|
||||
topology nested_false;
|
||||
condition::branch nested_false;
|
||||
{
|
||||
nested_false.add(eltwise("scale_10", { input_info("condi_nested"), input_info("scale_10_data") }, eltwise_mode::prod),
|
||||
data("scale_10_data", scale_10_mem));
|
||||
cldnn::topology nested_false_topology;
|
||||
nested_false_topology.add(
|
||||
input_layout("branch_input2", { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }),
|
||||
data("scale_10_data", scale_10_mem),
|
||||
eltwise("scale_10", { input_info("branch_input2"), input_info("scale_10_data") }, eltwise_mode::prod)
|
||||
);
|
||||
nested_false.inner_program = program::build_program(engine, nested_false_topology, config, true);
|
||||
nested_false.input_map.insert({"pooling_when_true", "branch_input2"});
|
||||
nested_false.output_map.insert({0, "scale_10"});
|
||||
}
|
||||
|
||||
topology branch_true;
|
||||
branch_true.add(
|
||||
pooling("pooling_when_true", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
|
||||
);
|
||||
branch_true.add(
|
||||
input_layout("compare2", compare2->get_layout())
|
||||
);
|
||||
condition::branch branch_true;
|
||||
{
|
||||
cldnn::topology branch_true_topology;
|
||||
branch_true_topology.add(
|
||||
input_layout("branch_input3", { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
|
||||
pooling("pooling_when_true", input_info("branch_input3"), cldnn::pooling_mode::max, { 1, 2 }, { 1, 2 }),
|
||||
input_layout("predicate2", predicate2->get_layout()),
|
||||
condition( "condi_nested", {input_info("predicate2"), input_info("pooling_when_true")}, nested_true, nested_false)
|
||||
);
|
||||
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
|
||||
branch_true.input_map.insert({"input", "branch_input3"});
|
||||
branch_true.output_map.insert({0, "condi_nested"});
|
||||
}
|
||||
|
||||
branch_true.add(
|
||||
condition(
|
||||
"condi_nested",
|
||||
input_info("pooling_when_true"),
|
||||
nested_true,
|
||||
nested_false,
|
||||
"compare2",
|
||||
cond_functions::EQUAL)
|
||||
);
|
||||
condition::branch branch_false;
|
||||
{
|
||||
cldnn::topology branch_false_topology;
|
||||
branch_false_topology.add(
|
||||
input_layout("branch_input4", { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
|
||||
pooling("pooling_when_false", input_info("branch_input4"), cldnn::pooling_mode::average, { 1, 2 }, { 1, 2 })
|
||||
);
|
||||
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
|
||||
branch_false.input_map.insert({"input", "branch_input4"});
|
||||
branch_false.output_map.insert({0, "pooling_when_false"});
|
||||
}
|
||||
|
||||
topology branch_false;
|
||||
branch_false.add(
|
||||
pooling("pooling_when_false", input_info("condi"), cldnn::pooling_mode::average, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
|
||||
);
|
||||
|
||||
topology topology;
|
||||
cldnn::topology topology;
|
||||
topology.add(
|
||||
input_layout("input", input->get_layout())
|
||||
);
|
||||
|
||||
topology.add(
|
||||
input_layout("compare", compare->get_layout())
|
||||
input_layout("predicate", predicate->get_layout())
|
||||
);
|
||||
|
||||
topology.add(
|
||||
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
|
||||
condition("condi", {input_info("predicate"), input_info("input")}, branch_true, branch_false)
|
||||
);
|
||||
|
||||
std::vector<float> input_data = {
|
||||
1.0f, 2.0f, 3.0f, 4.0f
|
||||
};
|
||||
std::vector<float> compare_data = {
|
||||
std::vector<float> predicate_data = {
|
||||
1.0f
|
||||
};
|
||||
std::vector<float> compare_2_data = {
|
||||
std::vector<float> predicate_2_data = {
|
||||
2.0f, 4.0f
|
||||
};
|
||||
set_values(input, input_data);
|
||||
set_values(compare, compare_data);
|
||||
set_values(compare2, compare_2_data);
|
||||
set_values(predicate, predicate_data);
|
||||
set_values(predicate2, predicate_2_data);
|
||||
|
||||
network net(engine, topology, config);
|
||||
net.set_input_data("input", input);
|
||||
net.set_input_data("compare", compare);
|
||||
net.set_input_data("compare2", compare2);
|
||||
net.set_input_data("predicate", predicate);
|
||||
net.set_input_data("predicate2", predicate2);
|
||||
auto outputs = net.execute();
|
||||
|
||||
auto out_data = outputs.at("condi").get_memory();
|
||||
ASSERT_TRUE(is_output_equal(out_data, { 10.0f, 20.0f }));
|
||||
ASSERT_TRUE(is_output_equal(out_data, std::vector<float>({ 10.0f, 20.0f })));
|
||||
}
|
||||
|
||||
// Negative test: the predicate input is allocated as f32 with dimensions
// { 1, 1, 5, 1 }, and constructing the network is expected to throw.
TEST(condition_gpu, negative_predicate_wrong_layout) {
    auto& engine = get_test_engine();
    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
    auto predicate = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 5, 1 } });

    primitive_id input_id = "input";
    primitive_id pred_id = "predicate";
    primitive_id branch_input_id = "branch_input";
    primitive_id cond_id = "condi";

    // Both branches are well-formed; only the predicate layout is unusual.
    condition::branch branch_true;
    {
        cldnn::topology branch_true_topology = generate_simple_branch(true, cond_id, branch_input_id, data_types::f32);
        branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
        branch_true.input_map.insert({input_id, branch_input_id});
        branch_true.output_map.insert({0, "condi_when_true"});
    }
    condition::branch branch_false;
    {
        cldnn::topology branch_false_topology = generate_simple_branch(false, cond_id, branch_input_id, data_types::f32);
        branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
        branch_false.input_map.insert({input_id, branch_input_id});
        branch_false.output_map.insert({0, "condi_when_false"});
    }

    topology topology;
    topology.add(
        input_layout(input_id, input->get_layout())
    );
    topology.add(
        input_layout(pred_id, predicate->get_layout())
    );
    topology.add(
        condition(cond_id, {input_info(pred_id), input_info(input_id)}, branch_true, branch_false)
    );

    EXPECT_ANY_THROW(network net(engine, topology, config););
}
|
||||
|
||||
// Negative test: the two branches pool the same { 1, 1, 4, 1 } input with
// different pooling arguments ({ 1, 2 } vs { 1, 4 }), and constructing the
// network is expected to throw (presumably because the branch output layouts
// then differ).
TEST(condition_gpu, negative_not_same_layouts) {
    auto& engine = get_test_engine();
    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
    auto predicate = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } });

    primitive_id input_id = "input";
    primitive_id pred_id = "predicate";
    primitive_id branch_input_id = "branch_input";
    primitive_id cond_id = "condi";

    condition::branch branch_true;
    {
        primitive_id pool_id = "pooling_when_true";
        topology branch_true_topology;
        branch_true_topology.add(
            input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
            pooling(pool_id, input_info(branch_input_id), cldnn::pooling_mode::max, { 1, 2 }, { 1, 2 })
        );
        branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
        branch_true.input_map.insert({input_id, branch_input_id});
        branch_true.output_map.insert({0, pool_id});
    }

    condition::branch branch_false;
    {
        primitive_id pool_id = "pooling_when_false";
        topology branch_false_topology;
        branch_false_topology.add(
            input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
            // Different pooling arguments from the true branch on purpose.
            pooling(pool_id, input_info(branch_input_id), cldnn::pooling_mode::max, { 1, 4 }, { 1, 4 })
        );
        branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
        branch_false.input_map.insert({input_id, branch_input_id});
        branch_false.output_map.insert({0, pool_id});
    }

    topology topology;
    topology.add(
        input_layout(input_id, input->get_layout())
    );
    topology.add(
        input_layout(pred_id, predicate->get_layout())
    );
    topology.add(
        condition(cond_id, {input_info(pred_id), input_info(input_id)}, branch_true, branch_false)
    );

    EXPECT_ANY_THROW(network net(engine, topology, config););
}
|
||||
|
||||
TEST(DISABLED_condition_gpu, negative_not_same_layouts) {
|
||||
TEST(condition_gpu, negative_same_names_within_different_networks) {
|
||||
auto& engine = get_test_engine();
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
|
||||
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
auto predicate = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
|
||||
topology branch_true;
|
||||
branch_true.add(
|
||||
pooling("pooling_when_true", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
|
||||
);
|
||||
primitive_id input_id = "input";
|
||||
primitive_id pred_id = "predicate";
|
||||
primitive_id branch_input_id = "branch_input";
|
||||
primitive_id cond_id = "condi";
|
||||
primitive_id duplicated_id = "pooling_check_name";
|
||||
|
||||
topology branch_false;
|
||||
branch_false.add(
|
||||
pooling("pooling_when_false", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 4, 1 }, { 0, 0, 4, 1 })
|
||||
);
|
||||
condition::branch branch_true;
|
||||
{
|
||||
topology branch_true_topology;
|
||||
branch_true_topology.add(
|
||||
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
|
||||
pooling(duplicated_id, input_info(branch_input_id), cldnn::pooling_mode::max, { 2, 1 }, { 2, 1 })
|
||||
);
|
||||
branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true);
|
||||
branch_true.input_map.insert({input_id, branch_input_id});
|
||||
branch_true.output_map.insert({0, duplicated_id});
|
||||
}
|
||||
|
||||
condition::branch branch_false;
|
||||
{
|
||||
topology branch_false_topology;
|
||||
branch_false_topology.add(
|
||||
input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }),
|
||||
pooling("pooling_when_false", input_info(branch_input_id), cldnn::pooling_mode::max, { 2, 1 }, { 2, 1 })
|
||||
);
|
||||
branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true);
|
||||
branch_false.input_map.insert({input_id, branch_input_id});
|
||||
branch_false.output_map.insert({0, "pooling_when_false"});
|
||||
}
|
||||
|
||||
topology topology;
|
||||
topology.add(
|
||||
input_layout("input", input->get_layout())
|
||||
input_layout(input_id, input->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
input_layout("compare", compare->get_layout())
|
||||
input_layout(pred_id, predicate->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
|
||||
);
|
||||
|
||||
EXPECT_ANY_THROW(network net(engine, topology, config););
|
||||
}
|
||||
|
||||
TEST(DISABLED_condition_gpu, negative_same_names_within_different_networks) {
|
||||
auto& engine = get_test_engine();
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
|
||||
auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
|
||||
|
||||
topology branch_true;
|
||||
branch_true.add(
|
||||
pooling("pooling_check_name", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
|
||||
);
|
||||
|
||||
topology branch_false;
|
||||
branch_false.add(
|
||||
pooling("pooling_when_false", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
|
||||
);
|
||||
|
||||
topology topology;
|
||||
topology.add(
|
||||
input_layout("input", input->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
input_layout("compare", compare->get_layout())
|
||||
);
|
||||
topology.add(
|
||||
condition("condi", input_info("input"), branch_true, branch_false, "compare", cond_functions::EQUAL)
|
||||
);
|
||||
topology.add(
|
||||
pooling("pooling_check_name", input_info("condi"), cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
|
||||
condition(cond_id, {input_info(pred_id), input_info(input_id)}, branch_true, branch_false)
|
||||
);
|
||||
topology.add(
|
||||
pooling(duplicated_id, input_info(cond_id), cldnn::pooling_mode::max, { 2, 1 }, { 2, 1 })
|
||||
);
|
||||
|
||||
EXPECT_ANY_THROW(network net(engine, topology, config););
|
||||
|
Loading…
Reference in New Issue
Block a user