[GPU] Fix acc issue for LSTMSequence w/ -1 seq_length (#21054)
* [GPU] Fix acc issue for LSTMSequence w/ -1 seq_length
* add output port for multiple outputs of node
* add functional test for lstm_sequence
* Fix CI test failures
This commit is contained in:
parent a720b43041
commit da2a886477
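The gist of the change: dependency edges in the GPU graph now carry the producer's output-port index instead of a hard-coded 0, so a consumer wired to a multi-output node (such as an LSTMSequence compiled with a -1 sequence length) reads from the correct port. A minimal standalone sketch of that lookup pattern, using simplified stand-in types rather than the actual cldnn API, is shown below; in the diff itself the same idea appears as program_node::get_port_from_deps and get_dependency_with_port.

// Sketch only: simplified stand-ins for cldnn::input_info and the dependency list.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct input_info {
    std::string pid;  // producer primitive id
    int32_t idx;      // producer output port
};

// Return which output port of `target_id` feeds this node; default to port 0 if not found.
int32_t get_port_from_deps(const std::vector<input_info>& deps, const std::string& target_id) {
    auto iter = std::find_if(deps.begin(), deps.end(),
                             [&](const input_info& info) { return info.pid == target_id; });
    return iter != deps.end() ? iter->idx : 0;
}

int main() {
    // An LSTMSequence-like producer exposes several outputs; a consumer connected
    // to its second output must keep port index 1 when the edge is re-inserted.
    std::vector<input_info> deps = {{"lstm_seq", 1}, {"bias", 0}};
    std::cout << get_port_from_deps(deps, "lstm_seq") << std::endl;  // prints 1
    std::cout << get_port_from_deps(deps, "missing") << std::endl;   // prints 0
    return 0;
}

The diff below applies this pattern wherever a dependency used to be inserted as {&node, 0} (fuse_bias, add_connection, fuse_nodes, the LSTM timeloop node) and extends is_dynamic_output_layout() to take the port index.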
@@ -273,10 +273,6 @@ struct loop : public primitive_base<loop> {
protected:
    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
        std::vector<std::reference_wrapper<const primitive_id>> ret;
        ret.push_back(std::ref(num_iteration_id));
        if (!trip_count_id.empty()) ret.push_back(std::ref(trip_count_id));
        if (!first_execution_condition_id.empty()) ret.push_back(std::ref(first_execution_condition_id));

        // add external_id in dependencies if not exist
        for (const auto& mapping : input_primitive_maps) {
            auto target = std::find_if(input.begin(), input.end(),
@@ -83,7 +83,9 @@ void handle_reshape::run(program& p) {

    for (const auto& node : p.get_processing_order()) {
        if (node->is_type<reshape>()) {
            auto& input_node = node->get_dependency(0);
            const auto& dep = node->get_dependency_with_port(0);
            auto& input_node = *dep.first;
            auto& input_port = dep.second;

            if (input_node.is_type<reorder>())
                continue;
@@ -162,8 +164,10 @@ void handle_reshape::run(program& p) {
                if (std::find(reorder_node_to_split.begin(), reorder_node_to_split.end(), user) !=
                    reorder_node_to_split.end()) {
                    auto new_reshape = std::make_shared<reshape>("reorder:_reshape_split_" + user->id() + "_" + node->id(),
                                                                 input_node.id(),
                                                                 cldnn::input_info(input_node.id(), input_port),
                                                                 output_shape);
                    GPU_DEBUG_LOG << "reshape_handler: " << new_reshape->id
                                  << " input_info : " << new_reshape->dependencies().front().to_string() << std::endl;
                    new_reshape->special_zero = prim->special_zero;
                    new_reshape->output_partial_shape = prim->output_partial_shape;
                    new_reshape->output_pattern = prim->output_pattern;
@@ -192,9 +196,12 @@ void handle_reshape::run(program& p) {
                auto format = cldnn::format::get_default_format(dims);
                auto reshape_input = std::make_shared<reorder>(
                    "reorder:_reshape_input_" + reorder_node->id() + "_" + reorder_reshape_node->id(),
                    input_node.id(),
                    cldnn::input_info(input_node.id(), input_port),
                    format,
                    reshape_in_layout.data_type);
                GPU_DEBUG_LOG << "reshape_handler: " << reshape_input->id
                              << " input_info : " << reshape_input->dependencies().front().to_string() << std::endl;

                auto& reshape_input_node = p.get_or_create(reshape_input);
                p.add_intermediate(reshape_input_node,
                                   *reorder_reshape_node,
@@ -214,9 +221,11 @@ void handle_reshape::run(program& p) {
            // in reshape stage we assume user provides the input vector in bfyx
            if (!reshape_layout.compatible(target_layout)) {
                auto reshape_input = std::make_shared<reorder>("reorder:_reshape_input_" + node->id(),
                                                               input_node.id(),
                                                               cldnn::input_info(input_node.id(), input_port),
                                                               target_format,
                                                               reshape_layout.data_type);
                GPU_DEBUG_LOG << "reshape_handler: " << reshape_input->id
                              << " input_info : " << reshape_input->dependencies().front().to_string() << std::endl;
                auto& reshape_input_node = p.get_or_create(reshape_input);
                p.add_intermediate(reshape_input_node, *node, 0, reshape_input_node.get_dependencies().empty());
                reshape_input_node.recalc_output_layout();
@@ -315,7 +315,8 @@ void prepare_primitive_fusing::fuse_bias(program &p) {
        p.replace(prev_node, new_node);
        // Insert bias_node into 3-rd position in dependencies vector to get correct order in case of asymmetric quantization
        // which means that node can have > 2 dependencies even without bias
        new_node.dependencies.insert(new_node.dependencies.begin() + 2, {&bias_node, 0});
        auto port_idx = new_node.get_port_from_deps(bias_node.id());
        new_node.dependencies.insert(new_node.dependencies.begin() + 2, {&bias_node, port_idx});
        bias_node.users.push_back(&new_node);

        // Remove all edges connected with peer node
@@ -350,8 +350,11 @@ void remove_redundant_reorders::run(program& p) {
            !user->has_fused_primitives()) {
            auto l1 = node->get_output_layout();
            auto l2 = user->get_output_layout();
            // in multiple outputs, remove redundant reorder is only allowed for same output port idx
            auto l1_port_idx = node->get_dependency_with_port(0).second;
            auto l2_port_idx = user->get_dependency_with_port(0).second;

            if (l1.identical(l2))
            if (l1.identical(l2) && (l1_port_idx == l2_port_idx))
                r_nodes_to_remove.push_back(user);
        }
    }
@@ -118,18 +118,35 @@ struct loop_impl : typed_primitive_impl<loop> {

        auto body_network = instance.get_body_network();
        int64_t current_iteration_idx = 0;

        auto ev = stream.create_user_event(false);
        const auto is_dynamic = instance.is_dynamic();

        if (is_dynamic) {
            instance.update_shape();
            if (instance.shape_changed()) {
                instance.preproc_memories_done = false;
                instance.reset_memory();
            }
        }

        body_network->set_shape_predictor(outer_network.get_shape_predictor());
        OPENVINO_ASSERT(!primitive->num_iteration_id.empty(), "loop operation should have num_iteration_id");

        // shortcut of execution_condition memory in body network
        memory::ptr body_execution_condition_mem = nullptr;
        if (!primitive->body_execution_condition_id.empty()) {
            body_execution_condition_mem = body_network->get_primitive(primitive->body_execution_condition_id)->output_memory_ptr();
        }

        // shortcut of current_iteration memory in body network
        if (!primitive->body_current_iteration_id.empty()) {
            memory::ptr body_current_iteration_mem = body_network->get_primitive(primitive->body_current_iteration_id)->output_memory_ptr();
            write_scalar_value(body_current_iteration_mem, body_network->get_stream(), 0);
        }

        auto num_iterations = instance.get_num_iterations();
        GPU_DEBUG_LOG << "num_iterations : " << num_iterations << std::endl;

        //////////////////////////////////////////
        // memory pointers for outer network
        //////////////////////////////////////////
        // read trip_count from outer network
        int64_t trip_count = -1;
        if (!primitive->trip_count_id.empty()) {
@@ -166,30 +183,6 @@ struct loop_impl : typed_primitive_impl<loop> {
            return ev;
        }

        //////////////////////////////////////////
        // memory pointers for body network
        //////////////////////////////////////////
        // shortcut of execution_condition memory in body network
        memory::ptr body_execution_condition_mem = nullptr;
        if (!primitive->body_execution_condition_id.empty()) {
            body_execution_condition_mem = body_network->get_primitive(primitive->body_execution_condition_id)->output_memory_ptr();
        }

        // shortcut of current_iteration memory in body network
        if (!primitive->body_current_iteration_id.empty()) {
            memory::ptr body_current_iteration_mem = body_network->get_primitive(primitive->body_current_iteration_id)->output_memory_ptr();
            write_scalar_value(body_current_iteration_mem, body_network->get_stream(), 0);
        }

        const auto is_dynamic = instance.is_dynamic();
        if (is_dynamic) {
            instance.update_shape();
            if (instance.shape_changed()) {
                instance.preproc_memories_done = false;
                instance.reset_memory();
            }
        }

        if (!instance.preproc_memories_done) {
            instance.preprocess_output_memory(num_iterations);
            instance.preprocess_input_memory(num_iterations);
@@ -446,6 +446,18 @@ public:
    void set_preferred_input_fmt(size_t idx, format::type type);
    void set_preferred_output_fmt(size_t idx, format::type type);

    int32_t get_port_from_deps(primitive_id target_id) const {
        auto deps = get_primitive()->dependencies();
        auto iter = std::find_if(deps.begin(), deps.end(), [&](input_info& info) {
            return target_id == info.pid;
        });
        if (iter != deps.end()) {
            return iter->idx;
        } else {
            return 0;
        }
    }

protected:
    size_t unique_id = 0;
    static thread_local size_t cur_id;
@@ -655,17 +655,26 @@ void loop_inst::postprocess_output_memory(bool is_dynamic, int64_t current_itera
            OPENVINO_ASSERT(internal_mem != nullptr, "internal_mem should not be nullptr");
            if (!output_allocated) {
                external_outputs[external_id.idx] = internal_mem;
                GPU_DEBUG_LOG << "[Internal: " << internal_id.to_string() << ", External: " << external_id.to_string() << " ] "
                              << "Set internal memory(" << internal_mem << ") to external output because external output memory is nullptr." << std::endl;
            } else {
                auto external_mem = _outputs[external_id.idx];
                if (external_mem != internal_mem) {
                    if (external_mem->get_layout() != internal_mem->get_layout()) {
                        external_outputs[external_id.idx] = internal_mem;
                        GPU_DEBUG_LOG << "[Internal: " << internal_id.to_string() << ", External: " << external_id.to_string() << " ] "
                                      << "Set internal memory(" << internal_mem
                                      << ") to external output for different layout between external_mem and internal_mem." << std::endl;
                    } else {
                        external_mem->copy_from(get_network().get_stream(), *internal_mem);
                        external_outputs[external_id.idx] = external_mem;
                        GPU_DEBUG_LOG << "[Internal: " << internal_id.to_string() << ", External: " << external_id.to_string() << " ] "
                                      << "Copy internal memory data to external memory data." << std::endl;
                    }
                } else {
                    external_outputs[external_id.idx] = external_mem;
                    GPU_DEBUG_LOG << "[Internal: " << internal_id.to_string() << ", External: " << external_id.to_string() << " ] "
                                  << " Have same memory pointer." << std::endl;
                }
            }
        } else {
@@ -680,9 +689,17 @@ void loop_inst::postprocess_output_memory(bool is_dynamic, int64_t current_itera
                });
                if (iter != concatenated_output_mem_mappings.end()) {
                    (*iter)->update_concatenated_mem(concat_mem);
                    GPU_DEBUG_LOG << "[Internal: " << internal_id.to_string() << ", External: " << external_id.to_string() << " ]"
                                  << " Update concat_mem" << std::endl;
                }
                GPU_DEBUG_IF(iter == concatenated_output_mem_mappings.end()) {
                    GPU_DEBUG_LOG << "[Internal: " << internal_id.to_string() << ", External: " << external_id.to_string() << " ]"
                                  << " Can't find concatenated_memory_mapping" << std::endl;
                }
            } else {
                external_outputs[external_id.idx] = _outputs[external_id.idx];
                GPU_DEBUG_LOG << "[Internal: " << internal_id.to_string() << ", External: " << external_id.to_string() << " ]"
                              << " No update concat_mem" << std::endl;
            }
        }
    }
@@ -696,6 +713,7 @@ void loop_inst::postprocess_output_memory(bool is_dynamic, int64_t current_itera
}

void loop_inst::reset_memory() {
    GPU_DEBUG_LOG << "Reset memory" << std::endl;
    backedge_memory_mappings.clear();
    concatenated_input_mem_mappings.clear();
    for (auto concat_mem_map : concatenated_output_mem_mappings) {
@@ -882,7 +900,9 @@ int64_t loop_inst::get_num_iterations() {
            is_default_num_iter = false;
            num_iterations = current_num_iterations;
        }
        OPENVINO_ASSERT(num_iterations == current_num_iterations,
        // only check num_terations when shape is not changed.
        if (preproc_memories_done)
            OPENVINO_ASSERT(num_iterations == current_num_iterations,
                            "iteration num shuld be same between ", num_iterations, " and ", current_num_iterations);
    }
    return num_iterations;
@@ -928,6 +948,7 @@ std::vector<event::ptr> loop_inst::handle_buffers_for_next_iteration(const loop_
                if (mapping.from_mem != nullptr) {
                    auto ev = mapping.from_mem->copy_from(body_network->get_stream(), *(mapping.initial_mem));
                    if (ev) event_vec = {ev};
                    GPU_DEBUG_LOG << iter << ") Copy data from inintal_mem(" << mapping.initial_mem << ")" << std::endl;
                }
            } else {
                // In dynamic model, output memory is not defined before execution.
@@ -936,6 +957,7 @@ std::vector<event::ptr> loop_inst::handle_buffers_for_next_iteration(const loop_
                    mapping.from_mem = mapping.from_primitive->output_memory_ptr();
                    OPENVINO_ASSERT(mapping.from_mem != nullptr, "from_mem should not be null");
                    set_memory_in_body_network(body_network, mapping.to_primitive, mapping.from_mem);
                    GPU_DEBUG_LOG << iter << ") Set memory from from_mem(" << mapping.from_mem << ") to " << mapping.to_primitive->id() << ")" << std::endl;
                }
            }
        } else if (mapping.type == loop_inst::backedge_memory_mapping::SINGLE) {
@@ -38,7 +38,8 @@ void lstm_dynamic_timeloop_node::reverse_optional_outputs_connections() {
    }));
    mutable_data_node.users.push_back(this);
    users.remove(&mutable_data_node);
    dependencies.insert(dependencies.begin() + index_to_insert, {&mutable_data_node, 0});
    auto port_idx = get_port_from_deps(mutable_data_node.id());
    dependencies.insert(dependencies.begin() + index_to_insert, {&mutable_data_node, port_idx});
    // fix inputs/outputs
    if (mutable_data_node.get_dependencies().empty()) {
        myprog.get_inputs().push_back(&mutable_data_node);
@@ -1011,6 +1011,17 @@ primitive_inst::primitive_inst(network& network, program_node const& node, bool
        allocate_memory = false;
    }
    _mem_allocated = allocate_memory;
    if (!_mem_allocated && (node.is_dynamic() && _outputs_memory_count > 1)) {
        auto avaiable_allocate_memory = [&](std::vector<cldnn::layout>& layouts) -> bool {
            for (auto& l : layouts) {
                if (l.is_static())
                    return true;
            }
            return false;
        };
        allocate_memory = _mem_allocated = avaiable_allocate_memory(_impl_params->output_layouts);
    }

    if (allocate_memory) {
        // In case when output is mutable_data primitive, and other users dependencies are only used for
        // synchronization, The output memory of such primitive will be fused with mutable_data
@@ -1377,23 +1388,28 @@ memory::ptr primitive_inst::allocate_output(engine& _engine,

std::vector<memory::ptr> primitive_inst::allocate_outputs(kernel_impl_params* updated_params, bool reset_mem, bool runtime_alloc) {
    std::vector<memory::ptr> outputs;
    auto impl_params = updated_params != nullptr ? *updated_params : *_impl_params;
    auto& out_layouts = impl_params.output_layouts;
    for (size_t i = 0; i < get_node().get_outputs_count() ; ++i) {
        auto impl_params = updated_params != nullptr ? *updated_params : *_impl_params;
        auto current_memory_ptr = _outputs.size() > i ? output_memory_ptr(i).get() : nullptr;
        auto is_output = is_output_buffer(this, runtime_alloc);
        if (out_layouts[i].is_dynamic() && !out_layouts[i].has_upper_bound()) {
            outputs.push_back(memory::ptr());
        } else {
            auto current_memory_ptr = _outputs.size() > i ? output_memory_ptr(i).get() : nullptr;
            auto is_output = is_output_buffer(this, runtime_alloc);

        outputs.push_back(allocate_output(_network.get_engine(),
                                          _network.get_memory_pool(),
                                          *_node,
                                          impl_params,
                                          _runtime_memory_dependencies,
                                          get_network_id(),
                                          _network.is_internal(),
                                          i,
                                          reset_mem,
                                          is_output,
                                          current_memory_ptr,
                                          runtime_alloc));
            outputs.push_back(allocate_output(_network.get_engine(),
                                              _network.get_memory_pool(),
                                              *_node,
                                              impl_params,
                                              _runtime_memory_dependencies,
                                              get_network_id(),
                                              _network.is_internal(),
                                              i,
                                              reset_mem,
                                              is_output,
                                              current_memory_ptr,
                                              runtime_alloc));
        }
    }
    return outputs;
}
@@ -850,7 +850,8 @@ void program::add_intermediate(program_node& node,

void program::add_connection(program_node& prev, program_node& next) {
    prev.users.push_back(&next);
    next.dependencies.push_back({&prev, 0});
    auto port_idx = next.get_port_from_deps(prev.id());
    next.dependencies.push_back({&prev, port_idx});
}

void program::remove_connection(program_node& prev, program_node& next) {
@@ -1131,7 +1132,9 @@ void program::fuse_nodes(program_node &fused_node,
                continue;
            }
        }
        fused_node.dependencies.push_back({&dep, 0});

        auto port_idx = fused_node.get_port_from_deps(dep.id());
        fused_node.dependencies.push_back({&dep, port_idx});
        local_desc.deps.emplace_back(dep.id(), deps_idx++);
        dep.users.push_back(&fused_node);
    }
@@ -368,7 +368,7 @@ bool program_node::recalc_output_layouts(bool invalidate_users_if_changed) {

bool program_node::is_dynamic() const {
    for (const auto& input : get_dependencies()) {
        if (input.first->is_dynamic_output_layout())
        if (input.first->is_dynamic_output_layout(input.second))
            return true;
    }

@@ -381,7 +381,7 @@ bool program_node::is_dynamic() const {

bool program_node::is_dynamic() {
    for (auto& input : get_dependencies()) {
        if (input.first->is_dynamic_output_layout())
        if (input.first->is_dynamic_output_layout(input.second))
            return true;
    }

@@ -66,19 +66,23 @@ static void SetLoopInputOutputMap(ProgramBuilder& p,
        auto& body_input = body_inputs.at(loop_input_desc->m_body_parameter_index);
        cldnn::primitive_id internal_id = layer_type_name_ID(body_input);

        GPU_DEBUG_LOG << "loop_input_descs[" << layerName << "] = {m_input_index:" << loop_input_desc->m_input_index << "(external_id: "
                      << external_id << "), m_body_parameter_index:" << loop_input_desc->m_body_parameter_index
                      << "(internal_id: " << internal_id << ")}" << std::endl;

        // set input mapping
        if (const auto& sliceInfo =
            std::dynamic_pointer_cast<ov::op::util::MultiSubGraphOp::SliceInputDescription>(loop_input_desc)) {
            // sliced input
            input_primitive_maps.emplace_back(external_id, internal_id, sliceInfo->m_axis,
                                              sliceInfo->m_start, sliceInfo->m_end, sliceInfo->m_stride);
            GPU_DEBUG_LOG << "loop_input_descs[" << layerName << "][SliceInputDescription] = {m_input_index:"
                          << loop_input_desc->m_input_index << "(external_id: "
                          << external_id << "), m_body_parameter_index:" << loop_input_desc->m_body_parameter_index
                          << "(internal_id: " << internal_id << ")}" << std::endl;
        } else {
            // input without slicing
            input_primitive_maps.emplace_back(external_id, internal_id);
            GPU_DEBUG_LOG << "loop_input_descs[" << layerName << "][InputDescription] = {m_input_index:"
                          << loop_input_desc->m_input_index << "(external_id: "
                          << external_id << "), m_body_parameter_index:" << loop_input_desc->m_body_parameter_index
                          << "(internal_id: " << internal_id << ")}" << std::endl;
        }

        // set back edges
@@ -92,6 +96,7 @@ static void SetLoopInputOutputMap(ProgramBuilder& p,
            cldnn::primitive_id from_id = layer_type_name_ID(from);

            back_edges_maps.emplace_back(from_id, to_id);
            GPU_DEBUG_LOG << "back_edge = {" << from_id << " => " << to_id << "}" << std::endl;
        }
    }

@@ -279,7 +284,7 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr<ov::op::
    config.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic));

    // get body program from ov::Model
    ProgramBuilder prog(ov_model, p.get_engine(), config, false, false, p.get_task_executor(), p.get_compilation_context(), true);
    ProgramBuilder prog(ov_model, p.get_engine(), config, false, false, p.get_task_executor(), p.get_compilation_context(), true);
    auto body_program = prog.get_compiled_program();

    GPU_DEBUG_LOG << "* trip_count_id : " << trip_count_id << std::endl;
@@ -169,7 +169,7 @@ static std::shared_ptr<ov::Model> makeLSTMSequence(ov::element::Type_t ngPRC, ov

enum class LSTMType {
    LSTMCell = 0,
    LSTMSequence = 1 // will be updated at next step.
    LSTMSequence = 1
};

using DynamicTensorIteratorParams = typename std::tuple<
@@ -288,6 +288,10 @@ TEST_P(DynamicTensorIteratorTest, CompareWithRefs) {
    run();
}

std::vector<LSTMType> lstm_types = {
    LSTMType::LSTMCell, LSTMType::LSTMSequence
};

std::vector<InputShape> input_shapes = {
    InputShape(ov::PartialShape({1, -1, 512}), {{1, 30, 512}, {1, 10, 512}, {1, 5, 512}})
};
@@ -319,4 +323,15 @@ INSTANTIATE_TEST_SUITE_P(smoke_DynamicTensorIterator_LSTMCell, DynamicTensorIter
                        /* data_prc */ testing::ValuesIn(net_precision),
                        /* configuration */ testing::Values<ov::AnyMap>(net_configuration)),
                        DynamicTensorIteratorTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_DynamicTensorIterator_LSTMSequence, DynamicTensorIteratorTest,
                        testing::Combine(
                        /* lstm_type */ testing::ValuesIn({LSTMType::LSTMSequence}),
                        /* data_shape */ testing::ValuesIn(input_shapes),
                        /* hidden_size */ testing::ValuesIn(hidden_sizes),
                        /* direction */ testing::ValuesIn(reccurent_sequence_direction),
                        /* device */ testing::Values<std::string>(ov::test::utils::DEVICE_GPU),
                        /* data_prc */ testing::ValuesIn(net_precision),
                        /* configuration */ testing::Values<ov::AnyMap>(net_configuration)),
                        DynamicTensorIteratorTest::getTestCaseName);
} // namespace GPULayerTestsDefinitions
@@ -111,7 +111,8 @@ TEST_P(permute_eltwise_loop, basic) {
        data("trip_count", trip_count_mem),
        data("initial_condition", initial_condition_mem),
        mutable_data("num_iteration", num_iteration_mem),
        loop("loop", { input_info("num_iteration"), input_info("eltwise"), input_info("loop_eltwise_init_values") }, body_program,
        loop("loop", { input_info("num_iteration"), input_info("trip_count"), input_info("initial_condition"),
             input_info("eltwise"), input_info("loop_eltwise_init_values") }, body_program,
             "trip_count", "initial_condition", "num_iteration",
             input_primitive_maps, output_primitive_maps, back_edges, p.loop_trip_count),
        reorder("output", input_info("loop"), format::bfyx, p.default_type)
@@ -99,7 +99,7 @@ void test_loop_gpu_basic_no_concat(bool is_caching_test)
        input_layout("trip_count", trip_count_mem->get_layout()),
        input_layout("initial_condition", initial_condition_mem->get_layout()),
        mutable_data("num_iteration", num_iteration_mem),
        loop("loop", { input_info("num_iteration"), input_info("input") }, body_program,
        loop("loop", { input_info("num_iteration"), input_info("trip_count"), input_info("initial_condition"), input_info("input") }, body_program,
            "trip_count", "initial_condition", "num_iteration",
            input_primitive_maps, output_primitive_maps, back_edges, 8)
    );
@@ -201,7 +201,7 @@ void test_loop_gpu_basic_concat(bool is_caching_test)
        input_layout("trip_count", trip_count_mem->get_layout()),
        input_layout("initial_condition", initial_condition_mem->get_layout()),
        mutable_data("num_iteration", num_iteration_mem),
        loop("loop", { input_info("num_iteration"), input_info("input") }, body_program,
        loop("loop", { input_info("num_iteration"), input_info("trip_count"), input_info("initial_condition"), input_info("input") }, body_program,
            "trip_count", "initial_condition", "num_iteration",
            input_primitive_maps, output_primitive_maps, back_edges, trip_count)
    );
@@ -316,7 +316,7 @@ void test_loop_gpu_basic_concat_nested(bool is_caching_test)
        input_layout("trip_count", inner_trip_count_mem->get_layout()),
        input_layout("initial_condition", inner_initial_condition_mem->get_layout()),
        mutable_data("inner_num_iteration", inner_num_iteration_mem),
        loop("inner_loop", { input_info("inner_num_iteration"), input_info("inner_input"), input_info("trip_count"), input_info("initial_condition") },
        loop("inner_loop", { input_info("inner_num_iteration"), input_info("trip_count"), input_info("initial_condition"), input_info("inner_input") },
            inner_body_program, "trip_count", "initial_condition", "inner_num_iteration",
            inner_input_primitive_maps, inner_output_primitive_maps, inner_back_edges, inner_trip_count)
    );
@@ -342,9 +342,10 @@ void test_loop_gpu_basic_concat_nested(bool is_caching_test)
        mutable_data("num_iteration", num_iteration_mem),
        input_layout("inner_trip_count", inner_trip_count_mem->get_layout()),
        input_layout("inner_initial_condition", inner_initial_condition_mem->get_layout()),
        loop("loop", { input_info("num_iteration"), input_info("input"), input_info("inner_trip_count"), input_info("inner_initial_condition") },
            outer_body_program, "trip_count", "initial_condition", "num_iteration",
            outer_input_primitive_maps, outer_output_primitive_maps, outer_back_edges, outer_trip_count)
        loop("loop", { input_info("num_iteration"), input_info("trip_count"), input_info("initial_condition"),
            input_info("input"), input_info("inner_trip_count"), input_info("inner_initial_condition") },
            outer_body_program, "trip_count", "initial_condition", "num_iteration",
            outer_input_primitive_maps, outer_output_primitive_maps, outer_back_edges, outer_trip_count)
    );

    /////////////////////////////////
@@ -498,7 +499,7 @@ static void test_loop_gpu_wo_trip_count(bool is_caching_test) {
        input_layout("input", e_input_layout),
        input_layout(initial_condition_id, e_initial_condition_mem->get_layout()),
        mutable_data(actual_iteration_count_id, e_num_iteration_mem),
        loop("loop", { input_info(actual_iteration_count_id), input_info("input") }, body_program,
        loop("loop", { input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input") }, body_program,
            trip_count_id, initial_condition_id, actual_iteration_count_id,
            input_primitive_maps, output_primitive_maps, back_edges,
            num_iterations, body_current_iteration_id, body_execution_condition_id, 2),