[GPU] Stateful LLMs initial support (#21235)
This commit is contained in:
parent
76b1861f86
commit
7ab79be0f6
@ -0,0 +1,17 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "shared_test_classes/subgraph/stateful_model.hpp"
|
||||
|
||||
using namespace ov::test;
|
||||
|
||||
// Instantiates the stateful-model subgraph test suites declared in
// shared_test_classes/subgraph/stateful_model.hpp. Every suite is registered
// under the "smoke" prefix and is parameterized by a single value: the CPU
// device name.
namespace {
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke, StaticShapeStatefulModel, ::testing::Values(ov::test::utils::DEVICE_CPU));
|
||||
INSTANTIATE_TEST_SUITE_P(smoke, StaticShapeTwoStatesModel, ::testing::Values(ov::test::utils::DEVICE_CPU));
|
||||
INSTANTIATE_TEST_SUITE_P(smoke, DynamicShapeStatefulModelDefault, ::testing::Values(ov::test::utils::DEVICE_CPU));
|
||||
INSTANTIATE_TEST_SUITE_P(smoke, DynamicShapeStatefulModelParam, ::testing::Values(ov::test::utils::DEVICE_CPU));
|
||||
INSTANTIATE_TEST_SUITE_P(smoke, DynamicShapeStatefulModelStateAsInp, ::testing::Values(ov::test::utils::DEVICE_CPU));
|
||||
|
||||
} // namespace
|
@ -53,6 +53,7 @@ struct kernel_impl_params {
|
||||
optional_layout weights_zero_points_layout = optional_layout();
|
||||
optional_layout activations_zero_points_layout = optional_layout();
|
||||
optional_layout compensation_layout = optional_layout();
|
||||
optional_layout state_layout = optional_layout();
|
||||
|
||||
std::map<size_t, memory::ptr> memory_deps = {};
|
||||
size_t primary_input_idx = 0;
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "intel_gpu/runtime/stream.hpp"
|
||||
#include "intel_gpu/runtime/lru_cache.hpp"
|
||||
#include "intel_gpu/runtime/shape_predictor.hpp"
|
||||
#include "intel_gpu/plugin/variable_state.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
@ -66,20 +67,6 @@ struct network {
|
||||
public:
|
||||
using ptr = std::shared_ptr<network>;
|
||||
|
||||
struct VariableState {
|
||||
using Ptr = std::shared_ptr<VariableState>;
|
||||
|
||||
VariableState(cldnn::memory_ptr mem = nullptr) :
|
||||
memory { std::move(mem) }, is_set { false } {
|
||||
}
|
||||
void set_memory(cldnn::memory_ptr new_mem) {
|
||||
memory = new_mem;
|
||||
}
|
||||
cldnn::memory_ptr memory;
|
||||
bool is_set;
|
||||
};
|
||||
using variables_states_map = std::map<std::string, VariableState::Ptr>;
|
||||
|
||||
explicit network(program::ptr program, const ExecutionConfig& config, stream::ptr stream, bool is_internal = false, bool is_primary_stream = true);
|
||||
network(engine& engine,
|
||||
const topology& topo,
|
||||
@ -232,19 +219,13 @@ public:
|
||||
return *_memory_pool;
|
||||
}
|
||||
|
||||
void allocate_variables_memories();
|
||||
void assign_variables_memories();
|
||||
/// Assigns memory state locations
|
||||
void assign_variables_memories(variables_states_map &&variables_memories);
|
||||
void update_variable_memory(const std::string& variable_id, const cldnn::layout& layout);
|
||||
void set_variable(const std::string& name, const std::shared_ptr<ov::intel_gpu::VariableState>& variable);
|
||||
bool has_variable(const std::string &variable_id) const;
|
||||
ov::intel_gpu::VariableState& get_variable(const std::string &variable_id) const;
|
||||
const ov::intel_gpu::VariableStateInfo& get_variable_info(const std::string &variable_id) const;
|
||||
const ov::intel_gpu::VariablesMap& get_variables() const;
|
||||
const ov::intel_gpu::VariablesInfoMap& get_variables_info() const;
|
||||
|
||||
/// Returns memory state @p variable_id of stateful network
|
||||
VariableState& get_variable_memory(const std::string &variable_id);
|
||||
const variables_states_map& get_variable_memories() const { return _variables_states; }
|
||||
|
||||
using variables_state_info_map = std::map<std::string, cldnn::layout>;
|
||||
void set_variables_state_info(const std::string& variable_id, const cldnn::layout& layout);
|
||||
const variables_state_info_map& get_variables_state_info() const;
|
||||
const ExecutionConfig& get_config() const { return _config; }
|
||||
|
||||
std::shared_ptr<ShapePredictor> get_shape_predictor() { return _shape_predictor; }
|
||||
@ -276,9 +257,10 @@ private:
|
||||
std::vector<std::shared_ptr<primitive_inst>> _outputs;
|
||||
std::list<std::shared_ptr<primitive_inst>> _exec_order;
|
||||
std::list<std::shared_ptr<primitive_inst>> _data_outputs;
|
||||
variables_states_map _variables_states;
|
||||
std::vector<std::shared_ptr<primitive_inst>> _variable_state_primitives;
|
||||
variables_state_info_map _variables_state_info;
|
||||
|
||||
ov::intel_gpu::VariablesMap _variables_states;
|
||||
ov::intel_gpu::VariablesInfoMap _variables_state_info;
|
||||
|
||||
program::primitives_info _prims_info;
|
||||
std::map<primitive_id, primitive_id> _ext_id_mapping;
|
||||
size_t _weights_cache_capacity = 1;
|
||||
@ -300,6 +282,7 @@ private:
|
||||
void add_default_output_chains();
|
||||
void calculate_weights_cache_capacity();
|
||||
output_chains_map::iterator add_output_chain(std::shared_ptr<primitive_inst>& p_inst);
|
||||
void set_variables_state_info(const std::string& variable_id, const layout& variable_layout);
|
||||
|
||||
#ifdef GPU_DEBUG_CONFIG
|
||||
int64_t iteration = 0;
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <ostream>
|
||||
#include <tuple>
|
||||
#include "intel_gpu/runtime/layout.hpp"
|
||||
#include "intel_gpu/runtime/shape_predictor.hpp"
|
||||
#include "openvino/core/layout.hpp"
|
||||
#include "openvino/core/type/element_type.hpp"
|
||||
|
||||
@ -69,6 +70,27 @@ inline ov::element::Type convert_to_supported_device_type(ov::element::Type et)
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a partial shape into a concrete ov::Shape of the same rank.
/// Every dynamic dimension is replaced by 0; static dimensions keep their length.
inline ov::Shape get_tensor_shape(const ov::PartialShape& pshape) {
    ov::Shape static_shape;
    static_shape.reserve(pshape.size());

    for (const auto& dim : pshape) {
        static_shape.push_back(dim.is_dynamic() ? 0 : dim.get_length());
    }

    return static_shape;
}
|
||||
|
||||
/// Picks the shape a buffer should be allocated with.
///
/// Asks @p shape_predictor for a preallocation shape for @p name. That shape is
/// returned only when the predictor both proposes it and confirms that a buffer
/// of the corresponding byte size can be preallocated; otherwise @p current_shape
/// is returned unchanged.
///
/// @param name            Identifier passed through to the shape predictor.
/// @param current_shape   Shape currently required (taken by const reference to avoid copying the vector).
/// @param element_type    Element type; its bit width is used to compute the buffer size.
/// @param shape_predictor Predictor queried for the preallocation decision.
/// @return Either the predicted preallocation shape or a copy of @p current_shape.
inline ov::Shape predict_shape(const std::string& name,
                               const ov::Shape& current_shape,
                               ov::element::Type element_type,
                               cldnn::ShapePredictor& shape_predictor) {
    const auto prealloc_info = shape_predictor.predict_preallocation_shape(name, current_shape, element_type.bitwidth(), false);
    const auto& preallocation_shape = prealloc_info.second;

    // Size check is done in bytes: element count * bits per element, rounded up to whole bytes.
    const bool can_preallocate_buffer = prealloc_info.first &&
        shape_predictor.can_preallocate(cldnn::ceil_div(ov::shape_size(preallocation_shape) * element_type.bitwidth(), 8));

    if (can_preallocate_buffer) {
        return preallocation_shape;
    }

    return current_shape;
}
|
||||
|
||||
/// WA: Force exit. Any opencl api call can be hang after CL_OUT_OF_RESOURCES.
|
||||
inline void ForceExit() {
|
||||
std::cerr << "[GPU] force exit.\n"
|
||||
|
@ -128,13 +128,6 @@ public:
|
||||
|
||||
void add_primitive(const ov::Node& op, std::shared_ptr<cldnn::primitive> prim, std::vector<std::string> aliases = {});
|
||||
|
||||
|
||||
using variables_state_info_map = std::map<std::string, std::set<cldnn::layout>>;
|
||||
|
||||
void AddVariableStateInfo(const std::string& variable_id, const cldnn::layout& layout);
|
||||
|
||||
const variables_state_info_map& GetVariablesStatesInfo() const { return m_variablesStateInfo; }
|
||||
|
||||
bool use_new_shape_infer() const { return allow_new_shape_infer; }
|
||||
bool requires_new_shape_infer(const std::shared_ptr<ov::Node>& op) const;
|
||||
bool is_inner_program() const { return m_is_inner_program; }
|
||||
@ -150,7 +143,6 @@ private:
|
||||
static std::mutex m_mutex;
|
||||
|
||||
std::shared_ptr<cldnn::topology> m_topology;
|
||||
variables_state_info_map m_variablesStateInfo;
|
||||
CustomLayerMap m_custom_layers;
|
||||
|
||||
bool allow_new_shape_infer = false;
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "intel_gpu/plugin/variable_state.hpp"
|
||||
#include "openvino/runtime/isync_infer_request.hpp"
|
||||
#include "intel_gpu/plugin/graph.hpp"
|
||||
#include "intel_gpu/plugin/remote_tensor.hpp"
|
||||
@ -79,6 +80,7 @@ private:
|
||||
std::unordered_map<std::string, std::string> m_output_names_map;
|
||||
|
||||
std::map<cldnn::primitive_id, cldnn::network_output> m_internal_outputs;
|
||||
VariablesMap m_variables;
|
||||
|
||||
std::shared_ptr<Graph> m_graph;
|
||||
RemoteContextImpl::Ptr m_context = nullptr;
|
||||
@ -87,6 +89,7 @@ private:
|
||||
bool m_enable_profiling = false;
|
||||
bool m_use_external_queue = false;
|
||||
|
||||
void prepare_state(const std::string& name, const VariableState::Ptr variable);
|
||||
std::vector<cldnn::event::ptr> prepare_input(const std::string& name, const ov::Output<const ov::Node>& port, const TensorWrapper& user_tensor_wrapper);
|
||||
std::vector<cldnn::event::ptr> prepare_output(const std::string& name, const ov::Output<const ov::Node>& port, const TensorWrapper& user_tensor_wrapper);
|
||||
std::vector<cldnn::event::ptr> prepare_batched_input(const std::string& name,
|
||||
|
@ -4,24 +4,51 @@
|
||||
#pragma once
|
||||
|
||||
#include "openvino/runtime/ivariable_state.hpp"
|
||||
#include "intel_gpu/plugin/graph.hpp"
|
||||
#include "intel_gpu/runtime/layout.hpp"
|
||||
#include "intel_gpu/runtime/shape_predictor.hpp"
|
||||
#include "intel_gpu/runtime/memory.hpp"
|
||||
#include <functional>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gpu {
|
||||
class RemoteContextImpl;
|
||||
|
||||
struct VariableStateInfo {
|
||||
VariableStateInfo(const std::string& id, const cldnn::layout& layout) : m_id(id), m_layout(layout) {}
|
||||
|
||||
std::string m_id;
|
||||
cldnn::layout m_layout;
|
||||
};
|
||||
|
||||
class VariableState : public ov::IVariableState {
|
||||
public:
|
||||
VariableState(const std::string& name, cldnn::network::VariableState::Ptr states, cldnn::engine& engine);
|
||||
VariableState(const VariableStateInfo& info, std::shared_ptr<RemoteContextImpl> context, std::shared_ptr<cldnn::ShapePredictor> shape_predictor);
|
||||
using Ptr = std::shared_ptr<VariableState>;
|
||||
|
||||
void reset() override;
|
||||
void set_state(const ov::SoPtr<ov::ITensor>& state) override;
|
||||
ov::SoPtr<ov::ITensor> get_state() const override;
|
||||
|
||||
cldnn::memory::ptr get_memory() const;
|
||||
const cldnn::layout& get_layout() const;
|
||||
bool is_set() const;
|
||||
void set();
|
||||
void set_layout(const cldnn::layout& new_layout);
|
||||
|
||||
private:
|
||||
cldnn::network::VariableState::Ptr m_variable_state;
|
||||
cldnn::engine& m_engine;
|
||||
cldnn::layout m_layout;
|
||||
std::shared_ptr<RemoteContextImpl> m_context;
|
||||
std::shared_ptr<cldnn::ShapePredictor> m_shape_predictor;
|
||||
bool m_is_set = false;
|
||||
cldnn::memory::ptr m_memory = nullptr;
|
||||
size_t actual_size = 0;
|
||||
|
||||
void update_device_buffer();
|
||||
};
|
||||
|
||||
using VariablesMap = std::unordered_map<std::string, VariableState::Ptr>;
|
||||
using VariablesInfoMap = std::unordered_map<std::string, VariableStateInfo>;
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace ov
|
||||
|
@ -43,4 +43,8 @@ void assign_inst::load(cldnn::BinaryInputBuffer& ib) {
|
||||
ib >> variable_id;
|
||||
set_variable_id(variable_id);
|
||||
}
|
||||
|
||||
// Execution hook for assign: makes output 0 refer to the same memory object as
// input 0, so no separate output buffer is used for this primitive.
void assign_inst::on_execute() {
|
||||
_outputs[0] = input_memory_ptr(0);
|
||||
}
|
||||
} // namespace cldnn
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "concatenation_inst.h"
|
||||
#include "crop_inst.h"
|
||||
#include "eltwise_inst.h"
|
||||
#include "read_value_inst.h"
|
||||
#include "reshape_inst.h"
|
||||
#include "depth_to_space_inst.h"
|
||||
#include "resample_inst.h"
|
||||
@ -432,6 +433,9 @@ void prepare_buffer_fusing::run(program& p) {
|
||||
bool is_dynamic = node->is_dynamic();
|
||||
bool is_planar = format::is_default_format(node->get_output_layout().format);
|
||||
bool no_pad = !node->get_output_layout().data_padding && !node->get_input_layouts().empty() && !node->get_input_layout(0).data_padding;
|
||||
if (node->is_type<read_value>())
|
||||
return true;
|
||||
|
||||
if (node->is_type<reshape>() && is_dynamic && is_planar && no_pad && !node->is_output() && !node->has_fused_primitives()) {
|
||||
return true;
|
||||
}
|
||||
@ -602,5 +606,36 @@ void prepare_buffer_fusing::run(program& p) {
|
||||
|
||||
node.can_be_optimized(can_reshape_be_optimized(node));
|
||||
});
|
||||
program_helpers::do_for_types<read_value>(*node, [](read_value_node& node) {
|
||||
// Current implementation allows to avoid copy on read_value primitive
|
||||
// only in cases when it has single user
|
||||
// Otherwise we may face an issue with execution of read_value users and assign to the same variable
|
||||
// Graph below is an example of unsupported case
|
||||
// ┌────────┐ ┌───────┐
|
||||
// │ Param1 │ │ Const │
|
||||
// └───┬────┘ └───┬───┘
|
||||
// │ │
|
||||
// │ ┌────┴──────┐
|
||||
// .......│.........│ ReadValue │
|
||||
// . │ └────┬─────┬┘
|
||||
// . │ │ │
|
||||
// . │ ┌─────┐ │ │
|
||||
// . └───┤ Add ├────┘ │
|
||||
// . └──┬──┘ │
|
||||
// . │ │
|
||||
// . │ │
|
||||
// . ┌────────┐ │ ┌─────┐ │
|
||||
// ..│ Assign ├──┴────┤ Add ├──┘
|
||||
// └────────┘ └──┬──┘
|
||||
// │
|
||||
// │
|
||||
// ┌────┴──────┐
|
||||
// │ Result │
|
||||
// └───────────┘
|
||||
// If read_value here returns variable memory w/o copy, then based on Add-s and Assign execution order we may have different results
|
||||
// TODO: Allow optimizations for the case above too. Looks like it can be achieved by more careful
|
||||
// topological sort (i.e. if we ensure that all read_value users are completed before assign is run)
|
||||
node.can_be_optimized(node.get_users().size() == 1);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -43,11 +43,11 @@ struct assign_impl : public typed_primitive_impl<assign> {
|
||||
}
|
||||
|
||||
event::ptr execute_impl(const std::vector<event::ptr>& events, assign_inst& instance) override {
|
||||
auto& variable = instance.get_network().get_variable_memory(variable_id);
|
||||
auto& variable = instance.get_network().get_variable(variable_id);
|
||||
|
||||
if (variable.memory->get_layout() != instance.get_output_layout()) {
|
||||
CLDNN_ERROR_MESSAGE(instance.id(), "Layout mismatch");
|
||||
}
|
||||
OPENVINO_ASSERT(variable.get_layout() == instance.get_output_layout(),
|
||||
"[GPU] Layout mismatch: variable layout: ", variable.get_layout().to_short_string(),
|
||||
" assign output layout: ", instance.get_output_layout().to_short_string());
|
||||
|
||||
auto& stream = instance.get_network().get_stream();
|
||||
|
||||
@ -55,13 +55,13 @@ struct assign_impl : public typed_primitive_impl<assign> {
|
||||
e->wait();
|
||||
}
|
||||
|
||||
const auto ev_set_memory = variable.memory->copy_from(stream, instance.input_memory());
|
||||
variable.is_set = true;
|
||||
const auto ev_set_memory = variable.get_memory()->copy_from(stream, instance.input_memory());
|
||||
variable.set();
|
||||
|
||||
return ev_set_memory;
|
||||
}
|
||||
|
||||
void init_kernels(const kernels_cache& , const kernel_impl_params&) override {}
|
||||
void init_kernels(const kernels_cache&, const kernel_impl_params&) override {}
|
||||
|
||||
public:
|
||||
static std::unique_ptr<primitive_impl> create(const assign_node& arg, const kernel_impl_params& impl_param) {
|
||||
@ -73,7 +73,8 @@ public:
|
||||
namespace detail {
|
||||
|
||||
attach_assign_impl::attach_assign_impl() {
|
||||
implementation_map<assign>::add(impl_types::cpu, assign_impl::create, {});
|
||||
implementation_map<assign>::add(impl_types::cpu, shape_types::dynamic_shape, assign_impl::create, {});
|
||||
implementation_map<assign>::add(impl_types::cpu, shape_types::static_shape, assign_impl::create, {});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
@ -47,16 +47,23 @@ struct read_value_impl : public typed_primitive_impl<read_value> {
|
||||
e->wait();
|
||||
}
|
||||
|
||||
auto& variable = instance.get_network().get_variable_memory(variable_id);
|
||||
auto& variable = instance.get_network().get_variable(variable_id);
|
||||
auto &stream = instance.get_network().get_stream();
|
||||
|
||||
if (variable.memory->get_layout() != instance.get_output_layout()) {
|
||||
CLDNN_ERROR_MESSAGE(instance.id(), "Layout mismatch");
|
||||
OPENVINO_ASSERT(variable.get_layout() == instance.get_output_layout(),
|
||||
"[GPU] Layout mismatch: variable layout: ", variable.get_layout().to_short_string(),
|
||||
" read_value output layout: ", instance.get_output_layout().to_short_string());
|
||||
|
||||
if (!variable.is_set()) {
|
||||
if (instance.get_impl_params()->input_layouts.size() > 0) {
|
||||
variable.get_memory()->copy_from(stream, instance.dep_memory(0), true);
|
||||
} else {
|
||||
variable.get_memory()->fill(stream, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (!variable.is_set) {
|
||||
auto &stream = instance.get_network().get_stream();
|
||||
const auto ev_set_output = instance.output_memory().fill(stream, 0);
|
||||
return ev_set_output;
|
||||
if (!instance.can_be_optimized()) {
|
||||
return instance.output_memory(0).copy_from(stream, *variable.get_memory(), false);
|
||||
}
|
||||
|
||||
return instance.get_network().get_stream().create_user_event(true);
|
||||
@ -73,7 +80,8 @@ public:
|
||||
namespace detail {
|
||||
|
||||
attach_read_value_impl::attach_read_value_impl() {
|
||||
implementation_map<read_value>::add(impl_types::cpu, read_value_impl::create, {});
|
||||
implementation_map<read_value>::add(impl_types::cpu, shape_types::dynamic_shape, read_value_impl::create, {});
|
||||
implementation_map<read_value>::add(impl_types::cpu, shape_types::static_shape, read_value_impl::create, {});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
@ -23,6 +23,19 @@ private:
|
||||
|
||||
} // namespace memory_state
|
||||
|
||||
// Program-node specialization for the assign primitive.
// Inherits the base-class constructors and adds:
//  - input(): shortcut for dependency 0;
//  - get_shape_infer_dependencies(): returns an empty list, i.e. this node
//    reports no dependency indices for shape inference.
template <>
|
||||
struct typed_program_node<assign> : public typed_program_node_base<assign> {
|
||||
private:
|
||||
using parent = typed_program_node_base<assign>;
|
||||
|
||||
public:
|
||||
using parent::parent;
|
||||
|
||||
program_node& input() const { return get_dependency(0); }
|
||||
|
||||
std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
|
||||
};
|
||||
|
||||
using assign_node = typed_program_node<assign>;
|
||||
|
||||
template<>
|
||||
@ -44,6 +57,8 @@ public:
|
||||
|
||||
void save(cldnn::BinaryOutputBuffer& ob) const override;
|
||||
void load(cldnn::BinaryInputBuffer& ib) override;
|
||||
|
||||
void on_execute() override;
|
||||
};
|
||||
|
||||
using assign_inst = typed_primitive_inst<assign>;
|
||||
|
@ -10,6 +10,19 @@
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
// Program-node specialization for the read_value primitive.
// Inherits the base-class constructors and adds:
//  - input(): shortcut for dependency 0;
//  - get_shape_infer_dependencies(): returns an empty list, i.e. this node
//    reports no dependency indices for shape inference.
// NOTE(review): other code checks the read_value input count before touching
// input 0, which suggests a read_value may have no dependencies - presumably
// input() must only be called when one exists; confirm against callers.
template <>
|
||||
struct typed_program_node<read_value> : public typed_program_node_base<read_value> {
|
||||
private:
|
||||
using parent = typed_program_node_base<read_value>;
|
||||
|
||||
public:
|
||||
using parent::parent;
|
||||
|
||||
program_node& input() const { return get_dependency(0); }
|
||||
|
||||
std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
|
||||
};
|
||||
|
||||
using read_value_node = typed_program_node<read_value>;
|
||||
|
||||
template<>
|
||||
@ -19,7 +32,13 @@ class typed_primitive_inst<read_value> : public typed_primitive_inst_base<read_v
|
||||
public:
|
||||
template<typename ShapeType>
|
||||
static std::vector<layout> calc_output_layouts(read_value_node const& /*node*/, const kernel_impl_params& impl_param) {
|
||||
return forward_input0_shape<ShapeType>(impl_param);
|
||||
auto desc = impl_param.typed_desc<read_value>();
|
||||
const auto default_layout = desc->output_layout;
|
||||
auto out_layout = impl_param.state_layout.value_or(default_layout);
|
||||
if (out_layout.is_dynamic() && desc->input_size() > 0) {
|
||||
out_layout = impl_param.get_input_layout(0);
|
||||
}
|
||||
return { out_layout };
|
||||
}
|
||||
|
||||
static layout calc_output_layout(const read_value_node& node, kernel_impl_params const& impl_param);
|
||||
@ -31,6 +50,11 @@ public:
|
||||
|
||||
void save(cldnn::BinaryOutputBuffer& ob) const override;
|
||||
void load(cldnn::BinaryInputBuffer& ib) override;
|
||||
|
||||
void update_output_memory() override;
|
||||
|
||||
protected:
|
||||
void on_execute() override;
|
||||
};
|
||||
|
||||
using read_value_inst = typed_primitive_inst<read_value>;
|
||||
|
@ -2,6 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "intel_gpu/plugin/variable_state.hpp"
|
||||
#include "intel_gpu/primitives/read_value.hpp"
|
||||
#include "openvino/util/file_util.hpp"
|
||||
|
||||
#include "intel_gpu/primitives/data.hpp"
|
||||
@ -431,6 +433,9 @@ network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, st
|
||||
if (p_inst->type() == cldnn::data::type_id())
|
||||
_data_outputs.push_back(p_inst);
|
||||
}
|
||||
if (auto state_prim = std::dynamic_pointer_cast<memory_state::variable>(p_inst)) {
|
||||
set_variables_state_info(state_prim->variable_id(), p_inst->get_output_layout(0));
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& p_inst : _exec_order) {
|
||||
@ -474,14 +479,6 @@ network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, st
|
||||
}
|
||||
}
|
||||
|
||||
size_t num_variable_state_primitives;
|
||||
ib >> num_variable_state_primitives;
|
||||
for (size_t i = 0; i < num_variable_state_primitives; i++) {
|
||||
primitive_id p_inst_id;
|
||||
ib >> p_inst_id;
|
||||
_variable_state_primitives.emplace_back(_primitives.at(p_inst_id));
|
||||
}
|
||||
|
||||
add_default_output_chains();
|
||||
|
||||
size_t prims_info_size;
|
||||
@ -621,11 +618,6 @@ void network::save(cldnn::BinaryOutputBuffer& ob) {
|
||||
|
||||
ob << reuse_map;
|
||||
|
||||
ob << _variable_state_primitives.size();
|
||||
for (const auto& p_inst : _variable_state_primitives) {
|
||||
ob << p_inst->id();
|
||||
}
|
||||
|
||||
auto& prims_info = get_primitives_info();
|
||||
ob << prims_info.size();
|
||||
for (auto& prim_info : prims_info) {
|
||||
@ -692,7 +684,8 @@ void network::set_arguments() {
|
||||
// In that case some_op is static and we may want to set arguments once,
|
||||
// but dynamic optimized out reshape means that output buffer of reshape is unavailable
|
||||
// and attempt to set args will fail.
|
||||
if (dep.first->can_be_optimized() && dep.first->is_dynamic())
|
||||
auto prim = dep.first->get_impl_params()->desc;
|
||||
if (dep.first->can_be_optimized() && (dep.first->is_dynamic() || prim->type == read_value::type_id()))
|
||||
can_set_args = false;
|
||||
}
|
||||
|
||||
@ -1581,11 +1574,12 @@ void network::allocate_primitive_instance(program_node const& node) {
|
||||
|
||||
std::function<bool(const program_node&)> is_mutable_input = [&is_mutable_input](const program_node& node) {
|
||||
for (auto& dep : node.get_dependencies()) {
|
||||
if (dep.first->is_type<input_layout>() || dep.first->is_type<mutable_data>()) {
|
||||
return true;
|
||||
const auto dep_node = dep.first;
|
||||
if (dep_node->is_type<input_layout>() || dep_node->is_type<mutable_data>() || dep_node->is_type<read_value>()) {
|
||||
return true;
|
||||
}
|
||||
if (dep.first->can_be_optimized()) {
|
||||
if (is_mutable_input(*dep.first) || dep.first->is_dynamic()) {
|
||||
if (dep_node->can_be_optimized()) {
|
||||
if (is_mutable_input(*dep_node) || dep_node->is_dynamic()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -1614,15 +1608,8 @@ void network::allocate_primitive_instance(program_node const& node) {
|
||||
if (node.is_type<data>())
|
||||
_data_outputs.push_back(inst);
|
||||
}
|
||||
if (node.is_type<assign>() || node.is_type<read_value>()) {
|
||||
if (node.is_type<assign>()) {
|
||||
auto assign_prim = node.as<assign>().get_primitive();
|
||||
set_variables_state_info(assign_prim->variable_id, assign_prim->output_layout);
|
||||
} else {
|
||||
auto read_value_prim = node.as<read_value>().get_primitive();
|
||||
set_variables_state_info(read_value_prim->variable_id, read_value_prim->output_layout);
|
||||
}
|
||||
_variable_state_primitives.push_back(inst);
|
||||
if (auto state_prim = std::dynamic_pointer_cast<memory_state::variable>(inst)) {
|
||||
set_variables_state_info(state_prim->variable_id(), node.get_output_layout(0));
|
||||
}
|
||||
if (node.is_constant())
|
||||
transfer_memory_to_device(inst, node);
|
||||
@ -1660,79 +1647,36 @@ void network::transfer_memory_to_device(std::shared_ptr<primitive_inst> instance
|
||||
}
|
||||
}
|
||||
|
||||
network::VariableState& network::get_variable_memory(const std::string &variable_id) {
|
||||
// Registers (or replaces) the variable state stored under `name` in this network.
// The shared pointer is stored as-is, so the network shares ownership with the caller.
// NOTE(review): `variable` is dereferenced by the trace statement without a null
// check - presumably callers never pass nullptr; confirm.
void network::set_variable(const std::string& name, const std::shared_ptr<ov::intel_gpu::VariableState>& variable) {
|
||||
GPU_DEBUG_TRACE_DETAIL << "Set variable " << name << " " << variable->get_layout().to_short_string() << std::endl;
|
||||
_variables_states[name] = variable;
|
||||
}
|
||||
|
||||
bool network::has_variable(const std::string &variable_id) const {
|
||||
return _variables_states.find(variable_id) != _variables_states.end();
|
||||
}
|
||||
|
||||
ov::intel_gpu::VariableState& network::get_variable(const std::string &variable_id) const {
|
||||
auto it = _variables_states.find(variable_id);
|
||||
if (it == _variables_states.end()) {
|
||||
CLDNN_ERROR_MESSAGE(variable_id, "Variable not found");
|
||||
}
|
||||
OPENVINO_ASSERT(it != _variables_states.end(), "[GPU] ", variable_id, " variable not found");
|
||||
return *it->second;
|
||||
}
|
||||
|
||||
void network::assign_variables_memories(variables_states_map &&variables_memories) {
|
||||
_variables_states = variables_memories;
|
||||
for (auto primitive : _variable_state_primitives) {
|
||||
if (const auto& memory_state_primitive = std::dynamic_pointer_cast<memory_state::variable>(primitive)) {
|
||||
auto it = _variables_states.find(memory_state_primitive->variable_id());
|
||||
if (it != _variables_states.end())
|
||||
primitive->set_output_memory(it->second->memory, false);
|
||||
else
|
||||
CLDNN_ERROR_MESSAGE(memory_state_primitive->variable_id(), "Memory state not found");
|
||||
}
|
||||
}
|
||||
// Returns the recorded info (id + layout) for `variable_id`.
// Fails the OPENVINO_ASSERT check when no info was recorded for that id.
// The returned reference points into the network's own info map.
const ov::intel_gpu::VariableStateInfo& network::get_variable_info(const std::string &variable_id) const {
|
||||
auto it = _variables_state_info.find(variable_id);
|
||||
OPENVINO_ASSERT(it != _variables_state_info.end(), "[GPU] ", variable_id, " variable info not found");
|
||||
return it->second;
|
||||
}
|
||||
|
||||
void network::assign_variables_memories() {
|
||||
for (auto primitive : _variable_state_primitives) {
|
||||
if (const auto& memory_state_primitive = std::dynamic_pointer_cast<memory_state::variable>(primitive)) {
|
||||
auto it = _variables_states.find(memory_state_primitive->variable_id());
|
||||
if (it != _variables_states.end()) {
|
||||
primitive->set_output_memory(it->second->memory, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Read-only access to the map of variable states registered in this network.
const ov::intel_gpu::VariablesMap& network::get_variables() const {
|
||||
return _variables_states;
|
||||
}
|
||||
|
||||
void network::update_variable_memory(const std::string& variable_id, const cldnn::layout& layout) {
|
||||
auto it = _variables_states.find(variable_id);
|
||||
if (it == _variables_states.end()) {
|
||||
cldnn::network::VariableState::Ptr variable_state = std::make_shared<cldnn::network::VariableState>(get_engine().allocate_memory(layout, false));
|
||||
_variables_states.insert({variable_id, variable_state});
|
||||
} else {
|
||||
bool can_reuse = it->second->memory && layout.count() <= it->second->memory->get_layout().count();
|
||||
if (can_reuse)
|
||||
it->second->set_memory(get_engine().reinterpret_buffer(*it->second->memory, layout));
|
||||
else
|
||||
it->second->set_memory(get_engine().allocate_memory(layout, false));
|
||||
it->second->is_set = false;
|
||||
}
|
||||
for (auto primitive : _variable_state_primitives) {
|
||||
if (const auto& memory_state_primitive = std::dynamic_pointer_cast<memory_state::variable>(primitive)) {
|
||||
if (!variable_id.compare(memory_state_primitive->variable_id())) {
|
||||
auto& variable_state = get_variable_memory(variable_id);
|
||||
primitive->set_output_memory(variable_state.memory, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void network::allocate_variables_memories() {
|
||||
for (const auto& info : _variables_state_info) {
|
||||
auto variable_layout = info.second;
|
||||
if (variable_layout.is_static()) {
|
||||
_variables_states.insert({info.first, std::make_shared<cldnn::network::VariableState>(get_engine().allocate_memory(variable_layout, false))});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const cldnn::network::variables_state_info_map& network::get_variables_state_info() const {
|
||||
const ov::intel_gpu::VariablesInfoMap& network::get_variables_info() const {
|
||||
return _variables_state_info;
|
||||
}
|
||||
|
||||
void network::set_variables_state_info(const std::string& variable_id, const cldnn::layout& layout) {
|
||||
auto it = _variables_state_info.find(variable_id);
|
||||
if (it == _variables_state_info.end()) {
|
||||
_variables_state_info.insert({variable_id, layout});
|
||||
}
|
||||
// Records the layout for `variable_id` in the network's variable-info map.
// emplace() leaves an existing entry untouched, so the first layout recorded
// for a given id is the one that is kept.
void network::set_variables_state_info(const std::string& variable_id, const layout& variable_layout) {
|
||||
_variables_state_info.emplace(variable_id, ov::intel_gpu::VariableStateInfo{variable_id, variable_layout});
|
||||
}
|
||||
|
||||
} // namespace cldnn
|
||||
|
@ -264,6 +264,15 @@ void primitive_inst::update_shape() {
|
||||
}
|
||||
}
|
||||
|
||||
if (get_node().is_type<read_value>()) {
|
||||
const auto& variable_id = get_node().as<read_value>().get_primitive()->variable_id;
|
||||
auto new_layout = get_network().get_variable(variable_id).get_layout();
|
||||
if (!_impl_params->state_layout.has_value() || _impl_params->state_layout.value() != new_layout) {
|
||||
_impl_params->state_layout = new_layout;
|
||||
input_shape_changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (input_shape_changed)
|
||||
set_shape_change();
|
||||
|
||||
@ -387,6 +396,26 @@ void primitive_inst::update_shape() {
|
||||
for (auto& fused_prim : _impl_params->fused_desc) {
|
||||
fused_prim.output_layout.set_partial_shape(_impl_params->get_output_layout().get_partial_shape());
|
||||
}
|
||||
|
||||
if (get_node().is_type<assign>()) {
|
||||
auto desc = get_node().as<assign>().get_primitive();
|
||||
get_network().get_variable(desc->variable_id).set_layout(_impl_params->get_output_layout());
|
||||
_impl_params->state_layout = _impl_params->get_output_layout();
|
||||
}
|
||||
|
||||
if (get_node().is_type<read_value>()) {
|
||||
auto desc = get_node().as<read_value>().get_primitive();
|
||||
if (_impl_params->output_layouts[0].is_dynamic()) {
|
||||
auto pshape = _impl_params->output_layouts[0].get_partial_shape();
|
||||
for (auto& d : pshape) {
|
||||
if (d.is_dynamic()) {
|
||||
d = 0;
|
||||
}
|
||||
}
|
||||
_impl_params->output_layouts[0].set_partial_shape(pshape);
|
||||
}
|
||||
get_network().get_variable(desc->variable_id).set_layout(_impl_params->get_output_layout());
|
||||
}
|
||||
}
|
||||
|
||||
event::ptr primitive_inst::realloc_if_needed() {
|
||||
@ -416,14 +445,10 @@ event::ptr primitive_inst::realloc_if_needed() {
|
||||
if (_node->is_type<input_layout>())
|
||||
return ev;
|
||||
|
||||
if (_node->is_type<assign>() || _node->is_type<read_value>()) {
|
||||
std::string variable_id = "";
|
||||
if (_node->is_type<assign>())
|
||||
variable_id = _node->as<assign>().get_primitive()->variable_id;
|
||||
else
|
||||
variable_id = _node->as<read_value>().get_primitive()->variable_id;
|
||||
get_network().update_variable_memory(variable_id, actual_layout);
|
||||
return ev;
|
||||
// Stateful primitives: propagate the actual layout to the variable registered in the network.
// get_variable() returns a reference; it must be bound by reference here, otherwise
// set_layout() would only modify a temporary copy of the VariableState and the
// network's variable would be left untouched.
if (auto stateful_prim = dynamic_cast<memory_state::variable*>(this)) {
    const auto& variable_id = stateful_prim->variable_id();
    auto& variable = get_network().get_variable(variable_id);
    variable.set_layout(actual_layout);
}
|
||||
|
||||
bool can_reuse_buffer = _outputs[0] && actual_layout.count() <= max_output_layout_size;
|
||||
|
@ -659,7 +659,7 @@ void program::mark_if_constant(program_node& node) {
|
||||
|
||||
// mark if the node is in data flow assuming that all dependencies are marked properly
|
||||
void program::mark_if_data_flow(program_node& node) {
|
||||
if (node.is_type<mutable_data>() || node.is_type<input_layout>()) {
|
||||
if (node.is_type<mutable_data>() || node.is_type<input_layout>() || node.is_type<read_value>()) {
|
||||
node.data_flow = true;
|
||||
} else {
|
||||
node.data_flow = false;
|
||||
|
@ -11,7 +11,7 @@ namespace cldnn {
|
||||
GPU_DEFINE_PRIMITIVE_TYPE_ID(read_value)
|
||||
|
||||
read_value_inst::typed_primitive_inst(network& network, const read_value_node& node) :
|
||||
parent(network, node, false),
|
||||
parent(network, node, !node.can_be_optimized() && (node.get_output_layout().is_static() || node.get_output_layout().has_upper_bound())),
|
||||
memory_state::variable{node.get_primitive()->variable_id} {
|
||||
}
|
||||
|
||||
@ -23,7 +23,6 @@ std::string read_value_inst::to_string(const read_value_node& node) {
|
||||
auto node_info = node.desc_to_json();
|
||||
|
||||
json_composite read_value_info;
|
||||
read_value_info.add("input id", node.input().id());
|
||||
read_value_info.add("variable id", node.get_primitive()->variable_id);
|
||||
node_info->add("read_value info", read_value_info);
|
||||
|
||||
@ -32,6 +31,18 @@ std::string read_value_inst::to_string(const read_value_node& node) {
|
||||
return primitive_description.str();
|
||||
}
|
||||
|
||||
void read_value_inst::on_execute() {
|
||||
update_output_memory();
|
||||
}
|
||||
|
||||
void read_value_inst::update_output_memory() {
|
||||
if (!can_be_optimized() || !get_network().has_variable(variable_id()))
|
||||
return;
|
||||
|
||||
const auto& variable = get_network().get_variable(variable_id());
|
||||
set_output_memory(variable.get_memory(), false, 0);
|
||||
}
|
||||
|
||||
void read_value_inst::save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
parent::save(ob);
|
||||
|
||||
|
@ -17,8 +17,6 @@ namespace {
|
||||
template<typename T_PRIMITIVE>
|
||||
void CreateVariableAccessPrimitive(ProgramBuilder &p, const std::shared_ptr<ov::op::Op> &op,
|
||||
const std::string &variable_id) {
|
||||
validate_inputs_count(op, {1});
|
||||
|
||||
const auto output_pshape = op->get_output_partial_shape(0);
|
||||
const auto output_dtype = cldnn::element_type_to_data_type(op->get_output_element_type(0));
|
||||
const auto output_format = cldnn::format::get_default_format(output_pshape.size());
|
||||
@ -26,8 +24,6 @@ void CreateVariableAccessPrimitive(ProgramBuilder &p, const std::shared_ptr<ov::
|
||||
const auto variable_layout = cldnn::layout{ output_pshape, output_dtype, output_format };
|
||||
|
||||
auto inputs = p.GetInputInfo(op);
|
||||
if (!p.use_new_shape_infer())
|
||||
p.AddVariableStateInfo(variable_id, variable_layout);
|
||||
const auto prim = T_PRIMITIVE{layer_type_name_ID(op),
|
||||
inputs,
|
||||
variable_id,
|
||||
@ -37,18 +33,22 @@ void CreateVariableAccessPrimitive(ProgramBuilder &p, const std::shared_ptr<ov::
|
||||
}
|
||||
|
||||
// Builds a cldnn::read_value primitive for an opset3 ReadValue operation.
// The input count is validated against {0, 1} before the primitive is created.
void CreateReadValueOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v3::ReadValue>& op) {
    validate_inputs_count(op, {0, 1});

    const auto variable_id = op->get_variable_id();
    CreateVariableAccessPrimitive<cldnn::read_value>(p, op, variable_id);
}
|
||||
|
||||
// Builds a cldnn::assign primitive for an opset3 Assign operation.
// The input count is validated against {1} before the primitive is created.
void CreateAssignOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v3::Assign>& op) {
    validate_inputs_count(op, {1});

    const auto variable_id = op->get_variable_id();
    CreateVariableAccessPrimitive<cldnn::assign>(p, op, variable_id);
}
|
||||
|
||||
// Builds a cldnn::read_value primitive for an opset6 ReadValue operation.
// The input count is validated against {0, 1} before the primitive is created.
void CreateReadValueOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v6::ReadValue>& op) {
    validate_inputs_count(op, {0, 1});

    const auto variable_id = op->get_variable_id();
    CreateVariableAccessPrimitive<cldnn::read_value>(p, op, variable_id);
}
|
||||
|
||||
// Builds a cldnn::assign primitive for an opset6 Assign operation.
// The input count is validated against {1} before the primitive is created.
void CreateAssignOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v6::Assign>& op) {
    validate_inputs_count(op, {1});

    const auto variable_id = op->get_variable_id();
    CreateVariableAccessPrimitive<cldnn::assign>(p, op, variable_id);
}
|
||||
|
||||
|
@ -287,14 +287,6 @@ void ProgramBuilder::init_profile_info(const cldnn::primitive& prim) {
|
||||
perfEntry.parentPrimitive = prim.origin_op_name;
|
||||
}
|
||||
|
||||
// Registers `layout` under `variable_id` in m_variablesStateInfo.
// The first layout seen for an id creates a new entry holding just that layout;
// later calls for the same id insert into the existing entry's collection.
void ProgramBuilder::AddVariableStateInfo(const std::string& variable_id, const cldnn::layout& layout) {
    const auto found = m_variablesStateInfo.find(variable_id);
    if (found == m_variablesStateInfo.end()) {
        m_variablesStateInfo.insert({variable_id, { layout }});
        return;
    }

    found->second.insert(layout);
}
|
||||
|
||||
void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::primitive> prim, std::vector<std::string> aliases) {
|
||||
OPENVINO_ASSERT(m_topology != nullptr, "[GPU] Invalid ProgramBuilder builder state: topology is nullptr");
|
||||
|
||||
|
@ -43,15 +43,6 @@ inline bool can_use_usm_host(const cldnn::engine& engine) {
|
||||
return can_use_usm;
|
||||
}
|
||||
|
||||
// Converts a partial shape into a concrete ov::Shape of the same rank.
// Static dimensions keep their length; every dynamic dimension becomes 0.
inline ov::Shape get_tensor_shape(const ov::PartialShape& pshape) {
    ov::Shape static_shape(pshape.size());

    for (size_t axis = 0; axis < pshape.size(); axis++) {
        if (pshape[axis].is_dynamic()) {
            static_shape[axis] = 0;
        } else {
            static_shape[axis] = pshape[axis].get_length();
        }
    }

    return static_shape;
}
|
||||
|
||||
inline std::string get_port_name(const ov::Output<const ov::Node>& port, const bool is_legacy_api) {
|
||||
std::string name;
|
||||
// TODO: Should use tensor name as the port name, but many legacy tests still use legacy name
|
||||
@ -177,18 +168,6 @@ bool same_host_mem(cldnn::memory::cptr memory, const uint8_t* host_ptr) {
|
||||
return device_ptr == host_ptr;
|
||||
}
|
||||
|
||||
// Chooses the shape to allocate for the buffer called `name`.
// The shape predictor is asked for a preallocation shape; that shape is returned
// only when the predictor both proposes one (pair.first) and confirms via
// can_preallocate() that a buffer of ceil(elements * bitwidth / 8) can be
// preallocated. In every other case `current_shape` is returned unchanged.
ov::Shape predict_shape(const std::string& name, const ov::Shape current_shape, ov::element::Type element_type, cldnn::ShapePredictor& shape_predictor) {
    const auto prediction = shape_predictor.predict_preallocation_shape(name, current_shape, element_type.bitwidth(), false);

    // No proposal from the predictor: keep the requested shape and do not
    // query can_preallocate() at all.
    if (!prediction.first)
        return current_shape;

    const auto& predicted_shape = prediction.second;
    const auto required_size = cldnn::ceil_div(ov::shape_size(predicted_shape) * element_type.bitwidth(), 8);
    if (!shape_predictor.can_preallocate(required_size))
        return current_shape;

    return predicted_shape;
}
|
||||
|
||||
inline bool all_remote_buffers(const std::vector<ov::SoPtr<ov::ITensor>>& tensors) {
|
||||
return std::all_of(tensors.begin(), tensors.end(), [](const ov::SoPtr<ov::ITensor>& tensor) {
|
||||
if (auto remote_ptr = std::dynamic_pointer_cast<ov::intel_gpu::RemoteTensorImpl>(tensor._ptr)) {
|
||||
@ -262,13 +241,9 @@ std::vector<ov::ProfilingInfo> SyncInferRequest::get_profiling_info() const {
|
||||
|
||||
std::vector<ov::SoPtr<ov::IVariableState>> SyncInferRequest::query_state() const {
|
||||
std::vector<ov::SoPtr<ov::IVariableState>> ret{};
|
||||
const auto& variable_states = m_graph->get_network()->get_variable_memories();
|
||||
for (const auto& pair : variable_states) {
|
||||
ret.emplace_back(std::make_shared<VariableState>(pair.first, pair.second, m_graph->get_engine()));
|
||||
for (const auto& pair : m_variables) {
|
||||
ret.emplace_back(pair.second, nullptr);
|
||||
}
|
||||
auto expected_states_count = m_graph->get_network()->get_variables_state_info().size();
|
||||
OPENVINO_ASSERT(expected_states_count == ret.size(), "[GPU] Mismatch of expected states count (",
|
||||
expected_states_count, ") and actual size (", ret.size(), ")");
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -411,8 +386,13 @@ void SyncInferRequest::enqueue() {
|
||||
std::move(events.begin(), events.end(), std::back_inserter(dependencies));
|
||||
}
|
||||
|
||||
for (const auto& it : m_variables) {
|
||||
const auto& name = it.first;
|
||||
const auto& variable = it.second;
|
||||
prepare_state(name, variable);
|
||||
}
|
||||
|
||||
auto network = m_graph->get_network();
|
||||
network->assign_variables_memories();
|
||||
network->set_shape_predictor(m_shape_predictor);
|
||||
|
||||
m_internal_outputs.clear();
|
||||
@ -687,7 +667,16 @@ void SyncInferRequest::allocate_outputs() {
|
||||
}
|
||||
|
||||
void SyncInferRequest::allocate_states() {
|
||||
m_graph->get_network()->allocate_variables_memories();
|
||||
const auto& network = m_graph->get_network();
|
||||
const auto& variables_info = network->get_variables_info();
|
||||
for (auto& vi : variables_info) {
|
||||
auto variable = std::make_shared<VariableState>(vi.second, m_context, m_shape_predictor);
|
||||
m_variables.emplace(vi.first, variable);
|
||||
}
|
||||
}
|
||||
|
||||
// Hands the given variable state over to the graph's network under `name`.
void SyncInferRequest::prepare_state(const std::string& name, const VariableState::Ptr variable) {
    auto network = m_graph->get_network();
    network->set_variable(name, variable);
}
|
||||
|
||||
std::vector<cldnn::event::ptr> SyncInferRequest::prepare_batched_input(const std::string& name,
|
||||
|
@ -3,8 +3,11 @@
|
||||
//
|
||||
|
||||
#include "openvino/runtime/make_tensor.hpp"
|
||||
#include "intel_gpu/plugin/remote_context.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
#include "intel_gpu/plugin/remote_tensor.hpp"
|
||||
#include "intel_gpu/plugin/variable_state.hpp"
|
||||
#include "intel_gpu/runtime/memory_caps.hpp"
|
||||
#include "intel_gpu/runtime/layout.hpp"
|
||||
|
||||
#include <memory>
|
||||
@ -12,38 +15,73 @@
|
||||
namespace ov {
|
||||
namespace intel_gpu {
|
||||
|
||||
VariableState::VariableState(const std::string &name, cldnn::network::VariableState::Ptr state, cldnn::engine& engine)
|
||||
: ov::IVariableState {name}
|
||||
, m_variable_state(state)
|
||||
, m_engine(engine) {
|
||||
auto internal_memory = m_variable_state->memory;
|
||||
auto internal_layout = internal_memory->get_layout();
|
||||
auto shape = internal_layout.get_shape();
|
||||
m_state = ov::make_tensor(internal_layout.data_type, shape);
|
||||
VariableState::VariableState(const VariableStateInfo& info, RemoteContextImpl::Ptr context, std::shared_ptr<cldnn::ShapePredictor> shape_predictor)
|
||||
: ov::IVariableState {info.m_id}
|
||||
, m_layout(info.m_layout)
|
||||
, m_context(context)
|
||||
, m_shape_predictor(shape_predictor) {
|
||||
m_state = m_context->create_host_tensor(m_layout.data_type, get_tensor_shape(m_layout.get_partial_shape()));
|
||||
update_device_buffer();
|
||||
}
|
||||
|
||||
void VariableState::reset() {
|
||||
m_variable_state->is_set = false;
|
||||
m_is_set = false;
|
||||
}
|
||||
|
||||
// Returns the memory object currently stored in m_memory.
cldnn::memory::ptr VariableState::get_memory() const {
    return this->m_memory;
}
|
||||
|
||||
// Returns a reference to the layout currently stored in m_layout.
const cldnn::layout& VariableState::get_layout() const {
    return this->m_layout;
}
|
||||
|
||||
bool VariableState::is_set() const {
|
||||
return m_is_set;
|
||||
}
|
||||
void VariableState::set() {
|
||||
m_is_set = true;
|
||||
}
|
||||
|
||||
// Replaces the stored layout with `new_layout` and then lets
// update_device_buffer() bring m_memory in line with it.
void VariableState::set_layout(const cldnn::layout& new_layout) {
    this->m_layout = new_layout;
    this->update_device_buffer();
}
|
||||
|
||||
void VariableState::set_state(const ov::SoPtr<ov::ITensor>& state) {
|
||||
const bool blocking = true;
|
||||
auto remote_ptr = std::dynamic_pointer_cast<RemoteTensorImpl>(state._ptr);
|
||||
m_layout.set_partial_shape(state->get_shape());
|
||||
update_device_buffer();
|
||||
if (remote_ptr != nullptr) {
|
||||
auto user_memory = remote_ptr->get_memory();
|
||||
cldnn::mem_lock<uint8_t> lock(user_memory, m_engine.get_service_stream());
|
||||
m_variable_state->memory->copy_from(m_engine.get_service_stream(), lock.data(), blocking);
|
||||
m_memory->copy_from(m_context->get_engine().get_service_stream(), *user_memory, blocking);
|
||||
} else {
|
||||
auto data = state->data();
|
||||
m_variable_state->memory->copy_from(m_engine.get_service_stream(), data, blocking);
|
||||
m_memory->copy_from(m_context->get_engine().get_service_stream(), data, blocking);
|
||||
}
|
||||
m_variable_state->is_set = true;
|
||||
set();
|
||||
}
|
||||
|
||||
void VariableState::update_device_buffer() {
|
||||
if (m_layout.is_dynamic() || m_layout.bytes_count() == 0)
|
||||
return;
|
||||
|
||||
if (actual_size < m_layout.bytes_count()) {
|
||||
const auto alloc_type = m_context->get_engine().use_unified_shared_memory() ? cldnn::allocation_type::usm_device : cldnn::allocation_type::cl_mem;
|
||||
const auto current_shape = get_tensor_shape(m_layout.get_partial_shape());
|
||||
const auto alloc_shape = predict_shape(m_name, current_shape, m_layout.data_type, *m_shape_predictor);
|
||||
const auto alloc_layout = cldnn::layout(alloc_shape, m_layout.data_type, m_layout.format);
|
||||
m_memory = m_context->get_engine().allocate_memory(alloc_layout, alloc_type, false);
|
||||
actual_size = std::max(actual_size, alloc_layout.bytes_count());
|
||||
}
|
||||
m_memory = m_context->get_engine().reinterpret_buffer(*m_memory, m_layout);
|
||||
}
|
||||
|
||||
ov::SoPtr<ov::ITensor> VariableState::get_state() const {
|
||||
auto internal_memory = m_variable_state->memory;
|
||||
const bool blocking = true;
|
||||
internal_memory->copy_to(m_engine.get_service_stream(), m_state->data(), blocking);
|
||||
m_state->set_shape(m_memory->get_layout().get_shape());
|
||||
m_memory->copy_to(m_context->get_engine().get_service_stream(), m_state->data(), blocking);
|
||||
|
||||
return m_state;
|
||||
}
|
||||
|
@ -14,13 +14,15 @@
|
||||
#include "openvino/op/matmul.hpp"
|
||||
#include "openvino/op/convert.hpp"
|
||||
#include "openvino/op/concat.hpp"
|
||||
#include "openvino/pass/make_stateful.hpp"
|
||||
|
||||
namespace tests {
|
||||
|
||||
inline std::shared_ptr<ov::Model> make_llm_kv_cache_pattern(ov::Dimension batch = ov::Dimension::dynamic(),
|
||||
ov::Dimension n_heads = ov::Dimension::dynamic(),
|
||||
ov::Dimension n_features = ov::Dimension::dynamic(),
|
||||
ov::element::Type_t element_type = ov::element::f32) {
|
||||
ov::element::Type_t element_type = ov::element::f32,
|
||||
bool stateful = false) {
|
||||
ov::PartialShape kv_cache_size = {batch, n_heads, -1, n_features};
|
||||
ov::PartialShape new_token_size = {batch, -1, n_heads, n_features};
|
||||
ov::PartialShape matmul_in_size = {batch, n_heads, -1, -1};
|
||||
@ -44,7 +46,12 @@ inline std::shared_ptr<ov::Model> make_llm_kv_cache_pattern(ov::Dimension batch
|
||||
|
||||
ov::ParameterVector params{in_kv_prev, in_new_token, in_matmul};
|
||||
ov::ResultVector results{kv_present, matmul_out};
|
||||
return std::make_shared<ov::Model>(results, params, "LLM-KV-Cache");
|
||||
auto model = std::make_shared<ov::Model>(results, params, "LLM-KV-Cache");
|
||||
if (stateful) {
|
||||
ov::pass::MakeStateful({{in_kv_prev, kv_present}}).run_on_model(model);
|
||||
}
|
||||
|
||||
return model;
|
||||
}
|
||||
|
||||
} // namespace tests
|
||||
|
@ -222,3 +222,24 @@ TEST(TensorTest, smoke_canReallocateDeviceInputForHostTensor) {
|
||||
ASSERT_NO_THROW(inf_req.infer());
|
||||
}
|
||||
|
||||
// Verifies the variable-state API of a compiled stateful model on GPU:
//  1. the request exposes exactly one variable, named "v0";
//  2. its default state tensor has the expected shape;
//  3. a user-provided tensor can be installed with set_state();
//  4. inference still runs afterwards.
// The original body never called set_state() despite the test name and left
// both locally created tensors unused; the state tensor is now actually set and
// the unused input tensor is dropped.
TEST(VariablesTest, smoke_canSetStateTensor) {
    auto ov = ov::Core();
    const ov::Shape variable_shape = {1, 3, 2, 4};
    const ov::Shape input_shape = {1, 3, 2, 4};
    const ov::element::Type et = ov::element::f16;
    auto model = ngraph::builder::subgraph::makeReadConcatSplitAssign(input_shape, et);
    auto compiled_model = ov.compile_model(model, ov::test::utils::DEVICE_GPU);
    auto request = compiled_model.create_infer_request();

    // Contents are left as allocated: only shapes and the absence of exceptions
    // are checked below, never the numeric values.
    ov::Tensor variable_tensor(et, variable_shape);

    auto variables = request.query_state();
    ASSERT_EQ(variables.size(), 1);
    auto variable = variables.front();
    ASSERT_EQ(variable.get_name(), "v0");
    auto default_state_tensor = variable.get_state();
    ASSERT_EQ(default_state_tensor.get_shape(), variable_shape);

    ASSERT_NO_THROW(variable.set_state(variable_tensor));

    ASSERT_NO_THROW(request.infer());
}
|
||||
|
@ -2,12 +2,13 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "ngraph/opsets/opset8.hpp"
|
||||
#include "ov_models/subgraph_builders.hpp"
|
||||
#include "openvino/core/partial_shape.hpp"
|
||||
#include "openvino/opsets/opset8.hpp"
|
||||
#include "openvino/runtime/compiled_model.hpp"
|
||||
#include "openvino/runtime/infer_request.hpp"
|
||||
#include "openvino/runtime/core.hpp"
|
||||
#include "ov_models/subgraph_builders.hpp"
|
||||
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||
#include <cpp/ie_cnn_network.h>
|
||||
#include <ie_plugin_config.hpp>
|
||||
#include "functional_test_utils/skip_tests_config.hpp"
|
||||
#include "functional_test_utils/ov_plugin_cache.hpp"
|
||||
#include "common_test_utils/common_utils.hpp"
|
||||
@ -16,11 +17,8 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace opset8;
|
||||
using namespace ov::test;
|
||||
|
||||
|
||||
using MemoryDynamicBatchParams = std::tuple<
|
||||
ov::PartialShape, // Partial shape for network initialization
|
||||
ov::Shape, // Actual shape to be passed to inference request
|
||||
@ -28,45 +26,49 @@ using MemoryDynamicBatchParams = std::tuple<
|
||||
std::string>; // Device name
|
||||
|
||||
class MemoryDynamicBatch : public ::testing::Test,
|
||||
public ::testing::WithParamInterface<MemoryDynamicBatchParams> {
|
||||
public ::testing::WithParamInterface<MemoryDynamicBatchParams> {
|
||||
public:
|
||||
static std::string getTestCaseName(::testing::TestParamInfo<MemoryDynamicBatchParams> obj) {
|
||||
ov::PartialShape inputPartialShape;
|
||||
ov::Shape inputShape;
|
||||
int iterationsNum;
|
||||
std::string targetDevice;
|
||||
std::tie(inputPartialShape, inputShape, iterationsNum, targetDevice) = obj.param;
|
||||
static std::string get_test_case_name(::testing::TestParamInfo<MemoryDynamicBatchParams> obj) {
|
||||
ov::PartialShape input_phape;
|
||||
ov::Shape input_shape;
|
||||
int iterations_num;
|
||||
std::string target_device;
|
||||
std::tie(input_phape, input_shape, iterations_num, target_device) = obj.param;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "IS=";
|
||||
result << ov::test::utils::partialShape2str({ inputPartialShape }) << "_";
|
||||
result << ov::test::utils::partialShape2str({ input_phape }) << "_";
|
||||
result << "TS=";
|
||||
result << ov::test::utils::partialShape2str({inputShape});
|
||||
result << ov::test::utils::partialShape2str({input_shape});
|
||||
result << ")_";
|
||||
result << "iterationsCount=" << iterationsNum << "_";
|
||||
result << "targetDevice=" << targetDevice;
|
||||
result << "iterations_num=" << iterations_num << "_";
|
||||
result << "target_device=" << target_device;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
void SetUp() override {
|
||||
std::tie(inputPartialShape_, inputShape_, iterationsNum_, deviceName_) = GetParam();
|
||||
model_ = buildModel(precision_, inputPartialShape_);
|
||||
core_ = ov::test::utils::PluginCache::get().core();
|
||||
ov::PartialShape input_pshape;
|
||||
std::string device_name;
|
||||
std::tie(input_pshape, input_shape, iterations_num, device_name) = GetParam();
|
||||
std::shared_ptr<ov::Model> model = build_model(element_type, input_pshape);
|
||||
std::shared_ptr<ov::Core> core = ov::test::utils::PluginCache::get().core();
|
||||
|
||||
compiled_model = core->compile_model(model, device_name, { });
|
||||
infer_request = compiled_model.create_infer_request();
|
||||
}
|
||||
|
||||
static std::shared_ptr<ov::Model> buildModel(ElementType precision, const ov::PartialShape& shape) {
|
||||
static std::shared_ptr<ov::Model> build_model(ElementType precision, const ov::PartialShape& shape) {
|
||||
auto param = std::make_shared<ov::op::v0::Parameter>(precision, shape);
|
||||
const VariableInfo variable_info { shape, precision, "v0" };
|
||||
auto variable = std::make_shared<Variable>(variable_info);
|
||||
auto read_value = std::make_shared<ReadValue>(param, variable);
|
||||
auto add = std::make_shared<Add>(read_value, param);
|
||||
auto assign = std::make_shared<Assign>(add, variable);
|
||||
auto res = std::make_shared<Result>(add);
|
||||
return std::make_shared<ov::Model>(ResultVector { res }, SinkVector { assign }, ov::ParameterVector{param},
|
||||
"MemoryDynamicBatchTest");
|
||||
const ov::op::util::VariableInfo variable_info { shape, precision, "v0" };
|
||||
auto variable = std::make_shared<ov::op::util::Variable>(variable_info);
|
||||
auto read_value = std::make_shared<ov::op::v6::ReadValue>(param, variable);
|
||||
auto add = std::make_shared<ov::op::v1::Add>(read_value, param);
|
||||
auto assign = std::make_shared<ov::op::v6::Assign>(add, variable);
|
||||
auto res = std::make_shared<ov::op::v0::Result>(add);
|
||||
return std::make_shared<ov::Model>(ov::ResultVector { res }, ov::SinkVector { assign }, ov::ParameterVector{param}, "MemoryDynamicBatchTest");
|
||||
}
|
||||
|
||||
static std::vector<int> generateInput(const ov::Shape& shape) {
|
||||
static std::vector<int> generate_inputs(const ov::Shape& shape) {
|
||||
auto len = ov::shape_size(shape);
|
||||
std::vector<int> result {};
|
||||
result.reserve(len);
|
||||
@ -75,7 +77,7 @@ public:
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::vector<int> calculateReference(const std::vector<int>& input, int iterations) {
|
||||
static std::vector<int> calculate_reference(const std::vector<int>& input, int iterations) {
|
||||
std::vector<int> reference {};
|
||||
reference.reserve(input.size());
|
||||
std::transform(input.begin(), input.end(), std::back_inserter(reference), [iterations](const int &i) {
|
||||
@ -84,97 +86,86 @@ public:
|
||||
return reference;
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
ov::PartialShape inputPartialShape_;
|
||||
ov::Shape inputShape_;
|
||||
int iterationsNum_;
|
||||
std::string deviceName_;
|
||||
std::shared_ptr<ov::Model> model_;
|
||||
std::shared_ptr<ov::Core> core_;
|
||||
std::vector<int> input_;
|
||||
ElementType precision_ { ElementType::i32 };
|
||||
ov::Shape input_shape;
|
||||
int iterations_num;
|
||||
ov::CompiledModel compiled_model;
|
||||
ov::InferRequest infer_request;
|
||||
std::vector<int> input_data;
|
||||
ov::element::Type element_type { ov::element::i32 };
|
||||
};
|
||||
|
||||
TEST_P(MemoryDynamicBatch, MultipleInferencesOnTheSameInferRequest) {
|
||||
TEST_P(MemoryDynamicBatch, MultipleInferencesOnTheSameInfer_request) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
auto compiledModel = core_->compile_model(model_, ov::test::utils::DEVICE_GPU, { });
|
||||
auto inferRequest = compiledModel.create_infer_request();
|
||||
input_ = generateInput(inputShape_);
|
||||
ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
|
||||
inferRequest.set_input_tensor(inputTensor);
|
||||
for (int i = 0; i < iterationsNum_; i++)
|
||||
inferRequest.infer();
|
||||
auto output = inferRequest.get_output_tensor(0);
|
||||
std::vector<int> reference = calculateReference(input_, iterationsNum_);
|
||||
input_data = generate_inputs(input_shape);
|
||||
ov::Tensor input_tensor = ov::Tensor(element_type, input_shape, input_data.data());
|
||||
infer_request.set_input_tensor(input_tensor);
|
||||
for (int i = 0; i < iterations_num; i++)
|
||||
infer_request.infer();
|
||||
auto output = infer_request.get_output_tensor(0);
|
||||
std::vector<int> reference = calculate_reference(input_data, iterations_num + 1);
|
||||
std::vector<int> actual(output.data<int>(), output.data<int>() + output.get_size());
|
||||
for (auto actualIt = actual.begin(), referenceIt = reference.begin(); actualIt < actual.end();
|
||||
actualIt++, referenceIt++)
|
||||
EXPECT_EQ(*actualIt, *referenceIt);
|
||||
ASSERT_EQ(*actualIt, *referenceIt);
|
||||
}
|
||||
|
||||
TEST_P(MemoryDynamicBatch, ResetVariableState) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
auto compiledModel = core_->compile_model(model_, ov::test::utils::DEVICE_GPU, { });
|
||||
auto inferRequest = compiledModel.create_infer_request();
|
||||
input_ = generateInput(inputShape_);
|
||||
ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
|
||||
inferRequest.set_input_tensor(inputTensor);
|
||||
inferRequest.infer();
|
||||
inferRequest.query_state().front().reset();
|
||||
inferRequest.infer();
|
||||
auto output = inferRequest.get_output_tensor(0);
|
||||
std::vector<int> reference = calculateReference(input_, 1);
|
||||
input_data = generate_inputs(input_shape);
|
||||
ov::Tensor input_tensor = ov::Tensor(element_type, input_shape, input_data.data());
|
||||
infer_request.set_input_tensor(input_tensor);
|
||||
infer_request.infer();
|
||||
infer_request.query_state().front().reset();
|
||||
infer_request.infer();
|
||||
auto output = infer_request.get_output_tensor(0);
|
||||
std::vector<int> reference = calculate_reference(input_data, 2);
|
||||
std::vector<int> actual(output.data<int>(), output.data<int>() + output.get_size());
|
||||
for (auto actualIt = actual.begin(), referenceIt = reference.begin(); actualIt < actual.end();
|
||||
actualIt++, referenceIt++)
|
||||
EXPECT_EQ(*actualIt, *referenceIt);
|
||||
ASSERT_EQ(*actualIt, *referenceIt);
|
||||
}
|
||||
|
||||
TEST_P(MemoryDynamicBatch, GetVariableState) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
auto compiledModel = core_->compile_model(model_, ov::test::utils::DEVICE_GPU, { });
|
||||
auto inferRequest = compiledModel.create_infer_request();
|
||||
input_ = generateInput(inputShape_);
|
||||
ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
|
||||
inferRequest.set_input_tensor(inputTensor);
|
||||
for (int i = 0; i < iterationsNum_; i++)
|
||||
inferRequest.infer();
|
||||
auto blob = inferRequest.query_state().front().get_state();
|
||||
std::vector<int> reference = calculateReference(input_, iterationsNum_);
|
||||
input_data = generate_inputs(input_shape);
|
||||
ov::Tensor input_tensor = ov::Tensor(element_type, input_shape, input_data.data());
|
||||
infer_request.set_input_tensor(input_tensor);
|
||||
for (int i = 0; i < iterations_num; i++)
|
||||
infer_request.infer();
|
||||
auto blob = infer_request.query_state().front().get_state();
|
||||
std::vector<int> reference = calculate_reference(input_data, iterations_num + 1);
|
||||
std::vector<int> actual(blob.data<int>(), blob.data<int>() + blob.get_size());
|
||||
for (auto actualIt = actual.begin(), referenceIt = reference.begin(); actualIt < actual.end();
|
||||
actualIt++, referenceIt++)
|
||||
EXPECT_EQ(*actualIt, *referenceIt);
|
||||
ASSERT_EQ(*actualIt, *referenceIt);
|
||||
}
|
||||
|
||||
TEST_P(MemoryDynamicBatch, SetVariableState) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
auto compiledModel = core_->compile_model(model_, ov::test::utils::DEVICE_GPU, { });
|
||||
auto inferRequest = compiledModel.create_infer_request();
|
||||
input_ = generateInput(inputShape_);
|
||||
ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
|
||||
inferRequest.set_input_tensor(inputTensor);
|
||||
ov::Tensor state = ov::Tensor(precision_, inputShape_, input_.data());
|
||||
inferRequest.query_state().front().set_state(state);
|
||||
for (int i = 0; i < iterationsNum_; i++)
|
||||
inferRequest.infer();
|
||||
auto output = inferRequest.get_output_tensor(0);
|
||||
std::vector<int> reference = calculateReference(input_, iterationsNum_ + 1);
|
||||
input_data = generate_inputs(input_shape);
|
||||
ov::Tensor input_tensor = ov::Tensor(element_type, input_shape, input_data.data());
|
||||
infer_request.set_input_tensor(input_tensor);
|
||||
ov::Tensor state = ov::Tensor(element_type, input_shape, input_data.data());
|
||||
infer_request.query_state().front().set_state(state);
|
||||
for (int i = 0; i < iterations_num; i++)
|
||||
infer_request.infer();
|
||||
auto output = infer_request.get_output_tensor(0);
|
||||
std::vector<int> reference = calculate_reference(input_data, iterations_num + 1);
|
||||
std::vector<int> actual(output.data<int>(), output.data<int>() + output.get_size());
|
||||
for (auto actualIt = actual.begin(), referenceIt = reference.begin(); actualIt < actual.end();
|
||||
actualIt++, referenceIt++)
|
||||
EXPECT_EQ(*actualIt, *referenceIt);
|
||||
ASSERT_EQ(*actualIt, *referenceIt);
|
||||
}
|
||||
|
||||
ov::PartialShape networkPartialShape { {1, 19}, 4, 20, 20 };
|
||||
std::vector<ov::Shape> inputShapes { { 7, 4, 20, 20 }, { 19, 4, 20, 20 } };
|
||||
std::vector<int> iterationsNum { 3, 7 };
|
||||
static ov::PartialShape model_pshape { {1, 19}, 4, 20, 20 };
|
||||
static std::vector<ov::Shape> input_shapes { { 7, 4, 20, 20 }, { 19, 4, 20, 20 } };
|
||||
static std::vector<int> iterations_num { 3, 7 };
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_MemoryDynamicBatch, MemoryDynamicBatch,
|
||||
::testing::Combine(
|
||||
::testing::Values(networkPartialShape),
|
||||
::testing::ValuesIn(inputShapes),
|
||||
::testing::ValuesIn(iterationsNum),
|
||||
::testing::Values(model_pshape),
|
||||
::testing::ValuesIn(input_shapes),
|
||||
::testing::ValuesIn(iterations_num),
|
||||
::testing::Values(ov::test::utils::DEVICE_GPU)),
|
||||
MemoryDynamicBatch::getTestCaseName);
|
||||
MemoryDynamicBatch::get_test_case_name);
|
||||
|
@ -113,8 +113,6 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
R"(.*smoke_LPT.*MatMulWithConstantTransformation.*)",
|
||||
R"(.*smoke_LPT.*PullReshapeThroughDequantizationTransformation.*)",
|
||||
R"(.*smoke_LPT.*ElementwiseBranchSelectionTransformation.*)",
|
||||
// Dynamic state unsupported for now
|
||||
R"(.*MemoryDynamicBatch.*)",
|
||||
// Issue: 123493
|
||||
R"(.*GroupNormalizationTest.*CompareWithRefs.*NetType=f16.*)",
|
||||
// Issue: 123507
|
||||
@ -123,5 +121,7 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
R"(.*RandomUniformLayerTest.*f16.*)",
|
||||
// Issue: 125165
|
||||
R"(smoke_Nms9LayerTest.*)",
|
||||
// Doesn't match reference results as v6 ref impl behavior is misaligned with expected
|
||||
R"(smoke_MemoryTest.*)",
|
||||
};
|
||||
}
|
||||
|
@ -0,0 +1,17 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "shared_test_classes/subgraph/stateful_model.hpp"
|
||||
|
||||
using namespace ov::test;
|
||||
|
||||
namespace {
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke, StaticShapeStatefulModel, ::testing::Values(ov::test::utils::DEVICE_GPU));
|
||||
INSTANTIATE_TEST_SUITE_P(smoke, StaticShapeTwoStatesModel, ::testing::Values(ov::test::utils::DEVICE_GPU));
|
||||
INSTANTIATE_TEST_SUITE_P(smoke, DynamicShapeStatefulModelDefault, ::testing::Values(ov::test::utils::DEVICE_GPU));
|
||||
INSTANTIATE_TEST_SUITE_P(smoke, DynamicShapeStatefulModelParam, ::testing::Values(ov::test::utils::DEVICE_GPU));
|
||||
INSTANTIATE_TEST_SUITE_P(smoke, DynamicShapeStatefulModelStateAsInp, ::testing::Values(ov::test::utils::DEVICE_GPU));
|
||||
|
||||
} // namespace
|
@ -210,4 +210,114 @@ TEST(KVCacheTest, smoke_multipleIterations) {
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the KV-cache LLM pattern on GPU over several inferences with a single
// infer request and checks the matmul output of every inference against a
// reference computed by interpreting a second model.
//
// The GPU model is built with the last builder argument set to `true` and
// exposes two parameters; the reference model is built with `false` and takes
// the KV cache as an explicit third input.
// NOTE(review): the flag presumably selects the stateful variant (KV cache
// kept in a variable) -- confirm against make_llm_kv_cache_pattern.
TEST(KVCacheTest, smoke_multipleIterations_stateful) {
#if defined(ANDROID)
    GTEST_SKIP();
#endif
    auto core = ov::Core();

    const size_t batch = 1;
    const size_t n_heads = 32;
    const size_t n_features = 80;
    const size_t context_size = 20;
    size_t cache_size = 0;

    ov::element::Type element_type = ov::element::f16;

    auto model = tests::make_llm_kv_cache_pattern(batch, n_heads, n_features, element_type, true);
    auto ref_model = tests::make_llm_kv_cache_pattern(batch, n_heads, n_features, element_type, false);
    auto compiled_model = core.compile_model(model,
                                             ov::test::utils::DEVICE_GPU,
                                             ov::hint::inference_precision(ov::element::f16));

    auto input0 = model->get_parameters().at(0);
    auto input1 = model->get_parameters().at(1);
    auto output0 = model->get_results().at(0);

    // Reshapes the reference model to the given tensor shapes and interprets it.
    // The caller uses element 0 of the returned vector as the next KV cache and
    // element 1 as the expected matmul output.
    auto get_ref_results = [&ref_model](const ov::Tensor& kv_cache,
                                        const ov::Tensor& new_token_data,
                                        const ov::Tensor& matmul_data) {
        auto ref_input0 = ref_model->get_parameters().at(0);
        auto ref_input1 = ref_model->get_parameters().at(1);
        auto ref_input2 = ref_model->get_parameters().at(2);
        ngraph::helpers::resize_function(
            ref_model,
            {kv_cache.get_shape(), new_token_data.get_shape(), matmul_data.get_shape()});
        return ngraph::helpers::interpretFunction(
            ref_model,
            {{ref_input0, kv_cache}, {ref_input1, new_token_data}, {ref_input2, matmul_data}});
    };

    // Compares expected[j] with actual[j] for every result j of the GPU model,
    // using the comparator registered for the node that feeds the result.
    // When that node is a Convert whose own producer is not a Result, the
    // comparator is looked up for the Convert's producer instead.
    // Note: a failing ASSERT_* here returns from this lambda only, not from the
    // enclosing test body.
    auto compare_tensors = [&model](const std::vector<ov::Tensor>& expected,
                                    const std::vector<ov::Tensor>& actual) {
        ASSERT_EQ(expected.size(), actual.size());
        ASSERT_EQ(expected.size(), model->get_results().size());

        const auto& compareMap = ov::test::utils::getCompareMap();
        const auto& results = model->get_results();
        for (size_t j = 0; j < results.size(); j++) {
            const auto& result = results[j];
            for (size_t i = 0; i < result->get_input_size(); ++i) {
                std::shared_ptr<ov::Node> inputNode = result->get_input_node_shared_ptr(i);
                if (std::dynamic_pointer_cast<ov::op::v0::Convert>(inputNode)) {
                    std::shared_ptr<ov::Node> nextNodePtr = inputNode->get_input_node_shared_ptr(0);
                    if (!ngraph::is_type<ov::op::v0::Result>(nextNodePtr)) {
                        inputNode = nextNodePtr;
                    }
                }
                auto it = compareMap.find(inputNode->get_type_info());
                ASSERT_NE(it, compareMap.end());
                it->second(inputNode, i, expected[j], actual[j], 1e-4f, 1e-4f);
            }
        }
    };

    auto infer_request = compiled_model.create_infer_request();
    auto matmul_out = infer_request.get_tensor(output0);
    auto new_token_input = infer_request.get_tensor(input0);
    auto matmul_input = infer_request.get_tensor(input1);

    infer_request.set_tensor(input0, new_token_input);
    infer_request.set_tensor(input1, matmul_input);

    ov::Tensor ref_kv_cache;

    // First inference: context_size tokens at once; the reference starts from
    // a KV cache whose cache dimension is cache_size (0 at this point).
    {
        const ov::Shape new_token_size_initial = {batch, context_size, n_heads, n_features};
        const ov::Shape kv_cache_size_initial = {batch, n_heads, cache_size, n_features};
        const ov::Shape matmul_in_size_initial = {batch, n_heads, context_size, context_size};

        auto new_token_data = ov::test::utils::create_and_fill_tensor(element_type, new_token_size_initial);
        auto matmul_data = ov::test::utils::create_and_fill_tensor(element_type, matmul_in_size_initial);

        new_token_input.set_shape(new_token_data.get_shape());
        matmul_input.set_shape(matmul_data.get_shape());

        new_token_data.copy_to(new_token_input);
        matmul_data.copy_to(matmul_input);

        ref_kv_cache = ov::Tensor(element_type, kv_cache_size_initial);

        auto ref_results = get_ref_results(ref_kv_cache, new_token_data, matmul_data);
        ref_kv_cache = ref_results[0];

        infer_request.infer();

        compare_tensors({ref_results[1]}, {matmul_out});

        cache_size += context_size;
    }

    // Following inferences: input_tokens new token(s) per iteration; the last
    // dimension of the matmul input grows by input_tokens each time.
    const size_t input_tokens = 1;
    const size_t niters = 10;
    const ov::Shape new_token_size = {batch, input_tokens, n_heads, n_features};
    size_t context_length = cache_size + input_tokens;
    for (size_t i = 0; i < niters; i++, context_length += input_tokens) {
        ov::Shape matmul_in_size_loop = {batch, n_heads, input_tokens, context_length};
        auto new_token_data = ov::test::utils::create_and_fill_tensor(element_type, new_token_size);
        auto matmul_data = ov::test::utils::create_and_fill_tensor(element_type, matmul_in_size_loop);

        // The reference is fed the cache produced by its previous run.
        auto ref_results = get_ref_results(ref_kv_cache, new_token_data, matmul_data);
        ref_kv_cache = ref_results[0];

        new_token_input.set_shape(new_token_data.get_shape());
        matmul_input.set_shape(matmul_data.get_shape());
        new_token_data.copy_to(new_token_input);
        matmul_data.copy_to(matmul_input);

        infer_request.infer();

        compare_tensors({ref_results[1]}, {matmul_out});
    }
}
|
||||
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
|
@ -51,11 +51,9 @@ protected:
|
||||
for (auto&& shape : inputDynamicShapes) {
|
||||
params.push_back(std::make_shared<ov::op::v0::Parameter>(input_precision, shape));
|
||||
}
|
||||
const VariableInfo variable_info { inputDynamicShapes[0], input_precision, "v0" };
|
||||
auto variable = std::make_shared<ov::op::util::Variable>(variable_info);
|
||||
auto read_value = std::make_shared<ov::op::v6::ReadValue>(params.at(0), variable);
|
||||
auto read_value = std::make_shared<ov::op::v3::ReadValue>(params.at(0), "v0");
|
||||
auto add = std::make_shared<ov::op::v1::Add>(read_value, params.at(0));
|
||||
auto assign = std::make_shared<ov::op::v6::Assign>(add, variable);
|
||||
auto assign = std::make_shared<ov::op::v3::Assign>(add, "v0");
|
||||
auto res = std::make_shared<ov::op::v0::Result>(add);
|
||||
function = std::make_shared<ov::Model>(ResultVector { res }, SinkVector { assign }, params);
|
||||
}
|
||||
|
@ -20,6 +20,10 @@ file(GLOB_RECURSE SOURCES_MAIN
|
||||
# openvino graph transformation
|
||||
"${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/transformations/*.hpp"
|
||||
"${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/transformations/*.cpp"
|
||||
"${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/variable_state.cpp"
|
||||
"${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/remote_context.cpp"
|
||||
"${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp"
|
||||
"${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp"
|
||||
)
|
||||
|
||||
if (NOT ENABLE_ONEDNN_FOR_GPU)
|
||||
@ -45,6 +49,7 @@ set(SOURCES_ALL
|
||||
)
|
||||
|
||||
add_executable(${TARGET_NAME} ${SOURCES_ALL})
|
||||
target_compile_definitions(${TARGET_NAME} PRIVATE CI_BUILD_NUMBER="")
|
||||
|
||||
ov_set_threading_interface_for(${TARGET_NAME})
|
||||
|
||||
|
@ -2,6 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "intel_gpu/plugin/remote_context.hpp"
|
||||
#include "intel_gpu/plugin/variable_state.hpp"
|
||||
#include "intel_gpu/runtime/memory.hpp"
|
||||
#include "test_utils.h"
|
||||
|
||||
#include <intel_gpu/primitives/input_layout.hpp>
|
||||
@ -10,6 +13,7 @@
|
||||
#include <intel_gpu/primitives/read_value.hpp>
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace ov::intel_gpu;
|
||||
using namespace ::tests;
|
||||
|
||||
template<typename T>
|
||||
@ -37,19 +41,25 @@ struct variable_test : public ::testing::TestWithParam<VariableParams<T>> {
|
||||
|
||||
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->assign_variables_memories({ { "v0", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) } });
|
||||
auto context = std::make_shared<RemoteContextImpl>("GPU", std::vector<cldnn::device::ptr>{engine.get_device()});
|
||||
auto variable = std::make_shared<VariableState>(VariableStateInfo{"v0", variable_layout}, context, network->get_shape_predictor());
|
||||
network->set_variable("v0", variable);
|
||||
network->set_input_data("input", input_data);
|
||||
|
||||
constexpr size_t number_of_inferences = 5;
|
||||
for (size_t inference = 1; inference <= number_of_inferences; ++inference) {
|
||||
const auto outputs = network->execute();
|
||||
const auto output = outputs.at("assign").get_memory();
|
||||
const cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
const cldnn::mem_lock<T, mem_lock_type::read> output_ptr(output, get_test_stream());
|
||||
const auto output_count = output_ptr.size();
|
||||
ASSERT_EQ(output_count, param.values.size()) << "inference " << inference;
|
||||
|
||||
for (size_t i = 0; i < output_count; ++i) {
|
||||
ASSERT_EQ(output_ptr[i], inference * param.values[i]) << "inference " << inference;
|
||||
if (ov::element::Type(output->get_layout().data_type).is_real()) {
|
||||
ASSERT_FLOAT_EQ(output_ptr[i], (inference + 1) * param.values[i]) << "inference " << inference;
|
||||
} else {
|
||||
ASSERT_EQ(output_ptr[i], (inference + 1) * param.values[i]) << "inference " << inference;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -125,7 +135,9 @@ void test_exception_on_wrong_layout(bool is_caching_test) {
|
||||
|
||||
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->assign_variables_memories({ { "v0", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) } });
|
||||
auto context = std::make_shared<RemoteContextImpl>("GPU", std::vector<cldnn::device::ptr>{engine.get_device()});
|
||||
auto variable = std::make_shared<VariableState>(VariableStateInfo{"v0", variable_layout}, context, network->get_shape_predictor());
|
||||
network->set_variable("v0", variable);
|
||||
network->set_input_data("input", input_data);
|
||||
network->set_input_data("wrong_input", wrong_input_data);
|
||||
|
||||
@ -147,12 +159,12 @@ template <typename T>
|
||||
void test_different_output_data_type(bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
const layout in_layout{data_types::f32, format::bfyx, tensor{1}};
|
||||
const layout in_layout{{ 1 }, data_types::f32, format::bfyx};
|
||||
const auto input_data = engine.allocate_memory(in_layout);
|
||||
std::vector<float> inputs = { 70.0f };
|
||||
set_values(input_data, inputs);
|
||||
|
||||
const layout variable_layout{data_types::f16, format::bfyx, tensor{1}};
|
||||
const layout variable_layout{{ 1 }, data_types::f16, format::bfyx};
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input_data->get_layout()));
|
||||
@ -161,13 +173,14 @@ void test_different_output_data_type(bool is_caching_test) {
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->assign_variables_memories({ { "v0", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) } });
|
||||
auto context = std::make_shared<RemoteContextImpl>("GPU", std::vector<cldnn::device::ptr>{engine.get_device()});
|
||||
auto variable = std::make_shared<VariableState>(VariableStateInfo{"v0", variable_layout}, context, network->get_shape_predictor());
|
||||
network->set_variable("v0", variable);
|
||||
network->set_input_data("input", input_data);
|
||||
|
||||
const auto outputs = network->execute();
|
||||
const auto output = outputs.at("assign").get_memory();
|
||||
const cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
const cldnn::mem_lock<T, mem_lock_type::read> output_ptr(output, get_test_stream());
|
||||
|
||||
for (size_t i = 0; i < output_ptr.size(); ++i) {
|
||||
ASSERT_EQ(half_to_float(output_ptr[i]), inputs[i]);
|
||||
@ -216,19 +229,21 @@ void test_variables_are_preserved_across_inferences(bool is_caching_test) {
|
||||
|
||||
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->assign_variables_memories({
|
||||
{ "v1", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) },
|
||||
{ "v2", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) },
|
||||
{ "v_result", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) }
|
||||
});
|
||||
auto context = std::make_shared<RemoteContextImpl>("GPU", std::vector<cldnn::device::ptr>{engine.get_device()});
|
||||
auto variable1 = std::make_shared<VariableState>(VariableStateInfo{"v1", variable_layout}, context, network->get_shape_predictor());
|
||||
auto variable2 = std::make_shared<VariableState>(VariableStateInfo{"v2", variable_layout}, context, network->get_shape_predictor());
|
||||
auto variable3 = std::make_shared<VariableState>(VariableStateInfo{"v_result", variable_layout}, context, network->get_shape_predictor());
|
||||
network->set_variable("v1", variable1);
|
||||
network->set_variable("v2", variable2);
|
||||
network->set_variable("v_result", variable3);
|
||||
network->set_input_data("input_1", input_1);
|
||||
network->set_input_data("input_2", input_2);
|
||||
|
||||
// set variables with assign on 1st inference, read with read_values on 2nd one
|
||||
// network->execute();
|
||||
network->execute();
|
||||
const auto outputs = network->execute();
|
||||
const auto output = outputs.at("read_result").get_memory();
|
||||
const cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
const cldnn::mem_lock<T, mem_lock_type::read> output_ptr(output, get_test_stream());
|
||||
ASSERT_EQ(output_ptr[0], value_1 + value_2);
|
||||
}
|
||||
|
||||
|
@ -2,17 +2,20 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
#include <common_test_utils/ov_tensor_utils.hpp>
|
||||
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||
#include "ov_models/utils/ov_helpers.hpp"
|
||||
#include "ov_models/builders.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace ov::test;
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
namespace ov {
|
||||
namespace test {
|
||||
|
||||
class StatefulModelTest : public SubgraphBaseTest {
|
||||
static constexpr ov::element::Type_t test_element_type = ov::element::Type_t::f32;
|
||||
|
||||
class StatefulModelTest : public SubgraphBaseTest, public testing::WithParamInterface<const char*> {
|
||||
public:
|
||||
static constexpr ov::element::Type_t testPrc = ov::element::Type_t::f32;
|
||||
|
||||
@ -68,7 +71,7 @@ public:
|
||||
class StaticShapeStatefulModel : public StatefulModelTest {
|
||||
public:
|
||||
void SetUp() override {
|
||||
targetDevice = ov::test::utils::DEVICE_CPU;
|
||||
targetDevice = GetParam();
|
||||
ov::element::Type netPrc = testPrc;
|
||||
|
||||
const ov::Shape inpShape = {1, 1};
|
||||
@ -86,9 +89,9 @@ public:
|
||||
// Creating ov::Model
|
||||
auto read = std::make_shared<ov::op::v6::ReadValue>(init_const, variable);
|
||||
std::vector<std::shared_ptr<ov::Node>> args = {arg, read};
|
||||
auto add = ngraph::builder::makeEltwise(arg, read, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto add = std::make_shared<ov::op::v1::Add>(arg, read);
|
||||
auto assign = std::make_shared<ov::op::v6::Assign>(add, variable);
|
||||
auto add2 = ngraph::builder::makeEltwise(add, read, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto add2 = std::make_shared<ov::op::v1::Add>(add, read);
|
||||
auto res = std::make_shared<ov::op::v0::Result>(add2);
|
||||
function = std::make_shared<ov::Model>(ov::ResultVector({res}), ov::SinkVector({assign}), ov::ParameterVector({arg}));
|
||||
}
|
||||
@ -139,7 +142,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(StaticShapeStatefulModel, smoke_Run_Stateful_Static) {
|
||||
TEST_P(StaticShapeStatefulModel, smoke_Run_Stateful_Static) {
|
||||
prepare();
|
||||
run_test();
|
||||
reset_state();
|
||||
@ -171,7 +174,7 @@ TEST_F(StaticShapeStatefulModel, smoke_Run_Stateful_Static) {
|
||||
class StaticShapeTwoStatesModel : public StatefulModelTest {
|
||||
public:
|
||||
void SetUp() override {
|
||||
targetDevice = ov::test::utils::DEVICE_CPU;
|
||||
targetDevice = GetParam();
|
||||
ov::element::Type netPrc = testPrc;
|
||||
|
||||
const ov::Shape inpShape = {1, 1};
|
||||
@ -190,10 +193,10 @@ public:
|
||||
|
||||
// Creating ov::Model
|
||||
auto read0 = std::make_shared<ov::op::v6::ReadValue>(init_const, variable0);
|
||||
auto add = ngraph::builder::makeEltwise(arg, read0, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto add = std::make_shared<ov::op::v1::Add>(arg, read0);
|
||||
auto assign0 = std::make_shared<ov::op::v6::Assign>(add, variable0);
|
||||
auto read1 = std::make_shared<ov::op::v6::ReadValue>(init_const, variable1);
|
||||
auto add2 = ngraph::builder::makeEltwise(add, read1, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto add2 = std::make_shared<ov::op::v1::Add>(add, read1);
|
||||
auto assign1 = std::make_shared<ov::op::v6::Assign>(add2, variable1);
|
||||
auto res = std::make_shared<ov::op::v0::Result>(add2);
|
||||
function = std::make_shared<ov::Model>(
|
||||
@ -258,7 +261,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(StaticShapeTwoStatesModel, smoke_Run_Static_Two_States) {
|
||||
TEST_P(StaticShapeTwoStatesModel, smoke_Run_Static_Two_States) {
|
||||
prepare();
|
||||
run_test();
|
||||
}
|
||||
@ -284,7 +287,7 @@ TEST_F(StaticShapeTwoStatesModel, smoke_Run_Static_Two_States) {
|
||||
class DynamicShapeStatefulModel : public StatefulModelTest {
|
||||
public:
|
||||
void SetUp(bool use_param) {
|
||||
targetDevice = ov::test::utils::DEVICE_CPU;
|
||||
targetDevice = GetParam();
|
||||
ov::element::Type netPrc = testPrc;
|
||||
|
||||
const ov::Shape inpShape = {1, 1};
|
||||
@ -304,9 +307,9 @@ public:
|
||||
std::make_shared<ov::op::v6::ReadValue>(arg, variable) :
|
||||
std::make_shared<ov::op::v6::ReadValue>(variable);
|
||||
std::vector<std::shared_ptr<ov::Node>> args = {arg, read};
|
||||
auto add = ngraph::builder::makeEltwise(arg, read, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto add = std::make_shared<ov::op::v1::Add>(arg, read);
|
||||
constexpr int concat_axis = 0;
|
||||
auto concat = std::make_shared<ngraph::opset1::Concat>(ov::NodeVector{arg, add}, concat_axis);
|
||||
auto concat = std::make_shared<ov::op::v0::Concat>(ov::NodeVector{arg, add}, concat_axis);
|
||||
auto assign = std::make_shared<ov::op::v6::Assign>(concat, variable);
|
||||
auto res = std::make_shared<ov::op::v0::Result>(concat);
|
||||
function = std::make_shared<ov::Model>(ov::ResultVector({res}), ov::SinkVector({assign}), ov::ParameterVector({arg}));
|
||||
@ -389,7 +392,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(DynamicShapeStatefulModelDefault, smoke_Run_Stateful_Dynamic_Default) {
|
||||
TEST_P(DynamicShapeStatefulModelDefault, smoke_Run_Stateful_Dynamic_Default) {
|
||||
prepare();
|
||||
run_test();
|
||||
reset_state();
|
||||
@ -404,7 +407,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(DynamicShapeStatefulModelParam, smoke_Run_Stateful_Dynamic_Param) {
|
||||
TEST_P(DynamicShapeStatefulModelParam, smoke_Run_Stateful_Dynamic_Param) {
|
||||
prepare();
|
||||
run_test();
|
||||
reset_state();
|
||||
@ -442,9 +445,8 @@ TEST_F(DynamicShapeStatefulModelParam, smoke_Run_Stateful_Dynamic_Param) {
|
||||
class DynamicShapeStatefulModelStateAsInp : public StatefulModelTest {
|
||||
public:
|
||||
void SetUp() override {
|
||||
targetDevice = ov::test::utils::DEVICE_CPU;
|
||||
targetDevice = GetParam();
|
||||
ov::element::Type netPrc = testPrc;
|
||||
const_val = 42.0f;
|
||||
|
||||
const ov::Shape inpShape = {1, 1};
|
||||
const InputShape input_shape = {{1, -1}, {{1, 1}, {1, 2}, {1, 4}, {1, 8}, {1, 16}}};
|
||||
@ -463,9 +465,9 @@ public:
|
||||
// Creating ov::Model
|
||||
auto read = std::make_shared<ov::op::v6::ReadValue>(init_param, variable);
|
||||
std::vector<std::shared_ptr<ov::Node>> args = {param1, param2, read};
|
||||
auto add1 = ngraph::builder::makeEltwise(param1, param2, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto add1 = std::make_shared<ov::op::v1::Add>(param1, param2);
|
||||
auto add_const = ov::op::v0::Constant::create(netPrc, ov::Shape{1, 1}, {const_val});
|
||||
auto add2 = ngraph::builder::makeEltwise(add1, add_const, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto add2 = std::make_shared<ov::op::v1::Add>(add1, add_const);
|
||||
constexpr int concat_axis = 1;
|
||||
auto concat = std::make_shared<ov::op::v0::Concat>(ov::NodeVector{add2, read}, concat_axis);
|
||||
auto assign = std::make_shared<ov::op::v6::Assign>(concat, variable);
|
||||
@ -562,14 +564,15 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
float const_val = 0.0f;
|
||||
const float const_val = 42.0f;
|
||||
};
|
||||
|
||||
TEST_F(DynamicShapeStatefulModelStateAsInp, smoke_Run_Stateful_Dynamic_State_As_Inp) {
|
||||
TEST_P(DynamicShapeStatefulModelStateAsInp, smoke_Run_Stateful_Dynamic_State_As_Inp) {
|
||||
prepare();
|
||||
run_test();
|
||||
reset_state();
|
||||
run_test();
|
||||
}
|
||||
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
} // namespace test
|
||||
} // namespace ov
|
Loading…
Reference in New Issue
Block a user