[GPU] Assign-6 and ReadValue-6 (#11780)
* Add methods for access to varables information in Program class * add ReadValue and Assign primitives * ReadValue and Assign implementations * Implementation of memory states allocation * Add output existance check in primitive_inst to avoid crashes if output is set during execution * Add memory states management functionality in network component * Integration of memory states feature in inference request component * Exclude constant path for read_value and assign nodes in cldnn transformations * Improve memory states test to run on a single inference request * unit tests for ReadValue and Assign * single-layer test for ReadValue and Assign * Add QueryState API implementation * Add memory state test which covers dynamic batch case Co-authored-by: Oleksii Khovan <okhovan@lohika.com>
This commit is contained in:
@@ -54,7 +54,7 @@ protected:
|
||||
/**
|
||||
* @brief A default dtor
|
||||
*/
|
||||
~IVariableStateInternal() = default;
|
||||
virtual ~IVariableStateInternal() = default;
|
||||
|
||||
std::string name;
|
||||
Blob::Ptr state;
|
||||
|
||||
@@ -52,6 +52,18 @@ class primitive_inst;
|
||||
struct network {
|
||||
public:
|
||||
using ptr = std::shared_ptr<network>;
|
||||
|
||||
struct VariableState {
|
||||
using Ptr = std::shared_ptr<VariableState>;
|
||||
|
||||
cldnn::memory_ptr memory;
|
||||
bool is_set;
|
||||
VariableState(cldnn::memory_ptr mem = nullptr) :
|
||||
memory { mem }, is_set { false } {
|
||||
}
|
||||
};
|
||||
using variables_states_map = std::map<std::string, VariableState::Ptr>;
|
||||
|
||||
explicit network(program::ptr program, stream::ptr stream, bool is_internal = false, bool is_primary_stream = true);
|
||||
network(engine& engine,
|
||||
const topology& topo,
|
||||
@@ -194,6 +206,12 @@ public:
|
||||
return *_memory_pool;
|
||||
}
|
||||
|
||||
/// Assigns memory state locations
|
||||
void assign_variables_memories(variables_states_map &&variables_memories);
|
||||
|
||||
/// Returns memory state @p variable_id of stateful network
|
||||
VariableState& get_variable_memory(const std::string &variable_id);
|
||||
|
||||
private:
|
||||
using output_chains_map = std::map<primitive_id, std::vector<std::shared_ptr<primitive_inst>>>;
|
||||
uint32_t net_id = 0;
|
||||
@@ -209,6 +227,8 @@ private:
|
||||
std::vector<std::shared_ptr<primitive_inst>> _outputs;
|
||||
std::list<std::shared_ptr<primitive_inst>> _exec_order;
|
||||
std::list<std::shared_ptr<primitive_inst>> _data_outputs;
|
||||
variables_states_map _variables_states;
|
||||
std::vector<std::shared_ptr<primitive_inst>> _variable_state_primitives;
|
||||
|
||||
std::unordered_map<primitive_id, event::ptr> _events;
|
||||
output_chains_map _output_chains;
|
||||
|
||||
@@ -87,6 +87,27 @@ inline cldnn::data_types DataTypeFromPrecision(ngraph::element::Type t) {
|
||||
}
|
||||
}
|
||||
|
||||
inline InferenceEngine::Precision PrecisionFromDataType(cldnn::data_types dt) {
|
||||
switch (dt) {
|
||||
case cldnn::data_types::bin:
|
||||
return InferenceEngine::Precision::ePrecision::BIN;
|
||||
case cldnn::data_types::u8:
|
||||
return InferenceEngine::Precision::ePrecision::U8;
|
||||
case cldnn::data_types::i8:
|
||||
return InferenceEngine::Precision::ePrecision::I8;
|
||||
case cldnn::data_types::f16:
|
||||
return InferenceEngine::Precision::ePrecision::FP16;
|
||||
case cldnn::data_types::f32:
|
||||
return InferenceEngine::Precision::ePrecision::FP32;
|
||||
case cldnn::data_types::i32:
|
||||
return InferenceEngine::Precision::ePrecision::I32;
|
||||
case cldnn::data_types::i64:
|
||||
return InferenceEngine::Precision::ePrecision::I64;
|
||||
default:
|
||||
IE_THROW(ParameterMismatch) << "The plugin does not support " << cldnn::data_type_traits::name(dt) << " data type";
|
||||
}
|
||||
}
|
||||
|
||||
inline cldnn::format FormatFromLayout(InferenceEngine::Layout l) {
|
||||
switch (l) {
|
||||
// TODO: change 6d case once new layout added in IE
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "ie_blob.h"
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
|
||||
@@ -38,6 +39,7 @@ public:
|
||||
POSTPROC = 4
|
||||
};
|
||||
typedef std::shared_ptr<Graph> Ptr;
|
||||
using variable_states_map = std::map<std::string, std::vector<cldnn::network::VariableState::Ptr>>;
|
||||
|
||||
Graph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
|
||||
explicit Graph(std::shared_ptr<Graph> graph, uint16_t stream_id = 0);
|
||||
@@ -55,6 +57,7 @@ public:
|
||||
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
|
||||
const InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_program->GetNetworkInputs(); }
|
||||
const InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_program->GetNetworkOutputs(); }
|
||||
variable_states_map AllocateVariablesMemories();
|
||||
std::map<std::string, std::pair<int64_t, int64_t>> GetInputDynBatchDims() { return m_program->m_input_batch_dim; }
|
||||
std::map<std::string, int64_t> GetOutputDynBatchDims() { return m_program->m_output_batch_dim; }
|
||||
size_t GetNetworksCount() const { return m_networks.size(); }
|
||||
|
||||
@@ -48,6 +48,7 @@ public:
|
||||
void SetBlobs(const std::string& name, const std::vector<InferenceEngine::Blob::Ptr> &data) override;
|
||||
|
||||
void SetBatch(int batch = -1) override;
|
||||
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> QueryState() override;
|
||||
void SetGraph(std::shared_ptr<Graph> graph);
|
||||
void EnableProfiling() { m_useProfiling = true; }
|
||||
void EnableStreams() { m_useStreams = true; }
|
||||
@@ -106,6 +107,7 @@ private:
|
||||
|
||||
std::map<cldnn::primitive_id, cldnn::network_output> internal_outputs;
|
||||
std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> internal_outputs_dynamic;
|
||||
Graph::variable_states_map variables_states_;
|
||||
};
|
||||
|
||||
} // namespace intel_gpu
|
||||
|
||||
@@ -209,6 +209,8 @@ REGISTER_FACTORY(v6, ExperimentalDetectronROIFeatureExtractor);
|
||||
REGISTER_FACTORY(v6, ExperimentalDetectronTopKROIs)
|
||||
REGISTER_FACTORY(v6, ExperimentalDetectronGenerateProposalsSingleImage);
|
||||
REGISTER_FACTORY(v6, ExperimentalDetectronDetectionOutput);
|
||||
REGISTER_FACTORY(v6, Assign);
|
||||
REGISTER_FACTORY(v6, ReadValue);
|
||||
|
||||
// ------------------------------ Supported v7 ops ------------------------------ //
|
||||
REGISTER_FACTORY(v7, DFT);
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
|
||||
#include <cpp/ie_cnn_network.h>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
@@ -150,6 +151,12 @@ public:
|
||||
|
||||
std::shared_ptr<cldnn::topology> GetTopology() const { return m_topology; }
|
||||
|
||||
using variables_state_info_map = std::map<std::string, std::set<cldnn::layout>>;
|
||||
|
||||
void AddVariableStateInfo(const std::string& variable_id, const cldnn::layout& layout);
|
||||
|
||||
const variables_state_info_map& GetVariablesStatesInfo() const { return m_variablesStateInfo; }
|
||||
|
||||
private:
|
||||
static factories_map_t factories_map;
|
||||
std::vector<std::shared_ptr<cldnn::program>> m_programs;
|
||||
@@ -159,6 +166,7 @@ private:
|
||||
std::shared_ptr<cldnn::topology> m_topology;
|
||||
InferenceEngine::InputsDataMap m_networkInputs;
|
||||
InferenceEngine::OutputsDataMap m_networkOutputs;
|
||||
variables_state_info_map m_variablesStateInfo;
|
||||
|
||||
bool queryMode;
|
||||
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include <cpp_interfaces/interface/ie_ivariable_state_internal.hpp>
|
||||
#include "intel_gpu/plugin/graph.hpp"
|
||||
#include <functional>
|
||||
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
class VariableState : public InferenceEngine::IVariableStateInternal {
|
||||
public:
|
||||
VariableState(const std::string& name, const std::vector<cldnn::network::VariableState::Ptr>& states,
|
||||
std::shared_ptr<cldnn::engine> engine, int currentBatch);
|
||||
|
||||
/**
|
||||
* @brief Reset internal variable state for relevant infer request, to a value specified as
|
||||
* default for according `ReadValue` node
|
||||
*/
|
||||
void Reset() override;
|
||||
|
||||
/**
|
||||
* @brief Sets the new state for the next inference
|
||||
* @param newState A new state
|
||||
*/
|
||||
void SetState(const InferenceEngine::Blob::Ptr &newState) override;
|
||||
|
||||
/**
|
||||
* @brief Returns the value of the variable state.
|
||||
* @return The value of the variable state
|
||||
*/
|
||||
InferenceEngine::Blob::CPtr GetState() const override;
|
||||
|
||||
protected:
|
||||
InferenceEngine::SizeVector AggregateShape(const cldnn::layout &layout);
|
||||
void IterateOverStates(std::function<void(cldnn::network::VariableState&)> f) const;
|
||||
|
||||
private:
|
||||
int currentBatch_;
|
||||
std::vector<cldnn::network::VariableState::Ptr> states_;
|
||||
InferenceEngine::TensorDesc desc_;
|
||||
std::shared_ptr<cldnn::engine> engine_;
|
||||
};
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
@@ -0,0 +1,43 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "primitive.hpp"
|
||||
#include "intel_gpu/runtime/memory.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Sets an input value to the variable_id variable.
|
||||
struct assign : public primitive_base<assign> {
|
||||
CLDNN_DECLARE_PRIMITIVE(assign)
|
||||
|
||||
/// @brief Constructs Assign primitive.
|
||||
/// @param id This primitive id
|
||||
/// @param inputs Input parameters ids
|
||||
/// @param variable_id Variable id
|
||||
/// @param output_layout Memory layout
|
||||
assign(const primitive_id &id,
|
||||
const std::vector<primitive_id>& inputs,
|
||||
const std::string& variable_id,
|
||||
const layout& output_layout)
|
||||
: primitive_base(id, inputs, "", {}, optional_data_type{output_layout.data_type}),
|
||||
variable_id{variable_id},
|
||||
output_layout{output_layout} {}
|
||||
|
||||
std::string variable_id;
|
||||
layout output_layout;
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
||||
@@ -0,0 +1,43 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "primitive.hpp"
|
||||
#include "intel_gpu/runtime/memory.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Returns value of the variable_id variable.
|
||||
struct read_value : public primitive_base<read_value> {
|
||||
CLDNN_DECLARE_PRIMITIVE(read_value)
|
||||
|
||||
/// @brief Constructs ReadValue primitive.
|
||||
/// @param id This primitive id
|
||||
/// @param inputs Input parameters ids
|
||||
/// @param variable_id Variable id
|
||||
/// @param output_layout Memory layout
|
||||
read_value(const primitive_id& id,
|
||||
const std::vector<primitive_id>& inputs,
|
||||
const std::string& variable_id,
|
||||
const layout& output_layout)
|
||||
: primitive_base(id, inputs, "", {}, optional_data_type{output_layout.data_type}),
|
||||
variable_id{variable_id},
|
||||
output_layout{output_layout} {}
|
||||
|
||||
std::string variable_id;
|
||||
layout output_layout;
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
||||
38
src/plugins/intel_gpu/src/graph/assign.cpp
Normal file
38
src/plugins/intel_gpu/src/graph/assign.cpp
Normal file
@@ -0,0 +1,38 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <assign_inst.h>
|
||||
#include "primitive_type_base.h"
|
||||
#include <sstream>
|
||||
#include <json_object.h>
|
||||
#include <data_inst.h>
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
primitive_type_id assign::type_id() {
|
||||
static primitive_type_base<assign> instance;
|
||||
return &instance;
|
||||
}
|
||||
|
||||
assign_inst::typed_primitive_inst(network& network, const assign_node& node) :
|
||||
parent{network, node, false},
|
||||
memory_state::variable{node.get_primitive()->variable_id} {
|
||||
}
|
||||
|
||||
layout assign_inst::calc_output_layout(const assign_node& node) {
|
||||
return node.get_primitive()->output_layout;
|
||||
}
|
||||
|
||||
std::string assign_inst::to_string(const assign_node& node) {
|
||||
auto node_info = node.desc_to_json();
|
||||
json_composite assign_info;
|
||||
assign_info.add("input id", node.input().id());
|
||||
assign_info.add("variable id", node.get_primitive()->variable_id);
|
||||
node_info->add("assign info", assign_info);
|
||||
std::stringstream primitive_description;
|
||||
node_info->dump(primitive_description);
|
||||
return primitive_description.str();
|
||||
}
|
||||
|
||||
} // namespace cldnn
|
||||
53
src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp
Normal file
53
src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp
Normal file
@@ -0,0 +1,53 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "assign_inst.h"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "register.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace cpu {
|
||||
|
||||
struct assign_impl : public typed_primitive_impl<assign> {
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<assign_impl>(*this);
|
||||
}
|
||||
|
||||
event::ptr execute_impl(const std::vector<event::ptr>& events, assign_inst& instance) override {
|
||||
const auto arg = instance.argument;
|
||||
const auto variable_id = arg.variable_id;
|
||||
auto& variable = instance.get_network().get_variable_memory(variable_id);
|
||||
|
||||
if (variable.memory->get_layout() != arg.output_layout) {
|
||||
CLDNN_ERROR_MESSAGE(instance.id(), "Layout mismatch");
|
||||
}
|
||||
|
||||
auto& stream = instance.get_network().get_stream();
|
||||
|
||||
for (auto e : events) {
|
||||
e->wait();
|
||||
}
|
||||
|
||||
const auto ev_set_memory = variable.memory->copy_from(stream, instance.input_memory());
|
||||
variable.is_set = true;
|
||||
|
||||
return ev_set_memory;
|
||||
}
|
||||
|
||||
void init_kernels() override {}
|
||||
|
||||
public:
|
||||
static primitive_impl* create(assign_node const& arg) { return new assign_impl{}; }
|
||||
};
|
||||
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_assign_impl::attach_assign_impl() {
|
||||
implementation_map<assign>::add(impl_types::cpu, assign_impl::create, {});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace cpu
|
||||
} // namespace cldnn
|
||||
53
src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp
Normal file
53
src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp
Normal file
@@ -0,0 +1,53 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "read_value_inst.h"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "register.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace cpu {
|
||||
|
||||
struct read_value_impl : public typed_primitive_impl<read_value> {
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<read_value_impl>(*this);
|
||||
}
|
||||
|
||||
event::ptr execute_impl(const std::vector<event::ptr>& events, read_value_inst& instance) override {
|
||||
for (auto e : events) {
|
||||
e->wait();
|
||||
}
|
||||
const auto arg = instance.argument;
|
||||
const auto variable_id = arg.variable_id;
|
||||
|
||||
auto& variable = instance.get_network().get_variable_memory(variable_id);
|
||||
|
||||
if (variable.memory->get_layout() != arg.output_layout) {
|
||||
CLDNN_ERROR_MESSAGE(instance.id(), "Layout mismatch");
|
||||
}
|
||||
|
||||
if (!variable.is_set) {
|
||||
auto &stream = instance.get_network().get_stream();
|
||||
const auto ev_set_output = instance.output_memory().fill(stream, 0);
|
||||
return ev_set_output;
|
||||
}
|
||||
|
||||
return instance.get_network().get_stream().create_user_event(true);
|
||||
}
|
||||
|
||||
void init_kernels() override {}
|
||||
|
||||
public:
|
||||
static primitive_impl* create(read_value_node const& arg) { return new read_value_impl{}; }
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_read_value_impl::attach_read_value_impl() {
|
||||
implementation_map<read_value>::add(impl_types::cpu, read_value_impl::create, {});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace cpu
|
||||
} // namespace cldnn
|
||||
@@ -11,8 +11,10 @@ namespace cpu {
|
||||
static detail::attach_##prim##_impl attach_##prim
|
||||
|
||||
void register_implementations() {
|
||||
REGISTER_CPU(assign);
|
||||
REGISTER_CPU(detection_output);
|
||||
REGISTER_CPU(proposal);
|
||||
REGISTER_CPU(read_value);
|
||||
REGISTER_CPU(non_max_suppression);
|
||||
}
|
||||
|
||||
|
||||
@@ -4,8 +4,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "intel_gpu/primitives/assign.hpp"
|
||||
#include "intel_gpu/primitives/detection_output.hpp"
|
||||
#include "intel_gpu/primitives/proposal.hpp"
|
||||
#include "intel_gpu/primitives/read_value.hpp"
|
||||
#include "intel_gpu/primitives/non_max_suppression.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
@@ -20,7 +22,9 @@ namespace detail {
|
||||
attach_##prim##_impl(); \
|
||||
}
|
||||
|
||||
REGISTER_CPU(assign);
|
||||
REGISTER_CPU(proposal);
|
||||
REGISTER_CPU(read_value);
|
||||
REGISTER_CPU(non_max_suppression);
|
||||
REGISTER_CPU(detection_output);
|
||||
|
||||
|
||||
52
src/plugins/intel_gpu/src/graph/include/assign_inst.h
Normal file
52
src/plugins/intel_gpu/src/graph/include/assign_inst.h
Normal file
@@ -0,0 +1,52 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "intel_gpu/primitives/assign.hpp"
|
||||
#include "primitive_inst.h"
|
||||
#include "intel_gpu/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace memory_state {
|
||||
|
||||
class variable {
|
||||
public:
|
||||
explicit variable(const std::string& variable_id) : variable_id_ {variable_id} {}
|
||||
|
||||
const std::string& variable_id() const { return variable_id_; }
|
||||
|
||||
private:
|
||||
std::string variable_id_;
|
||||
};
|
||||
|
||||
} // namespace memory_state
|
||||
|
||||
template<>
|
||||
struct typed_program_node<assign> : public typed_program_node_base<assign> {
|
||||
using parent = typed_program_node_base<assign>;
|
||||
public:
|
||||
using parent::parent;
|
||||
|
||||
const program_node& input(std::size_t index = 0) const { return get_dependency(index); }
|
||||
};
|
||||
|
||||
using assign_node = typed_program_node<assign>;
|
||||
|
||||
template<>
|
||||
class typed_primitive_inst<assign> : public typed_primitive_inst_base<assign>, public memory_state::variable {
|
||||
using parent = typed_primitive_inst_base<assign>;
|
||||
|
||||
public:
|
||||
static layout calc_output_layout(const assign_node& node);
|
||||
|
||||
static std::string to_string(const assign_node& node);
|
||||
|
||||
public:
|
||||
typed_primitive_inst(network& network, const assign_node& desc);
|
||||
};
|
||||
|
||||
using assign_inst = typed_primitive_inst<assign>;
|
||||
|
||||
} // namespace cldnn
|
||||
39
src/plugins/intel_gpu/src/graph/include/read_value_inst.h
Normal file
39
src/plugins/intel_gpu/src/graph/include/read_value_inst.h
Normal file
@@ -0,0 +1,39 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "assign_inst.h"
|
||||
#include "intel_gpu/primitives/read_value.hpp"
|
||||
#include "primitive_inst.h"
|
||||
#include "intel_gpu/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
template<>
|
||||
struct typed_program_node<read_value> : public typed_program_node_base<read_value> {
|
||||
using parent = typed_program_node_base<read_value>;
|
||||
public:
|
||||
using parent::parent;
|
||||
|
||||
const program_node& input(std::size_t index = 0) const { return get_dependency(index); }
|
||||
};
|
||||
|
||||
using read_value_node = typed_program_node<read_value>;
|
||||
|
||||
template<>
|
||||
class typed_primitive_inst<read_value> : public typed_primitive_inst_base<read_value>, public memory_state::variable {
|
||||
using parent = typed_primitive_inst_base<read_value>;
|
||||
|
||||
public:
|
||||
static layout calc_output_layout(const read_value_node& node);
|
||||
|
||||
static std::string to_string(const read_value_node& node);
|
||||
|
||||
typed_primitive_inst(network& network, const read_value_node& desc);
|
||||
};
|
||||
|
||||
using read_value_inst = typed_primitive_inst<read_value>;
|
||||
|
||||
} // namespace cldnn
|
||||
@@ -17,6 +17,8 @@
|
||||
|
||||
#include "intel_gpu/graph/program.hpp"
|
||||
#include "intel_gpu/graph/network.hpp"
|
||||
#include "assign_inst.h"
|
||||
#include "read_value_inst.h"
|
||||
|
||||
#include "to_string_utils.h"
|
||||
#include "primitive_inst.h"
|
||||
@@ -876,6 +878,8 @@ void network::allocate_primitive_instance(program_node const& node) {
|
||||
if (node.is_type<data>())
|
||||
_data_outputs.push_back(inst);
|
||||
}
|
||||
if (std::dynamic_pointer_cast<assign_inst>(inst) || std::dynamic_pointer_cast<read_value_inst>(inst))
|
||||
_variable_state_primitives.push_back(inst);
|
||||
if (node.is_constant())
|
||||
transfer_memory_to_device(inst, node);
|
||||
}
|
||||
@@ -915,4 +919,26 @@ memory::ptr network::get_memory_from_pool(const layout& layout,
|
||||
return _memory_pool->get_memory(layout, id, get_id(), dependencies, type, reusable);
|
||||
return _memory_pool->get_memory(layout, type);
|
||||
}
|
||||
|
||||
network::VariableState& network::get_variable_memory(const std::string &variable_id) {
|
||||
auto it = _variables_states.find(variable_id);
|
||||
if (it == _variables_states.end()) {
|
||||
CLDNN_ERROR_MESSAGE(variable_id, "Variable not found");
|
||||
}
|
||||
return *it->second;
|
||||
}
|
||||
|
||||
void network::assign_variables_memories(variables_states_map &&variables_memories) {
|
||||
_variables_states = variables_memories;
|
||||
for (auto primitive : _variable_state_primitives) {
|
||||
if (const auto& memory_state_primitive = std::dynamic_pointer_cast<memory_state::variable>(primitive)) {
|
||||
auto it = _variables_states.find(memory_state_primitive->variable_id());
|
||||
if (it != _variables_states.end())
|
||||
primitive->set_output_memory(it->second->memory, false);
|
||||
else
|
||||
CLDNN_ERROR_MESSAGE(memory_state_primitive->variable_id(), "Memory state not found");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace cldnn
|
||||
|
||||
@@ -120,7 +120,7 @@ void primitive_inst::check_memory_to_set(const memory& mem, const layout& layout
|
||||
void primitive_inst::set_output_memory(memory::ptr mem_new, bool check) {
|
||||
auto& eng = _network.get_engine();
|
||||
// skip all the buzz if no action actually required
|
||||
if (eng.is_the_same_buffer(*mem_new, *_output)) {
|
||||
if (_output && eng.is_the_same_buffer(*mem_new, *_output)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -587,7 +587,8 @@ void program::post_optimize_graph(bool is_internal) {
|
||||
|
||||
// mark if the node is constant assuming that all dependencies are marked properly
|
||||
void program::mark_if_constant(program_node& node) {
|
||||
if (node.get_dependencies().empty() || node.is_type<prior_box>()) {
|
||||
if (node.get_dependencies().empty() || node.is_type<prior_box>() ||
|
||||
node.is_type<assign>() || node.is_type<read_value>()) {
|
||||
return;
|
||||
}
|
||||
node.constant = true;
|
||||
|
||||
40
src/plugins/intel_gpu/src/graph/read_value.cpp
Normal file
40
src/plugins/intel_gpu/src/graph/read_value.cpp
Normal file
@@ -0,0 +1,40 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <read_value_inst.h>
|
||||
#include "primitive_type_base.h"
|
||||
#include <sstream>
|
||||
#include <json_object.h>
|
||||
#include <data_inst.h>
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
primitive_type_id read_value::type_id() {
|
||||
static primitive_type_base<read_value> instance;
|
||||
return &instance;
|
||||
}
|
||||
|
||||
read_value_inst::typed_primitive_inst(network& network, const read_value_node& node) :
|
||||
parent(network, node, false),
|
||||
memory_state::variable{node.get_primitive()->variable_id} {
|
||||
}
|
||||
|
||||
layout read_value_inst::calc_output_layout(const read_value_node& node) {
|
||||
return node.get_primitive()->output_layout;
|
||||
}
|
||||
|
||||
std::string read_value_inst::to_string(const read_value_node& node) {
|
||||
auto node_info = node.desc_to_json();
|
||||
|
||||
json_composite read_value_info;
|
||||
read_value_info.add("input id", node.input().id());
|
||||
read_value_info.add("variable id", node.get_primitive()->variable_id);
|
||||
node_info->add("read_value info", read_value_info);
|
||||
|
||||
std::stringstream primitive_description;
|
||||
node_info->dump(primitive_description);
|
||||
return primitive_description.str();
|
||||
}
|
||||
|
||||
} // namespace cldnn
|
||||
@@ -133,6 +133,23 @@ std::shared_ptr<cldnn::network> Graph::BuildNetwork(std::shared_ptr<cldnn::progr
|
||||
return network;
|
||||
}
|
||||
|
||||
Graph::variable_states_map Graph::AllocateVariablesMemories() {
|
||||
Graph::variable_states_map states {};
|
||||
const auto& memStatesInfo = m_program->GetVariablesStatesInfo();
|
||||
for (const auto& memStateInfo : memStatesInfo) {
|
||||
std::vector<cldnn::layout> orderedLayouts {memStateInfo.second.begin(), memStateInfo.second.end()};
|
||||
std::sort(orderedLayouts.begin(), orderedLayouts.end(), [](cldnn::layout& first, cldnn::layout& second) {
|
||||
return first.size.batch[0] < second.size.batch[0];
|
||||
});
|
||||
std::vector<cldnn::network::VariableState::Ptr> memoryStates;
|
||||
memoryStates.reserve(orderedLayouts.size());
|
||||
for (const auto& layout : orderedLayouts)
|
||||
memoryStates.push_back(std::make_shared<cldnn::network::VariableState>(GetEngine()->allocate_memory(layout, false)));
|
||||
states.insert({memStateInfo.first, memoryStates });
|
||||
}
|
||||
return states;
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& primitives_info,
|
||||
bool filter_const_primitives) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::GetExecGraphInfoByPrimitivesInfo");
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "intel_gpu/plugin/remote_context.hpp"
|
||||
#include "intel_gpu/plugin/compiled_model.hpp"
|
||||
#include "intel_gpu/plugin/itt.hpp"
|
||||
#include "intel_gpu/plugin/variable_state.hpp"
|
||||
#include "intel_gpu/runtime/debug_configuration.hpp"
|
||||
#include "openvino/core/preprocess/input_tensor_info.hpp"
|
||||
#include <ie_algorithm.hpp>
|
||||
@@ -532,6 +533,7 @@ void InferRequest::SetGraph(std::shared_ptr<Graph> graph) {
|
||||
} else {
|
||||
allocate_inputs();
|
||||
allocate_outputs();
|
||||
variables_states_ = m_graph->AllocateVariablesMemories();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -605,6 +607,7 @@ void InferRequest::SetBatch(int new_batch) {
|
||||
|
||||
batchOutputs[no.first] = out_buf;
|
||||
}
|
||||
variables_states_ = m_graph->AllocateVariablesMemories();
|
||||
|
||||
m_curBatch = new_batch;
|
||||
}
|
||||
@@ -744,6 +747,14 @@ void InferRequest::enqueue() {
|
||||
}
|
||||
}
|
||||
|
||||
cldnn::network::variables_states_map variables_states;
|
||||
for (auto &variable_state_pair : variables_states_)
|
||||
variables_states.insert({ variable_state_pair.first, variable_state_pair.second[0] });
|
||||
|
||||
auto networkPtr = m_graph->GetNetwork();
|
||||
|
||||
networkPtr->assign_variables_memories(std::move(variables_states));
|
||||
|
||||
for (auto& item : _outputs) {
|
||||
std::string outputName = item.first;
|
||||
Blob::Ptr& outputBlob = item.second;
|
||||
@@ -751,7 +762,7 @@ void InferRequest::enqueue() {
|
||||
}
|
||||
|
||||
internal_outputs.clear();
|
||||
internal_outputs = m_graph->GetNetwork()->execute(dependencies);
|
||||
internal_outputs = networkPtr->execute(dependencies);
|
||||
|
||||
// If dump layers path is set, only runs first inference.
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
@@ -826,7 +837,16 @@ void InferRequest::enqueue_dynamic() {
|
||||
inputLayout.size.batch[0] = mask;
|
||||
copy_input_data(m_graph->GetNetwork(nb), inputName, inputLayout, *inputBlob, &batchInputs[inputName][nb]);
|
||||
}
|
||||
internal_outputs_dynamic[nb] = m_graph->GetNetwork(nb)->execute();
|
||||
|
||||
cldnn::network::variables_states_map variables_states;
|
||||
for (auto &variable_state_pair : variables_states_)
|
||||
variables_states.insert({ variable_state_pair.first, variable_state_pair.second[nb] });
|
||||
|
||||
auto networkPtr = m_graph->GetNetwork(nb);
|
||||
|
||||
networkPtr->assign_variables_memories(std::move(variables_states));
|
||||
|
||||
internal_outputs_dynamic[nb] = networkPtr->execute();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1248,6 +1268,14 @@ InferenceEngine::Blob::Ptr InferRequest::create_device_blob(const InferenceEngin
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> InferRequest::QueryState() {
|
||||
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> ret{};
|
||||
ret.reserve(variables_states_.size());
|
||||
for (const auto& pair : variables_states_)
|
||||
ret.push_back(std::make_shared<VariableState>(pair.first, pair.second, m_graph->GetEngine(), m_curBatch));
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
|
||||
58
src/plugins/intel_gpu/src/plugin/ops/variable.cpp
Normal file
58
src/plugins/intel_gpu/src/plugin/ops/variable.cpp
Normal file
@@ -0,0 +1,58 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
#include "ngraph/op/assign.hpp"
|
||||
#include "ngraph/op/read_value.hpp"
|
||||
#include "intel_gpu/primitives/assign.hpp"
|
||||
#include "intel_gpu/primitives/read_value.hpp"
|
||||
|
||||
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
namespace {
|
||||
template<typename T_PRIMITIVE>
|
||||
void CreateVariableAccessPrimitive(Program &p, const std::shared_ptr<ngraph::op::Op> &op,
|
||||
const std::string &variable_id) {
|
||||
p.ValidateInputs(op, {1});
|
||||
|
||||
const auto output_data_type = DataTypeFromPrecision(op->get_output_element_type(0));
|
||||
const auto op_output_shape = op->get_output_shape(0);
|
||||
const auto output_format = DefaultFormatForDims(op_output_shape.size());
|
||||
const auto output_shape = tensor_from_dims(op_output_shape);
|
||||
|
||||
const auto variable_layout = cldnn::layout{output_data_type,
|
||||
output_format,
|
||||
output_shape};
|
||||
|
||||
auto input_primitives = p.GetInputPrimitiveIDs(op);
|
||||
p.AddVariableStateInfo(variable_id, variable_layout);
|
||||
const auto prim = T_PRIMITIVE{layer_type_name_ID(op),
|
||||
input_primitives,
|
||||
variable_id,
|
||||
variable_layout};
|
||||
|
||||
p.AddPrimitive(prim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
}
|
||||
|
||||
// ReadValue-6 maps onto the cldnn read_value primitive keyed by its variable id.
void CreateReadValueOp(Program& p, const std::shared_ptr<ngraph::op::v6::ReadValue>& op) {
    CreateVariableAccessPrimitive<cldnn::read_value>(p, op, op->get_variable_id());
}
|
||||
|
||||
// Assign-6 maps onto the cldnn assign primitive keyed by its variable id.
void CreateAssignOp(Program& p, const std::shared_ptr<ngraph::op::v6::Assign>& op) {
    CreateVariableAccessPrimitive<cldnn::assign>(p, op, op->get_variable_id());
}
|
||||
|
||||
} // namespace
|
||||
|
||||
REGISTER_FACTORY_IMPL(v6, Assign);
|
||||
REGISTER_FACTORY_IMPL(v6, ReadValue);
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
@@ -488,6 +488,14 @@ void Program::InitProfileInfo(const std::string& layerName,
|
||||
perfEntry.parentPrimitive = parentId;
|
||||
}
|
||||
|
||||
void Program::AddVariableStateInfo(const std::string& variable_id, const cldnn::layout& layout) {
    // Record a layout candidate for the given variable. operator[] creates an
    // empty layout set on first use, so the find/insert-else branches of the
    // original collapse into a single insert with identical effect.
    m_variablesStateInfo[variable_id].insert(layout);
}
|
||||
|
||||
// TODO: Does it make sense to add such method to ngraph core?
|
||||
bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node) {
|
||||
std::set<std::shared_ptr<ngraph::Node>> nodes_processed = {};
|
||||
|
||||
74
src/plugins/intel_gpu/src/plugin/variable_state.cpp
Normal file
74
src/plugins/intel_gpu/src/plugin/variable_state.cpp
Normal file
@@ -0,0 +1,74 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include <intel_gpu/plugin/variable_state.hpp>

#include <blob_factory.hpp>

#include <stdexcept>
|
||||
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
VariableState::VariableState(const std::string &name,
|
||||
const std::vector<cldnn::network::VariableState::Ptr> &states,
|
||||
std::shared_ptr<cldnn::engine> engine, int currentBatch) :
|
||||
InferenceEngine::IVariableStateInternal {name},
|
||||
currentBatch_ {currentBatch},
|
||||
states_ {states},
|
||||
desc_{
|
||||
PrecisionFromDataType(states.front()->memory->get_layout().data_type),
|
||||
AggregateShape(states.front()->memory->get_layout()),
|
||||
InferenceEngine::Layout::ANY
|
||||
},
|
||||
engine_ {std::move(engine)} {
|
||||
}
|
||||
|
||||
void VariableState::Reset() {
    // Clear the "set" flag on every state participating in the current batch;
    // the lambda needs no captures, the original's `this` capture was unused.
    IterateOverStates([](cldnn::network::VariableState& state) {
        state.is_set = false;
    });
}
|
||||
|
||||
// Copies the user-provided host blob into every device-side state buffer that
// participates in the current batch configuration, marking each as set.
// @param newState host blob holding the concatenated state data.
// @throws std::runtime_error if the blob is not a MemoryBlob.
void VariableState::SetState(const InferenceEngine::Blob::Ptr &newState) {
    // Fix: the original dereferenced the dynamic_pointer_cast result without a
    // null check, crashing on a non-MemoryBlob input; fail explicitly instead.
    auto memory_blob = std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(newState);
    if (!memory_blob)
        throw std::runtime_error("VariableState::SetState expects a MemoryBlob");
    auto lock = memory_blob->rmap();
    auto data = lock.as<char*>();
    IterateOverStates([&data, this](cldnn::network::VariableState &state) {
        // Each sub-state consumes its own chunk of the flat host buffer.
        state.memory->copy_from(engine_->get_program_stream(), data);
        data += state.memory->get_layout().bytes_count();
        state.is_set = true;
    });
    // Ensure the copies complete before later enqueued work reads the states.
    engine_->get_program_stream().enqueue_barrier();
}
|
||||
|
||||
InferenceEngine::Blob::CPtr VariableState::GetState() const {
    // Allocate a host blob matching desc_ and fill it by concatenating the
    // contents of each participating device-side state buffer.
    auto blob = make_blob_with_precision(desc_, InferenceEngine::CreateDefaultAllocator());
    blob->allocate();
    auto write_lock = std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(blob)->wmap();
    auto dst = write_lock.as<char*>();
    IterateOverStates([&dst, this](cldnn::network::VariableState& state) {
        const cldnn::mem_lock<char, cldnn::mem_lock_type::read> src{state.memory, engine_->get_program_stream()};
        std::copy(src.begin(), src.end(), dst);
        dst += state.memory->get_layout().bytes_count();
    });
    return blob;
}
|
||||
|
||||
InferenceEngine::SizeVector VariableState::AggregateShape(const cldnn::layout &layout) {
    // Convert the cldnn layout dims into an IE shape; when dynamic batch is
    // active (currentBatch_ != -1) the effective batch overrides dim 0.
    const auto& dims = layout.get_dims();
    InferenceEngine::SizeVector shape(dims.begin(), dims.end());
    if (currentBatch_ != -1)
        shape.front() = currentBatch_;
    return shape;
}
|
||||
|
||||
void VariableState::IterateOverStates(std::function<void(cldnn::network::VariableState&)> f) const {
|
||||
for (int i = 0; i < states_.size(); i++) {
|
||||
auto batch = 1 << i;
|
||||
if (batch & currentBatch_)
|
||||
f(*states_[i]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
|
||||
192
src/plugins/intel_gpu/tests/test_cases/variable.cpp
Normal file
192
src/plugins/intel_gpu/tests/test_cases/variable.cpp
Normal file
@@ -0,0 +1,192 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "test_utils.h"
|
||||
|
||||
#include <intel_gpu/primitives/input_layout.hpp>
|
||||
#include <intel_gpu/primitives/eltwise.hpp>
|
||||
#include <intel_gpu/primitives/assign.hpp>
|
||||
#include <intel_gpu/primitives/read_value.hpp>
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
template<typename T>
|
||||
struct VariableParams {
|
||||
cldnn::layout layout;
|
||||
std::vector<T> values;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct variable_test : public ::testing::TestWithParam<VariableParams<T>> {
|
||||
void test() {
|
||||
const VariableParams<T> param = testing::TestWithParam<VariableParams<T>>::GetParam();
|
||||
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
const auto variable_layout = param.layout;
|
||||
const auto input_data = engine.allocate_memory(variable_layout);
|
||||
set_values(input_data, param.values);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input_data->get_layout()));
|
||||
topology.add(read_value{"read_value", {"input"}, "v0", variable_layout});
|
||||
topology.add(eltwise{"sum", {"input", "read_value"}, eltwise_mode::sum, {}, variable_layout.data_type});
|
||||
topology.add(assign{"assign", {"sum"}, "v0", variable_layout});
|
||||
|
||||
network network(engine, topology, build_options{}, false);
|
||||
network.assign_variables_memories({ { "v0", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) } });
|
||||
network.set_input_data("input", input_data);
|
||||
|
||||
constexpr size_t number_of_inferences = 5;
|
||||
for (size_t inference = 1; inference <= number_of_inferences; ++inference) {
|
||||
const auto outputs = network.execute();
|
||||
const auto output = outputs.at("assign").get_memory();
|
||||
const cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
const auto output_count = output_ptr.size();
|
||||
ASSERT_EQ(output_count, param.values.size()) << "inference " << inference;
|
||||
|
||||
for (size_t i = 0; i < output_count; ++i) {
|
||||
ASSERT_EQ(output_ptr[i], inference * param.values[i]) << "inference " << inference;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using variable_test_i32 = variable_test<int32_t>;
|
||||
using variable_test_i64 = variable_test<int64_t>;
|
||||
using variable_test_f32 = variable_test<float>;
|
||||
|
||||
TEST_P(variable_test_i32, variable_i32) {
|
||||
ASSERT_NO_FATAL_FAILURE(test());
|
||||
}
|
||||
|
||||
TEST_P(variable_test_i64, variable_i64) {
|
||||
ASSERT_NO_FATAL_FAILURE(test());
|
||||
}
|
||||
|
||||
TEST_P(variable_test_f32, variable_f32) {
|
||||
ASSERT_NO_FATAL_FAILURE(test());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
basic,
|
||||
variable_test_i32,
|
||||
::testing::Values(
|
||||
VariableParams<int32_t>{ {data_types::i32, format::bfyx, tensor{1}}, {333666} },
|
||||
VariableParams<int32_t>{ {data_types::i32, format::bfyx, tensor{1, 1, 1, 3}}, {444, 555, 666} },
|
||||
VariableParams<int32_t>{ {data_types::i32, format::bfzyx, tensor{1, 2, 3, 2}},
|
||||
{1, 2, 3, 4, 5, 6, 6, 5, 4, 3, 2, 1} }
|
||||
)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
basic,
|
||||
variable_test_i64,
|
||||
::testing::Values(
|
||||
VariableParams<int64_t>{ {data_types::i64, format::bfyx, tensor{1}}, {333666L} },
|
||||
VariableParams<int64_t>{ {data_types::i64, format::bfyx, tensor{1, 1, 1, 3}}, {444L, 555L, 666L} },
|
||||
VariableParams<int64_t>{ {data_types::i64, format::bfzyx, tensor{1, 2, 3, 2}},
|
||||
{1L, 2L, 3L, 4L, 5L, 6L, 6L, 5L, 4L, 3L, 2L, 1L} }
|
||||
)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
basic,
|
||||
variable_test_f32,
|
||||
::testing::Values(
|
||||
VariableParams<float>{ {data_types::f32, format::bfyx, tensor{1}}, {333666.f} },
|
||||
VariableParams<float>{ {data_types::f32, format::bfyx, tensor{1, 1, 1, 3}}, {44.4f, 55.5f, 66.6f} },
|
||||
VariableParams<float>{ {data_types::f32, format::bfzyx, tensor{1, 2, 3, 2}},
|
||||
{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 6.f, 5.f, 4.f, 3.f, 2.f, 1.f} }
|
||||
)
|
||||
);
|
||||
|
||||
|
||||
// Verifies that executing an assign whose layout disagrees with the
// variable's registered layout raises a "Layout mismatch" error.
TEST(variable_test_common, exception_on_wrong_layout) {

    auto& engine = get_test_engine();

    const layout variable_layout{data_types::i32, format::bfyx, tensor{1}};
    const auto input_data = engine.allocate_memory(variable_layout);
    set_values(input_data, {333666});

    auto wrong_layout = variable_layout;
    wrong_layout.data_type = data_types::f32;
    const auto wrong_input_data = engine.allocate_memory(wrong_layout);
    // Fix: the original wrote {333.666} (doubles) into input_data — the i32
    // buffer — and left wrong_input_data uninitialized. The f32 values belong
    // to the wrongly-typed input.
    set_values(wrong_input_data, {333.666f});

    topology topology;
    topology.add(input_layout("input", input_data->get_layout()));
    topology.add(read_value{"read_value", {"input"}, "v0", variable_layout});
    topology.add(input_layout("wrong_input", wrong_input_data->get_layout()));
    topology.add(assign{"assign", {"wrong_input"}, "v0", wrong_layout});

    network network(engine, topology, build_options{}, false);
    network.assign_variables_memories({ { "v0", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) } });
    network.set_input_data("input", input_data);
    network.set_input_data("wrong_input", wrong_input_data);

    bool layout_mismatch_exception = false;
    try {
        network.execute();
    } catch(std::exception& exc) {
        const std::string error = exc.what();
        layout_mismatch_exception = error.find("Layout mismatch") != std::string::npos;
    }
    ASSERT_TRUE(layout_mismatch_exception);
}
|
||||
|
||||
// Verifies that variable contents written by assigns on one execution are
// visible to read_values on the next execution of the same network.
TEST(variable_test_common, variables_are_preserved_across_inferences) {

    auto& engine = get_test_engine();

    const layout variable_layout{data_types::i32, format::bfyx, tensor{1}};

    const auto input_1 = engine.allocate_memory(variable_layout);
    constexpr auto value_1 = 333;
    set_values(input_1, {value_1});

    const auto input_2 = engine.allocate_memory(variable_layout);
    constexpr auto value_2 = 666;
    set_values(input_2, {value_2});

    // The dummies only feed the read_value primitives; their contents must
    // never reach the result because the variables are filled by the assigns.
    const auto dummy1 = engine.allocate_memory(variable_layout);
    set_values(dummy1, {11});
    const auto dummy2 = engine.allocate_memory(variable_layout);
    set_values(dummy2, {22});

    topology topology;
    topology.add(input_layout("input_1", input_1->get_layout()));
    topology.add(assign{"assign_1", {"input_1"}, "v1", variable_layout});

    topology.add(input_layout("input_2", input_2->get_layout()));
    topology.add(assign{"assign_2", {"input_2"}, "v2", variable_layout});

    topology.add(data("dummy1", dummy1));
    topology.add(read_value{"read_value_1", {"dummy1"}, "v1", variable_layout});
    topology.add(read_value{"read_value_2", {"dummy1"}, "v2", variable_layout});

    topology.add(eltwise{"sum", {"read_value_1", "read_value_2"}, eltwise_mode::sum, {}, variable_layout.data_type});
    topology.add(assign{"assign_result", {"sum"}, "v_result", variable_layout});

    topology.add(data("dummy2", dummy2));
    topology.add(read_value{"read_result", {"dummy2"}, "v_result", variable_layout});

    network network{engine, topology, build_options{}, true};
    network.assign_variables_memories({
            { "v1", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) },
            { "v2", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) },
            { "v_result", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) }
    });
    network.set_input_data("input_1", input_1);
    network.set_input_data("input_2", input_2);

    // First run stores the inputs via the assigns; the second run reads the
    // preserved variables back through the read_value chain.
    network.execute();
    const auto outputs = network.execute();
    const auto output = outputs.at("read_result").get_memory();
    const cldnn::mem_lock<int> output_ptr(output, get_test_stream());
    ASSERT_EQ(output_ptr[0], value_1 + value_2);
}
|
||||
175
src/tests/functional/plugin/gpu/behavior/memory_dyn_batch.cpp
Normal file
175
src/tests/functional/plugin/gpu/behavior/memory_dyn_batch.cpp
Normal file
@@ -0,0 +1,175 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "ngraph/opsets/opset8.hpp"
|
||||
#include "ngraph_functions/subgraph_builders.hpp"
|
||||
#include "openvino/runtime/core.hpp"
|
||||
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||
#include <cpp/ie_cnn_network.h>
|
||||
#include <ie_plugin_config.hpp>
|
||||
#include "functional_test_utils/ov_plugin_cache.hpp"
|
||||
#include "common_test_utils/common_utils.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace opset8;
|
||||
using namespace ov::test;
|
||||
|
||||
|
||||
using MemoryDynamicBatchParams = std::tuple<
|
||||
ov::PartialShape, // Partial shape for network initialization
|
||||
ov::Shape, // Actual shape to be passed to inference request
|
||||
int, // Iterations number
|
||||
std::string>; // Device name
|
||||
|
||||
class MemoryDynamicBatch : public ::testing::Test,
|
||||
public ::testing::WithParamInterface<MemoryDynamicBatchParams> {
|
||||
public:
|
||||
static std::string getTestCaseName(::testing::TestParamInfo<MemoryDynamicBatchParams> obj) {
|
||||
ov::PartialShape inputPartialShape;
|
||||
ov::Shape inputShape;
|
||||
int iterationsNum;
|
||||
std::string targetDevice;
|
||||
std::tie(inputPartialShape, inputShape, iterationsNum, targetDevice) = obj.param;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "IS=";
|
||||
result << CommonTestUtils::partialShape2str({ inputPartialShape }) << "_";
|
||||
result << "TS=";
|
||||
result << CommonTestUtils::partialShape2str({inputShape});
|
||||
result << ")_";
|
||||
result << "iterationsCount=" << iterationsNum << "_";
|
||||
result << "targetDevice=" << targetDevice;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
void SetUp() override {
|
||||
std::tie(inputPartialShape_, inputShape_, iterationsNum_, deviceName_) = GetParam();
|
||||
model_ = buildModel(precision_, inputPartialShape_);
|
||||
core_ = ov::test::utils::PluginCache::get().core();
|
||||
}
|
||||
|
||||
static std::shared_ptr<ov::Model> buildModel(ElementType precision, const ov::PartialShape& shape) {
|
||||
auto param = builder::makeDynamicParams(precision, { shape });
|
||||
const VariableInfo variable_info { shape, precision, "v0" };
|
||||
auto variable = std::make_shared<Variable>(variable_info);
|
||||
auto read_value = std::make_shared<ReadValue>(param.at(0), variable);
|
||||
auto add = std::make_shared<Add>(read_value, param.at(0));
|
||||
auto assign = std::make_shared<Assign>(add, variable);
|
||||
auto res = std::make_shared<Result>(add);
|
||||
return std::make_shared<ov::Model>(ResultVector { res }, SinkVector { assign }, param,
|
||||
"MemoryDynamicBatchTest");
|
||||
}
|
||||
|
||||
static std::vector<int> generateInput(const ov::Shape& shape) {
|
||||
auto len = ov::shape_size(shape);
|
||||
std::vector<int> result {};
|
||||
result.reserve(len);
|
||||
for (int i = 0; i < len; i++)
|
||||
result.push_back(i);
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::vector<int> calculateReference(const std::vector<int>& input, int iterations) {
|
||||
std::vector<int> reference {};
|
||||
reference.reserve(input.size());
|
||||
std::transform(input.begin(), input.end(), std::back_inserter(reference), [iterations](const int &i) {
|
||||
return i * iterations;
|
||||
});
|
||||
return reference;
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
ov::PartialShape inputPartialShape_;
|
||||
ov::Shape inputShape_;
|
||||
int iterationsNum_;
|
||||
std::string deviceName_;
|
||||
std::shared_ptr<ov::Model> model_;
|
||||
std::shared_ptr<ov::Core> core_;
|
||||
std::vector<int> input_;
|
||||
ElementType precision_ { ElementType::i32 };
|
||||
};
|
||||
|
||||
// N accumulating inferences on one request must yield N * input.
TEST_P(MemoryDynamicBatch, MultipleInferencesOnTheSameInferRequest) {
    auto compiledModel = core_->compile_model(model_, CommonTestUtils::DEVICE_GPU, { });
    auto inferRequest = compiledModel.create_infer_request();
    input_ = generateInput(inputShape_);
    ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.set_input_tensor(inputTensor);
    for (int i = 0; i < iterationsNum_; i++)
        inferRequest.infer();
    auto output = inferRequest.get_output_tensor(0);
    const std::vector<int> reference = calculateReference(input_, iterationsNum_);
    const std::vector<int> actual(output.data<int>(), output.data<int>() + output.get_size());
    for (size_t i = 0; i < actual.size(); ++i)
        EXPECT_EQ(actual[i], reference[i]);
}
|
||||
|
||||
// Resetting the state between two inferences must discard the accumulation,
// so the second inference behaves like the first (reference factor 1).
TEST_P(MemoryDynamicBatch, ResetVariableState) {
    auto compiledModel = core_->compile_model(model_, CommonTestUtils::DEVICE_GPU, { });
    auto inferRequest = compiledModel.create_infer_request();
    input_ = generateInput(inputShape_);
    ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.set_input_tensor(inputTensor);
    inferRequest.infer();
    inferRequest.query_state().front().reset();
    inferRequest.infer();
    auto output = inferRequest.get_output_tensor(0);
    const std::vector<int> reference = calculateReference(input_, 1);
    const std::vector<int> actual(output.data<int>(), output.data<int>() + output.get_size());
    for (size_t i = 0; i < actual.size(); ++i)
        EXPECT_EQ(actual[i], reference[i]);
}
|
||||
|
||||
// After N inferences the queried state must hold the accumulated values.
TEST_P(MemoryDynamicBatch, GetVariableState) {
    auto compiledModel = core_->compile_model(model_, CommonTestUtils::DEVICE_GPU, { });
    auto inferRequest = compiledModel.create_infer_request();
    input_ = generateInput(inputShape_);
    ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.set_input_tensor(inputTensor);
    for (int i = 0; i < iterationsNum_; i++)
        inferRequest.infer();
    auto blob = inferRequest.query_state().front().get_state();
    const std::vector<int> reference = calculateReference(input_, iterationsNum_);
    const std::vector<int> actual(blob.data<int>(), blob.data<int>() + blob.get_size());
    for (size_t i = 0; i < actual.size(); ++i)
        EXPECT_EQ(actual[i], reference[i]);
}
|
||||
|
||||
// Pre-seeding the state with the input adds one extra accumulation step,
// so N inferences must yield (N + 1) * input.
TEST_P(MemoryDynamicBatch, SetVariableState) {
    auto compiledModel = core_->compile_model(model_, CommonTestUtils::DEVICE_GPU, { });
    auto inferRequest = compiledModel.create_infer_request();
    input_ = generateInput(inputShape_);
    ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.set_input_tensor(inputTensor);
    ov::Tensor state = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.query_state().front().set_state(state);
    for (int i = 0; i < iterationsNum_; i++)
        inferRequest.infer();
    auto output = inferRequest.get_output_tensor(0);
    const std::vector<int> reference = calculateReference(input_, iterationsNum_ + 1);
    const std::vector<int> actual(output.data<int>(), output.data<int>() + output.get_size());
    for (size_t i = 0; i < actual.size(); ++i)
        EXPECT_EQ(actual[i], reference[i]);
}
|
||||
|
||||
ov::PartialShape networkPartialShape { {1, 19}, 4, 20, 20 };
|
||||
std::vector<ov::Shape> inputShapes { { 7, 4, 20, 20 }, { 19, 4, 20, 20 } };
|
||||
std::vector<int> iterationsNum { 3, 7 };
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_MemoryDynamicBatch, MemoryDynamicBatch,
|
||||
::testing::Combine(
|
||||
::testing::Values(networkPartialShape),
|
||||
::testing::ValuesIn(inputShapes),
|
||||
::testing::ValuesIn(iterationsNum),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU)),
|
||||
MemoryDynamicBatch::getTestCaseName);
|
||||
@@ -0,0 +1,36 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "single_layer_tests/memory.h"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
|
||||
const std::vector<InferenceEngine::SizeVector> inShapes = {
|
||||
{1},
|
||||
{3},
|
||||
{3, 3, 3},
|
||||
{2, 3, 4, 5},
|
||||
};
|
||||
|
||||
const std::vector<InferenceEngine::Precision> inputPrecisions = {
|
||||
InferenceEngine::Precision::I32,
|
||||
InferenceEngine::Precision::FP32,
|
||||
};
|
||||
|
||||
const std::vector<int64_t> iterationCount {1, 3, 10};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_MemoryTest, MemoryTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(ngraph::helpers::MemoryTransformation::NONE),
|
||||
::testing::ValuesIn(iterationCount),
|
||||
::testing::ValuesIn(inShapes),
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU)),
|
||||
MemoryTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
@@ -28,6 +28,7 @@ public:
|
||||
protected:
|
||||
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> CalculateRefs() override;
|
||||
void SetUp() override;
|
||||
void Infer() override;
|
||||
private:
|
||||
void CreateTIFunc();
|
||||
void CreateCommonFunc();
|
||||
|
||||
@@ -82,6 +82,7 @@ namespace LayerTestsDefinitions {
|
||||
ConfigureNetwork();
|
||||
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
||||
}
|
||||
inferRequest = executableNetwork.CreateInferRequest();
|
||||
GenerateInputs();
|
||||
for (int64_t i = 0; i < iteration_count; ++i) {
|
||||
Infer();
|
||||
@@ -101,6 +102,11 @@ namespace LayerTestsDefinitions {
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryTest::Infer() {
    // Re-apply the configured input blobs before each iteration, then run
    // the request synchronously.
    ConfigureInferRequest();
    inferRequest.Infer();
}
|
||||
|
||||
std::vector<std::pair<element::Type, std::vector<std::uint8_t>>> MemoryTest::CalculateRefs() {
|
||||
using namespace ngraph;
|
||||
function->validate_nodes_and_infer_types();
|
||||
@@ -177,7 +183,9 @@ namespace LayerTestsDefinitions {
|
||||
|
||||
void MemoryTest::CreateCommonFunc() {
|
||||
auto param = builder::makeParams(ngPrc, {inputShape});
|
||||
auto variable = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"});
|
||||
const auto variable_info = targetDevice == CommonTestUtils::DEVICE_GPU ?
|
||||
VariableInfo{Shape{inputShape}, ngPrc, "v0"} : VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"};
|
||||
auto variable = std::make_shared<Variable>(variable_info);
|
||||
auto read_value = std::make_shared<ReadValue>(param.at(0), variable);
|
||||
auto add = std::make_shared<Add>(read_value, param.at(0));
|
||||
auto assign = std::make_shared<Assign>(add, variable);
|
||||
|
||||
Reference in New Issue
Block a user