[GPU] Fixed deserialization logic for dynamic batch (#17486)
* deserialization of dynamic batch
* updated multi stream tests
* added unit tests
* updated cache dir name
* resolved type conversion warning
* removed teardown()
* added const
This commit is contained in:
parent fac6668ed1
commit 7c84a586f9
@@ -7,6 +7,7 @@
 #include <sstream>
 #include <stdexcept>
 #include <type_traits>
+#include <vector>
 #include "buffer.hpp"
 #include "helpers.hpp"
 #include "bind.hpp"
@@ -36,32 +37,43 @@ private:
 class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
 public:
     BinaryInputBuffer(std::istream& stream, engine& engine)
-    : InputBuffer(this, engine), stream(stream), _impl_params(nullptr) {}
+    : InputBuffer(this, engine), _stream(stream), _impl_params(nullptr), _num_networks(0), _stream_id(0) {}

     void read(void* const data, std::streamsize size) {
-        auto const read_size = stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
+        auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
         OPENVINO_ASSERT(read_size == size,
             "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
     }

     void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
     void* getKernelImplParams() const { return _impl_params; }
-    void addConstData(const std::string& prim_id, const std::shared_ptr<memory> mem_ptr) {
-        OPENVINO_ASSERT(_const_data_map.find(prim_id) == _const_data_map.end(), "[GPU] duplicated primitive id " + prim_id);
-        _const_data_map[prim_id] = mem_ptr;
+    void addConstData(const uint32_t net_id, const std::string& prim_id, const std::shared_ptr<memory> mem_ptr) {
+        while (_const_data_map.size() <= net_id) {
+            _const_data_map.emplace_back(std::unordered_map<std::string, std::shared_ptr<memory>>());
+        }
+        OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) == _const_data_map[net_id].end(), "[GPU] duplicated primitive id " + prim_id);
+        _const_data_map[net_id][prim_id] = mem_ptr;
     }
-    std::shared_ptr<memory> getConstData(const std::string& prim_id) {
-        OPENVINO_ASSERT(_const_data_map.find(prim_id) != _const_data_map.end(), "[GPU] Not found primitive id " + prim_id);
-        return _const_data_map[prim_id];
+    std::shared_ptr<memory> getConstData(const uint32_t net_id, const std::string& prim_id) {
+        OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) != _const_data_map[net_id].end(), "[GPU] Not found primitive id " + prim_id);
+        return _const_data_map[net_id][prim_id];
     }

-    std::streampos tellg() { return stream.tellg(); }
-    void seekg(std::streampos pos) { stream.seekg(pos); }
+    std::streampos tellg() { return _stream.tellg(); }
+    void seekg(std::streampos pos) { _stream.seekg(pos); }
+
+    void new_network_added() { _num_networks += 1; }
+    int get_num_networks() const { return _num_networks; }
+
+    void set_stream_id(uint16_t stream_id) { _stream_id = stream_id; }
+    uint16_t get_stream_id() const { return _stream_id; }

 private:
-    std::istream& stream;
+    std::istream& _stream;
     void* _impl_params;
-    std::unordered_map<std::string, std::shared_ptr<memory>> _const_data_map;
+    std::vector<std::unordered_map<std::string, std::shared_ptr<memory>>> _const_data_map;
+    int _num_networks;
+    uint16_t _stream_id;
 };

 template <typename T>
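Note: before this change `_const_data_map` was a single map keyed only by primitive id, so when several networks were deserialized from one buffer (multiple streams, or the per-batch programs created for dynamic batch) identical primitive ids collided. Below is a minimal standalone sketch of the new per-network bookkeeping, with `cldnn::memory` stood in by `std::string`; the names are illustrative, not the OpenVINO API.

```cpp
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

class ConstDataRegistry {
public:
    void add(uint32_t net_id, const std::string& prim_id, std::shared_ptr<std::string> data) {
        // Grow the outer vector on demand so any net_id can be registered.
        while (_map.size() <= net_id)
            _map.emplace_back();
        assert(_map[net_id].count(prim_id) == 0 && "duplicated primitive id");
        _map[net_id][prim_id] = std::move(data);
    }
    std::shared_ptr<std::string> get(uint32_t net_id, const std::string& prim_id) const {
        return _map.at(net_id).at(prim_id);
    }

private:
    std::vector<std::unordered_map<std::string, std::shared_ptr<std::string>>> _map;
};

int main() {
    ConstDataRegistry reg;
    reg.add(0, "weights", std::make_shared<std::string>("net0-weights"));
    reg.add(1, "weights", std::make_shared<std::string>("net1-weights"));  // same prim id, different net: now fine
    assert(*reg.get(1, "weights") == "net1-weights");
    return 0;
}
```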
@@ -26,7 +26,8 @@ public:
     CompiledModel(InferenceEngine::CNNNetwork &network, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config,
                   InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
-    CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config);
+    CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config,
+                  InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);

     void Export(std::ostream& networkModel) override;
     std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;
@@ -46,7 +46,12 @@ public:
           uint16_t stream_id = 0,
           InferenceEngine::InputsDataMap* inputs = nullptr,
           InferenceEngine::OutputsDataMap* outputs = nullptr);
-    Graph(cldnn::BinaryInputBuffer& ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id = 0);
+    Graph(cldnn::BinaryInputBuffer& ib,
+          RemoteContextImpl::Ptr context,
+          const ExecutionConfig& config,
+          uint16_t stream_id = 0,
+          InferenceEngine::InputsDataMap* inputs = nullptr,
+          InferenceEngine::OutputsDataMap* outputs = nullptr);
     explicit Graph(std::shared_ptr<Graph> graph, uint16_t stream_id = 0);
     void Export(cldnn::BinaryOutputBuffer &ob);
     std::shared_ptr<ngraph::Function> GetExecGraphInfo();
@@ -84,12 +84,8 @@ public:
     Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
             bool createTopologyOnly = false, bool partialBuild = false,
             InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
-    Program(cldnn::engine& engine, const ExecutionConfig& config)
-        : m_max_batch(1)
-        , m_curBatch(-1)
-        , m_config(config)
-        , m_engine(engine)
-        , queryMode(false) {}
+    Program(cldnn::engine& engine, const ExecutionConfig& config,
+            InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);

     static const cldnn::primitive_id m_preProcessTag;
     static const cldnn::primitive_id m_meanValuesTag;
@@ -87,7 +87,7 @@ void data_inst::load(BinaryInputBuffer& ib) {
     ib >> make_data(&data_size, sizeof(size_t));

     if (!get_network().is_primary_stream()) {
-        _outputs[0] = ib.getConstData(id());
+        _outputs[0] = ib.getConstData(get_network_id() - (ib.get_num_networks() * ib.get_stream_id()), id());
         auto pos = ib.tellg();
         pos += data_size;
         ib.seekg(pos);
@@ -103,7 +103,7 @@ void data_inst::load(BinaryInputBuffer& ib) {
             _outputs[0]->copy_from(get_network().get_stream(), _buf.data());
         }

-        ib.addConstData(id(), _outputs[0]);
+        ib.addConstData(get_network_id(), id(), _outputs[0]);
     }
 }

@@ -371,6 +371,8 @@ network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, st
     , _is_primary_stream(is_primary_stream)
     , _reset_arguments(true) {
     net_id = get_unique_net_id();
+    if (is_primary_stream)
+        ib.new_network_added();

     kernels_cache kernels_cache(get_engine(), config, 0, nullptr, {""});
     ib >> kernels_cache;
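The two hunks above work together: every primary-stream network bumps `_num_networks`, and `data_inst::load` on a secondary stream maps its own `get_network_id()` back to the primary-stream network that registered the const data. A worked example of that index arithmetic, assuming (as the subtraction implies) that network ids are handed out consecutively, primary stream first:

```cpp
#include <cstdio>

int main() {
    const int num_networks = 2;  // networks per stream, counted while the primary stream loads
    for (int stream_id = 0; stream_id < 3; ++stream_id) {
        for (int k = 0; k < num_networks; ++k) {
            // Assumed numbering: the primary stream gets ids 1..num_networks,
            // stream s gets s*num_networks+1 .. (s+1)*num_networks.
            int net_id = stream_id * num_networks + k + 1;
            int primary_id = net_id - num_networks * stream_id;  // the lookup key used in data_inst::load
            std::printf("stream %d, net_id %d -> primary net_id %d\n", stream_id, net_id, primary_id);
        }
    }
    return 0;
}
```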
@@ -68,7 +68,11 @@ CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network,
     }
 }

-CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config) :
+CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib,
+                             InferenceEngine::RemoteContext::Ptr context,
+                             const ExecutionConfig& config,
+                             InferenceEngine::InputsDataMap* inputs,
+                             InferenceEngine::OutputsDataMap* outputs) :
     InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr {
         if (config.get_property(ov::intel_gpu::exclusive_async_requests)) {
             //exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior
@@ -90,7 +94,7 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::Remo
     auto pos = ib.tellg();
     for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) {
         ib.seekg(pos);
-        auto graph = std::make_shared<Graph>(ib, context_impl, m_config, n);
+        auto graph = std::make_shared<Graph>(ib, context_impl, m_config, n, inputs, outputs);
         m_graphs.push_back(graph);
     }
 }
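Each stream's `Graph` is built from the same serialized bytes: the loop remembers the read position once and rewinds before every construction. The same tellg/seekg replay pattern over a plain `std::stringstream`, as a standalone sketch (not plugin code):

```cpp
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
    std::stringstream blob("serialized-graph-payload");
    const int num_streams = 3;
    std::vector<std::string> graphs;

    auto pos = blob.tellg();                 // remember where the graph starts
    for (int n = 0; n < num_streams; ++n) {
        blob.seekg(pos);                     // rewind: every stream parses the same bytes
        std::string g;
        blob >> g;
        graphs.push_back(g);
    }
    std::cout << graphs.size() << " graphs built from one blob\n";
    return 0;
}
```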
@@ -57,12 +57,14 @@ Graph::Graph(InferenceEngine::CNNNetwork& network, RemoteContextImpl::Ptr contex
     Build();
 }

-Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id)
+Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id,
+             InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs)
     : m_context(context)
     , m_config(config)
     , m_stream_id(stream_id)
     , m_state(0) {
-    m_program = std::make_shared<Program>(get_engine(), config);
+    m_program = std::make_shared<Program>(get_engine(), config, inputs, outputs);
     ib >> m_program->m_max_batch;
     if (m_program->m_max_batch > 1)
         m_config.set_property(ov::intel_gpu::max_dynamic_batch(m_program->m_max_batch));
@@ -105,6 +107,7 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const
     size_t num_networks;
     ib >> num_networks;
     for (size_t i = 0; i < num_networks; ++i) {
+        ib.set_stream_id(m_stream_id);
         m_networks.emplace_back(std::make_shared<cldnn::network>(ib, get_engine().create_stream(config), get_engine(), m_stream_id == 0));
     }
 }
@@ -506,6 +509,8 @@ std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfoByPrimitivesInfo(std::v
 // [ ov::intel_gpu::Graph::outputDims ]
 // [ cldnn::network ]
 void Graph::Export(cldnn::BinaryOutputBuffer &ob) {
+    ob << m_program->m_max_batch;
+
     bool need_onednn_engine = false;
 #ifdef ENABLE_ONEDNN_FOR_GPU
     try {
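`m_max_batch` is now the first field in the blob, so the import path (the `ib >> m_program->m_max_batch;` hunk above) can restore `max_dynamic_batch` before any network payload is parsed. The ordering rule in miniature, sketched over a `std::stringstream`:

```cpp
#include <sstream>
#include <string>

int main() {
    std::stringstream blob;
    const int max_batch = 8;
    blob << max_batch << ' ' << "network-payload";  // header first, body second

    int restored = 0;
    blob >> restored;      // import reads the header and can reconfigure itself...
    std::string body;
    blob >> body;          // ...before deserializing the network body
    return restored == max_batch ? 0 : 1;
}
```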
@@ -531,7 +531,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istr
         auto transformedNetwork = GetCore()->ReadNetwork(xmlString, std::move(dataBlob), true);
         exeNetwork = std::make_shared<CompiledModel>(transformedNetwork, context, config, &inputs, &outputs);
     } else {
-        exeNetwork = std::make_shared<CompiledModel>(ib, context, config);
+        exeNetwork = std::make_shared<CompiledModel>(ib, context, config, &inputs, &outputs);
         exeNetwork->SetPointerToPlugin(shared_from_this());
     }

@@ -305,6 +305,19 @@ Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, co
     }
 }

+Program::Program(cldnn::engine& engine, const ExecutionConfig& config,
+                 InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs)
+    : m_max_batch(1)
+    , m_curBatch(-1)
+    , m_config(config)
+    , m_engine(engine)
+    , queryMode(false) {
+    if (inputs != nullptr)
+        m_networkInputs = *inputs;
+    if (outputs != nullptr)
+        m_networkOutputs = *outputs;
+}
+
 int Program::GetMaxBatchSizeForSingleProgram() {
     auto max_dynamic_batch = m_config.get_property(ov::intel_gpu::max_dynamic_batch);
     if (max_dynamic_batch > 1) {
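When a model is imported from a blob there is no `CNNNetwork` to query for I/O metadata, which is why `ImportNetwork` now threads the parsed maps down through `CompiledModel`, `Graph`, and `Program` as nullable pointers. A standalone sketch of that pattern; `DataMap` is a stand-in for `InputsDataMap`/`OutputsDataMap`, not the real types:

```cpp
#include <map>
#include <string>

// Stand-in for InferenceEngine::InputsDataMap / OutputsDataMap.
using DataMap = std::map<std::string, std::string>;

class ProgramSketch {
public:
    ProgramSketch(const DataMap* inputs, const DataMap* outputs) {
        if (inputs != nullptr)
            m_networkInputs = *inputs;    // import path: caller supplies the metadata
        if (outputs != nullptr)
            m_networkOutputs = *outputs;  // otherwise the maps stay empty, as before
    }

private:
    DataMap m_networkInputs;
    DataMap m_networkOutputs;
};

int main() {
    DataMap inputs{{"input0", "f32"}};
    DataMap outputs{{"output0", "f32"}};
    ProgramSketch imported(&inputs, &outputs);   // what the import path now does
    ProgramSketch plain(nullptr, nullptr);       // the old default behavior
    return 0;
}
```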
@@ -5,6 +5,7 @@
 #include "openvino/runtime/core.hpp"
 #include <common_test_utils/test_common.hpp>
 #include "common_test_utils/common_utils.hpp"
+#include "common_test_utils/file_utils.hpp"
 #include "functional_test_utils/skip_tests_config.hpp"
 #include "ngraph_functions/subgraph_builders.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
@@ -55,9 +56,7 @@ public:
         }
         return result.str();
     }
-    void TearDown() override {
-        core.reset();
-    }

 protected:
     void SetUp() override {
         if (core)
@@ -86,6 +85,33 @@ TEST_P(OVDynamicBatchShape_Tests, InferDynamicBatchBound) {
     run();
 }

+TEST_P(OVDynamicBatchShape_Tests, InferDynamicBatchBound_cached) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+    std::string cacheFolderName;
+    {
+        std::stringstream ss;
+        ss << "InferDynamicBatchBound_cached_" << netPrecision << "_" << targetDevice;
+        cacheFolderName = ss.str();
+
+        CommonTestUtils::removeFilesWithExt(cacheFolderName, "blob");
+        CommonTestUtils::removeFilesWithExt(cacheFolderName, "cl_cache");
+        CommonTestUtils::removeDir(cacheFolderName);
+
+        core = std::make_shared<ov::Core>();
+        core->set_property(ov::cache_dir(cacheFolderName));
+        run();
+    }
+    {
+        core = std::make_shared<ov::Core>();
+        core->set_property(ov::cache_dir(cacheFolderName));
+        run();
+
+        CommonTestUtils::removeFilesWithExt(cacheFolderName, "blob");
+        CommonTestUtils::removeFilesWithExt(cacheFolderName, "cl_cache");
+        CommonTestUtils::removeDir(cacheFolderName);
+    }
+}
+
 namespace {
 auto config = []() {
     return ov::AnyMap{};
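The new `InferDynamicBatchBound_cached` test is the standard cache round trip: the first compilation populates `ov::cache_dir` with a blob, the second is served from that blob and therefore exercises the deserialization path this commit fixes. A minimal usage sketch of that flow; "model.xml" and the cache folder name are placeholders:

```cpp
#include <openvino/runtime/core.hpp>

int main() {
    ov::Core core;
    core.set_property(ov::cache_dir("dyn_batch_cache"));

    // First compilation: builds the model and exports a blob into the cache dir.
    auto compiled_first = core.compile_model("model.xml", "GPU");

    // Second compilation: imported from the cached blob, i.e. the multi-stream /
    // dynamic-batch deserialization path exercised by the test above.
    auto compiled_second = core.compile_model("model.xml", "GPU");
    return 0;
}
```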
@@ -127,7 +127,6 @@ public:

     cldnn::network::ptr network;
     if (is_caching_test) {
-        std::cout << "cached" << std::endl;
         membuf mem_buf;
         {
             cldnn::network _network(engine, tp, get_test_default_config(engine));
@@ -78,7 +78,6 @@ public:
     cldnn::network::ptr network0;
     cldnn::network::ptr network1;
     if (is_caching_test) {
-        std::cout << "cached" << std::endl;
         membuf mem_buf;
         {
             auto prog = program::build_program(engine, topology, get_test_default_config(engine));
@@ -96,6 +95,7 @@ public:
         auto pos = ib.tellg();
         network0 = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine, true);
         ib.seekg(pos);
+        ib.set_stream_id(1);
         network1 = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine, false);
     }
 }
@@ -172,7 +172,6 @@ public:
     cldnn::network::ptr network0;
     cldnn::network::ptr network1;
     if (is_caching_test) {
-        std::cout << "cached" << std::endl;
         membuf mem_buf0;
         membuf mem_buf1;
         {
@@ -194,12 +193,13 @@ public:
         {
             std::istream in_mem0(&mem_buf0);
             BinaryInputBuffer ib0 = BinaryInputBuffer(in_mem0, engine);
-            network0 = std::make_shared<cldnn::network>(ib0, get_test_stream_ptr(), engine, 0);
+            network0 = std::make_shared<cldnn::network>(ib0, get_test_stream_ptr(), engine, false);
         }
         {
             std::istream in_mem1(&mem_buf1);
             BinaryInputBuffer ib1 = BinaryInputBuffer(in_mem1, engine);
-            network1 = std::make_shared<cldnn::network>(ib1, get_test_stream_ptr(), engine, 1);
+            ib1.set_stream_id(1);
+            network1 = std::make_shared<cldnn::network>(ib1, get_test_stream_ptr(), engine, true);
         }
     }
 } else {