[GPU] Fixed deserialization logic for dynamic batch (#17486)

* deserialization of dynamic batch

* updated multi stream tests

* added unit tests

* updated cache dir name

* resolved type conversion warning

* removed teardown()

* added const
Eddy Kim 2023-05-19 07:40:04 +09:00 committed by GitHub
parent fac6668ed1
commit 7c84a586f9
13 changed files with 98 additions and 35 deletions

View File

@@ -7,6 +7,7 @@
 #include <sstream>
 #include <stdexcept>
 #include <type_traits>
+#include <vector>
 #include "buffer.hpp"
 #include "helpers.hpp"
 #include "bind.hpp"
@@ -36,32 +37,43 @@ private:
 class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
 public:
     BinaryInputBuffer(std::istream& stream, engine& engine)
-    : InputBuffer(this, engine), stream(stream), _impl_params(nullptr) {}
+    : InputBuffer(this, engine), _stream(stream), _impl_params(nullptr), _num_networks(0), _stream_id(0) {}
     void read(void* const data, std::streamsize size) {
-        auto const read_size = stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
+        auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
         OPENVINO_ASSERT(read_size == size,
                         "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
     }
     void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
     void* getKernelImplParams() const { return _impl_params; }
-    void addConstData(const std::string& prim_id, const std::shared_ptr<memory> mem_ptr) {
-        OPENVINO_ASSERT(_const_data_map.find(prim_id) == _const_data_map.end(), "[GPU] duplicated primitive id " + prim_id);
-        _const_data_map[prim_id] = mem_ptr;
+    void addConstData(const uint32_t net_id, const std::string& prim_id, const std::shared_ptr<memory> mem_ptr) {
+        while (_const_data_map.size() <= net_id) {
+            _const_data_map.emplace_back(std::unordered_map<std::string, std::shared_ptr<memory>>());
+        }
+        OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) == _const_data_map[net_id].end(), "[GPU] duplicated primitive id " + prim_id);
+        _const_data_map[net_id][prim_id] = mem_ptr;
     }
-    std::shared_ptr<memory> getConstData(const std::string& prim_id) {
-        OPENVINO_ASSERT(_const_data_map.find(prim_id) != _const_data_map.end(), "[GPU] Not found primitive id " + prim_id);
-        return _const_data_map[prim_id];
+    std::shared_ptr<memory> getConstData(const uint32_t net_id, const std::string& prim_id) {
+        OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) != _const_data_map[net_id].end(), "[GPU] Not found primitive id " + prim_id);
+        return _const_data_map[net_id][prim_id];
     }
-    std::streampos tellg() { return stream.tellg(); }
-    void seekg(std::streampos pos) { stream.seekg(pos); }
+    std::streampos tellg() { return _stream.tellg(); }
+    void seekg(std::streampos pos) { _stream.seekg(pos); }
+    void new_network_added() { _num_networks += 1; }
+    int get_num_networks() const { return _num_networks; }
+    void set_stream_id(uint16_t stream_id) { _stream_id = stream_id; }
+    uint16_t get_stream_id() const { return _stream_id; }
 private:
-    std::istream& stream;
+    std::istream& _stream;
     void* _impl_params;
-    std::unordered_map<std::string, std::shared_ptr<memory>> _const_data_map;
+    std::vector<std::unordered_map<std::string, std::shared_ptr<memory>>> _const_data_map;
+    int _num_networks;
+    uint16_t _stream_id;
 };
 template <typename T>
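Note: _const_data_map changes from one flat map into a vector of maps indexed by network id, so constant blobs registered while loading one network can be re-registered for another network without tripping the duplicate-id assert. A stand-alone sketch of the container and its on-demand growth (simplified types; Memory is only a stand-in for cldnn::memory, not part of the patch):

#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

struct Memory {};  // illustrative stand-in for cldnn::memory

class ConstDataCache {
public:
    void add(uint32_t net_id, const std::string& prim_id, std::shared_ptr<Memory> mem) {
        while (cache_.size() <= net_id)        // grow the per-network vector on demand
            cache_.emplace_back();
        assert(cache_[net_id].count(prim_id) == 0 && "duplicated primitive id");
        cache_[net_id][prim_id] = std::move(mem);
    }
    std::shared_ptr<Memory> get(uint32_t net_id, const std::string& prim_id) const {
        return cache_.at(net_id).at(prim_id);
    }
private:
    std::vector<std::unordered_map<std::string, std::shared_ptr<Memory>>> cache_;
};

int main() {
    ConstDataCache cache;
    cache.add(1, "weights", std::make_shared<Memory>());  // registered while loading network 1
    cache.add(2, "weights", std::make_shared<Memory>());  // same primitive id, different network: now allowed
    return cache.get(1, "weights") ? 0 : 1;
}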

View File

@@ -26,7 +26,8 @@ public:
     CompiledModel(InferenceEngine::CNNNetwork &network, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config,
                   InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
-    CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config);
+    CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config,
+                  InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
     void Export(std::ostream& networkModel) override;
     std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;

View File

@@ -46,7 +46,12 @@ public:
           uint16_t stream_id = 0,
           InferenceEngine::InputsDataMap* inputs = nullptr,
           InferenceEngine::OutputsDataMap* outputs = nullptr);
-    Graph(cldnn::BinaryInputBuffer& ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id = 0);
+    Graph(cldnn::BinaryInputBuffer& ib,
+          RemoteContextImpl::Ptr context,
+          const ExecutionConfig& config,
+          uint16_t stream_id = 0,
+          InferenceEngine::InputsDataMap* inputs = nullptr,
+          InferenceEngine::OutputsDataMap* outputs = nullptr);
     explicit Graph(std::shared_ptr<Graph> graph, uint16_t stream_id = 0);
     void Export(cldnn::BinaryOutputBuffer &ob);
     std::shared_ptr<ngraph::Function> GetExecGraphInfo();

View File

@@ -84,12 +84,8 @@ public:
     Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
             bool createTopologyOnly = false, bool partialBuild = false,
             InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
-    Program(cldnn::engine& engine, const ExecutionConfig& config)
-        : m_max_batch(1)
-        , m_curBatch(-1)
-        , m_config(config)
-        , m_engine(engine)
-        , queryMode(false) {}
+    Program(cldnn::engine& engine, const ExecutionConfig& config,
+            InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
     static const cldnn::primitive_id m_preProcessTag;
     static const cldnn::primitive_id m_meanValuesTag;

View File

@@ -87,7 +87,7 @@ void data_inst::load(BinaryInputBuffer& ib) {
     ib >> make_data(&data_size, sizeof(size_t));
     if (!get_network().is_primary_stream()) {
-        _outputs[0] = ib.getConstData(id());
+        _outputs[0] = ib.getConstData(get_network_id() - (ib.get_num_networks() * ib.get_stream_id()), id());
         auto pos = ib.tellg();
         pos += data_size;
         ib.seekg(pos);
@@ -103,7 +103,7 @@ void data_inst::load(BinaryInputBuffer& ib) {
             _outputs[0]->copy_from(get_network().get_stream(), _buf.data());
         }
-        ib.addConstData(id(), _outputs[0]);
+        ib.addConstData(get_network_id(), id(), _outputs[0]);
     }
 }
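Note: the index passed to getConstData() relies on an assumption inferred from the new new_network_added()/get_unique_net_id() logic, namely that network ids grow contiguously stream by stream during import: if the primary stream (stream 0) registered N networks, the matching network of stream s carries an id N * s higher, so subtracting get_num_networks() * get_stream_id() recovers the primary network's id. A worked example with illustrative numbers only:

#include <cstdint>
#include <iostream>

int main() {
    // Illustrative assumption: the primary stream loaded networks 1 and 2,
    // so the same two networks of stream 1 were assigned ids 3 and 4.
    const int num_networks = 2;     // ib.get_num_networks()
    const uint16_t stream_id = 1;   // ib.get_stream_id()
    const uint32_t network_id = 4;  // get_network_id() of the network being loaded

    const uint32_t primary_net_id = network_id - num_networks * stream_id;
    std::cout << primary_net_id << "\n";  // 2: the slot filled by addConstData() on the primary stream
}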

View File

@@ -371,6 +371,8 @@ network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, st
     , _is_primary_stream(is_primary_stream)
     , _reset_arguments(true) {
     net_id = get_unique_net_id();
+    if (is_primary_stream)
+        ib.new_network_added();
     kernels_cache kernels_cache(get_engine(), config, 0, nullptr, {""});
     ib >> kernels_cache;

View File

@@ -68,7 +68,11 @@ CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network,
     }
 }

-CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config) :
+CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib,
+                             InferenceEngine::RemoteContext::Ptr context,
+                             const ExecutionConfig& config,
+                             InferenceEngine::InputsDataMap* inputs,
+                             InferenceEngine::OutputsDataMap* outputs) :
     InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr {
         if (config.get_property(ov::intel_gpu::exclusive_async_requests)) {
             //exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior
@@ -90,7 +94,7 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::Remo
         auto pos = ib.tellg();
         for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) {
             ib.seekg(pos);
-            auto graph = std::make_shared<Graph>(ib, context_impl, m_config, n);
+            auto graph = std::make_shared<Graph>(ib, context_impl, m_config, n, inputs, outputs);
             m_graphs.push_back(graph);
         }
     }
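Note: the per-stream loop depends on rewinding the buffer to the position captured before the first graph was read, so every stream deserializes the same byte range. A stand-alone sketch of that rewind-and-replay pattern, using a plain std::stringstream in place of BinaryInputBuffer (illustrative only, not the plugin's API):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
    std::stringstream blob("serialized-graph");
    const int num_streams = 2;
    const auto pos = blob.tellg();          // remember where the graph payload starts

    std::vector<std::string> graphs;
    for (int n = 0; n < num_streams; ++n) {
        blob.seekg(pos);                    // rewind so every stream reads the same bytes
        std::string payload;
        blob >> payload;                    // stands in for deserializing one Graph
        graphs.push_back(payload);
    }
    std::cout << graphs.size() << " graphs built from one blob\n";
}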

View File

@@ -57,12 +57,14 @@ Graph::Graph(InferenceEngine::CNNNetwork& network, RemoteContextImpl::Ptr contex
     Build();
 }

-Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id)
+Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id,
+             InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs)
     : m_context(context)
     , m_config(config)
     , m_stream_id(stream_id)
     , m_state(0) {
-    m_program = std::make_shared<Program>(get_engine(), config);
+    m_program = std::make_shared<Program>(get_engine(), config, inputs, outputs);
+    ib >> m_program->m_max_batch;
     if (m_program->m_max_batch > 1)
         m_config.set_property(ov::intel_gpu::max_dynamic_batch(m_program->m_max_batch));
@@ -105,6 +107,7 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const
     size_t num_networks;
     ib >> num_networks;
     for (size_t i = 0; i < num_networks; ++i) {
+        ib.set_stream_id(m_stream_id);
         m_networks.emplace_back(std::make_shared<cldnn::network>(ib, get_engine().create_stream(config), get_engine(), m_stream_id == 0));
     }
 }
@@ -506,6 +509,8 @@ std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfoByPrimitivesInfo(std::v
 // [ ov::intel_gpu::Graph::outputDims ]
 // [ cldnn::network ]
 void Graph::Export(cldnn::BinaryOutputBuffer &ob) {
+    ob << m_program->m_max_batch;
     bool need_onednn_engine = false;
 #ifdef ENABLE_ONEDNN_FOR_GPU
     try {
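Note: the ob << m_program->m_max_batch added to Export() and the ib >> m_program->m_max_batch added to the importing constructor have to stay paired, because the binary blob carries no field tags and is decoded purely by write order. A minimal sketch of that write/read symmetry, with a plain stream standing in for the cldnn buffers (illustrative only):

#include <cassert>
#include <sstream>

int main() {
    std::stringstream blob;

    int max_batch = 8;
    blob << max_batch << ' ';  // export side: m_max_batch is written first

    int restored = 0;
    blob >> restored;          // import side: read it back first, in the same order
    assert(restored == max_batch);
}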

View File

@@ -531,7 +531,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istr
         auto transformedNetwork = GetCore()->ReadNetwork(xmlString, std::move(dataBlob), true);
         exeNetwork = std::make_shared<CompiledModel>(transformedNetwork, context, config, &inputs, &outputs);
     } else {
-        exeNetwork = std::make_shared<CompiledModel>(ib, context, config);
+        exeNetwork = std::make_shared<CompiledModel>(ib, context, config, &inputs, &outputs);
         exeNetwork->SetPointerToPlugin(shared_from_this());
     }

View File

@@ -305,6 +305,19 @@ Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, co
     }
 }

+Program::Program(cldnn::engine& engine, const ExecutionConfig& config,
+                 InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs)
+    : m_max_batch(1)
+    , m_curBatch(-1)
+    , m_config(config)
+    , m_engine(engine)
+    , queryMode(false) {
+    if (inputs != nullptr)
+        m_networkInputs = *inputs;
+    if (outputs != nullptr)
+        m_networkOutputs = *outputs;
+}
+
 int Program::GetMaxBatchSizeForSingleProgram() {
     auto max_dynamic_batch = m_config.get_property(ov::intel_gpu::max_dynamic_batch);
     if (max_dynamic_batch > 1) {

View File

@@ -5,6 +5,7 @@
 #include "openvino/runtime/core.hpp"
 #include <common_test_utils/test_common.hpp>
 #include "common_test_utils/common_utils.hpp"
+#include "common_test_utils/file_utils.hpp"
 #include "functional_test_utils/skip_tests_config.hpp"
 #include "ngraph_functions/subgraph_builders.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
@@ -55,9 +56,7 @@ public:
         }
         return result.str();
     }
-    void TearDown() override {
-        core.reset();
-    }
 protected:
     void SetUp() override {
         if (core)
@@ -86,6 +85,33 @@ TEST_P(OVDynamicBatchShape_Tests, InferDynamicBatchBound) {
     run();
 }

+TEST_P(OVDynamicBatchShape_Tests, InferDynamicBatchBound_cached) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+    std::string cacheFolderName;
+    {
+        std::stringstream ss;
+        ss << "InferDynamicBatchBound_cached_" << netPrecision << "_" << targetDevice;
+        cacheFolderName = ss.str();
+        CommonTestUtils::removeFilesWithExt(cacheFolderName, "blob");
+        CommonTestUtils::removeFilesWithExt(cacheFolderName, "cl_cache");
+        CommonTestUtils::removeDir(cacheFolderName);
+        core = std::make_shared<ov::Core>();
+        core->set_property(ov::cache_dir(cacheFolderName));
+        run();
+    }
+    {
+        core = std::make_shared<ov::Core>();
+        core->set_property(ov::cache_dir(cacheFolderName));
+        run();
+        CommonTestUtils::removeFilesWithExt(cacheFolderName, "blob");
+        CommonTestUtils::removeFilesWithExt(cacheFolderName, "cl_cache");
+        CommonTestUtils::removeDir(cacheFolderName);
+    }
+}
+
 namespace {
 auto config = []() {
     return ov::AnyMap{};

View File

@@ -127,7 +127,6 @@ public:
         cldnn::network::ptr network;
         if (is_caching_test) {
-            std::cout << "cached" << std::endl;
             membuf mem_buf;
             {
                 cldnn::network _network(engine, tp, get_test_default_config(engine));

View File

@@ -78,7 +78,6 @@ public:
         cldnn::network::ptr network0;
         cldnn::network::ptr network1;
         if (is_caching_test) {
-            std::cout << "cached" << std::endl;
             membuf mem_buf;
             {
                 auto prog = program::build_program(engine, topology, get_test_default_config(engine));
@@ -96,6 +95,7 @@ public:
                 auto pos = ib.tellg();
                 network0 = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine, true);
                 ib.seekg(pos);
+                ib.set_stream_id(1);
                 network1 = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine, false);
             }
         }
@@ -172,7 +172,6 @@ public:
         cldnn::network::ptr network0;
         cldnn::network::ptr network1;
         if (is_caching_test) {
-            std::cout << "cached" << std::endl;
             membuf mem_buf0;
             membuf mem_buf1;
             {
@@ -194,12 +193,13 @@ public:
             {
                 std::istream in_mem0(&mem_buf0);
                 BinaryInputBuffer ib0 = BinaryInputBuffer(in_mem0, engine);
-                network0 = std::make_shared<cldnn::network>(ib0, get_test_stream_ptr(), engine, 0);
+                network0 = std::make_shared<cldnn::network>(ib0, get_test_stream_ptr(), engine, false);
             }
             {
                 std::istream in_mem1(&mem_buf1);
                 BinaryInputBuffer ib1 = BinaryInputBuffer(in_mem1, engine);
-                network1 = std::make_shared<cldnn::network>(ib1, get_test_stream_ptr(), engine, 1);
+                ib1.set_stream_id(1);
+                network1 = std::make_shared<cldnn::network>(ib1, get_test_stream_ptr(), engine, true);
             }
         }
     } else {