From 7c84a586f9bf8fd632aeacda7ee0adf3602b62fa Mon Sep 17 00:00:00 2001
From: Eddy Kim
Date: Fri, 19 May 2023 07:40:04 +0900
Subject: [PATCH] [GPU] Fixed deserialization logic for dynamic batch (#17486)

* deserialization of dynamic batch
* updated multi stream tests
* added unit tests
* updated cache dir name
* resolved type conversion warning
* removed TearDown()
* added const
---
 .../graph/serialization/binary_buffer.hpp     | 36 ++++++++++++-------
 .../intel_gpu/plugin/compiled_model.hpp       |  3 +-
 .../include/intel_gpu/plugin/graph.hpp        |  7 +++-
 .../include/intel_gpu/plugin/program.hpp      |  8 ++---
 src/plugins/intel_gpu/src/graph/data.cpp      |  4 +--
 src/plugins/intel_gpu/src/graph/network.cpp   |  2 ++
 .../intel_gpu/src/plugin/compiled_model.cpp   |  8 +++--
 src/plugins/intel_gpu/src/plugin/graph.cpp    |  9 +++--
 src/plugins/intel_gpu/src/plugin/plugin.cpp   |  2 +-
 src/plugins/intel_gpu/src/plugin/program.cpp  | 13 +++++++
 .../gpu_dyn_batch_shape_tests.cpp             | 32 +++++++++++++++--
 .../tests/unit/test_cases/gemm_gpu_test.cpp   |  1 -
 .../tests/unit/test_cases/streams_test.cpp    |  8 ++---
 13 files changed, 98 insertions(+), 35 deletions(-)

diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp
index d36376e5716..dccbea3eaef 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp
@@ -7,6 +7,7 @@
 #include <iostream>
 #include <istream>
 #include <ostream>
+#include <vector>
 #include "buffer.hpp"
 #include "helpers.hpp"
 #include "bind.hpp"
@@ -36,32 +37,43 @@ private:
 class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
 public:
     BinaryInputBuffer(std::istream& stream, engine& engine)
-        : InputBuffer(this, engine), stream(stream), _impl_params(nullptr) {}
+        : InputBuffer(this, engine), _stream(stream), _impl_params(nullptr), _num_networks(0), _stream_id(0) {}
 
     void read(void* const data, std::streamsize size) {
-        auto const read_size = stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
+        auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
         OPENVINO_ASSERT(read_size == size, "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
     }
 
     void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
     void* getKernelImplParams() const { return _impl_params; }
 
-    void addConstData(const std::string& prim_id, const std::shared_ptr<memory> mem_ptr) {
-        OPENVINO_ASSERT(_const_data_map.find(prim_id) == _const_data_map.end(), "[GPU] duplicated primitive id " + prim_id);
-        _const_data_map[prim_id] = mem_ptr;
+    void addConstData(const uint32_t net_id, const std::string& prim_id, const std::shared_ptr<memory> mem_ptr) {
+        while (_const_data_map.size() <= net_id) {
+            _const_data_map.emplace_back(std::unordered_map<std::string, std::shared_ptr<memory>>());
+        }
+        OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) == _const_data_map[net_id].end(), "[GPU] duplicated primitive id " + prim_id);
+        _const_data_map[net_id][prim_id] = mem_ptr;
     }
-    std::shared_ptr<memory> getConstData(const std::string& prim_id) {
-        OPENVINO_ASSERT(_const_data_map.find(prim_id) != _const_data_map.end(), "[GPU] Not found primitive id " + prim_id);
-        return _const_data_map[prim_id];
+    std::shared_ptr<memory> getConstData(const uint32_t net_id, const std::string& prim_id) {
+        OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) != _const_data_map[net_id].end(), "[GPU] Not found primitive id " + prim_id);
+        return _const_data_map[net_id][prim_id];
    }
 
-    std::streampos tellg() { return stream.tellg(); }
-    void seekg(std::streampos pos) { stream.seekg(pos); }
+    std::streampos tellg() { return _stream.tellg(); }
+    void seekg(std::streampos pos) { _stream.seekg(pos); }
+
+    void new_network_added() { _num_networks += 1; }
+    int get_num_networks() const { return _num_networks; }
+
+    void set_stream_id(uint16_t stream_id) { _stream_id = stream_id; }
+    uint16_t get_stream_id() const { return _stream_id; }
 
 private:
-    std::istream& stream;
+    std::istream& _stream;
     void* _impl_params;
-    std::unordered_map<std::string, std::shared_ptr<memory>> _const_data_map;
+    std::vector<std::unordered_map<std::string, std::shared_ptr<memory>>> _const_data_map;
+    int _num_networks;
+    uint16_t _stream_id;
 };
 
 template <typename T>
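Reviewer note: the heart of this fix is `_const_data_map` changing from a flat `prim_id -> memory` map into a per-network vector of maps, so a graph that deserializes several networks (the dynamic-batch case) no longer collides on primitive ids. A minimal standalone sketch of that grow-on-demand indexing, with `memory` stubbed as an opaque type; all names below are illustrative, not the plugin's API:

```cpp
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

struct memory {};  // stand-in for cldnn::memory (a device allocation in the real code)

using const_data_map =
    std::vector<std::unordered_map<std::string, std::shared_ptr<memory>>>;

// Mirrors the new BinaryInputBuffer::addConstData: grow the vector until the
// net_id slot exists, then insert, rejecting duplicates within one network only.
void add_const_data(const_data_map& m, uint32_t net_id,
                    const std::string& prim_id, std::shared_ptr<memory> mem) {
    while (m.size() <= net_id)
        m.emplace_back();
    assert(m[net_id].count(prim_id) == 0 && "duplicated primitive id");
    m[net_id][prim_id] = std::move(mem);
}

int main() {
    const_data_map m;
    add_const_data(m, 0, "conv1_weights", std::make_shared<memory>());
    // With the old flat map this second insert would have tripped the
    // duplicate-id assert; per-network maps keep identical primitive ids
    // from different networks apart.
    add_const_data(m, 1, "conv1_weights", std::make_shared<memory>());
    assert(m.size() == 2);
}
```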
Read " + std::to_string(read_size)); } void setKernelImplParams(void* impl_params) { _impl_params = impl_params; } void* getKernelImplParams() const { return _impl_params; } - void addConstData(const std::string& prim_id, const std::shared_ptr mem_ptr) { - OPENVINO_ASSERT(_const_data_map.find(prim_id) == _const_data_map.end(), "[GPU] duplicated primitive id " + prim_id); - _const_data_map[prim_id] = mem_ptr; + void addConstData(const uint32_t net_id, const std::string& prim_id, const std::shared_ptr mem_ptr) { + while (_const_data_map.size() <= net_id) { + _const_data_map.emplace_back(std::unordered_map>()); + } + OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) == _const_data_map[net_id].end(), "[GPU] duplicated primitive id " + prim_id); + _const_data_map[net_id][prim_id] = mem_ptr; } - std::shared_ptr getConstData(const std::string& prim_id) { - OPENVINO_ASSERT(_const_data_map.find(prim_id) != _const_data_map.end(), "[GPU] Not found primitive id " + prim_id); - return _const_data_map[prim_id]; + std::shared_ptr getConstData(const uint32_t net_id, const std::string& prim_id) { + OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) != _const_data_map[net_id].end(), "[GPU] Not found primitive id " + prim_id); + return _const_data_map[net_id][prim_id]; } - std::streampos tellg() { return stream.tellg(); } - void seekg(std::streampos pos) { stream.seekg(pos); } + std::streampos tellg() { return _stream.tellg(); } + void seekg(std::streampos pos) { _stream.seekg(pos); } + + void new_network_added() { _num_networks += 1; } + int get_num_networks() const { return _num_networks; } + + void set_stream_id(uint16_t stream_id) { _stream_id = stream_id; } + uint16_t get_stream_id() const { return _stream_id; } private: - std::istream& stream; + std::istream& _stream; void* _impl_params; - std::unordered_map> _const_data_map; + std::vector>> _const_data_map; + int _num_networks; + uint16_t _stream_id; }; template diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp index 4715c6d74bd..b1422495e8b 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp @@ -26,7 +26,8 @@ public: CompiledModel(InferenceEngine::CNNNetwork &network, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config, InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr); - CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config); + CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config, + InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr); void Export(std::ostream& networkModel) override; std::shared_ptr GetExecGraphInfo() override; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp index 39a36443a68..e69cc41d220 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp @@ -46,7 +46,12 @@ public: uint16_t stream_id = 0, InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr); - Graph(cldnn::BinaryInputBuffer& ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id = 0); + 
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp
index f2b75d3e539..83e549f2abc 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp
@@ -84,12 +84,8 @@ public:
     Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
             bool createTopologyOnly = false, bool partialBuild = false,
             InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
-    Program(cldnn::engine& engine, const ExecutionConfig& config)
-        : m_max_batch(1)
-        , m_curBatch(-1)
-        , m_config(config)
-        , m_engine(engine)
-        , queryMode(false) {}
+    Program(cldnn::engine& engine, const ExecutionConfig& config,
+            InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
 
     static const cldnn::primitive_id m_preProcessTag;
     static const cldnn::primitive_id m_meanValuesTag;
diff --git a/src/plugins/intel_gpu/src/graph/data.cpp b/src/plugins/intel_gpu/src/graph/data.cpp
index dcc17faf531..907c6a79434 100644
--- a/src/plugins/intel_gpu/src/graph/data.cpp
+++ b/src/plugins/intel_gpu/src/graph/data.cpp
@@ -87,7 +87,7 @@ void data_inst::load(BinaryInputBuffer& ib) {
     ib >> make_data(&data_size, sizeof(size_t));
 
     if (!get_network().is_primary_stream()) {
-        _outputs[0] = ib.getConstData(id());
+        _outputs[0] = ib.getConstData(get_network_id() - (ib.get_num_networks() * ib.get_stream_id()), id());
         auto pos = ib.tellg();
         pos += data_size;
         ib.seekg(pos);
@@ -103,7 +103,7 @@ void data_inst::load(BinaryInputBuffer& ib) {
             _outputs[0]->copy_from(get_network().get_stream(), _buf.data());
         }
 
-        ib.addConstData(id(), _outputs[0]);
+        ib.addConstData(get_network_id(), id(), _outputs[0]);
     }
 }
diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp
index 936b4bb4d78..009753635ee 100644
--- a/src/plugins/intel_gpu/src/graph/network.cpp
+++ b/src/plugins/intel_gpu/src/graph/network.cpp
@@ -371,6 +371,8 @@ network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, st
     , _is_primary_stream(is_primary_stream)
     , _reset_arguments(true) {
     net_id = get_unique_net_id();
+    if (is_primary_stream)
+        ib.new_network_added();
 
     kernels_cache kernels_cache(get_engine(), config, 0, nullptr, {""});
     ib >> kernels_cache;
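Reviewer note: the subtraction in `data_inst::load` is easy to misread. `get_unique_net_id()` hands out network ids globally across all streams, while the const-data cache is only filled while the primary stream (stream 0) deserializes, so a secondary stream has to translate its own network id back to the id the primary stream used. A sketch of that arithmetic, assuming ids are assigned contiguously stream by stream, which is what the sequential import loop produces:

```cpp
#include <cassert>
#include <cstdint>

// Mirrors the index passed to ib.getConstData() in data_inst::load().
// own_net_id:   id of the network currently being deserialized
// num_networks: networks per stream, counted via ib.new_network_added()
// stream_id:    set through ib.set_stream_id() before deserialization
uint32_t primary_net_id(uint32_t own_net_id, int num_networks, uint16_t stream_id) {
    return own_net_id - static_cast<uint32_t>(num_networks) * stream_id;
}

int main() {
    // Dynamic batch: each stream holds two networks. The primary stream got
    // ids 1 and 2; stream 1 then got ids 3 and 4, which map back to 1 and 2.
    assert(primary_net_id(3, /*num_networks=*/2, /*stream_id=*/1) == 1);
    assert(primary_net_id(4, 2, 1) == 2);
    // The primary stream itself maps to its own ids (stream_id == 0).
    assert(primary_net_id(1, 2, 0) == 1);
}
```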
diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
index adc8897d7d6..05e494d77cc 100644
--- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
+++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
@@ -68,7 +68,11 @@ CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network,
     }
 }
 
-CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config) :
+CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib,
+                             InferenceEngine::RemoteContext::Ptr context,
+                             const ExecutionConfig& config,
+                             InferenceEngine::InputsDataMap* inputs,
+                             InferenceEngine::OutputsDataMap* outputs) :
     InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr {
         if (config.get_property(ov::intel_gpu::exclusive_async_requests)) {
             //exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior
@@ -90,7 +94,7 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::Remo
     auto pos = ib.tellg();
     for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) {
         ib.seekg(pos);
-        auto graph = std::make_shared<Graph>(ib, context_impl, m_config, n);
+        auto graph = std::make_shared<Graph>(ib, context_impl, m_config, n, inputs, outputs);
         m_graphs.push_back(graph);
     }
 }
diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp
index 7a1574216c5..d1e0892da7a 100644
--- a/src/plugins/intel_gpu/src/plugin/graph.cpp
+++ b/src/plugins/intel_gpu/src/plugin/graph.cpp
@@ -57,12 +57,14 @@ Graph::Graph(InferenceEngine::CNNNetwork& network, RemoteContextImpl::Ptr contex
     Build();
 }
 
-Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id)
+Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id,
+             InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs)
     : m_context(context)
     , m_config(config)
    , m_stream_id(stream_id)
     , m_state(0) {
-    m_program = std::make_shared<Program>(get_engine(), config);
+    m_program = std::make_shared<Program>(get_engine(), config, inputs, outputs);
+    ib >> m_program->m_max_batch;
     if (m_program->m_max_batch > 1)
         m_config.set_property(ov::intel_gpu::max_dynamic_batch(m_program->m_max_batch));
 
@@ -105,6 +107,7 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const
     size_t num_networks;
     ib >> num_networks;
     for (size_t i = 0; i < num_networks; ++i) {
+        ib.set_stream_id(m_stream_id);
         m_networks.emplace_back(std::make_shared<cldnn::network>(ib, get_engine().create_stream(config), get_engine(), m_stream_id == 0));
     }
 }
@@ -506,6 +509,8 @@ std::shared_ptr<ov::Model> Graph::GetExecGraphInfoByPrimitivesInfo(std::v
 //   [ ov::intel_gpu::Graph::outputDims ]
 //   [ cldnn::network ]
 void Graph::Export(cldnn::BinaryOutputBuffer &ob) {
+    ob << m_program->m_max_batch;
+
     bool need_onednn_engine = false;
 #ifdef ENABLE_ONEDNN_FOR_GPU
     try {
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
index 27d64f9fd26..0fa28540caa 100644
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -531,7 +531,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istr
         auto transformedNetwork = GetCore()->ReadNetwork(xmlString, std::move(dataBlob), true);
         exeNetwork = std::make_shared<CompiledModel>(transformedNetwork, context, config, &inputs, &outputs);
     } else {
-        exeNetwork = std::make_shared<CompiledModel>(ib, context, config);
+        exeNetwork = std::make_shared<CompiledModel>(ib, context, config, &inputs, &outputs);
         exeNetwork->SetPointerToPlugin(shared_from_this());
     }
 
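Reviewer note: the unchanged context around the `Graph` loop shows the mechanism the new `set_stream_id()` call plugs into. The blob position is captured once and the buffer is rewound before each stream's graph is built, so every stream re-reads the same serialized bytes. A standalone mirror of that rewind pattern over a plain `std::istringstream`; the stream count and payload are made up for illustration:

```cpp
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Stand-in for deserializing one Graph from the shared blob.
std::string read_graph(std::istream& ib) {
    std::string payload;
    ib >> payload;
    return payload;
}

int main() {
    std::istringstream ib("serialized-graph-bytes");
    const uint16_t num_streams = 2;  // m_config.get_property(ov::num_streams) in the real loop

    auto pos = ib.tellg();           // remember where the graph blob starts
    std::vector<std::string> graphs;
    for (uint16_t n = 0; n < num_streams; ++n) {
        ib.seekg(pos);               // rewind: each stream re-reads the same bytes
        graphs.push_back(read_graph(ib));  // real code: std::make_shared<Graph>(ib, ctx, cfg, n, ...)
    }
    std::cout << (graphs[0] == graphs[1] ? "identical" : "diverged") << '\n';
}
```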
diff --git a/src/plugins/intel_gpu/src/plugin/program.cpp b/src/plugins/intel_gpu/src/plugin/program.cpp
index ff667d8075f..0988117acac 100644
--- a/src/plugins/intel_gpu/src/plugin/program.cpp
+++ b/src/plugins/intel_gpu/src/plugin/program.cpp
@@ -305,6 +305,19 @@ Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, co
     }
 }
 
+Program::Program(cldnn::engine& engine, const ExecutionConfig& config,
+                 InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs)
+    : m_max_batch(1)
+    , m_curBatch(-1)
+    , m_config(config)
+    , m_engine(engine)
+    , queryMode(false) {
+    if (inputs != nullptr)
+        m_networkInputs = *inputs;
+    if (outputs != nullptr)
+        m_networkOutputs = *outputs;
+}
+
 int Program::GetMaxBatchSizeForSingleProgram() {
     auto max_dynamic_batch = m_config.get_property(ov::intel_gpu::max_dynamic_batch);
     if (max_dynamic_batch > 1) {
diff --git a/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp
index db637e58891..91a82b2bb75 100644
--- a/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp
+++ b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp
@@ -5,6 +5,7 @@
 #include "openvino/runtime/core.hpp"
 #include
 #include "common_test_utils/common_utils.hpp"
+#include "common_test_utils/file_utils.hpp"
 #include "functional_test_utils/skip_tests_config.hpp"
 #include "ngraph_functions/subgraph_builders.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
@@ -55,9 +56,7 @@ public:
         }
         return result.str();
     }
-    void TearDown() override {
-        core.reset();
-    }
+
 protected:
     void SetUp() override {
         if (core)
@@ -86,6 +85,33 @@ TEST_P(OVDynamicBatchShape_Tests, InferDynamicBatchBound) {
     run();
 }
 
+TEST_P(OVDynamicBatchShape_Tests, InferDynamicBatchBound_cached) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+    std::string cacheFolderName;
+    {
+        std::stringstream ss;
+        ss << "InferDynamicBatchBound_cached_" << netPrecision << "_" << targetDevice;
+        cacheFolderName = ss.str();
+
+        CommonTestUtils::removeFilesWithExt(cacheFolderName, "blob");
+        CommonTestUtils::removeFilesWithExt(cacheFolderName, "cl_cache");
+        CommonTestUtils::removeDir(cacheFolderName);
+
+        core = std::make_shared<ov::Core>();
+        core->set_property(ov::cache_dir(cacheFolderName));
+        run();
+    }
+    {
+        core = std::make_shared<ov::Core>();
+        core->set_property(ov::cache_dir(cacheFolderName));
+        run();
+
+        CommonTestUtils::removeFilesWithExt(cacheFolderName, "blob");
+        CommonTestUtils::removeFilesWithExt(cacheFolderName, "cl_cache");
+        CommonTestUtils::removeDir(cacheFolderName);
+    }
+}
+
 namespace {
 auto config = []() {
     return ov::AnyMap{};
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp
index 702aff8e70f..5713a994ce0 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp
@@ -127,7 +127,6 @@ public:
 
         cldnn::network::ptr network;
         if (is_caching_test) {
-            std::cout << "cached" << std::endl;
             membuf mem_buf;
             {
                 cldnn::network _network(engine, tp, get_test_default_config(engine));
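Reviewer note: the new functional test covers the user-visible scenario behind this patch: a bounded dynamic-batch model compiled with `ov::cache_dir` set, then loaded again from the cache blob. A hedged sketch of that flow with the public API; the model path, input name, and shape below are placeholders, not part of the patch:

```cpp
#include <openvino/openvino.hpp>

int main() {
    // First run: compile and populate the cache directory.
    ov::Core core;
    core.set_property(ov::cache_dir("gpu_cache"));  // cached blobs land here
    auto model = core.read_model("model.xml");      // placeholder path
    // Bounded dynamic batch: dimension 0 may vary from 1 to 10.
    model->reshape({{"input", ov::PartialShape{{1, 10}, 3, 224, 224}}});
    auto compiled = core.compile_model(model, "GPU");

    // Second run (fresh Core): compile_model now goes through the import path
    // patched above instead of rebuilding, which used to break for max_batch > 1.
    ov::Core core2;
    core2.set_property(ov::cache_dir("gpu_cache"));
    auto model2 = core2.read_model("model.xml");
    model2->reshape({{"input", ov::PartialShape{{1, 10}, 3, 224, 224}}});
    auto compiled_cached = core2.compile_model(model2, "GPU");
}
```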
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/streams_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/streams_test.cpp
index 760126af925..bc9349807bb 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/streams_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/streams_test.cpp
@@ -78,7 +78,6 @@ public:
         cldnn::network::ptr network0;
         cldnn::network::ptr network1;
         if (is_caching_test) {
-            std::cout << "cached" << std::endl;
             membuf mem_buf;
             {
                 auto prog = program::build_program(engine, topology, get_test_default_config(engine));
@@ -96,6 +95,7 @@ public:
                 auto pos = ib.tellg();
                 network0 = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine, true);
                 ib.seekg(pos);
+                ib.set_stream_id(1);
                 network1 = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine, false);
             }
         }
@@ -172,7 +172,6 @@ public:
         cldnn::network::ptr network0;
         cldnn::network::ptr network1;
         if (is_caching_test) {
-            std::cout << "cached" << std::endl;
             membuf mem_buf0;
             membuf mem_buf1;
             {
@@ -194,12 +193,13 @@ public:
             {
                 std::istream in_mem0(&mem_buf0);
                 BinaryInputBuffer ib0 = BinaryInputBuffer(in_mem0, engine);
-                network0 = std::make_shared<cldnn::network>(ib0, get_test_stream_ptr(), engine, 0);
+                network0 = std::make_shared<cldnn::network>(ib0, get_test_stream_ptr(), engine, false);
             }
             {
                 std::istream in_mem1(&mem_buf1);
                 BinaryInputBuffer ib1 = BinaryInputBuffer(in_mem1, engine);
-                network1 = std::make_shared<cldnn::network>(ib1, get_test_stream_ptr(), engine, 1);
+                ib1.set_stream_id(1);
+                network1 = std::make_shared<cldnn::network>(ib1, get_test_stream_ptr(), engine, true);
             }
         }
     } else {