From f562e963055cedea41a46cba5ae63963599b4abb Mon Sep 17 00:00:00 2001
From: Eddy Kim
Date: Fri, 24 Feb 2023 15:05:16 +0900
Subject: [PATCH] [GPU] Fallback to kernel caching in the case of dynamic
 models (#15842)

* use kernel caching for dynamic models

* replaced cl_cache with blob

* updated to serialize dims info of inputs and outputs

* updated to skip Unicode tests on Windows
---
 .../intel_gpu/plugin/compiled_model.hpp       |  3 ---
 .../include/intel_gpu/plugin/plugin.hpp       |  2 +-
 .../impls/onednn/primitive_onednn_base.h      |  6 ++----
 .../intel_gpu/src/plugin/compiled_model.cpp   | 20 +++++++++----------
 src/plugins/intel_gpu/src/plugin/plugin.cpp   |  9 +++------
 .../intel_gpu/src/runtime/kernels_cache.cpp   |  6 ++----
 .../behavior/ov_plugin/caching_tests.cpp      |  6 +++---
 .../behavior/plugin/caching_tests.cpp         |  6 +++---
 .../skip_tests_config.cpp                     |  2 +-
 9 files changed, 24 insertions(+), 36 deletions(-)

diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp
index 979e203cab5..20fb79db866 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp
@@ -46,9 +46,6 @@ public:
     ExecutionConfig m_config;
     InferenceEngine::ITaskExecutor::Ptr m_taskExecutor;
     InferenceEngine::ITaskExecutor::Ptr m_waitExecutor;
-
-private:
-    bool is_serializable();
 };
 
 }  // namespace intel_gpu
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp
index 007e55e7fb3..9e0f8941527 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp
@@ -18,7 +18,7 @@ namespace intel_gpu {
 class Plugin : public InferenceEngine::IInferencePlugin {
     struct impl;
     std::shared_ptr<impl> _impl;
-    bool isModelCachingEnabled = false;
+    bool isModelCachingEnabled = true;
     std::string default_device_id = "0";
 
     // key: device_id, value: cldnn device
diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h
index 4e404518d66..5e8c03dd0c6 100644
--- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h
+++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h
@@ -324,10 +324,8 @@ private:
     void build_primitive(const ExecutionConfig& config) {
         auto cache_outpath = get_cache_directory(config);
 
-        if (const char* env_p = std::getenv("OV_GPU_CACHE_MODEL")) {
-            if (env_p[0] == '1') {
-                cache_outpath = "";
-            }
+        if (!config.get_property(ov::intel_gpu::allow_new_shape_infer)) {
+            cache_outpath = "";
         }
 
         if (cache_outpath.empty()) {
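Both this hunk and the kernels_cache.cpp change later in the patch key kernel-level caching off the ov::intel_gpu::allow_new_shape_infer property instead of the old OV_GPU_CACHE_MODEL environment variable: a dynamic model enables new shape inference and keeps the per-kernel cache, while a static model gets an empty cache path and is served by whole-model blob caching instead. A minimal standalone sketch of that gating, where MockExecutionConfig and resolve_kernel_cache_path are illustrative stand-ins, not plugin code:

#include <iostream>
#include <string>

// Hypothetical stand-in for the plugin's ExecutionConfig.
struct MockExecutionConfig {
    bool allow_new_shape_infer = false;  // true for dynamic models
    std::string cache_dir = "/tmp/ov_cache";
};

// Static model: rely on whole-model blob caching, so return an empty
// kernel-cache path. Dynamic model: keep per-kernel caching enabled.
std::string resolve_kernel_cache_path(const MockExecutionConfig& config) {
    if (!config.allow_new_shape_infer)
        return "";
    return config.cache_dir;
}

int main() {
    MockExecutionConfig static_model;
    MockExecutionConfig dynamic_model;
    dynamic_model.allow_new_shape_infer = true;

    std::cout << "static model:  '" << resolve_kernel_cache_path(static_model) << "'\n";   // ''
    std::cout << "dynamic model: '" << resolve_kernel_cache_path(dynamic_model) << "'\n";  // '/tmp/ov_cache'
}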
diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
index b2370fa7951..d638e38dec1 100644
--- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
+++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
@@ -6,6 +6,7 @@
 #include "intel_gpu/graph/serialization/binary_buffer.hpp"
 #include "intel_gpu/graph/serialization/string_serializer.hpp"
 #include "intel_gpu/graph/serialization/utils.hpp"
+#include "intel_gpu/graph/serialization/vector_serializer.hpp"
 #include "intel_gpu/plugin/graph.hpp"
 #include "intel_gpu/runtime/itt.hpp"
 #include "intel_gpu/plugin/infer_request.hpp"
@@ -96,11 +97,14 @@ CompiledModel::CompiledModel(std::istream& networkModel, InferenceEngine::Remote
         std::string name;
         std::string precision;
         std::string layout;
+        InferenceEngine::SizeVector dims;
         ib >> name;
         ib >> precision;
         ib >> layout;
+        ib >> dims;
 
         DataPtr input = std::make_shared<Data>(name, Precision::FromStr(precision), cldnn::serial_util::layout_from_string(layout));
+        input->setDims(dims);
         InputInfo::Ptr infoNew = std::make_shared<InputInfo>();
         infoNew->setInputData(input);
         inputs.emplace(std::make_pair(name, infoNew));
@@ -115,11 +119,14 @@ CompiledModel::CompiledModel(std::istream& networkModel, InferenceEngine::Remote
         std::string name;
         std::string precision;
         std::string layout;
+        InferenceEngine::SizeVector dims;
         ib >> name;
         ib >> precision;
         ib >> layout;
+        ib >> dims;
 
         DataPtr output = std::make_shared<Data>(name, Precision::FromStr(precision), cldnn::serial_util::layout_from_string(layout));
+        output->setDims(dims);
         outputs.emplace(std::make_pair(name, output));
     }
@@ -317,14 +324,6 @@ IInferRequestInternal::Ptr CompiledModel::CreateInferRequest() {
                                                  _callbackExecutor);
 }
 
-bool CompiledModel::is_serializable() {
-    // Dynamic model serialization is not yet supported.
-    if (m_graphs[0]->GetNetwork()->is_dynamic())
-        return false;
-
-    return true;
-}
-
 // Cache blob format:
 //     [ ConstInputsDataMap / ConstOutputsDataMap ]
 //     [ ov::Node::Input/ ov::Node::Output ]
@@ -334,9 +333,6 @@ void CompiledModel::Export(std::ostream& networkModel) {
     if (m_graphs.empty())
         IE_THROW(NetworkNotLoaded);
 
-    if (!is_serializable())
-        return;
-
     cldnn::BinaryOutputBuffer ob(networkModel);
 
     // InputsInfo and OutputsInfo for CNNNetwork
@@ -350,6 +346,7 @@ void CompiledModel::Export(std::ostream& networkModel) {
         std::stringstream ss;
         ss << in.second->getInputData()->getLayout();
         ob << ss.str();
+        ob << in.second->getTensorDesc().getDims();
     }
 
     ob << GetOutputsInfo().size();
@@ -361,6 +358,7 @@ void CompiledModel::Export(std::ostream& networkModel) {
         std::stringstream ss;
         ss << out.second->getLayout();
         ob << ss.str();
+        ob << out.second->getTensorDesc().getDims();
     }
 }
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
index 54630c5384a..aaced7fdc61 100644
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -142,12 +142,6 @@ Plugin::Plugin() : m_default_contexts({}) {
             m_default_contexts.insert({device.first, ctx});
         }
     }
-
-    if (const char* env_p = std::getenv("OV_GPU_CACHE_MODEL")) {
-        if (env_p[0] == '1') {
-            isModelCachingEnabled = true;
-        }
-    }
 }
 
 auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) {
@@ -204,6 +198,9 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine
     {
         OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateExeNetwork");
         CompiledModel::Ptr exeNetwork = std::make_shared<CompiledModel>(transformedNetwork, context, config);
+        if (exeNetwork->m_graphs[0]->GetNetwork()->is_dynamic()) {
+            isModelCachingEnabled = false;
+        }
         update_memory_statistics(context->get_impl());
         return exeNetwork;
     }
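The compiled_model.cpp hunks above add each input's and output's dims (an InferenceEngine::SizeVector, i.e. a std::vector of size_t) to the cache blob, which is why vector_serializer.hpp is now included. A self-contained sketch of a length-prefixed binary round-trip for such a vector; it illustrates the general technique, not the plugin's actual wire format:

#include <cstdint>
#include <iostream>
#include <sstream>
#include <vector>

using SizeVector = std::vector<size_t>;  // stand-in for InferenceEngine::SizeVector

// Write the element count, then each dimension, in fixed-width binary form.
void write_dims(std::ostream& os, const SizeVector& dims) {
    const uint64_t n = dims.size();
    os.write(reinterpret_cast<const char*>(&n), sizeof(n));
    for (size_t dim : dims) {
        const uint64_t d = dim;
        os.write(reinterpret_cast<const char*>(&d), sizeof(d));
    }
}

// Read back the vector written by write_dims.
SizeVector read_dims(std::istream& is) {
    uint64_t n = 0;
    is.read(reinterpret_cast<char*>(&n), sizeof(n));
    SizeVector dims(n);
    for (auto& dim : dims) {
        uint64_t d = 0;
        is.read(reinterpret_cast<char*>(&d), sizeof(d));
        dim = d;
    }
    return dims;
}

int main() {
    std::stringstream ss;
    write_dims(ss, {1, 3, 224, 224});
    for (size_t d : read_dims(ss))
        std::cout << d << ' ';  // prints: 1 3 224 224
}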
diff --git a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp
index 7ca5f1acb3c..6cfbd21725f 100644
--- a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp
+++ b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp
@@ -70,10 +70,8 @@ std::string kernels_cache::get_cache_path() const {
 }
 
 bool kernels_cache::is_cache_enabled() const {
-    if (const char* env_p = std::getenv("OV_GPU_CACHE_MODEL")) {
-        if (env_p[0] == '1') {
-            return false;
-        }
+    if (!_config.get_property(ov::intel_gpu::allow_new_shape_infer)) {
+        return false;
     }
 
     return !_config.get_property(ov::cache_dir).empty();
diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp
index f178723e025..64468aa0cec 100644
--- a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp
+++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp
@@ -48,15 +48,15 @@ namespace {
     INSTANTIATE_TEST_SUITE_P(smoke_KernelCachingSupportCase_GPU, CompiledKernelsCacheTest,
                              ::testing::Combine(
                                      ::testing::Values(CommonTestUtils::DEVICE_GPU),
-                                     ::testing::Values(std::make_pair(ov::AnyMap{}, "cl_cache"))),
+                                     ::testing::Values(std::make_pair(ov::AnyMap{}, "blob"))),
                              CompiledKernelsCacheTest::getTestCaseName);
 
     auto autoConfigs = []() {
         return std::vector<std::pair<ov::AnyMap, std::string>>{
-            std::make_pair(ov::AnyMap{{ov::device::priorities(CommonTestUtils::DEVICE_GPU)}}, "cl_cache"),
+            std::make_pair(ov::AnyMap{{ov::device::priorities(CommonTestUtils::DEVICE_GPU)}}, "blob"),
             std::make_pair(
                 ov::AnyMap{{ov::device::priorities(CommonTestUtils::DEVICE_GPU, CommonTestUtils::DEVICE_CPU)}},
-                "blob,cl_cache"),
+                "blob"),
             std::make_pair(
                 ov::AnyMap{{ov::device::priorities(CommonTestUtils::DEVICE_CPU, CommonTestUtils::DEVICE_GPU)}},
                 "blob")};
diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/caching_tests.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/caching_tests.cpp
index 1c10cea5ffe..92dc383ad49 100644
--- a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/caching_tests.cpp
+++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/caching_tests.cpp
@@ -46,7 +46,7 @@ namespace {
     INSTANTIATE_TEST_SUITE_P(smoke_KernelCachingSupportCase_GPU, LoadNetworkCompiledKernelsCacheTest,
                              ::testing::Combine(
                                      ::testing::Values(CommonTestUtils::DEVICE_GPU),
-                                     ::testing::Values(std::make_pair(std::map<std::string, std::string>(), "cl_cache"))),
+                                     ::testing::Values(std::make_pair(std::map<std::string, std::string>(), "blob"))),
                              LoadNetworkCompiledKernelsCacheTest::getTestCaseName);
 
     typedef std::map<std::string, std::string> conftype;
@@ -54,10 +54,10 @@ namespace {
         return std::vector<std::pair<conftype, std::string>>{
                 std::make_pair(conftype{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES,
                                          CommonTestUtils::DEVICE_GPU}},
-                               "cl_cache"),
+                               "blob"),
                 std::make_pair(conftype{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES,
                                         (std::string(CommonTestUtils::DEVICE_GPU) + "," + CommonTestUtils::DEVICE_CPU)}},
-                               "blob,cl_cache"),
+                               "blob"),
                 std::make_pair(conftype{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES,
                                         (std::string(CommonTestUtils::DEVICE_CPU) + "," + CommonTestUtils::DEVICE_GPU)}},
                                "blob")};
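With blob caching now the default, the expected artifact in the cache directory changes from .cl_cache kernel files to a .blob per compiled model, so every "cl_cache" (and "blob,cl_cache") expectation above collapses to "blob". What these tests verify is essentially a count of cache-directory entries by extension; a standalone sketch of that check, with count_cached_files and the "ov_cache" directory as hypothetical names rather than the suite's real helpers:

#include <filesystem>
#include <iostream>
#include <string>

namespace fs = std::filesystem;

// Count regular files in `dir` whose extension matches `ext` (e.g. ".blob").
size_t count_cached_files(const fs::path& dir, const std::string& ext) {
    if (!fs::exists(dir))
        return 0;
    size_t count = 0;
    for (const auto& entry : fs::directory_iterator(dir)) {
        if (entry.is_regular_file() && entry.path().extension() == ext)
            ++count;
    }
    return count;
}

int main() {
    // After compiling a model with ov::cache_dir set, the GPU plugin is now
    // expected to produce .blob files rather than .cl_cache kernel files.
    std::cout << count_cached_files("ov_cache", ".blob") << " blob file(s)\n";
}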
diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp
index 176e1e7dd35..cd11d6a4445 100644
--- a/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp
+++ b/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp
@@ -118,7 +118,7 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*smoke_select_CompareWithRefsNumpy_dynamic_range.*)",
         R"(.*CachingSupportCase.*LoadNetworkCacheTestBase.*CompareWithRefImpl.*)",
 #if defined(_WIN32) || defined(_WIN64)
-        R"(.*Auto_KernelCachingSupportCase.*CanCreateCacheDirAndDumpBinariesUnicodePath.*)",
+        R"(.*KernelCachingSupportCase.*CanCreateCacheDirAndDumpBinariesUnicodePath.*)",
 #endif
         R"(.*CachingSupportCase.*GPU.*CompileModelCacheTestBase.*CompareWithRefImpl.*)",
         // Currently 1D convolution has an issue
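After this patch, caching behavior is driven entirely by ov::cache_dir: no environment variable is required, and the plugin decides internally whether to export a whole-model blob (static models) or fall back to per-kernel caching (dynamic models). A minimal usage sketch against the public OpenVINO 2.0 API; the model path is a placeholder:

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Any non-empty cache_dir enables caching; the GPU plugin chooses the
    // caching strategy internally based on the model's dynamism.
    core.set_property(ov::cache_dir("ov_cache"));

    auto model = core.read_model("model.xml");          // placeholder path
    auto compiled = core.compile_model(model, "GPU");   // first run populates the cache
    auto recompiled = core.compile_model(model, "GPU"); // subsequent runs load from it
    return 0;
}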