[GPU] Fallback to kernel caching in the case of dynamic models (#15842)

* use kernel caching for dynamic models (see the sketch after the change summary below)

* replaced cl_cache with blob

* updated to serialize dims info of input and output

* updated to skip Unicode-path tests on Windows
Eddy Kim 2023-02-24 15:05:16 +09:00 committed by GitHub
parent a4f0b340d0
commit f562e96305
9 changed files with 24 additions and 36 deletions
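The decision this commit implements can be summarized as: static models keep the full compiled-model ("blob") cache, while dynamic models fall back to caching only the compiled kernels, which requires a cache directory to be configured. A minimal standalone sketch of that decision follows; CacheMode and choose_cache_mode are hypothetical helpers used only to restate the logic, not plugin code.

// Illustrative sketch only: CacheMode and choose_cache_mode are hypothetical.
#include <iostream>
#include <string>

enum class CacheMode { ModelBlob, CompiledKernels, None };

// Static models keep the full compiled-model ("blob") cache; dynamic models
// fall back to caching individual compiled kernels, which only happens when
// a cache directory has been configured.
CacheMode choose_cache_mode(bool model_is_dynamic, const std::string& cache_dir) {
    if (!model_is_dynamic)
        return CacheMode::ModelBlob;
    return cache_dir.empty() ? CacheMode::None : CacheMode::CompiledKernels;
}

int main() {
    std::cout << static_cast<int>(choose_cache_mode(false, "gpu_cache")) << "\n";  // 0: ModelBlob
    std::cout << static_cast<int>(choose_cache_mode(true, "gpu_cache")) << "\n";   // 1: CompiledKernels
    return 0;
}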

@@ -46,9 +46,6 @@ public:
ExecutionConfig m_config;
InferenceEngine::ITaskExecutor::Ptr m_taskExecutor;
InferenceEngine::ITaskExecutor::Ptr m_waitExecutor;
private:
bool is_serializable();
};
} // namespace intel_gpu

@@ -18,7 +18,7 @@ namespace intel_gpu {
class Plugin : public InferenceEngine::IInferencePlugin {
struct impl;
std::shared_ptr<impl> _impl;
bool isModelCachingEnabled = false;
bool isModelCachingEnabled = true;
std::string default_device_id = "0";
// key: device_id, value: cldnn device

@@ -324,10 +324,8 @@ private:
void build_primitive(const ExecutionConfig& config) {
auto cache_outpath = get_cache_directory(config);
if (const char* env_p = std::getenv("OV_GPU_CACHE_MODEL")) {
if (env_p[0] == '1') {
cache_outpath = "";
}
if (!config.get_property(ov::intel_gpu::allow_new_shape_infer)) {
cache_outpath = "";
}
if (cache_outpath.empty()) {

@@ -6,6 +6,7 @@
#include "intel_gpu/graph/serialization/binary_buffer.hpp"
#include "intel_gpu/graph/serialization/string_serializer.hpp"
#include "intel_gpu/graph/serialization/utils.hpp"
#include "intel_gpu/graph/serialization/vector_serializer.hpp"
#include "intel_gpu/plugin/graph.hpp"
#include "intel_gpu/runtime/itt.hpp"
#include "intel_gpu/plugin/infer_request.hpp"
@@ -96,11 +97,14 @@ CompiledModel::CompiledModel(std::istream& networkModel, InferenceEngine::Remote
std::string name;
std::string precision;
std::string layout;
InferenceEngine::SizeVector dims;
ib >> name;
ib >> precision;
ib >> layout;
ib >> dims;
DataPtr input = std::make_shared<Data>(name, Precision::FromStr(precision), cldnn::serial_util::layout_from_string(layout));
input->setDims(dims);
InputInfo::Ptr infoNew = std::make_shared<InputInfo>();
infoNew->setInputData(input);
inputs.emplace(std::make_pair(name, infoNew));
@@ -115,11 +119,14 @@ CompiledModel::CompiledModel(std::istream& networkModel, InferenceEngine::Remote
std::string name;
std::string precision;
std::string layout;
InferenceEngine::SizeVector dims;
ib >> name;
ib >> precision;
ib >> layout;
ib >> dims;
DataPtr output = std::make_shared<Data>(name, Precision::FromStr(precision), cldnn::serial_util::layout_from_string(layout));
output->setDims(dims);
outputs.emplace(std::make_pair(name, output));
}
@@ -317,14 +324,6 @@ IInferRequestInternal::Ptr CompiledModel::CreateInferRequest() {
_callbackExecutor);
}
bool CompiledModel::is_serializable() {
// Dynamic model serialization is not yet supported.
if (m_graphs[0]->GetNetwork()->is_dynamic())
return false;
return true;
}
// Cache blob format:
// [ ConstInputsDataMap / ConstOutputsDataMap ]
// [ ov::Node::Input/ ov::Node::Output ]
@@ -334,9 +333,6 @@ void CompiledModel::Export(std::ostream& networkModel) {
if (m_graphs.empty())
IE_THROW(NetworkNotLoaded);
if (!is_serializable())
return;
cldnn::BinaryOutputBuffer ob(networkModel);
// InputsInfo and OutputsInfo for CNNNetwork
@@ -350,6 +346,7 @@ void CompiledModel::Export(std::ostream& networkModel) {
std::stringstream ss;
ss << in.second->getInputData()->getLayout();
ob << ss.str();
ob << in.second->getTensorDesc().getDims();
}
ob << GetOutputsInfo().size();
@@ -361,6 +358,7 @@ void CompiledModel::Export(std::ostream& networkModel) {
std::stringstream ss;
ss << out.second->getLayout();
ob << ss.str();
ob << out.second->getTensorDesc().getDims();
}
}
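The hunks above extend the cached-blob header with each input's and output's dims. A simplified, self-contained sketch of the same round trip follows; it uses a plain std::stringstream and hand-rolled write_dims/read_dims helpers instead of the plugin's cldnn::BinaryOutputBuffer / BinaryInputBuffer, so the byte format shown is illustrative only.

// Simplified sketch of the dims round trip; helpers and format are illustrative.
#include <cassert>
#include <cstddef>
#include <istream>
#include <ostream>
#include <sstream>
#include <vector>

using SizeVector = std::vector<std::size_t>;

// Write the rank first, then each dimension.
void write_dims(std::ostream& os, const SizeVector& dims) {
    os << dims.size();
    for (auto d : dims)
        os << ' ' << d;
    os << ' ';
}

// Read the rank, then that many dimensions.
SizeVector read_dims(std::istream& is) {
    std::size_t rank = 0;
    is >> rank;
    SizeVector dims(rank);
    for (auto& d : dims)
        is >> d;
    return dims;
}

int main() {
    std::stringstream ss;
    write_dims(ss, {1, 3, 224, 224});
    assert(read_dims(ss) == (SizeVector{1, 3, 224, 224}));
    return 0;
}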

@@ -142,12 +142,6 @@ Plugin::Plugin() : m_default_contexts({}) {
m_default_contexts.insert({device.first, ctx});
}
}
if (const char* env_p = std::getenv("OV_GPU_CACHE_MODEL")) {
if (env_p[0] == '1') {
isModelCachingEnabled = true;
}
}
}
auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) {
@@ -204,6 +198,9 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine
{
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateExeNetwork");
CompiledModel::Ptr exeNetwork = std::make_shared<CompiledModel>(transformedNetwork, context, config);
if (exeNetwork->m_graphs[0]->GetNetwork()->is_dynamic()) {
isModelCachingEnabled = false;
}
update_memory_statistics(context->get_impl());
return exeNetwork;
}
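With this change the plugin decides on its own when to drop model caching: model caching is now on by default, and if the compiled graph turns out to be dynamic, isModelCachingEnabled is cleared so the kernels cache takes over. From the user's side nothing changes in how caching is enabled; a usage sketch with the public OpenVINO API follows, with the model path as a placeholder.

// Usage sketch (placeholder model path): with ov::cache_dir set, a static model
// is cached as a single blob, while a dynamic model falls back to per-kernel
// caching instead of being left uncached.
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    core.set_property(ov::cache_dir("gpu_cache"));      // enable caching
    auto model = core.read_model("model.xml");          // placeholder model path
    auto compiled = core.compile_model(model, "GPU");   // first compile populates the cache
    return 0;
}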

@@ -70,10 +70,8 @@ std::string kernels_cache::get_cache_path() const {
}
bool kernels_cache::is_cache_enabled() const {
if (const char* env_p = std::getenv("OV_GPU_CACHE_MODEL")) {
if (env_p[0] == '1') {
return false;
}
if (!_config.get_property(ov::intel_gpu::allow_new_shape_infer)) {
return false;
}
return !_config.get_property(ov::cache_dir).empty();

@@ -48,15 +48,15 @@ namespace {
INSTANTIATE_TEST_SUITE_P(smoke_KernelCachingSupportCase_GPU, CompiledKernelsCacheTest,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::Values(std::make_pair(ov::AnyMap{}, "cl_cache"))),
::testing::Values(std::make_pair(ov::AnyMap{}, "blob"))),
CompiledKernelsCacheTest::getTestCaseName);
auto autoConfigs = []() {
return std::vector<std::pair<ov::AnyMap, std::string>>{
std::make_pair(ov::AnyMap{{ov::device::priorities(CommonTestUtils::DEVICE_GPU)}}, "cl_cache"),
std::make_pair(ov::AnyMap{{ov::device::priorities(CommonTestUtils::DEVICE_GPU)}}, "blob"),
std::make_pair(
ov::AnyMap{{ov::device::priorities(CommonTestUtils::DEVICE_GPU, CommonTestUtils::DEVICE_CPU)}},
"blob,cl_cache"),
"blob"),
std::make_pair(
ov::AnyMap{{ov::device::priorities(CommonTestUtils::DEVICE_CPU, CommonTestUtils::DEVICE_GPU)}},
"blob")};

@@ -46,7 +46,7 @@ namespace {
INSTANTIATE_TEST_SUITE_P(smoke_KernelCachingSupportCase_GPU, LoadNetworkCompiledKernelsCacheTest,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::Values(std::make_pair(std::map<std::string, std::string>(), "cl_cache"))),
::testing::Values(std::make_pair(std::map<std::string, std::string>(), "blob"))),
LoadNetworkCompiledKernelsCacheTest::getTestCaseName);
typedef std::map<std::string, std::string> conftype;
@@ -54,10 +54,10 @@ namespace {
return std::vector<std::pair<conftype, std::string>>{
std::make_pair(conftype{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES,
CommonTestUtils::DEVICE_GPU}},
"cl_cache"),
"blob"),
std::make_pair(conftype{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES,
(std::string(CommonTestUtils::DEVICE_GPU) + "," + CommonTestUtils::DEVICE_CPU)}},
"blob,cl_cache"),
"blob"),
std::make_pair(conftype{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES,
(std::string(CommonTestUtils::DEVICE_CPU) + "," + CommonTestUtils::DEVICE_GPU)}},
"blob")};

@@ -118,7 +118,7 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*smoke_select_CompareWithRefsNumpy_dynamic_range.*)",
R"(.*CachingSupportCase.*LoadNetworkCacheTestBase.*CompareWithRefImpl.*)",
#if defined(_WIN32) || defined(_WIN64)
R"(.*Auto_KernelCachingSupportCase.*CanCreateCacheDirAndDumpBinariesUnicodePath.*)",
R"(.*KernelCachingSupportCase.*CanCreateCacheDirAndDumpBinariesUnicodePath.*)",
#endif
R"(.*CachingSupportCase.*GPU.*CompileModelCacheTestBase.*CompareWithRefImpl.*)",
// Currently 1D convolution has an issue