Add new compile_model API to support hashing in-memory models (#14543)

* Add new compile_model API for the ONNX Runtime OV EP

Allow compile_model() to accept model/weights data.

* Minor cleanup

* Cache model if possible

* Compute hash based on model_xml and model_weight

* Fix typo

* Change hash key computation for model's weights

* Resolve test case issue

* Use tensor instead of blob for hash computation

* Fix hash computation issue and add more test cases

* Fix a build issue caused by data format
Author: River Li, 2023-01-10 16:32:34 +08:00 (committed by GitHub)
Parent: b64c1ff20a
Commit: 246a287c34
11 changed files with 352 additions and 0 deletions

View File

@@ -99,6 +99,27 @@ public:
const std::map<std::string, std::string>& config,
const std::function<void(const ie::CNNNetwork&)>& val = nullptr) = 0;
/**
 * @brief Creates an executable network from in-memory model data.
*
* Users can create as many networks as they need and use
* them simultaneously (up to the limitation of the hardware resources)
*
* @param modelStr String data of model
* @param weights Model's weights
* @param deviceName Name of device to load network to
* @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load
* operation
* @param val Optional callback to perform validation of loaded CNNNetwork, if ReadNetwork is triggered
* @return An executable network reference
*/
virtual ie::SoExecutableNetworkInternal LoadNetwork(
const std::string& modelStr,
const ie::Blob::CPtr& weights,
const std::string& deviceName,
const std::map<std::string, std::string>& config,
const std::function<void(const ie::CNNNetwork&)>& val = nullptr) = 0;
/**
 * @brief Creates an executable network from a previously exported network
 * @param networkModel network model stream
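A hedged sketch of how a component that already holds the dev-API ie::ICore pointer (such as the ONNX Runtime OV EP) could call the new overload; the blob construction (U8 precision, C layout), the helper name, and the "CPU" device are illustrative assumptions, not taken from this diff:

#include <ie_blob.h>  // InferenceEngine::Blob, make_shared_blob
// ie::ICore itself comes from the OpenVINO dev-API headers, assumed available in plugin/EP builds.

InferenceEngine::SoExecutableNetworkInternal compile_from_memory(InferenceEngine::ICore* core,
                                                                 const std::string& model_xml,
                                                                 std::vector<uint8_t>& weight_bytes) {
    // Wrap the raw weight bytes in a U8 blob; the blob only references the buffer,
    // so weight_bytes must stay alive until LoadNetwork returns.
    InferenceEngine::TensorDesc desc(InferenceEngine::Precision::U8,
                                     {weight_bytes.size()},
                                     InferenceEngine::Layout::C);
    InferenceEngine::Blob::CPtr weights =
        InferenceEngine::make_shared_blob<uint8_t>(desc, weight_bytes.data(), weight_bytes.size());
    // Config map and validation callback stay optional, as in the declaration above.
    return core->LoadNetwork(model_xml, weights, "CPU", {});
}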

View File

@@ -255,6 +255,44 @@ public:
return compile_model(model_path, device_name, AnyMap{std::forward<Properties>(properties)...});
}
/**
* @brief Reads a model and creates a compiled model from the IR/ONNX/PDPD memory.
* @param model String with a model in IR/ONNX/PDPD format.
 * @param weights Constant tensor with weights.
 * Reading ONNX/PDPD models does not support loading weights from the @p weights tensor.
* @param device_name Name of a device to load a model to.
* @param properties Optional map of pairs: (property name, property value) relevant only for this load
* operation.
* @note Created model object shares the weights with the @p weights object.
* Thus, do not create @p weights on temporary data that can be freed later, since the model
* constant data will point to an invalid memory.
* @return A compiled model.
*/
CompiledModel compile_model(const std::string& model,
const ov::Tensor& weights,
const std::string& device_name,
const AnyMap& properties = {});
/**
* @brief Reads a model and creates a compiled model from the IR/ONNX/PDPD memory.
* @param model String with a model in IR/ONNX/PDPD format.
 * @param weights Constant tensor with weights.
 * Reading ONNX/PDPD models does not support loading weights from the @p weights tensor.
* @param device_name Name of a device to load a model to.
* @tparam Properties Should be a pack of `std::pair<std::string, ov::Any>` types.
* @note Created model object shares the weights with the @p weights object.
* Thus, do not create @p weights on temporary data that can be freed later, since the model
* constant data will point to an invalid memory.
* @return A compiled model.
*/
template <typename... Properties>
util::EnableIfAllStringAny<CompiledModel, Properties...> compile_model(const std::string& model,
const ov::Tensor& weights,
const std::string& device_name,
Properties&&... properties) {
return compile_model(model, weights, device_name, AnyMap{std::forward<Properties>(properties)...});
}
/**
 * @brief Creates a compiled model from a source model within a specified remote context.
 * @param model Model object acquired from Core::read_model.
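For reference, a minimal end-to-end sketch of the new public overload; the file names and the LATENCY hint below are placeholders, not part of this change:

#include <openvino/openvino.hpp>
#include <fstream>
#include <sstream>
#include <vector>

int main() {
    // Read the serialized model and its weights into memory (placeholder file names).
    std::ifstream xml_file("model.xml", std::ios::binary);
    std::stringstream xml_stream;
    xml_stream << xml_file.rdbuf();
    std::string model_xml = xml_stream.str();

    std::ifstream bin_file("model.bin", std::ios::binary | std::ios::ate);
    std::vector<uint8_t> bin(static_cast<size_t>(bin_file.tellg()));
    bin_file.seekg(0);
    bin_file.read(reinterpret_cast<char*>(bin.data()), bin.size());

    // The tensor only wraps the buffer, so `bin` must outlive the compiled model
    // (same {1, 1, 1, N} U8 layout as used by the tests added in this change).
    ov::Tensor weights(ov::element::u8, {1, 1, 1, bin.size()}, bin.data());

    ov::Core core;
    auto compiled = core.compile_model(model_xml, weights, "CPU",
                                       ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
    auto request = compiled.create_infer_request();
    return 0;
}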

View File

@@ -140,6 +140,34 @@ std::string NetworkCompilationContext::computeHash(const std::string& modelName,
return std::to_string(seed);
}
std::string NetworkCompilationContext::computeHash(const std::string& modelStr,
const ov::Tensor& tensor,
const std::map<std::string, std::string>& compileOptions) {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - Model Memory");
uint64_t seed = 0;
// model string
seed = hash_combine(seed, modelStr);
// tensor data
seed = hash_combine(seed, tensor.get_size());
auto ptr = static_cast<size_t*>(tensor.data());
size_t size = tensor.get_size() / sizeof(size_t);
for (size_t i = 0; i < size; i++)
seed = hash_combine(seed, ptr[i]);
auto size_done = size * sizeof(size_t);
auto ptr_left = static_cast<uint8_t*>(tensor.data()) + size_done;
size_t size_left = tensor.get_size() - size_done;
for (size_t i = 0; i < size_left; i++)
seed = hash_combine(seed, ptr_left[i]);
// compile options
for (const auto& kvp : compileOptions) {
seed = hash_combine(seed, kvp.first + kvp.second);
}
return std::to_string(seed);
}
//////////////////////////////////////////////////
CompiledBlobHeader::CompiledBlobHeader() {}
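The hash_combine helper itself is not shown in this diff; below is a self-contained sketch of the same chunked hashing idea used above (whole size_t words first, then the remaining tail bytes), assuming a boost-style combiner:

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

// Assumed boost-style combiner; the real hash_combine lives elsewhere in the IE sources.
template <typename T>
uint64_t hash_combine(uint64_t seed, const T& value) {
    return seed ^ (std::hash<T>()(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}

uint64_t hash_model_memory(const std::string& model_str, const std::vector<uint8_t>& bytes) {
    uint64_t seed = 0;
    seed = hash_combine(seed, model_str);
    seed = hash_combine(seed, bytes.size());
    // Hash full size_t-sized words first for speed...
    const size_t words = bytes.size() / sizeof(size_t);
    const size_t* word_ptr = reinterpret_cast<const size_t*>(bytes.data());
    for (size_t i = 0; i < words; ++i)
        seed = hash_combine(seed, word_ptr[i]);
    // ...then the remaining tail bytes one by one, mirroring the diff above.
    for (size_t i = words * sizeof(size_t); i < bytes.size(); ++i)
        seed = hash_combine(seed, bytes[i]);
    return seed;
}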

View File

@@ -9,6 +9,10 @@
#include <ostream>
#include <string>
namespace ov {
class Tensor;
} // namespace ov
namespace InferenceEngine {
class CNNNetwork;
@@ -20,6 +24,9 @@ struct NetworkCompilationContext final {
static std::string computeHash(const std::string& modelName,
const std::map<std::string, std::string>& compileOptions);
static std::string computeHash(const std::string& modelStr,
const ov::Tensor& data,
const std::map<std::string, std::string>& compileOptions);
};
class CompiledBlobHeader final {

View File

@@ -546,6 +546,15 @@ class CoreImpl : public ie::ICore, public std::enable_shared_from_this<ie::ICore
return ie::NetworkCompilationContext::computeHash(modelName, compileConfig);
}
std::string CalculateMemoryHash(const std::string& modelStr,
const ov::Tensor& weights,
const std::string& deviceFamily,
const ov::InferencePlugin& plugin,
const std::map<std::string, std::string>& config) const {
auto compileConfig = CreateCompileConfig(plugin, deviceFamily, config);
return ie::NetworkCompilationContext::computeHash(modelStr, weights, compileConfig);
}
public:
CoreImpl(bool _newAPI) : newAPI(_newAPI) {
add_mutex("");  // Register global mutex
@@ -884,6 +893,46 @@ public:
return {res._ptr, res._so};
}
ie::SoExecutableNetworkInternal LoadNetwork(const std::string& modelStr,
const ie::Blob::CPtr& weights,
const std::string& deviceName,
const std::map<std::string, std::string>& config,
const std::function<void(const CNNNetwork&)>& val = nullptr) override {
OV_ITT_SCOPE(FIRST_INFERENCE, ie::itt::domains::IE_LT, "Core::LoadNetwork::Memory");
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
auto plugin = GetCPPPluginByName(parsed._deviceName);
ov::SoPtr<ie::IExecutableNetworkInternal> res;
auto cacheManager =
coreConfig.getCacheConfigForDevice(parsed._deviceName, DeviceSupportsCacheDir(plugin), parsed._config)
._cacheManager;
auto cacheContent = CacheContent{cacheManager};
if (cacheManager && DeviceSupportsImportExport(plugin)) {
bool loadedFromCache = false;
ov::Tensor tensor = ov::Tensor();
if (weights) {
tensor = ov::Tensor(element::u8, {weights->byteSize()}, weights->cbuffer().as<uint8_t*>());
}
cacheContent.blobId = CalculateMemoryHash(modelStr, tensor, parsed._deviceName, plugin, parsed._config);
auto lock = cacheGuard.getHashLock(cacheContent.blobId);
res = LoadNetworkFromCache(cacheContent, plugin, parsed._config, nullptr, loadedFromCache);
if (!loadedFromCache) {
auto cnnNetwork = ReadNetwork(modelStr, weights);
if (val) {
val(cnnNetwork);
}
res = compile_model_impl(cnnNetwork, plugin, parsed._config, nullptr, cacheContent);
}
} else {
auto cnnNetwork = ReadNetwork(modelStr, weights);
if (val) {
val(cnnNetwork);
}
res = compile_model_impl(cnnNetwork, plugin, parsed._config, nullptr, cacheContent);
}
return {res._ptr, res._so};
}
ie::SoExecutableNetworkInternal ImportNetwork(std::istream& networkModel,
const std::string& deviceName,
const std::map<std::string, std::string>& config) override {
@@ -2003,6 +2052,20 @@ CompiledModel Core::compile_model(const std::string& modelPath, const std::strin
});
}
CompiledModel Core::compile_model(const std::string& model,
const ov::Tensor& weights,
const std::string& deviceName,
const AnyMap& config) {
InferenceEngine::Blob::Ptr blob;
if (weights) {
blob = weights._impl;
}
OV_CORE_CALL_STATEMENT({
auto exec = _impl->LoadNetwork(model, blob, deviceName, any_copy(flatten_sub_properties(deviceName, config)));
return {exec._ptr, exec._so};
});
}
CompiledModel Core::compile_model(const std::shared_ptr<const ov::Model>& model,
const RemoteContext& context,
const AnyMap& config) {
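A short usage sketch of the caching path this overload feeds into, continuing from the model_xml/bin buffers prepared in the earlier sketch; the cache directory name is a placeholder and the cache hit assumes a device that supports import/export (e.g. CPU):

ov::Core core;
core.set_property(ov::cache_dir("model_cache"));  // enable the compiled-blob cache (placeholder dir)
ov::Tensor weights(ov::element::u8, {1, 1, 1, bin.size()}, bin.data());
// First call: hash(model string, weights bytes, compile options) misses the cache,
// so the model is read, compiled, and exported under that hash.
auto first = core.compile_model(model_xml, weights, "CPU");
// Second call with identical inputs: the same hash is computed and the cached blob is imported.
auto second = core.compile_model(model_xml, weights, "CPU");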

View File

@@ -173,4 +173,10 @@ namespace {
::testing::ValuesIn(TestTargets),
::testing::ValuesIn(LoadFromFileConfigs)),
CompileModelLoadFromFileTestBase::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Auto_CachingSupportCase_CPU,
CompileModelLoadFromMemoryTestBase,
::testing::Combine(::testing::ValuesIn(TestTargets),
::testing::ValuesIn(LoadFromFileConfigs)),
CompileModelLoadFromMemoryTestBase::getTestCaseName);
}  // namespace

View File

@@ -83,6 +83,12 @@ namespace {
::testing::ValuesIn(LoadFromFileConfigs)),
CompileModelLoadFromFileTestBase::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Auto_CachingSupportCase_GPU,
CompileModelLoadFromMemoryTestBase,
::testing::Combine(::testing::ValuesIn(TestTargets),
::testing::ValuesIn(LoadFromFileConfigs)),
CompileModelLoadFromMemoryTestBase::getTestCaseName);
const std::vector<ov::AnyMap> GPULoadFromFileConfigs = {
{ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)},
{ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)},
@@ -94,4 +100,9 @@ namespace {
::testing::ValuesIn(GPULoadFromFileConfigs)),
CompileModelLoadFromFileTestBase::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CachingSupportCase_GPU,
CompileModelLoadFromMemoryTestBase,
::testing::Combine(::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::ValuesIn(GPULoadFromFileConfigs)),
CompileModelLoadFromMemoryTestBase::getTestCaseName);
}  // namespace

View File

@@ -84,6 +84,64 @@ TEST_P(OVExecutableNetworkBaseTest, canLoadCorrectNetworkToGetExecutable) {
EXPECT_NO_THROW(auto execNet = core->compile_model(function, target_device, configuration));
}
TEST_P(OVExecutableNetworkBaseTest, canLoadNetworkFromMemory) {
std::string model = R"V0G0N(
<net name="Network" version="10">
<layers>
<layer name="in1" type="Parameter" id="0" version="opset8">
<data element_type="f16" shape="1,3,22,22"/>
<output>
<port id="0" precision="FP16" names="data">
<dim>1</dim>
<dim>3</dim>
<dim>22</dim>
<dim>22</dim>
</port>
</output>
</layer>
<layer name="round" id="1" type="Round" version="opset8">
<data mode="half_to_even"/>
<input>
<port id="1" precision="FP16">
<dim>1</dim>
<dim>3</dim>
<dim>22</dim>
<dim>22</dim>
</port>
</input>
<output>
<port id="2" precision="FP16" names="r">
<dim>1</dim>
<dim>3</dim>
<dim>22</dim>
<dim>22</dim>
</port>
</output>
</layer>
<layer name="output" type="Result" id="2" version="opset8">
<input>
<port id="0" precision="FP16">
<dim>1</dim>
<dim>3</dim>
<dim>22</dim>
<dim>22</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
<edge from-layer="1" from-port="2" to-layer="2" to-port="0"/>
</edges>
</net>
)V0G0N";
if (target_device.find("GNA") != std::string::npos) {
GTEST_SKIP();
}
EXPECT_NO_THROW(auto execNet = core->compile_model(model, ov::Tensor(), target_device, configuration));
}
TEST(OVExecutableNetworkBaseTest, smoke_LoadNetworkToDefaultDeviceNoThrow) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
std::shared_ptr<ov::Core> core = utils::PluginCache::get().core();

View File

@@ -82,6 +82,27 @@ public:
void run() override;
};
using compileModelLoadFromMemoryParams = std::tuple<std::string, // device name
ov::AnyMap // device configuration
>;
class CompileModelLoadFromMemoryTestBase : public testing::WithParamInterface<compileModelLoadFromMemoryParams>,
virtual public SubgraphBaseTest,
virtual public OVPluginTestBase {
std::string m_cacheFolderName;
std::string m_modelName;
std::string m_weightsName;
std::string m_model;
ov::Tensor m_weights;
std::vector<std::uint8_t> weights_vector;
public:
static std::string getTestCaseName(testing::TestParamInfo<compileModelLoadFromMemoryParams> obj);
void SetUp() override;
void TearDown() override;
void run() override;
};
using compileKernelsCacheParams = std::tuple<
std::string,  // device name
std::pair<ov::AnyMap, std::string>  // device and cache configuration

View File

@@ -321,6 +321,98 @@ TEST_P(CompileModelLoadFromFileTestBase, CanLoadFromFileWithoutExecption) {
run();
}
std::string CompileModelLoadFromMemoryTestBase::getTestCaseName(
testing::TestParamInfo<compileModelLoadFromMemoryParams> obj) {
auto param = obj.param;
auto deviceName = std::get<0>(param);
auto configuration = std::get<1>(param);
std::ostringstream result;
std::replace(deviceName.begin(), deviceName.end(), ':', '.');
result << "device_name=" << deviceName << "_";
for (auto& iter : configuration) {
result << "_" << iter.first << "_" << iter.second.as<std::string>() << "_";
}
return result.str();
}
void CompileModelLoadFromMemoryTestBase::SetUp() {
ovModelWithName funcPair;
std::tie(targetDevice, configuration) = GetParam();
target_device = targetDevice;
APIBaseTest::SetUp();
std::stringstream ss;
auto hash = std::hash<std::string>()(SubgraphBaseTest::GetTestName());
ss << "testCache_" << std::to_string(hash) << "_" << std::this_thread::get_id() << "_" << GetTimestamp();
m_modelName = ss.str() + ".xml";
m_weightsName = ss.str() + ".bin";
for (auto& iter : configuration) {
ss << "_" << iter.first << "_" << iter.second.as<std::string>() << "_";
}
m_cacheFolderName = ss.str();
core->set_property(ov::cache_dir());
ngraph::pass::Manager manager;
manager.register_pass<ov::pass::Serialize>(m_modelName, m_weightsName);
manager.run_passes(ngraph::builder::subgraph::makeConvPoolRelu(
{1, 3, 227, 227},
InferenceEngine::details::convertPrecision(InferenceEngine::Precision::FP32)));
try {
std::ifstream model_file(m_modelName, std::ios::binary);
std::stringstream ss;
ss << model_file.rdbuf();
m_model = ss.str();
} catch (const Exception& ex) {
GTEST_FAIL() << "Can't read xml file from: " << m_modelName << "\nException [" << ex.what() << "]" << std::endl;
}
try {
std::ifstream weights_file(m_weightsName, std::ios::binary);
weights_file.unsetf(std::ios::skipws);
weights_file.seekg(0, std::ios::end);
const auto weights_size = static_cast<std::size_t>(weights_file.tellg());
weights_file.seekg(0, std::ios::beg);
weights_vector.reserve(weights_size);
weights_vector.insert(weights_vector.begin(),
std::istream_iterator<std::uint8_t>(weights_file),
std::istream_iterator<std::uint8_t>());
m_weights = ov::Tensor(ov::element::u8, {1, 1, 1, weights_size}, weights_vector.data());
} catch (const Exception& ex) {
GTEST_FAIL() << "Can't read weights file from: " << m_weightsName << "\nException [" << ex.what() << "]"
<< std::endl;
}
}
void CompileModelLoadFromMemoryTestBase::TearDown() {
CommonTestUtils::removeFilesWithExt(m_cacheFolderName, "blob");
CommonTestUtils::removeFilesWithExt(m_cacheFolderName, "cl_cache");
CommonTestUtils::removeIRFiles(m_modelName, m_weightsName);
std::remove(m_cacheFolderName.c_str());
core->set_property(ov::cache_dir());
APIBaseTest::TearDown();
weights_vector.clear();
}
void CompileModelLoadFromMemoryTestBase::run() {
SKIP_IF_CURRENT_TEST_IS_DISABLED();
core->set_property(ov::cache_dir(m_cacheFolderName));
try {
compiledModel = core->compile_model(m_model, m_weights, targetDevice, configuration);
inferRequest = compiledModel.create_infer_request();
inferRequest.infer();
} catch (const Exception& ex) {
GTEST_FAIL() << "Can't loadNetwork with model path " << m_modelName << "\nException [" << ex.what() << "]"
<< std::endl;
} catch (...) {
GTEST_FAIL() << "Can't compile network with model path " << m_modelName << std::endl;
}
}
TEST_P(CompileModelLoadFromMemoryTestBase, CanLoadFromMemoryWithoutExecption) {
run();
}
std::string CompiledKernelsCacheTest::getTestCaseName(testing::TestParamInfo<compileKernelsCacheParams> obj) {
auto param = obj.param;
std::string deviceName;

View File

@@ -21,6 +21,13 @@ public:
const std::string &,
const std::map<std::string, std::string> &,
const std::function<void(const InferenceEngine::CNNNetwork&)> &));
MOCK_METHOD5(
LoadNetwork,
InferenceEngine::SoExecutableNetworkInternal(const std::string&,
const InferenceEngine::Blob::CPtr&,
const std::string&,
const std::map<std::string, std::string>&,
const std::function<void(const InferenceEngine::CNNNetwork&)>&));
MOCK_METHOD3(ImportNetwork, InferenceEngine::SoExecutableNetworkInternal(
std::istream&, const std::string&, const std::map<std::string, std::string>&));