Add new compile_model API to support hashing of in-memory models (#14543)
* Add a new compile_model API for the ONNX Runtime OpenVINO EP: allow compile_model() to accept model/weight data directly.
* Minor updates.
* Cache the model if possible.
* Compute the hash based on the model XML and the model weights.
* Fix a typo.
* Change the hash key computation for the model's weights.
* Resolve a test case issue.
* Use a tensor instead of a blob for hash computation.
* Fix a hash computation issue and add more test cases.
* Fix a build issue caused by the data format.
parent b64c1ff20a, commit 246a287c34
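For context, a minimal usage sketch of the new memory-based overload introduced below. The file names, the "CPU" device, and the buffer handling are illustrative assumptions, not code from this change:

    // Sketch only: read an IR model and its weights into memory, then compile
    // directly from those buffers instead of from a file path.
    #include <cstdint>
    #include <fstream>
    #include <sstream>
    #include <vector>
    #include <openvino/openvino.hpp>

    int main() {
        std::ifstream xml_file("model.xml", std::ios::binary);  // hypothetical paths
        std::stringstream xml_stream;
        xml_stream << xml_file.rdbuf();
        const std::string model_str = xml_stream.str();

        std::ifstream bin_file("model.bin", std::ios::binary | std::ios::ate);
        std::vector<uint8_t> bin(static_cast<size_t>(bin_file.tellg()));
        bin_file.seekg(0);
        bin_file.read(reinterpret_cast<char*>(bin.data()), bin.size());

        // The tensor shares this buffer, so keep `bin` alive while the model is in use.
        ov::Tensor weights(ov::element::u8, {bin.size()}, bin.data());

        ov::Core core;
        auto compiled = core.compile_model(model_str, weights, "CPU");  // overload added in this PR
        auto request = compiled.create_infer_request();
        return 0;
    }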
@@ -99,6 +99,27 @@ public:
        const std::map<std::string, std::string>& config,
        const std::function<void(const ie::CNNNetwork&)>& val = nullptr) = 0;

    /**
     * @brief Creates an executable network from model memory.
     *
     * Users can create as many networks as they need and use
     * them simultaneously (up to the limitation of the hardware resources).
     *
     * @param modelStr String data of the model
     * @param weights Model's weights
     * @param deviceName Name of the device to load the network to
     * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load
     * operation
     * @param val Optional callback to perform validation of the loaded CNNNetwork, if ReadNetwork is triggered
     * @return An executable network reference
     */
    virtual ie::SoExecutableNetworkInternal LoadNetwork(
        const std::string& modelStr,
        const ie::Blob::CPtr& weights,
        const std::string& deviceName,
        const std::map<std::string, std::string>& config,
        const std::function<void(const ie::CNNNetwork&)>& val = nullptr) = 0;

    /**
     * @brief Creates an executable network from a previously exported network
     * @param networkModel network model stream
@@ -255,6 +255,44 @@ public:
        return compile_model(model_path, device_name, AnyMap{std::forward<Properties>(properties)...});
    }

    /**
     * @brief Reads a model and creates a compiled model from IR/ONNX/PDPD memory.
     * @param model String with a model in IR/ONNX/PDPD format.
     * @param weights Shared pointer to a constant tensor with weights.
     * Reading ONNX/PDPD models does not support loading weights from the @p weights tensor.
     * @param device_name Name of a device to load a model to.
     * @param properties Optional map of pairs: (property name, property value) relevant only for this load
     * operation.
     * @note The created model object shares the weights with the @p weights object.
     * Thus, do not create @p weights on temporary data that can be freed later, since the model
     * constant data will point to invalid memory.
     * @return A compiled model.
     */
    CompiledModel compile_model(const std::string& model,
                                const ov::Tensor& weights,
                                const std::string& device_name,
                                const AnyMap& properties = {});

    /**
     * @brief Reads a model and creates a compiled model from IR/ONNX/PDPD memory.
     * @param model String with a model in IR/ONNX/PDPD format.
     * @param weights Shared pointer to a constant tensor with weights.
     * Reading ONNX/PDPD models does not support loading weights from the @p weights tensor.
     * @param device_name Name of a device to load a model to.
     * @tparam Properties Should be a pack of `std::pair<std::string, ov::Any>` types.
     * @note The created model object shares the weights with the @p weights object.
     * Thus, do not create @p weights on temporary data that can be freed later, since the model
     * constant data will point to invalid memory.
     * @return A compiled model.
     */
    template <typename... Properties>
    util::EnableIfAllStringAny<CompiledModel, Properties...> compile_model(const std::string& model,
                                                                           const ov::Tensor& weights,
                                                                           const std::string& device_name,
                                                                           Properties&&... properties) {
        return compile_model(model, weights, device_name, AnyMap{std::forward<Properties>(properties)...});
    }

    /**
     * @brief Creates a compiled model from a source model within a specified remote context.
     * @param model Model object acquired from Core::read_model.
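A hedged sketch of the variadic-properties overload declared above, continuing the earlier example; the property values are illustrative and mirror configurations used elsewhere in this PR (ov::cache_dir, ov::hint::performance_mode):

    // Sketch only: same call, but with properties forwarded as a parameter pack.
    auto compiled_gpu = core.compile_model(model_str,
                                           weights,
                                           "GPU",
                                           ov::cache_dir("model_cache"),
                                           ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));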
@@ -140,6 +140,34 @@ std::string NetworkCompilationContext::computeHash(const std::string& modelName,
    return std::to_string(seed);
}

std::string NetworkCompilationContext::computeHash(const std::string& modelStr,
                                                   const ov::Tensor& tensor,
                                                   const std::map<std::string, std::string>& compileOptions) {
    OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - Model Memory");
    uint64_t seed = 0;
    // model string
    seed = hash_combine(seed, modelStr);

    // tensor data
    seed = hash_combine(seed, tensor.get_size());

    auto ptr = static_cast<size_t*>(tensor.data());
    size_t size = tensor.get_size() / sizeof(size_t);
    for (size_t i = 0; i < size; i++)
        seed = hash_combine(seed, ptr[i]);
    auto size_done = size * sizeof(size_t);
    auto ptr_left = static_cast<uint8_t*>(tensor.data()) + size_done;
    size_t size_left = tensor.get_size() - size_done;
    for (size_t i = 0; i < size_left; i++)
        seed = hash_combine(seed, ptr_left[i]);

    // compile options
    for (const auto& kvp : compileOptions) {
        seed = hash_combine(seed, kvp.first + kvp.second);
    }
    return std::to_string(seed);
}

//////////////////////////////////////////////////

CompiledBlobHeader::CompiledBlobHeader() {}
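The hashing above folds the model string, the weight bytes (word-sized chunks first, then the trailing bytes), and the compile options into one seed via hash_combine. The helper's implementation is not part of this diff; a minimal boost-style sketch of such seed folding, given purely as an assumption, would be:

    // Assumed boost-style seed folding; the real hash_combine lives elsewhere in the repository.
    #include <cstdint>
    #include <functional>

    template <typename T>
    uint64_t hash_combine(uint64_t seed, const T& value) {
        return seed ^ (std::hash<T>{}(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2));
    }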
@@ -9,6 +9,10 @@
#include <ostream>
#include <string>

namespace ov {
class Tensor;
}  // namespace ov

namespace InferenceEngine {

class CNNNetwork;
@@ -20,6 +24,9 @@ struct NetworkCompilationContext final {
    static std::string computeHash(const std::string& modelName,
                                   const std::map<std::string, std::string>& compileOptions);
    static std::string computeHash(const std::string& modelStr,
                                   const ov::Tensor& data,
                                   const std::map<std::string, std::string>& compileOptions);
};

class CompiledBlobHeader final {
@@ -546,6 +546,15 @@ class CoreImpl : public ie::ICore, public std::enable_shared_from_this<ie::ICore
        return ie::NetworkCompilationContext::computeHash(modelName, compileConfig);
    }

    std::string CalculateMemoryHash(const std::string& modelStr,
                                    const ov::Tensor& weights,
                                    const std::string& deviceFamily,
                                    const ov::InferencePlugin& plugin,
                                    const std::map<std::string, std::string>& config) const {
        auto compileConfig = CreateCompileConfig(plugin, deviceFamily, config);
        return ie::NetworkCompilationContext::computeHash(modelStr, weights, compileConfig);
    }

public:
    CoreImpl(bool _newAPI) : newAPI(_newAPI) {
        add_mutex("");  // Register global mutex
@@ -884,6 +893,46 @@ public:
        return {res._ptr, res._so};
    }

    ie::SoExecutableNetworkInternal LoadNetwork(const std::string& modelStr,
                                                const ie::Blob::CPtr& weights,
                                                const std::string& deviceName,
                                                const std::map<std::string, std::string>& config,
                                                const std::function<void(const CNNNetwork&)>& val = nullptr) override {
        OV_ITT_SCOPE(FIRST_INFERENCE, ie::itt::domains::IE_LT, "Core::LoadNetwork::Memory");
        auto parsed = parseDeviceNameIntoConfig(deviceName, config);
        auto plugin = GetCPPPluginByName(parsed._deviceName);
        ov::SoPtr<ie::IExecutableNetworkInternal> res;

        auto cacheManager =
            coreConfig.getCacheConfigForDevice(parsed._deviceName, DeviceSupportsCacheDir(plugin), parsed._config)
                ._cacheManager;
        auto cacheContent = CacheContent{cacheManager};
        if (cacheManager && DeviceSupportsImportExport(plugin)) {
            bool loadedFromCache = false;
            ov::Tensor tensor = ov::Tensor();
            if (weights) {
                tensor = ov::Tensor(element::u8, {weights->byteSize()}, weights->cbuffer().as<uint8_t*>());
            }
            cacheContent.blobId = CalculateMemoryHash(modelStr, tensor, parsed._deviceName, plugin, parsed._config);
            auto lock = cacheGuard.getHashLock(cacheContent.blobId);
            res = LoadNetworkFromCache(cacheContent, plugin, parsed._config, nullptr, loadedFromCache);
            if (!loadedFromCache) {
                auto cnnNetwork = ReadNetwork(modelStr, weights);
                if (val) {
                    val(cnnNetwork);
                }
                res = compile_model_impl(cnnNetwork, plugin, parsed._config, nullptr, cacheContent);
            }
        } else {
            auto cnnNetwork = ReadNetwork(modelStr, weights);
            if (val) {
                val(cnnNetwork);
            }
            res = compile_model_impl(cnnNetwork, plugin, parsed._config, nullptr, cacheContent);
        }
        return {res._ptr, res._so};
    }

    ie::SoExecutableNetworkInternal ImportNetwork(std::istream& networkModel,
                                                  const std::string& deviceName,
                                                  const std::map<std::string, std::string>& config) override {
@@ -2003,6 +2052,20 @@ CompiledModel Core::compile_model(const std::string& modelPath, const std::strin
    });
}

CompiledModel Core::compile_model(const std::string& model,
                                  const ov::Tensor& weights,
                                  const std::string& deviceName,
                                  const AnyMap& config) {
    InferenceEngine::Blob::Ptr blob;
    if (weights) {
        blob = weights._impl;
    }
    OV_CORE_CALL_STATEMENT({
        auto exec = _impl->LoadNetwork(model, blob, deviceName, any_copy(flatten_sub_properties(deviceName, config)));
        return {exec._ptr, exec._so};
    });
}

CompiledModel Core::compile_model(const std::shared_ptr<const ov::Model>& model,
                                  const RemoteContext& context,
                                  const AnyMap& config) {
@@ -173,4 +173,10 @@ namespace {
                                             ::testing::ValuesIn(TestTargets),
                                             ::testing::ValuesIn(LoadFromFileConfigs)),
                         CompileModelLoadFromFileTestBase::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Auto_CachingSupportCase_CPU,
                         CompileModelLoadFromMemoryTestBase,
                         ::testing::Combine(::testing::ValuesIn(TestTargets),
                                            ::testing::ValuesIn(LoadFromFileConfigs)),
                         CompileModelLoadFromMemoryTestBase::getTestCaseName);
}  // namespace
@@ -83,6 +83,12 @@ namespace {
                                            ::testing::ValuesIn(LoadFromFileConfigs)),
                         CompileModelLoadFromFileTestBase::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Auto_CachingSupportCase_GPU,
                         CompileModelLoadFromMemoryTestBase,
                         ::testing::Combine(::testing::ValuesIn(TestTargets),
                                            ::testing::ValuesIn(LoadFromFileConfigs)),
                         CompileModelLoadFromMemoryTestBase::getTestCaseName);

const std::vector<ov::AnyMap> GPULoadFromFileConfigs = {
    {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)},
    {ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)},
@@ -94,4 +100,9 @@ namespace {
                                            ::testing::ValuesIn(GPULoadFromFileConfigs)),
                         CompileModelLoadFromFileTestBase::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_CachingSupportCase_GPU,
                         CompileModelLoadFromMemoryTestBase,
                         ::testing::Combine(::testing::Values(CommonTestUtils::DEVICE_GPU),
                                            ::testing::ValuesIn(GPULoadFromFileConfigs)),
                         CompileModelLoadFromMemoryTestBase::getTestCaseName);
}  // namespace
@@ -84,6 +84,64 @@ TEST_P(OVExecutableNetworkBaseTest, canLoadCorrectNetworkToGetExecutable) {
    EXPECT_NO_THROW(auto execNet = core->compile_model(function, target_device, configuration));
}

TEST_P(OVExecutableNetworkBaseTest, canLoadNetworkFromMemory) {
    std::string model = R"V0G0N(
        <net name="Network" version="10">
            <layers>
                <layer name="in1" type="Parameter" id="0" version="opset8">
                    <data element_type="f16" shape="1,3,22,22"/>
                    <output>
                        <port id="0" precision="FP16" names="data">
                            <dim>1</dim>
                            <dim>3</dim>
                            <dim>22</dim>
                            <dim>22</dim>
                        </port>
                    </output>
                </layer>
                <layer name="round" id="1" type="Round" version="opset8">
                    <data mode="half_to_even"/>
                    <input>
                        <port id="1" precision="FP16">
                            <dim>1</dim>
                            <dim>3</dim>
                            <dim>22</dim>
                            <dim>22</dim>
                        </port>
                    </input>
                    <output>
                        <port id="2" precision="FP16" names="r">
                            <dim>1</dim>
                            <dim>3</dim>
                            <dim>22</dim>
                            <dim>22</dim>
                        </port>
                    </output>
                </layer>
                <layer name="output" type="Result" id="2" version="opset8">
                    <input>
                        <port id="0" precision="FP16">
                            <dim>1</dim>
                            <dim>3</dim>
                            <dim>22</dim>
                            <dim>22</dim>
                        </port>
                    </input>
                </layer>
            </layers>
            <edges>
                <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
                <edge from-layer="1" from-port="2" to-layer="2" to-port="0"/>
            </edges>
        </net>
        )V0G0N";

    if (target_device.find("GNA") != std::string::npos) {
        GTEST_SKIP();
    }
    EXPECT_NO_THROW(auto execNet = core->compile_model(model, ov::Tensor(), target_device, configuration));
}

TEST(OVExecutableNetworkBaseTest, smoke_LoadNetworkToDefaultDeviceNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    std::shared_ptr<ov::Core> core = utils::PluginCache::get().core();
@@ -82,6 +82,27 @@ public:
    void run() override;
};

using compileModelLoadFromMemoryParams = std::tuple<std::string,  // device name
                                                    ov::AnyMap    // device configuration
                                                    >;
class CompileModelLoadFromMemoryTestBase : public testing::WithParamInterface<compileModelLoadFromMemoryParams>,
                                           virtual public SubgraphBaseTest,
                                           virtual public OVPluginTestBase {
    std::string m_cacheFolderName;
    std::string m_modelName;
    std::string m_weightsName;
    std::string m_model;
    ov::Tensor m_weights;
    std::vector<std::uint8_t> weights_vector;

public:
    static std::string getTestCaseName(testing::TestParamInfo<compileModelLoadFromMemoryParams> obj);

    void SetUp() override;
    void TearDown() override;
    void run() override;
};

using compileKernelsCacheParams = std::tuple<
        std::string,                        // device name
        std::pair<ov::AnyMap, std::string>  // device and cache configuration
@@ -321,6 +321,98 @@ TEST_P(CompileModelLoadFromFileTestBase, CanLoadFromFileWithoutExecption) {
    run();
}

std::string CompileModelLoadFromMemoryTestBase::getTestCaseName(
    testing::TestParamInfo<compileModelLoadFromMemoryParams> obj) {
    auto param = obj.param;
    auto deviceName = std::get<0>(param);
    auto configuration = std::get<1>(param);
    std::ostringstream result;
    std::replace(deviceName.begin(), deviceName.end(), ':', '.');
    result << "device_name=" << deviceName << "_";
    for (auto& iter : configuration) {
        result << "_" << iter.first << "_" << iter.second.as<std::string>() << "_";
    }
    return result.str();
}

void CompileModelLoadFromMemoryTestBase::SetUp() {
    ovModelWithName funcPair;
    std::tie(targetDevice, configuration) = GetParam();
    target_device = targetDevice;
    APIBaseTest::SetUp();
    std::stringstream ss;
    auto hash = std::hash<std::string>()(SubgraphBaseTest::GetTestName());
    ss << "testCache_" << std::to_string(hash) << "_" << std::this_thread::get_id() << "_" << GetTimestamp();
    m_modelName = ss.str() + ".xml";
    m_weightsName = ss.str() + ".bin";
    for (auto& iter : configuration) {
        ss << "_" << iter.first << "_" << iter.second.as<std::string>() << "_";
    }
    m_cacheFolderName = ss.str();
    core->set_property(ov::cache_dir());
    ngraph::pass::Manager manager;
    manager.register_pass<ov::pass::Serialize>(m_modelName, m_weightsName);
    manager.run_passes(ngraph::builder::subgraph::makeConvPoolRelu(
        {1, 3, 227, 227},
        InferenceEngine::details::convertPrecision(InferenceEngine::Precision::FP32)));

    try {
        std::ifstream model_file(m_modelName, std::ios::binary);
        std::stringstream ss;
        ss << model_file.rdbuf();
        m_model = ss.str();
    } catch (const Exception& ex) {
        GTEST_FAIL() << "Can't read xml file from: " << m_modelName << "\nException [" << ex.what() << "]" << std::endl;
    }

    try {
        std::ifstream weights_file(m_weightsName, std::ios::binary);
        weights_file.unsetf(std::ios::skipws);

        weights_file.seekg(0, std::ios::end);
        const auto weights_size = static_cast<std::size_t>(weights_file.tellg());
        weights_file.seekg(0, std::ios::beg);

        weights_vector.reserve(weights_size);
        weights_vector.insert(weights_vector.begin(),
                              std::istream_iterator<std::uint8_t>(weights_file),
                              std::istream_iterator<std::uint8_t>());
        m_weights = ov::Tensor(ov::element::u8, {1, 1, 1, weights_size}, weights_vector.data());
    } catch (const Exception& ex) {
        GTEST_FAIL() << "Can't read weights file from: " << m_weightsName << "\nException [" << ex.what() << "]"
                     << std::endl;
    }
}

void CompileModelLoadFromMemoryTestBase::TearDown() {
    CommonTestUtils::removeFilesWithExt(m_cacheFolderName, "blob");
    CommonTestUtils::removeFilesWithExt(m_cacheFolderName, "cl_cache");
    CommonTestUtils::removeIRFiles(m_modelName, m_weightsName);
    std::remove(m_cacheFolderName.c_str());
    core->set_property(ov::cache_dir());
    APIBaseTest::TearDown();
    weights_vector.clear();
}

void CompileModelLoadFromMemoryTestBase::run() {
    SKIP_IF_CURRENT_TEST_IS_DISABLED();
    core->set_property(ov::cache_dir(m_cacheFolderName));
    try {
        compiledModel = core->compile_model(m_model, m_weights, targetDevice, configuration);
        inferRequest = compiledModel.create_infer_request();
        inferRequest.infer();
    } catch (const Exception& ex) {
        GTEST_FAIL() << "Can't loadNetwork with model path " << m_modelName << "\nException [" << ex.what() << "]"
                     << std::endl;
    } catch (...) {
        GTEST_FAIL() << "Can't compile network with model path " << m_modelName << std::endl;
    }
}

TEST_P(CompileModelLoadFromMemoryTestBase, CanLoadFromMemoryWithoutExecption) {
    run();
}

std::string CompiledKernelsCacheTest::getTestCaseName(testing::TestParamInfo<compileKernelsCacheParams> obj) {
    auto param = obj.param;
    std::string deviceName;
@@ -21,6 +21,13 @@ public:
                    const std::string &,
                    const std::map<std::string, std::string> &,
                    const std::function<void(const InferenceEngine::CNNNetwork&)> &));
    MOCK_METHOD5(
        LoadNetwork,
        InferenceEngine::SoExecutableNetworkInternal(const std::string&,
                                                     const InferenceEngine::Blob::CPtr&,
                                                     const std::string&,
                                                     const std::map<std::string, std::string>&,
                                                     const std::function<void(const InferenceEngine::CNNNetwork&)>&));

    MOCK_METHOD3(ImportNetwork, InferenceEngine::SoExecutableNetworkInternal(
        std::istream&, const std::string&, const std::map<std::string, std::string>&));