[CPU] Extract weight cache to executable network (#11118)

Maxim Andronov
2022-04-04 10:47:58 +03:00
committed by GitHub
parent afdaa7cf89
commit 65a182aaea
5 changed files with 79 additions and 7 deletions
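In short: the per-NUMA-node weight cache (NumaNodesWeights) stops being a single object owned by the plugin (Engine) and handed to every compiled network by reference; each ExecNetwork now owns its own cache instance. Below is a minimal, declaration-level sketch of that ownership change; only the member names and the mutable qualifier come from the diffs that follow, while the *_before/*_after struct names and the stubbed cache type are illustrative only.

// Stand-in for the real per-NUMA-node weight cache; roughly, a map from NUMA node id
// to a name-keyed cache of shared weights (details omitted here).
struct NumaNodesWeights {};

// Before: the plugin owns the only cache and every executable network keeps a
// reference to it, so all compiled models share one set of cache entries.
struct Engine_before {
    NumaNodesWeights weightsSharing;
};
struct ExecNetwork_before {
    NumaNodesWeights& _numaNodesWeights;
};

// After: the cache becomes a value member, owned separately by each ExecNetwork.
struct ExecNetwork_after {
    mutable NumaNodesWeights _numaNodesWeights;
};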

View File

@@ -67,13 +67,11 @@ struct ImmediateSerialExecutor : public ITaskExecutor {
ExecNetwork::ExecNetwork(const InferenceEngine::CNNNetwork &network,
const Config &cfg,
const ExtensionManager::Ptr& extMgr,
- NumaNodesWeights &numaNodesWeights,
const std::shared_ptr<InferenceEngine::IInferencePlugin>& plugin) :
InferenceEngine::ExecutableNetworkThreadSafeDefault{nullptr, nullptr},
extensionManager(extMgr),
_cfg{cfg},
_name{network.getName()},
- _numaNodesWeights(numaNodesWeights),
_network(network) {
SetPointerToPlugin(plugin);
auto function = network.getFunction();

View File

@@ -35,7 +35,7 @@ public:
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
ExecNetwork(const InferenceEngine::CNNNetwork &network, const Config &cfg,
- const ExtensionManager::Ptr &extMgr, NumaNodesWeights &weightsSharing,
+ const ExtensionManager::Ptr &extMgr,
const std::shared_ptr<InferenceEngine::IInferencePlugin>& plugin);
void setProperty(const std::map<std::string, std::string> &properties);
@@ -67,7 +67,7 @@ protected:
// WARNING: Do not use _graphs directly.
mutable std::deque<GraphGuard> _graphs;
- NumaNodesWeights& _numaNodesWeights;
+ mutable NumaNodesWeights _numaNodesWeights;
/* WARNING: Use GetGraph() function to get access to graph in current stream.
* NOTE: Main thread is interpreted as master thread of external stream so use this function to get access to graphs

View File

@@ -712,7 +712,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
conf.batchLimit = static_cast<int>(network.getBatchSize());
}
- return std::make_shared<ExecNetwork>(clonedNetwork, conf, extensionManager, weightsSharing, shared_from_this());
+ return std::make_shared<ExecNetwork>(clonedNetwork, conf, extensionManager, shared_from_this());
}
void Engine::SetConfig(const std::map<std::string, std::string> &config) {
@@ -1030,7 +1030,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istr
conf.batchLimit = static_cast<int>(cnnnetwork.getBatchSize());
}
- auto execNetwork = std::make_shared<ExecNetwork>(cnnnetwork, conf, extensionManager, weightsSharing, shared_from_this());
+ auto execNetwork = std::make_shared<ExecNetwork>(cnnnetwork, conf, extensionManager, shared_from_this());
execNetwork->setNetworkInputs(cnnnetwork.getInputsInfo());
execNetwork->setNetworkOutputs(cnnnetwork.getOutputsInfo());

View File

@@ -51,7 +51,6 @@ private:
void ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ngraph::Function>& ngraphFunc) const;
Config engConfig;
- NumaNodesWeights weightsSharing;
ExtensionManager::Ptr extensionManager = std::make_shared<ExtensionManager>();
/* Explicitly configured streams have higher priority even than performance hints.
So track if streams is set explicitly (not auto-configured) */

View File

@@ -0,0 +1,75 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/openvino.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "test_utils/convolution_params.hpp"
using namespace CPUTestUtils;
namespace SubgraphTestsDefinitions {
class EdgeWithSameNameInTwoModels : public ::testing::Test, public CPUTestsBase {};
TEST_F(EdgeWithSameNameInTwoModels, smoke_CompareWithRef) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    const std::string targetDevice = CommonTestUtils::DEVICE_CPU;
    const ov::element::Type type(ov::element::Type_t::f32);
    const std::string convName("conv_name"), weightName("weight_name");
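    // Both models below reuse these friendly names for the convolution and its weight
    // constant; with a weight cache shared across executable networks such name clashes
    // could mix up cached weights, which is the scenario this test guards against.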
    const std::vector<size_t> kernel{3, 3};
    const std::vector<size_t> strides{1, 1};
    const std::vector<ptrdiff_t> padsBegin{0, 0};
    const std::vector<ptrdiff_t> padsEnd{0, 0};
    const std::vector<size_t> dilations{1, 1};
    const ngraph::op::PadType autoPad(ngraph::op::PadType::EXPLICIT);
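
    // Pick the ISA-specific convolution parameters (tensor formats, expected primitive
    // type) matching the machine the test runs on.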
    if (InferenceEngine::with_cpu_x86_avx512f()) {
        std::tie(inFmts, outFmts, priority, selectedType) = conv_avx512_2D;
    } else if (InferenceEngine::with_cpu_x86_avx2()) {
        std::tie(inFmts, outFmts, priority, selectedType) = conv_avx2_2D;
    } else if (InferenceEngine::with_cpu_x86_sse42()) {
        std::tie(inFmts, outFmts, priority, selectedType) = conv_sse42_2D;
    }

    // first model
    const std::vector<std::vector<size_t>> shapes1{{1, 16, 720, 1280}};
    auto params1 = ngraph::builder::makeParams(type, shapes1);
    const size_t convOutCh1 = 32;
    auto conv1 = ngraph::builder::makeConvolution(params1.front(), type, kernel, strides, padsBegin, padsEnd, dilations, autoPad, convOutCh1);
    conv1->set_friendly_name(convName);
    conv1->get_input_node_shared_ptr(1)->set_friendly_name(weightName);
    auto model1 = makeNgraphFunction(type, params1, conv1, "Model1");

    // second model
    const std::vector<std::vector<size_t>> shapes2{{1, 32, 24, 24}};
    auto params2 = ngraph::builder::makeParams(type, shapes2);
    const size_t convOutCh2 = 16;
    auto conv2 = ngraph::builder::makeConvolution(params2.front(), type, kernel, strides, padsBegin, padsEnd, dilations, autoPad, convOutCh2);
    conv2->set_friendly_name(convName);
    conv2->get_input_node_shared_ptr(1)->set_friendly_name(weightName);
    auto model2 = makeNgraphFunction(type, params2, conv2, "Model2");

    // model compilation
    std::map<std::string, ov::AnyMap> config;
    auto& device_config = config[targetDevice];
    device_config[targetDevice + "_THROUGHPUT_STREAMS"] = 4;
    ov::Core core;
    for (auto&& item : config) {
        core.set_property(item.first, item.second);
    }
    auto compiledModel1 = core.compile_model(model1, targetDevice);
    auto compiledModel2 = core.compile_model(model2, targetDevice);
    auto inferReq1 = compiledModel1.create_infer_request();
    auto inferReq2 = compiledModel2.create_infer_request();
    inferReq1.infer();
    inferReq2.infer();
}
} // namespace SubgraphTestsDefinitions