[CPU] Extract weight cache to executable network (#11118)

Maxim Andronov
2022-04-04 10:47:58 +03:00
committed by GitHub
parent afdaa7cf89
commit 65a182aaea
5 changed files with 79 additions and 7 deletions
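In short: the per-NUMA-node weight cache (NumaNodesWeights) stops being a single object owned by the plugin (Engine) and handed to every compiled network by reference; each ExecNetwork now owns its own cache instance. Below is a minimal, declaration-level sketch of that ownership change; only the member names and the mutable qualifier come from the diffs that follow, while the *_before/*_after struct names and the stubbed cache type are illustrative only.

// Stand-in for the real per-NUMA-node weight cache; roughly, a map from NUMA node id
// to a name-keyed cache of shared weights (details omitted here).
struct NumaNodesWeights {};

// Before: the plugin owns the only cache and every executable network keeps a
// reference to it, so all compiled models share one set of cache entries.
struct Engine_before {
    NumaNodesWeights weightsSharing;
};
struct ExecNetwork_before {
    NumaNodesWeights& _numaNodesWeights;
};

// After: the cache becomes a value member, owned separately by each ExecNetwork.
struct ExecNetwork_after {
    mutable NumaNodesWeights _numaNodesWeights;
};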

View File

@@ -67,13 +67,11 @@ struct ImmediateSerialExecutor : public ITaskExecutor {
ExecNetwork::ExecNetwork(const InferenceEngine::CNNNetwork &network,
const Config &cfg,
const ExtensionManager::Ptr& extMgr,
- NumaNodesWeights &numaNodesWeights,
const std::shared_ptr<InferenceEngine::IInferencePlugin>& plugin) :
InferenceEngine::ExecutableNetworkThreadSafeDefault{nullptr, nullptr},
extensionManager(extMgr),
_cfg{cfg},
_name{network.getName()},
- _numaNodesWeights(numaNodesWeights),
_network(network) {
SetPointerToPlugin(plugin);
auto function = network.getFunction();

View File

@@ -35,7 +35,7 @@ public:
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
ExecNetwork(const InferenceEngine::CNNNetwork &network, const Config &cfg,
- const ExtensionManager::Ptr &extMgr, NumaNodesWeights &weightsSharing,
+ const ExtensionManager::Ptr &extMgr,
const std::shared_ptr<InferenceEngine::IInferencePlugin>& plugin);
void setProperty(const std::map<std::string, std::string> &properties);
@@ -67,7 +67,7 @@ protected:
// WARNING: Do not use _graphs directly.
mutable std::deque<GraphGuard> _graphs;
- NumaNodesWeights& _numaNodesWeights;
+ mutable NumaNodesWeights _numaNodesWeights;
/* WARNING: Use GetGraph() function to get access to graph in current stream.
* NOTE: Main thread is interpreted as master thread of external stream so use this function to get access to graphs

View File

@@ -712,7 +712,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
conf.batchLimit = static_cast<int>(network.getBatchSize());
}
- return std::make_shared<ExecNetwork>(clonedNetwork, conf, extensionManager, weightsSharing, shared_from_this());
+ return std::make_shared<ExecNetwork>(clonedNetwork, conf, extensionManager, shared_from_this());
}
void Engine::SetConfig(const std::map<std::string, std::string> &config) {
@@ -1030,7 +1030,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istr
conf.batchLimit = static_cast<int>(cnnnetwork.getBatchSize());
}
- auto execNetwork = std::make_shared<ExecNetwork>(cnnnetwork, conf, extensionManager, weightsSharing, shared_from_this());
+ auto execNetwork = std::make_shared<ExecNetwork>(cnnnetwork, conf, extensionManager, shared_from_this());
execNetwork->setNetworkInputs(cnnnetwork.getInputsInfo());
execNetwork->setNetworkOutputs(cnnnetwork.getOutputsInfo());

View File

@@ -51,7 +51,6 @@ private:
void ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ngraph::Function>& ngraphFunc) const;
Config engConfig;
- NumaNodesWeights weightsSharing;
ExtensionManager::Ptr extensionManager = std::make_shared<ExtensionManager>();
/* Explicitly configured streams have higher priority even than performance hints.
So track if streams is set explicitly (not auto-configured) */

View File

@@ -0,0 +1,75 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/openvino.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "test_utils/convolution_params.hpp"
using namespace CPUTestUtils;
namespace SubgraphTestsDefinitions {
class EdgeWithSameNameInTwoModels : public ::testing::Test, public CPUTestsBase {};
TEST_F(EdgeWithSameNameInTwoModels, smoke_CompareWithRef) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    const std::string targetDevice = CommonTestUtils::DEVICE_CPU;
    const ov::element::Type type(ov::element::Type_t::f32);
    const std::string convName("conv_name"), weightName("weight_name");
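    // Both models below reuse these friendly names for the convolution and its weight
    // constant; with a weight cache shared across executable networks such name clashes
    // could mix up cached weights, which is the scenario this test guards against.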
    const std::vector<size_t> kernel{3, 3};
    const std::vector<size_t> strides{1, 1};
    const std::vector<ptrdiff_t> padsBegin{0, 0};
    const std::vector<ptrdiff_t> padsEnd{0, 0};
    const std::vector<size_t> dilations{1, 1};
    const ngraph::op::PadType autoPad(ngraph::op::PadType::EXPLICIT);
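
    // Pick the ISA-specific convolution parameters (tensor formats, expected primitive
    // type) matching the machine the test runs on.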
    if (InferenceEngine::with_cpu_x86_avx512f()) {
        std::tie(inFmts, outFmts, priority, selectedType) = conv_avx512_2D;
    } else if (InferenceEngine::with_cpu_x86_avx2()) {
        std::tie(inFmts, outFmts, priority, selectedType) = conv_avx2_2D;
    } else if (InferenceEngine::with_cpu_x86_sse42()) {
        std::tie(inFmts, outFmts, priority, selectedType) = conv_sse42_2D;
    }

    // first model
    const std::vector<std::vector<size_t>> shapes1{{1, 16, 720, 1280}};
    auto params1 = ngraph::builder::makeParams(type, shapes1);
    const size_t convOutCh1 = 32;
    auto conv1 = ngraph::builder::makeConvolution(params1.front(), type, kernel, strides, padsBegin, padsEnd, dilations, autoPad, convOutCh1);
    conv1->set_friendly_name(convName);
    conv1->get_input_node_shared_ptr(1)->set_friendly_name(weightName);
    auto model1 = makeNgraphFunction(type, params1, conv1, "Model1");

    // second model
    const std::vector<std::vector<size_t>> shapes2{{1, 32, 24, 24}};
    auto params2 = ngraph::builder::makeParams(type, shapes2);
    const size_t convOutCh2 = 16;
    auto conv2 = ngraph::builder::makeConvolution(params2.front(), type, kernel, strides, padsBegin, padsEnd, dilations, autoPad, convOutCh2);
    conv2->set_friendly_name(convName);
    conv2->get_input_node_shared_ptr(1)->set_friendly_name(weightName);
    auto model2 = makeNgraphFunction(type, params2, conv2, "Model2");

    // model compilation
    std::map<std::string, ov::AnyMap> config;
    auto& device_config = config[targetDevice];
    device_config[targetDevice + "_THROUGHPUT_STREAMS"] = 4;
    ov::Core core;
    for (auto&& item : config) {
        core.set_property(item.first, item.second);
    }
    auto compiledModel1 = core.compile_model(model1, targetDevice);
    auto compiledModel2 = core.compile_model(model2, targetDevice);
    auto inferReq1 = compiledModel1.create_infer_request();
    auto inferReq2 = compiledModel2.create_infer_request();
    inferReq1.infer();
    inferReq2.infer();
}
} // namespace SubgraphTestsDefinitions