[GPU] Revised unique ID setting scheme. (#10548)

* Revised unique ID setting scheme. Previously it used the program id to distinguish the ids of nodes in loop body networks. However, that resulted in cl cache misses when the same network was loaded multiple times, because the program ids are different. Now it uses the parent primitive id instead of program_id for the unique id of nodes in body networks. * Revised adding unique_id to entry points to have a temporal number as unique id * Revert the canceled change * Added test to check whether two networks loaded from the same function create the same cl cache
2022-02-22 15:34:46 +09:00 · 2022-02-22 15:34:46 +09:00 · 746b77c74a
commit 746b77c74a
parent 1891967ad3
8 changed files with 53 additions and 9 deletions
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp
@ -20,9 +20,8 @@ using namespace cldnn;

 void compile_graph::run(program& p) {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "CLDNN::pass::CompileGraph");
-    size_t order_idx = 0;
    for (auto& node : p.get_processing_order()) {
-        node->set_unique_id(std::to_string(order_idx++));
+        node->set_unique_id();
        if (!node->is_type<data>()) {
            node->get_output_layout();
        }
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_input_reorder.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_input_reorder.cpp
@ -62,7 +62,7 @@ void post_input_reorder::run(program& p) {
                                      input_layout.size,
                                      input_layout.data_padding);
                auto& reorder = add_reorder(p, input, node, current_layout);
-                reorder.set_unique_id(node->get_unique_id() + "_input_reorder");
+                reorder.set_unique_id();
                reorder.get_output_layout(false);
                node->set_output_layout(previous_layout, false);
                reorder.set_selected_impl(reorder.type()->choose_impl(reorder));
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
@ -35,7 +35,7 @@ void remove_redundant_reorders::run(program& p) {
        if (!update_implementations)
            return;

-        node.set_unique_id(node.get_unique_id() + "_reorder");
+        node.set_unique_id();
        auto new_impl = node.type()->choose_impl(node);
        node.set_selected_impl(std::move(new_impl));
    };
--- a/src/plugins/intel_gpu/src/graph/include/program_node.h
+++ b/src/plugins/intel_gpu/src/graph/include/program_node.h
@ -7,6 +7,7 @@
 #include "intel_gpu/primitives/primitive.hpp"
 #include "intel_gpu/primitives/activation.hpp"
 #include "intel_gpu/primitives/implementation_desc.hpp"
+#include "intel_gpu/graph/program.hpp"

 #include "kernel_selector_helper.h"
 #include "meta_utils.h"
@ -17,6 +18,7 @@
 #include <memory>
 #include <list>
 #include <algorithm>
+#include <thread>

 namespace cldnn {

@ -350,11 +352,19 @@ public:

    bool need_lockable_memory() const;

-    std::string get_unique_id() const { return unique_id; }
-    void set_unique_id(std::string id) { unique_id = id; }
+    size_t get_unique_id() const { return unique_id; }
+
+    void set_unique_id() {
+        unique_id = cur_id++;
+    }
+
+    static void reset_unique_id() {
+        cur_id = 0;
+    }

 protected:
-    std::string unique_id;
+    size_t unique_id = 0;
+    static thread_local size_t cur_id;

    std::shared_ptr<primitive> desc;
    program& myprog;
--- a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp
+++ b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp
@ -839,7 +839,7 @@ void set_params(const program_node& node, kernel_selector::params& params) {
    const auto& program = node.get_program();
    const auto& device_info = program.get_engine().get_device_info();

-    params.uniqueID = std::to_string(program.get_id()) + "_"  + node.get_unique_id();
+    params.uniqueID = std::to_string(node.get_unique_id());
    params.engineInfo.bSubGroupSupport = device_info.supports_subgroups;
    params.engineInfo.bSubGroupShortSupport = device_info.supports_subgroups_short;
    params.engineInfo.bSubGroupCharSupport = device_info.supports_subgroups_char;
--- a/src/plugins/intel_gpu/src/graph/program.cpp
+++ b/src/plugins/intel_gpu/src/graph/program.cpp
@ -107,6 +107,7 @@ program::program(engine& engine_ref,
    prepare_nodes(topology);
    _kernels_cache = std::unique_ptr<kernels_cache>(new kernels_cache(_engine, prog_id,
                                                                      kernel_selector::KernelBase::get_db().get_batch_header_str()));
+    program_node::reset_unique_id();
    if (no_optimizations) {
        init_graph();
    } else {
--- a/src/plugins/intel_gpu/src/graph/program_node.cpp
+++ b/src/plugins/intel_gpu/src/graph/program_node.cpp
@ -3,7 +3,6 @@
 //

 #include "program_node.h"
-#include "intel_gpu/graph/program.hpp"
 #include "program_helpers.h"
 #include "primitive_inst.h"

@ -24,6 +23,8 @@

 using namespace cldnn;

+thread_local size_t program_node::cur_id = 0;
+
 program_node::program_node(std::shared_ptr<primitive> prim, program& prog)
    : desc(prim), myprog(prog), org_id(prim ? (prim->id) : 0) {
    if (prim)
--- a/src/tests/functional/plugin/gpu/behavior/cache.cpp
+++ b/src/tests/functional/plugin/gpu/behavior/cache.cpp
@ -49,6 +49,39 @@ TEST_F(CompiledKernelsCacheTest, CanCreateCacheDirAndDumpBinaries) {
    }
 }

+TEST_F(CompiledKernelsCacheTest, TwoNetworksWithSameModelCreatesSameCache) {
+    std::shared_ptr<InferenceEngine::Core> ie = PluginCache::get().ie();
+    // Create two CNNNetwork from same ngraph::Function
+    InferenceEngine::CNNNetwork cnnNet1(function);
+    InferenceEngine::CNNNetwork cnnNet2(function);
+    std::map<std::string, std::string> config = {{ CONFIG_KEY(CACHE_DIR), cache_path }};
+    try {
+        // Load 1st CNNNetwork
+        auto execNet1 = ie->LoadNetwork(cnnNet1, "GPU", config);
+        auto n_cache_files = CommonTestUtils::listFilesWithExt(cache_path, "cl_cache").size();
+
+        // Check that directory with cached kernels exists after loading network
+        ASSERT_TRUE(CommonTestUtils::directoryExists(cache_path)) << "Directory with cached kernels doesn't exist";
+        // Load 2nd CNNNetwork
+        auto execNet2 = ie->LoadNetwork(cnnNet2, "GPU", config);
+
+        // Check that two loaded networks with same function creates same caches
+        ASSERT_EQ(CommonTestUtils::removeFilesWithExt(cache_path, "cl_cache"), n_cache_files);
+
+        // Remove directory and check that it doesn't exist anymore
+        ASSERT_EQ(CommonTestUtils::removeDir(cache_path), 0);
+        ASSERT_FALSE(CommonTestUtils::directoryExists(cache_path));
+    } catch (std::exception& ex) {
+        // Cleanup in case of any exception
+        if (CommonTestUtils::directoryExists(cache_path)) {
+            ASSERT_GE(CommonTestUtils::removeFilesWithExt(cache_path, "cl_cache"), 0);
+            ASSERT_EQ(CommonTestUtils::removeDir(cache_path), 0);
+        }
+        FAIL() << ex.what() << std::endl;
+    }
+}
+
+
 #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT

 TEST_F(CompiledKernelsCacheTest, CanCreateCacheDirAndDumpBinariesUnicodePath) {