[GPU] Revised unique ID setting scheme. (#10548)

* Revised unique ID setting scheme. Previously it used the program id to distinguish the ids of loop body networks.
However, that resulted in cl cache misses when the same network was loaded multiple times, because the program ids are different.
Now it is revised to use the parent primitive id instead of program_id for the unique id of nodes in body networks.

* Revised adding unique_id to entry points to have a temporal number as unique id

* Revert the canceled change

* Added a test to check whether two networks loaded from the same function create the same cl cache
This commit is contained in:
Taylor Yeonbok Lee 2022-02-22 15:34:46 +09:00 committed by GitHub
parent 1891967ad3
commit 746b77c74a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 53 additions and 9 deletions

View File

@ -20,9 +20,8 @@ using namespace cldnn;
void compile_graph::run(program& p) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "CLDNN::pass::CompileGraph");
size_t order_idx = 0;
for (auto& node : p.get_processing_order()) {
node->set_unique_id(std::to_string(order_idx++));
node->set_unique_id();
if (!node->is_type<data>()) {
node->get_output_layout();
}

View File

@ -62,7 +62,7 @@ void post_input_reorder::run(program& p) {
input_layout.size,
input_layout.data_padding);
auto& reorder = add_reorder(p, input, node, current_layout);
reorder.set_unique_id(node->get_unique_id() + "_input_reorder");
reorder.set_unique_id();
reorder.get_output_layout(false);
node->set_output_layout(previous_layout, false);
reorder.set_selected_impl(reorder.type()->choose_impl(reorder));

View File

@ -35,7 +35,7 @@ void remove_redundant_reorders::run(program& p) {
if (!update_implementations)
return;
node.set_unique_id(node.get_unique_id() + "_reorder");
node.set_unique_id();
auto new_impl = node.type()->choose_impl(node);
node.set_selected_impl(std::move(new_impl));
};

View File

@ -7,6 +7,7 @@
#include "intel_gpu/primitives/primitive.hpp"
#include "intel_gpu/primitives/activation.hpp"
#include "intel_gpu/primitives/implementation_desc.hpp"
#include "intel_gpu/graph/program.hpp"
#include "kernel_selector_helper.h"
#include "meta_utils.h"
@ -17,6 +18,7 @@
#include <memory>
#include <list>
#include <algorithm>
#include <thread>
namespace cldnn {
@ -350,11 +352,19 @@ public:
bool need_lockable_memory() const;
std::string get_unique_id() const { return unique_id; }
void set_unique_id(std::string id) { unique_id = id; }
size_t get_unique_id() const { return unique_id; }
void set_unique_id() {
unique_id = cur_id++;
}
static void reset_unique_id() {
cur_id = 0;
}
protected:
std::string unique_id;
size_t unique_id = 0;
static thread_local size_t cur_id;
std::shared_ptr<primitive> desc;
program& myprog;

View File

@ -839,7 +839,7 @@ void set_params(const program_node& node, kernel_selector::params& params) {
const auto& program = node.get_program();
const auto& device_info = program.get_engine().get_device_info();
params.uniqueID = std::to_string(program.get_id()) + "_" + node.get_unique_id();
params.uniqueID = std::to_string(node.get_unique_id());
params.engineInfo.bSubGroupSupport = device_info.supports_subgroups;
params.engineInfo.bSubGroupShortSupport = device_info.supports_subgroups_short;
params.engineInfo.bSubGroupCharSupport = device_info.supports_subgroups_char;

View File

@ -107,6 +107,7 @@ program::program(engine& engine_ref,
prepare_nodes(topology);
_kernels_cache = std::unique_ptr<kernels_cache>(new kernels_cache(_engine, prog_id,
kernel_selector::KernelBase::get_db().get_batch_header_str()));
program_node::reset_unique_id();
if (no_optimizations) {
init_graph();
} else {

View File

@ -3,7 +3,6 @@
//
#include "program_node.h"
#include "intel_gpu/graph/program.hpp"
#include "program_helpers.h"
#include "primitive_inst.h"
@ -24,6 +23,8 @@
using namespace cldnn;
thread_local size_t program_node::cur_id = 0;
program_node::program_node(std::shared_ptr<primitive> prim, program& prog)
: desc(prim), myprog(prog), org_id(prim ? (prim->id) : 0) {
if (prim)

View File

@ -49,6 +49,39 @@ TEST_F(CompiledKernelsCacheTest, CanCreateCacheDirAndDumpBinaries) {
}
}
// Loads two CNNNetworks built from the same ngraph::Function on "GPU" with a cache
// directory configured, and compares the number of "cl_cache" files listed after the
// first load with the value returned when those files are removed after the second load.
TEST_F(CompiledKernelsCacheTest, TwoNetworksWithSameModelCreatesSameCache) {
    std::shared_ptr<InferenceEngine::Core> core = PluginCache::get().ie();

    // Both networks wrap the very same ngraph::Function.
    InferenceEngine::CNNNetwork first_network(function);
    InferenceEngine::CNNNetwork second_network(function);
    const std::map<std::string, std::string> cache_config = {{ CONFIG_KEY(CACHE_DIR), cache_path }};

    try {
        // First load: record how many cl_cache files are present afterwards.
        auto first_exec_network = core->LoadNetwork(first_network, "GPU", cache_config);
        const auto cache_files_after_first_load =
            CommonTestUtils::listFilesWithExt(cache_path, "cl_cache").size();

        // The cache directory has to exist once a network has been loaded.
        ASSERT_TRUE(CommonTestUtils::directoryExists(cache_path)) << "Directory with cached kernels doesn't exist";

        // Second load of the same model.
        auto second_exec_network = core->LoadNetwork(second_network, "GPU", cache_config);

        // The value returned by the removal (presumably the number of files removed -
        // TODO confirm against CommonTestUtils) must equal the count taken after the
        // first load, i.e. the second load is expected to reuse the same caches.
        const auto removal_result = CommonTestUtils::removeFilesWithExt(cache_path, "cl_cache");
        ASSERT_EQ(removal_result, cache_files_after_first_load);

        // Finally drop the directory itself and verify it is gone.
        ASSERT_EQ(CommonTestUtils::removeDir(cache_path), 0);
        ASSERT_FALSE(CommonTestUtils::directoryExists(cache_path));
    } catch (const std::exception& error) {
        // Best-effort cleanup before reporting the exception as a test failure.
        if (CommonTestUtils::directoryExists(cache_path)) {
            ASSERT_GE(CommonTestUtils::removeFilesWithExt(cache_path, "cl_cache"), 0);
            ASSERT_EQ(CommonTestUtils::removeDir(cache_path), 0);
        }
        FAIL() << error.what() << std::endl;
    }
}
#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
TEST_F(CompiledKernelsCacheTest, CanCreateCacheDirAndDumpBinariesUnicodePath) {