[GPU] Fix missed weights params update (#16815)

2023-04-10 10:28:06 +04:00 · 2023-04-10 10:28:06 +04:00 · 48dee7c30a
commit 48dee7c30a
parent c7fe5ca73b
3 changed files with 75 additions and 2 deletions
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@ -731,11 +731,12 @@ event::ptr primitive_inst::update_weights() {
    if (weights_params.engine == kernel_selector::GenericKernelParams::Engine::NONE) {
        // If kernel doesn't says that it doesn't require weights reorder, but weights were reordered previously, then
        // incorrect memory buffer may be assigned, so reset cached weights for such case
-        _reordered_weights_cache.add(original_weights_memory->get_layout(), original_weights_memory);
+        _reordered_weights_cache.add(original_layout, original_weights_memory);
+        _impl_params->weights_layout = optional_layout(original_layout);
    } else {
        auto expected_layout = from_weights_tensor(weights_params.dest);
        // Set original patrial shape, because it may be lost during kernel_selector::weights_tensor -> layout conversion
-        expected_layout.set_partial_shape(original_weights_memory->get_layout().get_partial_shape());
+        expected_layout.set_partial_shape(original_layout.get_partial_shape());
        _impl_params->weights_layout = optional_layout(expected_layout);

        if (_reordered_weights_cache.has(expected_layout)) {
--- a/src/plugins/intel_gpu/tests/module_tests/kernel_impl_params_relevance_test.cpp
+++ b/src/plugins/intel_gpu/tests/module_tests/kernel_impl_params_relevance_test.cpp
@ -0,0 +1,69 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "test_utils.h"
+
+#include <intel_gpu/primitives/input_layout.hpp>
+#include <intel_gpu/primitives/fully_connected.hpp>
+#include <intel_gpu/primitives/data.hpp>
+
+#include "compilation_context.hpp"
+
+#include "program_wrapper.h"
+
+using namespace cldnn;
+using namespace ::tests;
+
+TEST(kernel_impl_params_relevance, weights_layout) {
+    auto& engine = get_test_engine();
+
+    const int32_t in_b = 1;
+    const int32_t in_f = 4;
+    const int32_t wei_o = 3;
+
+    auto input_dyn_layout = layout{ ov::PartialShape{ ov::Dimension(1, 10), in_f }, data_types::f32, format::bfyx };
+    auto actual_input_data = engine.allocate_memory(layout{ ov::PartialShape{ in_b, in_f }, data_types::f32, format::bfyx });
+    auto weights_data = engine.allocate_memory({ ov::PartialShape{ wei_o, in_f }, data_types::f32, format::bfyx });
+
+    cldnn::topology topology{
+        input_layout("input", input_dyn_layout),
+        data("weights", weights_data),
+        fully_connected("fc", input_info("input"), "weights")
+    };
+
+    auto fc_opt_impl = ov::intel_gpu::ImplementationDesc(format::bfyx, "fully_connected_gpu_bf_tiled", impl_types::ocl);
+    ExecutionConfig cfg{ ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc", fc_opt_impl} }),
+                         ov::intel_gpu::optimize_data(true),
+                         ov::intel_gpu::allow_new_shape_infer(true) };
+
+    // 1. Compile network with forced `fully_connected_gpu_bf_tiled` kernel => optimized shape-agnostic
+    //    kernel will be used
+    network network(engine, topology, cfg);
+    network.set_input_data("input", actual_input_data);
+
+    // 2. Force the reference `fully_connected_gpu_bfyx_ref` kernel impl before execution, so that the
+    //    _node->type()->choose_impl(*_node, updated_params) call made for the static kernel version
+    //    selects the reference impl. Call execute() to trigger the desired kernel compilation
+    auto fc_ref_impl = ov::intel_gpu::ImplementationDesc(format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl);
+    auto force_impl_prop = ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc", fc_ref_impl} });
+    program_wrapper::update_configs_properties(*network.get_program(), {force_impl_prop});
+
+    network.execute();
+
+    // 3. WA: Call cancel() to wait until compilation of all queued kernels finishes (including the `fully_connected_gpu_bfyx_ref` above)
+    network.get_program()->get_compilation_context().cancel();
+
+    // 4. Call execute() second time with same input shape to use pre-compiled `fully_connected_gpu_bfyx_ref` kernel
+    network.execute();
+
+    // 5. Get FC instance
+    auto inst = network.get_primitive("fc");
+    auto fc_inst = std::dynamic_pointer_cast<fully_connected_inst>(inst);
+    ASSERT_TRUE(fc_inst != nullptr);
+
+    // 6. Request the instance's weights memory, compare its layout with the original weights buffer's layout
+    //    and check that the original layout is used (required for the `fully_connected_gpu_bfyx_ref` kernel)
+    auto used_weights_memory = fc_inst->weights_memory()->get_layout();
+    ASSERT_EQ(weights_data->get_layout(), used_weights_memory);
+}
--- a/src/plugins/intel_gpu/tests/test_utils/program_wrapper.h
+++ b/src/plugins/intel_gpu/tests/test_utils/program_wrapper.h
@ -37,6 +37,9 @@ namespace cldnn
        {
            p.prepare_memory_dependencies();
        }
+        static void update_configs_properties(program& p, const ov::AnyMap& properties) {
+            p._config.set_property(properties);
+        }
    };

 }