[GPU] Fix missed weights params update (#16815)

This commit is contained in:
Sergey Shlyapnikov 2023-04-10 10:28:06 +04:00 committed by GitHub
parent c7fe5ca73b
commit 48dee7c30a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 75 additions and 2 deletions

View File

@ -731,11 +731,12 @@ event::ptr primitive_inst::update_weights() {
if (weights_params.engine == kernel_selector::GenericKernelParams::Engine::NONE) {
// If kernel says that it doesn't require weights reorder, but weights were reordered previously, then
// an incorrect memory buffer may be assigned, so reset cached weights for such case
_reordered_weights_cache.add(original_weights_memory->get_layout(), original_weights_memory);
_reordered_weights_cache.add(original_layout, original_weights_memory);
_impl_params->weights_layout = optional_layout(original_layout);
} else {
auto expected_layout = from_weights_tensor(weights_params.dest);
// Set original partial shape, because it may be lost during kernel_selector::weights_tensor -> layout conversion
expected_layout.set_partial_shape(original_weights_memory->get_layout().get_partial_shape());
expected_layout.set_partial_shape(original_layout.get_partial_shape());
_impl_params->weights_layout = optional_layout(expected_layout);
if (_reordered_weights_cache.has(expected_layout)) {

View File

@ -0,0 +1,69 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/fully_connected.hpp>
#include <intel_gpu/primitives/data.hpp>
#include "compilation_context.hpp"
#include "program_wrapper.h"
using namespace cldnn;
using namespace ::tests;
// Builds a dynamic-shape network with a single fully connected primitive, switches the forced
// FC implementation between compilation and execution, and then verifies that the weights
// memory reported by the FC instance has the same layout as the originally supplied weights.
TEST(kernel_impl_params_relevance, weights_layout) {
    auto& engine = get_test_engine();

    const int32_t batch = 1;
    const int32_t in_features = 4;
    const int32_t out_features = 3;

    // The network input is dynamic in the batch dimension; the data fed at runtime is static.
    auto dynamic_input = layout{ ov::PartialShape{ ov::Dimension(1, 10), in_features }, data_types::f32, format::bfyx };
    auto input_mem = engine.allocate_memory(layout{ ov::PartialShape{ batch, in_features }, data_types::f32, format::bfyx });
    auto weights_mem = engine.allocate_memory(layout{ ov::PartialShape{ out_features, in_features }, data_types::f32, format::bfyx });

    cldnn::topology topo{
        input_layout("input", dynamic_input),
        data("weights", weights_mem),
        fully_connected("fc", input_info("input"), "weights")
    };

    // Step 1: build the network with `fully_connected_gpu_bf_tiled` forced for "fc", so the
    // optimized shape-agnostic kernel is selected at compile time.
    auto tiled_impl_desc = ov::intel_gpu::ImplementationDesc(format::bfyx, "fully_connected_gpu_bf_tiled", impl_types::ocl);
    ExecutionConfig config{ ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc", tiled_impl_desc} }),
                            ov::intel_gpu::optimize_data(true),
                            ov::intel_gpu::allow_new_shape_infer(true) };

    network net(engine, topo, config);
    net.set_input_data("input", input_mem);

    // Step 2: before the first execution, re-point the forced implementation to the reference
    // `fully_connected_gpu_bfyx_ref` kernel, so that the
    // _node->type()->choose_impl(*_node, updated_params) call made for the static kernel version
    // picks the reference impl. The execute() call triggers compilation of that kernel.
    auto ref_impl_desc = ov::intel_gpu::ImplementationDesc(format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl);
    auto forced_ref_property = ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc", ref_impl_desc} });
    program_wrapper::update_configs_properties(*net.get_program(), {forced_ref_property});
    net.execute();

    // Step 3 (workaround): cancel() is used to wait until every queued kernel compilation has
    // finished, including the `fully_connected_gpu_bfyx_ref` one requested above.
    net.get_program()->get_compilation_context().cancel();

    // Step 4: run again with the same input shape so the pre-compiled
    // `fully_connected_gpu_bfyx_ref` kernel is picked up.
    net.execute();

    // Step 5: fetch the FC primitive instance.
    auto prim_inst = net.get_primitive("fc");
    auto fc_instance = std::dynamic_pointer_cast<fully_connected_inst>(prim_inst);
    ASSERT_TRUE(fc_instance != nullptr);

    // Step 6: request the layout of the weights memory used by the instance and make sure it
    // is the original weights layout (required for the `fully_connected_gpu_bfyx_ref` kernel).
    auto used_weights_layout = fc_instance->weights_memory()->get_layout();
    ASSERT_EQ(weights_mem->get_layout(), used_weights_layout);
}

View File

@ -37,6 +37,9 @@ namespace cldnn
{
p.prepare_memory_dependencies();
}
// Applies `properties` to the configuration object held by program `p` (its `_config` member)
// by forwarding them to set_property().
static void update_configs_properties(program& p, const ov::AnyMap& properties) {
    auto& program_config = p._config;
    program_config.set_property(properties);
}
};
}