[GPU] Fix accuracy issue (#19351)
- [scatter_update] Use the input index, not the output index, when reading the input buffer
- [concat cpu impl] Keep the input layout and mem_ptr in sync when creating the input host tensors
- Add unit tests for scatter_update and the concat cpu impl
This commit is contained in:
parent
e11e8ede1b
commit
99cc3624b7
@ -69,17 +69,18 @@ struct concatenation_impl : public typed_primitive_impl<concatenation> {
|
|||||||
std::vector<memory::ptr> input_mem_ptrs;
|
std::vector<memory::ptr> input_mem_ptrs;
|
||||||
for (size_t i = 0; i < instance.dependencies().size(); i++) {
|
for (size_t i = 0; i < instance.dependencies().size(); i++) {
|
||||||
auto& dep = instance.dependencies().at(i);
|
auto& dep = instance.dependencies().at(i);
|
||||||
if (dep.first->get_output_layout().count() > 0)
|
if (dep.first->get_output_layout().count() > 0) {
|
||||||
input_mem_ptrs.push_back(instance.dep_memory_ptr(i));
|
auto mem_ptr = instance.dep_memory_ptr(i);
|
||||||
|
input_host_tensors.push_back(make_tensor(params->input_layouts[i], mem_ptr->lock(stream, mem_lock_type::read)));
|
||||||
|
// keep mem_ptr in input_mem_ptrs so that it can be unlocked after processing
|
||||||
|
input_mem_ptrs.push_back(mem_ptr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto output_mem_ptr = instance.output_memory_ptr();
|
auto output_mem_ptr = instance.output_memory_ptr();
|
||||||
|
|
||||||
cldnn::mem_lock<uint8_t, mem_lock_type::read> output_lock(output_mem_ptr, stream);
|
cldnn::mem_lock<uint8_t, mem_lock_type::read> output_lock(output_mem_ptr, stream);
|
||||||
|
|
||||||
for (size_t i = 0; i < input_mem_ptrs.size(); i++)
|
|
||||||
input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read)));
|
|
||||||
|
|
||||||
output_host_tensors.push_back(make_tensor(params->output_layouts[0], output_lock.data()));
|
output_host_tensors.push_back(make_tensor(params->output_layouts[0], output_lock.data()));
|
||||||
|
|
||||||
if (!op) {
|
if (!op) {
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
#define AXIS_X (OUTPUT_DIMS - 1)
|
#define AXIS_X (OUTPUT_DIMS - 1)
|
||||||
|
|
||||||
#define GET_OUTPUT_INDEX(idx_order) OUTPUT_GET_INDEX(idx_order)
|
#define GET_OUTPUT_INDEX(idx_order) OUTPUT_GET_INDEX(idx_order)
|
||||||
|
#define GET_INPUT_INDEX(idx_order) INPUT0_GET_INDEX(idx_order)
|
||||||
|
|
||||||
#if OUTPUT_DIMS == 4
|
#if OUTPUT_DIMS == 4
|
||||||
#define ORDER b,f,y,x
|
#define ORDER b,f,y,x
|
||||||
@ -121,8 +122,10 @@ KERNEL(scatter_update_ref)(OPTIONAL_SHAPE_INFO_ARG
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
const uint output_idx = GET_OUTPUT_INDEX(ORDER);
|
const uint output_idx = GET_OUTPUT_INDEX(ORDER);
|
||||||
|
const uint dict_idx = GET_INPUT_INDEX(ORDER);
|
||||||
|
|
||||||
INPUT0_TYPE val = dictionary[output_idx];
|
// Read the dictionary with the input index rather than the output index: the output may be padded, so the two offsets can differ.
|
||||||
|
INPUT0_TYPE val = dictionary[dict_idx];
|
||||||
#if HAS_FUSED_OPS
|
#if HAS_FUSED_OPS
|
||||||
FUSED_OPS_FIRST_KERNEL;
|
FUSED_OPS_FIRST_KERNEL;
|
||||||
output[output_idx] = TO_OUTPUT_TYPE(FUSED_OPS_RESULT_FIRST_KERNEL);
|
output[output_idx] = TO_OUTPUT_TYPE(FUSED_OPS_RESULT_FIRST_KERNEL);
|
||||||
@ -233,6 +236,7 @@ KERNEL(scatter_update_ref)(OPTIONAL_SHAPE_INFO_ARG
|
|||||||
}
|
}
|
||||||
|
|
||||||
#undef GET_OUTPUT_INDEX
|
#undef GET_OUTPUT_INDEX
|
||||||
|
#undef GET_INPUT_INDEX
|
||||||
#undef ORDER
|
#undef ORDER
|
||||||
#undef AXIS_B
|
#undef AXIS_B
|
||||||
#undef AXIS_F
|
#undef AXIS_F
|
||||||
|
@ -192,6 +192,13 @@ void start_concat_test_dynamic(impl_types impl_type) {
|
|||||||
{{1, 5, 3, 4}, data_types::f32, format::bfyx},
|
{{1, 5, 3, 4}, data_types::f32, format::bfyx},
|
||||||
{{1, 3, 3, 4}, data_types::f32, format::bfyx},
|
{{1, 3, 3, 4}, data_types::f32, format::bfyx},
|
||||||
{{1, 2, 3, 4}, data_types::f32, format::bfyx});
|
{{1, 2, 3, 4}, data_types::f32, format::bfyx});
|
||||||
|
|
||||||
|
if (impl_type == impl_types::cpu) {
|
||||||
|
run_on_shapes({{1, 2, 3, 4}, data_types::f32, format::bfyx},
|
||||||
|
{{1, 0, 3, 4}, data_types::f32, format::bfyx},
|
||||||
|
{{1, 3, 3, 4}, data_types::f32, format::bfyx},
|
||||||
|
{{1, 8, 3, 4}, data_types::f32, format::bfyx});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(concat_gpu, dynamic_4d_f) {
|
TEST(concat_gpu, dynamic_4d_f) {
|
||||||
|
@ -1774,3 +1774,101 @@ TEST(scatter_update_gpu_fp16, d21214_bfzyx_axisX_bfwzyx_cached) {
|
|||||||
TEST(scatter_update_gpu_fp16, d2411_axisB_cached) {
|
TEST(scatter_update_gpu_fp16, d2411_axisB_cached) {
|
||||||
test_d2411_axisB<FLOAT16>(true);
|
test_d2411_axisB<FLOAT16>(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(scatter_update_gpu_fp32, output_padding) {
|
||||||
|
// Dictionary : 2x2x1x4
|
||||||
|
// Indexes : 3x1x1x1
|
||||||
|
// Updates : 2x2x1x3
|
||||||
|
// Axis : 3
|
||||||
|
// Output : 2x2x1x4
|
||||||
|
// Input values in fp32
|
||||||
|
|
||||||
|
// Indexes:
|
||||||
|
// 2.f, 0.f, 3.f
|
||||||
|
//
|
||||||
|
// Updates:
|
||||||
|
// 20.f, 30.f, 40.f,
|
||||||
|
// 50.f, 60.f, 70.f,
|
||||||
|
//
|
||||||
|
// 80.f, 90.f, 100.f,
|
||||||
|
// 110.f, 120.f, 130.f
|
||||||
|
//
|
||||||
|
// Dictionary:
|
||||||
|
// 0.f, 1.f, 2.f, 3.f,
|
||||||
|
// 4.f, 5.f, 6.f, 7.f,
|
||||||
|
//
|
||||||
|
// 8.f, 9.f, 10.f, 11.f,
|
||||||
|
// 12.f, 13.f, 14.f, 15.f
|
||||||
|
//
|
||||||
|
// Output:
|
||||||
|
// 30.f, 1.f, 20.f, 40.f,
|
||||||
|
// 60.f, 5.f, 50.f, 70.f,
|
||||||
|
//
|
||||||
|
// 90.f, 9.f, 80.f, 100.f,
|
||||||
|
// 120.f, 13.f, 110.f, 130.f
|
||||||
|
|
||||||
|
auto& engine = get_test_engine();
|
||||||
|
|
||||||
|
for(const auto target_format : formats2D) {
|
||||||
|
auto input1 = engine.allocate_memory({data_types::f32, plain_2d_format, tensor{2, 2, 4, 1}}); // Dictionary
|
||||||
|
auto input2 = engine.allocate_memory({data_types::f32, plain_2d_format, tensor{3, 1, 1, 1}}); // Indexes
|
||||||
|
auto input3 = engine.allocate_memory({data_types::f32, plain_2d_format, tensor{2, 2, 3, 1}}); // Updates
|
||||||
|
auto axis = 3;
|
||||||
|
|
||||||
|
set_values(input1, {
|
||||||
|
0.f, 1.f, 2.f, 3.f,
|
||||||
|
4.f, 5.f, 6.f, 7.f,
|
||||||
|
8.f, 9.f, 10.f, 11.f,
|
||||||
|
12.f, 13.f, 14.f, 15.f
|
||||||
|
});
|
||||||
|
|
||||||
|
set_values(input2, {
|
||||||
|
2.f, 0.f, 3.f
|
||||||
|
});
|
||||||
|
|
||||||
|
set_values(input3, {
|
||||||
|
20.f, 30.f, 40.f,
|
||||||
|
50.f, 60.f, 70.f,
|
||||||
|
80.f, 90.f, 100.f,
|
||||||
|
110.f, 120.f, 130.f
|
||||||
|
});
|
||||||
|
|
||||||
|
padding output_padding = padding({1,1}, {1,1});
|
||||||
|
|
||||||
|
topology topology;
|
||||||
|
topology.add(input_layout("InputDictionary", input1->get_layout()));
|
||||||
|
topology.add(input_layout("InputText", input2->get_layout()));
|
||||||
|
topology.add(input_layout("InputUpdates", input3->get_layout()));
|
||||||
|
topology.add(reorder("DictionaryReordered", input_info("InputDictionary"), target_format, data_types::f32));
|
||||||
|
topology.add(reorder("TextReordered", input_info("InputText"), target_format, data_types::f32));
|
||||||
|
topology.add(reorder("UpdatesReordered", input_info("InputUpdates"), target_format, data_types::f32));
|
||||||
|
topology.add(
|
||||||
|
scatter_update("scatter_update", input_info("DictionaryReordered"), input_info("TextReordered"), input_info("UpdatesReordered"), axis, output_padding)
|
||||||
|
);
|
||||||
|
topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f32));
|
||||||
|
|
||||||
|
network network(engine, topology, get_test_default_config(engine));
|
||||||
|
|
||||||
|
network.set_input_data("InputDictionary", input1);
|
||||||
|
network.set_input_data("InputText", input2);
|
||||||
|
network.set_input_data("InputUpdates", input3);
|
||||||
|
|
||||||
|
auto outputs = network.execute();
|
||||||
|
|
||||||
|
auto output = outputs.at("out").get_memory();
|
||||||
|
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||||
|
|
||||||
|
std::vector<float> expected_results = {
|
||||||
|
30.f, 1.f, 20.f, 40.f,
|
||||||
|
60.f, 5.f, 50.f, 70.f,
|
||||||
|
90.f, 9.f, 80.f, 100.f,
|
||||||
|
120.f, 13.f, 110.f, 130.f
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||||
|
ASSERT_EQ(expected_results[i], output_ptr[i])
|
||||||
|
<< "i=" << i << ", target_format=" << target_format;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user