[GPU] GPU serialization fix (#14164)

* kernel_impl_params should be serialized before primitive_impl

* reduced the range of min/max values for this test case

* updated to use the default get_mem when min and max are not specified
This commit is contained in:
Eddy Kim
2022-11-23 17:00:22 +09:00
committed by GitHub
parent 7aac663f4a
commit ae0b2ce3c3
3 changed files with 18 additions and 13 deletions

View File

@@ -32,7 +32,7 @@ public:
size_t num_dimensions;
buffer >> num_dimensions;
for (size_t i = 0; i < num_dimensions; i++) {
ov::Dimension::value_type min_val, max_val;
ov::Interval::value_type min_val, max_val;
buffer >> min_val >> max_val;
partial_shape.push_back(ov::Dimension(min_val, max_val));
}

View File

@@ -1059,14 +1059,17 @@ static primitive_id find_dep_by_mem(const cldnn::primitive_inst* p_inst, memory&
}
// Cache blob format:
// [ primitive_impl ]
// [ kernel_impl_params ]
// [ primitive_impl ]
// [ member variables of primitive_inst ]
// [ output memory information ]
// [ memory dependency information ]
// [ execution dependency information ]
// [ intermediate memory information ]
void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const {
_impl_params->save(ob);
ob.setKernlImplParams(_impl_params.get());
if (_impl != nullptr) {
ob << true;
kernel_arguments_data args = _impl->get_arguments(*this);
@@ -1078,9 +1081,6 @@ void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const {
ob << false;
}
_impl_params->save(ob);
ob.setKernlImplParams(_impl_params.get());
ob << _node_output_layout;
ob << has_mutable_input();
ob << mem_allocated();
@@ -1169,6 +1169,11 @@ int32_t primitive_inst::get_index_in_deps(memory::cptr arg) const {
}
void primitive_inst::load(cldnn::BinaryInputBuffer& ib) {
_impl_params.release();
_impl_params = make_unique<kernel_impl_params>();
_impl_params->load(ib);
ib.setKernlImplParams(_impl_params.get());
bool has_impl;
ib >> has_impl;
if (has_impl) {
@@ -1176,11 +1181,6 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) {
ib >> _impl;
}
_impl_params.release();
_impl_params = make_unique<kernel_impl_params>();
_impl_params->load(ib);
ib.setKernlImplParams(_impl_params.get());
ib >> _node_output_layout;
ib >> _has_mutable_input;
ib >> _mem_allocated;

View File

@@ -80,8 +80,13 @@ struct conv_eltw_test_params {
class ConvFusingTest : public BaseFusingTest<convolution_test_params> {
public:
void execute(convolution_test_params& p) {
auto input_prim = get_mem(get_input_layout(p));
void execute(convolution_test_params& p, int min=0, int max=0) {
cldnn::memory::ptr input_prim;
if (min == max) {
input_prim = get_mem(get_input_layout(p));
} else {
input_prim = get_mem(get_input_layout(p), min, max);
}
network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
network network_fused(this->engine, this->topology_fused, bo_fused);
network_fused.set_input_data("input", input_prim);
@@ -1744,7 +1749,7 @@ TEST_P(conv_int8_scale_shift_swish, basic) {
// high tolerance because many eltwise operations
tolerance = default_tolerance(p.default_type) * 10;
execute(p);
execute(p, -20, 20);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_scale_shift_swish, ::testing::ValuesIn(std::vector<convolution_test_params>{