[GPU] GPU serialization fix (#14164)
* kernel_impl_params should be serialized before primitive_impl
* reduced the range of min/max values for this test case
* updated to use the default get_mem when min/max are not specified
This commit is contained in:
@@ -32,7 +32,7 @@ public:
|
||||
size_t num_dimensions;
|
||||
buffer >> num_dimensions;
|
||||
for (size_t i = 0; i < num_dimensions; i++) {
|
||||
ov::Dimension::value_type min_val, max_val;
|
||||
ov::Interval::value_type min_val, max_val;
|
||||
buffer >> min_val >> max_val;
|
||||
partial_shape.push_back(ov::Dimension(min_val, max_val));
|
||||
}
|
||||
|
||||
@@ -1059,14 +1059,17 @@ static primitive_id find_dep_by_mem(const cldnn::primitive_inst* p_inst, memory&
|
||||
}
|
||||
|
||||
// Cache blob format:
|
||||
// [ primitive_impl ]
|
||||
// [ kernel_impl_params ]
|
||||
// [ primitive_impl ]
|
||||
// [ member variables of primitive_inst ]
|
||||
// [ output memory information ]
|
||||
// [ memory dependency information ]
|
||||
// [ execution dependency information ]
|
||||
// [ intermediate memory information ]
|
||||
void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
_impl_params->save(ob);
|
||||
ob.setKernlImplParams(_impl_params.get());
|
||||
|
||||
if (_impl != nullptr) {
|
||||
ob << true;
|
||||
kernel_arguments_data args = _impl->get_arguments(*this);
|
||||
@@ -1078,9 +1081,6 @@ void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
ob << false;
|
||||
}
|
||||
|
||||
_impl_params->save(ob);
|
||||
ob.setKernlImplParams(_impl_params.get());
|
||||
|
||||
ob << _node_output_layout;
|
||||
ob << has_mutable_input();
|
||||
ob << mem_allocated();
|
||||
@@ -1169,6 +1169,11 @@ int32_t primitive_inst::get_index_in_deps(memory::cptr arg) const {
|
||||
}
|
||||
|
||||
void primitive_inst::load(cldnn::BinaryInputBuffer& ib) {
|
||||
_impl_params.release();
|
||||
_impl_params = make_unique<kernel_impl_params>();
|
||||
_impl_params->load(ib);
|
||||
ib.setKernlImplParams(_impl_params.get());
|
||||
|
||||
bool has_impl;
|
||||
ib >> has_impl;
|
||||
if (has_impl) {
|
||||
@@ -1176,11 +1181,6 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) {
|
||||
ib >> _impl;
|
||||
}
|
||||
|
||||
_impl_params.release();
|
||||
_impl_params = make_unique<kernel_impl_params>();
|
||||
_impl_params->load(ib);
|
||||
ib.setKernlImplParams(_impl_params.get());
|
||||
|
||||
ib >> _node_output_layout;
|
||||
ib >> _has_mutable_input;
|
||||
ib >> _mem_allocated;
|
||||
|
||||
@@ -80,8 +80,13 @@ struct conv_eltw_test_params {
|
||||
|
||||
class ConvFusingTest : public BaseFusingTest<convolution_test_params> {
|
||||
public:
|
||||
void execute(convolution_test_params& p) {
|
||||
auto input_prim = get_mem(get_input_layout(p));
|
||||
void execute(convolution_test_params& p, int min=0, int max=0) {
|
||||
cldnn::memory::ptr input_prim;
|
||||
if (min == max) {
|
||||
input_prim = get_mem(get_input_layout(p));
|
||||
} else {
|
||||
input_prim = get_mem(get_input_layout(p), min, max);
|
||||
}
|
||||
network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
|
||||
network network_fused(this->engine, this->topology_fused, bo_fused);
|
||||
network_fused.set_input_data("input", input_prim);
|
||||
@@ -1744,7 +1749,7 @@ TEST_P(conv_int8_scale_shift_swish, basic) {
|
||||
|
||||
// high tolerance because many eltwise operations
|
||||
tolerance = default_tolerance(p.default_type) * 10;
|
||||
execute(p);
|
||||
execute(p, -20, 20);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_scale_shift_swish, ::testing::ValuesIn(std::vector<convolution_test_params>{
|
||||
|
||||
Reference in New Issue
Block a user