diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp index 75ecf821b5e..01acdac9168 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp @@ -142,8 +142,6 @@ struct kernel_impl_params { virtual primitive_type_id type() const { return desc->type; } - void save(BinaryOutputBuffer& ob) const; - void load(BinaryInputBuffer& ib); const program& get_program() const { OPENVINO_ASSERT(prog != nullptr, "[GPU] Program pointer in kernel_impl_params is not initialized"); return *prog; diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index 79891ec13f5..595fc3ff533 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -84,13 +84,8 @@ public: network(program::ptr program, stream::ptr stream, uint16_t stream_id); - network(cldnn::BinaryInputBuffer& ifs, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id); - network(cldnn::BinaryInputBuffer& ifs, const ExecutionConfig& config, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id); - ~network(); - void save(cldnn::BinaryOutputBuffer& ob); - static ptr build_network(engine& engine, const topology& topology, const ExecutionConfig& config = {}, diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index 671add01303..d4b30edbfd9 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -98,6 +98,9 @@ public: _processing_order.erase(i); } + void save(cldnn::BinaryOutputBuffer& ob) const; + void load(cldnn::BinaryInputBuffer& ib, program& p); + private: list_of_nodes _processing_order; std::map processing_order_iterators; @@ -282,6 +285,9 @@ public: static std::shared_ptr make_task_executor(const ExecutionConfig& config); static std::shared_ptr make_compilation_context(const ExecutionConfig& config); + void save(cldnn::BinaryOutputBuffer& ob) const; + void load(cldnn::BinaryInputBuffer& ib); + private: uint32_t prog_id = 0; engine& _engine; diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp index 05972d7a972..0608511014a 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp @@ -18,7 +18,7 @@ struct memory; class BinaryOutputBuffer : public OutputBuffer { public: BinaryOutputBuffer(std::ostream& stream) - : OutputBuffer(this), stream(stream), _impl_params(nullptr) {} + : OutputBuffer(this), stream(stream), _impl_params(nullptr), _strm(nullptr) {} void write(void const * data, std::streamsize size) { auto const written_size = stream.rdbuf()->sputn(reinterpret_cast(data), size); @@ -28,16 +28,19 @@ public: void setKernelImplParams(void* impl_params) { _impl_params = impl_params; } void* getKernelImplParams() const { return _impl_params; } + void set_stream(void* strm) { _strm = strm; } + void* get_stream() const { return _strm; } private: std::ostream& stream; void* _impl_params; + void* _strm; }; class BinaryInputBuffer : public InputBuffer { public: BinaryInputBuffer(std::istream& stream, engine& engine) - : InputBuffer(this, engine), _stream(stream), _impl_params(nullptr) {} + : InputBuffer(this, engine), _stream(stream), _impl_params(nullptr) {} void read(void* const data, std::streamsize size) { auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast(data), size); @@ -47,17 +50,6 @@ public: void setKernelImplParams(void* impl_params) { _impl_params = impl_params; } void* getKernelImplParams() const { return _impl_params; } - void addConstData(const uint32_t net_id, const std::string& prim_id, const std::shared_ptr mem_ptr) { - while (_const_data_map.size() <= net_id) { - _const_data_map.emplace_back(std::unordered_map>()); - } - OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) == _const_data_map[net_id].end(), "[GPU] duplicated primitive id " + prim_id); - _const_data_map[net_id][prim_id] = mem_ptr; - } - std::shared_ptr getConstData(const uint32_t net_id, const std::string& prim_id) { - OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) != _const_data_map[net_id].end(), "[GPU] Not found primitive id " + prim_id); - return _const_data_map[net_id][prim_id]; - } std::streampos tellg() { return _stream.tellg(); } void seekg(std::streampos pos) { _stream.seekg(pos); } @@ -65,7 +57,6 @@ public: private: std::istream& _stream; void* _impl_params; - std::vector>> _const_data_map; }; template diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/layout_serializer.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/layout_serializer.hpp index 77e0754d300..96479b7826c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/layout_serializer.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/layout_serializer.hpp @@ -44,9 +44,7 @@ public: static void save(BufferType& buffer, const cldnn::layout& _layout) { buffer << make_data(&_layout.data_type, sizeof(cldnn::data_types)); buffer << make_data(&_layout.format, sizeof(cldnn::format)); - buffer << _layout.data_padding.filling_value(); - buffer << _layout.data_padding.lower_size().sizes(); - buffer << _layout.data_padding.upper_size().sizes(); + buffer << _layout.data_padding; buffer << _layout.get_partial_shape(); } }; @@ -57,16 +55,7 @@ public: static void load(BufferType& buffer, cldnn::layout& _layout) { buffer >> make_data(&_layout.data_type, sizeof(cldnn::data_types)); buffer >> make_data(&_layout.format, sizeof(cldnn::format)); - - { - float _filling_value; - buffer >> _filling_value; - std::vector _lower_size; - buffer >> _lower_size; - std::vector _upper_size; - buffer >> _upper_size; - _layout.data_padding = cldnn::padding(_lower_size, _upper_size, _filling_value); - } + buffer >> _layout.data_padding; ov::PartialShape partial_shape; buffer >> partial_shape; diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/vector_serializer.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/vector_serializer.hpp index e4abe0b47b4..cb3560ff589 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/vector_serializer.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/vector_serializer.hpp @@ -37,6 +37,32 @@ public: } }; +template +class Serializer, typename std::enable_if, BufferType>::value>::type> { +public: + static void save(BufferType& buffer, const std::vector& vector) { + buffer << vector.size(); + for (const bool el : vector) { + buffer << el; + } + } +}; + +template +class Serializer, typename std::enable_if, BufferType>::value>::type> { +public: + static void load(BufferType& buffer, std::vector& vector) { + typename std::vector::size_type vector_size = 0UL; + buffer >> vector_size; + bool el; + vector.clear(); + for (size_t i = 0; i < vector_size; ++i) { + buffer >> el; + vector.push_back(el); + } + } +}; + template class Serializer, typename std::enable_if, BufferType>::value && !std::is_arithmetic::value>::type> { diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp index 2c9eec02acc..3f89ced4147 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp @@ -28,7 +28,7 @@ public: const std::shared_ptr& plugin, RemoteContextImpl::Ptr context, const ExecutionConfig& config); - CompiledModel(cldnn::BinaryInputBuffer ib, + CompiledModel(cldnn::BinaryInputBuffer& ib, const std::shared_ptr& plugin, RemoteContextImpl::Ptr context, const ExecutionConfig& config); @@ -58,7 +58,6 @@ private: RemoteContextImpl::Ptr m_context; ExecutionConfig m_config; std::shared_ptr m_wait_executor; - std::shared_ptr m_model; std::string m_model_name; std::vector> m_inputs; std::vector> m_outputs; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/arg_max_min.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/arg_max_min.hpp index 418f8df90db..2354a58ef01 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/arg_max_min.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/arg_max_min.hpp @@ -121,8 +121,6 @@ struct arg_max_min : public primitive_base { void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); - ob << input; - ob << num_outputs; ob << make_data(&mode, sizeof(ov::op::TopKMode)); ob << top_k; ob << axis; @@ -133,8 +131,6 @@ struct arg_max_min : public primitive_base { void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); - ib >> input; - ib >> num_outputs; ib >> make_data(&mode, sizeof(ov::op::TopKMode)); ib >> top_k; ib >> axis; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/condition.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/condition.hpp index 4301f7cc10d..3886bf31856 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/condition.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/condition.hpp @@ -17,6 +17,8 @@ namespace cldnn { struct condition : public primitive_base { CLDNN_DECLARE_PRIMITIVE(condition) + condition() : primitive_base("", {}) {} + /// @brief branch has compiled program, input_map and output_map /// struct branch { @@ -39,6 +41,43 @@ struct condition : public primitive_base { ss << "]}"; return ss.str(); } + + void save(BinaryOutputBuffer& ob) const { + ob << input_map.size(); + for (auto& input_pair : input_map) { + ob << input_pair.first; + ob << input_pair.second; + } + ob << output_map.size(); + for (auto& output_pair : output_map) { + ob << output_pair.first; + ob << output_pair.second; + } + inner_program->save(ob); + } + + void load(BinaryInputBuffer& ib) { + size_t map_size; + ib >> map_size; + input_map.clear(); + for (size_t i = 0; i < map_size; ++i) { + primitive_id input_first, input_second; + ib >> input_first; + ib >> input_second; + input_map.insert({input_first, input_second}); + } + ib >> map_size; + output_map.clear(); + for (size_t i = 0; i < map_size; ++i) { + size_t output_index; + primitive_id output_second; + ib >> output_index; + ib >> output_second; + output_map.insert({output_index, output_second}); + } + inner_program = std::make_shared(ib.get_engine()); + inner_program->load(ib); + } }; /// @brief Constructs condition primitive / layer. @@ -62,6 +101,18 @@ struct condition : public primitive_base { branch branch_true; branch branch_false; + void save(BinaryOutputBuffer& ob) const override { + primitive_base::save(ob); + ob << branch_true; + ob << branch_false; + } + + void load(BinaryInputBuffer& ib) override { + primitive_base::load(ib); + ib >> branch_true; + ib >> branch_false; + } + protected: std::vector> get_dependencies() const override { return {}; } }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp index 177fbf99b70..764d3dc319f 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp @@ -5,6 +5,7 @@ #pragma once #include "primitive.hpp" #include "intel_gpu/runtime/memory.hpp" +#include "intel_gpu/runtime/engine.hpp" namespace cldnn { @@ -33,5 +34,53 @@ struct data : public primitive_base { seed = hash_combine(seed, id); return seed; } + + void save(BinaryOutputBuffer& ob) const override { + primitive_base::save(ob); + + ob << mem->get_layout(); + + const auto _allocation_type = mem->get_allocation_type(); + ob << make_data(&_allocation_type, sizeof(_allocation_type)); + + size_t data_size = mem->size(); + ob << make_data(&data_size, sizeof(size_t)); + + if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { + ob << make_data(mem->buffer_ptr(), data_size); + } else { + std::vector _buf; + _buf.resize(data_size); + stream* strm = reinterpret_cast(ob.get_stream()); + mem->copy_to(*strm, _buf.data()); + ob << make_data(_buf.data(), data_size); + } + } + + void load(BinaryInputBuffer& ib) override { + primitive_base::load(ib); + + layout output_layout = layout(); + ib >> output_layout; + + allocation_type _allocation_type = allocation_type::unknown; + ib >> make_data(&_allocation_type, sizeof(_allocation_type)); + + size_t data_size = 0; + ib >> make_data(&data_size, sizeof(size_t)); + + mem = ib.get_engine().allocate_memory(output_layout, _allocation_type, false); + + if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { + ib >> make_data(mem->buffer_ptr(), data_size); + } else { + std::vector _buf; + _buf.resize(data_size); + ib >> make_data(_buf.data(), data_size); + // stream* strm = reinterpret_cast(ib.get_stream()); + auto& strm = ib.get_engine().get_service_stream(); + mem->copy_from(strm, _buf.data()); + } + } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/loop.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/loop.hpp index e41577bbe80..0ea990b61b5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/loop.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/loop.hpp @@ -246,6 +246,7 @@ struct loop : public primitive_base { void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); + body_program->save(ob); ob << trip_count_id; ob << first_execution_condition_id; ob << num_iteration_id; @@ -259,6 +260,8 @@ struct loop : public primitive_base { void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); + body_program = std::make_shared(ib.get_engine()); + body_program->load(ib); ib >> trip_count_id; ib >> first_execution_condition_id; ib >> num_iteration_id; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/lstm.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/lstm.hpp index 71922acd217..ae979d17fb3 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/lstm.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/lstm.hpp @@ -282,6 +282,7 @@ struct lstm_gemm : public primitive_base { } void save(BinaryOutputBuffer& ob) const override { + primitive_base::save(ob); ob << weights; ob << recurrent; ob << bias; @@ -290,6 +291,7 @@ struct lstm_gemm : public primitive_base { } void load(BinaryInputBuffer& ib) override { + primitive_base::load(ib); ib >> weights; ib >> recurrent; ib >> bias; @@ -401,6 +403,7 @@ struct lstm_elt : public primitive_base { } void save(BinaryOutputBuffer& ob) const override { + primitive_base::save(ob); ob << cell; ob << clip; ob << input_forget; @@ -411,6 +414,7 @@ struct lstm_elt : public primitive_base { } void load(BinaryInputBuffer& ib) override { + primitive_base::load(ib); ib >> cell; ib >> clip; ib >> input_forget; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/mutable_data.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/mutable_data.hpp index 105711b58dd..3d0443ec005 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/mutable_data.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/mutable_data.hpp @@ -5,6 +5,7 @@ #pragma once #include "primitive.hpp" #include "intel_gpu/runtime/memory.hpp" +#include "intel_gpu/runtime/engine.hpp" #include namespace cldnn { @@ -59,11 +60,51 @@ struct mutable_data : public primitive_base { void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); ob << make_data(&fill_type, sizeof(filler_type)); + + ob << mem->get_layout(); + + const auto _allocation_type = mem->get_allocation_type(); + ob << make_data(&_allocation_type, sizeof(_allocation_type)); + + size_t data_size = mem->size(); + ob << make_data(&data_size, sizeof(size_t)); + + if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { + ob << make_data(mem->buffer_ptr(), data_size); + } else { + std::vector _buf; + _buf.resize(data_size); + stream* strm = reinterpret_cast(ob.get_stream()); + mem->copy_to(*strm, _buf.data()); + ob << make_data(_buf.data(), data_size); + } } void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); ib >> make_data(&fill_type, sizeof(filler_type)); + + layout output_layout = layout(); + ib >> output_layout; + + allocation_type _allocation_type = allocation_type::unknown; + ib >> make_data(&_allocation_type, sizeof(_allocation_type)); + + size_t data_size = 0; + ib >> make_data(&data_size, sizeof(size_t)); + + mem = ib.get_engine().allocate_memory(output_layout, _allocation_type, false); + + if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { + ib >> make_data(mem->buffer_ptr(), data_size); + } else { + std::vector _buf; + _buf.resize(data_size); + ib >> make_data(_buf.data(), data_size); + // stream* strm = reinterpret_cast(ib.get_stream()); + auto& strm = ib.get_engine().get_service_stream(); + mem->copy_from(strm, _buf.data()); + } } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp index bb97725eeac..360258c13d9 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp @@ -98,12 +98,17 @@ struct prim_map_storage { return map.at(type_string); } + const cldnn::primitive_id get_type_string(const cldnn::primitive_type_id type_id) const { + return inverse_map.at(type_id); + } + bool set_type_id(const std::string& type_string, const cldnn::primitive_type_id type_id) { - return map.insert({type_string, type_id}).second; + return map.insert({type_string, type_id}).second && inverse_map.insert({type_id, type_string}).second; } private: std::unordered_map map; + std::unordered_map inverse_map; }; /// @brief Base class of network primitive description. @@ -249,6 +254,7 @@ public: ib >> output_paddings; size_t output_data_types_size; ib >> output_data_types_size; + output_data_types.clear(); for (size_t i = 0; i < output_data_types_size; i++) { bool has_value; ib >> has_value; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp index b9c86b7391a..a5185330ad4 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp @@ -20,6 +20,8 @@ enum class reorder_mean_mode { }; struct WeightsReorderParams { + WeightsReorderParams() {} + WeightsReorderParams(const layout& in_layout, const layout& out_layout, bool transposed = false, bool grouped = false) : _in_layout(in_layout), _out_layout(out_layout), @@ -51,6 +53,19 @@ struct WeightsReorderParams { void set_input_layout(const layout& layout) { _in_layout = layout; } void set_output_layout(const layout& layout) { _out_layout = layout; } + void save(cldnn::BinaryOutputBuffer& ob) const { + ob << _in_layout; + ob << _out_layout; + ob << _transposed; + ob << _grouped; + } + void load(cldnn::BinaryInputBuffer& ib) { + ib >> _in_layout; + ib >> _out_layout; + ib >> _transposed; + ib >> _grouped; + } + protected: layout _in_layout; layout _out_layout; @@ -257,6 +272,12 @@ struct reorder : public primitive_base { ob << subtract_per_feature; ob << make_data(&mean_mode, sizeof(reorder_mean_mode)); ob << make_data(&input_mem_type, sizeof(memory_type)); + if (weights_reorder_params == nullptr) { + ob << false; + } else { + ob << true; + weights_reorder_params->save(ob); + } ob << truncate; } @@ -267,6 +288,12 @@ struct reorder : public primitive_base { ib >> subtract_per_feature; ib >> make_data(&mean_mode, sizeof(reorder_mean_mode)); ib >> make_data(&input_mem_type, sizeof(memory_type)); + bool has_weights_reorder_params; + ib >> has_weights_reorder_params; + if (has_weights_reorder_params) { + weights_reorder_params = std::make_shared(); + weights_reorder_params->load(ib); + } ib >> truncate; } diff --git a/src/plugins/intel_gpu/src/graph/assign.cpp b/src/plugins/intel_gpu/src/graph/assign.cpp index 692b6e80339..6bf55efc1fa 100644 --- a/src/plugins/intel_gpu/src/graph/assign.cpp +++ b/src/plugins/intel_gpu/src/graph/assign.cpp @@ -30,20 +30,6 @@ std::string assign_inst::to_string(const assign_node& node) { return primitive_description.str(); } -void assign_inst::save(cldnn::BinaryOutputBuffer& ob) const { - parent::save(ob); - - ob << variable_id(); -} - -void assign_inst::load(cldnn::BinaryInputBuffer& ib) { - parent::load(ib); - - std::string variable_id; - ib >> variable_id; - set_variable_id(variable_id); -} - void assign_inst::on_execute() { _outputs[0] = input_memory_ptr(0); } diff --git a/src/plugins/intel_gpu/src/graph/convolution.cpp b/src/plugins/intel_gpu/src/graph/convolution.cpp index 1590e68fefe..2886284da5b 100644 --- a/src/plugins/intel_gpu/src/graph/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/convolution.cpp @@ -266,16 +266,4 @@ convolution_inst::typed_primitive_inst(network& network, convolution_node const& input_layout.feature(), "Weights/ifm mismatch"); } - -void convolution_inst::save(cldnn::BinaryOutputBuffer& ob) const { - parent::save(ob); - - ob << _deform_conv_dep_offset; -} - -void convolution_inst::load(cldnn::BinaryInputBuffer& ib) { - parent::load(ib); - - ib >> _deform_conv_dep_offset; -} } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/data.cpp b/src/plugins/intel_gpu/src/graph/data.cpp index 00cd00bf90a..2cd7d87038f 100644 --- a/src/plugins/intel_gpu/src/graph/data.cpp +++ b/src/plugins/intel_gpu/src/graph/data.cpp @@ -51,60 +51,4 @@ std::string data_inst::to_string(data_node const& node) { data_inst::typed_primitive_inst(network& network, data_node const& node) : parent(network, node, attach_or_copy_data(network, node.get_attached_memory_ptr())) {} -// Cache blob format: -// [ kernel_impl_params ] -// [ output memory information ] -// [ data stored in memory ] -void data_inst::save(cldnn::BinaryOutputBuffer& ob) const { - parent::save(ob); - ob << _outputs[0]->get_layout(); - - const auto _allocation_type = _outputs[0]->get_allocation_type(); - ob << make_data(&_allocation_type, sizeof(_allocation_type)); - - size_t data_size = _outputs[0]->size(); - ob << make_data(&data_size, sizeof(size_t)); - - if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { - ob << make_data(_outputs[0]->buffer_ptr(), data_size); - } else { - std::vector _buf; - _buf.resize(data_size); - _outputs[0]->copy_to(get_network().get_stream(), _buf.data()); - ob << make_data(_buf.data(), data_size); - } -} - -void data_inst::load(BinaryInputBuffer& ib) { - parent::load(ib); - layout output_layout = layout(); - ib >> output_layout; - - allocation_type _allocation_type = allocation_type::unknown; - ib >> make_data(&_allocation_type, sizeof(_allocation_type)); - - size_t data_size = 0; - ib >> make_data(&data_size, sizeof(size_t)); - - if (!get_network().is_primary_stream()) { - _outputs[0] = ib.getConstData(_network.get_local_id(), id()); - auto pos = ib.tellg(); - pos += data_size; - ib.seekg(pos); - } else { - _outputs[0] = get_network().get_engine().allocate_memory(output_layout, _allocation_type, false); - - if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { - ib >> make_data(_outputs[0]->buffer_ptr(), data_size); - } else { - std::vector _buf; - _buf.resize(data_size); - ib >> make_data(_buf.data(), data_size); - _outputs[0]->copy_from(get_network().get_stream(), _buf.data()); - } - - ib.addConstData(_network.get_local_id(), id(), _outputs[0]); - } -} - } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/detection_output.cpp b/src/plugins/intel_gpu/src/graph/detection_output.cpp index 3dbcef84438..2cbf731ae9d 100644 --- a/src/plugins/intel_gpu/src/graph/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/detection_output.cpp @@ -234,88 +234,4 @@ detection_output_inst::typed_primitive_inst(network& network, detection_output_n node.get_dependency(2).is_padded(), "Detection output layer doesn't support input padding in Prior-Box input"); } - -void detection_output_inst::save(cldnn::BinaryOutputBuffer& ob) const { - parent::save(ob); - - // argument (struct detection_output) - ob << argument->id; - ob << argument->input; - ob << make_data(&argument->output_paddings[0], sizeof(argument->output_paddings[0])); - ob << argument->num_classes; - ob << argument->keep_top_k; - ob << argument->share_location; - ob << argument->background_label_id; - ob << argument->nms_threshold; - ob << argument->top_k; - ob << argument->eta; - ob << make_data(&argument->code_type, sizeof(argument->code_type)); - ob << argument->variance_encoded_in_target; - ob << argument->confidence_threshold; - ob << argument->prior_info_size; - ob << argument->prior_coordinates_offset; - ob << argument->prior_is_normalized; - ob << argument->input_width; - ob << argument->input_height; - ob << argument->decrease_label_id; - ob << argument->clip_before_nms; - ob << argument->clip_after_nms; - ob << argument->objectness_score; -} - -void detection_output_inst::load(cldnn::BinaryInputBuffer& ib) { - parent::load(ib); - - primitive_id id; - std::vector input; - uint32_t num_classes; - uint32_t keep_top_k; - bool share_location; - int background_label_id; - float nms_threshold; - int top_k; - float eta; - prior_box_code_type code_type = prior_box_code_type::corner; - bool variance_encoded_in_target; - float confidence_threshold; - int32_t prior_info_size; - int32_t prior_coordinates_offset; - bool prior_is_normalized; - int32_t input_width; - int32_t input_height; - bool decrease_label_id; - bool clip_before_nms; - bool clip_after_nms; - float objectness_score; - padding output_padding; - - ib >> id; - ib >> input; - ib >> make_data(&output_padding, sizeof(output_padding)); - ib >> num_classes; - ib >> keep_top_k; - ib >> share_location; - ib >> background_label_id; - ib >> nms_threshold; - ib >> top_k; - ib >> eta; - ib >> make_data(&code_type, sizeof(code_type)); - ib >> variance_encoded_in_target; - ib >> confidence_threshold; - ib >> prior_info_size; - ib >> prior_coordinates_offset; - ib >> prior_is_normalized; - ib >> input_width; - ib >> input_height; - ib >> decrease_label_id; - ib >> clip_before_nms; - ib >> clip_after_nms; - ib >> objectness_score; - - argument = std::make_shared( - id, input, num_classes, keep_top_k, share_location, background_label_id, nms_threshold, top_k, - eta, code_type, variance_encoded_in_target, confidence_threshold, prior_info_size, - prior_coordinates_offset, prior_is_normalized, input_width, input_height, decrease_label_id, - clip_before_nms, clip_after_nms, objectness_score, output_padding); -} } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp index ffc607c4370..14a904c6297 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp @@ -13,12 +13,17 @@ namespace cldnn { namespace common { struct condition_impl : typed_primitive_impl { + using parent = typed_primitive_impl; + using parent::parent; + DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::common::condition_impl) std::unique_ptr clone() const override { return make_unique(*this); } + condition_impl() : parent() {} + explicit condition_impl(const condition_node& outer) { set_node_params(outer); } @@ -131,6 +136,16 @@ struct condition_impl : typed_primitive_impl { void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + ob << _node_id; + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + ib >> _node_id; + } + private: primitive_id _node_id; }; @@ -150,5 +165,5 @@ attach_condition_common::attach_condition_common() { } // namespace common } // namespace cldnn -// TODO: Change code like cldnn::loop -ASSIGN_TYPE_NAME(cldnn::common::condition_impl) +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::common::condition_impl) +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::condition) diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp index 7f1e7abcb9b..69224ffddf9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp @@ -79,11 +79,13 @@ struct activation_impl : public typed_primitive_impl { } void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << make_data(&activation_function, sizeof(activation_func)); ob << make_data(&additional_params, sizeof(activation_additional_params)); } void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> make_data(&activation_function, sizeof(activation_func)); ib >> make_data(&additional_params, sizeof(activation_additional_params)); } diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp index 9d1ef36a003..7d11374f178 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp @@ -35,10 +35,12 @@ struct assign_impl : public typed_primitive_impl { } void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << variable_id; } void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> variable_id; } diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp index fe8c30f3cd5..b3fda1193d7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp @@ -45,12 +45,14 @@ struct broadcast_impl : public typed_primitive_impl { } void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << make_data(&broadcast_mode, sizeof(ov::op::BroadcastModeSpec)); ob << make_data(&target_shape, sizeof(ov::Shape)); ob << axes_mapping; } void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> make_data(&broadcast_mode, sizeof(ov::op::BroadcastModeSpec)); ib >> make_data(&target_shape, sizeof(ov::Shape)); ib >> axes_mapping; diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp index 0d2e68c7b94..6bdde7bff2f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp @@ -40,10 +40,12 @@ struct concatenation_impl : public typed_primitive_impl { } void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << axis; } void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> axis; } diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp index f15d143e285..083d1774614 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp @@ -62,10 +62,12 @@ public: } void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << make_data(&nms_type, sizeof(NMSType)); } void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> make_data(&nms_type, sizeof(NMSType)); } diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp index ef336d81e6b..b261404b75e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/eltwise.cpp @@ -64,11 +64,13 @@ struct eltwise_impl : public typed_primitive_impl { } void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << make_data(&mode, sizeof(eltwise_mode)); ob << coefficients; } void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> make_data(&mode, sizeof(eltwise_mode)); ib >> coefficients; } diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp index 8daef9d220b..a7437af3022 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp @@ -42,11 +42,13 @@ struct gather_impl : public typed_primitive_impl { } void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << axis; ob << batch_dims; } void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> axis; ib >> batch_dims; } diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp index c11522c0c8d..20a8a4afa0e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp @@ -35,10 +35,12 @@ struct read_value_impl : public typed_primitive_impl { } void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << variable_id; } void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> variable_id; } diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp index e30c172402d..0719c03a2be 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp @@ -40,10 +40,12 @@ struct scatter_update_impl : public typed_primitive_impl { } void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << axis; } void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> axis; } diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/strided_slice.cpp index 50babdecc99..1103cb10dc7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/strided_slice.cpp @@ -56,6 +56,7 @@ struct strided_slice_impl : public typed_primitive_impl { } void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << begin_data; ob << end_data; ob << strides_data; @@ -68,6 +69,7 @@ struct strided_slice_impl : public typed_primitive_impl { } void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> begin_data; ib >> end_data; ib >> strides_data; diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/tile.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/tile.cpp index bfc982aaa63..17adf746a35 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/tile.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/tile.cpp @@ -39,10 +39,12 @@ struct tile_impl : public typed_primitive_impl { } void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << repeats; } void load(BinaryInputBuffer& ib) override { + parent::load(ib); ib >> repeats; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp index cd56a28b5c7..93d1768cf14 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/activation.cpp @@ -31,6 +31,15 @@ struct activation_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { kernel_arguments_data args = parent::get_arguments(instance); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp index 1816aa8f9ec..ca832edbefc 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp @@ -45,6 +45,15 @@ struct arg_max_min_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + protected: kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { kernel_arguments_data args = parent::get_arguments(instance); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp index c3d84f6269f..94c6aa9eabf 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp @@ -110,7 +110,30 @@ struct border_impl : typed_primitive_impl_ocl { (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data); } + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + const auto& prim_params = static_cast(*_kernel_data.params); + if (prim_params.inputs[0].LogicalSize() == 0) { + ob << true; + } else { + ob << false; + } + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + ib >> zero_input; + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + protected: + // WA for static impl deserialization + bool zero_input = false; + kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { kernel_arguments_data args = parent::get_arguments(instance); @@ -127,7 +150,8 @@ protected: const auto& prim_params = static_cast(*_kernel_data.params); std::vector layouts; - if (prim_params.inputs[0].LogicalSize() == 0) { + if ((_kernel_data.params == nullptr && zero_input) || + (_kernel_data.params != nullptr && prim_params.inputs[0].LogicalSize() == 0)) { layout any_layout = {data_types::u8, format::bfyx, {1, 1, 1, 1}}; layouts.push_back(any_layout); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp index 03dc67176fd..2ec9330db33 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp @@ -23,6 +23,15 @@ struct broadcast_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param, is_shape_agnostic); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp index 539c6b83040..7031db449a6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp @@ -53,6 +53,15 @@ struct concatenation_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp index 5ca29b7a4f9..51adecf9464 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp @@ -3,7 +3,7 @@ // #include "primitive_base.hpp" - +#include "kernel_base.h" #include "convolution_inst.h" #include "convolution/convolution_kernel_selector.h" #include "convolution/convolution_params.h" @@ -25,6 +25,15 @@ struct convolution_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + protected: kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { kernel_arguments_data args = parent::get_arguments(instance); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp index 7e71b214e61..8bb10e1da6e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/crop.cpp @@ -23,6 +23,15 @@ struct crop_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { auto params = get_default_params(impl_param, is_shape_agnostic); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp index b17b52cba18..1afaead914a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp @@ -56,6 +56,15 @@ struct cum_sum_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp index 5b0cc7e5b6f..6a3f0a00a29 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/custom_primitive.cpp @@ -28,7 +28,6 @@ struct custom_gpu_primitive_impl : typed_primitive_impl { std::shared_ptr cl_kernel; std::vector _kernels; - std::string _cached_kernel_id; std::unique_ptr clone() const override { return make_unique(*this); @@ -39,8 +38,7 @@ struct custom_gpu_primitive_impl : typed_primitive_impl { custom_gpu_primitive_impl(const custom_gpu_primitive_impl& other) : cl_kernel(other.cl_kernel) - , _kernels({}) - , _cached_kernel_id(other._cached_kernel_id) { + , _kernels({}) { for (const auto& kernel : other._kernels) { _kernels.emplace_back(kernel->clone()); } @@ -49,8 +47,7 @@ struct custom_gpu_primitive_impl : typed_primitive_impl { custom_gpu_primitive_impl(const custom_gpu_primitive_node& arg, std::shared_ptr& cl_kernel) : cl_kernel(cl_kernel) - , _kernels() - , _cached_kernel_id() { } + , _kernels() { } std::vector> get_kernels_source() override { std::vector> kernel_strings; @@ -64,12 +61,12 @@ struct custom_gpu_primitive_impl : typed_primitive_impl { _kernels.insert(_kernels.begin(), compiled_kernels.begin(), compiled_kernels.end()); } - void init_by_cached_kernels(const kernels_cache& kernels_cache) override { - _kernels.emplace_back(kernels_cache.get_kernel_from_cached_kernels(_cached_kernel_id)); + void init_by_cached_kernels(const kernels_cache& kernels_cache, std::vector& cached_kernel_ids) override { + _kernels.emplace_back(kernels_cache.get_kernel_from_cached_kernels(cached_kernel_ids[0])); } - void set_cached_kernel_ids(const kernels_cache& kernels_cache) override { - _cached_kernel_id = kernels_cache.get_cached_kernel_id(_kernels[0]); + std::vector get_cached_kernel_ids(const kernels_cache& kernels_cache) override { + return {kernels_cache.get_cached_kernel_id(_kernels[0])}; } void set_arguments_impl(custom_gpu_primitive_inst& instance) override { @@ -93,19 +90,23 @@ struct custom_gpu_primitive_impl : typed_primitive_impl { return stream.enqueue_kernel(*_kernels.front(), cl_kernel.get()->params, args, events, instance.is_output()); } + std::vector get_kernels() override { + return _kernels; + } + std::vector get_kernels() const override { return _kernels; } void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); ob << *cl_kernel; - ob << _cached_kernel_id; } void load(BinaryInputBuffer& ib) override { + parent::load(ib); cl_kernel = std::make_shared(); ib >> *cl_kernel; - ib >> _cached_kernel_id; } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp index 08ebd9351ec..b254a3bac97 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp @@ -23,6 +23,15 @@ struct eltwise_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + protected: kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { kernel_arguments_data args = parent::get_arguments(instance); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index e3ac31cccc7..fde99f2c07b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -3,7 +3,7 @@ // #include "primitive_base.hpp" - +#include "kernel_base.h" #include "fully_connected_inst.h" #include "fully_connected/fully_connected_kernel_selector.h" #include "fully_connected/fully_connected_params.h" @@ -48,6 +48,15 @@ struct fully_connected_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + protected: kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { kernel_arguments_data args = parent::get_arguments(instance); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp index e1631161420..0be99f35f48 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp @@ -67,6 +67,15 @@ struct gather_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp index 94a528bb49f..ccefe90da1d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp @@ -54,6 +54,15 @@ struct gather_elements_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param, is_shape_agnostic); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.cpp index c3461c3c226..a226c099e27 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_nd.cpp @@ -23,6 +23,15 @@ struct gather_nd_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp index 1b61437cb12..9989316c7f3 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp @@ -23,6 +23,15 @@ struct gemm_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/mvn.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/mvn.cpp index 48e2b68e911..7cd0f250f74 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/mvn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/mvn.cpp @@ -23,6 +23,15 @@ struct mvn_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param, is_shape_agnostic); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp index 7adc4bae0f5..7198d0a2302 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp @@ -25,6 +25,15 @@ struct count_nonzero_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { auto params = get_default_params(impl_param, is_shape_agnostic); auto optional_params = get_default_optional_params(impl_param.get_program()); @@ -49,6 +58,15 @@ struct gather_nonzero_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { auto params = get_default_params(impl_param, is_shape_agnostic); auto optional_params = get_default_optional_params(impl_param.get_program()); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/permute.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/permute.cpp index c18f7f33fa2..71cb534200a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/permute.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/permute.cpp @@ -50,6 +50,15 @@ struct permute_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param, is_shape_agnostic); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp index 429aa535121..98d65e63ab9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp @@ -33,18 +33,16 @@ For example, all gpu convolution implementations should derive from typed_primit template struct typed_primitive_impl_ocl : public typed_primitive_impl { kernel_selector::kernel_data _kernel_data; - std::vector _cached_kernel_ids; std::vector _kernels; // a pair of batch program hash and kernel entry hash of each ocl impl. std::pair kernel_dump_info; - typed_primitive_impl_ocl() : _kernel_data({}), _cached_kernel_ids({}), _kernels({}) {} + typed_primitive_impl_ocl() : _kernel_data({}), _kernels({}) {} typed_primitive_impl_ocl(const typed_primitive_impl_ocl& other) : typed_primitive_impl(other._weights_reorder_params, other._kernel_name, other._is_dynamic) , _kernel_data(other._kernel_data) - , _cached_kernel_ids(other._cached_kernel_ids) , _kernels({}) { _kernels.reserve(other._kernels.size()); for (size_t k = 0; k < other._kernels.size(); ++k) { @@ -65,17 +63,19 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { // [ kernel_selector::kernel_data ] // [ kernel_ids ] void save(BinaryOutputBuffer& ob) const override { + primitive_impl::save(ob); ob << make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype)); ob << _kernel_data.internalBufferSizes; ob << _kernel_data.kernels; - ob << _cached_kernel_ids; + ob << _kernel_data.kernelName; } void load(BinaryInputBuffer& ib) override { + primitive_impl::load(ib); ib >> make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype)); ib >> _kernel_data.internalBufferSizes; ib >> _kernel_data.kernels; - ib >> _cached_kernel_ids; + ib >> _kernel_data.kernelName; } template @@ -144,22 +144,22 @@ protected: kernel_dump_info = std::make_pair(std::to_string(kernels_cache.get_kernel_batch_hash(params)), _kernel_data.kernels[0].code.kernelString->entry_point); } - } + } - void init_by_cached_kernels(const kernels_cache& kernels_cache) override { + void init_by_cached_kernels(const kernels_cache& kernels_cache, std::vector& cached_kernel_ids) override { if (is_cpu()) { return; } _kernels.clear(); - _kernels.reserve(_cached_kernel_ids.size()); - for (size_t k = 0; k < _cached_kernel_ids.size(); ++k) { - _kernels.emplace_back(kernels_cache.get_kernel_from_cached_kernels(_cached_kernel_ids[k])); + _kernels.reserve(cached_kernel_ids.size()); + for (size_t k = 0; k < cached_kernel_ids.size(); ++k) { + _kernels.emplace_back(kernels_cache.get_kernel_from_cached_kernels(cached_kernel_ids[k])); } } - void set_cached_kernel_ids(const kernels_cache& kernels_cache) override { - _cached_kernel_ids = kernels_cache.get_cached_kernel_ids(_kernels); + std::vector get_cached_kernel_ids(const kernels_cache& kernels_cache) override { + return {kernels_cache.get_cached_kernel_ids(_kernels)}; } std::vector get_kernels() const override { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp index 2164c922a2a..7fb7aa08d5d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp @@ -23,6 +23,15 @@ struct quantize_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + protected: kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { kernel_arguments_data args; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp index e1ef0c4d22f..58cedb83db9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp @@ -23,6 +23,15 @@ struct range_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { auto params = get_default_params(impl_param, is_shape_agnostic); for (int i : {1, 2}) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp index 715d88b26d4..58125a50dd4 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp @@ -72,6 +72,13 @@ struct reduce_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param, is_shape_agnostic); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp index 976d6e38a58..77cdf621e9f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp @@ -24,6 +24,15 @@ struct reorder_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + protected: kernel_arguments_data get_arguments(const reorder_inst& instance) const override { kernel_arguments_data args = parent::get_arguments(instance); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp index 71f44e68515..0d193ecb88b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp @@ -23,6 +23,15 @@ struct rms_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param, is_shape_agnostic); @@ -63,3 +72,6 @@ attach_rms_impl::attach_rms_impl() { } // namespace detail } // namespace ocl } // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::rms_impl) +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::rms) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_nd_update.cpp index 729237f6fa4..4576e763f20 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_nd_update.cpp @@ -23,6 +23,15 @@ struct scatter_nd_update_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param, is_shape_agnostic); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_update.cpp index aa5d3dbe337..09941c2a673 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/scatter_update.cpp @@ -48,6 +48,15 @@ struct scatter_update_impl : typed_primitive_impl_ocl { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/select.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/select.cpp index f4996cba0ea..6c4eba43b56 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/select.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/select.cpp @@ -23,6 +23,15 @@ struct select_impl : typed_primitive_impl_ocl