[GPU] cldnn::program serialization (#21290)

* cldnn::program serialization

* fixed to set a stream in an OutputBuffer

* removed unnecessary comments

* replaced emplace_back with push_back

* updated bool vector serializer

* fixed several bugs

* removed m_model from compiled_model

* fixed streams_test for model caching

* updated mutable_data handling logic

* added a new API SetUpdateDispatchDataFunc

* updated to serialize weights_reorder_param

* added functional tests for dynamic model caching

* renamed to GetUpdateDispatchDataFunc

* added dynamic model caching tests
This commit is contained in:
Eddy Kim 2023-12-01 13:16:51 +09:00 committed by GitHub
parent 7ff517466c
commit 3e2c2c06af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
166 changed files with 3850 additions and 3183 deletions

View File

@ -142,8 +142,6 @@ struct kernel_impl_params {
virtual primitive_type_id type() const { return desc->type; }
void save(BinaryOutputBuffer& ob) const;
void load(BinaryInputBuffer& ib);
const program& get_program() const {
OPENVINO_ASSERT(prog != nullptr, "[GPU] Program pointer in kernel_impl_params is not initialized");
return *prog;

View File

@ -84,13 +84,8 @@ public:
network(program::ptr program, stream::ptr stream, uint16_t stream_id);
network(cldnn::BinaryInputBuffer& ifs, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id);
network(cldnn::BinaryInputBuffer& ifs, const ExecutionConfig& config, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id);
~network();
void save(cldnn::BinaryOutputBuffer& ob);
static ptr build_network(engine& engine,
const topology& topology,
const ExecutionConfig& config = {},

View File

@ -98,6 +98,9 @@ public:
_processing_order.erase(i);
}
void save(cldnn::BinaryOutputBuffer& ob) const;
void load(cldnn::BinaryInputBuffer& ib, program& p);
private:
list_of_nodes _processing_order;
std::map<program_node*, node_iterator> processing_order_iterators;
@ -282,6 +285,9 @@ public:
static std::shared_ptr<ov::threading::IStreamsExecutor> make_task_executor(const ExecutionConfig& config);
static std::shared_ptr<ICompilationContext> make_compilation_context(const ExecutionConfig& config);
void save(cldnn::BinaryOutputBuffer& ob) const;
void load(cldnn::BinaryInputBuffer& ib);
private:
uint32_t prog_id = 0;
engine& _engine;

View File

@ -18,7 +18,7 @@ struct memory;
class BinaryOutputBuffer : public OutputBuffer<BinaryOutputBuffer> {
public:
BinaryOutputBuffer(std::ostream& stream)
: OutputBuffer<BinaryOutputBuffer>(this), stream(stream), _impl_params(nullptr) {}
: OutputBuffer<BinaryOutputBuffer>(this), stream(stream), _impl_params(nullptr), _strm(nullptr) {}
void write(void const * data, std::streamsize size) {
auto const written_size = stream.rdbuf()->sputn(reinterpret_cast<const char*>(data), size);
@ -28,16 +28,19 @@ public:
void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
void* getKernelImplParams() const { return _impl_params; }
void set_stream(void* strm) { _strm = strm; }
void* get_stream() const { return _strm; }
private:
std::ostream& stream;
void* _impl_params;
void* _strm;
};
class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
public:
BinaryInputBuffer(std::istream& stream, engine& engine)
: InputBuffer(this, engine), _stream(stream), _impl_params(nullptr) {}
: InputBuffer<BinaryInputBuffer>(this, engine), _stream(stream), _impl_params(nullptr) {}
void read(void* const data, std::streamsize size) {
auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
@ -47,17 +50,6 @@ public:
void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
void* getKernelImplParams() const { return _impl_params; }
void addConstData(const uint32_t net_id, const std::string& prim_id, const std::shared_ptr<memory> mem_ptr) {
while (_const_data_map.size() <= net_id) {
_const_data_map.emplace_back(std::unordered_map<std::string, std::shared_ptr<memory>>());
}
OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) == _const_data_map[net_id].end(), "[GPU] duplicated primitive id " + prim_id);
_const_data_map[net_id][prim_id] = mem_ptr;
}
std::shared_ptr<memory> getConstData(const uint32_t net_id, const std::string& prim_id) {
OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) != _const_data_map[net_id].end(), "[GPU] Not found primitive id " + prim_id);
return _const_data_map[net_id][prim_id];
}
std::streampos tellg() { return _stream.tellg(); }
void seekg(std::streampos pos) { _stream.seekg(pos); }
@ -65,7 +57,6 @@ public:
private:
std::istream& _stream;
void* _impl_params;
std::vector<std::unordered_map<std::string, std::shared_ptr<memory>>> _const_data_map;
};
template <typename T>

View File

@ -44,9 +44,7 @@ public:
static void save(BufferType& buffer, const cldnn::layout& _layout) {
buffer << make_data(&_layout.data_type, sizeof(cldnn::data_types));
buffer << make_data(&_layout.format, sizeof(cldnn::format));
buffer << _layout.data_padding.filling_value();
buffer << _layout.data_padding.lower_size().sizes();
buffer << _layout.data_padding.upper_size().sizes();
buffer << _layout.data_padding;
buffer << _layout.get_partial_shape();
}
};
@ -57,16 +55,7 @@ public:
static void load(BufferType& buffer, cldnn::layout& _layout) {
buffer >> make_data(&_layout.data_type, sizeof(cldnn::data_types));
buffer >> make_data(&_layout.format, sizeof(cldnn::format));
{
float _filling_value;
buffer >> _filling_value;
std::vector<cldnn::tensor::value_type> _lower_size;
buffer >> _lower_size;
std::vector<cldnn::tensor::value_type> _upper_size;
buffer >> _upper_size;
_layout.data_padding = cldnn::padding(_lower_size, _upper_size, _filling_value);
}
buffer >> _layout.data_padding;
ov::PartialShape partial_shape;
buffer >> partial_shape;

View File

@ -37,6 +37,32 @@ public:
}
};
// Specialization for std::vector<bool>: the container packs elements as bits
// behind a proxy reference, so the generic vector serializer (which takes
// element addresses) cannot be used. Each element is widened to a plain bool
// and streamed individually, preceded by the element count.
template <typename BufferType>
class Serializer<BufferType, std::vector<bool>, typename std::enable_if<std::is_base_of<OutputBuffer<BufferType>, BufferType>::value>::type> {
public:
    static void save(BufferType& buffer, const std::vector<bool>& vector) {
        const auto num_elements = vector.size();
        buffer << num_elements;
        for (std::size_t idx = 0; idx < num_elements; ++idx) {
            const bool element = vector[idx];
            buffer << element;
        }
    }
};
// Specialization for std::vector<bool>: elements cannot be read back through a
// raw-memory path because the container packs bits, so each value is
// deserialized into a temporary bool and appended. Mirrors the save()
// specialization above (count first, then one bool per element).
template <typename BufferType>
class Serializer<BufferType, std::vector<bool>, typename std::enable_if<std::is_base_of<InputBuffer<BufferType>, BufferType>::value>::type> {
public:
    static void load(BufferType& buffer, std::vector<bool>& vector) {
        typename std::vector<bool>::size_type vector_size = 0UL;
        buffer >> vector_size;
        vector.clear();
        vector.reserve(vector_size);  // one allocation instead of repeated growth
        for (std::size_t i = 0; i < vector_size; ++i) {
            bool el = false;  // value-initialized so a short read cannot publish garbage
            buffer >> el;
            vector.push_back(el);
        }
    }
};
template <typename BufferType, typename T>
class Serializer<BufferType, std::vector<T>, typename std::enable_if<std::is_base_of<OutputBuffer<BufferType>, BufferType>::value &&
!std::is_arithmetic<T>::value>::type> {

View File

@ -28,7 +28,7 @@ public:
const std::shared_ptr<const ov::IPlugin>& plugin,
RemoteContextImpl::Ptr context,
const ExecutionConfig& config);
CompiledModel(cldnn::BinaryInputBuffer ib,
CompiledModel(cldnn::BinaryInputBuffer& ib,
const std::shared_ptr<const ov::IPlugin>& plugin,
RemoteContextImpl::Ptr context,
const ExecutionConfig& config);
@ -58,7 +58,6 @@ private:
RemoteContextImpl::Ptr m_context;
ExecutionConfig m_config;
std::shared_ptr<ov::threading::ITaskExecutor> m_wait_executor;
std::shared_ptr<ov::Model> m_model;
std::string m_model_name;
std::vector<ov::Output<const ov::Node>> m_inputs;
std::vector<ov::Output<const ov::Node>> m_outputs;

View File

@ -121,8 +121,6 @@ struct arg_max_min : public primitive_base<arg_max_min> {
void save(BinaryOutputBuffer& ob) const override {
primitive_base<arg_max_min>::save(ob);
ob << input;
ob << num_outputs;
ob << make_data(&mode, sizeof(ov::op::TopKMode));
ob << top_k;
ob << axis;
@ -133,8 +131,6 @@ struct arg_max_min : public primitive_base<arg_max_min> {
void load(BinaryInputBuffer& ib) override {
primitive_base<arg_max_min>::load(ib);
ib >> input;
ib >> num_outputs;
ib >> make_data(&mode, sizeof(ov::op::TopKMode));
ib >> top_k;
ib >> axis;

View File

@ -17,6 +17,8 @@ namespace cldnn {
struct condition : public primitive_base<condition> {
CLDNN_DECLARE_PRIMITIVE(condition)
condition() : primitive_base("", {}) {}
/// @brief branch has compiled program, input_map and output_map
///
struct branch {
@ -39,6 +41,43 @@ struct condition : public primitive_base<condition> {
ss << "]}";
return ss.str();
}
// Serializes this branch: input_map element count followed by its key/value
// pairs, then output_map likewise, then the branch's compiled inner program.
// Field order must stay in sync with load() below.
void save(BinaryOutputBuffer& ob) const {
ob << input_map.size();
for (auto& input_pair : input_map) {
ob << input_pair.first;
ob << input_pair.second;
}
ob << output_map.size();
for (auto& output_pair : output_map) {
ob << output_pair.first;
ob << output_pair.second;
}
// NOTE(review): inner_program is dereferenced unconditionally -- confirm a
// branch is always compiled (non-null program) before it is serialized.
inner_program->save(ob);
}
// Deserializes a branch in the exact order written by save(): input_map
// (count + string->string pairs), output_map (count + index->string pairs),
// then the inner program, which is reconstructed on the buffer's engine.
void load(BinaryInputBuffer& ib) {
size_t map_size;  // reused for both maps; overwritten before each use
ib >> map_size;
input_map.clear();
for (size_t i = 0; i < map_size; ++i) {
primitive_id input_first, input_second;
ib >> input_first;
ib >> input_second;
input_map.insert({input_first, input_second});
}
ib >> map_size;
output_map.clear();
for (size_t i = 0; i < map_size; ++i) {
size_t output_index;
primitive_id output_second;
ib >> output_index;
ib >> output_second;
output_map.insert({output_index, output_second});
}
// Rebuild the compiled inner program from the stream rather than from a model.
inner_program = std::make_shared<cldnn::program>(ib.get_engine());
inner_program->load(ib);
}
};
/// @brief Constructs condition primitive / layer.
@ -62,6 +101,18 @@ struct condition : public primitive_base<condition> {
branch branch_true;
branch branch_false;
// Serializes the condition primitive: base fields first, then both branches
// (each branch writes its port maps and compiled inner program).
void save(BinaryOutputBuffer& ob) const override {
primitive_base<condition>::save(ob);
ob << branch_true;
ob << branch_false;
}
// Deserializes the condition primitive in the order written by save():
// base fields, then the true branch, then the false branch.
void load(BinaryInputBuffer& ib) override {
primitive_base<condition>::load(ib);
ib >> branch_true;
ib >> branch_false;
}
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {}; }
};

View File

@ -5,6 +5,7 @@
#pragma once
#include "primitive.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/engine.hpp"
namespace cldnn {
@ -33,5 +34,53 @@ struct data : public primitive_base<data> {
seed = hash_combine(seed, id);
return seed;
}
// Serializes the constant's memory: layout, allocation type, byte size, then
// the raw contents. Host-visible USM memory is streamed directly from its
// mapped pointer; any other allocation type (e.g. device memory) is first
// staged into a temporary host buffer.
void save(BinaryOutputBuffer& ob) const override {
primitive_base<data>::save(ob);
ob << mem->get_layout();
const auto _allocation_type = mem->get_allocation_type();
ob << make_data(&_allocation_type, sizeof(_allocation_type));
size_t data_size = mem->size();
ob << make_data(&data_size, sizeof(size_t));
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
ob << make_data(mem->buffer_ptr(), data_size);
} else {
std::vector<uint8_t> _buf;
_buf.resize(data_size);
// ob.get_stream() holds a type-erased cldnn::stream installed via
// set_stream(). NOTE(review): it is dereferenced without a null check --
// confirm callers always set the stream before saving device memory.
stream* strm = reinterpret_cast<stream*>(ob.get_stream());
mem->copy_to(*strm, _buf.data());
ob << make_data(_buf.data(), data_size);
}
}
// Deserializes the constant's memory in the order written by save(): layout,
// allocation type, byte size, raw contents. A fresh allocation of the original
// type is made on the buffer's engine; non-host-visible memory is filled via a
// staging host buffer.
void load(BinaryInputBuffer& ib) override {
primitive_base<data>::load(ib);
layout output_layout = layout();
ib >> output_layout;
allocation_type _allocation_type = allocation_type::unknown;
ib >> make_data(&_allocation_type, sizeof(_allocation_type));
size_t data_size = 0;
ib >> make_data(&data_size, sizeof(size_t));
mem = ib.get_engine().allocate_memory(output_layout, _allocation_type, false);
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
ib >> make_data(mem->buffer_ptr(), data_size);
} else {
std::vector<uint8_t> _buf;
_buf.resize(data_size);
ib >> make_data(_buf.data(), data_size);
// Unlike save(), no stream is attached to the input buffer here; the
// engine's service stream is used for the host->device copy instead.
auto& strm = ib.get_engine().get_service_stream();
mem->copy_from(strm, _buf.data());
}
}
};
} // namespace cldnn

View File

@ -246,6 +246,7 @@ struct loop : public primitive_base<loop> {
void save(BinaryOutputBuffer& ob) const override {
primitive_base<loop>::save(ob);
body_program->save(ob);
ob << trip_count_id;
ob << first_execution_condition_id;
ob << num_iteration_id;
@ -259,6 +260,8 @@ struct loop : public primitive_base<loop> {
void load(BinaryInputBuffer& ib) override {
primitive_base<loop>::load(ib);
body_program = std::make_shared<cldnn::program>(ib.get_engine());
body_program->load(ib);
ib >> trip_count_id;
ib >> first_execution_condition_id;
ib >> num_iteration_id;

View File

@ -282,6 +282,7 @@ struct lstm_gemm : public primitive_base<lstm_gemm> {
}
void save(BinaryOutputBuffer& ob) const override {
primitive_base<lstm_gemm>::save(ob);
ob << weights;
ob << recurrent;
ob << bias;
@ -290,6 +291,7 @@ struct lstm_gemm : public primitive_base<lstm_gemm> {
}
void load(BinaryInputBuffer& ib) override {
primitive_base<lstm_gemm>::load(ib);
ib >> weights;
ib >> recurrent;
ib >> bias;
@ -401,6 +403,7 @@ struct lstm_elt : public primitive_base<lstm_elt> {
}
void save(BinaryOutputBuffer& ob) const override {
primitive_base<lstm_elt>::save(ob);
ob << cell;
ob << clip;
ob << input_forget;
@ -411,6 +414,7 @@ struct lstm_elt : public primitive_base<lstm_elt> {
}
void load(BinaryInputBuffer& ib) override {
primitive_base<lstm_elt>::load(ib);
ib >> cell;
ib >> clip;
ib >> input_forget;

View File

@ -5,6 +5,7 @@
#pragma once
#include "primitive.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include <vector>
namespace cldnn {
@ -59,11 +60,51 @@ struct mutable_data : public primitive_base<mutable_data> {
// Serializes the mutable buffer: fill type, layout, allocation type, byte
// size, then raw contents. Mirrors data::save(); host-visible USM memory is
// streamed directly, other allocation types go through a host staging buffer.
void save(BinaryOutputBuffer& ob) const override {
primitive_base<mutable_data>::save(ob);
ob << make_data(&fill_type, sizeof(filler_type));
ob << mem->get_layout();
const auto _allocation_type = mem->get_allocation_type();
ob << make_data(&_allocation_type, sizeof(_allocation_type));
size_t data_size = mem->size();
ob << make_data(&data_size, sizeof(size_t));
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
ob << make_data(mem->buffer_ptr(), data_size);
} else {
std::vector<uint8_t> _buf;
_buf.resize(data_size);
// ob.get_stream() holds a type-erased cldnn::stream installed via
// set_stream(). NOTE(review): dereferenced without a null check --
// confirm the stream is always set before saving device memory.
stream* strm = reinterpret_cast<stream*>(ob.get_stream());
mem->copy_to(*strm, _buf.data());
ob << make_data(_buf.data(), data_size);
}
}
// Deserializes the mutable buffer in the order written by save(): fill type,
// layout, allocation type, byte size, raw contents. Allocates fresh memory on
// the buffer's engine; non-host-visible memory is filled via a staging buffer.
void load(BinaryInputBuffer& ib) override {
primitive_base<mutable_data>::load(ib);
ib >> make_data(&fill_type, sizeof(filler_type));
layout output_layout = layout();
ib >> output_layout;
allocation_type _allocation_type = allocation_type::unknown;
ib >> make_data(&_allocation_type, sizeof(_allocation_type));
size_t data_size = 0;
ib >> make_data(&data_size, sizeof(size_t));
mem = ib.get_engine().allocate_memory(output_layout, _allocation_type, false);
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
ib >> make_data(mem->buffer_ptr(), data_size);
} else {
std::vector<uint8_t> _buf;
_buf.resize(data_size);
ib >> make_data(_buf.data(), data_size);
// No stream is attached to the input buffer; use the engine's service
// stream for the host->device copy.
auto& strm = ib.get_engine().get_service_stream();
mem->copy_from(strm, _buf.data());
}
}
};
} // namespace cldnn

View File

@ -98,12 +98,17 @@ struct prim_map_storage {
return map.at(type_string);
}
// Reverse lookup: primitive type id -> registered type string.
// Throws std::out_of_range if the id was never registered via set_type_id().
const cldnn::primitive_id get_type_string(const cldnn::primitive_type_id type_id) const {
return inverse_map.at(type_id);
}
bool set_type_id(const std::string& type_string, const cldnn::primitive_type_id type_id) {
return map.insert({type_string, type_id}).second;
return map.insert({type_string, type_id}).second && inverse_map.insert({type_id, type_string}).second;
}
private:
std::unordered_map<std::string, cldnn::primitive_type_id> map;
std::unordered_map<cldnn::primitive_type_id, std::string> inverse_map;
};
/// @brief Base class of network primitive description.
@ -249,6 +254,7 @@ public:
ib >> output_paddings;
size_t output_data_types_size;
ib >> output_data_types_size;
output_data_types.clear();
for (size_t i = 0; i < output_data_types_size; i++) {
bool has_value;
ib >> has_value;

View File

@ -20,6 +20,8 @@ enum class reorder_mean_mode {
};
struct WeightsReorderParams {
WeightsReorderParams() {}
WeightsReorderParams(const layout& in_layout, const layout& out_layout, bool transposed = false, bool grouped = false)
: _in_layout(in_layout),
_out_layout(out_layout),
@ -51,6 +53,19 @@ struct WeightsReorderParams {
void set_input_layout(const layout& layout) { _in_layout = layout; }
void set_output_layout(const layout& layout) { _out_layout = layout; }
// Serializes the reorder parameters: input layout, output layout, then the
// transposed and grouped flags. Field order must match load() below.
void save(cldnn::BinaryOutputBuffer& ob) const {
ob << _in_layout;
ob << _out_layout;
ob << _transposed;
ob << _grouped;
}
// Deserializes the reorder parameters in the exact order written by save().
void load(cldnn::BinaryInputBuffer& ib) {
ib >> _in_layout;
ib >> _out_layout;
ib >> _transposed;
ib >> _grouped;
}
protected:
layout _in_layout;
layout _out_layout;
@ -257,6 +272,12 @@ struct reorder : public primitive_base<reorder> {
ob << subtract_per_feature;
ob << make_data(&mean_mode, sizeof(reorder_mean_mode));
ob << make_data(&input_mem_type, sizeof(memory_type));
if (weights_reorder_params == nullptr) {
ob << false;
} else {
ob << true;
weights_reorder_params->save(ob);
}
ob << truncate;
}
@ -267,6 +288,12 @@ struct reorder : public primitive_base<reorder> {
ib >> subtract_per_feature;
ib >> make_data(&mean_mode, sizeof(reorder_mean_mode));
ib >> make_data(&input_mem_type, sizeof(memory_type));
bool has_weights_reorder_params;
ib >> has_weights_reorder_params;
if (has_weights_reorder_params) {
weights_reorder_params = std::make_shared<WeightsReorderParams>();
weights_reorder_params->load(ib);
}
ib >> truncate;
}

View File

@ -30,20 +30,6 @@ std::string assign_inst::to_string(const assign_node& node) {
return primitive_description.str();
}
void assign_inst::save(cldnn::BinaryOutputBuffer& ob) const {
parent::save(ob);
ob << variable_id();
}
void assign_inst::load(cldnn::BinaryInputBuffer& ib) {
parent::load(ib);
std::string variable_id;
ib >> variable_id;
set_variable_id(variable_id);
}
void assign_inst::on_execute() {
_outputs[0] = input_memory_ptr(0);
}

View File

@ -266,16 +266,4 @@ convolution_inst::typed_primitive_inst(network& network, convolution_node const&
input_layout.feature(),
"Weights/ifm mismatch");
}
void convolution_inst::save(cldnn::BinaryOutputBuffer& ob) const {
parent::save(ob);
ob << _deform_conv_dep_offset;
}
void convolution_inst::load(cldnn::BinaryInputBuffer& ib) {
parent::load(ib);
ib >> _deform_conv_dep_offset;
}
} // namespace cldnn

View File

@ -51,60 +51,4 @@ std::string data_inst::to_string(data_node const& node) {
data_inst::typed_primitive_inst(network& network, data_node const& node)
: parent(network, node, attach_or_copy_data(network, node.get_attached_memory_ptr())) {}
// Cache blob format:
// [ kernel_impl_params ]
// [ output memory information ]
// [ data stored in memory ]
void data_inst::save(cldnn::BinaryOutputBuffer& ob) const {
parent::save(ob);
ob << _outputs[0]->get_layout();
const auto _allocation_type = _outputs[0]->get_allocation_type();
ob << make_data(&_allocation_type, sizeof(_allocation_type));
size_t data_size = _outputs[0]->size();
ob << make_data(&data_size, sizeof(size_t));
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
ob << make_data(_outputs[0]->buffer_ptr(), data_size);
} else {
std::vector<uint8_t> _buf;
_buf.resize(data_size);
_outputs[0]->copy_to(get_network().get_stream(), _buf.data());
ob << make_data(_buf.data(), data_size);
}
}
void data_inst::load(BinaryInputBuffer& ib) {
parent::load(ib);
layout output_layout = layout();
ib >> output_layout;
allocation_type _allocation_type = allocation_type::unknown;
ib >> make_data(&_allocation_type, sizeof(_allocation_type));
size_t data_size = 0;
ib >> make_data(&data_size, sizeof(size_t));
if (!get_network().is_primary_stream()) {
_outputs[0] = ib.getConstData(_network.get_local_id(), id());
auto pos = ib.tellg();
pos += data_size;
ib.seekg(pos);
} else {
_outputs[0] = get_network().get_engine().allocate_memory(output_layout, _allocation_type, false);
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
ib >> make_data(_outputs[0]->buffer_ptr(), data_size);
} else {
std::vector<uint8_t> _buf;
_buf.resize(data_size);
ib >> make_data(_buf.data(), data_size);
_outputs[0]->copy_from(get_network().get_stream(), _buf.data());
}
ib.addConstData(_network.get_local_id(), id(), _outputs[0]);
}
}
} // namespace cldnn

View File

@ -234,88 +234,4 @@ detection_output_inst::typed_primitive_inst(network& network, detection_output_n
node.get_dependency(2).is_padded(),
"Detection output layer doesn't support input padding in Prior-Box input");
}
void detection_output_inst::save(cldnn::BinaryOutputBuffer& ob) const {
parent::save(ob);
// argument (struct detection_output)
ob << argument->id;
ob << argument->input;
ob << make_data(&argument->output_paddings[0], sizeof(argument->output_paddings[0]));
ob << argument->num_classes;
ob << argument->keep_top_k;
ob << argument->share_location;
ob << argument->background_label_id;
ob << argument->nms_threshold;
ob << argument->top_k;
ob << argument->eta;
ob << make_data(&argument->code_type, sizeof(argument->code_type));
ob << argument->variance_encoded_in_target;
ob << argument->confidence_threshold;
ob << argument->prior_info_size;
ob << argument->prior_coordinates_offset;
ob << argument->prior_is_normalized;
ob << argument->input_width;
ob << argument->input_height;
ob << argument->decrease_label_id;
ob << argument->clip_before_nms;
ob << argument->clip_after_nms;
ob << argument->objectness_score;
}
void detection_output_inst::load(cldnn::BinaryInputBuffer& ib) {
parent::load(ib);
primitive_id id;
std::vector<input_info> input;
uint32_t num_classes;
uint32_t keep_top_k;
bool share_location;
int background_label_id;
float nms_threshold;
int top_k;
float eta;
prior_box_code_type code_type = prior_box_code_type::corner;
bool variance_encoded_in_target;
float confidence_threshold;
int32_t prior_info_size;
int32_t prior_coordinates_offset;
bool prior_is_normalized;
int32_t input_width;
int32_t input_height;
bool decrease_label_id;
bool clip_before_nms;
bool clip_after_nms;
float objectness_score;
padding output_padding;
ib >> id;
ib >> input;
ib >> make_data(&output_padding, sizeof(output_padding));
ib >> num_classes;
ib >> keep_top_k;
ib >> share_location;
ib >> background_label_id;
ib >> nms_threshold;
ib >> top_k;
ib >> eta;
ib >> make_data(&code_type, sizeof(code_type));
ib >> variance_encoded_in_target;
ib >> confidence_threshold;
ib >> prior_info_size;
ib >> prior_coordinates_offset;
ib >> prior_is_normalized;
ib >> input_width;
ib >> input_height;
ib >> decrease_label_id;
ib >> clip_before_nms;
ib >> clip_after_nms;
ib >> objectness_score;
argument = std::make_shared<detection_output>(
id, input, num_classes, keep_top_k, share_location, background_label_id, nms_threshold, top_k,
eta, code_type, variance_encoded_in_target, confidence_threshold, prior_info_size,
prior_coordinates_offset, prior_is_normalized, input_width, input_height, decrease_label_id,
clip_before_nms, clip_after_nms, objectness_score, output_padding);
}
} // namespace cldnn

View File

@ -13,12 +13,17 @@ namespace cldnn {
namespace common {
struct condition_impl : typed_primitive_impl<condition> {
using parent = typed_primitive_impl<condition>;
using parent::parent;
DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::common::condition_impl)
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<condition_impl>(*this);
}
condition_impl() : parent() {}
explicit condition_impl(const condition_node& outer) {
set_node_params(outer);
}
@ -131,6 +136,16 @@ struct condition_impl : typed_primitive_impl<condition> {
void init_kernels(const kernels_cache& , const kernel_impl_params&) override {}
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
ob << _node_id;
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
ib >> _node_id;
}
private:
primitive_id _node_id;
};
@ -150,5 +165,5 @@ attach_condition_common::attach_condition_common() {
} // namespace common
} // namespace cldnn
// TODO: Change code like cldnn::loop
ASSIGN_TYPE_NAME(cldnn::common::condition_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::common::condition_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::condition)

View File

@ -79,11 +79,13 @@ struct activation_impl : public typed_primitive_impl<activation> {
}
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
ob << make_data(&activation_function, sizeof(activation_func));
ob << make_data(&additional_params, sizeof(activation_additional_params));
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
ib >> make_data(&activation_function, sizeof(activation_func));
ib >> make_data(&additional_params, sizeof(activation_additional_params));
}

View File

@ -35,10 +35,12 @@ struct assign_impl : public typed_primitive_impl<assign> {
}
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
ob << variable_id;
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
ib >> variable_id;
}

View File

@ -45,12 +45,14 @@ struct broadcast_impl : public typed_primitive_impl<broadcast> {
}
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
ob << make_data(&broadcast_mode, sizeof(ov::op::BroadcastModeSpec));
ob << make_data(&target_shape, sizeof(ov::Shape));
ob << axes_mapping;
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
ib >> make_data(&broadcast_mode, sizeof(ov::op::BroadcastModeSpec));
ib >> make_data(&target_shape, sizeof(ov::Shape));
ib >> axes_mapping;

View File

@ -40,10 +40,12 @@ struct concatenation_impl : public typed_primitive_impl<concatenation> {
}
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
ob << axis;
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
ib >> axis;
}

View File

@ -62,10 +62,12 @@ public:
}
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
ob << make_data(&nms_type, sizeof(NMSType));
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
ib >> make_data(&nms_type, sizeof(NMSType));
}

View File

@ -64,11 +64,13 @@ struct eltwise_impl : public typed_primitive_impl<eltwise> {
}
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
ob << make_data(&mode, sizeof(eltwise_mode));
ob << coefficients;
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
ib >> make_data(&mode, sizeof(eltwise_mode));
ib >> coefficients;
}

View File

@ -42,11 +42,13 @@ struct gather_impl : public typed_primitive_impl<gather> {
}
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
ob << axis;
ob << batch_dims;
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
ib >> axis;
ib >> batch_dims;
}

View File

@ -35,10 +35,12 @@ struct read_value_impl : public typed_primitive_impl<read_value> {
}
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
ob << variable_id;
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
ib >> variable_id;
}

View File

@ -40,10 +40,12 @@ struct scatter_update_impl : public typed_primitive_impl<scatter_update> {
}
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
ob << axis;
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
ib >> axis;
}

View File

@ -56,6 +56,7 @@ struct strided_slice_impl : public typed_primitive_impl<strided_slice> {
}
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
ob << begin_data;
ob << end_data;
ob << strides_data;
@ -68,6 +69,7 @@ struct strided_slice_impl : public typed_primitive_impl<strided_slice> {
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
ib >> begin_data;
ib >> end_data;
ib >> strides_data;

View File

@ -39,10 +39,12 @@ struct tile_impl : public typed_primitive_impl<tile> {
}
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
ob << repeats;
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
ib >> repeats;
}

View File

@ -31,6 +31,15 @@ struct activation_impl : typed_primitive_impl_ocl<activation> {
return make_unique<activation_impl>(*this);
}
// Deserializes the impl, then — for shape-agnostic (dynamic) kernels — restores
// the dispatch-data update callback: std::function cannot be serialized, so it
// is re-acquired from the kernel selector by the stored kernel name.
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
if (is_dynamic()) {
auto& kernel_selector = kernel_selector_t::Instance();
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
}
kernel_arguments_data get_arguments(const typed_primitive_inst<activation>& instance) const override {
kernel_arguments_data args = parent::get_arguments(instance);

View File

@ -45,6 +45,15 @@ struct arg_max_min_impl : typed_primitive_impl_ocl<arg_max_min> {
return make_unique<arg_max_min_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
if (is_dynamic()) {
auto& kernel_selector = kernel_selector_t::Instance();
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
}
protected:
kernel_arguments_data get_arguments(const typed_primitive_inst<arg_max_min>& instance) const override {
kernel_arguments_data args = parent::get_arguments(instance);

View File

@ -110,7 +110,30 @@ struct border_impl : typed_primitive_impl_ocl<border> {
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
}
// Serializes the impl plus one extra bool: whether the selected kernel's first
// input had zero logical size. The kernel params are not serialized, so load()
// restores this flag into 'zero_input' as a WA used by get_arguments().
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
// NOTE(review): *_kernel_data.params is dereferenced unconditionally --
// confirm params is always populated when a border impl is saved.
const auto& prim_params = static_cast<const kernel_selector::border_params&>(*_kernel_data.params);
if (prim_params.inputs[0].LogicalSize() == 0) {
ob << true;
} else {
ob << false;
}
}
// Deserializes the impl, restores the zero-input WA flag written by save(),
// and — for shape-agnostic (dynamic) kernels — re-acquires the dispatch-data
// update callback from the kernel selector (std::function is not serializable).
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
ib >> zero_input;
if (is_dynamic()) {
auto& kernel_selector = kernel_selector_t::Instance();
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
}
protected:
// WA for static impl deserialization
bool zero_input = false;
kernel_arguments_data get_arguments(const typed_primitive_inst<border>& instance) const override {
kernel_arguments_data args = parent::get_arguments(instance);
@ -127,7 +150,8 @@ protected:
const auto& prim_params = static_cast<const kernel_selector::border_params&>(*_kernel_data.params);
std::vector<layout> layouts;
if (prim_params.inputs[0].LogicalSize() == 0) {
if ((_kernel_data.params == nullptr && zero_input) ||
(_kernel_data.params != nullptr && prim_params.inputs[0].LogicalSize() == 0)) {
layout any_layout = {data_types::u8, format::bfyx, {1, 1, 1, 1}};
layouts.push_back(any_layout);
}

View File

@ -23,6 +23,15 @@ struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
return make_unique<broadcast_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
if (is_dynamic()) {
auto& kernel_selector = kernel_selector_t::Instance();
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<broadcast>();
auto params = get_default_params<kernel_selector::broadcast_params>(impl_param, is_shape_agnostic);

View File

@ -53,6 +53,15 @@ struct concatenation_impl : typed_primitive_impl_ocl<concatenation> {
return make_unique<concatenation_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
if (is_dynamic()) {
auto& kernel_selector = kernel_selector_t::Instance();
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<concatenation>();

View File

@ -3,7 +3,7 @@
//
#include "primitive_base.hpp"
#include "kernel_base.h"
#include "convolution_inst.h"
#include "convolution/convolution_kernel_selector.h"
#include "convolution/convolution_params.h"
@ -25,6 +25,15 @@ struct convolution_impl : typed_primitive_impl_ocl<convolution> {
return make_unique<convolution_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
protected:
kernel_arguments_data get_arguments(const typed_primitive_inst<convolution>& instance) const override {
kernel_arguments_data args = parent::get_arguments(instance);

View File

@ -23,6 +23,15 @@ struct crop_impl : typed_primitive_impl_ocl<crop> {
return make_unique<crop_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
auto params = get_default_params<kernel_selector::eltwise_params>(impl_param, is_shape_agnostic);

View File

@ -56,6 +56,15 @@ struct cum_sum_impl : typed_primitive_impl_ocl<cum_sum> {
return make_unique<cum_sum_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<cum_sum>();

View File

@ -28,7 +28,6 @@ struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
std::shared_ptr<kernel_selector::cl_kernel_data> cl_kernel;
std::vector<kernel::ptr> _kernels;
std::string _cached_kernel_id;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<custom_gpu_primitive_impl>(*this);
@ -39,8 +38,7 @@ struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
custom_gpu_primitive_impl(const custom_gpu_primitive_impl& other)
: cl_kernel(other.cl_kernel)
, _kernels({})
, _cached_kernel_id(other._cached_kernel_id) {
, _kernels({}) {
for (const auto& kernel : other._kernels) {
_kernels.emplace_back(kernel->clone());
}
@ -49,8 +47,7 @@ struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
custom_gpu_primitive_impl(const custom_gpu_primitive_node& arg,
std::shared_ptr<kernel_selector::cl_kernel_data>& cl_kernel)
: cl_kernel(cl_kernel)
, _kernels()
, _cached_kernel_id() { }
, _kernels() { }
std::vector<std::shared_ptr<cldnn::kernel_string>> get_kernels_source() override {
std::vector<std::shared_ptr<cldnn::kernel_string>> kernel_strings;
@ -64,12 +61,12 @@ struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
_kernels.insert(_kernels.begin(), compiled_kernels.begin(), compiled_kernels.end());
}
void init_by_cached_kernels(const kernels_cache& kernels_cache) override {
_kernels.emplace_back(kernels_cache.get_kernel_from_cached_kernels(_cached_kernel_id));
void init_by_cached_kernels(const kernels_cache& kernels_cache, std::vector<std::string>& cached_kernel_ids) override {
_kernels.emplace_back(kernels_cache.get_kernel_from_cached_kernels(cached_kernel_ids[0]));
}
void set_cached_kernel_ids(const kernels_cache& kernels_cache) override {
_cached_kernel_id = kernels_cache.get_cached_kernel_id(_kernels[0]);
std::vector<std::string> get_cached_kernel_ids(const kernels_cache& kernels_cache) override {
return {kernels_cache.get_cached_kernel_id(_kernels[0])};
}
void set_arguments_impl(custom_gpu_primitive_inst& instance) override {
@ -93,19 +90,23 @@ struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
return stream.enqueue_kernel(*_kernels.front(), cl_kernel.get()->params, args, events, instance.is_output());
}
std::vector<kernel::ptr> get_kernels() override {
return _kernels;
}
std::vector<kernel::ptr> get_kernels() const override {
return _kernels;
}
void save(BinaryOutputBuffer& ob) const override {
parent::save(ob);
ob << *cl_kernel;
ob << _cached_kernel_id;
}
void load(BinaryInputBuffer& ib) override {
parent::load(ib);
cl_kernel = std::make_shared<kernel_selector::cl_kernel_data>();
ib >> *cl_kernel;
ib >> _cached_kernel_id;
}
};

View File

@ -23,6 +23,15 @@ struct eltwise_impl : typed_primitive_impl_ocl<eltwise> {
return make_unique<eltwise_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
protected:
kernel_arguments_data get_arguments(const typed_primitive_inst<eltwise>& instance) const override {
kernel_arguments_data args = parent::get_arguments(instance);

View File

@ -3,7 +3,7 @@
//
#include "primitive_base.hpp"
#include "kernel_base.h"
#include "fully_connected_inst.h"
#include "fully_connected/fully_connected_kernel_selector.h"
#include "fully_connected/fully_connected_params.h"
@ -48,6 +48,15 @@ struct fully_connected_impl : typed_primitive_impl_ocl<fully_connected> {
return make_unique<fully_connected_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
protected:
kernel_arguments_data get_arguments(const typed_primitive_inst<fully_connected>& instance) const override {
kernel_arguments_data args = parent::get_arguments(instance);

View File

@ -67,6 +67,15 @@ struct gather_impl : typed_primitive_impl_ocl<gather> {
return make_unique<gather_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<gather>();

View File

@ -54,6 +54,15 @@ struct gather_elements_impl : typed_primitive_impl_ocl<gather_elements> {
return make_unique<gather_elements_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<gather_elements>();
auto params = get_default_params<kernel_selector::gather_elements_params>(impl_param, is_shape_agnostic);

View File

@ -23,6 +23,15 @@ struct gather_nd_impl : typed_primitive_impl_ocl<gather_nd> {
return make_unique<gather_nd_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
const auto& primitive = impl_param.typed_desc<gather_nd>();
auto params = get_default_params<kernel_selector::gather_nd_params>(impl_param);

View File

@ -23,6 +23,15 @@ struct gemm_impl : typed_primitive_impl_ocl<gemm> {
return make_unique<gemm_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<gemm>();

View File

@ -23,6 +23,15 @@ struct mvn_impl : typed_primitive_impl_ocl<mvn> {
return make_unique<mvn_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<mvn>();
auto params = get_default_params<kernel_selector::mvn_params>(impl_param, is_shape_agnostic);

View File

@ -25,6 +25,15 @@ struct count_nonzero_impl : typed_primitive_impl_ocl<count_nonzero> {
return make_unique<count_nonzero_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
auto params = get_default_params<kernel_selector::count_nonzero_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::count_nonzero_optional_params>(impl_param.get_program());
@ -49,6 +58,15 @@ struct gather_nonzero_impl : typed_primitive_impl_ocl<gather_nonzero> {
return make_unique<gather_nonzero_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
auto params = get_default_params<kernel_selector::gather_nonzero_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::gather_nonzero_optional_params>(impl_param.get_program());

View File

@ -50,6 +50,15 @@ struct permute_impl : typed_primitive_impl_ocl<permute> {
return make_unique<permute_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<permute>();
auto params = get_default_params<kernel_selector::permute_params>(impl_param, is_shape_agnostic);

View File

@ -33,18 +33,16 @@ For example, all gpu convolution implementations should derive from typed_primit
template <class PType>
struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
kernel_selector::kernel_data _kernel_data;
std::vector<std::string> _cached_kernel_ids;
std::vector<kernel::ptr> _kernels;
// a pair of batch program hash and kernel entry hash of each ocl impl.
std::pair<std::string, std::string> kernel_dump_info;
typed_primitive_impl_ocl() : _kernel_data({}), _cached_kernel_ids({}), _kernels({}) {}
typed_primitive_impl_ocl() : _kernel_data({}), _kernels({}) {}
typed_primitive_impl_ocl(const typed_primitive_impl_ocl<PType>& other)
: typed_primitive_impl<PType>(other._weights_reorder_params, other._kernel_name, other._is_dynamic)
, _kernel_data(other._kernel_data)
, _cached_kernel_ids(other._cached_kernel_ids)
, _kernels({}) {
_kernels.reserve(other._kernels.size());
for (size_t k = 0; k < other._kernels.size(); ++k) {
@ -65,17 +63,19 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
// [ kernel_selector::kernel_data ]
// [ kernel_ids ]
void save(BinaryOutputBuffer& ob) const override {
primitive_impl::save(ob);
ob << make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype));
ob << _kernel_data.internalBufferSizes;
ob << _kernel_data.kernels;
ob << _cached_kernel_ids;
ob << _kernel_data.kernelName;
}
void load(BinaryInputBuffer& ib) override {
primitive_impl::load(ib);
ib >> make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype));
ib >> _kernel_data.internalBufferSizes;
ib >> _kernel_data.kernels;
ib >> _cached_kernel_ids;
ib >> _kernel_data.kernelName;
}
template<typename ImplType>
@ -144,22 +144,22 @@ protected:
kernel_dump_info = std::make_pair(std::to_string(kernels_cache.get_kernel_batch_hash(params)),
_kernel_data.kernels[0].code.kernelString->entry_point);
}
}
}
void init_by_cached_kernels(const kernels_cache& kernels_cache) override {
void init_by_cached_kernels(const kernels_cache& kernels_cache, std::vector<std::string>& cached_kernel_ids) override {
if (is_cpu()) {
return;
}
_kernels.clear();
_kernels.reserve(_cached_kernel_ids.size());
for (size_t k = 0; k < _cached_kernel_ids.size(); ++k) {
_kernels.emplace_back(kernels_cache.get_kernel_from_cached_kernels(_cached_kernel_ids[k]));
_kernels.reserve(cached_kernel_ids.size());
for (size_t k = 0; k < cached_kernel_ids.size(); ++k) {
_kernels.emplace_back(kernels_cache.get_kernel_from_cached_kernels(cached_kernel_ids[k]));
}
}
void set_cached_kernel_ids(const kernels_cache& kernels_cache) override {
_cached_kernel_ids = kernels_cache.get_cached_kernel_ids(_kernels);
std::vector<std::string> get_cached_kernel_ids(const kernels_cache& kernels_cache) override {
return {kernels_cache.get_cached_kernel_ids(_kernels)};
}
std::vector<kernel::ptr> get_kernels() const override {

View File

@ -23,6 +23,15 @@ struct quantize_impl : typed_primitive_impl_ocl<quantize> {
return make_unique<quantize_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
protected:
kernel_arguments_data get_arguments(const typed_primitive_inst<quantize>& instance) const override {
kernel_arguments_data args;

View File

@ -23,6 +23,15 @@ struct range_impl : typed_primitive_impl_ocl<range> {
return make_unique<range_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
auto params = get_default_params<kernel_selector::range_params>(impl_param, is_shape_agnostic);
for (int i : {1, 2})

View File

@ -72,6 +72,13 @@ struct reduce_impl : typed_primitive_impl_ocl<reduce> {
return make_unique<reduce_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function from the kernel selector. Guarded with is_dynamic() for
    // consistency with every other OCL impl in this commit: only
    // shape-agnostic (dynamic) kernels use the update callback, so
    // static impls should not look it up unconditionally.
    parent::load(ib);
    if (is_dynamic()) {
        auto& kernel_selector = kernel_selector_t::Instance();
        auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
        kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
    }
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<reduce>();
auto params = get_default_params<kernel_selector::reduce_params>(impl_param, is_shape_agnostic);

View File

@ -24,6 +24,15 @@ struct reorder_impl : typed_primitive_impl_ocl<reorder> {
return make_unique<reorder_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
protected:
kernel_arguments_data get_arguments(const reorder_inst& instance) const override {
kernel_arguments_data args = parent::get_arguments(instance);

View File

@ -23,6 +23,15 @@ struct rms_impl : typed_primitive_impl_ocl<rms> {
return make_unique<rms_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<rms>();
auto params = get_default_params<kernel_selector::rms_params>(impl_param, is_shape_agnostic);
@ -63,3 +72,6 @@ attach_rms_impl::attach_rms_impl() {
} // namespace detail
} // namespace ocl
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::rms_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::rms)

View File

@ -23,6 +23,15 @@ struct scatter_nd_update_impl : typed_primitive_impl_ocl<scatter_nd_update> {
return make_unique<scatter_nd_update_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<scatter_nd_update>();
auto params = get_default_params<kernel_selector::scatter_nd_update_params>(impl_param, is_shape_agnostic);

View File

@ -48,6 +48,15 @@ struct scatter_update_impl : typed_primitive_impl_ocl<scatter_update> {
return make_unique<scatter_update_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<scatter_update>();

View File

@ -23,6 +23,15 @@ struct select_impl : typed_primitive_impl_ocl<select> {
return make_unique<select_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
auto params = get_default_params<kernel_selector::select_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::select_optional_params>(impl_param.get_program());

View File

@ -23,6 +23,15 @@ struct shape_of_impl : typed_primitive_impl_ocl<shape_of> {
return make_unique<shape_of_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
auto params = get_default_params<kernel_selector::shape_of_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::shape_of_optional_params>(impl_param.get_program());

View File

@ -45,6 +45,15 @@ struct softmax_impl : typed_primitive_impl_ocl<softmax> {
return make_unique<softmax_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<softmax>();
auto params = get_default_params<kernel_selector::softmax_params>(impl_param, is_shape_agnostic);

View File

@ -52,6 +52,15 @@ struct strided_slice_impl : typed_primitive_impl_ocl<strided_slice> {
return make_unique<strided_slice_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& prim = impl_param.typed_desc<strided_slice>();

View File

@ -23,6 +23,15 @@ struct tile_impl : typed_primitive_impl_ocl<tile> {
return make_unique<tile_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<tile>();

View File

@ -23,6 +23,15 @@ struct unique_count_impl : typed_primitive_impl_ocl<unique_count> {
return make_unique<unique_count_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<unique_count>();
auto params = get_default_params<kernel_selector::unique_count_params>(impl_param, is_shape_agnostic);
@ -94,6 +103,15 @@ struct unique_gather_impl : typed_primitive_impl_ocl<unique_gather> {
return make_unique<unique_gather_impl>(*this);
}
void load(BinaryInputBuffer& ib) override {
    // Restore serialized state, then re-fetch the dispatch-data update
    // function for shape-agnostic (dynamic) kernels from the kernel
    // selector implementation matching the stored kernel name.
    parent::load(ib);
    if (!is_dynamic())
        return;
    auto& selector = kernel_selector_t::Instance();
    auto selected_impl = selector.GetImplementation(_kernel_data.kernelName);
    selected_impl->GetUpdateDispatchDataFunc(_kernel_data);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<unique_gather>();
auto params = get_default_params<kernel_selector::unique_gather_params>(impl_param, is_shape_agnostic);

View File

@ -95,12 +95,14 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
}
bool is_cpu() const override { return false; }
bool is_onednn() const { return true; }
// Cache blob format:
// [ dnnl::primitive_attr ]
// [ dnnl::primitive_desc ]
// [ dnnl::cache_blob ]
void save(BinaryOutputBuffer& ob) const override {
primitive_impl::save(ob);
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
if (_attrs->get() == nullptr) {
ob << false;
@ -203,6 +205,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
}
void load(BinaryInputBuffer& ib) override {
primitive_impl::load(ib);
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
bool has_attrs;
ib >> has_attrs;

View File

@ -55,9 +55,6 @@ public:
typed_primitive_inst(network& network, const assign_node& desc);
typed_primitive_inst(network& network) : parent(network), memory_state::variable("") {}
void save(cldnn::BinaryOutputBuffer& ob) const override;
void load(cldnn::BinaryInputBuffer& ib) override;
void on_execute() override;
};

View File

@ -189,9 +189,6 @@ public:
bool compensation_term() const { return _impl_params->compensation_layout.has_value(); }
bool activations_zero_points_term() const { return _impl_params->activations_zero_points_layout.has_value(); }
void save(cldnn::BinaryOutputBuffer& ob) const override;
void load(cldnn::BinaryInputBuffer& ib) override;
private:
int32_t _deform_conv_dep_offset = 0;
};

View File

@ -40,8 +40,6 @@ public:
static std::string to_string(data_node const& node);
typed_primitive_inst(network& network, data_node const& node);
void save(BinaryOutputBuffer& ob) const override;
void load(BinaryInputBuffer& ib) override;
};
using data_inst = typed_primitive_inst<data>;

View File

@ -46,9 +46,6 @@ public:
memory::ptr location_memory() const { return dep_memory_ptr(0); }
memory::ptr confidence_memory() const { return dep_memory_ptr(1); }
memory::ptr prior_box_memory() const { return dep_memory_ptr(2); }
void save(cldnn::BinaryOutputBuffer& ob) const override;
void load(cldnn::BinaryInputBuffer& ib) override;
};
using detection_output_inst = typed_primitive_inst<detection_output>;

View File

@ -318,8 +318,6 @@ public:
event::ptr set_output_memory(memory::ptr mem, bool check = true, size_t idx = 0) override;
void reset_memory();
void save(BinaryOutputBuffer& ob) const override;
void load(BinaryInputBuffer& ib) override;
void validate_backedges(loop_node const & node) const;
void update_shape() override { primitive_inst::update_shape(); }

View File

@ -20,6 +20,7 @@ struct typed_program_node<mutable_data> : public typed_program_node_base<mutable
memory& get_attached_memory() const { return *mem; }
memory::ptr get_attached_memory_ptr() const { return mem; }
void attach_memory(memory::ptr new_mem, bool invalidate_users_if_changed = true);
void replace_memory(memory::ptr new_mem, bool invalidate_users_if_changed = false);
program_node& input(size_t idx = 0) const { return get_dependency(idx); }
@ -44,8 +45,6 @@ public:
typed_primitive_inst(network& network, mutable_data_node const& node);
event::ptr set_output_memory(memory::ptr mem, bool check = true, size_t idx = 0) override;
const std::list<primitive_id>& get_user_ids() const { return _user_ids; }
void save(BinaryOutputBuffer& ob) const override;
void load(BinaryInputBuffer& ib) override;
private:
std::list<primitive_id> _user_ids;

View File

@ -63,15 +63,36 @@ struct primitive_impl {
// class typed_primitive_gpu_impl override this with return false;
virtual bool is_cpu() const { return true; }
virtual bool is_onednn() const { return false; }
virtual void init_kernels(const kernels_cache& kernels_cache, const kernel_impl_params& params) = 0;
virtual void init_by_cached_kernels(const kernels_cache&) {}
virtual void set_cached_kernel_ids(const kernels_cache&) {}
virtual void init_by_cached_kernels(const kernels_cache&, std::vector<std::string>& cached_kernel_ids) {}
virtual std::vector<std::string> get_cached_kernel_ids(const kernels_cache&) { return {}; }
virtual std::unique_ptr<primitive_impl> clone() const = 0;
virtual std::vector<std::shared_ptr<cldnn::kernel_string>> get_kernels_source() { return {}; }
virtual void reset_kernels_source() {}
virtual std::vector<kernel::ptr> get_kernels() const { return {}; }
virtual void save(cldnn::BinaryOutputBuffer& ob) const {}
virtual void load(cldnn::BinaryInputBuffer& ib) {}
// Serializes the impl-agnostic base state: the memory-reuse flag, the
// selected kernel name, the dynamic-shape flag, and (when present) the
// weights reorder parameters. A bool presence marker precedes the reorder
// params so load() knows whether to read them. Stream order must match
// primitive_impl::load() exactly.
virtual void save(cldnn::BinaryOutputBuffer& ob) const {
    ob << can_reuse_memory;
    ob << _kernel_name;
    ob << _is_dynamic;
    if (_weights_reorder_params == nullptr) {
        ob << false;
    } else {
        ob << true;
        _weights_reorder_params->save(ob);
    }
}
// Mirror of primitive_impl::save(): restores the base state in the same
// order it was written. The weights reorder params are only deserialized
// when the preceding presence marker is true; otherwise
// _weights_reorder_params keeps its current (typically null) value.
virtual void load(cldnn::BinaryInputBuffer& ib) {
    ib >> can_reuse_memory;
    ib >> _kernel_name;
    ib >> _is_dynamic;
    bool has_weights_reorder_params;
    ib >> has_weights_reorder_params;
    if (has_weights_reorder_params) {
        _weights_reorder_params = std::make_shared<WeightsReorderParams>();
        _weights_reorder_params->load(ib);
    }
}
// returns a pair of batch program hash and kernel entry of each ocl impl. Returns "" for other impl types.
virtual std::pair<std::string, std::string> get_kernels_dump_info() const {
return std::make_pair("", "");
@ -195,10 +216,6 @@ public:
_impl->init_kernels(kernels_cache, *_impl_params);
}
void init_by_cached_kernels(const kernels_cache& kernels_cache) {
_impl->init_by_cached_kernels(kernels_cache);
}
void set_arguments();
void validate() const {
@ -252,8 +269,6 @@ public:
std::vector<memory::ptr> get_intermediates_memories() const { return _intermediates_memory; }
virtual void save(cldnn::BinaryOutputBuffer& ob) const;
virtual void load(cldnn::BinaryInputBuffer& ib);
void rebuild_deps(
std::unordered_map<primitive_id, std::shared_ptr<primitive_inst>> const& primitives);
void rebuild_exec_deps(

View File

@ -386,6 +386,9 @@ public:
const std::vector<fused_primitive_desc>& get_fused_primitives() const { return fused_prims; }
std::vector<fused_primitive_desc>& get_fused_primitives() { return fused_prims; }
void save(cldnn::BinaryOutputBuffer& ob) const;
void load(cldnn::BinaryInputBuffer& ib);
#ifdef ENABLE_ONEDNN_FOR_GPU
const std::shared_ptr<dnnl::primitive_attr>& get_onednn_primitive_attributes() const {
if (onednn_attrs == nullptr)

View File

@ -45,14 +45,6 @@ public:
end_x = e_x;
end_y = e_y;
}
void save(BinaryOutputBuffer& ob) const {
ob << start_x << start_y << end_x << end_y;
}
void load(BinaryInputBuffer& ib) {
ib >> start_x >> start_y >> end_x >> end_y;
}
};
// indices of the memory objects used by the layer
@ -81,8 +73,6 @@ public:
typed_primitive_inst(network& network, proposal_node const& desc);
const std::vector<anchor>& get_anchors() const { return _anchors; }
void save(BinaryOutputBuffer& ob) const override;
void load(BinaryInputBuffer& ib) override;
private:
std::vector<anchor> _anchors;

View File

@ -48,9 +48,6 @@ public:
typed_primitive_inst(network& network, const read_value_node& desc);
typed_primitive_inst(network& network) : parent(network), memory_state::variable("") {}
void save(cldnn::BinaryOutputBuffer& ob) const override;
void load(cldnn::BinaryInputBuffer& ib) override;
void update_output_memory() override;
protected:

View File

@ -103,9 +103,6 @@ public:
return req_reinterpr;
}
void save(cldnn::BinaryOutputBuffer& ob) const override;
void load(cldnn::BinaryInputBuffer& ib) override;
private:
void on_execute() override;

View File

@ -70,146 +70,4 @@ bool kernel_impl_params::operator==(const kernel_impl_params& rhs) const {
return true;
}
void kernel_impl_params::save(BinaryOutputBuffer& ob) const {
    // Serializes everything load() needs to reconstruct the params without
    // a live program pointer. Field order must stay in sync with load().
    ob << desc;
    ob << static_cast<uint64_t>(dev_type);
    ob << has_runtime_layouts;
    ob << unique_id;
    ob << input_layouts;
    ob << output_layouts;

    // Offsets are stored as their raw size vectors (count first).
    ob << input_offsets.size();
    for (const auto& offset : input_offsets) {
        ob << offset.sizes();
    }

    // Every optional layout is prefixed with a bool presence marker.
    auto save_optional_layout = [&ob](const auto& opt_layout) {
        if (opt_layout.has_value()) {
            ob << true;
            ob << opt_layout.value();
        } else {
            ob << false;
        }
    };
    save_optional_layout(weights_layout);
    save_optional_layout(bias_layout);
    save_optional_layout(weights_zero_points_layout);
    save_optional_layout(activations_zero_points_layout);
    save_optional_layout(compensation_layout);

    // Only the fused-primitive count is stored; load() rebuilds a
    // placeholder entry so has_fused_primitives() keeps working.
    ob << fused_desc.size();
#ifdef ENABLE_ONEDNN_FOR_GPU
    size_t num_fused_prims = fused_desc_onednn.size();
    ob << num_fused_prims;
    for (auto fused_prim : fused_desc_onednn) {
        ob << make_data(&fused_prim.op_type, sizeof(onednn_post_op_type));
        ob << fused_prim.mem_offset;
        ob << fused_prim.mem_dep;
        ob << make_data(&fused_prim.tag, sizeof(dnnl::memory::format_tag));
        ob << fused_prim.flatten;
        ob << fused_prim.dims;
        ob << make_data(&fused_prim.dt, sizeof(dnnl::memory::data_type));
    }
#endif // ENABLE_ONEDNN_FOR_GPU
    ob << primary_input_idx;
}
// Restores kernel_impl_params from the model-cache blob.
// NOTE: read order must mirror kernel_impl_params::save() exactly.
void kernel_impl_params::load(BinaryInputBuffer& ib) {
    // The owning program is not serialized; the caller re-attaches it later.
    prog = nullptr;
    ib >> desc;
    size_t dev_type_id = 0;
    ib >> dev_type_id;
    dev_type = static_cast<cldnn::device_type>(dev_type_id);
    ib >> has_runtime_layouts;
    ib >> unique_id;
    ib >> input_layouts;
    ib >> output_layouts;
    {
        // Input offsets were serialized as raw size vectors; rebuild tensors here.
        size_t num_input_offsets;
        ib >> num_input_offsets;
        input_offsets.resize(num_input_offsets);
        for (size_t i = 0; i < num_input_offsets; i++) {
            std::vector<cldnn::tensor::value_type> sizes;
            ib >> sizes;
            input_offsets[i] = cldnn::tensor(sizes);
        }
    }
    // Optional layouts: presence flag followed by the payload.
    bool has_value = false;
    layout layout_buf;

    ib >> has_value;
    if (has_value) {
        ib >> layout_buf;
        weights_layout = layout_buf;
    }
    ib >> has_value;
    if (has_value) {
        ib >> layout_buf;
        bias_layout = layout_buf;
    }
    ib >> has_value;
    if (has_value) {
        ib >> layout_buf;
        weights_zero_points_layout = layout_buf;
    }
    ib >> has_value;
    if (has_value) {
        ib >> layout_buf;
        activations_zero_points_layout = layout_buf;
    }
    ib >> has_value;
    if (has_value) {
        ib >> layout_buf;
        compensation_layout = layout_buf;
    }
    {
        // Fake fused_desc just for has_fused_primitives()
        size_t num_fused_desc;
        ib >> num_fused_desc;
        if (num_fused_desc > 0) {
            fused_desc.emplace_back(cldnn::fused_primitive_desc(nullptr));
        }
    }
#ifdef ENABLE_ONEDNN_FOR_GPU
    size_t num_fused_prims;
    ib >> num_fused_prims;
    fused_desc_onednn.resize(num_fused_prims);
    for (size_t idx = 0; idx < num_fused_prims; ++idx) {
        ib >> make_data(&fused_desc_onednn[idx].op_type, sizeof(onednn_post_op_type));
        ib >> fused_desc_onednn[idx].mem_offset;
        ib >> fused_desc_onednn[idx].mem_dep;
        ib >> make_data(&fused_desc_onednn[idx].tag, sizeof(dnnl::memory::format_tag));
        ib >> fused_desc_onednn[idx].flatten;
        ib >> fused_desc_onednn[idx].dims;
        ib >> make_data(&fused_desc_onednn[idx].dt, sizeof(dnnl::memory::data_type));
    }
#endif // ENABLE_ONEDNN_FOR_GPU
    ib >> primary_input_idx;
}
} // namespace cldnn

View File

@ -672,35 +672,6 @@ loop_inst::typed_primitive_inst(network & network, loop_node const & node)
_num_iterations_id = node.get_num_iterations_id();
}
// Serializes loop state (primitive maps, back edges, control-flow primitive ids)
// followed by the inner body network. Write order must mirror loop_inst::load().
void loop_inst::save(BinaryOutputBuffer& ob) const {
    parent::save(ob);
    ob << _input_primitive_maps;
    ob << _output_primitive_maps;
    ob << _back_edges;
    ob << _trip_count_id;
    ob << _initial_execution_id;
    ob << _current_iteration_id;
    ob << _condition_id;
    ob << _num_iterations_id;
    // The inner body network is serialized inline after the loop's own state.
    body_network->save(ob);
}
// Restores loop state from the cache blob; read order mirrors loop_inst::save().
// The inner body network is deserialized inline and re-attached afterwards.
void loop_inst::load(BinaryInputBuffer& ib) {
    parent::load(ib);
    // Memories must be preprocessed again after deserialization.
    // (Fixed: this assignment was previously joined to the next statement by a
    // stray comma operator instead of being terminated with a semicolon.)
    preproc_memories_done = false;
    ib >> _input_primitive_maps;
    ib >> _output_primitive_maps;
    ib >> _back_edges;
    ib >> _trip_count_id;
    ib >> _initial_execution_id;
    ib >> _current_iteration_id;
    ib >> _condition_id;
    ib >> _num_iterations_id;
    body_network = std::make_shared<cldnn::network>(ib, get_network().get_stream_ptr(), get_network().get_engine(), get_network().is_primary_stream(), 0);
    // set inner network to the new loaded _impl_params from cache.
    set_inner_networks({body_network});
}
void loop_inst::postprocess_output_memory(bool is_dynamic, int64_t current_iteration) {
if (is_dynamic) {
std::vector<cldnn::memory::ptr> external_outputs;

View File

@ -41,6 +41,11 @@ void mutable_data_node::attach_memory(memory::ptr new_mem, bool invalidate_users
recalc_output_layout(invalidate_users_if_changed);
}
// Replaces the attached memory buffer and recalculates the output layout,
// optionally invalidating user nodes whose layouts change as a result.
void mutable_data_node::replace_memory(memory::ptr new_mem, bool invalidate_users_if_changed) {
    mem = new_mem;
    recalc_output_layout(invalidate_users_if_changed);
}
std::string mutable_data_inst::to_string(mutable_data_node const& node) {
auto node_info = node.desc_to_json();
@ -83,46 +88,4 @@ mutable_data_inst::typed_primitive_inst(network& network, mutable_data_node cons
}
}
// Serializes the primitive_inst base state followed by the raw byte payload of
// output 0 (size prefix, then the bytes).
void mutable_data_inst::save(cldnn::BinaryOutputBuffer& ob) const {
    parent::save(ob);

    size_t data_size = _outputs[0]->size();
    ob << make_data(&data_size, sizeof(size_t));
    if (data_size == 0)
        return;

    allocation_type _allocation_type = _outputs[0]->get_allocation_type();
    // usm_host/usm_shared buffers are directly addressable from the host;
    // anything else must be read through an explicit mapped (locked) view.
    if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
        ob << make_data(_outputs[0]->buffer_ptr(), data_size);
    } else {
        mem_lock<char, mem_lock_type::read> lock{_outputs[0], get_node().get_program().get_stream()};
        ob << make_data(lock.data(), data_size);
    }
}
// Restores the byte payload of output 0; read order mirrors save().
// The output buffer itself must already exist (allocated by parent::load()).
void mutable_data_inst::load(BinaryInputBuffer& ib) {
    parent::load(ib);

    size_t data_size = 0;
    ib >> make_data(&data_size, sizeof(size_t));
    if (data_size == 0)
        return;

    OPENVINO_ASSERT(_outputs[0] != nullptr, "Output memory should be allocated before importing data.");

    allocation_type _allocation_type = _outputs[0]->get_allocation_type();
    // Host-visible buffers are filled in place; device buffers go through a
    // temporary host staging vector and an explicit copy.
    if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
        ib >> make_data(_outputs[0]->buffer_ptr(), data_size);
    } else {
        std::vector<uint8_t> _buf;
        _buf.resize(data_size);
        ib >> make_data(_buf.data(), data_size);
        _outputs[0]->copy_from(get_network().get_stream(), _buf.data());
    }
}
} // namespace cldnn

View File

@ -355,167 +355,6 @@ network::network(program::ptr program, uint16_t stream_id)
network::network(program::ptr program, stream::ptr stream, uint16_t stream_id)
: network(program, program->get_config(), stream, false, stream_id == 0) {}
network::network(cldnn::BinaryInputBuffer& ib, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id)
: network(ib, ExecutionConfig{}, stream, engine, is_primary_stream, local_net_id) {}
// Deserializing constructor: rebuilds a runnable network directly from a
// model-cache blob, without a backing cldnn::program (_program stays null).
// Read order must mirror network::save() exactly.
network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id)
    : _program(nullptr)
    , _config(config)
    , _engine(engine)
    , _stream(stream)
    , _memory_pool(new memory_pool(engine))
    , _internal(false)
    , _is_primary_stream(is_primary_stream)
    , _reset_arguments(true)
    , _local_net_id(local_net_id)
    , _shape_predictor(new ShapePredictor(&engine, config.get_property(ov::intel_gpu::buffers_preallocation_ratio))) {
    net_id = get_unique_net_id();

    // Debug config may override the shape predictor's preallocation parameters.
    GPU_DEBUG_GET_INSTANCE(debug_config);
    GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) {
        auto& mem_preallocation_params = debug_config->mem_preallocation_params;
        _shape_predictor.reset(new ShapePredictor(&engine,
                                                  mem_preallocation_params.next_iters_preallocation_count,
                                                  mem_preallocation_params.max_per_iter_size,
                                                  mem_preallocation_params.max_per_dim_diff,
                                                  mem_preallocation_params.buffers_preallocation_ratio));
    }

    // 1. Compiled kernels cache.
    kernels_cache kernels_cache(get_engine(), config, 0, nullptr, {""});
    ib >> kernels_cache;

    // 2. Data nodes (constant payloads): instantiate by type string and load in place.
    int num_data_nodes;
    ib >> num_data_nodes;
    for (int i = 0; i < num_data_nodes; ++i) {
        std::string type;
        std::string _primitive_id;
        ib >> type >> _primitive_id;
        std::shared_ptr<cldnn::primitive_inst> new_primitive_inst = prim_map_storage::instance().get_type_id(type)->create_instance(*this);
        ib >> *new_primitive_inst;
        _primitives[_primitive_id] = new_primitive_inst;
    }

    // 3. Executable instances: first create empty instances from their type
    //    strings, then load their state in the allocation-priority order that
    //    network::save() chose (memory-allocating instances first).
    std::vector<std::shared_ptr<primitive_inst>> insts_to_allocate;
    size_t exec_order_size;
    ib >> exec_order_size;

    for (size_t i = 0; i < exec_order_size; ++i) {
        std::string type;
        ib >> type;
        std::shared_ptr<cldnn::primitive_inst> new_primitive_inst = prim_map_storage::instance().get_type_id(type)->create_instance(*this);
        insts_to_allocate.emplace_back(new_primitive_inst);
    }

    _outputs.clear();
    _output_chains.clear();

    for (const auto& p_inst : insts_to_allocate) {
        ib >> *p_inst;
        _primitives[p_inst->id()] = p_inst;
        if (p_inst->get_impl() != nullptr)
            p_inst->init_by_cached_kernels(kernels_cache);
    }

    // 4. Rebuild _exec_order from the serialized id list.
    std::vector<primitive_id> exec_order_ids;
    ib >> exec_order_ids;
    _exec_order.clear();
    for (auto& exec_order_id : exec_order_ids) {
        _exec_order.emplace_back(_primitives[exec_order_id]);
    }

    // 5. Classify inputs/outputs and register variable-state layouts.
    for (auto& item : _primitives) {
        auto& p_inst = item.second;
        if (p_inst->is_input())
            _inputs.push_back(p_inst);
        if (p_inst->is_output()) {
            _outputs.push_back(p_inst);
            if (p_inst->type() == cldnn::data::type_id())
                _data_outputs.push_back(p_inst);
        }
        if (auto state_prim = std::dynamic_pointer_cast<memory_state::variable>(p_inst)) {
            set_variables_state_info(state_prim->variable_id(), p_inst->get_output_layout(0));
        }
    }

    // 6. Reconnect dependencies; for optimized-out (implicit) concatenations,
    //    walk the dependency chain and make every input alias the concat output.
    for (const auto& p_inst : _exec_order) {
        p_inst->rebuild_deps(_primitives);
        p_inst->rebuild_exec_deps(_primitives);

        if (p_inst->type() == cldnn::concatenation::type_id() && p_inst->can_be_optimized()) {
            // implicit concat
            std::list<const std::vector<std::pair<std::shared_ptr<const primitive_inst>, int32_t>>*> stack = {&p_inst->dependencies()};
            while (!stack.empty()) {
                auto nodes_list = stack.front();
                stack.pop_front();

                for (const auto& processed_nodes : *nodes_list) {
                    auto processed_node = processed_nodes.first;
                    auto dep_node = _primitives[processed_node->id()];
                    dep_node->set_output_memory(p_inst->output_memory_ptr(), false);
                    // Nested implicit concats are traversed breadth-first.
                    if (processed_node->type() == concatenation::type_id() && processed_node->can_be_optimized()) {
                        if (!processed_node->dependencies().empty())
                            stack.push_back(&processed_node->dependencies());
                    }
                }
            }
        }
    }

    // 7. Re-establish buffer reuse pairs recorded by network::save()
    //    (node output aliased onto its eltwise input's buffer).
    std::map<std::string, std::string> reuse_map;
    ib >> reuse_map;

    for (const auto& reuse_pair : reuse_map) {
        auto& eltw_inst = _primitives.at(reuse_pair.second);
        auto& prim_inst = _primitives.at(reuse_pair.first);
        auto& eltw_mem = eltw_inst->output_memory();
        auto new_mem = eltw_mem.get_engine()->reinterpret_buffer(eltw_mem, prim_inst->output_memory_ptr()->get_layout());
        prim_inst->set_output_memory(new_mem);
    }

    // Static optimized-out instances re-alias their output memory now;
    // dynamic ones do it at execution time.
    for (auto p_inst : _exec_order) {
        if (p_inst->can_be_optimized() && !p_inst->is_dynamic()) {
            p_inst->update_output_memory();
        }
    }

    add_default_output_chains();

    // 8. Primitive info snapshot (debug/introspection) and external-id mapping.
    size_t prims_info_size;
    ib >> prims_info_size;
    for (size_t i = 0; i < prims_info_size; i++) {
        primitive_id original_id;
        std::string type_id;
        primitive::primitive_id_arr c_dependencies;
        primitive::primitive_id_arr c_users;
        primitive::primitive_id_arr c_fused_ids;
        layout output_layout;
        std::string layout_str;
        std::string kernel_id;
        data_types runtime_precision;
        bool is_cpu;
        int exec_id;

        ib >> original_id;
        ib >> type_id;
        ib >> c_dependencies;
        ib >> c_users;
        ib >> c_fused_ids;
        ib >> output_layout;
        ib >> layout_str;
        ib >> kernel_id;
        ib >> make_data(&runtime_precision, sizeof(data_types));
        ib >> is_cpu;
        ib >> exec_id;

        primitive_info prim_info(original_id, type_id, c_dependencies, c_users, c_fused_ids,
                                 output_layout, layout_str, kernel_id, runtime_precision, is_cpu, exec_id);
        _prims_info.emplace_back(prim_info);
    }
    ib >> _ext_id_mapping;
}
network::~network() {
if (_program != nullptr)
_program->cancel_compilation_context();
@ -526,118 +365,6 @@ network::~network() {
}
}
// Cache blob format:
//     [ cldnn::kernels_cache ]
//     [ non executable primitive_inst ]
//     [ executable primitive_inst ]
//     [ memory reuse information ]
// Read order in the deserializing network constructor must mirror this.
void network::save(cldnn::BinaryOutputBuffer& ob) {
    // Collect the kernels of every executable primitive into the kernels cache
    // so they are serialized once, up front.
    auto& kernels_cache = _program->get_kernels_cache();
    kernels_cache.reset();
    for (const auto& p_inst : _exec_order) {
        if (p_inst->get_impl() != nullptr) {
            auto const_impl = static_cast<const primitive_impl*>(p_inst->get_impl());
            kernels_cache.add_to_cached_kernels(const_impl->get_kernels());
        }
    }
    ob << kernels_cache;

    // Non-executable nodes first: data nodes and impl-less mutable_data carry
    // constant payloads and have no execution dependencies.
    int num_data_nodes = 0;
    for (const auto& p_inst : _primitives) {
        if (p_inst.second->type() == cldnn::data::type_id() ||
            (p_inst.second->type() == cldnn::mutable_data::type_id() && p_inst.second->get_impl() == nullptr)) {
            num_data_nodes += 1;
        }
    }
    ob << num_data_nodes;
    for (const auto& p_inst : _primitives) {
        if (p_inst.second->type() == cldnn::data::type_id() ||
            (p_inst.second->type() == cldnn::mutable_data::type_id() && p_inst.second->get_impl() == nullptr)) {
            ob << p_inst.second->get_node().get_primitive()->type_string();
            ob << p_inst.second->id();
            ob << *(p_inst.second);
        }
    }

    // Executable instances are serialized in allocation-priority order:
    // memory-allocating instances first (largest output first), then
    // non-allocating instances in execution order. Load allocates in the
    // same order so aliasing targets always exist before their aliases.
    size_t exec_order_size = _exec_order.size();
    ob << exec_order_size;

    std::unordered_map<primitive_id, size_t> exec_order_num;
    size_t i = exec_order_size;
    for (const auto& p_inst : _exec_order) {
        exec_order_num[p_inst->id()] = --i;
    }

    std::vector<std::shared_ptr<primitive_inst>> insts_to_allocate(_exec_order.begin(), _exec_order.end());
    std::sort(insts_to_allocate.begin(),
              insts_to_allocate.end(),
              [&exec_order_num, &exec_order_size](std::shared_ptr<primitive_inst> const& lhs, std::shared_ptr<primitive_inst> const& rhs) {
                  // Allocating instances rank above all others via the +exec_order_size offset.
                  size_t lhs_size = (lhs->mem_allocated()) ? (lhs->get_output_layout().bytes_count() + exec_order_size) : exec_order_num[lhs->id()];
                  size_t rhs_size = (rhs->mem_allocated()) ? (rhs->get_output_layout().bytes_count() + exec_order_size) : exec_order_num[rhs->id()];
                  return (lhs_size > rhs_size);
              });

    for (const auto& p_inst : insts_to_allocate) {
        ob << p_inst->get_node().get_primitive()->type_string();
    }

    for (const auto& p_inst : insts_to_allocate) {
        ob << *p_inst;
    }

    // Execution order is stored by id so load can rebuild _exec_order.
    std::vector<primitive_id> exec_order_ids;
    for (const auto& p_inst : _exec_order) {
        exec_order_ids.emplace_back(p_inst->id());
    }
    ob << exec_order_ids;

    // Record onednn sum-post-op buffer reuse (node output aliases the first
    // fused eltwise-sum input) so load can re-establish the aliasing.
    std::map<std::string, std::string> reuse_map;

    auto& po = _program->get_processing_order();
    for (auto const& node : po) {
        if (node->get_preferred_impl_type() == impl_types::onednn) {
            size_t eltw_dep = 0;
            for (auto& fused_op : node->get_fused_primitives()) {
                if (fused_op.is_type<eltwise>() && fused_op.deps.size() == 1) {
                    // If it is first sum, reuse the buffer
                    auto fusing_type = onednn_add_fusing_helpers::get_add_fusing_type(*node, fused_op);
                    if (fusing_type != add_fusing_type::sum || eltw_dep != 0)
                        continue;
                    if (!fused_op.has_outer_dep())
                        continue;
                    eltw_dep = fused_op.outer_dep_start_idx;
                    auto& eltw_in = node->get_dependency(eltw_dep);
                    if (_primitives.find(eltw_in.id()) != _primitives.end() && _primitives.find(node->id()) != _primitives.end()) {
                        reuse_map[node->id()] = eltw_in.id();
                    }
                }
            }
        }
    }
    ob << reuse_map;

    // Primitive info snapshot (debug/introspection metadata).
    auto& prims_info = get_primitives_info();
    ob << prims_info.size();
    for (auto& prim_info : prims_info) {
        ob << prim_info.original_id;
        ob << prim_info.type_id;
        ob << prim_info.c_dependencies;
        ob << prim_info.c_users;
        ob << prim_info.c_fused_ids;
        ob << prim_info.output_layout;
        ob << prim_info.layout_str;
        ob << prim_info.kernel_id;
        ob << make_data(&prim_info.runtime_precision, sizeof(data_types));
        ob << prim_info.is_cpu;
        ob << prim_info.exec_id;
    }
    ob << get_ext_id_mapping();

    kernels_cache.reset();
}
network::ptr network::allocate_network(stream::ptr stream, program::ptr program, bool is_internal, bool is_primary_stream) {
return std::make_shared<network>(program, program->get_config(), stream, is_internal, is_primary_stream);
}

View File

@ -89,4 +89,29 @@ bool program::nodes_ordering::is_correct(program_node* node) {
}
return true;
}
// Serializes the processing order as a count followed by node ids in REVERSE
// order, so that load() can rebuild the original order with push_front.
void program::nodes_ordering::save(cldnn::BinaryOutputBuffer& ob) const {
    ob << _processing_order.size();
    for (auto rit = rbegin(); rit != rend(); ++rit) {
        ob << (*rit)->id();
    }
}
// Rebuilds the processing order from the blob. Ids were written in reverse,
// so pushing each node to the front restores the original order; the iterator
// map is refreshed as nodes are inserted.
void program::nodes_ordering::load(cldnn::BinaryInputBuffer& ib, program& p) {
    size_t count = 0;
    ib >> count;
    clear();
    while (count-- > 0) {
        primitive_id loaded_id;
        ib >> loaded_id;
        auto* n = p.get_node_ptr(loaded_id).get();
        _processing_order.push_front(n);
        processing_order_iterators[n] = _processing_order.begin();
    }
}
} // namespace cldnn

View File

@ -1666,245 +1666,4 @@ std::string primitive_inst::get_implementation_name() const {
return "undef";
}
// Breadth-first search (bounded to max_dist hops) through the dependency graph
// of p_inst for a primitive whose output buffer is the same underlying memory
// as mem_ptr. Returns that primitive's id, or "NOT_FOUND" if none matches.
// (Fixed: the loop variables previously shadowed the p_inst parameter.)
static primitive_id find_dep_by_mem(const cldnn::primitive_inst* p_inst, memory& mem_ptr, int max_dist = 5) {
    // BFS queue of (primitive id, distance from p_inst); head is the pop index.
    std::vector<std::pair<primitive_id, int>> queue;
    size_t head = 0;

    for (auto& dep : p_inst->dependencies())
        queue.emplace_back(std::make_pair(dep.first->id(), 0));

    const network& const_network = p_inst->get_network();
    while (head < queue.size()) {
        auto curr_item = queue.at(head);
        auto curr_prim = const_network.get_primitive(curr_item.first);

        if (p_inst->get_network().get_engine().is_the_same_buffer(mem_ptr, curr_prim->output_memory()))
            return curr_prim->id();

        // Expand this node's dependencies only while the distance budget allows.
        if (max_dist > curr_item.second)
            for (auto& dep : curr_prim->dependencies())
                queue.emplace_back(std::make_pair(dep.first->id(), curr_item.second + 1));

        head += 1;
    }
    return "NOT_FOUND";
}
// Cache blob format:
//     [ kernel_impl_params ]
//     [ primitive_impl ]
//     [ member variables of primitive_inst ]
//     [ output memory information ]
//     [ memory dependency information ]
//     [ execution dependency information ]
//     [ intermediate memory information ]
// Read order in primitive_inst::load() must mirror this.
void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const {
    _impl_params->save(ob);
    // Expose impl params to nested serializers through the buffer.
    ob.setKernelImplParams(_impl_params.get());

    ob << _node_output_layout;
    ob << has_mutable_input();
    ob << mem_allocated();
    ob << is_dynamic();
    ob << _node->get_primitive()->type_string();
    ob << id();
    ob << org_id();
    ob << is_input();
    ob << is_output();
    ob << inputs_memory_count();
    ob << outputs_memory_count();
    ob << get_fused_mem_count();
    ob << get_fused_mem_offset();
    ob << can_be_optimized();
    ob << can_share_buffer();
    ob << is_constant();
    ob << needs_completion_event();

    // data primitives serialize only the fields above.
    if (type() == cldnn::data::type_id()) {
        return;
    }

    // Per-output: null flag, or layout + allocation type.
    ob << _outputs.size();
    for (size_t i = 0; i < _outputs.size(); ++i) {
        if (_outputs[i] == nullptr) {
            ob << true;
        } else {
            ob << false;
            ob << _outputs[i]->get_layout();
            const auto _allocation_type = _outputs[i]->get_allocation_type();
            ob << make_data(&_allocation_type, sizeof(_allocation_type));
        }
    }

    bool can_reuse_memory = true;
    if (user_requesting_mem_reuse_false(*_node)) {
        can_reuse_memory = false;
    }
    ob << can_reuse_memory;

    ob << _node->get_memory_dependencies();

    // Dependencies as (id, port) pairs; execution dependencies by id only.
    ob << _deps.size();
    for (const auto& dep : _deps) {
        ob << dep.first->id();
        ob << dep.second;
    }

    ob << _exec_deps.size();
    for (const auto& dep : _exec_deps) {
        ob << dep->id();
    }

    // For instances that reuse another primitive's buffer, record which
    // dependency owns the memory so load() can re-alias it.
    for (size_t i = 0; i < _outputs.size(); ++i) {
        if (_outputs[i] != nullptr) {
            if (!mem_allocated())
                ob << find_dep_by_mem(this, output_memory(i));
        }
    }

    // Intermediate buffers: layout + allocation type each.
    ob << _intermediates_memory.size();
    for (const auto& ibuf : _intermediates_memory) {
        ob << ibuf->get_layout();
        const auto _allocation_type = ibuf->get_allocation_type();
        ob << make_data(&_allocation_type, sizeof(_allocation_type));
    }

    // Selected implementation (if any), with its kernel ids resolved against
    // the program's kernels cache before serialization.
    if (_impl != nullptr) {
        ob << true;
        _impl->set_cached_kernel_ids(_network.get_program()->get_kernels_cache());
        ob << _impl;
    } else {
        ob << false;
    }
}
// Returns the dependency slot whose memory buffer is `arg`.
// Throws if the buffer does not belong to any dependency.
int32_t primitive_inst::get_index_in_deps(memory::cptr arg) const {
    const uint32_t num_deps = static_cast<uint32_t>(_deps.size());
    for (uint32_t i = 0; i < num_deps; ++i) {
        if (dep_memory_ptr(i) == arg)
            return i;
    }
    OPENVINO_THROW("[get_index_in_deps]: not found in _deps");
}
// Restores primitive_inst state from the cache blob; read order mirrors save().
// Output and intermediate memories are re-allocated (or re-aliased) here.
void primitive_inst::load(cldnn::BinaryInputBuffer& ib) {
    _impl_params->load(ib);
    ib.setKernelImplParams(_impl_params.get());

    ib >> _node_output_layout;
    ib >> _has_mutable_input;
    ib >> _mem_allocated;
    ib >> _is_dynamic;
    std::string type_str;
    ib >> type_str;
    _type = cldnn::prim_map_storage::instance().get_type_id(type_str);
    ib >> _id;
    ib >> _org_id;
    ib >> _is_input;
    ib >> _is_output;
    ib >> _inputs_memory_count;
    ib >> _outputs_memory_count;
    ib >> _fused_mem_count;
    ib >> _fused_mem_offset;
    ib >> _can_be_optimized;
    ib >> _can_share_buffer;
    ib >> _is_constant;
    ib >> _needs_completion_event;

    // data primitives stop here (save() serialized only the header fields).
    if (type() == cldnn::data::type_id()) {
        return;
    }

    // mem_allocated : it is true if the output memory is allocated by this layer, and
    // false if this layer reuses output memory that is allocated by other layer.
    // is_output_null : it is true if the output memory is not allocated yet and false otherwise.
    size_t num_outputs;
    std::vector<bool> is_output_null;
    std::vector<layout> output_layouts;
    std::vector<allocation_type> allocation_types;

    ib >> num_outputs;
    is_output_null.resize(num_outputs);
    for (size_t i = 0; i < num_outputs; ++i) {
        bool is_null;
        ib >> is_null;
        is_output_null[i] = is_null;
        if (!is_null) {
            layout output_layout = layout();
            ib >> output_layout;
            output_layouts.emplace_back(output_layout);

            allocation_type _allocation_type = allocation_type::unknown;
            ib >> make_data(&_allocation_type, sizeof(_allocation_type));
            allocation_types.emplace_back(_allocation_type);
        }
    }

    bool can_reuse_memory;
    ib >> can_reuse_memory;

    std::set<primitive_id> _node_mem_deps;
    ib >> _node_mem_deps;
    _runtime_memory_dependencies = _node_mem_deps;

    // Dependency ids only; the actual pointers are resolved later by
    // rebuild_deps / rebuild_exec_deps once all instances exist.
    size_t vector_size = 0UL;
    ib >> vector_size;
    for (size_t i = 0; i < vector_size; ++i) {
        primitive_id dep_id;
        int32_t dep_idx;
        ib >> dep_id >> dep_idx;
        _dep_ids.emplace_back(std::pair<primitive_id, int32_t>(dep_id, dep_idx));
    }

    ib >> vector_size;
    _exec_dep_ids.resize(vector_size);
    for (auto& el : _exec_dep_ids) {
        ib >> el;
    }

    // Re-create output memory per slot:
    //  - non-allocating instances alias the recorded dependency's buffer
    //    (mutable_data falls back to a fresh allocation if the alias target
    //    is unavailable);
    //  - allocating instances either allocate directly (non-sharable,
    //    optimized, or network-output buffers) or draw from the memory pool.
    _outputs.resize(num_outputs);
    for (size_t i = 0; i < num_outputs; ++i) {
        _outputs[i] = nullptr;
        if (!is_output_null[i]) {
            if (!_mem_allocated) {
                std::string dep_id;
                ib >> dep_id;
                if (dep_id.compare("NOT_FOUND") != 0 && get_network().get_primitive(dep_id)->output_memory_ptr() != nullptr) {
                    _outputs[i] = get_network().get_engine().reinterpret_buffer(get_network().get_primitive(dep_id)->output_memory(), output_layouts[i]);
                } else if (type() == cldnn::mutable_data::type_id()) {
                    _outputs[i] = get_network().get_engine().allocate_memory(output_layouts[i], allocation_types[i]);
                }
            } else {
                if ((!can_share_buffer()) || can_be_optimized() || is_output()) {
                    _outputs[i] = get_network().get_engine().allocate_memory(output_layouts[i], allocation_types[i]);
                } else {
                    _outputs[i] = get_network().get_memory_pool().get_memory(output_layouts[i], id(), get_network_id(), _node_mem_deps,
                                                                             allocation_types[i], can_reuse_memory);
                }
            }
        }
    }
    _output_changed = false;

    // Intermediate buffers always come from the memory pool.
    ib >> vector_size;
    _intermediates_memory.resize(vector_size);
    for (size_t i = 0; i < vector_size; i++) {
        layout ibuf_layout = layout();
        ib >> ibuf_layout;
        allocation_type _allocation_type;
        ib >> make_data(&_allocation_type, sizeof(_allocation_type));

        _intermediates_memory[i] = get_network().get_memory_pool().get_memory(ibuf_layout, id(), get_network_id(),
                                                                              _node_mem_deps, _allocation_type, true, true);
    }

    // Selected implementation (if one was serialized).
    bool has_impl;
    ib >> has_impl;
    if (has_impl) {
        _impl.reset();
        ib >> _impl;
    }
}
} // namespace cldnn

View File

@ -1683,3 +1683,245 @@ void program::cancel_compilation_context() {
if (_compilation_context != nullptr)
_compilation_context->cancel();
}
// Serializes the program (primitive descriptors, per-node state, processing
// order, selected impls + kernels, debug metadata) into the model-cache blob.
// Read order in program::load() must mirror this.
void program::save(cldnn::BinaryOutputBuffer& ob) const {
    std::map<cldnn::memory::ptr, std::vector<const cldnn::program_node*>> mutable_datas_ptrs;

    // 1. Primitive descriptors. data nodes whose descriptor lost its memory get
    //    it re-attached from the node first; nodes with no memory at all are
    //    skipped (flagged false). mutable_data nodes are grouped by attached
    //    memory to detect output sharing below.
    ob << nodes_map.size();
    for (auto& node : nodes_map) {
        ob.setKernelImplParams(node.second->get_kernel_impl_params().get());

        if (node.second->is_type<data>() && node.second->as<data>().get_primitive()->mem == nullptr) {
            auto& data_node = node.second->as<data>();
            if (data_node.get_attached_memory_ptr() == nullptr) {
                ob << false;
                continue;
            } else {
                node.second->as<data>().typed_desc()->mem = data_node.get_attached_memory_ptr();
            }
        }

        ob << true;
        ob << node.second->desc;
        if (node.second->is_type<mutable_data>()) {
            mutable_datas_ptrs[node.second->as<mutable_data>().get_attached_memory_ptr()].push_back(node.second.get());
        }
    }

    // 2. Pairs of mutable_data nodes that share one memory buffer, so load()
    //    can re-establish the sharing.
    std::list<std::pair<primitive_id, primitive_id>> output_sharing_mutable_datas;
    // Iterate by const reference: each map entry holds a shared_ptr key and a
    // node vector, and copying them per iteration was pure waste.
    for (const auto& item : mutable_datas_ptrs) {
        if (item.second.size() != 2)
            continue;

        output_sharing_mutable_datas.push_back({item.second[0]->id(), item.second[1]->id()});
    }

    ob << output_sharing_mutable_datas.size();
    for (auto& shared_mem_pair : output_sharing_mutable_datas) {
        ob << shared_mem_pair.first;
        ob << shared_mem_pair.second;
    }

    // 3. Per-node state plus the ids of dependent shape_of nodes.
    for (auto& node : nodes_map) {
        ob << node.first;
        node.second->save(ob);
        ob << node.second->get_dependant_shape_of_nodes().size();
        for (auto& dep_node : node.second->get_dependant_shape_of_nodes()) {
            ob << dep_node->id();
        }
    }

    // 4. Graph inputs/outputs by id.
    ob << inputs.size();
    for (auto& input : inputs) {
        ob << input->id();
    }

    ob << outputs.size();
    for (auto& output : outputs) {
        ob << output->id();
    }

    ob << _is_body_program;
    ob << _can_be_optimized;
    get_processing_order().save(ob);

    // 5. Kernels cache and each node's selected impl. onednn impls need
    //    kernel_impl_params exposed through the buffer while serializing.
    {
        auto& kernels_cache = get_kernels_cache();
        std::vector<primitive_id> impl_ids;
        for (auto& node : get_processing_order()) {
            if (node->get_selected_impl() != nullptr) {
                impl_ids.emplace_back(node->id());
                kernels_cache.add_to_cached_kernels(node->get_selected_impl()->get_kernels());
            }
        }
        ob << kernels_cache;
        ob << impl_ids;
        for (auto& impl_id : impl_ids) {
            if (get_node_ptr(impl_id)->get_selected_impl()->is_onednn()) {
                ob << true;
                auto params = get_node_ptr(impl_id)->get_kernel_impl_params();
                ob.setKernelImplParams(params.get());
                ob << get_node_ptr(impl_id)->selected_impl;
            } else {
                ob << false;
                ob << get_node_ptr(impl_id)->selected_impl;
            }

            ob << get_node_ptr(impl_id)->get_selected_impl()->get_cached_kernel_ids(kernels_cache);
        }
    }

    // 6. Optimized-out primitive ids.
    ob << optimized_out.size();
    for (auto& opt_prim : optimized_out) {
        ob << opt_prim;
    }

    // 7. Primitive info snapshot (debug/introspection metadata).
    ob << prim_info.size();
    for (auto& p_info : prim_info) {
        ob << p_info.original_id;
        ob << p_info.type_id;
        ob << p_info.c_dependencies;
        ob << p_info.c_users;
        ob << p_info.c_fused_ids;
        ob << p_info.output_layout;
        ob << p_info.layout_str;
        ob << p_info.kernel_id;
        ob << make_data(&p_info.runtime_precision, sizeof(data_types));
        ob << p_info.is_cpu;
        ob << p_info.exec_id;
    }
}
// Deserializes a cldnn::program from the model-cache blob.
// Read order must mirror program::save() exactly.
void program::load(cldnn::BinaryInputBuffer& ib) {
    init_program();

    // 1. Recreate all nodes from their primitive descriptors; entries flagged
    //    false (data nodes without memory) were skipped at save time.
    size_t num_nodes;
    ib >> num_nodes;
    bool is_valid_data_node;
    for (size_t i = 0; i < num_nodes; ++i) {
        ib >> is_valid_data_node;
        if (!is_valid_data_node)
            continue;

        std::shared_ptr<cldnn::primitive> prim;
        ib >> prim;
        get_or_create(prim);
    }

    // 2. Re-link mutable_data pairs that shared one memory buffer at save time.
    size_t num_output_sharing_mutable_datas;
    ib >> num_output_sharing_mutable_datas;
    for (size_t i = 0; i < num_output_sharing_mutable_datas; ++i) {
        primitive_id md_id1, md_id2;
        ib >> md_id1;
        ib >> md_id2;

        auto& md_node1 = get_node(md_id1).as<mutable_data>();
        auto& md_node2 = get_node(md_id2).as<mutable_data>();

        md_node2.typed_desc()->mem = md_node1.typed_desc()->mem;
        md_node2.replace_memory(md_node2.typed_desc()->mem);
    }

    // 3. Per-node state and dependent shape_of node ids.
    for (size_t i = 0; i < num_nodes; ++i) {
        primitive_id prim_id;
        ib >> prim_id;
        auto& p_node = get_node(prim_id);
        p_node.load(ib);

        size_t num_dep_nodes;
        ib >> num_dep_nodes;
        // Inner index renamed to j: it previously shadowed the outer loop's i.
        for (size_t j = 0; j < num_dep_nodes; ++j) {
            ib >> prim_id;
            auto& dep_node = get_node(prim_id);
            p_node.add_dependant_shape_of_node(&dep_node);
        }
    }

    // 4. Graph inputs/outputs by id, flags, and processing order.
    ib >> num_nodes;
    inputs.clear();
    for (size_t i = 0; i < num_nodes; ++i) {
        primitive_id prim_id;
        ib >> prim_id;
        auto& p_node = get_node(prim_id);
        inputs.emplace_back(&p_node);
    }

    ib >> num_nodes;
    outputs.clear();
    for (size_t i = 0; i < num_nodes; ++i) {
        primitive_id prim_id;
        ib >> prim_id;
        auto& p_node = get_node(prim_id);
        outputs.emplace_back(&p_node);
    }

    ib >> _is_body_program;
    ib >> _can_be_optimized;
    get_processing_order().load(ib, *this);

    // 5. Kernels cache and per-node selected impls. onednn impls need
    //    kernel_impl_params exposed through the buffer while deserializing.
    {
        auto& kernels_cache = get_kernels_cache();
        ib >> kernels_cache;

        std::vector<primitive_id> impl_ids;
        ib >> impl_ids;

        for (auto& impl_id : impl_ids) {
            auto& p_node = get_node(impl_id);

            bool is_onednn;
            ib >> is_onednn;
            if (is_onednn) {
                auto params = p_node.get_kernel_impl_params();
                ib.setKernelImplParams(params.get());
                ib >> p_node.selected_impl;
            } else {
                ib >> p_node.selected_impl;
            }

            std::vector<std::string> cached_kernel_ids;
            ib >> cached_kernel_ids;
            p_node.selected_impl->init_by_cached_kernels(get_kernels_cache(), cached_kernel_ids);
        }
    }

    // 6. Optimized-out primitive ids.
    size_t optimized_out_size;
    ib >> optimized_out_size;
    optimized_out.clear();
    for (size_t i = 0; i < optimized_out_size; i++) {
        primitive_id opt_prim;
        ib >> opt_prim;
        optimized_out.emplace_back(opt_prim);
    }

    // 7. Primitive info snapshot (debug/introspection metadata).
    size_t prims_info_size;
    ib >> prims_info_size;
    prim_info.clear();
    for (size_t i = 0; i < prims_info_size; i++) {
        primitive_id original_id;
        std::string type_id;
        primitive::primitive_id_arr c_dependencies;
        primitive::primitive_id_arr c_users;
        primitive::primitive_id_arr c_fused_ids;
        layout output_layout;
        std::string layout_str;
        std::string kernel_id;
        data_types runtime_precision;
        bool is_cpu;
        int exec_id;

        ib >> original_id;
        ib >> type_id;
        ib >> c_dependencies;
        ib >> c_users;
        ib >> c_fused_ids;
        ib >> output_layout;
        ib >> layout_str;
        ib >> kernel_id;
        ib >> make_data(&runtime_precision, sizeof(data_types));
        ib >> is_cpu;
        ib >> exec_id;

        primitive_info p_info(original_id, type_id, c_dependencies, c_users, c_fused_ids,
                              output_layout, layout_str, kernel_id, runtime_precision, is_cpu, exec_id);
        prim_info.emplace_back(p_info);
    }
}

View File

@ -7,6 +7,9 @@
#include "primitive_inst.h"
#include "loop_inst.h"
#include "shape_of_inst.h"
#include "activation_inst.h"
#include "reorder_inst.h"
#include "quantize_inst.h"
#include "intel_gpu/runtime/debug_configuration.hpp"
#ifdef ENABLE_ONEDNN_FOR_GPU
#include "convolution_inst.h"
@ -534,6 +537,356 @@ void program_node::add_dependant_shape_of_node(const program_node* node) {
dependant_shape_of_nodes.insert(node);
}
// Serializes per-node graph state: layouts, preferred formats, graph links
// (by id), flags, fused primitive descriptors, and onednn post-op metadata.
// Read order in program_node::load() must mirror this.
void program_node::save(cldnn::BinaryOutputBuffer& ob) const {
    ob << valid_output_layouts;
    ob << output_layouts;

    // Preferred formats are serialized as plain int32 format-type values.
    ob << preferred_input_fmts.size();
    for (auto preferred_input_fmt : preferred_input_fmts) {
        int32_t format_type_int = preferred_input_fmt;
        ob << format_type_int;
    }

    ob << preferred_output_fmts.size();
    for (auto preferred_output_fmt : preferred_output_fmts) {
        int32_t format_type_int = preferred_output_fmt;
        ob << format_type_int;
    }

    // Graph links are stored by primitive id (plus port index for dependencies)
    // and resolved back into pointers at load time.
    ob << dependencies.size();
    for (const auto& dep_pair : dependencies) {
        ob << dep_pair.first->id();
        ob << dep_pair.second;
    }

    ob << users.size();
    for (const auto& user_node : users) {
        ob << user_node->id();
    }

    ob << memory_dependencies;

    ob << make_data(&impl_type, sizeof(impl_type));
    ob << constant;
    ob << data_flow;
    ob << in_shape_of_subgraph;

    ob << output;
    ob << user_mark;
    ob << optimized;
    ob << share_buffer;
    // Element count is not serialized: load() iterates the same fixed container.
    for (const auto& _support_padding : _support_padding_in_axis) {
        ob << _support_padding;
    }
    ob << has_reused_memory;
    ob << reused_memory_color;

    // fused_prims;
    {
        ob << fused_prims.size();
        for (auto& f_desc : fused_prims) {
            // Descriptors that belong to a node still in the program are stored
            // by id; detached ones are serialized inline (flag distinguishes).
            if (get_program().has_node(f_desc.desc->id)) {
                ob << true;
                ob << f_desc.desc->id;
            } else {
                ob << false;
                ob << f_desc.desc;
            }

            ob << f_desc.input_layout;
            ob << f_desc.output_layout;

            // Fuse params are dispatched on the fused primitive's type string.
            ob << cldnn::prim_map_storage::instance().get_type_string(f_desc.f_param->type());
            if (f_desc.f_param->type() == activation::type_id()) {
                auto casted = std::dynamic_pointer_cast<ActivationFuseParams>(f_desc.f_param);
                if (get_program().has_node(casted->_desc->id)) {
                    ob << true;
                    ob << casted->_desc->id;
                } else {
                    ob << false;
                    ob << casted->_desc;
                }
            } else if (f_desc.f_param->type() == reorder::type_id()) {
                auto casted = std::dynamic_pointer_cast<ReorderFuseParams>(f_desc.f_param);
                ob << casted->_in;
                ob << casted->_out;
            } else if (f_desc.f_param->type() == eltwise::type_id()) {
                auto casted = std::dynamic_pointer_cast<EltwiseFuseParams>(f_desc.f_param);
                if (get_program().has_node(casted->_desc->id)) {
                    ob << true;
                    ob << casted->_desc->id;
                } else {
                    ob << false;
                    ob << casted->_desc;
                }
            } else if (f_desc.f_param->type() == quantize::type_id()) {
                auto casted = std::dynamic_pointer_cast<QuantizeFuseParams>(f_desc.f_param);
                ob << casted->_out_layout;
                ob << casted->_scale_shift_opt;
                ob << casted->_need_post_scale;
                ob << casted->_need_post_shift;
                ob << casted->_need_pre_shift;
                ob << casted->_need_clamp;
                ob << casted->_need_min_clamp;
                ob << casted->_need_max_clamp;
                ob << casted->_per_tensor_input_range;
                ob << casted->_per_tensor_input_scale;
                ob << casted->_per_tensor_input_shift;
                ob << casted->_per_tensor_output_range;
                ob << casted->_per_tensor_output_scale;
                ob << casted->_per_tensor_output_shift;
                ob << casted->_in_lo;
                ob << casted->_in_hi;
                ob << casted->_in_scale;
                ob << casted->_in_shift;
                ob << casted->_out_lo;
                ob << casted->_out_hi;
                ob << casted->_out_scale;
                ob << casted->_out_shift;
            }

            ob << f_desc.deps.size();
            for (auto& dep : f_desc.deps) {
                ob << dep.first;
                ob << dep.second;
            }

            ob << f_desc.fused_deps.size();
            for (auto& f_dep : f_desc.fused_deps) {
                ob << f_dep.first;
                ob << f_dep.second;
            }
            ob << f_desc.outer_dep_start_idx;
            ob << f_desc.total_num_deps;
        }
    }

#ifdef ENABLE_ONEDNN_FOR_GPU
    size_t num_fused_prims = fused_prims_onednn.size();
    ob << num_fused_prims;
    for (auto fused_prim : fused_prims_onednn) {
        // Enum-like fields are written as raw bytes via make_data.
        ob << make_data(&fused_prim.op_type, sizeof(onednn_post_op_type));
        ob << fused_prim.mem_offset;
        ob << fused_prim.mem_dep;
        ob << make_data(&fused_prim.tag, sizeof(dnnl::memory::format_tag));
        ob << fused_prim.flatten;
        ob << fused_prim.dims;
        ob << make_data(&fused_prim.dt, sizeof(dnnl::memory::data_type));
    }
#endif // ENABLE_ONEDNN_FOR_GPU
}
void program_node::load(cldnn::BinaryInputBuffer& ib) {
    // Deserializes node state previously written by program_node::save().
    // NOTE(review): the read order below must stay in lock-step with save();
    // reordering any field silently corrupts cached-program deserialization.
    ib >> valid_output_layouts;
    ib >> output_layouts;
    {
        // preferred_input_fmts (serialized as raw int32_t values of format::type)
        size_t preferred_input_fmts_size;
        int32_t format_type_int;
        ib >> preferred_input_fmts_size;
        preferred_input_fmts.clear();
        for (size_t i = 0; i < preferred_input_fmts_size; ++i) {
            ib >> format_type_int;
            preferred_input_fmts.push_back((format::type) format_type_int);
        }
    }
    {
        // preferred_output_fmts (serialized as raw int32_t values of format::type)
        size_t preferred_output_fmts_size;
        int32_t format_type_int;
        ib >> preferred_output_fmts_size;
        preferred_output_fmts.clear();
        for (size_t i = 0; i < preferred_output_fmts_size; ++i) {
            ib >> format_type_int;
            preferred_output_fmts.push_back((format::type) format_type_int);
        }
    }
    {
        // dependencies: serialized as (primitive_id, index) pairs; raw node
        // pointers are re-resolved through the owning program's node table.
        size_t deps_size;
        primitive_id dep_id;
        int32_t dep_idx;
        ib >> deps_size;
        dependencies.clear();
        for (size_t i = 0; i < deps_size; ++i) {
            ib >> dep_id;
            ib >> dep_idx;
            dependencies.emplace_back(std::make_pair(get_program().get_node_ptr(dep_id).get(), dep_idx));
        }
    }
    {
        // users: serialized by primitive_id, re-resolved to node pointers.
        size_t users_size;
        primitive_id user_id;
        ib >> users_size;
        users.clear();
        users.resize(0); // redundant after clear(); kept as-is for byte-compatibility
        for (size_t i = 0; i < users_size; ++i) {
            ib >> user_id;
            users.push_back(get_program().get_node_ptr(user_id).get());
        }
    }
    // Scalar / flag state (same order as in save()).
    ib >> memory_dependencies;
    ib >> make_data(&impl_type, sizeof(impl_type));
    ib >> constant;
    ib >> data_flow;
    ib >> in_shape_of_subgraph;
    ib >> output;
    ib >> user_mark;
    ib >> optimized;
    ib >> share_buffer;
    for (auto& _support_padding : _support_padding_in_axis) {
        ib >> _support_padding;
    }
    ib >> has_reused_memory;
    ib >> reused_memory_color;
    // fused_prims;
    {
        size_t fused_desc_size;
        ib >> fused_desc_size;
        for (size_t i = 0; i < fused_desc_size; ++i) {
            // The fused primitive's descriptor is either shared with a node of
            // this program (stored as its id) or was serialized inline.
            bool exist_prim;
            ib >> exist_prim;
            std::shared_ptr<const primitive> desc;
            if (exist_prim) {
                primitive_id desc_id;
                ib >> desc_id;
                desc = get_program().get_node_ptr(desc_id)->desc;
            } else {
                ib >> desc;
            }
            auto f_desc = fused_primitive_desc(desc);
            ib >> f_desc.input_layout;
            ib >> f_desc.output_layout;
            // The fuse-parameter payload is dispatched on the serialized type name.
            std::string f_param_type_str;
            ib >> f_param_type_str;
            auto f_param_type = cldnn::prim_map_storage::instance().get_type_id(f_param_type_str);
            if (f_param_type == activation::type_id()) {
                ib >> exist_prim;
                std::shared_ptr<activation> param_desc;
                if (exist_prim) {
                    primitive_id desc_id;
                    ib >> desc_id;
                    param_desc = std::dynamic_pointer_cast<activation>(get_program().get_node_ptr(desc_id)->desc);
                } else {
                    ib >> param_desc;
                }
                f_desc.f_param = std::make_shared<ActivationFuseParams>(param_desc);
            } else if (f_param_type == reorder::type_id()) {
                layout in, out;
                ib >> in;
                ib >> out;
                f_desc.f_param = std::make_shared<ReorderFuseParams>(in, out);
            } else if (f_param_type == eltwise::type_id()) {
                ib >> exist_prim;
                std::shared_ptr<eltwise> param_desc;
                if (exist_prim) {
                    primitive_id desc_id;
                    ib >> desc_id;
                    param_desc = std::dynamic_pointer_cast<eltwise>(get_program().get_node_ptr(desc_id)->desc);
                } else {
                    ib >> param_desc;
                }
                f_desc.f_param = std::make_shared<EltwiseFuseParams>(param_desc);
            } else if (f_param_type == quantize::type_id()) {
                // QuantizeFuseParams is reconstructed field-by-field; the field
                // order matches the stream layout written by save().
                layout out_layout;
                bool scale_shift_opt;
                bool need_post_scale;
                bool need_post_shift;
                bool need_pre_shift;
                bool need_clamp;
                bool need_min_clamp;
                bool need_max_clamp;
                bool per_tensor_input_range;
                bool per_tensor_input_scale;
                bool per_tensor_input_shift;
                bool per_tensor_output_range;
                bool per_tensor_output_scale;
                bool per_tensor_output_shift;
                float in_lo;
                float in_hi;
                float in_scale;
                float in_shift;
                float out_lo;
                float out_hi;
                float out_scale;
                float out_shift;
                ib >> out_layout;
                ib >> scale_shift_opt;
                ib >> need_post_scale;
                ib >> need_post_shift;
                ib >> need_pre_shift;
                ib >> need_clamp;
                ib >> need_min_clamp;
                ib >> need_max_clamp;
                ib >> per_tensor_input_range;
                ib >> per_tensor_input_scale;
                ib >> per_tensor_input_shift;
                ib >> per_tensor_output_range;
                ib >> per_tensor_output_scale;
                ib >> per_tensor_output_shift;
                ib >> in_lo;
                ib >> in_hi;
                ib >> in_scale;
                ib >> in_shift;
                ib >> out_lo;
                ib >> out_hi;
                ib >> out_scale;
                ib >> out_shift;
                f_desc.f_param = std::make_shared<QuantizeFuseParams>(out_layout, scale_shift_opt, need_post_scale, need_post_shift,
                                    need_pre_shift, need_clamp, need_min_clamp, need_max_clamp, per_tensor_input_range,
                                    per_tensor_input_scale, per_tensor_input_shift, per_tensor_output_range, per_tensor_output_scale,
                                    per_tensor_output_shift, in_lo, in_hi, in_scale, in_shift, out_lo, out_hi, out_scale, out_shift);
            } else {
                // Unknown/plain fuse type: keep only the type id.
                f_desc.f_param = std::make_shared<NodeFuseParams>(f_param_type);
            }
            size_t num_deps;
            primitive_id prim_id;
            size_t idx;
            ib >> num_deps;
            f_desc.deps.clear();
            for (size_t i = 0; i < num_deps; ++i) {
                ib >> prim_id;
                ib >> idx;
                f_desc.deps.emplace_back(std::make_pair(prim_id, idx));
            }
            ib >> num_deps;
            f_desc.fused_deps.clear();
            for (size_t i = 0; i < num_deps; ++i) {
                ib >> prim_id;
                ib >> idx;
                f_desc.fused_deps[prim_id] = idx;
            }
            ib >> f_desc.outer_dep_start_idx;
            ib >> f_desc.total_num_deps;
            fused_prims.emplace_back(f_desc);
        }
    }
#ifdef ENABLE_ONEDNN_FOR_GPU
    // oneDNN post-op descriptors: enums are restored as raw bytes, so the
    // serialized format is only valid for a matching oneDNN/build version.
    size_t num_fused_prims;
    ib >> num_fused_prims;
    fused_prims_onednn.resize(num_fused_prims);
    for (size_t idx = 0; idx < num_fused_prims; ++idx) {
        ib >> make_data(&fused_prims_onednn[idx].op_type, sizeof(onednn_post_op_type));
        ib >> fused_prims_onednn[idx].mem_offset;
        ib >> fused_prims_onednn[idx].mem_dep;
        ib >> make_data(&fused_prims_onednn[idx].tag, sizeof(dnnl::memory::format_tag));
        ib >> fused_prims_onednn[idx].flatten;
        ib >> fused_prims_onednn[idx].dims;
        ib >> make_data(&fused_prims_onednn[idx].dt, sizeof(dnnl::memory::data_type));
    }
#endif // ENABLE_ONEDNN_FOR_GPU
}
/* ----------------------------------------- */
/* Onednn fused operations integration logic */
/* ----------------------------------------- */

View File

@ -198,14 +198,4 @@ static void generate_anchors(unsigned int base_size,
}
}
}
void proposal_inst::save(BinaryOutputBuffer& ob) const {
    // Serializes base primitive_inst state followed by the precomputed anchors.
    parent::save(ob);
    ob << _anchors;
}
void proposal_inst::load(BinaryInputBuffer& ib) {
    // Mirrors save(): restore base state, then the anchor list.
    parent::load(ib);
    ib >> _anchors;
}
} // namespace cldnn

View File

@ -42,18 +42,4 @@ void read_value_inst::update_output_memory() {
const auto& variable = get_network().get_variable(variable_id());
set_output_memory(variable.get_memory(), false, 0);
}
void read_value_inst::save(cldnn::BinaryOutputBuffer& ob) const {
    // Serializes base primitive_inst state followed by the bound variable id.
    parent::save(ob);
    ob << variable_id();
}
void read_value_inst::load(cldnn::BinaryInputBuffer& ib) {
    // Mirrors save(): restore base state, then re-bind the variable id.
    parent::load(ib);
    // Named var_id (not variable_id) so the local does not shadow the
    // variable_id() accessor used by the sibling save().
    std::string var_id;
    ib >> var_id;
    set_variable_id(var_id);
}
} // namespace cldnn

View File

@ -275,14 +275,4 @@ void reorder_inst::update_output_memory() {
}
_mem_allocated = false;
}
void reorder_inst::save(cldnn::BinaryOutputBuffer& ob) const {
    // Serializes base primitive_inst state followed by the reinterpret flag.
    parent::save(ob);
    ob << _req_reinterpr;
}
void reorder_inst::load(cldnn::BinaryInputBuffer& ib) {
    // Mirrors save(): restore base state, then the reinterpret flag.
    parent::load(ib);
    ib >> _req_reinterpr;
}
} // namespace cldnn

View File

@ -57,6 +57,7 @@ public:
return DeviceFeaturesKey();
}
virtual const std::string GetName() const { return kernelName; }
virtual void GetUpdateDispatchDataFunc(KernelData& kd) const { }
static const primitive_db& get_db() { return db; }

View File

@ -145,6 +145,14 @@ KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params, co
return GetNaiveBestKernel(allImplementations, params, options);
}
// Looks up a registered kernel implementation by its kernel name.
// Returns nullptr when no implementation with that name is registered.
std::shared_ptr<KernelBase> kernel_selector_base::GetImplementation(std::string& kernel_name) const {
    for (const auto& impl : implementations) {
        // operator== is the idiomatic equality test; compare() == 0 is equivalent.
        if (impl->GetName() == kernel_name)
            return impl;
    }
    return nullptr;
}
KernelList kernel_selector_base::GetAllImplementations(const Params& params, const optional_params& options, KernelType kType) const {
using PriorityPair = std::pair<KernelsPriority, std::shared_ptr<KernelBase>>;
auto comparePriority = [](const PriorityPair& firstImpl, const PriorityPair& secondImpl) {

View File

@ -23,6 +23,7 @@ public:
virtual ~kernel_selector_base() {}
KernelData get_best_kernel(const Params& params, const optional_params& options) const;
std::shared_ptr<KernelBase> GetImplementation(std::string& kernel_name) const;
protected:
template <typename T>

View File

@ -81,6 +81,17 @@ bool ActivationKernelBase::Validate(const Params& p, const optional_params& o) c
return true;
}
void ActivationKernelBase::GetUpdateDispatchDataFunc(KernelData& kd) const {
    // Installs the callback that recomputes work-group sizes once the actual
    // (dynamic) shapes are known at execution time.
    kd.update_dispatch_data_func = [this](const Params& params, KernelData& kernel_data) {
        const auto& act_params = static_cast<const activation_params&>(params);
        const auto dispatch = SetDefault(act_params);
        OPENVINO_ASSERT(kernel_data.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
        auto& wg = kernel_data.kernels[0].params.workGroups;
        wg.global = dispatch.gws;
        wg.local = dispatch.lws;
        kernel_data.kernels[0].skip_execution = KernelData::SkipKernelExecution(act_params);
    };
}
KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
if (!Validate(params, options)) {
return {};
@ -94,14 +105,7 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
const auto& prim_params = static_cast<const activation_params&>(params);
auto dispatchData = SetDefault(prim_params);
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
GetUpdateDispatchDataFunc(kd);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,

View File

@ -57,5 +57,6 @@ protected:
virtual JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const;
virtual DispatchData SetDefault(const activation_params& arg) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const;
void GetUpdateDispatchDataFunc(KernelData& kd) const override;
};
} // namespace kernel_selector

View File

@ -113,15 +113,7 @@ ArgMaxMinKernelBase::DispatchData ArgMaxMinKernelAxis::SetDefault(const arg_max_
return dispatchData;
}
KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const optional_params& options) const {
if (!Validate(params, options)) {
return {};
}
const arg_max_min_params& orgParams = static_cast<const arg_max_min_params&>(params);
bool is_dynamic = orgParams.has_dynamic_tensors();
auto dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<arg_max_min_params>(params);
void ArgMaxMinKernelAxis::GetUpdateDispatchDataFunc(KernelData& kd) const {
kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
const auto& prim_params = static_cast<const arg_max_min_params&>(params);
auto dispatchData = SetDefault(prim_params);
@ -143,6 +135,18 @@ KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const opti
kd.internalBufferSizes.push_back(ops_size * elem_size);
kd.internalBufferDataType = prim_params.inputs[0].GetDType();
};
}
KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const optional_params& options) const {
if (!Validate(params, options)) {
return {};
}
const arg_max_min_params& orgParams = static_cast<const arg_max_min_params&>(params);
bool is_dynamic = orgParams.has_dynamic_tensors();
auto dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<arg_max_min_params>(params);
GetUpdateDispatchDataFunc(kd);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);

View File

@ -19,5 +19,6 @@ public:
ParamsKey GetSupportedKey() const override;
private:
bool Validate(const Params&, const optional_params&) const override;
void GetUpdateDispatchDataFunc(KernelData& kd) const override;
};
} // namespace kernel_selector

View File

@ -40,6 +40,17 @@ ArgMaxMinKernelBase::DispatchData ArgMaxMinKernelBase::SetDefault(const arg_max_
return dispatchData;
}
void ArgMaxMinKernelBase::GetUpdateDispatchDataFunc(KernelData& kd) const {
    // Installs the callback that recomputes work-group sizes once the actual
    // (dynamic) shapes are known at execution time.
    kd.update_dispatch_data_func = [this](const Params& params, KernelData& kernel_data) {
        const auto& argminmax_params = static_cast<const arg_max_min_params&>(params);
        const auto dispatch = SetDefault(argminmax_params);
        OPENVINO_ASSERT(kernel_data.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
        auto& wg = kernel_data.kernels[0].params.workGroups;
        wg.global = dispatch.gws;
        wg.local = dispatch.lws;
        kernel_data.kernels[0].skip_execution = KernelData::SkipKernelExecution(argminmax_params);
    };
}
KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
if (!Validate(params, options)) {
return {};
@ -50,14 +61,7 @@ KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, cons
DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<arg_max_min_params>(params);
kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
const auto& prim_params = static_cast<const arg_max_min_params&>(params);
auto dispatchData = SetDefault(prim_params);
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
GetUpdateDispatchDataFunc(kd);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);

View File

@ -55,5 +55,6 @@ protected:
virtual JitConstants GetJitConstants(const arg_max_min_params& params) const;
virtual DispatchData SetDefault(const arg_max_min_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
void GetUpdateDispatchDataFunc(KernelData& kd) const override;
};
} // namespace kernel_selector

View File

@ -64,6 +64,17 @@ bool BorderKernelBase::SkipKernelExecution(const border_params& params) const {
return params.outputs[0].LogicalSize() == 0;
}
void BorderKernelBase::GetUpdateDispatchDataFunc(KernelData& kd) const {
    // Installs the callback that recomputes work-group sizes once the actual
    // (dynamic) shapes are known at execution time. Captures `this` because
    // skip detection uses the member SkipKernelExecution overload.
    kd.update_dispatch_data_func = [this](const Params& params, KernelData& kernel_data) {
        const auto& border_prim_params = static_cast<const border_params&>(params);
        const auto dispatch = SetDefault(border_prim_params);
        OPENVINO_ASSERT(kernel_data.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
        auto& wg = kernel_data.kernels[0].params.workGroups;
        wg.global = dispatch.gws;
        wg.local = dispatch.lws;
        kernel_data.kernels[0].skip_execution = SkipKernelExecution(border_prim_params);
    };
}
KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
const optional_params& options) const {
assert(params.GetType() == KernelType::BORDER);
@ -73,14 +84,7 @@ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
auto dispatchData = SetDefault(prim_params);
KernelData k_data = KernelData::Default<border_params>(params);
k_data.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
const auto& prim_params = static_cast<const border_params&>(params);
auto dispatchData = SetDefault(prim_params);
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = SkipKernelExecution(prim_params);
};
GetUpdateDispatchDataFunc(k_data);
auto cldnn_jit = GetJitConstants(prim_params);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);

View File

@ -55,5 +55,6 @@ protected:
DispatchData SetDefault(const border_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
bool SkipKernelExecution(const border_params& params) const;
void GetUpdateDispatchDataFunc(KernelData& kd) const override;
};
} // namespace kernel_selector

View File

@ -76,6 +76,17 @@ static std::string GetInputBlockND(const broadcast_params& params) {
return str_result;
}
void BroadcastKernelBase::GetUpdateDispatchDataFunc(KernelData& kd) const {
    // Installs the callback that recomputes work-group sizes once the actual
    // (dynamic) shapes are known at execution time. Capture-free: SetDefault
    // and SkipKernelExecution are resolved without `this` here.
    kd.update_dispatch_data_func = [](const Params& params, KernelData& kernel_data) {
        const auto& bcast_params = static_cast<const broadcast_params&>(params);
        const auto dispatch = SetDefault(bcast_params);
        OPENVINO_ASSERT(kernel_data.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
        auto& wg = kernel_data.kernels[0].params.workGroups;
        wg.global = dispatch.gws;
        wg.local = dispatch.lws;
        kernel_data.kernels[0].skip_execution = KernelData::SkipKernelExecution(bcast_params);
    };
}
KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params,
const optional_params& options) const {
assert(params.GetType() == KernelType::BROADCAST);
@ -84,15 +95,7 @@ KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params,
auto dispatchData = SetDefault(prim_params);
KernelData k_data = KernelData::Default<broadcast_params>(params);
k_data.update_dispatch_data_func = [](const Params& params, KernelData& kd) {
const auto& prim_params = static_cast<const broadcast_params&>(params);
auto dispatchData = SetDefault(prim_params);
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
GetUpdateDispatchDataFunc(k_data);
auto cldnn_jit = GetJitConstants(prim_params);
cldnn_jit.AddConstant(MakeJitConstant("INPUT0_BLOCK_ND", GetInputBlockND(prim_params)));

View File

@ -37,5 +37,6 @@ protected:
JitConstants GetJitConstants(const broadcast_params& params) const;
static DispatchData SetDefault(const broadcast_params& params);
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
void GetUpdateDispatchDataFunc(KernelData& kd) const override;
};
} // namespace kernel_selector

View File

@ -100,14 +100,7 @@ ConcatenationKernelBase::DispatchData ConcatenationKernelBase::SetDefault(const
return dispatchData;
}
KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
if (!Validate(params, options)) {
return {};
}
const concatenation_params& orgParams = static_cast<const concatenation_params&>(params);
KernelData kd = KernelData::Default<concatenation_params>(params, orgParams.inputs.size());
void ConcatenationKernelBase::GetUpdateDispatchDataFunc(KernelData& kd) const {
kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
const auto& prim_params = static_cast<const concatenation_params&>(params);
uint32_t lastOffset = 0;
@ -139,6 +132,16 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params,
lastOffset += (uint32_t)input.GetDims()[concatChannelIndex].v;
}
};
}
KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
if (!Validate(params, options)) {
return {};
}
const concatenation_params& orgParams = static_cast<const concatenation_params&>(params);
KernelData kd = KernelData::Default<concatenation_params>(params, orgParams.inputs.size());
GetUpdateDispatchDataFunc(kd);
bool is_dynamic = orgParams.has_dynamic_tensors();
uint32_t lastOffset = 0;

Some files were not shown because too many files have changed in this diff Show More