[GPU] cldnn::program serialization (#21290)
* cldnn::program serialization * fixed to set a stream in a outputbuffer * removed unnecessary comments * replaced emplace_back with push_back * updated bool vector serializer * fixed several bugs * removed m_model from compiled_model * fixed streams_test for model caching * updated mutable_data handling logic * added a new API SetUpdateDispatchDataFunc * updated to serialize weights_reorder_param * added functional tests for dynamic model caching * renamed to GetUpdateDispatchDataFunc * added dynamic model caching tests
This commit is contained in:
parent
7ff517466c
commit
3e2c2c06af
@ -142,8 +142,6 @@ struct kernel_impl_params {
|
||||
|
||||
virtual primitive_type_id type() const { return desc->type; }
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const;
|
||||
void load(BinaryInputBuffer& ib);
|
||||
const program& get_program() const {
|
||||
OPENVINO_ASSERT(prog != nullptr, "[GPU] Program pointer in kernel_impl_params is not initialized");
|
||||
return *prog;
|
||||
|
@ -84,13 +84,8 @@ public:
|
||||
|
||||
network(program::ptr program, stream::ptr stream, uint16_t stream_id);
|
||||
|
||||
network(cldnn::BinaryInputBuffer& ifs, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id);
|
||||
network(cldnn::BinaryInputBuffer& ifs, const ExecutionConfig& config, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id);
|
||||
|
||||
~network();
|
||||
|
||||
void save(cldnn::BinaryOutputBuffer& ob);
|
||||
|
||||
static ptr build_network(engine& engine,
|
||||
const topology& topology,
|
||||
const ExecutionConfig& config = {},
|
||||
|
@ -98,6 +98,9 @@ public:
|
||||
_processing_order.erase(i);
|
||||
}
|
||||
|
||||
void save(cldnn::BinaryOutputBuffer& ob) const;
|
||||
void load(cldnn::BinaryInputBuffer& ib, program& p);
|
||||
|
||||
private:
|
||||
list_of_nodes _processing_order;
|
||||
std::map<program_node*, node_iterator> processing_order_iterators;
|
||||
@ -282,6 +285,9 @@ public:
|
||||
static std::shared_ptr<ov::threading::IStreamsExecutor> make_task_executor(const ExecutionConfig& config);
|
||||
static std::shared_ptr<ICompilationContext> make_compilation_context(const ExecutionConfig& config);
|
||||
|
||||
void save(cldnn::BinaryOutputBuffer& ob) const;
|
||||
void load(cldnn::BinaryInputBuffer& ib);
|
||||
|
||||
private:
|
||||
uint32_t prog_id = 0;
|
||||
engine& _engine;
|
||||
|
@ -18,7 +18,7 @@ struct memory;
|
||||
class BinaryOutputBuffer : public OutputBuffer<BinaryOutputBuffer> {
|
||||
public:
|
||||
BinaryOutputBuffer(std::ostream& stream)
|
||||
: OutputBuffer<BinaryOutputBuffer>(this), stream(stream), _impl_params(nullptr) {}
|
||||
: OutputBuffer<BinaryOutputBuffer>(this), stream(stream), _impl_params(nullptr), _strm(nullptr) {}
|
||||
|
||||
void write(void const * data, std::streamsize size) {
|
||||
auto const written_size = stream.rdbuf()->sputn(reinterpret_cast<const char*>(data), size);
|
||||
@ -28,16 +28,19 @@ public:
|
||||
|
||||
void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
|
||||
void* getKernelImplParams() const { return _impl_params; }
|
||||
void set_stream(void* strm) { _strm = strm; }
|
||||
void* get_stream() const { return _strm; }
|
||||
|
||||
private:
|
||||
std::ostream& stream;
|
||||
void* _impl_params;
|
||||
void* _strm;
|
||||
};
|
||||
|
||||
class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
|
||||
public:
|
||||
BinaryInputBuffer(std::istream& stream, engine& engine)
|
||||
: InputBuffer(this, engine), _stream(stream), _impl_params(nullptr) {}
|
||||
: InputBuffer<BinaryInputBuffer>(this, engine), _stream(stream), _impl_params(nullptr) {}
|
||||
|
||||
void read(void* const data, std::streamsize size) {
|
||||
auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
|
||||
@ -47,17 +50,6 @@ public:
|
||||
|
||||
void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
|
||||
void* getKernelImplParams() const { return _impl_params; }
|
||||
void addConstData(const uint32_t net_id, const std::string& prim_id, const std::shared_ptr<memory> mem_ptr) {
|
||||
while (_const_data_map.size() <= net_id) {
|
||||
_const_data_map.emplace_back(std::unordered_map<std::string, std::shared_ptr<memory>>());
|
||||
}
|
||||
OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) == _const_data_map[net_id].end(), "[GPU] duplicated primitive id " + prim_id);
|
||||
_const_data_map[net_id][prim_id] = mem_ptr;
|
||||
}
|
||||
std::shared_ptr<memory> getConstData(const uint32_t net_id, const std::string& prim_id) {
|
||||
OPENVINO_ASSERT(_const_data_map[net_id].find(prim_id) != _const_data_map[net_id].end(), "[GPU] Not found primitive id " + prim_id);
|
||||
return _const_data_map[net_id][prim_id];
|
||||
}
|
||||
|
||||
std::streampos tellg() { return _stream.tellg(); }
|
||||
void seekg(std::streampos pos) { _stream.seekg(pos); }
|
||||
@ -65,7 +57,6 @@ public:
|
||||
private:
|
||||
std::istream& _stream;
|
||||
void* _impl_params;
|
||||
std::vector<std::unordered_map<std::string, std::shared_ptr<memory>>> _const_data_map;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
|
@ -44,9 +44,7 @@ public:
|
||||
static void save(BufferType& buffer, const cldnn::layout& _layout) {
|
||||
buffer << make_data(&_layout.data_type, sizeof(cldnn::data_types));
|
||||
buffer << make_data(&_layout.format, sizeof(cldnn::format));
|
||||
buffer << _layout.data_padding.filling_value();
|
||||
buffer << _layout.data_padding.lower_size().sizes();
|
||||
buffer << _layout.data_padding.upper_size().sizes();
|
||||
buffer << _layout.data_padding;
|
||||
buffer << _layout.get_partial_shape();
|
||||
}
|
||||
};
|
||||
@ -57,16 +55,7 @@ public:
|
||||
static void load(BufferType& buffer, cldnn::layout& _layout) {
|
||||
buffer >> make_data(&_layout.data_type, sizeof(cldnn::data_types));
|
||||
buffer >> make_data(&_layout.format, sizeof(cldnn::format));
|
||||
|
||||
{
|
||||
float _filling_value;
|
||||
buffer >> _filling_value;
|
||||
std::vector<cldnn::tensor::value_type> _lower_size;
|
||||
buffer >> _lower_size;
|
||||
std::vector<cldnn::tensor::value_type> _upper_size;
|
||||
buffer >> _upper_size;
|
||||
_layout.data_padding = cldnn::padding(_lower_size, _upper_size, _filling_value);
|
||||
}
|
||||
buffer >> _layout.data_padding;
|
||||
|
||||
ov::PartialShape partial_shape;
|
||||
buffer >> partial_shape;
|
||||
|
@ -37,6 +37,32 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <typename BufferType>
|
||||
class Serializer<BufferType, std::vector<bool>, typename std::enable_if<std::is_base_of<OutputBuffer<BufferType>, BufferType>::value>::type> {
|
||||
public:
|
||||
static void save(BufferType& buffer, const std::vector<bool>& vector) {
|
||||
buffer << vector.size();
|
||||
for (const bool el : vector) {
|
||||
buffer << el;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename BufferType>
|
||||
class Serializer<BufferType, std::vector<bool>, typename std::enable_if<std::is_base_of<InputBuffer<BufferType>, BufferType>::value>::type> {
|
||||
public:
|
||||
static void load(BufferType& buffer, std::vector<bool>& vector) {
|
||||
typename std::vector<bool>::size_type vector_size = 0UL;
|
||||
buffer >> vector_size;
|
||||
bool el;
|
||||
vector.clear();
|
||||
for (size_t i = 0; i < vector_size; ++i) {
|
||||
buffer >> el;
|
||||
vector.push_back(el);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename BufferType, typename T>
|
||||
class Serializer<BufferType, std::vector<T>, typename std::enable_if<std::is_base_of<OutputBuffer<BufferType>, BufferType>::value &&
|
||||
!std::is_arithmetic<T>::value>::type> {
|
||||
|
@ -28,7 +28,7 @@ public:
|
||||
const std::shared_ptr<const ov::IPlugin>& plugin,
|
||||
RemoteContextImpl::Ptr context,
|
||||
const ExecutionConfig& config);
|
||||
CompiledModel(cldnn::BinaryInputBuffer ib,
|
||||
CompiledModel(cldnn::BinaryInputBuffer& ib,
|
||||
const std::shared_ptr<const ov::IPlugin>& plugin,
|
||||
RemoteContextImpl::Ptr context,
|
||||
const ExecutionConfig& config);
|
||||
@ -58,7 +58,6 @@ private:
|
||||
RemoteContextImpl::Ptr m_context;
|
||||
ExecutionConfig m_config;
|
||||
std::shared_ptr<ov::threading::ITaskExecutor> m_wait_executor;
|
||||
std::shared_ptr<ov::Model> m_model;
|
||||
std::string m_model_name;
|
||||
std::vector<ov::Output<const ov::Node>> m_inputs;
|
||||
std::vector<ov::Output<const ov::Node>> m_outputs;
|
||||
|
@ -121,8 +121,6 @@ struct arg_max_min : public primitive_base<arg_max_min> {
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
primitive_base<arg_max_min>::save(ob);
|
||||
ob << input;
|
||||
ob << num_outputs;
|
||||
ob << make_data(&mode, sizeof(ov::op::TopKMode));
|
||||
ob << top_k;
|
||||
ob << axis;
|
||||
@ -133,8 +131,6 @@ struct arg_max_min : public primitive_base<arg_max_min> {
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
primitive_base<arg_max_min>::load(ib);
|
||||
ib >> input;
|
||||
ib >> num_outputs;
|
||||
ib >> make_data(&mode, sizeof(ov::op::TopKMode));
|
||||
ib >> top_k;
|
||||
ib >> axis;
|
||||
|
@ -17,6 +17,8 @@ namespace cldnn {
|
||||
struct condition : public primitive_base<condition> {
|
||||
CLDNN_DECLARE_PRIMITIVE(condition)
|
||||
|
||||
condition() : primitive_base("", {}) {}
|
||||
|
||||
/// @brief branch has compiled program, input_map and output_map
|
||||
///
|
||||
struct branch {
|
||||
@ -39,6 +41,43 @@ struct condition : public primitive_base<condition> {
|
||||
ss << "]}";
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const {
|
||||
ob << input_map.size();
|
||||
for (auto& input_pair : input_map) {
|
||||
ob << input_pair.first;
|
||||
ob << input_pair.second;
|
||||
}
|
||||
ob << output_map.size();
|
||||
for (auto& output_pair : output_map) {
|
||||
ob << output_pair.first;
|
||||
ob << output_pair.second;
|
||||
}
|
||||
inner_program->save(ob);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) {
|
||||
size_t map_size;
|
||||
ib >> map_size;
|
||||
input_map.clear();
|
||||
for (size_t i = 0; i < map_size; ++i) {
|
||||
primitive_id input_first, input_second;
|
||||
ib >> input_first;
|
||||
ib >> input_second;
|
||||
input_map.insert({input_first, input_second});
|
||||
}
|
||||
ib >> map_size;
|
||||
output_map.clear();
|
||||
for (size_t i = 0; i < map_size; ++i) {
|
||||
size_t output_index;
|
||||
primitive_id output_second;
|
||||
ib >> output_index;
|
||||
ib >> output_second;
|
||||
output_map.insert({output_index, output_second});
|
||||
}
|
||||
inner_program = std::make_shared<cldnn::program>(ib.get_engine());
|
||||
inner_program->load(ib);
|
||||
}
|
||||
};
|
||||
|
||||
/// @brief Constructs condition primitive / layer.
|
||||
@ -62,6 +101,18 @@ struct condition : public primitive_base<condition> {
|
||||
branch branch_true;
|
||||
branch branch_false;
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
primitive_base<condition>::save(ob);
|
||||
ob << branch_true;
|
||||
ob << branch_false;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
primitive_base<condition>::load(ib);
|
||||
ib >> branch_true;
|
||||
ib >> branch_false;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {}; }
|
||||
};
|
||||
|
@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include "intel_gpu/runtime/memory.hpp"
|
||||
#include "intel_gpu/runtime/engine.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
@ -33,5 +34,53 @@ struct data : public primitive_base<data> {
|
||||
seed = hash_combine(seed, id);
|
||||
return seed;
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
primitive_base<data>::save(ob);
|
||||
|
||||
ob << mem->get_layout();
|
||||
|
||||
const auto _allocation_type = mem->get_allocation_type();
|
||||
ob << make_data(&_allocation_type, sizeof(_allocation_type));
|
||||
|
||||
size_t data_size = mem->size();
|
||||
ob << make_data(&data_size, sizeof(size_t));
|
||||
|
||||
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
|
||||
ob << make_data(mem->buffer_ptr(), data_size);
|
||||
} else {
|
||||
std::vector<uint8_t> _buf;
|
||||
_buf.resize(data_size);
|
||||
stream* strm = reinterpret_cast<stream*>(ob.get_stream());
|
||||
mem->copy_to(*strm, _buf.data());
|
||||
ob << make_data(_buf.data(), data_size);
|
||||
}
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
primitive_base<data>::load(ib);
|
||||
|
||||
layout output_layout = layout();
|
||||
ib >> output_layout;
|
||||
|
||||
allocation_type _allocation_type = allocation_type::unknown;
|
||||
ib >> make_data(&_allocation_type, sizeof(_allocation_type));
|
||||
|
||||
size_t data_size = 0;
|
||||
ib >> make_data(&data_size, sizeof(size_t));
|
||||
|
||||
mem = ib.get_engine().allocate_memory(output_layout, _allocation_type, false);
|
||||
|
||||
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
|
||||
ib >> make_data(mem->buffer_ptr(), data_size);
|
||||
} else {
|
||||
std::vector<uint8_t> _buf;
|
||||
_buf.resize(data_size);
|
||||
ib >> make_data(_buf.data(), data_size);
|
||||
// stream* strm = reinterpret_cast<stream*>(ib.get_stream());
|
||||
auto& strm = ib.get_engine().get_service_stream();
|
||||
mem->copy_from(strm, _buf.data());
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace cldnn
|
||||
|
@ -246,6 +246,7 @@ struct loop : public primitive_base<loop> {
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
primitive_base<loop>::save(ob);
|
||||
body_program->save(ob);
|
||||
ob << trip_count_id;
|
||||
ob << first_execution_condition_id;
|
||||
ob << num_iteration_id;
|
||||
@ -259,6 +260,8 @@ struct loop : public primitive_base<loop> {
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
primitive_base<loop>::load(ib);
|
||||
body_program = std::make_shared<cldnn::program>(ib.get_engine());
|
||||
body_program->load(ib);
|
||||
ib >> trip_count_id;
|
||||
ib >> first_execution_condition_id;
|
||||
ib >> num_iteration_id;
|
||||
|
@ -282,6 +282,7 @@ struct lstm_gemm : public primitive_base<lstm_gemm> {
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
primitive_base<lstm_gemm>::save(ob);
|
||||
ob << weights;
|
||||
ob << recurrent;
|
||||
ob << bias;
|
||||
@ -290,6 +291,7 @@ struct lstm_gemm : public primitive_base<lstm_gemm> {
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
primitive_base<lstm_gemm>::load(ib);
|
||||
ib >> weights;
|
||||
ib >> recurrent;
|
||||
ib >> bias;
|
||||
@ -401,6 +403,7 @@ struct lstm_elt : public primitive_base<lstm_elt> {
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
primitive_base<lstm_elt>::save(ob);
|
||||
ob << cell;
|
||||
ob << clip;
|
||||
ob << input_forget;
|
||||
@ -411,6 +414,7 @@ struct lstm_elt : public primitive_base<lstm_elt> {
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
primitive_base<lstm_elt>::load(ib);
|
||||
ib >> cell;
|
||||
ib >> clip;
|
||||
ib >> input_forget;
|
||||
|
@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include "intel_gpu/runtime/memory.hpp"
|
||||
#include "intel_gpu/runtime/engine.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
@ -59,11 +60,51 @@ struct mutable_data : public primitive_base<mutable_data> {
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
primitive_base<mutable_data>::save(ob);
|
||||
ob << make_data(&fill_type, sizeof(filler_type));
|
||||
|
||||
ob << mem->get_layout();
|
||||
|
||||
const auto _allocation_type = mem->get_allocation_type();
|
||||
ob << make_data(&_allocation_type, sizeof(_allocation_type));
|
||||
|
||||
size_t data_size = mem->size();
|
||||
ob << make_data(&data_size, sizeof(size_t));
|
||||
|
||||
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
|
||||
ob << make_data(mem->buffer_ptr(), data_size);
|
||||
} else {
|
||||
std::vector<uint8_t> _buf;
|
||||
_buf.resize(data_size);
|
||||
stream* strm = reinterpret_cast<stream*>(ob.get_stream());
|
||||
mem->copy_to(*strm, _buf.data());
|
||||
ob << make_data(_buf.data(), data_size);
|
||||
}
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
primitive_base<mutable_data>::load(ib);
|
||||
ib >> make_data(&fill_type, sizeof(filler_type));
|
||||
|
||||
layout output_layout = layout();
|
||||
ib >> output_layout;
|
||||
|
||||
allocation_type _allocation_type = allocation_type::unknown;
|
||||
ib >> make_data(&_allocation_type, sizeof(_allocation_type));
|
||||
|
||||
size_t data_size = 0;
|
||||
ib >> make_data(&data_size, sizeof(size_t));
|
||||
|
||||
mem = ib.get_engine().allocate_memory(output_layout, _allocation_type, false);
|
||||
|
||||
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
|
||||
ib >> make_data(mem->buffer_ptr(), data_size);
|
||||
} else {
|
||||
std::vector<uint8_t> _buf;
|
||||
_buf.resize(data_size);
|
||||
ib >> make_data(_buf.data(), data_size);
|
||||
// stream* strm = reinterpret_cast<stream*>(ib.get_stream());
|
||||
auto& strm = ib.get_engine().get_service_stream();
|
||||
mem->copy_from(strm, _buf.data());
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace cldnn
|
||||
|
@ -98,12 +98,17 @@ struct prim_map_storage {
|
||||
return map.at(type_string);
|
||||
}
|
||||
|
||||
const cldnn::primitive_id get_type_string(const cldnn::primitive_type_id type_id) const {
|
||||
return inverse_map.at(type_id);
|
||||
}
|
||||
|
||||
bool set_type_id(const std::string& type_string, const cldnn::primitive_type_id type_id) {
|
||||
return map.insert({type_string, type_id}).second;
|
||||
return map.insert({type_string, type_id}).second && inverse_map.insert({type_id, type_string}).second;
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string, cldnn::primitive_type_id> map;
|
||||
std::unordered_map<cldnn::primitive_type_id, std::string> inverse_map;
|
||||
};
|
||||
|
||||
/// @brief Base class of network primitive description.
|
||||
@ -249,6 +254,7 @@ public:
|
||||
ib >> output_paddings;
|
||||
size_t output_data_types_size;
|
||||
ib >> output_data_types_size;
|
||||
output_data_types.clear();
|
||||
for (size_t i = 0; i < output_data_types_size; i++) {
|
||||
bool has_value;
|
||||
ib >> has_value;
|
||||
|
@ -20,6 +20,8 @@ enum class reorder_mean_mode {
|
||||
};
|
||||
|
||||
struct WeightsReorderParams {
|
||||
WeightsReorderParams() {}
|
||||
|
||||
WeightsReorderParams(const layout& in_layout, const layout& out_layout, bool transposed = false, bool grouped = false)
|
||||
: _in_layout(in_layout),
|
||||
_out_layout(out_layout),
|
||||
@ -51,6 +53,19 @@ struct WeightsReorderParams {
|
||||
void set_input_layout(const layout& layout) { _in_layout = layout; }
|
||||
void set_output_layout(const layout& layout) { _out_layout = layout; }
|
||||
|
||||
void save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
ob << _in_layout;
|
||||
ob << _out_layout;
|
||||
ob << _transposed;
|
||||
ob << _grouped;
|
||||
}
|
||||
void load(cldnn::BinaryInputBuffer& ib) {
|
||||
ib >> _in_layout;
|
||||
ib >> _out_layout;
|
||||
ib >> _transposed;
|
||||
ib >> _grouped;
|
||||
}
|
||||
|
||||
protected:
|
||||
layout _in_layout;
|
||||
layout _out_layout;
|
||||
@ -257,6 +272,12 @@ struct reorder : public primitive_base<reorder> {
|
||||
ob << subtract_per_feature;
|
||||
ob << make_data(&mean_mode, sizeof(reorder_mean_mode));
|
||||
ob << make_data(&input_mem_type, sizeof(memory_type));
|
||||
if (weights_reorder_params == nullptr) {
|
||||
ob << false;
|
||||
} else {
|
||||
ob << true;
|
||||
weights_reorder_params->save(ob);
|
||||
}
|
||||
ob << truncate;
|
||||
}
|
||||
|
||||
@ -267,6 +288,12 @@ struct reorder : public primitive_base<reorder> {
|
||||
ib >> subtract_per_feature;
|
||||
ib >> make_data(&mean_mode, sizeof(reorder_mean_mode));
|
||||
ib >> make_data(&input_mem_type, sizeof(memory_type));
|
||||
bool has_weights_reorder_params;
|
||||
ib >> has_weights_reorder_params;
|
||||
if (has_weights_reorder_params) {
|
||||
weights_reorder_params = std::make_shared<WeightsReorderParams>();
|
||||
weights_reorder_params->load(ib);
|
||||
}
|
||||
ib >> truncate;
|
||||
}
|
||||
|
||||
|
@ -30,20 +30,6 @@ std::string assign_inst::to_string(const assign_node& node) {
|
||||
return primitive_description.str();
|
||||
}
|
||||
|
||||
void assign_inst::save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
parent::save(ob);
|
||||
|
||||
ob << variable_id();
|
||||
}
|
||||
|
||||
void assign_inst::load(cldnn::BinaryInputBuffer& ib) {
|
||||
parent::load(ib);
|
||||
|
||||
std::string variable_id;
|
||||
ib >> variable_id;
|
||||
set_variable_id(variable_id);
|
||||
}
|
||||
|
||||
void assign_inst::on_execute() {
|
||||
_outputs[0] = input_memory_ptr(0);
|
||||
}
|
||||
|
@ -266,16 +266,4 @@ convolution_inst::typed_primitive_inst(network& network, convolution_node const&
|
||||
input_layout.feature(),
|
||||
"Weights/ifm mismatch");
|
||||
}
|
||||
|
||||
void convolution_inst::save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
parent::save(ob);
|
||||
|
||||
ob << _deform_conv_dep_offset;
|
||||
}
|
||||
|
||||
void convolution_inst::load(cldnn::BinaryInputBuffer& ib) {
|
||||
parent::load(ib);
|
||||
|
||||
ib >> _deform_conv_dep_offset;
|
||||
}
|
||||
} // namespace cldnn
|
||||
|
@ -51,60 +51,4 @@ std::string data_inst::to_string(data_node const& node) {
|
||||
data_inst::typed_primitive_inst(network& network, data_node const& node)
|
||||
: parent(network, node, attach_or_copy_data(network, node.get_attached_memory_ptr())) {}
|
||||
|
||||
// Cache blob format:
|
||||
// [ kernel_impl_params ]
|
||||
// [ output memory information ]
|
||||
// [ data stored in memory ]
|
||||
void data_inst::save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
parent::save(ob);
|
||||
ob << _outputs[0]->get_layout();
|
||||
|
||||
const auto _allocation_type = _outputs[0]->get_allocation_type();
|
||||
ob << make_data(&_allocation_type, sizeof(_allocation_type));
|
||||
|
||||
size_t data_size = _outputs[0]->size();
|
||||
ob << make_data(&data_size, sizeof(size_t));
|
||||
|
||||
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
|
||||
ob << make_data(_outputs[0]->buffer_ptr(), data_size);
|
||||
} else {
|
||||
std::vector<uint8_t> _buf;
|
||||
_buf.resize(data_size);
|
||||
_outputs[0]->copy_to(get_network().get_stream(), _buf.data());
|
||||
ob << make_data(_buf.data(), data_size);
|
||||
}
|
||||
}
|
||||
|
||||
void data_inst::load(BinaryInputBuffer& ib) {
|
||||
parent::load(ib);
|
||||
layout output_layout = layout();
|
||||
ib >> output_layout;
|
||||
|
||||
allocation_type _allocation_type = allocation_type::unknown;
|
||||
ib >> make_data(&_allocation_type, sizeof(_allocation_type));
|
||||
|
||||
size_t data_size = 0;
|
||||
ib >> make_data(&data_size, sizeof(size_t));
|
||||
|
||||
if (!get_network().is_primary_stream()) {
|
||||
_outputs[0] = ib.getConstData(_network.get_local_id(), id());
|
||||
auto pos = ib.tellg();
|
||||
pos += data_size;
|
||||
ib.seekg(pos);
|
||||
} else {
|
||||
_outputs[0] = get_network().get_engine().allocate_memory(output_layout, _allocation_type, false);
|
||||
|
||||
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
|
||||
ib >> make_data(_outputs[0]->buffer_ptr(), data_size);
|
||||
} else {
|
||||
std::vector<uint8_t> _buf;
|
||||
_buf.resize(data_size);
|
||||
ib >> make_data(_buf.data(), data_size);
|
||||
_outputs[0]->copy_from(get_network().get_stream(), _buf.data());
|
||||
}
|
||||
|
||||
ib.addConstData(_network.get_local_id(), id(), _outputs[0]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace cldnn
|
||||
|
@ -234,88 +234,4 @@ detection_output_inst::typed_primitive_inst(network& network, detection_output_n
|
||||
node.get_dependency(2).is_padded(),
|
||||
"Detection output layer doesn't support input padding in Prior-Box input");
|
||||
}
|
||||
|
||||
void detection_output_inst::save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
parent::save(ob);
|
||||
|
||||
// argument (struct detection_output)
|
||||
ob << argument->id;
|
||||
ob << argument->input;
|
||||
ob << make_data(&argument->output_paddings[0], sizeof(argument->output_paddings[0]));
|
||||
ob << argument->num_classes;
|
||||
ob << argument->keep_top_k;
|
||||
ob << argument->share_location;
|
||||
ob << argument->background_label_id;
|
||||
ob << argument->nms_threshold;
|
||||
ob << argument->top_k;
|
||||
ob << argument->eta;
|
||||
ob << make_data(&argument->code_type, sizeof(argument->code_type));
|
||||
ob << argument->variance_encoded_in_target;
|
||||
ob << argument->confidence_threshold;
|
||||
ob << argument->prior_info_size;
|
||||
ob << argument->prior_coordinates_offset;
|
||||
ob << argument->prior_is_normalized;
|
||||
ob << argument->input_width;
|
||||
ob << argument->input_height;
|
||||
ob << argument->decrease_label_id;
|
||||
ob << argument->clip_before_nms;
|
||||
ob << argument->clip_after_nms;
|
||||
ob << argument->objectness_score;
|
||||
}
|
||||
|
||||
void detection_output_inst::load(cldnn::BinaryInputBuffer& ib) {
|
||||
parent::load(ib);
|
||||
|
||||
primitive_id id;
|
||||
std::vector<input_info> input;
|
||||
uint32_t num_classes;
|
||||
uint32_t keep_top_k;
|
||||
bool share_location;
|
||||
int background_label_id;
|
||||
float nms_threshold;
|
||||
int top_k;
|
||||
float eta;
|
||||
prior_box_code_type code_type = prior_box_code_type::corner;
|
||||
bool variance_encoded_in_target;
|
||||
float confidence_threshold;
|
||||
int32_t prior_info_size;
|
||||
int32_t prior_coordinates_offset;
|
||||
bool prior_is_normalized;
|
||||
int32_t input_width;
|
||||
int32_t input_height;
|
||||
bool decrease_label_id;
|
||||
bool clip_before_nms;
|
||||
bool clip_after_nms;
|
||||
float objectness_score;
|
||||
padding output_padding;
|
||||
|
||||
ib >> id;
|
||||
ib >> input;
|
||||
ib >> make_data(&output_padding, sizeof(output_padding));
|
||||
ib >> num_classes;
|
||||
ib >> keep_top_k;
|
||||
ib >> share_location;
|
||||
ib >> background_label_id;
|
||||
ib >> nms_threshold;
|
||||
ib >> top_k;
|
||||
ib >> eta;
|
||||
ib >> make_data(&code_type, sizeof(code_type));
|
||||
ib >> variance_encoded_in_target;
|
||||
ib >> confidence_threshold;
|
||||
ib >> prior_info_size;
|
||||
ib >> prior_coordinates_offset;
|
||||
ib >> prior_is_normalized;
|
||||
ib >> input_width;
|
||||
ib >> input_height;
|
||||
ib >> decrease_label_id;
|
||||
ib >> clip_before_nms;
|
||||
ib >> clip_after_nms;
|
||||
ib >> objectness_score;
|
||||
|
||||
argument = std::make_shared<detection_output>(
|
||||
id, input, num_classes, keep_top_k, share_location, background_label_id, nms_threshold, top_k,
|
||||
eta, code_type, variance_encoded_in_target, confidence_threshold, prior_info_size,
|
||||
prior_coordinates_offset, prior_is_normalized, input_width, input_height, decrease_label_id,
|
||||
clip_before_nms, clip_after_nms, objectness_score, output_padding);
|
||||
}
|
||||
} // namespace cldnn
|
||||
|
@ -13,12 +13,17 @@ namespace cldnn {
|
||||
namespace common {
|
||||
|
||||
struct condition_impl : typed_primitive_impl<condition> {
|
||||
using parent = typed_primitive_impl<condition>;
|
||||
using parent::parent;
|
||||
|
||||
DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::common::condition_impl)
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<condition_impl>(*this);
|
||||
}
|
||||
|
||||
condition_impl() : parent() {}
|
||||
|
||||
explicit condition_impl(const condition_node& outer) {
|
||||
set_node_params(outer);
|
||||
}
|
||||
@ -131,6 +136,16 @@ struct condition_impl : typed_primitive_impl<condition> {
|
||||
|
||||
void init_kernels(const kernels_cache& , const kernel_impl_params&) override {}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
ob << _node_id;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
ib >> _node_id;
|
||||
}
|
||||
|
||||
private:
|
||||
primitive_id _node_id;
|
||||
};
|
||||
@ -150,5 +165,5 @@ attach_condition_common::attach_condition_common() {
|
||||
} // namespace common
|
||||
} // namespace cldnn
|
||||
|
||||
// TODO: Change code like cldnn::loop
|
||||
ASSIGN_TYPE_NAME(cldnn::common::condition_impl)
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::common::condition_impl)
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::condition)
|
||||
|
@ -79,11 +79,13 @@ struct activation_impl : public typed_primitive_impl<activation> {
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
ob << make_data(&activation_function, sizeof(activation_func));
|
||||
ob << make_data(&additional_params, sizeof(activation_additional_params));
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
ib >> make_data(&activation_function, sizeof(activation_func));
|
||||
ib >> make_data(&additional_params, sizeof(activation_additional_params));
|
||||
}
|
||||
|
@ -35,10 +35,12 @@ struct assign_impl : public typed_primitive_impl<assign> {
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
ob << variable_id;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
ib >> variable_id;
|
||||
}
|
||||
|
||||
|
@ -45,12 +45,14 @@ struct broadcast_impl : public typed_primitive_impl<broadcast> {
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
ob << make_data(&broadcast_mode, sizeof(ov::op::BroadcastModeSpec));
|
||||
ob << make_data(&target_shape, sizeof(ov::Shape));
|
||||
ob << axes_mapping;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
ib >> make_data(&broadcast_mode, sizeof(ov::op::BroadcastModeSpec));
|
||||
ib >> make_data(&target_shape, sizeof(ov::Shape));
|
||||
ib >> axes_mapping;
|
||||
|
@ -40,10 +40,12 @@ struct concatenation_impl : public typed_primitive_impl<concatenation> {
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
ob << axis;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
ib >> axis;
|
||||
}
|
||||
|
||||
|
@ -62,10 +62,12 @@ public:
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
ob << make_data(&nms_type, sizeof(NMSType));
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
ib >> make_data(&nms_type, sizeof(NMSType));
|
||||
}
|
||||
|
||||
|
@ -64,11 +64,13 @@ struct eltwise_impl : public typed_primitive_impl<eltwise> {
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
ob << make_data(&mode, sizeof(eltwise_mode));
|
||||
ob << coefficients;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
ib >> make_data(&mode, sizeof(eltwise_mode));
|
||||
ib >> coefficients;
|
||||
}
|
||||
|
@ -42,11 +42,13 @@ struct gather_impl : public typed_primitive_impl<gather> {
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
ob << axis;
|
||||
ob << batch_dims;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
ib >> axis;
|
||||
ib >> batch_dims;
|
||||
}
|
||||
|
@ -35,10 +35,12 @@ struct read_value_impl : public typed_primitive_impl<read_value> {
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
ob << variable_id;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
ib >> variable_id;
|
||||
}
|
||||
|
||||
|
@ -40,10 +40,12 @@ struct scatter_update_impl : public typed_primitive_impl<scatter_update> {
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
ob << axis;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
ib >> axis;
|
||||
}
|
||||
|
||||
|
@ -56,6 +56,7 @@ struct strided_slice_impl : public typed_primitive_impl<strided_slice> {
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
ob << begin_data;
|
||||
ob << end_data;
|
||||
ob << strides_data;
|
||||
@ -68,6 +69,7 @@ struct strided_slice_impl : public typed_primitive_impl<strided_slice> {
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
ib >> begin_data;
|
||||
ib >> end_data;
|
||||
ib >> strides_data;
|
||||
|
@ -39,10 +39,12 @@ struct tile_impl : public typed_primitive_impl<tile> {
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
ob << repeats;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
ib >> repeats;
|
||||
}
|
||||
|
||||
|
@ -31,6 +31,15 @@ struct activation_impl : typed_primitive_impl_ocl<activation> {
|
||||
return make_unique<activation_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
kernel_arguments_data get_arguments(const typed_primitive_inst<activation>& instance) const override {
|
||||
kernel_arguments_data args = parent::get_arguments(instance);
|
||||
|
||||
|
@ -45,6 +45,15 @@ struct arg_max_min_impl : typed_primitive_impl_ocl<arg_max_min> {
|
||||
return make_unique<arg_max_min_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
kernel_arguments_data get_arguments(const typed_primitive_inst<arg_max_min>& instance) const override {
|
||||
kernel_arguments_data args = parent::get_arguments(instance);
|
||||
|
@ -110,7 +110,30 @@ struct border_impl : typed_primitive_impl_ocl<border> {
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
const auto& prim_params = static_cast<const kernel_selector::border_params&>(*_kernel_data.params);
|
||||
if (prim_params.inputs[0].LogicalSize() == 0) {
|
||||
ob << true;
|
||||
} else {
|
||||
ob << false;
|
||||
}
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
ib >> zero_input;
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
// WA for static impl deserialization
|
||||
bool zero_input = false;
|
||||
|
||||
kernel_arguments_data get_arguments(const typed_primitive_inst<border>& instance) const override {
|
||||
kernel_arguments_data args = parent::get_arguments(instance);
|
||||
|
||||
@ -127,7 +150,8 @@ protected:
|
||||
const auto& prim_params = static_cast<const kernel_selector::border_params&>(*_kernel_data.params);
|
||||
std::vector<layout> layouts;
|
||||
|
||||
if (prim_params.inputs[0].LogicalSize() == 0) {
|
||||
if ((_kernel_data.params == nullptr && zero_input) ||
|
||||
(_kernel_data.params != nullptr && prim_params.inputs[0].LogicalSize() == 0)) {
|
||||
layout any_layout = {data_types::u8, format::bfyx, {1, 1, 1, 1}};
|
||||
layouts.push_back(any_layout);
|
||||
}
|
||||
|
@ -23,6 +23,15 @@ struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
|
||||
return make_unique<broadcast_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<broadcast>();
|
||||
auto params = get_default_params<kernel_selector::broadcast_params>(impl_param, is_shape_agnostic);
|
||||
|
@ -53,6 +53,15 @@ struct concatenation_impl : typed_primitive_impl_ocl<concatenation> {
|
||||
return make_unique<concatenation_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<concatenation>();
|
||||
|
@ -3,7 +3,7 @@
|
||||
//
|
||||
|
||||
#include "primitive_base.hpp"
|
||||
|
||||
#include "kernel_base.h"
|
||||
#include "convolution_inst.h"
|
||||
#include "convolution/convolution_kernel_selector.h"
|
||||
#include "convolution/convolution_params.h"
|
||||
@ -25,6 +25,15 @@ struct convolution_impl : typed_primitive_impl_ocl<convolution> {
|
||||
return make_unique<convolution_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
kernel_arguments_data get_arguments(const typed_primitive_inst<convolution>& instance) const override {
|
||||
kernel_arguments_data args = parent::get_arguments(instance);
|
||||
|
@ -23,6 +23,15 @@ struct crop_impl : typed_primitive_impl_ocl<crop> {
|
||||
return make_unique<crop_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
auto params = get_default_params<kernel_selector::eltwise_params>(impl_param, is_shape_agnostic);
|
||||
|
@ -56,6 +56,15 @@ struct cum_sum_impl : typed_primitive_impl_ocl<cum_sum> {
|
||||
return make_unique<cum_sum_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<cum_sum>();
|
||||
|
@ -28,7 +28,6 @@ struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
|
||||
|
||||
std::shared_ptr<kernel_selector::cl_kernel_data> cl_kernel;
|
||||
std::vector<kernel::ptr> _kernels;
|
||||
std::string _cached_kernel_id;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<custom_gpu_primitive_impl>(*this);
|
||||
@ -39,8 +38,7 @@ struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
|
||||
|
||||
custom_gpu_primitive_impl(const custom_gpu_primitive_impl& other)
|
||||
: cl_kernel(other.cl_kernel)
|
||||
, _kernels({})
|
||||
, _cached_kernel_id(other._cached_kernel_id) {
|
||||
, _kernels({}) {
|
||||
for (const auto& kernel : other._kernels) {
|
||||
_kernels.emplace_back(kernel->clone());
|
||||
}
|
||||
@ -49,8 +47,7 @@ struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
|
||||
custom_gpu_primitive_impl(const custom_gpu_primitive_node& arg,
|
||||
std::shared_ptr<kernel_selector::cl_kernel_data>& cl_kernel)
|
||||
: cl_kernel(cl_kernel)
|
||||
, _kernels()
|
||||
, _cached_kernel_id() { }
|
||||
, _kernels() { }
|
||||
|
||||
std::vector<std::shared_ptr<cldnn::kernel_string>> get_kernels_source() override {
|
||||
std::vector<std::shared_ptr<cldnn::kernel_string>> kernel_strings;
|
||||
@ -64,12 +61,12 @@ struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
|
||||
_kernels.insert(_kernels.begin(), compiled_kernels.begin(), compiled_kernels.end());
|
||||
}
|
||||
|
||||
void init_by_cached_kernels(const kernels_cache& kernels_cache) override {
|
||||
_kernels.emplace_back(kernels_cache.get_kernel_from_cached_kernels(_cached_kernel_id));
|
||||
void init_by_cached_kernels(const kernels_cache& kernels_cache, std::vector<std::string>& cached_kernel_ids) override {
|
||||
_kernels.emplace_back(kernels_cache.get_kernel_from_cached_kernels(cached_kernel_ids[0]));
|
||||
}
|
||||
|
||||
void set_cached_kernel_ids(const kernels_cache& kernels_cache) override {
|
||||
_cached_kernel_id = kernels_cache.get_cached_kernel_id(_kernels[0]);
|
||||
std::vector<std::string> get_cached_kernel_ids(const kernels_cache& kernels_cache) override {
|
||||
return {kernels_cache.get_cached_kernel_id(_kernels[0])};
|
||||
}
|
||||
|
||||
void set_arguments_impl(custom_gpu_primitive_inst& instance) override {
|
||||
@ -93,19 +90,23 @@ struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
|
||||
return stream.enqueue_kernel(*_kernels.front(), cl_kernel.get()->params, args, events, instance.is_output());
|
||||
}
|
||||
|
||||
std::vector<kernel::ptr> get_kernels() override {
|
||||
return _kernels;
|
||||
}
|
||||
|
||||
std::vector<kernel::ptr> get_kernels() const override {
|
||||
return _kernels;
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
parent::save(ob);
|
||||
ob << *cl_kernel;
|
||||
ob << _cached_kernel_id;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
cl_kernel = std::make_shared<kernel_selector::cl_kernel_data>();
|
||||
ib >> *cl_kernel;
|
||||
ib >> _cached_kernel_id;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -23,6 +23,15 @@ struct eltwise_impl : typed_primitive_impl_ocl<eltwise> {
|
||||
return make_unique<eltwise_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
kernel_arguments_data get_arguments(const typed_primitive_inst<eltwise>& instance) const override {
|
||||
kernel_arguments_data args = parent::get_arguments(instance);
|
||||
|
@ -3,7 +3,7 @@
|
||||
//
|
||||
|
||||
#include "primitive_base.hpp"
|
||||
|
||||
#include "kernel_base.h"
|
||||
#include "fully_connected_inst.h"
|
||||
#include "fully_connected/fully_connected_kernel_selector.h"
|
||||
#include "fully_connected/fully_connected_params.h"
|
||||
@ -48,6 +48,15 @@ struct fully_connected_impl : typed_primitive_impl_ocl<fully_connected> {
|
||||
return make_unique<fully_connected_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
kernel_arguments_data get_arguments(const typed_primitive_inst<fully_connected>& instance) const override {
|
||||
kernel_arguments_data args = parent::get_arguments(instance);
|
||||
|
@ -67,6 +67,15 @@ struct gather_impl : typed_primitive_impl_ocl<gather> {
|
||||
return make_unique<gather_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<gather>();
|
||||
|
@ -54,6 +54,15 @@ struct gather_elements_impl : typed_primitive_impl_ocl<gather_elements> {
|
||||
return make_unique<gather_elements_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<gather_elements>();
|
||||
auto params = get_default_params<kernel_selector::gather_elements_params>(impl_param, is_shape_agnostic);
|
||||
|
@ -23,6 +23,15 @@ struct gather_nd_impl : typed_primitive_impl_ocl<gather_nd> {
|
||||
return make_unique<gather_nd_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
const auto& primitive = impl_param.typed_desc<gather_nd>();
|
||||
auto params = get_default_params<kernel_selector::gather_nd_params>(impl_param);
|
||||
|
@ -23,6 +23,15 @@ struct gemm_impl : typed_primitive_impl_ocl<gemm> {
|
||||
return make_unique<gemm_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<gemm>();
|
||||
|
@ -23,6 +23,15 @@ struct mvn_impl : typed_primitive_impl_ocl<mvn> {
|
||||
return make_unique<mvn_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<mvn>();
|
||||
auto params = get_default_params<kernel_selector::mvn_params>(impl_param, is_shape_agnostic);
|
||||
|
@ -25,6 +25,15 @@ struct count_nonzero_impl : typed_primitive_impl_ocl<count_nonzero> {
|
||||
return make_unique<count_nonzero_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
auto params = get_default_params<kernel_selector::count_nonzero_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::count_nonzero_optional_params>(impl_param.get_program());
|
||||
@ -49,6 +58,15 @@ struct gather_nonzero_impl : typed_primitive_impl_ocl<gather_nonzero> {
|
||||
return make_unique<gather_nonzero_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
auto params = get_default_params<kernel_selector::gather_nonzero_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::gather_nonzero_optional_params>(impl_param.get_program());
|
||||
|
@ -50,6 +50,15 @@ struct permute_impl : typed_primitive_impl_ocl<permute> {
|
||||
return make_unique<permute_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<permute>();
|
||||
auto params = get_default_params<kernel_selector::permute_params>(impl_param, is_shape_agnostic);
|
||||
|
@ -33,18 +33,16 @@ For example, all gpu convolution implementations should derive from typed_primit
|
||||
template <class PType>
|
||||
struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
|
||||
kernel_selector::kernel_data _kernel_data;
|
||||
std::vector<std::string> _cached_kernel_ids;
|
||||
std::vector<kernel::ptr> _kernels;
|
||||
|
||||
// a pair of batch program hash and kernel entry hash of each ocl impl.
|
||||
std::pair<std::string, std::string> kernel_dump_info;
|
||||
|
||||
typed_primitive_impl_ocl() : _kernel_data({}), _cached_kernel_ids({}), _kernels({}) {}
|
||||
typed_primitive_impl_ocl() : _kernel_data({}), _kernels({}) {}
|
||||
|
||||
typed_primitive_impl_ocl(const typed_primitive_impl_ocl<PType>& other)
|
||||
: typed_primitive_impl<PType>(other._weights_reorder_params, other._kernel_name, other._is_dynamic)
|
||||
, _kernel_data(other._kernel_data)
|
||||
, _cached_kernel_ids(other._cached_kernel_ids)
|
||||
, _kernels({}) {
|
||||
_kernels.reserve(other._kernels.size());
|
||||
for (size_t k = 0; k < other._kernels.size(); ++k) {
|
||||
@ -65,17 +63,19 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
|
||||
// [ kernel_selector::kernel_data ]
|
||||
// [ kernel_ids ]
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
primitive_impl::save(ob);
|
||||
ob << make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype));
|
||||
ob << _kernel_data.internalBufferSizes;
|
||||
ob << _kernel_data.kernels;
|
||||
ob << _cached_kernel_ids;
|
||||
ob << _kernel_data.kernelName;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
primitive_impl::load(ib);
|
||||
ib >> make_data(&_kernel_data.internalBufferDataType, sizeof(kernel_selector::Datatype));
|
||||
ib >> _kernel_data.internalBufferSizes;
|
||||
ib >> _kernel_data.kernels;
|
||||
ib >> _cached_kernel_ids;
|
||||
ib >> _kernel_data.kernelName;
|
||||
}
|
||||
|
||||
template<typename ImplType>
|
||||
@ -146,20 +146,20 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
void init_by_cached_kernels(const kernels_cache& kernels_cache) override {
|
||||
void init_by_cached_kernels(const kernels_cache& kernels_cache, std::vector<std::string>& cached_kernel_ids) override {
|
||||
if (is_cpu()) {
|
||||
return;
|
||||
}
|
||||
_kernels.clear();
|
||||
|
||||
_kernels.reserve(_cached_kernel_ids.size());
|
||||
for (size_t k = 0; k < _cached_kernel_ids.size(); ++k) {
|
||||
_kernels.emplace_back(kernels_cache.get_kernel_from_cached_kernels(_cached_kernel_ids[k]));
|
||||
_kernels.reserve(cached_kernel_ids.size());
|
||||
for (size_t k = 0; k < cached_kernel_ids.size(); ++k) {
|
||||
_kernels.emplace_back(kernels_cache.get_kernel_from_cached_kernels(cached_kernel_ids[k]));
|
||||
}
|
||||
}
|
||||
|
||||
void set_cached_kernel_ids(const kernels_cache& kernels_cache) override {
|
||||
_cached_kernel_ids = kernels_cache.get_cached_kernel_ids(_kernels);
|
||||
std::vector<std::string> get_cached_kernel_ids(const kernels_cache& kernels_cache) override {
|
||||
return {kernels_cache.get_cached_kernel_ids(_kernels)};
|
||||
}
|
||||
|
||||
std::vector<kernel::ptr> get_kernels() const override {
|
||||
|
@ -23,6 +23,15 @@ struct quantize_impl : typed_primitive_impl_ocl<quantize> {
|
||||
return make_unique<quantize_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
kernel_arguments_data get_arguments(const typed_primitive_inst<quantize>& instance) const override {
|
||||
kernel_arguments_data args;
|
||||
|
@ -23,6 +23,15 @@ struct range_impl : typed_primitive_impl_ocl<range> {
|
||||
return make_unique<range_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
auto params = get_default_params<kernel_selector::range_params>(impl_param, is_shape_agnostic);
|
||||
for (int i : {1, 2})
|
||||
|
@ -72,6 +72,13 @@ struct reduce_impl : typed_primitive_impl_ocl<reduce> {
|
||||
return make_unique<reduce_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<reduce>();
|
||||
auto params = get_default_params<kernel_selector::reduce_params>(impl_param, is_shape_agnostic);
|
||||
|
@ -24,6 +24,15 @@ struct reorder_impl : typed_primitive_impl_ocl<reorder> {
|
||||
return make_unique<reorder_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
kernel_arguments_data get_arguments(const reorder_inst& instance) const override {
|
||||
kernel_arguments_data args = parent::get_arguments(instance);
|
||||
|
@ -23,6 +23,15 @@ struct rms_impl : typed_primitive_impl_ocl<rms> {
|
||||
return make_unique<rms_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<rms>();
|
||||
auto params = get_default_params<kernel_selector::rms_params>(impl_param, is_shape_agnostic);
|
||||
@ -63,3 +72,6 @@ attach_rms_impl::attach_rms_impl() {
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
||||
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::rms_impl)
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::rms)
|
||||
|
@ -23,6 +23,15 @@ struct scatter_nd_update_impl : typed_primitive_impl_ocl<scatter_nd_update> {
|
||||
return make_unique<scatter_nd_update_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<scatter_nd_update>();
|
||||
auto params = get_default_params<kernel_selector::scatter_nd_update_params>(impl_param, is_shape_agnostic);
|
||||
|
@ -48,6 +48,15 @@ struct scatter_update_impl : typed_primitive_impl_ocl<scatter_update> {
|
||||
return make_unique<scatter_update_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<scatter_update>();
|
||||
|
@ -23,6 +23,15 @@ struct select_impl : typed_primitive_impl_ocl<select> {
|
||||
return make_unique<select_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
auto params = get_default_params<kernel_selector::select_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::select_optional_params>(impl_param.get_program());
|
||||
|
@ -23,6 +23,15 @@ struct shape_of_impl : typed_primitive_impl_ocl<shape_of> {
|
||||
return make_unique<shape_of_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
auto params = get_default_params<kernel_selector::shape_of_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::shape_of_optional_params>(impl_param.get_program());
|
||||
|
@ -45,6 +45,15 @@ struct softmax_impl : typed_primitive_impl_ocl<softmax> {
|
||||
return make_unique<softmax_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<softmax>();
|
||||
auto params = get_default_params<kernel_selector::softmax_params>(impl_param, is_shape_agnostic);
|
||||
|
@ -52,6 +52,15 @@ struct strided_slice_impl : typed_primitive_impl_ocl<strided_slice> {
|
||||
return make_unique<strided_slice_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& prim = impl_param.typed_desc<strided_slice>();
|
||||
|
@ -23,6 +23,15 @@ struct tile_impl : typed_primitive_impl_ocl<tile> {
|
||||
return make_unique<tile_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<tile>();
|
||||
|
@ -23,6 +23,15 @@ struct unique_count_impl : typed_primitive_impl_ocl<unique_count> {
|
||||
return make_unique<unique_count_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<unique_count>();
|
||||
auto params = get_default_params<kernel_selector::unique_count_params>(impl_param, is_shape_agnostic);
|
||||
@ -94,6 +103,15 @@ struct unique_gather_impl : typed_primitive_impl_ocl<unique_gather> {
|
||||
return make_unique<unique_gather_impl>(*this);
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
parent::load(ib);
|
||||
if (is_dynamic()) {
|
||||
auto& kernel_selector = kernel_selector_t::Instance();
|
||||
auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
|
||||
kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
|
||||
}
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<unique_gather>();
|
||||
auto params = get_default_params<kernel_selector::unique_gather_params>(impl_param, is_shape_agnostic);
|
||||
|
@ -95,12 +95,14 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
}
|
||||
|
||||
bool is_cpu() const override { return false; }
|
||||
bool is_onednn() const { return true; }
|
||||
|
||||
// Cache blob format:
|
||||
// [ dnnl::primitive_attr ]
|
||||
// [ dnnl::primitive_desc ]
|
||||
// [ dnnl::cache_blob ]
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
primitive_impl::save(ob);
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
if (_attrs->get() == nullptr) {
|
||||
ob << false;
|
||||
@ -203,6 +205,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
primitive_impl::load(ib);
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
bool has_attrs;
|
||||
ib >> has_attrs;
|
||||
|
@ -55,9 +55,6 @@ public:
|
||||
typed_primitive_inst(network& network, const assign_node& desc);
|
||||
typed_primitive_inst(network& network) : parent(network), memory_state::variable("") {}
|
||||
|
||||
void save(cldnn::BinaryOutputBuffer& ob) const override;
|
||||
void load(cldnn::BinaryInputBuffer& ib) override;
|
||||
|
||||
void on_execute() override;
|
||||
};
|
||||
|
||||
|
@ -189,9 +189,6 @@ public:
|
||||
bool compensation_term() const { return _impl_params->compensation_layout.has_value(); }
|
||||
bool activations_zero_points_term() const { return _impl_params->activations_zero_points_layout.has_value(); }
|
||||
|
||||
void save(cldnn::BinaryOutputBuffer& ob) const override;
|
||||
void load(cldnn::BinaryInputBuffer& ib) override;
|
||||
|
||||
private:
|
||||
int32_t _deform_conv_dep_offset = 0;
|
||||
};
|
||||
|
@ -40,8 +40,6 @@ public:
|
||||
static std::string to_string(data_node const& node);
|
||||
|
||||
typed_primitive_inst(network& network, data_node const& node);
|
||||
void save(BinaryOutputBuffer& ob) const override;
|
||||
void load(BinaryInputBuffer& ib) override;
|
||||
};
|
||||
|
||||
using data_inst = typed_primitive_inst<data>;
|
||||
|
@ -46,9 +46,6 @@ public:
|
||||
memory::ptr location_memory() const { return dep_memory_ptr(0); }
|
||||
memory::ptr confidence_memory() const { return dep_memory_ptr(1); }
|
||||
memory::ptr prior_box_memory() const { return dep_memory_ptr(2); }
|
||||
|
||||
void save(cldnn::BinaryOutputBuffer& ob) const override;
|
||||
void load(cldnn::BinaryInputBuffer& ib) override;
|
||||
};
|
||||
|
||||
using detection_output_inst = typed_primitive_inst<detection_output>;
|
||||
|
@ -318,8 +318,6 @@ public:
|
||||
event::ptr set_output_memory(memory::ptr mem, bool check = true, size_t idx = 0) override;
|
||||
void reset_memory();
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const override;
|
||||
void load(BinaryInputBuffer& ib) override;
|
||||
void validate_backedges(loop_node const & node) const;
|
||||
|
||||
void update_shape() override { primitive_inst::update_shape(); }
|
||||
|
@ -20,6 +20,7 @@ struct typed_program_node<mutable_data> : public typed_program_node_base<mutable
|
||||
memory& get_attached_memory() const { return *mem; }
|
||||
memory::ptr get_attached_memory_ptr() const { return mem; }
|
||||
void attach_memory(memory::ptr new_mem, bool invalidate_users_if_changed = true);
|
||||
void replace_memory(memory::ptr new_mem, bool invalidate_users_if_changed = false);
|
||||
|
||||
program_node& input(size_t idx = 0) const { return get_dependency(idx); }
|
||||
|
||||
@ -44,8 +45,6 @@ public:
|
||||
typed_primitive_inst(network& network, mutable_data_node const& node);
|
||||
event::ptr set_output_memory(memory::ptr mem, bool check = true, size_t idx = 0) override;
|
||||
const std::list<primitive_id>& get_user_ids() const { return _user_ids; }
|
||||
void save(BinaryOutputBuffer& ob) const override;
|
||||
void load(BinaryInputBuffer& ib) override;
|
||||
|
||||
private:
|
||||
std::list<primitive_id> _user_ids;
|
||||
|
@ -63,15 +63,36 @@ struct primitive_impl {
|
||||
|
||||
// class typed_primitive_gpu_impl override this with return false;
|
||||
virtual bool is_cpu() const { return true; }
|
||||
virtual bool is_onednn() const { return false; }
|
||||
virtual void init_kernels(const kernels_cache& kernels_cache, const kernel_impl_params& params) = 0;
|
||||
virtual void init_by_cached_kernels(const kernels_cache&) {}
|
||||
virtual void set_cached_kernel_ids(const kernels_cache&) {}
|
||||
virtual void init_by_cached_kernels(const kernels_cache&, std::vector<std::string>& cached_kernel_ids) {}
|
||||
virtual std::vector<std::string> get_cached_kernel_ids(const kernels_cache&) { return {}; }
|
||||
virtual std::unique_ptr<primitive_impl> clone() const = 0;
|
||||
virtual std::vector<std::shared_ptr<cldnn::kernel_string>> get_kernels_source() { return {}; }
|
||||
virtual void reset_kernels_source() {}
|
||||
virtual std::vector<kernel::ptr> get_kernels() const { return {}; }
|
||||
virtual void save(cldnn::BinaryOutputBuffer& ob) const {}
|
||||
virtual void load(cldnn::BinaryInputBuffer& ib) {}
|
||||
virtual void save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
ob << can_reuse_memory;
|
||||
ob << _kernel_name;
|
||||
ob << _is_dynamic;
|
||||
if (_weights_reorder_params == nullptr) {
|
||||
ob << false;
|
||||
} else {
|
||||
ob << true;
|
||||
_weights_reorder_params->save(ob);
|
||||
}
|
||||
}
|
||||
virtual void load(cldnn::BinaryInputBuffer& ib) {
|
||||
ib >> can_reuse_memory;
|
||||
ib >> _kernel_name;
|
||||
ib >> _is_dynamic;
|
||||
bool has_weights_reorder_params;
|
||||
ib >> has_weights_reorder_params;
|
||||
if (has_weights_reorder_params) {
|
||||
_weights_reorder_params = std::make_shared<WeightsReorderParams>();
|
||||
_weights_reorder_params->load(ib);
|
||||
}
|
||||
}
|
||||
// returns a pair of batch program hash and kernel entry of each ocl impl. Returns "" for other impl types.
|
||||
virtual std::pair<std::string, std::string> get_kernels_dump_info() const {
|
||||
return std::make_pair("", "");
|
||||
@ -195,10 +216,6 @@ public:
|
||||
_impl->init_kernels(kernels_cache, *_impl_params);
|
||||
}
|
||||
|
||||
void init_by_cached_kernels(const kernels_cache& kernels_cache) {
|
||||
_impl->init_by_cached_kernels(kernels_cache);
|
||||
}
|
||||
|
||||
void set_arguments();
|
||||
|
||||
void validate() const {
|
||||
@ -252,8 +269,6 @@ public:
|
||||
|
||||
std::vector<memory::ptr> get_intermediates_memories() const { return _intermediates_memory; }
|
||||
|
||||
virtual void save(cldnn::BinaryOutputBuffer& ob) const;
|
||||
virtual void load(cldnn::BinaryInputBuffer& ib);
|
||||
void rebuild_deps(
|
||||
std::unordered_map<primitive_id, std::shared_ptr<primitive_inst>> const& primitives);
|
||||
void rebuild_exec_deps(
|
||||
|
@ -386,6 +386,9 @@ public:
|
||||
const std::vector<fused_primitive_desc>& get_fused_primitives() const { return fused_prims; }
|
||||
std::vector<fused_primitive_desc>& get_fused_primitives() { return fused_prims; }
|
||||
|
||||
void save(cldnn::BinaryOutputBuffer& ob) const;
|
||||
void load(cldnn::BinaryInputBuffer& ib);
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
const std::shared_ptr<dnnl::primitive_attr>& get_onednn_primitive_attributes() const {
|
||||
if (onednn_attrs == nullptr)
|
||||
|
@ -45,14 +45,6 @@ public:
|
||||
end_x = e_x;
|
||||
end_y = e_y;
|
||||
}
|
||||
|
||||
void save(BinaryOutputBuffer& ob) const {
|
||||
ob << start_x << start_y << end_x << end_y;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) {
|
||||
ib >> start_x >> start_y >> end_x >> end_y;
|
||||
}
|
||||
};
|
||||
|
||||
// indices of the memory objects used by the layer
|
||||
@ -81,8 +73,6 @@ public:
|
||||
typed_primitive_inst(network& network, proposal_node const& desc);
|
||||
|
||||
const std::vector<anchor>& get_anchors() const { return _anchors; }
|
||||
void save(BinaryOutputBuffer& ob) const override;
|
||||
void load(BinaryInputBuffer& ib) override;
|
||||
|
||||
private:
|
||||
std::vector<anchor> _anchors;
|
||||
|
@ -48,9 +48,6 @@ public:
|
||||
typed_primitive_inst(network& network, const read_value_node& desc);
|
||||
typed_primitive_inst(network& network) : parent(network), memory_state::variable("") {}
|
||||
|
||||
void save(cldnn::BinaryOutputBuffer& ob) const override;
|
||||
void load(cldnn::BinaryInputBuffer& ib) override;
|
||||
|
||||
void update_output_memory() override;
|
||||
|
||||
protected:
|
||||
|
@ -103,9 +103,6 @@ public:
|
||||
return req_reinterpr;
|
||||
}
|
||||
|
||||
void save(cldnn::BinaryOutputBuffer& ob) const override;
|
||||
void load(cldnn::BinaryInputBuffer& ib) override;
|
||||
|
||||
private:
|
||||
void on_execute() override;
|
||||
|
||||
|
@ -70,146 +70,4 @@ bool kernel_impl_params::operator==(const kernel_impl_params& rhs) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
void kernel_impl_params::save(BinaryOutputBuffer& ob) const {
|
||||
ob << desc;
|
||||
ob << static_cast<uint64_t>(dev_type);
|
||||
ob << has_runtime_layouts;
|
||||
ob << unique_id;
|
||||
ob << input_layouts;
|
||||
ob << output_layouts;
|
||||
ob << input_offsets.size();
|
||||
for (size_t i = 0; i < input_offsets.size(); i++) {
|
||||
ob << input_offsets[i].sizes();
|
||||
}
|
||||
|
||||
if (weights_layout.has_value()) {
|
||||
ob << true;
|
||||
ob << weights_layout.value();
|
||||
} else {
|
||||
ob << false;
|
||||
}
|
||||
|
||||
if (bias_layout.has_value()) {
|
||||
ob << true;
|
||||
ob << bias_layout.value();
|
||||
} else {
|
||||
ob << false;
|
||||
}
|
||||
|
||||
if (weights_zero_points_layout.has_value()) {
|
||||
ob << true;
|
||||
ob << weights_zero_points_layout.value();
|
||||
} else {
|
||||
ob << false;
|
||||
}
|
||||
|
||||
if (activations_zero_points_layout.has_value()) {
|
||||
ob << true;
|
||||
ob << activations_zero_points_layout.value();
|
||||
} else {
|
||||
ob << false;
|
||||
}
|
||||
|
||||
if (compensation_layout.has_value()) {
|
||||
ob << true;
|
||||
ob << compensation_layout.value();
|
||||
} else {
|
||||
ob << false;
|
||||
}
|
||||
|
||||
ob << fused_desc.size();
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
size_t num_fused_prims = fused_desc_onednn.size();
|
||||
ob << num_fused_prims;
|
||||
for (auto fused_prim : fused_desc_onednn) {
|
||||
ob << make_data(&fused_prim.op_type, sizeof(onednn_post_op_type));
|
||||
ob << fused_prim.mem_offset;
|
||||
ob << fused_prim.mem_dep;
|
||||
ob << make_data(&fused_prim.tag, sizeof(dnnl::memory::format_tag));
|
||||
ob << fused_prim.flatten;
|
||||
ob << fused_prim.dims;
|
||||
ob << make_data(&fused_prim.dt, sizeof(dnnl::memory::data_type));
|
||||
}
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
ob << primary_input_idx;
|
||||
}
|
||||
|
||||
void kernel_impl_params::load(BinaryInputBuffer& ib) {
|
||||
prog = nullptr;
|
||||
ib >> desc;
|
||||
size_t dev_type_id = 0;
|
||||
ib >> dev_type_id;
|
||||
dev_type = static_cast<cldnn::device_type>(dev_type_id);
|
||||
ib >> has_runtime_layouts;
|
||||
ib >> unique_id;
|
||||
ib >> input_layouts;
|
||||
ib >> output_layouts;
|
||||
{
|
||||
size_t num_input_offsets;
|
||||
ib >> num_input_offsets;
|
||||
input_offsets.resize(num_input_offsets);
|
||||
for (size_t i = 0; i < num_input_offsets; i++) {
|
||||
std::vector<cldnn::tensor::value_type> sizes;
|
||||
ib >> sizes;
|
||||
input_offsets[i] = cldnn::tensor(sizes);
|
||||
}
|
||||
}
|
||||
bool has_value = false;
|
||||
layout layout_buf;
|
||||
|
||||
ib >> has_value;
|
||||
if (has_value) {
|
||||
ib >> layout_buf;
|
||||
weights_layout = layout_buf;
|
||||
}
|
||||
|
||||
ib >> has_value;
|
||||
if (has_value) {
|
||||
ib >> layout_buf;
|
||||
bias_layout = layout_buf;
|
||||
}
|
||||
|
||||
ib >> has_value;
|
||||
if (has_value) {
|
||||
ib >> layout_buf;
|
||||
weights_zero_points_layout = layout_buf;
|
||||
}
|
||||
|
||||
ib >> has_value;
|
||||
if (has_value) {
|
||||
ib >> layout_buf;
|
||||
activations_zero_points_layout = layout_buf;
|
||||
}
|
||||
|
||||
ib >> has_value;
|
||||
if (has_value) {
|
||||
ib >> layout_buf;
|
||||
compensation_layout = layout_buf;
|
||||
}
|
||||
|
||||
{
|
||||
// Fake fused_desc just for has_fused_primitives()
|
||||
size_t num_fused_desc;
|
||||
ib >> num_fused_desc;
|
||||
if (num_fused_desc > 0) {
|
||||
fused_desc.emplace_back(cldnn::fused_primitive_desc(nullptr));
|
||||
}
|
||||
}
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
size_t num_fused_prims;
|
||||
ib >> num_fused_prims;
|
||||
fused_desc_onednn.resize(num_fused_prims);
|
||||
for (size_t idx = 0; idx < num_fused_prims; ++idx) {
|
||||
ib >> make_data(&fused_desc_onednn[idx].op_type, sizeof(onednn_post_op_type));
|
||||
ib >> fused_desc_onednn[idx].mem_offset;
|
||||
ib >> fused_desc_onednn[idx].mem_dep;
|
||||
ib >> make_data(&fused_desc_onednn[idx].tag, sizeof(dnnl::memory::format_tag));
|
||||
ib >> fused_desc_onednn[idx].flatten;
|
||||
ib >> fused_desc_onednn[idx].dims;
|
||||
ib >> make_data(&fused_desc_onednn[idx].dt, sizeof(dnnl::memory::data_type));
|
||||
}
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
ib >> primary_input_idx;
|
||||
}
|
||||
|
||||
} // namespace cldnn
|
||||
|
@ -672,35 +672,6 @@ loop_inst::typed_primitive_inst(network & network, loop_node const & node)
|
||||
_num_iterations_id = node.get_num_iterations_id();
|
||||
}
|
||||
|
||||
void loop_inst::save(BinaryOutputBuffer& ob) const {
|
||||
parent::save(ob);
|
||||
ob << _input_primitive_maps;
|
||||
ob << _output_primitive_maps;
|
||||
ob << _back_edges;
|
||||
ob << _trip_count_id;
|
||||
ob << _initial_execution_id;
|
||||
ob << _current_iteration_id;
|
||||
ob << _condition_id;
|
||||
ob << _num_iterations_id;
|
||||
body_network->save(ob);
|
||||
}
|
||||
|
||||
void loop_inst::load(BinaryInputBuffer& ib) {
|
||||
parent::load(ib);
|
||||
preproc_memories_done = false,
|
||||
ib >> _input_primitive_maps;
|
||||
ib >> _output_primitive_maps;
|
||||
ib >> _back_edges;
|
||||
ib >> _trip_count_id;
|
||||
ib >> _initial_execution_id;
|
||||
ib >> _current_iteration_id;
|
||||
ib >> _condition_id;
|
||||
ib >> _num_iterations_id;
|
||||
body_network = std::make_shared<cldnn::network>(ib, get_network().get_stream_ptr(), get_network().get_engine(), get_network().is_primary_stream(), 0);
|
||||
// set inner network to the new loaded _impl_params from cache.
|
||||
set_inner_networks({body_network});
|
||||
}
|
||||
|
||||
void loop_inst::postprocess_output_memory(bool is_dynamic, int64_t current_iteration) {
|
||||
if (is_dynamic) {
|
||||
std::vector<cldnn::memory::ptr> external_outputs;
|
||||
|
@ -41,6 +41,11 @@ void mutable_data_node::attach_memory(memory::ptr new_mem, bool invalidate_users
|
||||
recalc_output_layout(invalidate_users_if_changed);
|
||||
}
|
||||
|
||||
void mutable_data_node::replace_memory(memory::ptr new_mem, bool invalidate_users_if_changed) {
|
||||
mem = new_mem;
|
||||
recalc_output_layout(invalidate_users_if_changed);
|
||||
}
|
||||
|
||||
std::string mutable_data_inst::to_string(mutable_data_node const& node) {
|
||||
auto node_info = node.desc_to_json();
|
||||
|
||||
@ -83,46 +88,4 @@ mutable_data_inst::typed_primitive_inst(network& network, mutable_data_node cons
|
||||
}
|
||||
}
|
||||
|
||||
void mutable_data_inst::save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
parent::save(ob);
|
||||
|
||||
size_t data_size = _outputs[0]->size();
|
||||
ob << make_data(&data_size, sizeof(size_t));
|
||||
|
||||
if (data_size == 0)
|
||||
return;
|
||||
|
||||
allocation_type _allocation_type = _outputs[0]->get_allocation_type();
|
||||
|
||||
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
|
||||
ob << make_data(_outputs[0]->buffer_ptr(), data_size);
|
||||
} else {
|
||||
mem_lock<char, mem_lock_type::read> lock{_outputs[0], get_node().get_program().get_stream()};
|
||||
ob << make_data(lock.data(), data_size);
|
||||
}
|
||||
}
|
||||
|
||||
void mutable_data_inst::load(BinaryInputBuffer& ib) {
|
||||
parent::load(ib);
|
||||
|
||||
size_t data_size = 0;
|
||||
ib >> make_data(&data_size, sizeof(size_t));
|
||||
|
||||
if (data_size == 0)
|
||||
return;
|
||||
|
||||
OPENVINO_ASSERT(_outputs[0] != nullptr, "Output memory should be allocated before importing data.");
|
||||
|
||||
allocation_type _allocation_type = _outputs[0]->get_allocation_type();
|
||||
|
||||
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
|
||||
ib >> make_data(_outputs[0]->buffer_ptr(), data_size);
|
||||
} else {
|
||||
std::vector<uint8_t> _buf;
|
||||
_buf.resize(data_size);
|
||||
ib >> make_data(_buf.data(), data_size);
|
||||
_outputs[0]->copy_from(get_network().get_stream(), _buf.data());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace cldnn
|
||||
|
@ -355,167 +355,6 @@ network::network(program::ptr program, uint16_t stream_id)
|
||||
network::network(program::ptr program, stream::ptr stream, uint16_t stream_id)
|
||||
: network(program, program->get_config(), stream, false, stream_id == 0) {}
|
||||
|
||||
network::network(cldnn::BinaryInputBuffer& ib, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id)
|
||||
: network(ib, ExecutionConfig{}, stream, engine, is_primary_stream, local_net_id) {}
|
||||
|
||||
network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, stream::ptr stream, engine& engine, bool is_primary_stream, uint32_t local_net_id)
|
||||
: _program(nullptr)
|
||||
, _config(config)
|
||||
, _engine(engine)
|
||||
, _stream(stream)
|
||||
, _memory_pool(new memory_pool(engine))
|
||||
, _internal(false)
|
||||
, _is_primary_stream(is_primary_stream)
|
||||
, _reset_arguments(true)
|
||||
, _local_net_id(local_net_id)
|
||||
, _shape_predictor(new ShapePredictor(&engine, config.get_property(ov::intel_gpu::buffers_preallocation_ratio))) {
|
||||
net_id = get_unique_net_id();
|
||||
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) {
|
||||
auto& mem_preallocation_params = debug_config->mem_preallocation_params;
|
||||
_shape_predictor.reset(new ShapePredictor(&engine,
|
||||
mem_preallocation_params.next_iters_preallocation_count,
|
||||
mem_preallocation_params.max_per_iter_size,
|
||||
mem_preallocation_params.max_per_dim_diff,
|
||||
mem_preallocation_params.buffers_preallocation_ratio));
|
||||
}
|
||||
|
||||
kernels_cache kernels_cache(get_engine(), config, 0, nullptr, {""});
|
||||
ib >> kernels_cache;
|
||||
|
||||
int num_data_nodes;
|
||||
ib >> num_data_nodes;
|
||||
|
||||
for (int i = 0; i < num_data_nodes; ++i) {
|
||||
std::string type;
|
||||
std::string _primitive_id;
|
||||
ib >> type >> _primitive_id;
|
||||
std::shared_ptr<cldnn::primitive_inst> new_primitive_inst = prim_map_storage::instance().get_type_id(type)->create_instance(*this);
|
||||
ib >> *new_primitive_inst;
|
||||
_primitives[_primitive_id] = new_primitive_inst;
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<primitive_inst>> insts_to_allocate;
|
||||
size_t exec_order_size;
|
||||
ib >> exec_order_size;
|
||||
|
||||
for (size_t i = 0; i < exec_order_size; ++i) {
|
||||
std::string type;
|
||||
ib >> type;
|
||||
std::shared_ptr<cldnn::primitive_inst> new_primitive_inst = prim_map_storage::instance().get_type_id(type)->create_instance(*this);
|
||||
insts_to_allocate.emplace_back(new_primitive_inst);
|
||||
}
|
||||
|
||||
_outputs.clear();
|
||||
_output_chains.clear();
|
||||
|
||||
for (const auto& p_inst : insts_to_allocate) {
|
||||
ib >> *p_inst;
|
||||
_primitives[p_inst->id()] = p_inst;
|
||||
if (p_inst->get_impl() != nullptr)
|
||||
p_inst->init_by_cached_kernels(kernels_cache);
|
||||
}
|
||||
|
||||
std::vector<primitive_id> exec_order_ids;
|
||||
ib >> exec_order_ids;
|
||||
_exec_order.clear();
|
||||
for (auto& exec_order_id : exec_order_ids) {
|
||||
_exec_order.emplace_back(_primitives[exec_order_id]);
|
||||
}
|
||||
|
||||
for (auto& item : _primitives) {
|
||||
auto& p_inst = item.second;
|
||||
if (p_inst->is_input())
|
||||
_inputs.push_back(p_inst);
|
||||
if (p_inst->is_output()) {
|
||||
_outputs.push_back(p_inst);
|
||||
if (p_inst->type() == cldnn::data::type_id())
|
||||
_data_outputs.push_back(p_inst);
|
||||
}
|
||||
if (auto state_prim = std::dynamic_pointer_cast<memory_state::variable>(p_inst)) {
|
||||
set_variables_state_info(state_prim->variable_id(), p_inst->get_output_layout(0));
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& p_inst : _exec_order) {
|
||||
p_inst->rebuild_deps(_primitives);
|
||||
p_inst->rebuild_exec_deps(_primitives);
|
||||
|
||||
if (p_inst->type() == cldnn::concatenation::type_id() && p_inst->can_be_optimized()) {
|
||||
// implicit concat
|
||||
std::list<const std::vector<std::pair<std::shared_ptr<const primitive_inst>, int32_t>>*> stack = {&p_inst->dependencies()};
|
||||
while (!stack.empty()) {
|
||||
auto nodes_list = stack.front();
|
||||
stack.pop_front();
|
||||
|
||||
for (const auto& processed_nodes : *nodes_list) {
|
||||
auto processed_node = processed_nodes.first;
|
||||
auto dep_node = _primitives[processed_node->id()];
|
||||
dep_node->set_output_memory(p_inst->output_memory_ptr(), false);
|
||||
if (processed_node->type() == concatenation::type_id() && processed_node->can_be_optimized()) {
|
||||
if (!processed_node->dependencies().empty())
|
||||
stack.push_back(&processed_node->dependencies());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::map<std::string, std::string> reuse_map;
|
||||
ib >> reuse_map;
|
||||
|
||||
for (const auto& reuse_pair : reuse_map) {
|
||||
auto& eltw_inst = _primitives.at(reuse_pair.second);
|
||||
auto& prim_inst = _primitives.at(reuse_pair.first);
|
||||
auto& eltw_mem = eltw_inst->output_memory();
|
||||
auto new_mem = eltw_mem.get_engine()->reinterpret_buffer(eltw_mem, prim_inst->output_memory_ptr()->get_layout());
|
||||
prim_inst->set_output_memory(new_mem);
|
||||
}
|
||||
|
||||
for (auto p_inst : _exec_order) {
|
||||
if (p_inst->can_be_optimized() && !p_inst->is_dynamic()) {
|
||||
p_inst->update_output_memory();
|
||||
}
|
||||
}
|
||||
|
||||
add_default_output_chains();
|
||||
|
||||
size_t prims_info_size;
|
||||
ib >> prims_info_size;
|
||||
|
||||
for (size_t i = 0; i < prims_info_size; i++) {
|
||||
primitive_id original_id;
|
||||
std::string type_id;
|
||||
primitive::primitive_id_arr c_dependencies;
|
||||
primitive::primitive_id_arr c_users;
|
||||
primitive::primitive_id_arr c_fused_ids;
|
||||
layout output_layout;
|
||||
std::string layout_str;
|
||||
std::string kernel_id;
|
||||
data_types runtime_precision;
|
||||
bool is_cpu;
|
||||
int exec_id;
|
||||
|
||||
ib >> original_id;
|
||||
ib >> type_id;
|
||||
ib >> c_dependencies;
|
||||
ib >> c_users;
|
||||
ib >> c_fused_ids;
|
||||
ib >> output_layout;
|
||||
ib >> layout_str;
|
||||
ib >> kernel_id;
|
||||
ib >> make_data(&runtime_precision, sizeof(data_types));
|
||||
ib >> is_cpu;
|
||||
ib >> exec_id;
|
||||
primitive_info prim_info(original_id, type_id, c_dependencies, c_users, c_fused_ids,
|
||||
output_layout, layout_str, kernel_id, runtime_precision, is_cpu, exec_id);
|
||||
_prims_info.emplace_back(prim_info);
|
||||
}
|
||||
|
||||
ib >> _ext_id_mapping;
|
||||
}
|
||||
|
||||
network::~network() {
|
||||
if (_program != nullptr)
|
||||
_program->cancel_compilation_context();
|
||||
@ -526,118 +365,6 @@ network::~network() {
|
||||
}
|
||||
}
|
||||
|
||||
// Cache blob format:
|
||||
// [ cldnn::kernels_cache ]
|
||||
// [ non executable primitive_inst ]
|
||||
// [ executable primitive_inst ]
|
||||
// [ memory reuse information ]
|
||||
void network::save(cldnn::BinaryOutputBuffer& ob) {
|
||||
auto& kernels_cache = _program->get_kernels_cache();
|
||||
kernels_cache.reset();
|
||||
for (const auto& p_inst : _exec_order) {
|
||||
if (p_inst->get_impl() != nullptr) {
|
||||
auto const_impl = static_cast<const primitive_impl*>(p_inst->get_impl());
|
||||
kernels_cache.add_to_cached_kernels(const_impl->get_kernels());
|
||||
}
|
||||
}
|
||||
ob << kernels_cache;
|
||||
|
||||
int num_data_nodes = 0;
|
||||
for (const auto& p_inst : _primitives) {
|
||||
if (p_inst.second->type() == cldnn::data::type_id() ||
|
||||
(p_inst.second->type() == cldnn::mutable_data::type_id() && p_inst.second->get_impl() == nullptr)) {
|
||||
num_data_nodes += 1;
|
||||
}
|
||||
}
|
||||
ob << num_data_nodes;
|
||||
|
||||
for (const auto& p_inst : _primitives) {
|
||||
if (p_inst.second->type() == cldnn::data::type_id() ||
|
||||
(p_inst.second->type() == cldnn::mutable_data::type_id() && p_inst.second->get_impl() == nullptr)) {
|
||||
ob << p_inst.second->get_node().get_primitive()->type_string();
|
||||
ob << p_inst.second->id();
|
||||
ob << *(p_inst.second);
|
||||
}
|
||||
}
|
||||
|
||||
size_t exec_order_size = _exec_order.size();
|
||||
ob << exec_order_size;
|
||||
|
||||
std::unordered_map<primitive_id, size_t> exec_order_num;
|
||||
size_t i = exec_order_size;
|
||||
for (const auto& p_inst : _exec_order) {
|
||||
exec_order_num[p_inst->id()] = --i;
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<primitive_inst>> insts_to_allocate(_exec_order.begin(), _exec_order.end());
|
||||
std::sort(insts_to_allocate.begin(),
|
||||
insts_to_allocate.end(),
|
||||
[&exec_order_num, &exec_order_size](std::shared_ptr<primitive_inst> const& lhs, std::shared_ptr<primitive_inst> const& rhs) {
|
||||
size_t lhs_size = (lhs->mem_allocated()) ? (lhs->get_output_layout().bytes_count() + exec_order_size) : exec_order_num[lhs->id()];
|
||||
size_t rhs_size = (rhs->mem_allocated()) ? (rhs->get_output_layout().bytes_count() + exec_order_size) : exec_order_num[rhs->id()];
|
||||
return (lhs_size > rhs_size);
|
||||
});
|
||||
|
||||
for (const auto& p_inst : insts_to_allocate) {
|
||||
ob << p_inst->get_node().get_primitive()->type_string();
|
||||
}
|
||||
|
||||
for (const auto& p_inst : insts_to_allocate) {
|
||||
ob << *p_inst;
|
||||
}
|
||||
|
||||
std::vector<primitive_id> exec_order_ids;
|
||||
for (const auto& p_inst : _exec_order) {
|
||||
exec_order_ids.emplace_back(p_inst->id());
|
||||
}
|
||||
ob << exec_order_ids;
|
||||
|
||||
std::map<std::string, std::string> reuse_map;
|
||||
|
||||
auto& po = _program->get_processing_order();
|
||||
for (auto const& node : po) {
|
||||
if (node->get_preferred_impl_type() == impl_types::onednn) {
|
||||
size_t eltw_dep = 0;
|
||||
for (auto& fused_op : node->get_fused_primitives()) {
|
||||
if (fused_op.is_type<eltwise>() && fused_op.deps.size() == 1) {
|
||||
// If it is first sum, reuse the buffer
|
||||
auto fusing_type = onednn_add_fusing_helpers::get_add_fusing_type(*node, fused_op);
|
||||
if (fusing_type != add_fusing_type::sum || eltw_dep != 0)
|
||||
continue;
|
||||
if (!fused_op.has_outer_dep())
|
||||
continue;
|
||||
eltw_dep = fused_op.outer_dep_start_idx;
|
||||
auto& eltw_in = node->get_dependency(eltw_dep);
|
||||
if (_primitives.find(eltw_in.id()) != _primitives.end() && _primitives.find(node->id()) != _primitives.end()) {
|
||||
reuse_map[node->id()] = eltw_in.id();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ob << reuse_map;
|
||||
|
||||
auto& prims_info = get_primitives_info();
|
||||
ob << prims_info.size();
|
||||
for (auto& prim_info : prims_info) {
|
||||
ob << prim_info.original_id;
|
||||
ob << prim_info.type_id;
|
||||
ob << prim_info.c_dependencies;
|
||||
ob << prim_info.c_users;
|
||||
ob << prim_info.c_fused_ids;
|
||||
ob << prim_info.output_layout;
|
||||
ob << prim_info.layout_str;
|
||||
ob << prim_info.kernel_id;
|
||||
ob << make_data(&prim_info.runtime_precision, sizeof(data_types));
|
||||
ob << prim_info.is_cpu;
|
||||
ob << prim_info.exec_id;
|
||||
}
|
||||
|
||||
ob << get_ext_id_mapping();
|
||||
kernels_cache.reset();
|
||||
}
|
||||
|
||||
network::ptr network::allocate_network(stream::ptr stream, program::ptr program, bool is_internal, bool is_primary_stream) {
|
||||
return std::make_shared<network>(program, program->get_config(), stream, is_internal, is_primary_stream);
|
||||
}
|
||||
|
@ -89,4 +89,29 @@ bool program::nodes_ordering::is_correct(program_node* node) {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void program::nodes_ordering::save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
ob << _processing_order.size();
|
||||
auto itr = rbegin();
|
||||
while (itr != rend()) {
|
||||
auto& node = *itr;
|
||||
ob << node->id();
|
||||
itr++;
|
||||
}
|
||||
}
|
||||
|
||||
void program::nodes_ordering::load(cldnn::BinaryInputBuffer& ib, program& p) {
|
||||
size_t num_nodes;
|
||||
ib >> num_nodes;
|
||||
|
||||
clear();
|
||||
for (size_t i = 0; i < num_nodes; ++i) {
|
||||
primitive_id node_id;
|
||||
ib >> node_id;
|
||||
|
||||
auto node = p.get_node_ptr(node_id).get();
|
||||
_processing_order.push_front(node);
|
||||
processing_order_iterators[node] = _processing_order.begin();
|
||||
}
|
||||
}
|
||||
} // namespace cldnn
|
||||
|
@ -1666,245 +1666,4 @@ std::string primitive_inst::get_implementation_name() const {
|
||||
return "undef";
|
||||
}
|
||||
|
||||
static primitive_id find_dep_by_mem(const cldnn::primitive_inst* p_inst, memory& mem_ptr, int max_dist = 5) {
|
||||
std::vector<std::pair<primitive_id, int>> queue;
|
||||
size_t head = 0;
|
||||
|
||||
for (auto& p_inst : p_inst->dependencies())
|
||||
queue.emplace_back(std::make_pair(p_inst.first->id(), 0));
|
||||
|
||||
const network& const_network = p_inst->get_network();
|
||||
while (head < queue.size()) {
|
||||
auto curr_item = queue.at(head);
|
||||
auto curr_prim = const_network.get_primitive(curr_item.first);
|
||||
|
||||
if (p_inst->get_network().get_engine().is_the_same_buffer(mem_ptr, curr_prim->output_memory()))
|
||||
return curr_prim->id();
|
||||
|
||||
if (max_dist > curr_item.second)
|
||||
for (auto& p_inst : curr_prim->dependencies())
|
||||
queue.emplace_back(std::make_pair(p_inst.first->id(), curr_item.second+1));
|
||||
|
||||
head += 1;
|
||||
}
|
||||
|
||||
return "NOT_FOUND";
|
||||
}
|
||||
|
||||
// Cache blob format:
// [ kernel_impl_params ]
// [ primitive_impl ]
// [ member variables of primitive_inst ]
// [ output memory information ]
// [ memory dependency information ]
// [ execution dependency information ]
// [ intermediate memory information ]
void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const {
    // Serialize the impl params first and expose them on the buffer so nested
    // serializers (notably `ob << _impl` below) can reach them.
    _impl_params->save(ob);
    ob.setKernelImplParams(_impl_params.get());

    // Scalar members; the write order here is the wire format and must match
    // primitive_inst::load() exactly.
    ob << _node_output_layout;
    ob << has_mutable_input();
    ob << mem_allocated();
    ob << is_dynamic();
    ob << _node->get_primitive()->type_string();
    ob << id();
    ob << org_id();
    ob << is_input();
    ob << is_output();
    ob << inputs_memory_count();
    ob << outputs_memory_count();
    ob << get_fused_mem_count();
    ob << get_fused_mem_offset();
    ob << can_be_optimized();
    ob << can_share_buffer();
    ob << is_constant();
    ob << needs_completion_event();

    // data primitives carry their payload with the program blob itself;
    // nothing further is serialized for them.
    if (type() == cldnn::data::type_id()) {
        return;
    }

    // Output memory: a `true` marker for each not-yet-allocated output, or
    // `false` followed by layout + allocation type for allocated ones.
    ob << _outputs.size();
    for (size_t i = 0; i < _outputs.size(); ++i) {
        if (_outputs[i] == nullptr) {
            ob << true;
        } else {
            ob << false;
            ob << _outputs[i]->get_layout();
            const auto _allocation_type = _outputs[i]->get_allocation_type();
            // allocation_type is an enum; stored as raw bytes.
            ob << make_data(&_allocation_type, sizeof(_allocation_type));
        }
    }

    bool can_reuse_memory = true;
    if (user_requesting_mem_reuse_false(*_node)) {
        can_reuse_memory = false;
    }
    ob << can_reuse_memory;

    ob << _node->get_memory_dependencies();

    // Dependencies as (id, port) pairs; execution dependencies as ids only.
    ob << _deps.size();
    for (const auto& dep : _deps) {
        ob << dep.first->id();
        ob << dep.second;
    }

    ob << _exec_deps.size();
    for (const auto& dep : _exec_deps) {
        ob << dep->id();
    }

    // For outputs whose memory is borrowed from another primitive
    // (mem_allocated() == false), record which dependency owns the buffer so
    // load() can re-alias it ("NOT_FOUND" if none within search range).
    for (size_t i = 0; i < _outputs.size(); ++i) {
        if (_outputs[i] != nullptr) {
            if (!mem_allocated())
                ob << find_dep_by_mem(this, output_memory(i));
        }
    }

    // Intermediate buffers: layout + allocation type; contents are transient
    // and re-allocated on load.
    ob << _intermediates_memory.size();
    for (const auto& ibuf : _intermediates_memory) {
        ob << ibuf->get_layout();
        const auto _allocation_type = ibuf->get_allocation_type();
        ob << make_data(&_allocation_type, sizeof(_allocation_type));
    }

    if (_impl != nullptr) {
        ob << true;
        // Map compiled kernels to cache ids before serializing the impl so it
        // can be re-linked against the kernels cache on load.
        _impl->set_cached_kernel_ids(_network.get_program()->get_kernels_cache());
        ob << _impl;
    } else {
        ob << false;
    }
}
|
||||
|
||||
// Returns the position of `arg` within this primitive's dependency memories.
// Throws when the buffer is not owned by any dependency.
int32_t primitive_inst::get_index_in_deps(memory::cptr arg) const {
    uint32_t pos = 0;
    while (pos < _deps.size()) {
        if (dep_memory_ptr(pos) == arg)
            return pos;
        ++pos;
    }

    OPENVINO_THROW("[get_index_in_deps]: not found in _deps");
}
|
||||
|
||||
// Deserializes one primitive_inst from a cache blob. Read order mirrors
// primitive_inst::save() exactly; any divergence corrupts the stream.
void primitive_inst::load(cldnn::BinaryInputBuffer& ib) {
    _impl_params->load(ib);
    // Expose the params on the buffer for nested deserializers (`ib >> _impl`).
    ib.setKernelImplParams(_impl_params.get());

    ib >> _node_output_layout;
    ib >> _has_mutable_input;
    ib >> _mem_allocated;
    ib >> _is_dynamic;
    std::string type_str;
    ib >> type_str;
    // Primitive type was stored as its registered type string.
    _type = cldnn::prim_map_storage::instance().get_type_id(type_str);
    ib >> _id;
    ib >> _org_id;
    ib >> _is_input;
    ib >> _is_output;
    ib >> _inputs_memory_count;
    ib >> _outputs_memory_count;
    ib >> _fused_mem_count;
    ib >> _fused_mem_offset;
    ib >> _can_be_optimized;
    ib >> _can_share_buffer;
    ib >> _is_constant;
    ib >> _needs_completion_event;

    // data primitives have no further serialized state (see save()).
    if (type() == cldnn::data::type_id()) {
        return;
    }

    // mem_allocated : it is true if the output memory is allocated by this layer, and
    // false if this layer reuses output memory that is allocated by other layer.
    // is_output_null : it is true if the output memory is not allocated yet and false otherwise.
    size_t num_outputs;
    std::vector<bool> is_output_null;
    std::vector<layout> output_layouts;
    std::vector<allocation_type> allocation_types;

    ib >> num_outputs;
    is_output_null.resize(num_outputs);
    for (size_t i = 0; i < num_outputs; ++i) {
        bool is_null;
        ib >> is_null;
        is_output_null[i] = is_null;
        if (!is_null) {
            layout output_layout = layout();
            ib >> output_layout;
            output_layouts.emplace_back(output_layout);

            allocation_type _allocation_type = allocation_type::unknown;
            ib >> make_data(&_allocation_type, sizeof(_allocation_type));
            allocation_types.emplace_back(_allocation_type);
        }
    }
    // NOTE(review): output_layouts/allocation_types are appended only for
    // non-null outputs but later indexed by the absolute output index `i`;
    // this misaligns if a null output precedes a non-null one — presumably
    // that mix never occurs in practice, but worth confirming.

    bool can_reuse_memory;
    ib >> can_reuse_memory;

    std::set<primitive_id> _node_mem_deps;
    ib >> _node_mem_deps;
    _runtime_memory_dependencies = _node_mem_deps;

    // Dependency ids are recorded now and resolved to instances later, once
    // all primitives of the network have been created.
    size_t vector_size = 0UL;
    ib >> vector_size;
    for (size_t i = 0; i < vector_size; ++i) {
        primitive_id dep_id;
        int32_t dep_idx;
        ib >> dep_id >> dep_idx;
        _dep_ids.emplace_back(std::pair<primitive_id, int32_t>(dep_id, dep_idx));
    }

    ib >> vector_size;
    _exec_dep_ids.resize(vector_size);
    for (auto& el : _exec_dep_ids) {
        ib >> el;
    }

    // Reconstruct output memory per the policy recorded at save time:
    //  - borrowed buffers are re-aliased onto the owning dependency's output;
    //  - otherwise memory is allocated directly or taken from the pool.
    _outputs.resize(num_outputs);
    for (size_t i = 0; i < num_outputs; ++i) {
        _outputs[i] = nullptr;
        if (!is_output_null[i]) {
            if (!_mem_allocated) {
                std::string dep_id;
                ib >> dep_id;
                if (dep_id.compare("NOT_FOUND") != 0 && get_network().get_primitive(dep_id)->output_memory_ptr() != nullptr) {
                    // Reuse the owning dependency's buffer under this layout.
                    _outputs[i] = get_network().get_engine().reinterpret_buffer(get_network().get_primitive(dep_id)->output_memory(), output_layouts[i]);
                } else if (type() == cldnn::mutable_data::type_id()) {
                    _outputs[i] = get_network().get_engine().allocate_memory(output_layouts[i], allocation_types[i]);
                }
            } else {
                if ((!can_share_buffer()) || can_be_optimized() || is_output()) {
                    _outputs[i] = get_network().get_engine().allocate_memory(output_layouts[i], allocation_types[i]);
                } else {
                    _outputs[i] = get_network().get_memory_pool().get_memory(output_layouts[i], id(), get_network_id(), _node_mem_deps,
                                                                             allocation_types[i], can_reuse_memory);
                }
            }
        }
    }
    _output_changed = false;

    // Intermediate buffers are always taken fresh from the memory pool.
    ib >> vector_size;
    _intermediates_memory.resize(vector_size);
    for (size_t i = 0; i < vector_size; i++) {
        layout ibuf_layout = layout();
        ib >> ibuf_layout;
        allocation_type _allocation_type;
        ib >> make_data(&_allocation_type, sizeof(_allocation_type));

        _intermediates_memory[i] = get_network().get_memory_pool().get_memory(ibuf_layout, id(), get_network_id(),
                                                                              _node_mem_deps, _allocation_type, true, true);
    }

    bool has_impl;
    ib >> has_impl;
    if (has_impl) {
        _impl.reset();
        ib >> _impl;
    }
}
|
||||
|
||||
} // namespace cldnn
|
||||
|
@ -1683,3 +1683,245 @@ void program::cancel_compilation_context() {
|
||||
if (_compilation_context != nullptr)
|
||||
_compilation_context->cancel();
|
||||
}
|
||||
|
||||
// Serializes the whole program into a cache blob:
//   primitive descriptors -> mutable_data output-sharing pairs -> per-node
//   state -> inputs/outputs -> flags -> processing order -> kernels cache +
//   selected impls -> optimized-out ids -> primitive info.
// The write order is the wire format and must match program::load().
void program::save(cldnn::BinaryOutputBuffer& ob) const {
    // Track which mutable_data nodes share one attached memory buffer so the
    // sharing can be re-established on load.
    std::map<cldnn::memory::ptr, std::vector<const cldnn::program_node*>> mutable_datas_ptrs;
    ob << nodes_map.size();
    for (auto& node : nodes_map) {
        ob.setKernelImplParams(node.second->get_kernel_impl_params().get());

        if (node.second->is_type<data>() && node.second->as<data>().get_primitive()->mem == nullptr) {
            auto& data_node = node.second->as<data>();
            if (data_node.get_attached_memory_ptr() == nullptr) {
                // data node with no memory at all: mark invalid and skip.
                ob << false;
                continue;
            } else {
                // Point the descriptor at the attached memory so the payload
                // is serialized together with the descriptor below.
                node.second->as<data>().typed_desc()->mem = data_node.get_attached_memory_ptr();
            }
        }
        ob << true;

        ob << node.second->desc;

        if (node.second->is_type<mutable_data>()) {
            mutable_datas_ptrs[node.second->as<mutable_data>().get_attached_memory_ptr()].push_back(node.second.get());
        }
    }

    // Only buffers shared by exactly two mutable_data nodes are recorded;
    // other sharing arrangements are not serialized.
    std::list<std::pair<primitive_id, primitive_id>> output_sharing_mutable_datas;
    for (auto item : mutable_datas_ptrs) {
        if (item.second.size() != 2)
            continue;

        output_sharing_mutable_datas.push_back({item.second[0]->id(), item.second[1]->id()});
    }

    ob << output_sharing_mutable_datas.size();
    for (auto& shared_mem_pair : output_sharing_mutable_datas) {
        ob << shared_mem_pair.first;
        ob << shared_mem_pair.second;
    }

    // Per-node graph state plus the shape_of nodes each node depends on.
    for (auto& node : nodes_map) {
        ob << node.first;
        node.second->save(ob);
        ob << node.second->get_dependant_shape_of_nodes().size();
        for (auto& dep_node : node.second->get_dependant_shape_of_nodes()) {
            ob << dep_node->id();
        }
    }

    ob << inputs.size();
    for (auto& input : inputs) {
        ob << input->id();
    }

    ob << outputs.size();
    for (auto& output : outputs) {
        ob << output->id();
    }

    ob << _is_body_program;
    ob << _can_be_optimized;
    get_processing_order().save(ob);

    {
        // Kernels cache and each node's selected implementation. Kernels are
        // registered in the cache first so impls can refer to cached ids.
        auto& kernels_cache = get_kernels_cache();
        std::vector<primitive_id> impl_ids;
        for (auto& node : get_processing_order()) {
            if (node->get_selected_impl() != nullptr) {
                impl_ids.emplace_back(node->id());
                kernels_cache.add_to_cached_kernels(node->get_selected_impl()->get_kernels());
            }
        }
        ob << kernels_cache;
        ob << impl_ids;
        for (auto& impl_id : impl_ids) {
            if (get_node_ptr(impl_id)->get_selected_impl()->is_onednn()) {
                ob << true;
                // onednn impls need the node's impl params available on the
                // buffer while serializing.
                auto params = get_node_ptr(impl_id)->get_kernel_impl_params();
                ob.setKernelImplParams(params.get());
                ob << get_node_ptr(impl_id)->selected_impl;
            } else {
                ob << false;
                ob << get_node_ptr(impl_id)->selected_impl;
            }
            ob << get_node_ptr(impl_id)->get_selected_impl()->get_cached_kernel_ids(kernels_cache);
        }
    }

    ob << optimized_out.size();
    for (auto& opt_prim : optimized_out) {
        ob << opt_prim;
    }

    // primitive_info entries (used for profiling/reporting), field by field.
    ob << prim_info.size();
    for (auto& p_info : prim_info) {
        ob << p_info.original_id;
        ob << p_info.type_id;
        ob << p_info.c_dependencies;
        ob << p_info.c_users;
        ob << p_info.c_fused_ids;
        ob << p_info.output_layout;
        ob << p_info.layout_str;
        ob << p_info.kernel_id;
        // runtime_precision is an enum; stored as raw bytes.
        ob << make_data(&p_info.runtime_precision, sizeof(data_types));
        ob << p_info.is_cpu;
        ob << p_info.exec_id;
    }
}
|
||||
|
||||
// Rebuilds a program from a cache blob. Read order mirrors program::save().
void program::load(cldnn::BinaryInputBuffer& ib) {
    init_program();

    // Pass 1: recreate graph nodes from serialized primitive descriptors.
    // Entries marked invalid at save time (data nodes without memory) are
    // skipped and never instantiated.
    size_t num_nodes;
    ib >> num_nodes;
    bool is_valid_data_node;
    for (size_t i = 0; i < num_nodes; ++i) {
        ib >> is_valid_data_node;
        if (!is_valid_data_node)
            continue;

        std::shared_ptr<cldnn::primitive> prim;
        ib >> prim;
        get_or_create(prim);
    }

    // Re-establish output-memory sharing between mutable_data node pairs.
    size_t num_output_sharing_mutable_datas;
    ib >> num_output_sharing_mutable_datas;
    for (size_t i = 0; i < num_output_sharing_mutable_datas; ++i) {
        primitive_id md_id1, md_id2;
        ib >> md_id1;
        ib >> md_id2;

        auto& md_node1 = get_node(md_id1).as<mutable_data>();
        auto& md_node2 = get_node(md_id2).as<mutable_data>();

        md_node2.typed_desc()->mem = md_node1.typed_desc()->mem;
        md_node2.replace_memory(md_node2.typed_desc()->mem);
    }

    // Pass 2: per-node state plus dependant shape_of node links.
    // NOTE(review): reuses the save-time num_nodes count; assumes every
    // serialized node record has a corresponding created node — confirm
    // invalid data nodes cannot reach this point.
    for (size_t i = 0; i < num_nodes; ++i) {
        primitive_id prim_id;
        ib >> prim_id;
        auto& p_node = get_node(prim_id);
        p_node.load(ib);
        size_t num_dep_nodes;
        ib >> num_dep_nodes;
        // Inner `i` intentionally shadows the outer loop counter.
        for (size_t i = 0; i < num_dep_nodes; ++i) {
            ib >> prim_id;
            auto& dep_node = get_node(prim_id);
            p_node.add_dependant_shape_of_node(&dep_node);
        }
    }

    ib >> num_nodes;
    inputs.clear();
    for (size_t i = 0; i < num_nodes; ++i) {
        primitive_id prim_id;
        ib >> prim_id;
        auto& p_node = get_node(prim_id);
        inputs.emplace_back(&p_node);
    }

    ib >> num_nodes;
    outputs.clear();
    for (size_t i = 0; i < num_nodes; ++i) {
        primitive_id prim_id;
        ib >> prim_id;
        auto& p_node = get_node(prim_id);
        outputs.emplace_back(&p_node);
    }

    ib >> _is_body_program;
    ib >> _can_be_optimized;

    get_processing_order().load(ib, *this);

    {
        // Restore the kernels cache first, then each node's selected impl,
        // and re-link impls to their compiled kernels via cached ids.
        auto& kernels_cache = get_kernels_cache();
        ib >> kernels_cache;

        std::vector<primitive_id> impl_ids;
        ib >> impl_ids;

        for (auto& impl_id : impl_ids) {
            auto& p_node = get_node(impl_id);

            bool is_onednn;
            ib >> is_onednn;
            if (is_onednn) {
                // onednn impls need the node's impl params on the buffer
                // while deserializing.
                auto params = p_node.get_kernel_impl_params();
                ib.setKernelImplParams(params.get());
                ib >> p_node.selected_impl;
            } else {
                ib >> p_node.selected_impl;
            }

            std::vector<std::string> cached_kernel_ids;
            ib >> cached_kernel_ids;
            p_node.selected_impl->init_by_cached_kernels(get_kernels_cache(), cached_kernel_ids);
        }
    }

    size_t optimized_out_size;
    ib >> optimized_out_size;
    optimized_out.clear();
    for (size_t i = 0; i < optimized_out_size; i++) {
        primitive_id opt_prim;
        ib >> opt_prim;
        optimized_out.emplace_back(opt_prim);
    }

    // primitive_info entries, field by field (see program::save()).
    size_t prims_info_size;
    ib >> prims_info_size;
    prim_info.clear();
    for (size_t i = 0; i < prims_info_size; i++) {
        primitive_id original_id;
        std::string type_id;
        primitive::primitive_id_arr c_dependencies;
        primitive::primitive_id_arr c_users;
        primitive::primitive_id_arr c_fused_ids;
        layout output_layout;
        std::string layout_str;
        std::string kernel_id;
        data_types runtime_precision;
        bool is_cpu;
        int exec_id;

        ib >> original_id;
        ib >> type_id;
        ib >> c_dependencies;
        ib >> c_users;
        ib >> c_fused_ids;
        ib >> output_layout;
        ib >> layout_str;
        ib >> kernel_id;
        ib >> make_data(&runtime_precision, sizeof(data_types));
        ib >> is_cpu;
        ib >> exec_id;
        primitive_info p_info(original_id, type_id, c_dependencies, c_users, c_fused_ids,
                              output_layout, layout_str, kernel_id, runtime_precision, is_cpu, exec_id);
        prim_info.emplace_back(p_info);
    }
}
|
||||
|
@ -7,6 +7,9 @@
|
||||
#include "primitive_inst.h"
|
||||
#include "loop_inst.h"
|
||||
#include "shape_of_inst.h"
|
||||
#include "activation_inst.h"
|
||||
#include "reorder_inst.h"
|
||||
#include "quantize_inst.h"
|
||||
#include "intel_gpu/runtime/debug_configuration.hpp"
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
#include "convolution_inst.h"
|
||||
@ -534,6 +537,356 @@ void program_node::add_dependant_shape_of_node(const program_node* node) {
|
||||
dependant_shape_of_nodes.insert(node);
|
||||
}
|
||||
|
||||
// Serializes this node's graph state (layouts, preferred formats, edges,
// flags, fused primitive descriptors). Write order is the wire format and
// must match program_node::load().
void program_node::save(cldnn::BinaryOutputBuffer& ob) const {
    ob << valid_output_layouts;
    ob << output_layouts;

    // Preferred formats are stored as their underlying int values.
    ob << preferred_input_fmts.size();
    for (auto preferred_input_fmt : preferred_input_fmts) {
        int32_t format_type_int = preferred_input_fmt;
        ob << format_type_int;
    }

    ob << preferred_output_fmts.size();
    for (auto preferred_output_fmt : preferred_output_fmts) {
        int32_t format_type_int = preferred_output_fmt;
        ob << format_type_int;
    }

    // Graph edges by id: dependencies as (id, port) pairs, users as ids.
    ob << dependencies.size();
    for (const auto& dep_pair : dependencies) {
        ob << dep_pair.first->id();
        ob << dep_pair.second;
    }

    ob << users.size();
    for (const auto& user_node : users) {
        ob << user_node->id();
    }

    ob << memory_dependencies;

    // impl_type is an enum; stored as raw bytes.
    ob << make_data(&impl_type, sizeof(impl_type));
    ob << constant;
    ob << data_flow;
    ob << in_shape_of_subgraph;

    ob << output;
    ob << user_mark;
    ob << optimized;
    ob << share_buffer;
    // Fixed-size axis array: element count is implicit, values only.
    for (const auto& _support_padding : _support_padding_in_axis) {
        ob << _support_padding;
    }

    ob << has_reused_memory;
    ob << reused_memory_color;

    // fused_prims;
    {
        ob << fused_prims.size();
        for (auto& f_desc : fused_prims) {
            // Descriptors of primitives that still exist in the program are
            // stored by id only; detached ones are serialized inline.
            if (get_program().has_node(f_desc.desc->id)) {
                ob << true;
                ob << f_desc.desc->id;
            } else {
                ob << false;
                ob << f_desc.desc;
            }
            ob << f_desc.input_layout;
            ob << f_desc.output_layout;
            // Fuse-param payload is dispatched on the param's type string.
            ob << cldnn::prim_map_storage::instance().get_type_string(f_desc.f_param->type());
            if (f_desc.f_param->type() == activation::type_id()) {
                auto casted = std::dynamic_pointer_cast<ActivationFuseParams>(f_desc.f_param);
                if (get_program().has_node(casted->_desc->id)) {
                    ob << true;
                    ob << casted->_desc->id;
                } else {
                    ob << false;
                    ob << casted->_desc;
                }
            } else if (f_desc.f_param->type() == reorder::type_id()) {
                auto casted = std::dynamic_pointer_cast<ReorderFuseParams>(f_desc.f_param);
                ob << casted->_in;
                ob << casted->_out;
            } else if (f_desc.f_param->type() == eltwise::type_id()) {
                auto casted = std::dynamic_pointer_cast<EltwiseFuseParams>(f_desc.f_param);
                if (get_program().has_node(casted->_desc->id)) {
                    ob << true;
                    ob << casted->_desc->id;
                } else {
                    ob << false;
                    ob << casted->_desc;
                }
            } else if (f_desc.f_param->type() == quantize::type_id()) {
                auto casted = std::dynamic_pointer_cast<QuantizeFuseParams>(f_desc.f_param);
                ob << casted->_out_layout;
                ob << casted->_scale_shift_opt;
                ob << casted->_need_post_scale;
                ob << casted->_need_post_shift;
                ob << casted->_need_pre_shift;
                ob << casted->_need_clamp;
                ob << casted->_need_min_clamp;
                ob << casted->_need_max_clamp;
                ob << casted->_per_tensor_input_range;
                ob << casted->_per_tensor_input_scale;
                ob << casted->_per_tensor_input_shift;
                ob << casted->_per_tensor_output_range;
                ob << casted->_per_tensor_output_scale;
                ob << casted->_per_tensor_output_shift;
                ob << casted->_in_lo;
                ob << casted->_in_hi;
                ob << casted->_in_scale;
                ob << casted->_in_shift;
                ob << casted->_out_lo;
                ob << casted->_out_hi;
                ob << casted->_out_scale;
                ob << casted->_out_shift;
            }
            // Any other f_param type carries no extra payload.

            ob << f_desc.deps.size();
            for (auto& dep : f_desc.deps) {
                ob << dep.first;
                ob << dep.second;
            }
            ob << f_desc.fused_deps.size();
            for (auto& f_dep : f_desc.fused_deps) {
                ob << f_dep.first;
                ob << f_dep.second;
            }
            ob << f_desc.outer_dep_start_idx;
            ob << f_desc.total_num_deps;
        }
    }
#ifdef ENABLE_ONEDNN_FOR_GPU
    // onednn post-op descriptors; enums/tags stored as raw bytes.
    size_t num_fused_prims = fused_prims_onednn.size();
    ob << num_fused_prims;
    for (auto fused_prim : fused_prims_onednn) {
        ob << make_data(&fused_prim.op_type, sizeof(onednn_post_op_type));
        ob << fused_prim.mem_offset;
        ob << fused_prim.mem_dep;
        ob << make_data(&fused_prim.tag, sizeof(dnnl::memory::format_tag));
        ob << fused_prim.flatten;
        ob << fused_prim.dims;
        ob << make_data(&fused_prim.dt, sizeof(dnnl::memory::data_type));
    }
#endif  // ENABLE_ONEDNN_FOR_GPU
}
|
||||
|
||||
// Restores this node's graph state from a cache blob. Read order mirrors
// program_node::save(); node references are resolved by id, so all nodes of
// the program must already exist when this runs.
void program_node::load(cldnn::BinaryInputBuffer& ib) {
    ib >> valid_output_layouts;
    ib >> output_layouts;

    {
        // preferred_input_fmts
        size_t preferred_input_fmts_size;
        int32_t format_type_int;
        ib >> preferred_input_fmts_size;
        preferred_input_fmts.clear();
        for (size_t i = 0; i < preferred_input_fmts_size; ++i) {
            ib >> format_type_int;
            preferred_input_fmts.push_back((format::type) format_type_int);
        }
    }

    {
        // preferred_output_fmts
        size_t preferred_output_fmts_size;
        int32_t format_type_int;
        ib >> preferred_output_fmts_size;
        preferred_output_fmts.clear();
        for (size_t i = 0; i < preferred_output_fmts_size; ++i) {
            ib >> format_type_int;
            preferred_output_fmts.push_back((format::type) format_type_int);
        }
    }

    {
        // dependencies
        size_t deps_size;
        primitive_id dep_id;
        int32_t dep_idx;
        ib >> deps_size;
        dependencies.clear();
        for (size_t i = 0; i < deps_size; ++i) {
            ib >> dep_id;
            ib >> dep_idx;
            dependencies.emplace_back(std::make_pair(get_program().get_node_ptr(dep_id).get(), dep_idx));
        }
    }

    {
        // users
        size_t users_size;
        primitive_id user_id;
        ib >> users_size;
        users.clear();
        users.resize(0);
        for (size_t i = 0; i < users_size; ++i) {
            ib >> user_id;
            users.push_back(get_program().get_node_ptr(user_id).get());
        }
    }

    ib >> memory_dependencies;

    // impl_type is an enum; stored as raw bytes.
    ib >> make_data(&impl_type, sizeof(impl_type));
    ib >> constant;
    ib >> data_flow;
    ib >> in_shape_of_subgraph;

    ib >> output;
    ib >> user_mark;
    ib >> optimized;
    ib >> share_buffer;
    // Fixed-size axis array: element count is implicit.
    for (auto& _support_padding : _support_padding_in_axis) {
        ib >> _support_padding;
    }
    ib >> has_reused_memory;
    ib >> reused_memory_color;

    // fused_prims;
    {
        size_t fused_desc_size;
        ib >> fused_desc_size;
        for (size_t i = 0; i < fused_desc_size; ++i) {
            // Descriptor was either a reference to a node still in the
            // program (stored by id) or serialized inline.
            bool exist_prim;
            ib >> exist_prim;
            std::shared_ptr<const primitive> desc;
            if (exist_prim) {
                primitive_id desc_id;
                ib >> desc_id;
                desc = get_program().get_node_ptr(desc_id)->desc;
            } else {
                ib >> desc;
            }
            auto f_desc = fused_primitive_desc(desc);
            ib >> f_desc.input_layout;
            ib >> f_desc.output_layout;

            // Fuse-param payload is dispatched on the stored type string.
            std::string f_param_type_str;
            ib >> f_param_type_str;
            auto f_param_type = cldnn::prim_map_storage::instance().get_type_id(f_param_type_str);
            if (f_param_type == activation::type_id()) {
                ib >> exist_prim;
                std::shared_ptr<activation> param_desc;
                if (exist_prim) {
                    primitive_id desc_id;
                    ib >> desc_id;
                    param_desc = std::dynamic_pointer_cast<activation>(get_program().get_node_ptr(desc_id)->desc);
                } else {
                    ib >> param_desc;
                }
                f_desc.f_param = std::make_shared<ActivationFuseParams>(param_desc);
            } else if (f_param_type == reorder::type_id()) {
                layout in, out;
                ib >> in;
                ib >> out;
                f_desc.f_param = std::make_shared<ReorderFuseParams>(in, out);
            } else if (f_param_type == eltwise::type_id()) {
                ib >> exist_prim;
                std::shared_ptr<eltwise> param_desc;
                if (exist_prim) {
                    primitive_id desc_id;
                    ib >> desc_id;
                    param_desc = std::dynamic_pointer_cast<eltwise>(get_program().get_node_ptr(desc_id)->desc);
                } else {
                    ib >> param_desc;
                }
                f_desc.f_param = std::make_shared<EltwiseFuseParams>(param_desc);
            } else if (f_param_type == quantize::type_id()) {
                layout out_layout;
                bool scale_shift_opt;
                bool need_post_scale;
                bool need_post_shift;
                bool need_pre_shift;
                bool need_clamp;
                bool need_min_clamp;
                bool need_max_clamp;
                bool per_tensor_input_range;
                bool per_tensor_input_scale;
                bool per_tensor_input_shift;
                bool per_tensor_output_range;
                bool per_tensor_output_scale;
                bool per_tensor_output_shift;
                float in_lo;
                float in_hi;
                float in_scale;
                float in_shift;
                float out_lo;
                float out_hi;
                float out_scale;
                float out_shift;

                ib >> out_layout;
                ib >> scale_shift_opt;
                ib >> need_post_scale;
                ib >> need_post_shift;
                ib >> need_pre_shift;
                ib >> need_clamp;
                ib >> need_min_clamp;
                ib >> need_max_clamp;
                ib >> per_tensor_input_range;
                ib >> per_tensor_input_scale;
                ib >> per_tensor_input_shift;
                ib >> per_tensor_output_range;
                ib >> per_tensor_output_scale;
                ib >> per_tensor_output_shift;
                ib >> in_lo;
                ib >> in_hi;
                ib >> in_scale;
                ib >> in_shift;
                ib >> out_lo;
                ib >> out_hi;
                ib >> out_scale;
                ib >> out_shift;

                f_desc.f_param = std::make_shared<QuantizeFuseParams>(out_layout, scale_shift_opt, need_post_scale, need_post_shift,
                                                                      need_pre_shift, need_clamp, need_min_clamp, need_max_clamp, per_tensor_input_range,
                                                                      per_tensor_input_scale, per_tensor_input_shift, per_tensor_output_range, per_tensor_output_scale,
                                                                      per_tensor_output_shift, in_lo, in_hi, in_scale, in_shift, out_lo, out_hi, out_scale, out_shift);
            } else {
                // All other types carry no extra payload.
                f_desc.f_param = std::make_shared<NodeFuseParams>(f_param_type);
            }

            size_t num_deps;
            primitive_id prim_id;
            size_t idx;
            ib >> num_deps;
            f_desc.deps.clear();
            for (size_t i = 0; i < num_deps; ++i) {
                ib >> prim_id;
                ib >> idx;
                f_desc.deps.emplace_back(std::make_pair(prim_id, idx));
            }
            ib >> num_deps;
            f_desc.fused_deps.clear();
            for (size_t i = 0; i < num_deps; ++i) {
                ib >> prim_id;
                ib >> idx;
                f_desc.fused_deps[prim_id] = idx;
            }
            ib >> f_desc.outer_dep_start_idx;
            ib >> f_desc.total_num_deps;
            fused_prims.emplace_back(f_desc);
        }
    }
#ifdef ENABLE_ONEDNN_FOR_GPU
    // onednn post-op descriptors; enums/tags read back as raw bytes.
    size_t num_fused_prims;
    ib >> num_fused_prims;
    fused_prims_onednn.resize(num_fused_prims);
    for (size_t idx = 0; idx < num_fused_prims; ++idx) {
        ib >> make_data(&fused_prims_onednn[idx].op_type, sizeof(onednn_post_op_type));
        ib >> fused_prims_onednn[idx].mem_offset;
        ib >> fused_prims_onednn[idx].mem_dep;
        ib >> make_data(&fused_prims_onednn[idx].tag, sizeof(dnnl::memory::format_tag));
        ib >> fused_prims_onednn[idx].flatten;
        ib >> fused_prims_onednn[idx].dims;
        ib >> make_data(&fused_prims_onednn[idx].dt, sizeof(dnnl::memory::data_type));
    }
#endif  // ENABLE_ONEDNN_FOR_GPU
}
|
||||
|
||||
/* ----------------------------------------- */
|
||||
/* Onednn fused operations integration logic */
|
||||
/* ----------------------------------------- */
|
||||
|
@ -198,14 +198,4 @@ static void generate_anchors(unsigned int base_size,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Serializes base primitive_inst state, then the precomputed anchors.
// Order must mirror proposal_inst::load().
void proposal_inst::save(BinaryOutputBuffer& ob) const {
    parent::save(ob);
    ob << _anchors;
}
|
||||
|
||||
// Restores base primitive_inst state, then the precomputed anchors.
// Order must mirror proposal_inst::save().
void proposal_inst::load(BinaryInputBuffer& ib) {
    parent::load(ib);
    ib >> _anchors;
}
|
||||
} // namespace cldnn
|
||||
|
@ -42,18 +42,4 @@ void read_value_inst::update_output_memory() {
|
||||
const auto& variable = get_network().get_variable(variable_id());
|
||||
set_output_memory(variable.get_memory(), false, 0);
|
||||
}
|
||||
|
||||
// Serializes base primitive_inst state, then the state-variable id this
// read_value is bound to. Order must mirror read_value_inst::load().
void read_value_inst::save(cldnn::BinaryOutputBuffer& ob) const {
    parent::save(ob);

    ob << variable_id();
}
|
||||
|
||||
// Restores base primitive_inst state, then rebinds this read_value to its
// serialized state-variable id. Order must mirror read_value_inst::save().
void read_value_inst::load(cldnn::BinaryInputBuffer& ib) {
    parent::load(ib);

    // Local renamed from `variable_id` to avoid shadowing the variable_id()
    // accessor of this class.
    std::string var_id;
    ib >> var_id;
    set_variable_id(var_id);
}
|
||||
} // namespace cldnn
|
||||
|
@ -275,14 +275,4 @@ void reorder_inst::update_output_memory() {
|
||||
}
|
||||
_mem_allocated = false;
|
||||
}
|
||||
|
||||
// Serializes base primitive_inst state, then the reinterpret-required flag.
// Order must mirror reorder_inst::load().
void reorder_inst::save(cldnn::BinaryOutputBuffer& ob) const {
    parent::save(ob);
    ob << _req_reinterpr;
}
|
||||
|
||||
// Restores base primitive_inst state, then the reinterpret-required flag.
// Order must mirror reorder_inst::save().
void reorder_inst::load(cldnn::BinaryInputBuffer& ib) {
    parent::load(ib);
    ib >> _req_reinterpr;
}
|
||||
} // namespace cldnn
|
||||
|
@ -57,6 +57,7 @@ public:
|
||||
return DeviceFeaturesKey();
|
||||
}
|
||||
virtual const std::string GetName() const { return kernelName; }
|
||||
virtual void GetUpdateDispatchDataFunc(KernelData& kd) const { }
|
||||
|
||||
static const primitive_db& get_db() { return db; }
|
||||
|
||||
|
@ -145,6 +145,14 @@ KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params, co
|
||||
return GetNaiveBestKernel(allImplementations, params, options);
|
||||
}
|
||||
|
||||
std::shared_ptr<KernelBase> kernel_selector_base::GetImplementation(std::string& kernel_name) const {
|
||||
for (auto& impl : implementations) {
|
||||
if (impl->GetName().compare(kernel_name) == 0)
|
||||
return impl;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
KernelList kernel_selector_base::GetAllImplementations(const Params& params, const optional_params& options, KernelType kType) const {
|
||||
using PriorityPair = std::pair<KernelsPriority, std::shared_ptr<KernelBase>>;
|
||||
auto comparePriority = [](const PriorityPair& firstImpl, const PriorityPair& secondImpl) {
|
||||
|
@ -23,6 +23,7 @@ public:
|
||||
virtual ~kernel_selector_base() {}
|
||||
|
||||
KernelData get_best_kernel(const Params& params, const optional_params& options) const;
|
||||
std::shared_ptr<KernelBase> GetImplementation(std::string& kernel_name) const;
|
||||
|
||||
protected:
|
||||
template <typename T>
|
||||
|
@ -81,6 +81,17 @@ bool ActivationKernelBase::Validate(const Params& p, const optional_params& o) c
|
||||
return true;
|
||||
}
|
||||
|
||||
// Installs the callback that recomputes GWS/LWS (and the skip-execution
// flag) for dynamic shapes at execution time, without rebuilding the kernel.
void ActivationKernelBase::GetUpdateDispatchDataFunc(KernelData& kd) const {
    // Lambda parameter renamed (was `kd`, shadowing the function parameter);
    // it refers to whichever KernelData instance the callback is invoked on.
    kd.update_dispatch_data_func = [this](const Params& params, KernelData& kernel_data) {
        const auto& prim_params = static_cast<const activation_params&>(params);
        auto dispatchData = SetDefault(prim_params);
        OPENVINO_ASSERT(kernel_data.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
        kernel_data.kernels[0].params.workGroups.global = dispatchData.gws;
        kernel_data.kernels[0].params.workGroups.local = dispatchData.lws;
        kernel_data.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
    };
}
|
||||
|
||||
KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
|
||||
if (!Validate(params, options)) {
|
||||
return {};
|
||||
@ -94,14 +105,7 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
|
||||
const auto& prim_params = static_cast<const activation_params&>(params);
|
||||
auto dispatchData = SetDefault(prim_params);
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
GetUpdateDispatchDataFunc(kd);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
|
||||
|
@ -57,5 +57,6 @@ protected:
|
||||
virtual JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const;
|
||||
virtual DispatchData SetDefault(const activation_params& arg) const;
|
||||
KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const;
|
||||
void GetUpdateDispatchDataFunc(KernelData& kd) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
|
@ -113,15 +113,7 @@ ArgMaxMinKernelBase::DispatchData ArgMaxMinKernelAxis::SetDefault(const arg_max_
|
||||
return dispatchData;
|
||||
}
|
||||
|
||||
KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const optional_params& options) const {
|
||||
if (!Validate(params, options)) {
|
||||
return {};
|
||||
}
|
||||
const arg_max_min_params& orgParams = static_cast<const arg_max_min_params&>(params);
|
||||
bool is_dynamic = orgParams.has_dynamic_tensors();
|
||||
|
||||
auto dispatchData = SetDefault(orgParams);
|
||||
KernelData kd = KernelData::Default<arg_max_min_params>(params);
|
||||
void ArgMaxMinKernelAxis::GetUpdateDispatchDataFunc(KernelData& kd) const {
|
||||
kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
|
||||
const auto& prim_params = static_cast<const arg_max_min_params&>(params);
|
||||
auto dispatchData = SetDefault(prim_params);
|
||||
@ -143,6 +135,18 @@ KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const opti
|
||||
kd.internalBufferSizes.push_back(ops_size * elem_size);
|
||||
kd.internalBufferDataType = prim_params.inputs[0].GetDType();
|
||||
};
|
||||
}
|
||||
|
||||
KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const optional_params& options) const {
|
||||
if (!Validate(params, options)) {
|
||||
return {};
|
||||
}
|
||||
const arg_max_min_params& orgParams = static_cast<const arg_max_min_params&>(params);
|
||||
bool is_dynamic = orgParams.has_dynamic_tensors();
|
||||
|
||||
auto dispatchData = SetDefault(orgParams);
|
||||
KernelData kd = KernelData::Default<arg_max_min_params>(params);
|
||||
GetUpdateDispatchDataFunc(kd);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
|
@ -19,5 +19,6 @@ public:
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
private:
|
||||
bool Validate(const Params&, const optional_params&) const override;
|
||||
void GetUpdateDispatchDataFunc(KernelData& kd) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
|
@ -40,6 +40,17 @@ ArgMaxMinKernelBase::DispatchData ArgMaxMinKernelBase::SetDefault(const arg_max_
|
||||
return dispatchData;
|
||||
}
|
||||
|
||||
void ArgMaxMinKernelBase::GetUpdateDispatchDataFunc(KernelData& kd) const {
    // Install the callback that refreshes work-group sizes once the actual
    // shapes are known (dynamic-shape execution path). Captures `this` so the
    // virtual SetDefault() of the concrete kernel is used.
    kd.update_dispatch_data_func = [this](const Params& params, KernelData& kernel_data) {
        const auto& new_params = static_cast<const arg_max_min_params&>(params);
        const auto dispatch = SetDefault(new_params);

        OPENVINO_ASSERT(kernel_data.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
        auto& kernel = kernel_data.kernels[0];
        kernel.params.workGroups.global = dispatch.gws;
        kernel.params.workGroups.local = dispatch.lws;
        kernel.skip_execution = KernelData::SkipKernelExecution(new_params);
    };
}
|
||||
|
||||
KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
|
||||
if (!Validate(params, options)) {
|
||||
return {};
|
||||
@ -50,14 +61,7 @@ KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, cons
|
||||
DispatchData dispatchData = SetDefault(orgParams);
|
||||
|
||||
KernelData kd = KernelData::Default<arg_max_min_params>(params);
|
||||
kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
|
||||
const auto& prim_params = static_cast<const arg_max_min_params&>(params);
|
||||
auto dispatchData = SetDefault(prim_params);
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
GetUpdateDispatchDataFunc(kd);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
|
@ -55,5 +55,6 @@ protected:
|
||||
virtual JitConstants GetJitConstants(const arg_max_min_params& params) const;
|
||||
virtual DispatchData SetDefault(const arg_max_min_params& params) const;
|
||||
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
|
||||
void GetUpdateDispatchDataFunc(KernelData& kd) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
|
@ -64,6 +64,17 @@ bool BorderKernelBase::SkipKernelExecution(const border_params& params) const {
|
||||
return params.outputs[0].LogicalSize() == 0;
|
||||
}
|
||||
|
||||
void BorderKernelBase::GetUpdateDispatchDataFunc(KernelData& kd) const {
    // Install the callback that refreshes work-group sizes once the actual
    // shapes are known (dynamic-shape execution path). Captures `this`
    // because both SetDefault() and SkipKernelExecution() are members.
    kd.update_dispatch_data_func = [this](const Params& params, KernelData& kernel_data) {
        const auto& new_params = static_cast<const border_params&>(params);
        const auto dispatch = SetDefault(new_params);

        OPENVINO_ASSERT(kernel_data.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
        auto& kernel = kernel_data.kernels[0];
        kernel.params.workGroups.global = dispatch.gws;
        kernel.params.workGroups.local = dispatch.lws;
        // Uses this class's own SkipKernelExecution overload (zero-size output).
        kernel.skip_execution = SkipKernelExecution(new_params);
    };
}
|
||||
|
||||
KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
|
||||
const optional_params& options) const {
|
||||
assert(params.GetType() == KernelType::BORDER);
|
||||
@ -73,14 +84,7 @@ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
|
||||
|
||||
auto dispatchData = SetDefault(prim_params);
|
||||
KernelData k_data = KernelData::Default<border_params>(params);
|
||||
k_data.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
|
||||
const auto& prim_params = static_cast<const border_params&>(params);
|
||||
auto dispatchData = SetDefault(prim_params);
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = SkipKernelExecution(prim_params);
|
||||
};
|
||||
GetUpdateDispatchDataFunc(k_data);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
|
||||
|
@ -55,5 +55,6 @@ protected:
|
||||
DispatchData SetDefault(const border_params& params) const;
|
||||
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
|
||||
bool SkipKernelExecution(const border_params& params) const;
|
||||
void GetUpdateDispatchDataFunc(KernelData& kd) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
|
@ -76,6 +76,17 @@ static std::string GetInputBlockND(const broadcast_params& params) {
|
||||
return str_result;
|
||||
}
|
||||
|
||||
void BroadcastKernelBase::GetUpdateDispatchDataFunc(KernelData& kd) const {
    // Install the callback that refreshes work-group sizes once the actual
    // shapes are known (dynamic-shape execution path). The lambda is
    // captureless: SetDefault() is static for this kernel family.
    kd.update_dispatch_data_func = [](const Params& params, KernelData& kernel_data) {
        const auto& new_params = static_cast<const broadcast_params&>(params);
        const auto dispatch = SetDefault(new_params);

        OPENVINO_ASSERT(kernel_data.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
        auto& kernel = kernel_data.kernels[0];
        kernel.params.workGroups.global = dispatch.gws;
        kernel.params.workGroups.local = dispatch.lws;
        kernel.skip_execution = KernelData::SkipKernelExecution(new_params);
    };
}
|
||||
|
||||
KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params,
|
||||
const optional_params& options) const {
|
||||
assert(params.GetType() == KernelType::BROADCAST);
|
||||
@ -84,15 +95,7 @@ KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params,
|
||||
|
||||
auto dispatchData = SetDefault(prim_params);
|
||||
KernelData k_data = KernelData::Default<broadcast_params>(params);
|
||||
|
||||
k_data.update_dispatch_data_func = [](const Params& params, KernelData& kd) {
|
||||
const auto& prim_params = static_cast<const broadcast_params&>(params);
|
||||
auto dispatchData = SetDefault(prim_params);
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
GetUpdateDispatchDataFunc(k_data);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
cldnn_jit.AddConstant(MakeJitConstant("INPUT0_BLOCK_ND", GetInputBlockND(prim_params)));
|
||||
|
@ -37,5 +37,6 @@ protected:
|
||||
JitConstants GetJitConstants(const broadcast_params& params) const;
|
||||
static DispatchData SetDefault(const broadcast_params& params);
|
||||
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
|
||||
void GetUpdateDispatchDataFunc(KernelData& kd) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
|
@ -100,14 +100,7 @@ ConcatenationKernelBase::DispatchData ConcatenationKernelBase::SetDefault(const
|
||||
return dispatchData;
|
||||
}
|
||||
|
||||
KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
|
||||
if (!Validate(params, options)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
const concatenation_params& orgParams = static_cast<const concatenation_params&>(params);
|
||||
KernelData kd = KernelData::Default<concatenation_params>(params, orgParams.inputs.size());
|
||||
|
||||
void ConcatenationKernelBase::GetUpdateDispatchDataFunc(KernelData& kd) const {
|
||||
kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
|
||||
const auto& prim_params = static_cast<const concatenation_params&>(params);
|
||||
uint32_t lastOffset = 0;
|
||||
@ -139,6 +132,16 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params,
|
||||
lastOffset += (uint32_t)input.GetDims()[concatChannelIndex].v;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
|
||||
if (!Validate(params, options)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
const concatenation_params& orgParams = static_cast<const concatenation_params&>(params);
|
||||
KernelData kd = KernelData::Default<concatenation_params>(params, orgParams.inputs.size());
|
||||
GetUpdateDispatchDataFunc(kd);
|
||||
|
||||
bool is_dynamic = orgParams.has_dynamic_tensors();
|
||||
uint32_t lastOffset = 0;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user