[GPU] Transformed IR serialization for dynamic models (#16169)
* IR serialization for dynamic models
* added ShapeOf1To3 transformation pass
* fixed input/output type mismatch
* removed unnecessary code
* moved ConvertShapeOf1To3 from common to the GPU plugin
* updated copyright year
* fixed build errors
parent 8477bc8897
commit e348481849
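For context, this change makes GPU model caching usable for models with dynamic shapes: Export() writes the transformed IR for dynamic models and the compiled cldnn blob for static ones, and ImportNetwork() dispatches on a flag written at the start of the stream. A minimal round-trip sketch using the InferenceEngine 1.0 API that this patch targets (file names are placeholders):

    #include <fstream>
    #include <ie_core.hpp>

    int main() {
        InferenceEngine::Core core;
        // The network may contain dynamic shapes; with this patch its
        // transformed IR is what gets serialized on export.
        auto network = core.ReadNetwork("model.xml");
        auto compiled = core.LoadNetwork(network, "GPU");

        std::ofstream out("model.blob", std::ios::binary);
        compiled.Export(out);  // dynamic model: transformed IR; static: cldnn blob
        out.close();

        std::ifstream in("model.blob", std::ios::binary);
        auto imported = core.ImportNetwork(in, "GPU");  // reads the flag, picks the path
        return 0;
    }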
src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp

@@ -24,8 +24,9 @@ class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault
 public:
     typedef std::shared_ptr<CompiledModel> Ptr;
 
-    CompiledModel(InferenceEngine::CNNNetwork &network, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config);
-    CompiledModel(std::istream& networkModel, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config);
+    CompiledModel(InferenceEngine::CNNNetwork &network, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config,
+                  InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
+    CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config);
 
     void Export(std::ostream& networkModel) override;
     std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;
@@ -46,6 +47,7 @@ public:
     ExecutionConfig m_config;
     InferenceEngine::ITaskExecutor::Ptr m_taskExecutor;
     InferenceEngine::ITaskExecutor::Ptr m_waitExecutor;
+    InferenceEngine::CNNNetwork m_network;
 };
 
 }  // namespace intel_gpu
src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp

@@ -43,7 +43,9 @@ public:
     Graph(InferenceEngine::CNNNetwork& network,
           RemoteContextImpl::Ptr context,
           const ExecutionConfig& config,
-          uint16_t stream_id = 0);
+          uint16_t stream_id = 0,
+          InferenceEngine::InputsDataMap* inputs = nullptr,
+          InferenceEngine::OutputsDataMap* outputs = nullptr);
     Graph(cldnn::BinaryInputBuffer& ib, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id = 0);
     explicit Graph(std::shared_ptr<Graph> graph, uint16_t stream_id = 0);
     void Export(cldnn::BinaryOutputBuffer &ob);
src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_api_helper.hpp

@@ -16,7 +16,7 @@ public:
     static bool is_legacy_property(const std::pair<std::string, ov::Any>& property, bool is_new_api);
     static bool is_new_api_property(const std::pair<std::string, ov::Any>& property);
     static std::vector<std::string> get_supported_configs();
-    static std::vector<std::string> get_supported_metrics(bool model_caching_enabled);
+    static std::vector<std::string> get_supported_metrics();
 };
 
 }  // namespace intel_gpu
src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp

@@ -18,7 +18,6 @@ namespace intel_gpu {
 class Plugin : public InferenceEngine::IInferencePlugin {
     struct impl;
     std::shared_ptr<impl> _impl;
-    bool isModelCachingEnabled = true;
 
     std::string default_device_id = "0";
     // key: device_id, value: cldnn device
src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp

@@ -82,7 +82,8 @@ public:
 class Program {
 public:
     Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
-            bool createTopologyOnly = false, bool partialBuild = false);
+            bool createTopologyOnly = false, bool partialBuild = false,
+            InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);
     Program(cldnn::engine& engine, const ExecutionConfig& config)
         : m_max_batch(1)
         , m_curBatch(-1)
src/plugins/intel_gpu/src/plugin/compiled_model.cpp

@@ -4,6 +4,7 @@
 
 #include "ie_metric_helpers.hpp"
 #include "intel_gpu/graph/serialization/binary_buffer.hpp"
+#include "intel_gpu/graph/serialization/layout_serializer.hpp"
 #include "intel_gpu/graph/serialization/string_serializer.hpp"
 #include "intel_gpu/graph/serialization/utils.hpp"
 #include "intel_gpu/graph/serialization/vector_serializer.hpp"
@@ -40,7 +41,9 @@ namespace intel_gpu {
 
 CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network,
                              InferenceEngine::RemoteContext::Ptr context,
-                             const ExecutionConfig& config) :
+                             const ExecutionConfig& config,
+                             InferenceEngine::InputsDataMap* inputs,
+                             InferenceEngine::OutputsDataMap* outputs) :
     InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr {
         if (config.get_property(ov::intel_gpu::exclusive_async_requests)) {
             //exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior
@@ -56,15 +59,16 @@ CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network,
     m_context(context),
     m_config(config),
     m_taskExecutor{ _taskExecutor },
-    m_waitExecutor(executorManager()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor" })) {
-    auto graph_base = std::make_shared<Graph>(network, get_context_impl(m_context), m_config, 0);
+    m_waitExecutor(executorManager()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor" })),
+    m_network(network) {
+    auto graph_base = std::make_shared<Graph>(network, get_context_impl(m_context), m_config, 0, inputs, outputs);
     for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) {
         auto graph = n == 0 ? graph_base : std::make_shared<Graph>(graph_base, n);
         m_graphs.push_back(graph);
     }
 }
 
-CompiledModel::CompiledModel(std::istream& networkModel, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config) :
+CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config) :
     InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr {
         if (config.get_property(ov::intel_gpu::exclusive_async_requests)) {
             //exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior
@@ -82,159 +86,6 @@ CompiledModel::CompiledModel(std::istream& networkModel, InferenceEngine::Remote
     m_taskExecutor{ _taskExecutor },
     m_waitExecutor(executorManager()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor" })) {
     auto context_impl = get_context_impl(m_context);
     auto& engine = context_impl->get_engine();
-
-    cldnn::BinaryInputBuffer ib(networkModel, engine);
-
-    // InputsInfo and OutputsInfo for CNNNetwork
-    {
-        size_t inputSize;
-        ib >> inputSize;
-
-        InputsDataMap inputs;
-
-        for (size_t idx = 0; idx < inputSize; ++idx) {
-            std::string name;
-            std::string precision;
-            std::string layout;
-            InferenceEngine::SizeVector dims;
-            ib >> name;
-            ib >> precision;
-            ib >> layout;
-            ib >> dims;
-
-            DataPtr input = std::make_shared<Data>(name, Precision::FromStr(precision), cldnn::serial_util::layout_from_string(layout));
-            input->setDims(dims);
-            InputInfo::Ptr infoNew = std::make_shared<InputInfo>();
-            infoNew->setInputData(input);
-            inputs.emplace(std::make_pair(name, infoNew));
-        }
-
-        size_t outputSize;
-        ib >> outputSize;
-
-        OutputsDataMap outputs;
-
-        for (size_t idx = 0; idx < outputSize; ++idx) {
-            std::string name;
-            std::string precision;
-            std::string layout;
-            InferenceEngine::SizeVector dims;
-            ib >> name;
-            ib >> precision;
-            ib >> layout;
-            ib >> dims;
-
-            DataPtr output = std::make_shared<Data>(name, Precision::FromStr(precision), cldnn::serial_util::layout_from_string(layout));
-            output->setDims(dims);
-            outputs.emplace(std::make_pair(name, output));
-        }
-
-        setNetworkInputs(inputs);
-        setNetworkOutputs(outputs);
-    }
-
-    {
-        std::vector<std::shared_ptr<const ov::Node>> new_params;
-        size_t num_params;
-        ib >> num_params;
-
-        for (size_t idx = 0; idx < num_params; ++idx) {
-            std::string param_name;
-            ib >> param_name;
-            ov::element::Type param_element_type;
-            std::string str_element_type;
-            ib >> str_element_type;
-            std::stringstream oss(str_element_type);
-            oss >> param_element_type;
-            ov::Shape param_shape;
-            size_t shape_size;
-            ib >> shape_size;
-            param_shape.resize(shape_size);
-            for (size_t i = 0; i < shape_size; ++i) {
-                size_t dim;
-                ib >> dim;
-                param_shape[i] = dim;
-            }
-            std::string str_layout;
-            ib >> str_layout;
-            ov::Layout param_layout(str_layout);
-            std::unordered_set<std::string> param_names;
-            size_t num_names;
-            ib >> num_names;
-            for (size_t i = 0; i < num_names; ++i) {
-                std::string name;
-                ib >> name;
-                param_names.emplace(name);
-            }
-
-            auto new_param = std::make_shared<ov::op::v0::Parameter>(param_element_type, param_shape);
-            new_param->set_friendly_name(param_name);
-            new_param->set_element_type(param_element_type);
-            new_param->set_layout(param_layout);
-            new_param->output(0).get_tensor().set_names(param_names);
-            new_param->validate_and_infer_types();
-            new_params.emplace_back(new_param);
-        }
-
-        setInputs(new_params);
-    }
-
-    {
-        std::vector<std::shared_ptr<const ov::Node>> new_results;
-        size_t num_results;
-        ib >> num_results;
-
-        for (size_t idx = 0; idx < num_results; ++idx) {
-            ov::element::Type fake_element_type;
-            std::string str_element_type;
-            ib >> str_element_type;
-            std::stringstream oss(str_element_type);
-            oss >> fake_element_type;
-
-            ov::Shape fake_shape;
-            size_t shape_size;
-            ib >> shape_size;
-            fake_shape.resize(shape_size);
-            for (size_t i = 0; i < shape_size; ++i) {
-                size_t dim;
-                ib >> dim;
-                fake_shape[i] = dim;
-            }
-
-            std::string fake_name;
-            ib >> fake_name;
-
-            std::string param_name;
-            ib >> param_name;
-
-            std::string str_layout;
-            ib >> str_layout;
-            ov::Layout param_layout(str_layout);
-
-            std::unordered_set<std::string> param_names;
-            size_t num_names;
-            ib >> num_names;
-            for (size_t i = 0; i < num_names; ++i) {
-                std::string name;
-                ib >> name;
-                param_names.emplace(name);
-            }
-
-            auto fake_param = std::make_shared<ov::op::v0::Parameter>(fake_element_type, fake_shape);
-            fake_param->set_friendly_name(fake_name);
-            fake_param->validate_and_infer_types();
-
-            auto new_result = std::make_shared<ov::op::v0::Result>(fake_param);
-            new_result->set_friendly_name(param_name);
-            new_result->set_layout(param_layout);
-            new_result->output(0).get_tensor().set_names(param_names);
-            new_result->validate_and_infer_types();
-            new_results.emplace_back(new_result);
-        }
-
-        setOutputs(new_results);
-    }
-
     auto pos = ib.tellg();
     for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) {
@@ -383,11 +234,7 @@ void CompiledModel::Export(std::ostream& networkModel) {
             std::stringstream ss;
             ss << param_element_type;
             ob << ss.str();
-            ov::Shape static_shape = param_shape.get_shape();
-            ob << static_shape.size();
-            for (size_t dim : static_shape) {
-                ob << dim;
-            }
+            ob << param_shape;
             ob << param_layout.to_string();
             ob << param_names.size();
             for (auto name : param_names) {
@@ -415,11 +262,7 @@ void CompiledModel::Export(std::ostream& networkModel) {
             std::stringstream ss;
             ss << fake_element_type;
             ob << ss.str();
-            ov::Shape static_shape = fake_shape.get_shape();
-            ob << static_shape.size();
-            for (size_t dim : static_shape) {
-                ob << dim;
-            }
+            ob << fake_shape;
             ob << fake_name;
             ob << param_name;
             ob << param_layout.to_string();
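The replaced lines in both hunks collapsed each shape to a static ov::Shape via get_shape(), which throws when any dimension is dynamic; streaming the ov::PartialShape as a whole keeps dynamic dimensions intact. A small illustration (the values are made up):

    // ov::PartialShape round-trips through the buffer operators used above,
    // so a dynamic dimension survives export and import.
    ov::PartialShape shape{1, ov::Dimension::dynamic(), 224, 224};
    // shape.to_shape();  // would throw: the shape is not fully static
    // ob << shape;       // fine with the intel_gpu serialization helpers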
@@ -430,7 +273,14 @@ void CompiledModel::Export(std::ostream& networkModel) {
         }
     }
 
-    return m_graphs.front()->Export(ob);
+    if (m_graphs.front()->GetNetwork()->is_dynamic()) {
+        ob << true;
+        ov::pass::StreamSerialize serializer(networkModel, {}, ov::pass::Serialize::Version::UNSPECIFIED);
+        serializer.run_on_model(std::const_pointer_cast<ngraph::Function>(m_network.getFunction()));
+    } else {
+        ob << false;
+        m_graphs.front()->Export(ob);
+    }
 }
 
 std::shared_ptr<ngraph::Function> CompiledModel::GetExecGraphInfo() {
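ov::pass::StreamSerialize prepends a fixed header so a reader can seek straight to each section; the import path in plugin.cpp further down relies on exactly this layout. For reference, the header as defined in OpenVINO's openvino/pass/serialize.hpp:

    // Stream layout: DataHeader | custom data | constants | model XML
    struct DataHeader {
        size_t custom_data_offset;  // plugin-specific section
        size_t custom_data_size;
        size_t consts_offset;       // weights / constants blob
        size_t consts_size;
        size_t model_offset;        // serialized IR XML
        size_t model_size;
    };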
|
src/plugins/intel_gpu/src/plugin/graph.cpp

@@ -44,13 +44,14 @@ using namespace InferenceEngine::details;
 namespace ov {
 namespace intel_gpu {
 
-Graph::Graph(InferenceEngine::CNNNetwork& network, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id)
+Graph::Graph(InferenceEngine::CNNNetwork& network, RemoteContextImpl::Ptr context, const ExecutionConfig& config, uint16_t stream_id,
+             InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs)
     : m_context(context)
     , m_networkName(network.getName())
     , m_config(config)
     , m_stream_id(stream_id)
     , m_state(0) {
-    m_program = std::make_shared<Program>(network, get_engine(), config);
+    m_program = std::make_shared<Program>(network, get_engine(), config, false, false, inputs, outputs);
     if (m_program->m_max_batch > 1)
         m_config.set_property(ov::intel_gpu::max_dynamic_batch(m_program->m_max_batch));
     Build();
src/plugins/intel_gpu/src/plugin/legacy_api_helper.cpp

@@ -244,7 +244,7 @@ std::vector<std::string> LegacyAPIHelper::get_supported_configs() {
     return supported_config;
 }
 
-std::vector<std::string> LegacyAPIHelper::get_supported_metrics(bool model_caching_enabled) {
+std::vector<std::string> LegacyAPIHelper::get_supported_metrics() {
     std::vector<std::string> supported_metrics = {
         METRIC_KEY(AVAILABLE_DEVICES),
         METRIC_KEY(SUPPORTED_METRICS),
@@ -257,13 +257,12 @@ std::vector<std::string> LegacyAPIHelper::get_supported_metrics(bool model_cachi
         METRIC_KEY(DEVICE_GOPS),
         METRIC_KEY(OPTIMAL_BATCH_SIZE),
         METRIC_KEY(MAX_BATCH_SIZE),
+        METRIC_KEY(IMPORT_EXPORT_SUPPORT),
         GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE),
        GPU_METRIC_KEY(UARCH_VERSION),
         GPU_METRIC_KEY(EXECUTION_UNITS_COUNT),
         GPU_METRIC_KEY(MEMORY_STATISTICS),
     };
-    if (model_caching_enabled)
-        supported_metrics.push_back(METRIC_KEY(IMPORT_EXPORT_SUPPORT));
 
     return supported_metrics;
 }
src/plugins/intel_gpu/src/plugin/ops/gather.cpp

@@ -29,7 +29,7 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr<T>& op, const int64_t
     // GPU primitive does not support i64 inputs,
     // so we need additional reorders to convert them to i32
     auto reorderPrimName = inputs[portIndex].pid + "_" + op->get_friendly_name() + Program::m_preProcessTag;
-    auto targetFormat = cldnn::format::get_default_format(op->get_input_shape(portIndex).size());
+    auto targetFormat = cldnn::format::get_default_format(op->get_input_partial_shape(portIndex).size());
     auto preprocessPrim = cldnn::reorder(reorderPrimName,
                                          inputs[portIndex],
                                          targetFormat,
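get_input_shape() returns a static ov::Shape and throws for dynamic inputs, while get_input_partial_shape(...).size() needs only the rank, which typically stays known for the dynamic models this PR targets. For example:

    ov::PartialShape ps{1, ov::Dimension::dynamic(), 224, 224};
    size_t rank = ps.size();  // 4: the rank is static even though dim 1 is not
    // ps.to_shape();         // would throw, since ps.is_static() == false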
|
src/plugins/intel_gpu/src/plugin/plugin.cpp

@@ -16,6 +16,10 @@
 #include <ie_algorithm.hpp>
 
 #include "openvino/runtime/intel_gpu/properties.hpp"
+#include "intel_gpu/graph/serialization/layout_serializer.hpp"
+#include "intel_gpu/graph/serialization/string_serializer.hpp"
+#include "intel_gpu/graph/serialization/utils.hpp"
+#include "intel_gpu/graph/serialization/vector_serializer.hpp"
 #include "intel_gpu/plugin/plugin.hpp"
 #include "intel_gpu/plugin/compiled_model.hpp"
 #include "intel_gpu/plugin/transformations_pipeline.hpp"
@@ -198,9 +202,6 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine
     {
         OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateExeNetwork");
         CompiledModel::Ptr exeNetwork = std::make_shared<CompiledModel>(transformedNetwork, context, config);
-        if (exeNetwork->m_graphs[0]->GetNetwork()->is_dynamic()) {
-            isModelCachingEnabled = false;
-        }
         update_memory_statistics(context->get_impl());
         return exeNetwork;
     }
@@ -369,8 +370,172 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istr
 
     {
         OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::ImportNetwork::CreateExeNetwork");
-        CompiledModel::Ptr exeNetwork = std::make_shared<CompiledModel>(networkModel, context, config);
-        exeNetwork->SetPointerToPlugin(shared_from_this());
+        cldnn::BinaryInputBuffer ib(networkModel, context->get_impl()->get_engine());
+
+        InputsDataMap inputs;
+        OutputsDataMap outputs;
+        std::vector<std::shared_ptr<const ov::Node>> new_params;
+        std::vector<std::shared_ptr<const ov::Node>> new_results;
+
+        // InputsInfo and OutputsInfo for CNNNetwork
+        {
+            size_t inputSize;
+            ib >> inputSize;
+
+            for (size_t idx = 0; idx < inputSize; ++idx) {
+                std::string name;
+                std::string precision;
+                std::string layout;
+                InferenceEngine::SizeVector dims;
+                ib >> name;
+                ib >> precision;
+                ib >> layout;
+                ib >> dims;
+
+                DataPtr input = std::make_shared<Data>(name, Precision::FromStr(precision), cldnn::serial_util::layout_from_string(layout));
+                input->setDims(dims);
+                InputInfo::Ptr infoNew = std::make_shared<InputInfo>();
+                infoNew->setInputData(input);
+                inputs.emplace(std::make_pair(name, infoNew));
+            }
+
+            size_t outputSize;
+            ib >> outputSize;
+
+            for (size_t idx = 0; idx < outputSize; ++idx) {
+                std::string name;
+                std::string precision;
+                std::string layout;
+                InferenceEngine::SizeVector dims;
+                ib >> name;
+                ib >> precision;
+                ib >> layout;
+                ib >> dims;
+
+                DataPtr output = std::make_shared<Data>(name, Precision::FromStr(precision), cldnn::serial_util::layout_from_string(layout));
+                output->setDims(dims);
+                outputs.emplace(std::make_pair(name, output));
+            }
+        }
+
+        {
+            size_t num_params;
+            ib >> num_params;
+
+            for (size_t idx = 0; idx < num_params; ++idx) {
+                std::string param_name;
+                ib >> param_name;
+                ov::element::Type param_element_type;
+                std::string str_element_type;
+                ib >> str_element_type;
+                std::stringstream oss(str_element_type);
+                oss >> param_element_type;
+                ov::PartialShape param_shape;
+                ib >> param_shape;
+                std::string str_layout;
+                ib >> str_layout;
+                ov::Layout param_layout(str_layout);
+                std::unordered_set<std::string> param_names;
+                size_t num_names;
+                ib >> num_names;
+                for (size_t i = 0; i < num_names; ++i) {
+                    std::string name;
+                    ib >> name;
+                    param_names.emplace(name);
+                }
+
+                auto new_param = std::make_shared<ov::op::v0::Parameter>(param_element_type, param_shape);
+                new_param->set_friendly_name(param_name);
+                new_param->set_element_type(param_element_type);
+                new_param->set_layout(param_layout);
+                new_param->output(0).get_tensor().set_names(param_names);
+                new_param->validate_and_infer_types();
+                new_params.emplace_back(new_param);
+            }
+        }
+
+        {
+            size_t num_results;
+            ib >> num_results;
+
+            for (size_t idx = 0; idx < num_results; ++idx) {
+                ov::element::Type fake_element_type;
+                std::string str_element_type;
+                ib >> str_element_type;
+                std::stringstream oss(str_element_type);
+                oss >> fake_element_type;
+
+                ov::PartialShape fake_shape;
+                ib >> fake_shape;
+
+                std::string fake_name;
+                ib >> fake_name;
+
+                std::string param_name;
+                ib >> param_name;
+
+                std::string str_layout;
+                ib >> str_layout;
+                ov::Layout param_layout(str_layout);
+
+                std::unordered_set<std::string> param_names;
+                size_t num_names;
+                ib >> num_names;
+                for (size_t i = 0; i < num_names; ++i) {
+                    std::string name;
+                    ib >> name;
+                    param_names.emplace(name);
+                }
+
+                auto fake_param = std::make_shared<ov::op::v0::Parameter>(fake_element_type, fake_shape);
+                fake_param->set_friendly_name(fake_name);
+                fake_param->validate_and_infer_types();
+
+                auto new_result = std::make_shared<ov::op::v0::Result>(fake_param);
+                new_result->set_friendly_name(param_name);
+                new_result->set_layout(param_layout);
+                new_result->output(0).get_tensor().set_names(param_names);
+                new_result->validate_and_infer_types();
+                new_results.emplace_back(new_result);
+            }
+        }
+
+        CompiledModel::Ptr exeNetwork;
+        bool is_dynamic;
+        ib >> is_dynamic;
+
+        if (is_dynamic) {
+            std::string xmlString, xmlInOutString;
+            InferenceEngine::Blob::Ptr dataBlob;
+
+            ov::pass::StreamSerialize::DataHeader hdr = {};
+            networkModel.read(reinterpret_cast<char*>(&hdr), sizeof hdr);
+
+            // read blob content
+            networkModel.seekg(hdr.consts_offset);
+            if (hdr.consts_size) {
+                dataBlob = InferenceEngine::make_shared_blob<std::uint8_t>(
+                    InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, {hdr.consts_size}, InferenceEngine::Layout::C));
+                dataBlob->allocate();
+                networkModel.read(dataBlob->buffer(), hdr.consts_size);
+            }
+
+            // read XML content
+            networkModel.seekg(hdr.model_offset);
+            xmlString.resize(hdr.model_size);
+            networkModel.read(const_cast<char*>(xmlString.c_str()), hdr.model_size);
+
+            auto transformedNetwork = GetCore()->ReadNetwork(xmlString, std::move(dataBlob), true);
+            exeNetwork = std::make_shared<CompiledModel>(transformedNetwork, context, config, &inputs, &outputs);
+        } else {
+            exeNetwork = std::make_shared<CompiledModel>(ib, context, config);
+            exeNetwork->SetPointerToPlugin(shared_from_this());
+        }
+
+        exeNetwork->setNetworkInputs(inputs);
+        exeNetwork->setNetworkOutputs(outputs);
+        exeNetwork->setInputs(new_params);
+        exeNetwork->setOutputs(new_results);
         update_memory_statistics(context->get_impl());
         return exeNetwork;
     }
@@ -434,7 +599,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string,
     if (name == ov::supported_properties) {
         return decltype(ov::supported_properties)::value_type {get_supported_properties()};
     } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
-        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, LegacyAPIHelper::get_supported_metrics(isModelCachingEnabled));
+        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, LegacyAPIHelper::get_supported_metrics());
     } else if (name == METRIC_KEY(AVAILABLE_DEVICES)) {
         std::vector<std::string> availableDevices = { };
         for (auto const& dev : device_map)
@@ -517,7 +682,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string,
     } else if (name == METRIC_KEY(MAX_BATCH_SIZE) ||
                name == ov::max_batch_size) {
         return decltype(ov::max_batch_size)::value_type {static_cast<uint32_t>(get_max_batch_size(options))};
-    } else if (isModelCachingEnabled && name == METRIC_KEY(IMPORT_EXPORT_SUPPORT)) {
+    } else if (name == METRIC_KEY(IMPORT_EXPORT_SUPPORT)) {
         IE_SET_METRIC_RETURN(IMPORT_EXPORT_SUPPORT, true);
     } else if (name == ov::caching_properties) {
         std::vector<ov::PropertyName> cachingProperties;
@@ -603,8 +768,7 @@ std::vector<std::string> Plugin::get_device_capabilities(const cldnn::device_inf
     capabilities.push_back(ov::device::capability::INT8);
     if (info.supports_immad)
         capabilities.push_back(ov::intel_gpu::capability::HW_MATMUL);
-    if (isModelCachingEnabled)
-        capabilities.push_back(ov::device::capability::EXPORT_IMPORT);
+    capabilities.push_back(ov::device::capability::EXPORT_IMPORT);
 
     return capabilities;
 }
src/plugins/intel_gpu/src/plugin/program.cpp

@@ -120,14 +120,15 @@ bool Program::IsDynBatchModel(const std::shared_ptr<ov::Model>& model,
 }
 
 Program::Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
-                 bool createTopologyOnly = false, bool partialBuild = false)
+                 bool createTopologyOnly = false, bool partialBuild = false,
+                 InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs)
     : m_curBatch(-1)
     , m_config(config)
     , m_engine(engine)
     , queryMode(false) {
     // Extract inputs/outputs info from CNNNetwork
-    auto networkInputs = network.getInputsInfo();
-    auto networkOutputs = network.getOutputsInfo();
+    auto networkInputs = (inputs != nullptr) ? *inputs : network.getInputsInfo();
+    auto networkOutputs = (outputs != nullptr) ? *outputs : network.getOutputsInfo();
 
     auto func = network.getFunction();
     if (!func) {
src/plugins/intel_gpu/src/plugin/transformations/convert_shapeof.cpp (new file)

@@ -0,0 +1,32 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "convert_shapeof.hpp"
+
+#include <memory>
+#include <ngraph/pattern/op/wrap_type.hpp>
+#include <ngraph/rt_info.hpp>
+#include <openvino/opsets/opset1.hpp>
+#include <openvino/opsets/opset3.hpp>
+#include <vector>
+
+ov::intel_gpu::ConvertShapeOf1To3::ConvertShapeOf1To3() {
+    auto shapeof1 = ov::pass::pattern::wrap_type<ov::opset1::ShapeOf>();
+
+    matcher_pass_callback callback = [](ov::pass::pattern::Matcher& m) {
+        auto shapeof1 = std::dynamic_pointer_cast<ov::opset1::ShapeOf>(m.get_match_root());
+        if (!shapeof1) {
+            return false;
+        }
+
+        auto new_shapeof3 = std::make_shared<ov::opset3::ShapeOf>(shapeof1->input_value(0));
+        new_shapeof3->set_friendly_name(shapeof1->get_friendly_name());
+        ngraph::copy_runtime_info(shapeof1, new_shapeof3);
+        ngraph::replace_node(shapeof1, new_shapeof3);
+        return true;
+    };
+
+    auto m = std::make_shared<ngraph::pattern::Matcher>(shapeof1, "ConvertShapeOf1To3");
+    register_matcher(m, callback);
+}
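A usage sketch for the new pass (the tiny model below is a made-up example): it upgrades every opset1::ShapeOf to opset3::ShapeOf, the version the pipeline change further down keeps on GPU (ConvertShapeOf3 is disabled there).

    #include <openvino/core/model.hpp>
    #include <openvino/opsets/opset1.hpp>
    #include <openvino/pass/manager.hpp>
    #include "convert_shapeof.hpp"

    // Build a model holding an opset1::ShapeOf over a dynamic-rank-4 input.
    auto param = std::make_shared<ov::opset1::Parameter>(ov::element::f32,
                                                         ov::PartialShape::dynamic(4));
    auto shape_of = std::make_shared<ov::opset1::ShapeOf>(param);
    auto model = std::make_shared<ov::Model>(ov::OutputVector{shape_of->output(0)},
                                             ov::ParameterVector{param});

    ov::pass::Manager manager;
    manager.register_pass<ov::intel_gpu::ConvertShapeOf1To3>();
    manager.run_passes(model);  // the ShapeOf node is now an opset3::ShapeOf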
src/plugins/intel_gpu/src/plugin/transformations/convert_shapeof.hpp (new file)

@@ -0,0 +1,20 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/pass/graph_rewrite.hpp>
+#include <transformations_visibility.hpp>
+
+namespace ov {
+namespace intel_gpu {
+
+class ConvertShapeOf1To3 : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("ConvertShapeOf1To3", "0");
+    ConvertShapeOf1To3();
+};
+
+}  // namespace intel_gpu
+}  // namespace ov
src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp

@@ -29,6 +29,7 @@
 #include "transformations/einsum_decomposition.hpp"
 #include "transformations/convert_pooling_to_reduce.hpp"
 #include "transformations/decompose_reduce_for_false_keepdims.hpp"
+#include "transformations/convert_shapeof.hpp"
 
 #include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
 #include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
@@ -87,6 +88,7 @@
 #include <transformations/convert_precision.hpp>
 #include <transformations/init_node_info.hpp>
 #include <transformations/rt_info/fused_names_attribute.hpp>
+#include <transformations/op_conversions/convert_shapeof3.hpp>
 
 #include <transformations/low_precision/mark_dequantization_subgraph.hpp>
 #include <low_precision/pull_reshape_through_dequantization.hpp>
@@ -216,6 +218,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
             manager.register_pass<ov::pass::BidirectionalRNNSequenceDecomposition>();
         }
 
+        manager.register_pass<ConvertShapeOf1To3>();
         manager.register_pass<ov::pass::ConvertNMS1ToNMS9>();
         manager.register_pass<ov::pass::ConvertNMS3ToNMS9>();
         manager.register_pass<ov::pass::ConvertNMS4ToNMS9>();
@@ -419,7 +422,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         pass_config->disable<ov::pass::SimplifyCTCGreedyDecoderSeqLen>();
         pass_config->disable<ov::pass::ConvertSoftMax8ToSoftMax1>();
         pass_config->enable<ov::pass::ConvertGather8ToGather7>();
 
+        pass_config->disable<ov::pass::ConvertShapeOf3>();
         pass_config->enable<ov::pass::ConvertInterpolate1ToInterpolate4>();
 
         if (enableInt8) {