diff --git a/src/plugins/intel_cpu/src/async_infer_request.cpp b/src/plugins/intel_cpu/src/async_infer_request.cpp
index 7c80ee443c2..556f2a13d29 100644
--- a/src/plugins/intel_cpu/src/async_infer_request.cpp
+++ b/src/plugins/intel_cpu/src/async_infer_request.cpp
@@ -22,6 +22,6 @@ void ov::intel_cpu::AsyncInferRequest::throw_if_canceled() const {
     // only check ov::Cancelled exception.
     try {
         check_state();
-    } catch (ov::Busy& busy) {
+    } catch (ov::Busy&) {
     }
 }
diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
index 9985bcd7cee..4d92cacd878 100644
--- a/src/plugins/intel_cpu/src/compiled_model.cpp
+++ b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -364,7 +364,9 @@ ov::Any CompiledModel::GetMetric(const std::string& name) const {
 
 void CompiledModel::export_model(std::ostream& modelStream) const {
     ModelSerializer serializer(modelStream, extensionManager);
-    serializer << _model;
+    std::pair<std::shared_ptr<ov::Model>, const std::shared_ptr<ov::Model>> models =
+        std::make_pair(_model, _original_model);
+    serializer << models;
 }
 
 }   // namespace intel_cpu
diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp
index 4a2bf1386d4..d62bf016d68 100644
--- a/src/plugins/intel_cpu/src/infer_request.cpp
+++ b/src/plugins/intel_cpu/src/infer_request.cpp
@@ -503,23 +503,16 @@ ov::Tensor SyncInferRequest::get_compiled_tensor(const ov::Output<const ov::Node>& _port) const {
-    auto is_compiled_port = check_compiled_port(_port);
     auto port_name = get_port_name(_port);
     auto port = get_compiled_port(_port);
     auto compiled_tensor = ov::ISyncInferRequest::get_tensor(port);
-    auto is_imported_model = _compiled_model->get_property(ov::loaded_from_cache.name()).as<bool>();
-    if (is_imported_model && !is_compiled_port) {
-        _orig_ports_map[port_name] = _port;
-    }
-
-    // compiled port is normal case, it means there is no any precision/shape changes between graph and original model
-    // compiled port tensors are managed by InferRequest
-    if (is_compiled_port) {
+    // No precision change
+    auto is_precision_changed = _port_precision_changed[port_name];
+    if (!is_precision_changed)
        return compiled_tensor;
-    }
 
-    // if not compiled port means that there is precision/shape change between graph and original model
+    // If the precision has changed, a tensor with the original precision needs to be returned
     // port's data will be stored in _aux_tensors, and need converted to compiled tensor
     // input tensor: will be copied to compiled tensor when sent to do inference
     // output tensor: need copy compiled tensor to aux tensor and return aux tensor
@@ -541,7 +534,7 @@ ov::Tensor SyncInferRequest::get_tensor(const ov::Output<const ov::Node>& _port)
         _aux_tensors[port_name] = ov::Tensor(_orig_ports_map[port_name].get_element_type(), external_shape);
     }
 
-    // input tensor is in aux tensors, don't need copy any thing
+    // input tensor is in aux tensors, no need to copy anything
     auto& aux_tensor = _aux_tensors[port_name];
     if (is_input) {
         return aux_tensor;
@@ -593,43 +586,42 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& _port, const
         _tensor.get_size() == ov::shape_size(_port.get_shape())) {
         tensor = ov::Tensor(_tensor.get_element_type(), _port.get_shape(), _tensor.data());
     }
-
-    // In case of import model, we cannot get original model info from the imported_model, so have to update it when
-    // set_tensor if possible
     auto name = get_port_name(_port);
-    auto is_imported_model = _compiled_model->get_property(ov::loaded_from_cache.name()).as<bool>();
-    if (is_imported_model && !is_compiled_port) {
-        _orig_ports_map[name] = _port;
-        // _port_precision_changed[name] = get_compiled_port(_port).get_element_type() != _port.get_element_type();
-    }
+    auto is_precision_changed = _port_precision_changed[name];
 
-    // auto precision_changed = check_precision_changed(_port);
-    if (!is_compiled_port) {
-        auto _orig_port = _orig_ports_map[name];
-        if ((_orig_port.get_element_type() != _tensor.get_element_type()) &&
-            (port.get_element_type() != _tensor.get_element_type())) {
-            IE_THROW(ParameterMismatch) << "Failed to set input tensor with precision: " << _tensor.get_element_type()
-                                        << ", if model input tensor precision is: " << _port.get_element_type();
-        }
-        _aux_tensors[name] = _tensor;
-        tensor = ov::ISyncInferRequest::get_tensor(port);
-        tensor.set_shape(_tensor.get_shape());
-    } else {
-        if (_port.get_element_type() != _tensor.get_element_type()) {
-            // Import model cannot get original port info if it is chained in meta plugin, need convert tensor here
-            if (is_imported_model) {
-                _aux_tensors[name] = _tensor;
-                tensor = ov::ISyncInferRequest::get_tensor(port);
-                tensor.set_shape(_tensor.get_shape());
-            } else if (_orig_ports_map[name].get_element_type() == _tensor.get_element_type()) {
-                // origina_port precision tensor
+    // The precision has been changed
+    if (is_precision_changed) {
+        if (!is_compiled_port) {
+            // Original port
+            auto _orig_port = _orig_ports_map[name];
+            if (_orig_port.get_element_type() == _tensor.get_element_type()) {
+                // Original port with a tensor in the original port's precision
                 _aux_tensors[name] = _tensor;
                 tensor = ov::ISyncInferRequest::get_tensor(port);
                 tensor.set_shape(_tensor.get_shape());
+            } else if (port.get_element_type() == _tensor.get_element_type()) {
+                // Original port with a tensor in the compiled port's precision
+                tensor = _tensor;
             } else {
-                IE_THROW(ParameterMismatch)
-                    << "Failed to set input tensor with precision: " << _tensor.get_element_type()
-                    << ", if model input tensor precision is: " << _port.get_element_type();
+                OPENVINO_THROW("Failed to set input tensor with precision: ",
+                               _tensor.get_element_type(),
+                               ", if model input tensor precision is: ",
+                               port.get_element_type(),
+                               " or ",
+                               _orig_port.get_element_type());
+            }
+        } else {
+            // Compiled port
+            if (_port.get_element_type() != _tensor.get_element_type()) {
+                if (_orig_ports_map[name].get_element_type() == _tensor.get_element_type()) {
+                    // Tensor in the original port's precision
+                    _aux_tensors[name] = _tensor;
+                    tensor = ov::ISyncInferRequest::get_tensor(port);
+                    tensor.set_shape(_tensor.get_shape());
+                } else {
+                    IE_THROW(ParameterMismatch)
+                        << "Failed to set input tensor with precision: " << _tensor.get_element_type()
+                        << ", if model input tensor precision is: " << _port.get_element_type();
+                }
             }
         }
     }
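For context, the rewritten branches above mean an infer request now accepts an input tensor in either the original or the compiled precision whenever the two differ. A minimal usage sketch, assuming a model whose f64 input the CPU plugin compiles down to f32 ("model.xml" is a placeholder path):

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto compiled = core.compile_model("model.xml", "CPU");
    auto request = compiled.create_infer_request();
    auto input = compiled.input();

    // A tensor in the compiled precision (f32 here) is used by the graph directly.
    ov::Tensor compiled_prec(ov::element::f32, input.get_shape());
    request.set_tensor(input, compiled_prec);

    // A tensor in the original precision (f64 here) is kept in _aux_tensors and
    // copied into the compiled-precision tensor when inference starts.
    ov::Tensor original_prec(ov::element::f64, input.get_shape());
    request.set_tensor(input, original_prec);

    request.infer();
    return 0;
}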
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 6028c2fc672..900b8146667 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -769,7 +769,11 @@ std::shared_ptr<ov::ICompiledModel> Engine::import_model(std::istream& networkMo
     });
 
     std::shared_ptr<ov::Model> model;
-    deserializer >> model;
+    std::shared_ptr<ov::Model> orig_model;
+    std::pair<std::shared_ptr<ov::Model>, std::shared_ptr<ov::Model>> models = std::make_pair(model, orig_model);
+    deserializer >> models;
+    model = models.first;
+    orig_model = models.second;
 
     Config conf = engConfig;
     conf.readProperties(config);
@@ -793,7 +797,7 @@ std::shared_ptr<ov::ICompiledModel> Engine::import_model(std::istream& networkMo
         get_num_streams(conf.streamExecutorConfig._streams, function, conf);
     }
 
-    auto compiled_model = std::make_shared<CompiledModel>(model, model, shared_from_this(), conf, extensionManager, true);
+    auto compiled_model = std::make_shared<CompiledModel>(model, orig_model, shared_from_this(), conf, extensionManager, true);
     return compiled_model;
 }
 }   // namespace intel_cpu
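The import path above mirrors export_model(): both ends now exchange a pair instead of a single model. A sketch of the round trip, assuming the plugin-internal "serialize.h" header and the ov::intel_cpu namespace; ext_mgr and builder stand in for the extension manager and model-builder callback the plugin already owns:

#include <sstream>

#include "serialize.h"

void round_trip(const std::shared_ptr<ov::Model>& model,
                const std::shared_ptr<ov::Model>& original_model,
                ov::intel_cpu::ExtensionManager::Ptr ext_mgr,
                ov::intel_cpu::ModelDeserializer::model_builder builder) {
    std::stringstream stream;

    // Export: the transformed graph plus the original model's I/O info go out.
    ov::intel_cpu::ModelSerializer serializer(stream, ext_mgr);
    std::pair<std::shared_ptr<ov::Model>, const std::shared_ptr<ov::Model>> out =
        std::make_pair(model, original_model);
    serializer << out;

    // Import: .first is the rebuilt graph, .second is its clone with the
    // original precisions/shapes restored from the XML side-channel.
    std::pair<std::shared_ptr<ov::Model>, std::shared_ptr<ov::Model>> in;
    ov::intel_cpu::ModelDeserializer deserializer(stream, builder);
    deserializer >> in;
}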
diff --git a/src/plugins/intel_cpu/src/serialize.cpp b/src/plugins/intel_cpu/src/serialize.cpp
index c9c42815a12..33a30b98fa7 100644
--- a/src/plugins/intel_cpu/src/serialize.cpp
+++ b/src/plugins/intel_cpu/src/serialize.cpp
@@ -25,8 +25,6 @@ void setInfo(pugi::xml_object_range<pugi::xml_named_node_iterator>&& nodes, T&&
         if (!name_attr || !precision_attr || !shape_attr || info_iter == info.end()) {
             IE_THROW(NetworkNotRead) << "The inputs/outputs information is invalid.";
         }
-        // TODO: Is below info still needed for plugin api 2.0?
-        // info_iter->set_names({name_attr.value()});
         info_iter->get_tensor_ptr()->set_element_type(ov::element::Type(precision_attr.value()));
         info_iter->get_tensor_ptr()->set_tensor_type(ov::element::Type(precision_attr.value()),
                                                      ov::PartialShape(shape_attr.value()));
@@ -39,7 +37,10 @@ ModelSerializer::ModelSerializer(std::ostream & ostream, ExtensionManager::Ptr e
     , _extensionManager(extensionManager) {
 }
 
-void ModelSerializer::operator << (const std::shared_ptr<ov::Model>& model) {
+void ModelSerializer::operator<<(
+    std::pair<std::shared_ptr<ov::Model>, const std::shared_ptr<ov::Model>>& models) {
+    auto model = std::get<0>(models);
+    auto orig_model = std::get<1>(models);
     auto getCustomOpSets = [this]() {
         std::map<std::string, ngraph::OpSet> custom_opsets;
@@ -62,18 +63,19 @@ void ModelSerializer::operator << (const std::shared_ptr<ov::Model>& model) {
         pugi::xml_node outputs = root.append_child("outputs");
 
         // Need it?
-        for (const auto& in : model->inputs()) {
+        for (const auto& in : orig_model->inputs()) {
             auto in_node = inputs.append_child("in");
             in_node.append_attribute("name").set_value(ov::op::util::get_ie_output_name(in).c_str());
             in_node.append_attribute("precision").set_value(in.get_element_type().get_type_name().c_str());
-            in_node.append_attribute("shape").set_value(in.get_shape().to_string().c_str());
+            in_node.append_attribute("shape").set_value(in.get_partial_shape().to_string().c_str());
         }
 
-        for (const auto& out : model->outputs()) {
+        for (const auto& out : orig_model->outputs()) {
             auto out_node = outputs.append_child("out");
-            out_node.append_attribute("name").set_value(ov::op::util::get_ie_output_name(out).c_str());
+            const auto node = out.get_node_shared_ptr();
+            out_node.append_attribute("name").set_value(ov::op::util::get_ie_output_name(node->input_value(0)).c_str());
             out_node.append_attribute("precision").set_value(out.get_element_type().get_type_name().c_str());
-            out_node.append_attribute("shape").set_value(out.get_shape().to_string().c_str());
+            out_node.append_attribute("shape").set_value(out.get_partial_shape().to_string().c_str());
         }
         xml_doc.save(stream);
     };
@@ -90,12 +92,15 @@ ModelDeserializer::ModelDeserializer(std::istream & istream, model_builder fn)
     , _model_builder(fn) {
 }
 
-void ModelDeserializer::operator >> (std::shared_ptr<ov::Model>& network) {
+void ModelDeserializer::operator>>(std::pair<std::shared_ptr<ov::Model>, std::shared_ptr<ov::Model>>& models) {
     using namespace ov::pass;
 
     std::string xmlString, xmlInOutString;
     ov::Tensor dataBlob;
 
+    auto& network = models.first;
+    auto& orig_model = models.second;
+
     StreamSerialize::DataHeader hdr = {};
     _istream.read(reinterpret_cast<char*>(&hdr), sizeof hdr);
@@ -122,14 +127,15 @@ void ModelDeserializer::operator >> (std::shared_ptr<ov::Model>& network) {
     _istream.read(const_cast<char*>(xmlString.c_str()), hdr.model_size);
 
     network = _model_builder(xmlString, std::move(dataBlob));
+    orig_model = network->clone();
 
     // Set input and output precisions
     pugi::xml_node root = xmlInOutDoc.child("cnndata");
     pugi::xml_node inputs = root.child("inputs");
     pugi::xml_node outputs = root.child("outputs");
-    setInfo(inputs.children("in"), network->inputs());
-    setInfo(outputs.children("out"), network->outputs());
+    setInfo(inputs.children("in"), orig_model->inputs());
+    setInfo(outputs.children("out"), orig_model->outputs());
 }
 
 }   // namespace intel_cpu
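For reference, the I/O side-channel the serializer writes has roughly this shape; the tag and attribute names come from the code above, while the names, precisions and shapes are made-up illustrative values (the exact to_string() formatting may differ):

<cnndata>
    <inputs>
        <in name="data" precision="f64" shape="[1,3,224,224]"/>
    </inputs>
    <outputs>
        <out name="prob" precision="f64" shape="[1,1000]"/>
    </outputs>
</cnndata>

Storing the original precisions and partial shapes here is what lets the deserializer rebuild orig_model without re-reading the original IR.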
diff --git a/src/plugins/intel_cpu/src/serialize.h b/src/plugins/intel_cpu/src/serialize.h
index b3e33b3fc8c..3bc7b131782 100644
--- a/src/plugins/intel_cpu/src/serialize.h
+++ b/src/plugins/intel_cpu/src/serialize.h
@@ -14,8 +14,7 @@ namespace intel_cpu {
 class ModelSerializer {
 public:
     ModelSerializer(std::ostream& ostream, ExtensionManager::Ptr extensionManager);
-    void operator<<(const std::shared_ptr<ov::Model>& model);
-
+    void operator<<(std::pair<std::shared_ptr<ov::Model>, const std::shared_ptr<ov::Model>>& models);
 private:
     std::ostream& _ostream;
     ExtensionManager::Ptr _extensionManager;
@@ -25,7 +24,7 @@ class ModelDeserializer {
 public:
     typedef std::function<std::shared_ptr<ov::Model>(const std::string&, const ov::Tensor&)> model_builder;
     ModelDeserializer(std::istream& istream, model_builder fn);
-    void operator>>(std::shared_ptr<ov::Model>& model);
+    void operator>>(std::pair<std::shared_ptr<ov::Model>, std::shared_ptr<ov::Model>>& models);
 
 private:
     std::istream& _istream;
diff --git a/src/tests/engines_util/test_case.cpp b/src/tests/engines_util/test_case.cpp
index b258e483e1c..d03ea5a7aca 100644
--- a/src/tests/engines_util/test_case.cpp
+++ b/src/tests/engines_util/test_case.cpp
@@ -76,9 +76,8 @@ std::shared_ptr<ov::Model> function_from_ir(const std::string& xml_path, const st
 std::pair<testing::AssertionResult, size_t> TestCase::compare_results(size_t tolerance_bits) {
     auto res = testing::AssertionSuccess();
     size_t output_idx = 0;
-    const auto results = m_function->get_results();
     for (; output_idx < m_expected_outputs.size(); ++output_idx) {
-        const auto& result_tensor = m_request.get_tensor(results[output_idx]);
+        const auto& result_tensor = m_request.get_output_tensor(output_idx);
         const auto& exp_result = m_expected_outputs.at(output_idx);
         const auto& element_type = result_tensor.get_element_type();
diff --git a/src/tests/engines_util/test_case.hpp b/src/tests/engines_util/test_case.hpp
index 4725fe4ad87..9d7bd9d6af0 100644
--- a/src/tests/engines_util/test_case.hpp
+++ b/src/tests/engines_util/test_case.hpp
@@ -63,8 +63,7 @@ public:
             std::copy(values.begin(), values.end(), tensor.data<T>());
             m_request.set_input_tensor(m_input_index, tensor);
         } else {
-            // auto tensor = m_request.get_input_tensor(m_input_index);
-            auto tensor = m_request.get_tensor(params.at(m_input_index));
+            auto tensor = m_request.get_input_tensor(m_input_index);
             NGRAPH_CHECK(tensor.get_size() >= values.size(),
                          "Tensor and values have different sizes. Tensor (",
                          tensor.get_shape(),
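The switch to positional accessors in the test helpers follows from the plugin changes: once import restores the original I/O info, ports cached from m_function need not be identical to the request's ports, while index-based lookup stays valid either way. A minimal sketch of the pattern (compare_all_outputs and its arguments are hypothetical):

#include <openvino/openvino.hpp>

void compare_all_outputs(ov::InferRequest& request, size_t num_outputs) {
    for (size_t output_idx = 0; output_idx < num_outputs; ++output_idx) {
        // Resolve the i-th output positionally instead of via a cached port object.
        ov::Tensor result = request.get_output_tensor(output_idx);
        (void)result.get_element_type();  // inspect/compare as the test requires
        (void)result.get_shape();
    }
}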