[GPU] Fix the new infer request to set output memory normally during enqueue stage (#12938)

* Fix the new infer request to set output memory normally during enqueue stage
* Fix to use the generated outputsMap instead of graph API to retrieve outputID

Signed-off-by: Andrew Park <andrew.park@intel.com>
Andrew Kwangwoong Park 2022-09-08 16:15:04 +09:00 committed by GitHub
parent 5a42afb45b
commit 90093d5315

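Per the commit message, the substance of the fix is that the infer request now binds output memory to the cldnn network for every output during the enqueue stage, instead of only for remote (GPU-side) blobs. Below is a minimal sketch of that binding step; it borrows names from the diff and assumes the plugin's own helpers (getBlobImpl, gpu::ClBlob, str_output_not_allocated) and is an illustration, not the committed code:

```cpp
// Hedged sketch only: all types and helpers are assumed from the
// intel_gpu plugin sources surrounding this diff.
void bind_output_memory(std::shared_ptr<cldnn::network> network,
                        const cldnn::primitive_id& internalName,
                        const InferenceEngine::Blob::Ptr& reqBlob) {
    auto impl = getBlobImpl(reqBlob->as<gpu::ClBlob>());
    if (!impl->is_allocated()) {
        IE_THROW(NotAllocated) << str_output_not_allocated;
    }
    // Binding at enqueue time gives the executed primitives a valid
    // destination for host-visible outputs as well as remote ones.
    network->set_output_memory(internalName, impl->getMemory());
}
```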
@@ -522,15 +522,15 @@ void InferRequest::wait() {
     // wait for completion & collect outputs as requested by the model
     for (auto& no : _networkOutputs) {
-        std::string outputID = m_graph->MapOutputName(no.first);
+        // In dynamic case, graph API must be used to retrieve outputID
+        // because it does not create outputsMap during SetGraph
+        std::string outputID = outputsMap.empty() ? m_graph->MapOutputName(no.first) : outputsMap.at(no.first);
         auto outputMemory = internal_outputs.at(outputID).get_memory();
+        auto node = findOutputByNodeName(no.first);
+        auto out_partial_shape = node->get_output_partial_shape(0);
+        size_t out_rank = out_partial_shape.rank().get_length();
         if (_outputs.find(no.first) == _outputs.end()) {
-            auto node = findOutputByNodeName(no.first);
-            auto out_partial_shape = node->get_output_partial_shape(0);
-            size_t out_rank = out_partial_shape.rank().get_length();
             auto mem_dims = outputMemory->get_layout().get_shape();
             auto precision = InferenceEngine::Precision::FP32;
             auto dims = SizeVector(mem_dims.begin(), mem_dims.end());
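The fallback added to wait() above exists because dynamic-shape graphs skip building outputsMap during SetGraph, leaving the graph API as the only source of the internal ID in that case. Here is a standalone sketch of the same lookup pattern, with plain std::string in place of cldnn::primitive_id and a hypothetical map_output_name standing in for m_graph->MapOutputName:

```cpp
#include <functional>
#include <map>
#include <string>

// Standalone sketch of the outputID lookup above (simplified types).
std::string resolve_output_id(
        const std::map<std::string, std::string>& outputs_map,
        const std::string& output_name,
        const std::function<std::string(const std::string&)>& map_output_name) {
    // An empty map means SetGraph never populated it (dynamic case),
    // so fall back to querying the graph directly.
    return outputs_map.empty() ? map_output_name(output_name)
                               : outputs_map.at(output_name);
}
```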
@@ -891,22 +891,19 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
 void InferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::Ptr& outputBlob) {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::prepare_output");
+    Blob::Ptr reqBlob = _deviceOutputs.at(outputName);
+    cldnn::primitive_id internalName = outputsMap[outputName];
+    auto _nw_ptr = m_graph->GetNetwork();
     auto remote_ptr = outputBlob->as<gpu::ClBlob>();
-    bool is_remote = remote_ptr != nullptr;
-    if (is_remote) {
-        Blob::Ptr reqBlob = _deviceOutputs.at(outputName);
-        cldnn::primitive_id internalName = outputsMap[outputName];
-        auto _nw_ptr = m_graph->GetNetwork();
-        auto output_blob_ptr = (reqBlob != outputBlob && remote_ptr != nullptr)
-                ? remote_ptr
-                : reqBlob->as<gpu::ClBlob>();
-        auto impl = getBlobImpl(output_blob_ptr);
-        if (!impl->is_allocated()) {
-            IE_THROW(NotAllocated) << str_output_not_allocated;
-        }
-        auto outputMem = impl->getMemory();
-        _nw_ptr->set_output_memory(internalName, outputMem);
-    }
+    auto output_blob_ptr = (reqBlob != outputBlob && remote_ptr != nullptr)
+            ? remote_ptr
+            : reqBlob->as<gpu::ClBlob>();
+    auto impl = getBlobImpl(output_blob_ptr);
+    if (!impl->is_allocated()) {
+        IE_THROW(NotAllocated) << str_output_not_allocated;
+    }
+    auto outputMem = impl->getMemory();
+    _nw_ptr->set_output_memory(internalName, outputMem);
 }
 
 InferenceEngine::Blob::Ptr InferRequest::create_device_blob(const InferenceEngine::TensorDesc& desc) {
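With the is_remote guard removed, the blob-selection ternary in prepare_output now runs for every output. A reduced sketch of that selection rule, with opaque pointers standing in for InferenceEngine::Blob::Ptr and gpu::ClBlob* (in the real code the fallback is reqBlob->as<gpu::ClBlob>()):

```cpp
// Reduced sketch of the selection rule (simplified pointer types).
const void* select_output_blob(const void* req_blob,
                               const void* user_blob,
                               const void* remote_ptr) {
    // Prefer the user's blob only when it is a distinct remote allocation;
    // otherwise bind the request's own device output blob.
    return (req_blob != user_blob && remote_ptr != nullptr) ? remote_ptr
                                                            : req_blob;
}
```

Either way, the chosen blob's memory ends up in set_output_memory, so the enqueue stage always has a destination to write into.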