[GPU] Fix the new infer request to set output memory normally during enqueue stage (#12938)
* Fix the new infer request to set output memory normally during enqueue stage
* Fix to use the generated outputsMap instead of graph API to retrieve outputID

Signed-off-by: Andrew Park <andrew.park@intel.com>
commit 90093d5315
parent 5a42afb45b
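The second bullet of the commit message boils down to a lookup-with-fallback: prefer the outputsMap generated during SetGraph, and only ask the graph API when that map was never built (the dynamic-shape path). A minimal standalone sketch of that pattern, using hypothetical names (resolve_output_id, map_via_graph) rather than the plugin's real types:

// Sketch only: std::map stands in for outputsMap, a callback stands in for
// m_graph->MapOutputName(); not the plugin's actual code.
#include <functional>
#include <iostream>
#include <map>
#include <string>

std::string resolve_output_id(const std::map<std::string, std::string>& outputsMap,
                              const std::string& name,
                              const std::function<std::string(const std::string&)>& map_via_graph) {
    // Fall back to the graph API only when the pre-generated map is empty.
    return outputsMap.empty() ? map_via_graph(name) : outputsMap.at(name);
}

int main() {
    auto via_graph = [](const std::string& n) { return n + ":graph"; };
    std::map<std::string, std::string> generated{{"out0", "out0:map"}};
    std::cout << resolve_output_id(generated, "out0", via_graph) << "\n"; // out0:map
    std::cout << resolve_output_id({}, "out0", via_graph) << "\n";        // out0:graph (dynamic case)
}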
@@ -522,15 +522,15 @@ void InferRequest::wait() {
     // wait for completion & collect outputs as requested by the model
     for (auto& no : _networkOutputs) {
-        std::string outputID = m_graph->MapOutputName(no.first);
+        // In dynamic case, graph API must be used to retrieve outputID
+        // because it does not create outputsMap during SetGraph
+        std::string outputID = outputsMap.empty() ? m_graph->MapOutputName(no.first) : outputsMap.at(no.first);
         auto outputMemory = internal_outputs.at(outputID).get_memory();
-        auto node = findOutputByNodeName(no.first);
-        auto out_partial_shape = node->get_output_partial_shape(0);
-        size_t out_rank = out_partial_shape.rank().get_length();
         if (_outputs.find(no.first) == _outputs.end()) {
+            auto node = findOutputByNodeName(no.first);
+            auto out_partial_shape = node->get_output_partial_shape(0);
+            size_t out_rank = out_partial_shape.rank().get_length();
             auto mem_dims = outputMemory->get_layout().get_shape();
             auto precision = InferenceEngine::Precision::FP32;
             auto dims = SizeVector(mem_dims.begin(), mem_dims.end());
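The hunk above also moves the node lookup and the partial-shape/rank queries inside the `_outputs.find(no.first) == _outputs.end()` branch, so per-output metadata is gathered only when a host blob still has to be created. A toy, self-contained sketch of that first-touch pattern (hypothetical names, not the plugin code):

#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <vector>

std::map<std::string, std::vector<std::size_t>> outputs;  // stands in for _outputs

void collect_output(const std::string& name, const std::vector<std::size_t>& mem_dims) {
    if (outputs.find(name) == outputs.end()) {
        // The expensive queries (node lookup, partial shape, rank in the real
        // code) happen only on this creation path.
        outputs[name] = mem_dims;
    }
    // Otherwise the existing output is reused and nothing is recomputed.
}

int main() {
    collect_output("out0", {1, 3, 224, 224});
    collect_output("out0", {1, 3, 224, 224});  // second call skips creation
    std::cout << outputs.size() << "\n";       // 1
}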
@@ -891,22 +891,19 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
 
 void InferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::Ptr& outputBlob) {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::prepare_output");
+    Blob::Ptr reqBlob = _deviceOutputs.at(outputName);
+    cldnn::primitive_id internalName = outputsMap[outputName];
+    auto _nw_ptr = m_graph->GetNetwork();
     auto remote_ptr = outputBlob->as<gpu::ClBlob>();
-    bool is_remote = remote_ptr != nullptr;
-    if (is_remote) {
-        Blob::Ptr reqBlob = _deviceOutputs.at(outputName);
-        cldnn::primitive_id internalName = outputsMap[outputName];
-        auto _nw_ptr = m_graph->GetNetwork();
-        auto output_blob_ptr = (reqBlob != outputBlob && remote_ptr != nullptr)
-            ? remote_ptr
-            : reqBlob->as<gpu::ClBlob>();
-        auto impl = getBlobImpl(output_blob_ptr);
-        if (!impl->is_allocated()) {
-            IE_THROW(NotAllocated) << str_output_not_allocated;
-        }
-        auto outputMem = impl->getMemory();
-        _nw_ptr->set_output_memory(internalName, outputMem);
-    }
+    auto output_blob_ptr = (reqBlob != outputBlob && remote_ptr != nullptr)
+        ? remote_ptr
+        : reqBlob->as<gpu::ClBlob>();
+    auto impl = getBlobImpl(output_blob_ptr);
+    if (!impl->is_allocated()) {
+        IE_THROW(NotAllocated) << str_output_not_allocated;
+    }
+    auto outputMem = impl->getMemory();
+    _nw_ptr->set_output_memory(internalName, outputMem);
 }
 
 InferenceEngine::Blob::Ptr InferRequest::create_device_blob(const InferenceEngine::TensorDesc& desc) {
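After this change prepare_output sets the output memory on every enqueue instead of only for remote blobs; the remaining decision is which blob supplies the memory handed to set_output_memory. A simplified, hypothetical illustration of that selection (placeholder Blob type, not the plugin's gpu::ClBlob machinery): the user-provided blob wins only when it is remote and is actually a different object than the internally allocated device blob.

#include <iostream>
#include <string>

struct Blob { std::string name; bool is_remote; };

// Mirrors the ternary in the diff: prefer the user's remote blob only when it
// differs from the request's own device blob; otherwise use the device blob.
const Blob* select_output_blob(const Blob* reqBlob, const Blob* outputBlob) {
    bool user_gave_remote = outputBlob->is_remote;
    return (reqBlob != outputBlob && user_gave_remote) ? outputBlob : reqBlob;
}

int main() {
    Blob device{"device_blob", true};
    Blob user_remote{"user_remote_blob", true};
    std::cout << select_output_blob(&device, &user_remote)->name << "\n"; // user_remote_blob
    std::cout << select_output_blob(&device, &device)->name << "\n";      // device_blob
}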