From 0b0202b90c4af3e59e955b43b77ee14b00e56d82 Mon Sep 17 00:00:00 2001
From: Evgeny Talanin
Date: Tue, 2 Nov 2021 17:35:37 +0300
Subject: [PATCH] Revert "[GPU] Fix some performance degradations from
 breaking GPU pipeline into explicit stages (#8084)" (#8372)

This reverts commit 03106e0cd9363bfc83b9183dcfd18fc05e4631c7.
---
 .../cldnn_async_infer_request.cpp             | 20 ++++----
 .../cldnn_engine/cldnn_executable_network.cpp |  3 +-
 .../src/cldnn_engine/cldnn_infer_request.cpp  | 50 +++++++------------
 .../src/cldnn_engine/cldnn_infer_request.h    |  6 ---
 4 files changed, 27 insertions(+), 52 deletions(-)

diff --git a/inference-engine/src/cldnn_engine/cldnn_async_infer_request.cpp b/inference-engine/src/cldnn_engine/cldnn_async_infer_request.cpp
index 358d8c993a4..9e69ddeb0c8 100644
--- a/inference-engine/src/cldnn_engine/cldnn_async_infer_request.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_async_infer_request.cpp
@@ -19,29 +19,27 @@ CLDNNPlugin::CLDNNAsyncInferRequest::CLDNNAsyncInferRequest(const CLDNNInferRequ
                         OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNAsyncInferRequest::PreprocessingAndStartPipeline");
                         _inferRequest->preprocess();
                         _inferRequest->enqueue();
-                        _inferRequest->wait();
                     } });
-    } else {
-        _pipeline.push_back({ _waitExecutor,
-                        [this] {
-                            OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNAsyncInferRequest::WaitPipeline");
-                            _inferRequest->wait_notify();
-                        } });
     }
+    _pipeline.push_back({_waitExecutor,
+                    [this] {
+                        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNAsyncInferRequest::WaitPipeline");
+                        _inferRequest->wait();
+                    }});
 }
 
 void CLDNNPlugin::CLDNNAsyncInferRequest::Infer_ThreadUnsafe() {
     if (_inferRequest->use_external_queue()) {
-        _inferRequest->preprocess_notify();
-        _inferRequest->enqueue_notify();
+        _inferRequest->preprocess();
+        _inferRequest->enqueue();
     }
     Parent::Infer_ThreadUnsafe();
 }
 
 void CLDNNPlugin::CLDNNAsyncInferRequest::StartAsync_ThreadUnsafe() {
     if (_inferRequest->use_external_queue()) {
-        _inferRequest->preprocess_notify();
-        _inferRequest->enqueue_notify();
+        _inferRequest->preprocess();
+        _inferRequest->enqueue();
     }
     Parent::StartAsync_ThreadUnsafe();
 }
diff --git a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp
index c554ee1c42f..7e465f0f257 100644
--- a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp
@@ -48,8 +48,7 @@ CLDNNExecNetwork::CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, std::sh
     }()},
     m_config(config),
     m_taskExecutor{ _taskExecutor },
-    m_waitExecutor(InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor",
-                                                                                                config.throughput_streams > 1 ? config.throughput_streams : 1 })) {
+    m_waitExecutor(InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor" })) {
     auto casted_context = std::dynamic_pointer_cast<gpu::ClContext>(context);
     if (nullptr == casted_context) {
diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp
index 92ba0ee3fdd..c71acf4e6d9 100644
--- a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp
@@ -471,32 +471,28 @@ CLDNNInferRequest::CLDNNInferRequest(const std::vector<std::shared_ptr<const ov
 }
 
-void CLDNNInferRequest::preprocess_notify() {
+void CLDNNInferRequest::preprocess() {
+    int streamID = 0;
+    auto& streamGraphs = static_cast<CLDNNExecNetwork*>(_exeNetwork.get())->m_graphs;
+    if (nullptr != streamExecutor) {
+        streamID = streamExecutor->GetStreamId();
+        int numGraphs = streamGraphs.size();
+        streamID = streamID % numGraphs;
+    }
+    m_graph = streamGraphs[streamID];
+
     m_graph->wait(CLDNNGraph::Stage::PREPROC);
     if (m_graph->GetMaxDynamicBatchSize() > 1) {
         preprocess_dynamic();
-    } else {
-        execDataPreprocessing(_inputs, true);  // "true" stands for serial preprocessing in case of OpenMP
+        return;
     }
+    execDataPreprocessing(_inputs, true);  // "true" stands for serial preprocessing in case of OpenMP
     m_graph->notify(CLDNNGraph::Stage::PREPROC);
 }
 
-void CLDNNInferRequest::preprocess() {
-    setStreamGraph();
-    if (m_graph->GetMaxDynamicBatchSize() > 1) {
-        preprocess_dynamic();
-    } else {
-        execDataPreprocessing(_inputs, true);  // "true" stands for serial preprocessing in case of OpenMP
-    }
-}
-
-void CLDNNInferRequest::enqueue_notify() {
-    m_graph->wait(CLDNNGraph::Stage::EXECUTE);
-    enqueue();
-}
-
 void CLDNNInferRequest::enqueue() {
+    m_graph->wait(CLDNNGraph::Stage::EXECUTE);
     if (m_graph->GetMaxDynamicBatchSize() > 1) {
         enqueue_dynamic();
         return;
@@ -545,11 +541,6 @@ void CLDNNInferRequest::enqueue() {
     internal_outputs = m_graph->GetNetwork()->execute(dependencies);
 }
 
-void CLDNNInferRequest::wait_notify() {
-    wait();
-    m_graph->notify(CLDNNGraph::Stage::EXECUTE);
-}
-
 void CLDNNInferRequest::wait() {
     if (m_graph->GetMaxDynamicBatchSize() > 1) {
         wait_dynamic();
@@ -577,11 +568,13 @@ void CLDNNInferRequest::wait() {
     if (m_useProfiling) {
         m_graph->UpdatePerfStatistics();
     }
+    m_graph->notify(CLDNNGraph::Stage::EXECUTE);
 }
 
 void CLDNNInferRequest::preprocess_dynamic() {
     // execute input pre-processing.
     execDataPreprocessing(_inputs, true);  // "true" stands for serial preprocessing in case of OpenMP
+    m_graph->notify(CLDNNGraph::Stage::PREPROC);
 }
 
 void CLDNNInferRequest::enqueue_dynamic() {
@@ -626,21 +619,12 @@ void CLDNNInferRequest::wait_dynamic() {
             }
         }
     }
+    m_graph->notify(CLDNNGraph::Stage::EXECUTE);
 }
 
 // ----------------------------------------------------------------------------------------- //
 // ---------------------------- internal utils --------- ----------------------------------- //
 // ----------------------------------------------------------------------------------------- //
-void CLDNNInferRequest::setStreamGraph() {
-    int streamID = 0;
-    auto& streamGraphs = static_cast<CLDNNExecNetwork*>(_exeNetwork.get())->m_graphs;
-    if (nullptr != streamExecutor) {
-        streamID = streamExecutor->GetStreamId();
-        int numGraphs = streamGraphs.size();
-        streamID = streamID % numGraphs;
-    }
-    m_graph = streamGraphs[streamID];
-}
 
 Blob::Ptr CLDNNInferRequest::create_host_blob(const TensorDesc& desc, uint8_t* mem_ptr) {
     OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::create_host_blob");
diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.h b/inference-engine/src/cldnn_engine/cldnn_infer_request.h
index a4eff5b0c91..72c924b015e 100644
--- a/inference-engine/src/cldnn_engine/cldnn_infer_request.h
+++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.h
@@ -49,10 +49,6 @@ public:
     void EnableProfiling() { m_useProfiling = true; }
     void EnableStreams() { m_useStreams = true; }
 
-    void preprocess_notify();
-    void enqueue_notify();
-    void wait_notify();
-
     void preprocess();
     void enqueue();
     void wait();
@@ -96,8 +92,6 @@ private:
     void allocate_inputs_dynamic();
     void allocate_outputs_dynamic();
 
-    void setStreamGraph();
-
     std::map<cldnn::primitive_id, cldnn::network_output> internal_outputs;
     std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> internal_outputs_dynamic;
 };
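
For readers outside the cldnn plugin, the CLDNNGraph::Stage wait()/notify() pairs that this revert moves back into preprocess(), wait(), and the *_dynamic() variants act as per-stage gates on a graph shared by several infer requests: a request blocks in wait(Stage) until the stage is free, and notify(Stage) releases it for the next request. The sketch below illustrates only that gating pattern; StageGate and all of its members are hypothetical names invented for this illustration, not CLDNNGraph's actual implementation, which may differ.

#include <condition_variable>
#include <cstddef>
#include <mutex>

// Hypothetical sketch of per-stage gating in the spirit of
// CLDNNGraph::wait()/notify() as used in the diff above.
class StageGate {
public:
    enum class Stage { PREPROC = 0, EXECUTE = 1 };

    // Block until the stage is free, then claim it for the calling request.
    void wait(Stage s) {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [&] { return !m_busy[idx(s)]; });
        m_busy[idx(s)] = true;
    }

    // Release the stage and wake any request blocked in wait().
    void notify(Stage s) {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_busy[idx(s)] = false;
        }
        m_cv.notify_all();
    }

private:
    static std::size_t idx(Stage s) { return static_cast<std::size_t>(s); }

    std::mutex m_mutex;
    std::condition_variable m_cv;
    bool m_busy[2] = {false, false};  // one flag per pipeline stage
};

Under this reading, the net effect of the revert is that the synchronous methods bracket the stages themselves again: preprocess() claims and releases PREPROC, enqueue() claims EXECUTE, and wait()/wait_dynamic() release it, instead of the separate *_notify() entry points that #8084 had introduced for the async pipeline.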