From 08dc2cf1d6c657dd19899fcc2760c7cff759a1bb Mon Sep 17 00:00:00 2001 From: Xuejun Zhai Date: Wed, 21 Jun 2023 10:11:14 +0800 Subject: [PATCH] [AUTO BATCH PLUGIN] renaming the class name in auto batch plugin & remove namespace (#18145) * [AUTO BATCH PLUGIN] change namespace AutoBatchPlugin to ov::auto_batch_plugin & remove macro MockAutoBatchPlugin Signed-off-by: Zhai, Xuejun * [AUTO BATCH PLUGIN] change class AutoBatchInferencePlugin to Plugin Signed-off-by: Zhai, Xuejun * [AUTO BATCH PLUGIN] change class AutoBatchExecutableNetwork to CompiledModel & class member naming style Signed-off-by: Zhai, Xuejun * [AUTO BATCH PLUGIN] change class AutoBatchInferRequest to SyncInferRequest & class member naming style Signed-off-by: Zhai, Xuejun * [AUTO BATCH PLUGIN] change class AutoBatchAsyncInferRequest to AsyncInferRequest & class member naming style Signed-off-by: Zhai, Xuejun * [AUTO BATCH PLUGIN] fix code format issues Signed-off-by: Zhai, Xuejun * [AUTO BATCH PLUGIN] remove name space InferenceEngine Signed-off-by: Zhai, Xuejun * [AUTO BATCH PLUGIN] remove explict & change name network to model Signed-off-by: Zhai, Xuejun * [AUTO BATCH PLUGIN] remove name space MockAutoBatchPlugin Signed-off-by: Zhai, Xuejun * [AUTO BATCH PLUGIN] fix static build issue Signed-off-by: Zhai, Xuejun --------- Signed-off-by: Zhai, Xuejun --- .../auto_batch/src/async_infer_request.cpp | 67 +++-- .../auto_batch/src/async_infer_request.hpp | 26 +- src/plugins/auto_batch/src/compiled_model.cpp | 186 ++++++------- src/plugins/auto_batch/src/compiled_model.hpp | 62 +++-- src/plugins/auto_batch/src/plugin.cpp | 95 +++---- src/plugins/auto_batch/src/plugin.hpp | 27 +- .../auto_batch/src/sync_infer_request.cpp | 258 +++++++++--------- .../auto_batch/src/sync_infer_request.hpp | 57 ++-- .../unit/auto_batch_infer_request_tests.cpp | 92 +++---- .../tests/unit/create_infer_request_tests.cpp | 33 ++- .../tests/unit/exec_network_tests.cpp | 46 ++-- .../tests/unit/load_network_tests.cpp | 13 +- .../tests/unit/mock_auto_batch_plugin.hpp | 9 +- .../auto_batch/tests/unit/plugins_tests.cpp | 19 +- 14 files changed, 510 insertions(+), 480 deletions(-) diff --git a/src/plugins/auto_batch/src/async_infer_request.cpp b/src/plugins/auto_batch/src/async_infer_request.cpp index f14666ed943..de26b22b577 100644 --- a/src/plugins/auto_batch/src/async_infer_request.cpp +++ b/src/plugins/auto_batch/src/async_infer_request.cpp @@ -6,23 +6,21 @@ #include "async_infer_request.hpp" -namespace AutoBatchPlugin { +namespace ov { +namespace autobatch_plugin { -using namespace InferenceEngine; - -AutoBatchAsyncInferRequest::AutoBatchAsyncInferRequest( - const AutoBatchInferRequest::Ptr& inferRequest, - InferenceEngine::SoIInferRequestInternal& inferRequestWithoutBatch, - const ITaskExecutor::Ptr& callbackExecutor) +AsyncInferRequest::AsyncInferRequest(const SyncInferRequest::Ptr& inferRequest, + InferenceEngine::SoIInferRequestInternal& inferRequestWithoutBatch, + const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) : AsyncInferRequestThreadSafeDefault(inferRequest, nullptr, callbackExecutor), - _inferRequestWithoutBatch(inferRequestWithoutBatch), - _inferRequest{inferRequest} { + m_infer_request_without_batch(inferRequestWithoutBatch), + m_sync_infer_request{inferRequest} { // this executor starts the inference while the task (checking the result) is passed to the next stage - struct ThisRequestExecutor : public ITaskExecutor { - explicit ThisRequestExecutor(AutoBatchAsyncInferRequest* _this_) : _this{_this_} {} - void run(Task task) override 
{ - auto& workerInferRequest = _this->_inferRequest->_myBatchedRequestWrapper; - std::pair t; + struct ThisRequestExecutor : public InferenceEngine::ITaskExecutor { + explicit ThisRequestExecutor(AsyncInferRequest* _this_) : _this{_this_} {} + void run(InferenceEngine::Task task) override { + auto& workerInferRequest = _this->m_sync_infer_request->m_batched_request_wrapper; + std::pair t; t.first = _this; t.second = std::move(task); workerInferRequest._tasks.push(t); @@ -32,35 +30,36 @@ AutoBatchAsyncInferRequest::AutoBatchAsyncInferRequest( workerInferRequest._cond.notify_one(); } }; - AutoBatchAsyncInferRequest* _this = nullptr; + AsyncInferRequest* _this = nullptr; }; - _pipeline = {{/*TaskExecutor*/ std::make_shared(this), /*task*/ [this] { - if (this->_inferRequest->_exceptionPtr) // if the exception happened in the batch1 fallback - std::rethrow_exception(this->_inferRequest->_exceptionPtr); - auto& batchReq = this->_inferRequest->_myBatchedRequestWrapper; - if (batchReq._exceptionPtr) // when the batchN execution failed - std::rethrow_exception(batchReq._exceptionPtr); - // in the case of non-batched execution the blobs were set explicitly - if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED == - this->_inferRequest->_wasBatchedRequestUsed) - this->_inferRequest->CopyOutputsIfNeeded(); - }}}; + _pipeline = { + {/*TaskExecutor*/ std::make_shared(this), /*task*/ [this] { + if (this->m_sync_infer_request->m_exceptionPtr) // if the exception happened in the batch1 fallback + std::rethrow_exception(this->m_sync_infer_request->m_exceptionPtr); + auto& batchReq = this->m_sync_infer_request->m_batched_request_wrapper; + if (batchReq.m_exceptionPtr) // when the batchN execution failed + std::rethrow_exception(batchReq.m_exceptionPtr); + // in the case of non-batched execution the blobs were set explicitly + if (SyncInferRequest::eExecutionFlavor::BATCH_EXECUTED == + this->m_sync_infer_request->m_batched_request_status) + this->m_sync_infer_request->CopyOutputsIfNeeded(); + }}}; } -std::map AutoBatchAsyncInferRequest::GetPerformanceCounts() - const { +std::map AsyncInferRequest::GetPerformanceCounts() const { CheckState(); - if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED == _inferRequest->_wasBatchedRequestUsed) - return _inferRequest->_myBatchedRequestWrapper._inferRequestBatched->GetPerformanceCounts(); + if (SyncInferRequest::eExecutionFlavor::BATCH_EXECUTED == m_sync_infer_request->m_batched_request_status) + return m_sync_infer_request->m_batched_request_wrapper._inferRequestBatched->GetPerformanceCounts(); else - return _inferRequestWithoutBatch->GetPerformanceCounts(); + return m_infer_request_without_batch->GetPerformanceCounts(); } -void AutoBatchAsyncInferRequest::Infer_ThreadUnsafe() { +void AsyncInferRequest::Infer_ThreadUnsafe() { InferUsingAsync(); } -AutoBatchAsyncInferRequest::~AutoBatchAsyncInferRequest() { +AsyncInferRequest::~AsyncInferRequest() { StopAndWait(); } -} // namespace AutoBatchPlugin \ No newline at end of file +} // namespace autobatch_plugin +} // namespace ov \ No newline at end of file diff --git a/src/plugins/auto_batch/src/async_infer_request.hpp b/src/plugins/auto_batch/src/async_infer_request.hpp index c6956f8114e..ec6009f9d8c 100644 --- a/src/plugins/auto_batch/src/async_infer_request.hpp +++ b/src/plugins/auto_batch/src/async_infer_request.hpp @@ -7,19 +7,25 @@ #include "cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp" #include "sync_infer_request.hpp" -namespace AutoBatchPlugin { -class 
AutoBatchAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault { +namespace ov { +namespace autobatch_plugin { +class AsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault { public: - using Ptr = std::shared_ptr; + using Ptr = std::shared_ptr; + + explicit AsyncInferRequest(const SyncInferRequest::Ptr& inferRequest, + InferenceEngine::SoIInferRequestInternal& inferRequestWithoutBatch, + const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor); - explicit AutoBatchAsyncInferRequest(const AutoBatchInferRequest::Ptr& inferRequest, - InferenceEngine::SoIInferRequestInternal& inferRequestWithoutBatch, - const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor); void Infer_ThreadUnsafe() override; - virtual ~AutoBatchAsyncInferRequest(); + + virtual ~AsyncInferRequest(); + std::map GetPerformanceCounts() const override; - InferenceEngine::SoIInferRequestInternal _inferRequestWithoutBatch; - AutoBatchInferRequest::Ptr _inferRequest; + InferenceEngine::SoIInferRequestInternal m_infer_request_without_batch; + + SyncInferRequest::Ptr m_sync_infer_request; }; -} // namespace AutoBatchPlugin \ No newline at end of file +} // namespace autobatch_plugin +} // namespace ov \ No newline at end of file diff --git a/src/plugins/auto_batch/src/compiled_model.cpp b/src/plugins/auto_batch/src/compiled_model.cpp index e08ead91de0..72354f4d0e9 100644 --- a/src/plugins/auto_batch/src/compiled_model.cpp +++ b/src/plugins/auto_batch/src/compiled_model.cpp @@ -9,90 +9,89 @@ #include "ie_performance_hints.hpp" #include "sync_infer_request.hpp" -namespace AutoBatchPlugin { -using namespace InferenceEngine; -AutoBatchExecutableNetwork::AutoBatchExecutableNetwork( - const InferenceEngine::SoExecutableNetworkInternal& networkWithBatch, - const InferenceEngine::SoExecutableNetworkInternal& networkWithoutBatch, - const DeviceInformation& networkDevice, - const std::unordered_map& config, - const std::set& batchedInputs, - const std::set& batchedOutputs) +namespace ov { +namespace autobatch_plugin { +CompiledModel::CompiledModel(const InferenceEngine::SoExecutableNetworkInternal& networkWithBatch, + const InferenceEngine::SoExecutableNetworkInternal& networkWithoutBatch, + const DeviceInformation& networkDevice, + const std::unordered_map& config, + const std::set& batchedInputs, + const std::set& batchedOutputs) : InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, std::make_shared()), - _network{networkWithBatch}, - _networkWithoutBatch{networkWithoutBatch}, - _config{config}, - _batchedInputs(batchedInputs), - _batchedOutputs(batchedOutputs) { + m_model_with_batch{networkWithBatch}, + m_model_without_batch{networkWithoutBatch}, + m_config{config}, + m_batched_inputs(batchedInputs), + m_batched_outputs(batchedOutputs) { // WA for gcc 4.8 ( fails compilation with member init-list) - _device = networkDevice; + m_device_info = networkDevice; auto time_out = config.find(CONFIG_KEY(AUTO_BATCH_TIMEOUT)); IE_ASSERT(time_out != config.end()); - _timeOut = ParseTimeoutValue(time_out->second.as()); + m_timeout = ParseTimeoutValue(time_out->second.as()); } -AutoBatchExecutableNetwork::~AutoBatchExecutableNetwork() { - _terminate = true; - for (const auto& w : _workerRequests) { +CompiledModel::~CompiledModel() { + m_terminate = true; + for (const auto& w : m_worker_requests) { w->_thread.join(); } - _workerRequests.clear(); + m_worker_requests.clear(); } -unsigned int AutoBatchExecutableNetwork::ParseTimeoutValue(const std::string& s) { +unsigned int 
CompiledModel::ParseTimeoutValue(const std::string& s) { auto val = std::stoi(s); if (val < 0) IE_THROW(ParameterMismatch) << "Value for the " << CONFIG_KEY(AUTO_BATCH_TIMEOUT) << " should be unsigned int"; return val; } -std::shared_ptr AutoBatchExecutableNetwork::GetContext() const { - return _networkWithoutBatch->GetContext(); +std::shared_ptr CompiledModel::GetContext() const { + return m_model_without_batch->GetContext(); } -InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl( +InferenceEngine::IInferRequestInternal::Ptr CompiledModel::CreateInferRequestImpl( InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs) { auto workerRequestPtrAndId = GetWorkerInferRequest(); - return std::make_shared(networkInputs, - networkOutputs, - workerRequestPtrAndId.first, - workerRequestPtrAndId.second, - _device.batchForDevice, - _batchedInputs, - _batchedOutputs); + return std::make_shared(networkInputs, + networkOutputs, + workerRequestPtrAndId.first, + workerRequestPtrAndId.second, + m_device_info.batch_for_device, + m_batched_inputs, + m_batched_outputs); } -InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl( +InferenceEngine::IInferRequestInternal::Ptr CompiledModel::CreateInferRequestImpl( const std::vector>& inputs, const std::vector>& outputs) { if (!this->_plugin || !_plugin->IsNewAPI()) return nullptr; auto workerRequestPtrAndId = GetWorkerInferRequest(); - return std::make_shared(inputs, - outputs, - workerRequestPtrAndId.first, - workerRequestPtrAndId.second, - _device.batchForDevice, - _batchedInputs, - _batchedOutputs); + return std::make_shared(inputs, + outputs, + workerRequestPtrAndId.first, + workerRequestPtrAndId.second, + m_device_info.batch_for_device, + m_batched_inputs, + m_batched_outputs); } -std::pair AutoBatchExecutableNetwork::GetWorkerInferRequest() { - auto num = _numRequestsCreated++; - std::lock_guard lock(_workerRequestsMutex); - auto batch_id = num % _device.batchForDevice; +std::pair CompiledModel::GetWorkerInferRequest() { + auto num = m_num_requests_created++; + std::lock_guard lock(m_worker_requests_mutex); + auto batch_id = num % m_device_info.batch_for_device; if (!batch_id) { // need new request - _workerRequests.push_back(std::make_shared()); - auto workerRequestPtr = _workerRequests.back().get(); - workerRequestPtr->_inferRequestBatched = {_network->CreateInferRequest(), _network._so}; - workerRequestPtr->_batchSize = _device.batchForDevice; + m_worker_requests.push_back(std::make_shared()); + auto workerRequestPtr = m_worker_requests.back().get(); + workerRequestPtr->_inferRequestBatched = {m_model_with_batch->CreateInferRequest(), m_model_with_batch._so}; + workerRequestPtr->_batchSize = m_device_info.batch_for_device; workerRequestPtr->_completionTasks.resize(workerRequestPtr->_batchSize); workerRequestPtr->_inferRequestBatched->SetCallback( [workerRequestPtr](std::exception_ptr exceptionPtr) mutable { if (exceptionPtr) - workerRequestPtr->_exceptionPtr = exceptionPtr; + workerRequestPtr->m_exceptionPtr = exceptionPtr; IE_ASSERT(workerRequestPtr->_completionTasks.size() == (size_t)workerRequestPtr->_batchSize); // notify the individual requests on the completion for (int c = 0; c < workerRequestPtr->_batchSize; c++) { @@ -107,45 +106,46 @@ std::pair AutoBatchExecuta std::cv_status status; { std::unique_lock lock(workerRequestPtr->_mutex); - status = workerRequestPtr->_cond.wait_for(lock, std::chrono::milliseconds(_timeOut)); + 
status = workerRequestPtr->_cond.wait_for(lock, std::chrono::milliseconds(m_timeout)); } - if (_terminate) { + if (m_terminate) { break; } else { // as we pop the tasks from the queue only here // it is ok to call size() (as the _tasks can only grow in parallel) const int sz = static_cast(workerRequestPtr->_tasks.size()); if (sz == workerRequestPtr->_batchSize) { - std::pair t; + std::pair t; for (int n = 0; n < sz; n++) { IE_ASSERT(workerRequestPtr->_tasks.try_pop(t)); workerRequestPtr->_completionTasks[n] = std::move(t.second); - t.first->_inferRequest->CopyInputsIfNeeded(); - t.first->_inferRequest->_wasBatchedRequestUsed = - AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED; + t.first->m_sync_infer_request->CopyInputsIfNeeded(); + t.first->m_sync_infer_request->m_batched_request_status = + SyncInferRequest::eExecutionFlavor::BATCH_EXECUTED; } workerRequestPtr->_inferRequestBatched->StartAsync(); } else if ((status == std::cv_status::timeout) && sz) { // timeout to collect the batch is over, have to execute the requests in the batch1 mode - std::pair t; + std::pair t; // popping all tasks collected by the moment of the time-out and execute each with batch1 std::atomic arrived = {0}; std::promise all_completed; auto all_completed_future = all_completed.get_future(); for (int n = 0; n < sz; n++) { IE_ASSERT(workerRequestPtr->_tasks.try_pop(t)); - t.first->_inferRequestWithoutBatch->SetCallback( + t.first->m_infer_request_without_batch->SetCallback( [t, sz, &arrived, &all_completed](std::exception_ptr p) { if (p) - t.first->_inferRequest->_exceptionPtr = p; + t.first->m_sync_infer_request->m_exceptionPtr = p; t.second(); if (sz == ++arrived) all_completed.set_value(); }); - t.first->_inferRequest->_wasBatchedRequestUsed = - AutoBatchInferRequest::eExecutionFlavor::TIMEOUT_EXECUTED; - t.first->_inferRequest->SetBlobsToAnotherRequest(t.first->_inferRequestWithoutBatch); - t.first->_inferRequestWithoutBatch->StartAsync(); + t.first->m_sync_infer_request->m_batched_request_status = + SyncInferRequest::eExecutionFlavor::TIMEOUT_EXECUTED; + t.first->m_sync_infer_request->SetBlobsToAnotherRequest( + t.first->m_infer_request_without_batch); + t.first->m_infer_request_without_batch->StartAsync(); } all_completed_future.get(); // now when all the tasks for this batch are completed, start waiting for the timeout again @@ -154,77 +154,78 @@ std::pair AutoBatchExecuta } }); } - return {*_workerRequests.back(), static_cast(batch_id)}; + return {*m_worker_requests.back(), static_cast(batch_id)}; } -InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequest() { - if (!_network) { - auto res = _networkWithoutBatch->CreateInferRequest(); +InferenceEngine::IInferRequestInternal::Ptr CompiledModel::CreateInferRequest() { + if (!m_model_with_batch) { + auto res = m_model_without_batch->CreateInferRequest(); res->setPointerToExecutableNetworkInternal(shared_from_this()); - res->setPointerToSo(_networkWithoutBatch._so); - _so = _networkWithoutBatch._so; + res->setPointerToSo(m_model_without_batch._so); + _so = m_model_without_batch._so; return res; } // trying to create the new API request first - IInferRequestInternal::Ptr syncRequestImpl = CreateInferRequestImpl(_parameters, _results); + InferenceEngine::IInferRequestInternal::Ptr syncRequestImpl = CreateInferRequestImpl(_parameters, _results); if (!syncRequestImpl) syncRequestImpl = CreateInferRequestImpl(_networkInputs, _networkOutputs); syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this()); - 
InferenceEngine::SoIInferRequestInternal inferRequestWithoutBatch = {_networkWithoutBatch->CreateInferRequest(), - _networkWithoutBatch._so}; - return std::make_shared( - std::static_pointer_cast(syncRequestImpl), - inferRequestWithoutBatch, - _callbackExecutor); + InferenceEngine::SoIInferRequestInternal inferRequestWithoutBatch = {m_model_without_batch->CreateInferRequest(), + m_model_without_batch._so}; + return std::make_shared(std::static_pointer_cast(syncRequestImpl), + inferRequestWithoutBatch, + _callbackExecutor); } -std::shared_ptr AutoBatchExecutableNetwork::GetExecGraphInfo() { - return _network && _network->GetExecGraphInfo() ? _network->GetExecGraphInfo() - : _networkWithoutBatch->GetExecGraphInfo(); +std::shared_ptr CompiledModel::GetExecGraphInfo() { + return m_model_with_batch && m_model_with_batch->GetExecGraphInfo() ? m_model_with_batch->GetExecGraphInfo() + : m_model_without_batch->GetExecGraphInfo(); } -void AutoBatchExecutableNetwork::SetConfig(const std::map& user_config) { +void CompiledModel::SetConfig(const std::map& user_config) { auto timeout = user_config.find(CONFIG_KEY(AUTO_BATCH_TIMEOUT)); if (timeout == user_config.end() || user_config.size() > 1) { IE_THROW() << "The only config that can be changed on the fly for the AutoBatching the is the " << CONFIG_KEY(AUTO_BATCH_TIMEOUT); } else { - _timeOut = ParseTimeoutValue(timeout->second.as()); + m_timeout = ParseTimeoutValue(timeout->second.as()); } } -InferenceEngine::Parameter AutoBatchExecutableNetwork::GetConfig(const std::string& name) const { - auto it = _config.find(name); - if (it != _config.end()) { +InferenceEngine::Parameter CompiledModel::GetConfig(const std::string& name) const { + auto it = m_config.find(name); + if (it != m_config.end()) { return it->second; } else { // find config key among networks config keys - auto param = _networkWithoutBatch->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); + auto param = m_model_without_batch->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); for (auto&& configKey : param.as>()) { if (configKey == name) { - return _networkWithoutBatch->GetConfig(configKey); + return m_model_without_batch->GetConfig(configKey); } } IE_THROW(NotFound) << name << " not found in the ExecutableNetwork config"; } } -InferenceEngine::Parameter AutoBatchExecutableNetwork::GetMetric(const std::string& name) const { +InferenceEngine::Parameter CompiledModel::GetMetric(const std::string& name) const { if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) { auto reqs = 0; try { - auto hint = _networkWithoutBatch->GetConfig(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as(); + auto hint = m_model_without_batch->GetConfig(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as(); reqs = InferenceEngine::PerfHintsConfig::CheckPerformanceHintRequestValue(hint); if (!reqs) // no limitations from user, let's deduce the full blown #requests // (multiplied by the devices capabilities to run multiple requests for further perf) - reqs = _device.batchForDevice * - _networkWithoutBatch->GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as(); + reqs = + m_device_info.batch_for_device * + m_model_without_batch->GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as(); } catch (const InferenceEngine::Exception&) { } - reqs = std::max(reqs, _device.batchForDevice); // round up to the possible user's value + reqs = std::max(reqs, m_device_info.batch_for_device); // round up to the possible user's value IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, reqs); } else if (name == METRIC_KEY(NETWORK_NAME)) 
{ - IE_SET_METRIC_RETURN(NETWORK_NAME, _networkWithoutBatch->GetMetric(METRIC_KEY(NETWORK_NAME)).as()); + IE_SET_METRIC_RETURN(NETWORK_NAME, + m_model_without_batch->GetMetric(METRIC_KEY(NETWORK_NAME)).as()); } else if (name == METRIC_KEY(SUPPORTED_METRICS)) { IE_SET_METRIC_RETURN(SUPPORTED_METRICS, {METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS), @@ -236,9 +237,10 @@ InferenceEngine::Parameter AutoBatchExecutableNetwork::GetMetric(const std::stri IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, {CONFIG_KEY(AUTO_BATCH_TIMEOUT)}); // only timeout can be changed on the fly } else if (name == ov::execution_devices) { - return _networkWithoutBatch->GetMetric(name); + return m_model_without_batch->GetMetric(name); } else { IE_THROW() << "Unsupported Network metric: " << name; } } -} // namespace AutoBatchPlugin \ No newline at end of file +} // namespace autobatch_plugin +} // namespace ov \ No newline at end of file diff --git a/src/plugins/auto_batch/src/compiled_model.hpp b/src/plugins/auto_batch/src/compiled_model.hpp index eeaaed5d8d8..de970dcf36a 100644 --- a/src/plugins/auto_batch/src/compiled_model.hpp +++ b/src/plugins/auto_batch/src/compiled_model.hpp @@ -12,64 +12,72 @@ #include "plugin.hpp" #include "threading/ie_thread_safe_containers.hpp" -namespace AutoBatchPlugin { +namespace ov { +namespace autobatch_plugin { -class AutoBatchAsyncInferRequest; +class AsyncInferRequest; -class AutoBatchExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault { +class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault { public: - using Ptr = std::shared_ptr; + using Ptr = std::shared_ptr; struct WorkerInferRequest { using Ptr = std::shared_ptr; InferenceEngine::SoIInferRequestInternal _inferRequestBatched; int _batchSize; - InferenceEngine::ThreadSafeQueueWithSize> _tasks; + InferenceEngine::ThreadSafeQueueWithSize> _tasks; std::vector _completionTasks; std::thread _thread; std::condition_variable _cond; std::mutex _mutex; - std::exception_ptr _exceptionPtr; + std::exception_ptr m_exceptionPtr; }; - explicit AutoBatchExecutableNetwork( - const InferenceEngine::SoExecutableNetworkInternal& networkForDevice, - const InferenceEngine::SoExecutableNetworkInternal& networkForDeviceWithoutBatch, - const DeviceInformation& networkDevices, - const std::unordered_map& config, - const std::set& batchedIntputs, - const std::set& batchedOutputs); + CompiledModel(const InferenceEngine::SoExecutableNetworkInternal& networkForDevice, + const InferenceEngine::SoExecutableNetworkInternal& networkForDeviceWithoutBatch, + const DeviceInformation& networkDevices, + const std::unordered_map& config, + const std::set& batchedIntputs, + const std::set& batchedOutputs); void SetConfig(const std::map& config) override; + InferenceEngine::Parameter GetConfig(const std::string& name) const override; + InferenceEngine::Parameter GetMetric(const std::string& name) const override; + InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override; + InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl( InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs) override; + InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl( const std::vector>& inputs, const std::vector>& outputs) override; + std::shared_ptr GetContext() const override; + std::shared_ptr GetExecGraphInfo() override; - virtual ~AutoBatchExecutableNetwork(); + + virtual ~CompiledModel(); protected: static unsigned int ParseTimeoutValue(const 
std::string&); - std::atomic_bool _terminate = {false}; - DeviceInformation _device; - InferenceEngine::SoExecutableNetworkInternal _network; - InferenceEngine::SoExecutableNetworkInternal _networkWithoutBatch; + std::atomic_bool m_terminate = {false}; + DeviceInformation m_device_info; + InferenceEngine::SoExecutableNetworkInternal m_model_with_batch; + InferenceEngine::SoExecutableNetworkInternal m_model_without_batch; std::pair GetWorkerInferRequest(); - std::vector _workerRequests; - std::mutex _workerRequestsMutex; + std::vector m_worker_requests; + std::mutex m_worker_requests_mutex; - std::unordered_map _config; - bool _needPerfCounters = false; - std::atomic_size_t _numRequestsCreated = {0}; - std::atomic_int _timeOut = {0}; // in ms + std::unordered_map m_config; + std::atomic_size_t m_num_requests_created = {0}; + std::atomic_int m_timeout = {0}; // in ms - const std::set _batchedInputs; - const std::set _batchedOutputs; + const std::set m_batched_inputs; + const std::set m_batched_outputs; }; -} // namespace AutoBatchPlugin \ No newline at end of file +} // namespace autobatch_plugin +} // namespace ov \ No newline at end of file diff --git a/src/plugins/auto_batch/src/plugin.cpp b/src/plugins/auto_batch/src/plugin.cpp index c2709fca2d9..b887c55754d 100644 --- a/src/plugins/auto_batch/src/plugin.cpp +++ b/src/plugins/auto_batch/src/plugin.cpp @@ -19,8 +19,8 @@ #include "transformations/init_node_info.hpp" #include "transformations/utils/utils.hpp" -namespace AutoBatchPlugin { -using namespace InferenceEngine; +namespace ov { +namespace autobatch_plugin { std::vector supported_configKeys = {CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG), ov::device::priorities.name(), @@ -38,7 +38,7 @@ std::map mergeConfigs(std::map& user_config) const { +DeviceInformation Plugin::ParseMetaDevice(const std::string& devicesBatchCfg, + const std::map& user_config) const { auto metaDevice = ParseBatchDevice(devicesBatchCfg); - metaDevice.config = GetCore()->GetSupportedConfig(metaDevice.deviceName, user_config); + metaDevice.config = GetCore()->GetSupportedConfig(metaDevice.device_name, user_config); // check that no irrelevant config-keys left for (const auto& k : user_config) { @@ -72,7 +71,7 @@ DeviceInformation AutoBatchInferencePlugin::ParseMetaDevice( return metaDevice; } -RemoteContext::Ptr AutoBatchInferencePlugin::CreateContext(const InferenceEngine::ParamMap& remote_properties) { +InferenceEngine::RemoteContext::Ptr Plugin::CreateContext(const InferenceEngine::ParamMap& remote_properties) { auto cfg = remote_properties; auto it = cfg.find(CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG)); if (it == cfg.end()) @@ -86,11 +85,12 @@ RemoteContext::Ptr AutoBatchInferencePlugin::CreateContext(const InferenceEngine return nullptr; auto metaDevice = ParseMetaDevice(val, std::map()); cfg.erase(it); - return core->CreateContext(metaDevice.deviceName, cfg); + return core->CreateContext(metaDevice.device_name, cfg); } -Parameter AutoBatchInferencePlugin::GetConfig(const std::string& name, - const std::map& user_options) const { +InferenceEngine::Parameter Plugin::GetConfig( + const std::string& name, + const std::map& user_options) const { if (supported_configKeys.end() != std::find(supported_configKeys.begin(), supported_configKeys.end(), name)) { auto it = _config.find(name); if (it == _config.end()) { @@ -103,7 +103,7 @@ Parameter AutoBatchInferencePlugin::GetConfig(const std::string& name, } } -void AutoBatchInferencePlugin::CheckConfig(const std::map& user_config) { +void Plugin::CheckConfig(const std::map& 
user_config) { for (auto&& kvp : user_config) { const auto name = kvp.first; const auto val = kvp.second; @@ -124,22 +124,22 @@ void AutoBatchInferencePlugin::CheckConfig(const std::map& user_config) { +void Plugin::SetConfig(const std::map& user_config) { CheckConfig(user_config); for (auto&& kvp : user_config) { _config[kvp.first] = kvp.second; } } -static const Version version = {{2, 1}, CI_BUILD_NUMBER, "AutoBatchPlugin"}; -IE_DEFINE_PLUGIN_CREATE_FUNCTION(AutoBatchInferencePlugin, version) +static const InferenceEngine::Version version = {{2, 1}, CI_BUILD_NUMBER, "AutoBatchPlugin"}; +IE_DEFINE_PLUGIN_CREATE_FUNCTION(Plugin, version) -AutoBatchInferencePlugin::AutoBatchInferencePlugin() { +Plugin::Plugin() { _pluginName = "BATCH"; _config[CONFIG_KEY(AUTO_BATCH_TIMEOUT)] = "1000"; // default value, in ms } -InferenceEngine::Parameter AutoBatchInferencePlugin::GetMetric( +InferenceEngine::Parameter Plugin::GetMetric( const std::string& name, const std::map& user_options) const { if (name == METRIC_KEY(SUPPORTED_METRICS)) { @@ -157,13 +157,13 @@ InferenceEngine::Parameter AutoBatchInferencePlugin::GetMetric( } } -IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl( +InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl( const InferenceEngine::CNNNetwork& network, const std::map& user_config) { return LoadNetworkImpl(network, nullptr, user_config); } -InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadNetworkImpl( +InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::LoadNetworkImpl( const InferenceEngine::CNNNetwork& network, const std::shared_ptr ctx, const std::map& user_config) { @@ -179,7 +179,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN IE_THROW() << "KEY_AUTO_BATCH key is not set for BATCH device"; } auto metaDevice = ParseMetaDevice(device_batch->second, user_config); - const auto& deviceName = metaDevice.deviceName; + const auto& deviceName = metaDevice.device_name; const auto& deviceConfig = metaDevice.config; auto deviceConfigNoAutoBatch = deviceConfig; // avoid recursive auto-batching @@ -196,7 +196,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN const bool bTputInLoadCfg = (mode != deviceConfig.end() && mode->second == tput); // if the auto-batching is enabled implicitly, check the dims carefully, to avoid outstanding failures const bool check_dims = (bTputInPlg || bTputInLoadCfg); - CNNNetwork clonedNetwork(InferenceEngine::details::cloneNetwork(network)); + InferenceEngine::CNNNetwork clonedNetwork(InferenceEngine::details::cloneNetwork(network)); auto function = clonedNetwork.getFunction(); // find the batch dim ov::pass::Manager m; @@ -252,10 +252,10 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN IE_THROW(NotImplemented) << "Auto-batching supports only networks with inputs/outputs featuring batched dim!"; } catch (const InferenceEngine::Exception&) { - metaDevice.batchForDevice = 1; + metaDevice.batch_for_device = 1; } - if (!metaDevice.batchForDevice) { + if (!metaDevice.batch_for_device) { unsigned int requests = 0; // batch size is not set explicitly via device name e.g. 
BATCH:GPU(4) // let's query the optimal batch size @@ -263,19 +263,20 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN options["MODEL_PTR"] = std::const_pointer_cast(network.getFunction()); auto optBatchSize = core->GetMetric(deviceName, METRIC_KEY(OPTIMAL_BATCH_SIZE), options).as(); auto res = core->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as(); - requests = PerfHintsConfig::CheckPerformanceHintRequestValue(res); + requests = InferenceEngine::PerfHintsConfig::CheckPerformanceHintRequestValue(res); const auto& reqs = user_config.find(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)); if (reqs != user_config.end()) - requests = static_cast(PerfHintsConfig::CheckPerformanceHintRequestValue(reqs->second)); + requests = static_cast( + InferenceEngine::PerfHintsConfig::CheckPerformanceHintRequestValue(reqs->second)); if (requests) optBatchSize = std::max(1u, std::min(requests, optBatchSize)); if (optBatchSize > 2) // batching is usually in-efficient for batch<4 (as batch1 kernels are heavily optimized) - metaDevice.batchForDevice = optBatchSize; + metaDevice.batch_for_device = optBatchSize; else - metaDevice.batchForDevice = 1; + metaDevice.batch_for_device = 1; } - auto report_footprint = [](std::shared_ptr pCore, std::string device) -> size_t { + auto report_footprint = [](std::shared_ptr pCore, std::string device) -> size_t { size_t footprint = 0; // TODO: use the per-network metric (22.2) rather than plugin-level auto stats = @@ -296,9 +297,9 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN const auto total_mem = GetCore()->GetMetric(deviceName, GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE)).as(); const int estimated_batch = static_cast((total_mem - batch1_footprint) / batch1_footprint); - int closest = static_cast(pow(2, floor(log(estimated_batch) / log(2)))); + int closest = static_cast(pow(2, floor(std::log(estimated_batch) / std::log(2)))); closest = std::max(1, closest); - metaDevice.batchForDevice = std::min(metaDevice.batchForDevice, closest); + metaDevice.batch_for_device = std::min(metaDevice.batch_for_device, closest); } } // auto-batch settings @@ -309,38 +310,37 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN } InferenceEngine::SoExecutableNetworkInternal executableNetworkWithBatch; - if (metaDevice.batchForDevice > 1 && batched_inputs.size()) { + if (metaDevice.batch_for_device > 1 && batched_inputs.size()) { try { - CNNNetwork reshaped(InferenceEngine::details::cloneNetwork(network)); - ICNNNetwork::InputShapes shapes = reshaped.getInputShapes(); + InferenceEngine::CNNNetwork reshaped(InferenceEngine::details::cloneNetwork(network)); + InferenceEngine::ICNNNetwork::InputShapes shapes = reshaped.getInputShapes(); for (const auto& input : batched_inputs) - shapes[input][0] = metaDevice.batchForDevice; + shapes[input][0] = metaDevice.batch_for_device; reshaped.reshape(shapes); executableNetworkWithBatch = ctx ? 
core->LoadNetwork(reshaped, ctx, deviceConfigNoAutoBatch) : core->LoadNetwork(reshaped, deviceName, deviceConfigNoAutoBatch); } catch (const InferenceEngine::Exception&) { - metaDevice.batchForDevice = 1; + metaDevice.batch_for_device = 1; } } - return std::make_shared(executableNetworkWithBatch, - executableNetworkWithoutBatch, - metaDevice, - networkConfig, - batched_inputs, - batched_outputs); + return std::make_shared(executableNetworkWithBatch, + executableNetworkWithoutBatch, + metaDevice, + networkConfig, + batched_inputs, + batched_outputs); } -InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl( +InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl( const InferenceEngine::CNNNetwork& network, const std::shared_ptr& context, const std::map& user_config) { return LoadNetworkImpl(network, context, user_config); } -InferenceEngine::QueryNetworkResult AutoBatchInferencePlugin::QueryNetwork( - const InferenceEngine::CNNNetwork& network, - const std::map& user_config) const { +InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::CNNNetwork& network, + const std::map& user_config) const { auto core = GetCore(); if (!core) return InferenceEngine::QueryNetworkResult(); @@ -350,9 +350,10 @@ InferenceEngine::QueryNetworkResult AutoBatchInferencePlugin::QueryNetwork( auto val = c.second; cfg.erase(c.first); auto metaDevice = ParseMetaDevice(val, cfg); - return core->QueryNetwork(network, metaDevice.deviceName, cfg); + return core->QueryNetwork(network, metaDevice.device_name, cfg); } } IE_THROW() << "Value for KEY_AUTO_BATCH_DEVICE_CONFIG is not set"; } -} // namespace AutoBatchPlugin \ No newline at end of file +} // namespace autobatch_plugin +} // namespace ov \ No newline at end of file diff --git a/src/plugins/auto_batch/src/plugin.hpp b/src/plugins/auto_batch/src/plugin.hpp index 637d5476690..c2f850b81ee 100644 --- a/src/plugins/auto_batch/src/plugin.hpp +++ b/src/plugins/auto_batch/src/plugin.hpp @@ -11,43 +11,49 @@ #include "cpp_interfaces/interface/ie_iplugin_internal.hpp" #ifdef AUTOBATCH_UNITTEST -# define AutoBatchPlugin MockAutoBatchPlugin +# define autobatch_plugin mock_autobatch_plugin #endif -namespace AutoBatchPlugin { - -using DeviceName = std::string; +namespace ov { +namespace autobatch_plugin { struct DeviceInformation { - DeviceName deviceName; + std::string device_name; std::map config; - int batchForDevice; + int batch_for_device; }; -class AutoBatchInferencePlugin : public InferenceEngine::IInferencePlugin { +class Plugin : public InferenceEngine::IInferencePlugin { public: - AutoBatchInferencePlugin(); - virtual ~AutoBatchInferencePlugin() = default; + Plugin(); + + virtual ~Plugin() = default; + InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl( const InferenceEngine::CNNNetwork& network, const std::map& config) override; + InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl( const InferenceEngine::CNNNetwork& network, const std::shared_ptr& context, const std::map& config) override; void SetConfig(const std::map& config) override; + void CheckConfig(const std::map& config); InferenceEngine::Parameter GetConfig( const std::string& name, const std::map& options) const override; + InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network, const std::map& config) const override; InferenceEngine::Parameter GetMetric( const std::string& name, const std::map& options) const override; + 
InferenceEngine::RemoteContext::Ptr CreateContext(const InferenceEngine::ParamMap&) override; + #ifdef AUTOBATCH_UNITTEST public: @@ -65,4 +71,5 @@ protected: const std::shared_ptr context, const std::map& config); }; -} // namespace AutoBatchPlugin \ No newline at end of file +} // namespace autobatch_plugin +} // namespace ov \ No newline at end of file diff --git a/src/plugins/auto_batch/src/sync_infer_request.cpp b/src/plugins/auto_batch/src/sync_infer_request.cpp index 6e133228441..ab8ff9017ad 100644 --- a/src/plugins/auto_batch/src/sync_infer_request.cpp +++ b/src/plugins/auto_batch/src/sync_infer_request.cpp @@ -5,170 +5,171 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "sync_infer_request.hpp" -namespace AutoBatchPlugin { -using namespace InferenceEngine; +namespace ov { +namespace autobatch_plugin { -template -Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob, - std::string name, - const std::set& batched_names, - size_t batch_id, - size_t batch_num) { - typedef typename PrecisionTrait::value_type TYPE; +template +InferenceEngine::Blob::Ptr create_shared_blob_on_top_of_batched_blob(InferenceEngine::Blob::Ptr batched_blob, + std::string name, + const std::set& batched_names, + size_t batch_id, + size_t batch_num) { + typedef typename InferenceEngine::PrecisionTrait::value_type TYPE; typedef typename std::add_pointer::type TYPEPTR; auto ptr = batched_blob->buffer().as(); auto sizePerBatch = batched_blob->size() / batch_num; - SizeVector dims = batched_blob->getTensorDesc().getDims(); + InferenceEngine::SizeVector dims = batched_blob->getTensorDesc().getDims(); // for performance reason (copy avoidance) current impl of the auto-batching supports only batching by 0th dim if (batched_names.count(name)) { dims[0] = 1; - return make_shared_blob({precision, dims, batched_blob->getTensorDesc().getLayout()}, - ptr + sizePerBatch * batch_id, - sizePerBatch); + return InferenceEngine::make_shared_blob({precision, dims, batched_blob->getTensorDesc().getLayout()}, + ptr + sizePerBatch * batch_id, + sizePerBatch); } else { // same blob for all requests (e.g. 
constants) - return make_shared_blob({precision, dims, batched_blob->getTensorDesc().getLayout()}, ptr); + return InferenceEngine::make_shared_blob({precision, dims, batched_blob->getTensorDesc().getLayout()}, + ptr); } } -AutoBatchInferRequest::AutoBatchInferRequest(const std::vector>& inputs, - const std::vector>& outputs, - AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest, - int batch_id, - int num_batch, - const std::set& batchedInputs, - const std::set& batchedOutputs) +SyncInferRequest::SyncInferRequest(const std::vector>& inputs, + const std::vector>& outputs, + CompiledModel::WorkerInferRequest& workerRequest, + int batch_id, + int num_batch, + const std::set& batchedInputs, + const std::set& batchedOutputs) : IInferRequestInternal(inputs, outputs), - _myBatchedRequestWrapper(workerRequest), - _batchId(batch_id), - _batchSize(num_batch) { + m_batched_request_wrapper(workerRequest), + m_batch_id(batch_id), + m_batch_size(num_batch) { ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs); } -AutoBatchInferRequest::AutoBatchInferRequest(const InputsDataMap& networkInputs, - const OutputsDataMap& networkOutputs, - AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest, - int batch_id, - int num_batch, - const std::set& batchedInputs, - const std::set& batchedOutputs) +SyncInferRequest::SyncInferRequest(const InferenceEngine::InputsDataMap& networkInputs, + const InferenceEngine::OutputsDataMap& networkOutputs, + CompiledModel::WorkerInferRequest& workerRequest, + int batch_id, + int num_batch, + const std::set& batchedInputs, + const std::set& batchedOutputs) : IInferRequestInternal(networkInputs, networkOutputs), - _myBatchedRequestWrapper(workerRequest), - _batchId(batch_id), - _batchSize(num_batch) { + m_batched_request_wrapper(workerRequest), + m_batch_id(batch_id), + m_batch_size(num_batch) { ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs); } -void AutoBatchInferRequest::ShareBlobsWithBatchRequest(const std::set& batchedInputs, - const std::set& batchedOutputs) { +void SyncInferRequest::ShareBlobsWithBatchRequest(const std::set& batchedInputs, + const std::set& batchedOutputs) { // Allocate all input blobs for (const auto& it : _networkInputs) { - auto blob = _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first); - Blob::Ptr res; + auto blob = m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first); + InferenceEngine::Blob::Ptr res; switch (it.second->getTensorDesc().getPrecision()) { case InferenceEngine::Precision::FP32: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedInputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::I32: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedInputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::I8: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedInputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::I16: res = create_shared_blob_on_top_of_batched_blob( - 
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedInputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::U16: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedInputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::U32: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedInputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::FP64: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedInputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::FP16: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedInputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::BF16: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedInputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::U64: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedInputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::I64: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedInputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::U8: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedInputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::BOOL: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedInputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; default: IE_THROW() << "Unsupported input precision " << it.second->getTensorDesc().getPrecision(); @@ -177,112 +178,112 @@ void AutoBatchInferRequest::ShareBlobsWithBatchRequest(const std::setGetBlob(it.first); - Blob::Ptr res; + auto blob = m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first); + InferenceEngine::Blob::Ptr res; switch (it.second->getTensorDesc().getPrecision()) { case InferenceEngine::Precision::FP32: res = create_shared_blob_on_top_of_batched_blob( - 
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedOutputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::I32: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedOutputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::I8: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedOutputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::I16: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedOutputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::U16: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedOutputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::U32: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedOutputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::FP64: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedOutputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::FP16: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedOutputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::BF16: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedOutputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::U64: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedOutputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::I64: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedOutputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::U8: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + 
m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedOutputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; case InferenceEngine::Precision::BOOL: res = create_shared_blob_on_top_of_batched_blob( - _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + m_batched_request_wrapper._inferRequestBatched->GetBlob(it.first), it.first, batchedOutputs, - _batchId, - _batchSize); + m_batch_id, + m_batch_size); break; default: IE_THROW(NotImplemented) << "Unsupported input precision " << it.second->getTensorDesc().getPrecision(); @@ -290,7 +291,7 @@ void AutoBatchInferRequest::ShareBlobsWithBatchRequest(const std::setGetBlob(name), true); + CopyBlobIfNeeded(GetBlob(name), m_batched_request_wrapper._inferRequestBatched->GetBlob(name), true); } } -void AutoBatchInferRequest::CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src, - InferenceEngine::Blob::Ptr dst, - bool bInput) { +void SyncInferRequest::CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src, InferenceEngine::Blob::Ptr dst, bool bInput) { auto bufferDst = dst->buffer(); auto ptrDst = bufferDst.as(); auto bufferSrc = src->cbuffer(); @@ -325,13 +324,13 @@ void AutoBatchInferRequest::CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src, ptrdiff_t szDst = dst->byteSize(); ptrdiff_t szSrc = src->byteSize(); if (bInput) { - ptrdiff_t offset = szSrc != szDst ? _batchId * szDst / _batchSize : 0; + ptrdiff_t offset = szSrc != szDst ? m_batch_id * szDst / m_batch_size : 0; if ((ptrDst + offset) == ptrSrc) return; else memcpy(ptrDst + offset, ptrSrc, szSrc); } else { - ptrdiff_t offset = szSrc != szDst ? _batchId * szSrc / _batchSize : 0; + ptrdiff_t offset = szSrc != szDst ? m_batch_id * szSrc / m_batch_size : 0; if ((ptrSrc + offset) == ptrDst) return; else @@ -339,11 +338,12 @@ void AutoBatchInferRequest::CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src, } } -void AutoBatchInferRequest::CopyOutputsIfNeeded() { +void SyncInferRequest::CopyOutputsIfNeeded() { for (const auto& it : _networkOutputs) { auto& name = it.first; // this request is already in BUSY state, so using the internal functions safely - CopyBlobIfNeeded(_myBatchedRequestWrapper._inferRequestBatched->GetBlob(name), GetBlob(name), false); + CopyBlobIfNeeded(m_batched_request_wrapper._inferRequestBatched->GetBlob(name), GetBlob(name), false); } } -} // namespace AutoBatchPlugin \ No newline at end of file +} // namespace autobatch_plugin +} // namespace ov \ No newline at end of file diff --git a/src/plugins/auto_batch/src/sync_infer_request.hpp b/src/plugins/auto_batch/src/sync_infer_request.hpp index f0b9832e52a..ffe9e16298e 100644 --- a/src/plugins/auto_batch/src/sync_infer_request.hpp +++ b/src/plugins/auto_batch/src/sync_infer_request.hpp @@ -8,42 +8,53 @@ #include "compiled_model.hpp" #include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp" -namespace AutoBatchPlugin { -class AutoBatchInferRequest : public InferenceEngine::IInferRequestInternal { +namespace ov { +namespace autobatch_plugin { + +class SyncInferRequest : public InferenceEngine::IInferRequestInternal { public: - using Ptr = std::shared_ptr; - explicit AutoBatchInferRequest(const InferenceEngine::InputsDataMap& networkInputs, - const InferenceEngine::OutputsDataMap& networkOutputs, - AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr, - int batch_id, - int num_batch, - const std::set& batchedIntputs, - const std::set& batchedOutputs); - explicit AutoBatchInferRequest(const std::vector>& inputs, - const std::vector>& outputs, - 
AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr, - int batch_id, - int num_batch, - const std::set& batchedIntputs, - const std::set& batchedOutputs); + using Ptr = std::shared_ptr; + explicit SyncInferRequest(const InferenceEngine::InputsDataMap& networkInputs, + const InferenceEngine::OutputsDataMap& networkOutputs, + CompiledModel::WorkerInferRequest& workerRequestPtr, + int batch_id, + int num_batch, + const std::set& batchedIntputs, + const std::set& batchedOutputs); + + explicit SyncInferRequest(const std::vector>& inputs, + const std::vector>& outputs, + CompiledModel::WorkerInferRequest& workerRequestPtr, + int batch_id, + int num_batch, + const std::set& batchedIntputs, + const std::set& batchedOutputs); // Batch-Device impl specific: sets the data (blobs from the device request to the batched device request) void SetBlobsToAnotherRequest(InferenceEngine::SoIInferRequestInternal& req); + void CopyInputsIfNeeded(); + void CopyOutputsIfNeeded(); - AutoBatchExecutableNetwork::WorkerInferRequest& _myBatchedRequestWrapper; - std::exception_ptr _exceptionPtr; + + CompiledModel::WorkerInferRequest& m_batched_request_wrapper; + + std::exception_ptr m_exceptionPtr; + enum eExecutionFlavor : uint8_t { NOT_EXECUTED, BATCH_EXECUTED, TIMEOUT_EXECUTED - } _wasBatchedRequestUsed = eExecutionFlavor::NOT_EXECUTED; + } m_batched_request_status = eExecutionFlavor::NOT_EXECUTED; protected: void CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src, InferenceEngine::Blob::Ptr dst, bool bInput); + void ShareBlobsWithBatchRequest(const std::set& batchedIntputs, const std::set& batchedOutputs); - size_t _batchId; - size_t _batchSize; + size_t m_batch_id; + + size_t m_batch_size; }; -} // namespace AutoBatchPlugin \ No newline at end of file +} // namespace autobatch_plugin +} // namespace ov \ No newline at end of file diff --git a/src/plugins/auto_batch/tests/unit/auto_batch_infer_request_tests.cpp b/src/plugins/auto_batch/tests/unit/auto_batch_infer_request_tests.cpp index 36685c2fa6c..13445a2686c 100644 --- a/src/plugins/auto_batch/tests/unit/auto_batch_infer_request_tests.cpp +++ b/src/plugins/auto_batch/tests/unit/auto_batch_infer_request_tests.cpp @@ -4,6 +4,7 @@ #include #include + #include #include "cpp_interfaces/interface/ie_iplugin_internal.hpp" @@ -30,8 +31,7 @@ using ::testing::ReturnRef; using ::testing::StrEq; using ::testing::StrNe; using ::testing::Throw; -using namespace MockAutoBatchPlugin; -using namespace MockAutoBatchDevice; +using namespace ov::mock_autobatch_plugin; using namespace InferenceEngine; using AutoBatchRequestTestParams = std::tuple> mockInferRequestBatched; - std::vector> autoBatchInferRequests; + std::vector> autoBatchInferRequests; std::map blobMap; std::vector> inputs, outputs; std::set batchedInputs, batchedOutputs; - std::shared_ptr workerRequestPtr; + std::shared_ptr workerRequestPtr; public: static std::string getTestCaseName(testing::TestParamInfo obj) { @@ -80,14 +80,14 @@ public: } void create_worker(int batch_size) { - workerRequestPtr = std::make_shared(); + workerRequestPtr = std::make_shared(); workerRequestPtr->_inferRequestBatched = {mockInferRequestBatched, {}}; workerRequestPtr->_batchSize = batch_size; workerRequestPtr->_completionTasks.resize(workerRequestPtr->_batchSize); workerRequestPtr->_inferRequestBatched->SetCallback([this](std::exception_ptr exceptionPtr) mutable { if (exceptionPtr) - workerRequestPtr->_exceptionPtr = exceptionPtr; + workerRequestPtr->m_exceptionPtr = exceptionPtr; }); workerRequestPtr->_thread = std::thread([] { 
             std::this_thread::sleep_for(std::chrono::milliseconds(10));
@@ -173,13 +173,13 @@ TEST_P(AutoBatchRequestTest, AutoBatchRequestCreateTestCase) {
     create_worker(batch_size);
     for (int batch_id = 0; batch_id < batch_size; batch_id++) {
-        auto req = std::make_shared(inputs,
-                                     outputs,
-                                     *workerRequestPtr,
-                                     batch_id,
-                                     batch_size,
-                                     batchedInputs,
-                                     batchedOutputs);
+        auto req = std::make_shared(inputs,
+                                     outputs,
+                                     *workerRequestPtr,
+                                     batch_id,
+                                     batch_size,
+                                     batchedInputs,
+                                     batchedOutputs);
         EXPECT_NE(req, nullptr);
         autoBatchInferRequests.emplace_back(req);
@@ -206,13 +206,13 @@ TEST_P(AutoBatchRequestTest, AutoBatchRequestCopyBlobTestCase) {
     create_worker(batch_size);
     for (int batch_id = 0; batch_id < batch_size; batch_id++) {
-        auto req = std::make_shared(inputs,
-                                     outputs,
-                                     *workerRequestPtr,
-                                     batch_id,
-                                     batch_size,
-                                     batchedInputs,
-                                     batchedOutputs);
+        auto req = std::make_shared(inputs,
+                                     outputs,
+                                     *workerRequestPtr,
+                                     batch_id,
+                                     batch_size,
+                                     batchedInputs,
+                                     batchedOutputs);
         EXPECT_NE(req, nullptr);
         autoBatchInferRequests.emplace_back(req);
@@ -225,7 +225,7 @@ class AutoBatchAsyncInferRequestTest : public AutoBatchRequestTest {
 public:
     std::shared_ptr> mockInferRequestWithoutBatched;
     MockTaskExecutor::Ptr mockTaskExecutor;
-    std::vector autoBatchAsyncInferRequestVec;
+    std::vector autoBatchAsyncInferRequestVec;
     bool terminate;
 public:
@@ -245,14 +245,14 @@ public:
     }
     void create_worker(int batch_size) {
-        workerRequestPtr = std::make_shared();
+        workerRequestPtr = std::make_shared();
         workerRequestPtr->_inferRequestBatched = {mockInferRequestBatched, {}};
         workerRequestPtr->_batchSize = batch_size;
         workerRequestPtr->_completionTasks.resize(workerRequestPtr->_batchSize);
         workerRequestPtr->_inferRequestBatched->SetCallback([this](std::exception_ptr exceptionPtr) mutable {
             if (exceptionPtr)
-                workerRequestPtr->_exceptionPtr = exceptionPtr;
+                workerRequestPtr->m_exceptionPtr = exceptionPtr;
         });
         ON_CALL(*mockInferRequestBatched, StartAsync()).WillByDefault([this]() {
@@ -275,21 +275,21 @@ public:
             } else {
                 const int sz = static_cast(workerRequestPtr->_tasks.size());
                 if (sz == workerRequestPtr->_batchSize) {
-                    std::pair t;
+                    std::pair t;
                     for (int n = 0; n < sz; n++) {
                         IE_ASSERT(workerRequestPtr->_tasks.try_pop(t));
                         workerRequestPtr->_completionTasks[n] = std::move(t.second);
-                        t.first->_inferRequest->_wasBatchedRequestUsed =
-                            AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED;
+                        t.first->m_sync_infer_request->m_batched_request_status =
+                            SyncInferRequest::eExecutionFlavor::BATCH_EXECUTED;
                     }
                     workerRequestPtr->_inferRequestBatched->StartAsync();
                 } else if ((status == std::cv_status::timeout) && sz) {
-                    std::pair t;
+                    std::pair t;
                     for (int n = 0; n < sz; n++) {
                         IE_ASSERT(workerRequestPtr->_tasks.try_pop(t));
-                        t.first->_inferRequest->_wasBatchedRequestUsed =
-                            AutoBatchInferRequest::eExecutionFlavor::TIMEOUT_EXECUTED;
-                        t.first->_inferRequestWithoutBatch->StartAsync();
+                        t.first->m_sync_infer_request->m_batched_request_status =
+                            SyncInferRequest::eExecutionFlavor::TIMEOUT_EXECUTED;
+                        t.first->m_infer_request_without_batch->StartAsync();
                         t.second();
                     }
                 }
@@ -311,19 +311,19 @@ TEST_P(AutoBatchAsyncInferRequestTest, AutoBatchAsyncInferRequestCreateTest) {
     create_worker(batch_size);
     for (int batch_id = 0; batch_id < batch_size; batch_id++) {
-        auto autoRequestImpl = std::make_shared(inputs,
-                                                outputs,
-                                                *workerRequestPtr,
-                                                batch_id,
-                                                batch_size,
-                                                batchedInputs,
-                                                batchedOutputs);
+        auto autoRequestImpl = std::make_shared(inputs,
+                                                outputs,
+                                                *workerRequestPtr,
+                                                batch_id,
+                                                batch_size,
+                                                batchedInputs,
+                                                batchedOutputs);
         EXPECT_NE(autoRequestImpl, nullptr);
         autoBatchInferRequests.emplace_back(autoRequestImpl);
         InferenceEngine::SoIInferRequestInternal inferRequestWithoutBatched = {mockInferRequestWithoutBatched, {}};
         auto asyncInferRequest =
-            std::make_shared(autoRequestImpl, inferRequestWithoutBatched, nullptr);
+            std::make_shared(autoRequestImpl, inferRequestWithoutBatched, nullptr);
         EXPECT_NE(asyncInferRequest, nullptr);
         autoBatchAsyncInferRequestVec.emplace_back(asyncInferRequest);
     }
@@ -340,19 +340,19 @@ TEST_P(AutoBatchAsyncInferRequestTest, AutoBatchAsyncInferRequestStartAsyncTest)
     create_worker(batch_size);
     for (int batch_id = 0; batch_id < batch_size; batch_id++) {
-        auto autoRequestImpl = std::make_shared(inputs,
-                                                outputs,
-                                                *workerRequestPtr,
-                                                batch_id,
-                                                batch_size,
-                                                batchedInputs,
-                                                batchedOutputs);
+        auto autoRequestImpl = std::make_shared(inputs,
+                                                outputs,
+                                                *workerRequestPtr,
+                                                batch_id,
+                                                batch_size,
+                                                batchedInputs,
+                                                batchedOutputs);
         EXPECT_NE(autoRequestImpl, nullptr);
         autoBatchInferRequests.emplace_back(autoRequestImpl);
         InferenceEngine::SoIInferRequestInternal inferRequestWithoutBatched = {mockInferRequestWithoutBatched, {}};
         auto asyncInferRequest =
-            std::make_shared(autoRequestImpl, inferRequestWithoutBatched, nullptr);
+            std::make_shared(autoRequestImpl, inferRequestWithoutBatched, nullptr);
         EXPECT_NE(asyncInferRequest, nullptr);
         autoBatchAsyncInferRequestVec.emplace_back(asyncInferRequest);
     }
diff --git a/src/plugins/auto_batch/tests/unit/create_infer_request_tests.cpp b/src/plugins/auto_batch/tests/unit/create_infer_request_tests.cpp
index 7e9d7ee8f8f..226a70cbc6e 100644
--- a/src/plugins/auto_batch/tests/unit/create_infer_request_tests.cpp
+++ b/src/plugins/auto_batch/tests/unit/create_infer_request_tests.cpp
@@ -26,12 +26,11 @@ using ::testing::ReturnRef;
 using ::testing::StrEq;
 using ::testing::StrNe;
 using ::testing::Throw;
-using namespace MockAutoBatchPlugin;
-using namespace MockAutoBatchDevice;
+using namespace ov::mock_autobatch_plugin;
 using namespace InferenceEngine;
-using CreateInferRequestTestParams = std::tuple; // inferReq number
+using CreateInferRequestTestParams = std::tuple; // inferReq number
 class CreateInferRequestTest : public ::testing::TestWithParam {
 public:
     std::shared_ptr> core;
@@ -44,7 +43,7 @@ public:
     std::shared_ptr mockPlugin;
     ov::SoPtr batchedExecNetwork;
-    std::shared_ptr actualExecNet;
+    std::shared_ptr actualExecNet;
     std::vector>> inferRequestVec;
 public:
@@ -75,7 +74,8 @@ public:
         mockIPlugin = std::make_shared>();
         ON_CALL(*mockIPlugin, LoadNetwork(MatcherCast(_), _)).WillByDefault(Return(mockIExecNet));
         mockPlugin = mockIPlugin;
-        mockExecNetwork = ov::SoPtr(mockPlugin->LoadNetwork(CNNNetwork{}, {}), {});
+        mockExecNetwork =
+            ov::SoPtr(mockPlugin->LoadNetwork(CNNNetwork{}, {}), {});
         batchedExecNetwork = {};
         core = std::shared_ptr>(new NiceMock());
@@ -90,20 +90,21 @@ public:
         });
     }
-    AutoBatchExecutableNetwork::Ptr createAutoBatchExecutableNetwork(int batch_size) {
+    CompiledModel::Ptr createAutoBatchExecutableNetwork(int batch_size) {
         DeviceInformation metaDevice = {"CPU", {}, batch_size};
         std::unordered_map config = {{CONFIG_KEY(AUTO_BATCH_TIMEOUT), "200"}};
         std::set batched_inputs = {"Parameter_0"};
         std::set batched_outputs = {"Convolution_20"};
         if (batch_size > 1)
-            batchedExecNetwork = ov::SoPtr(mockPlugin->LoadNetwork(CNNNetwork{}, {}), {});
-        return std::make_shared(batchedExecNetwork,
-                                mockExecNetwork,
-                                metaDevice,
-                                config,
-                                batched_inputs,
-                                batched_outputs);
+            batchedExecNetwork =
+                ov::SoPtr(mockPlugin->LoadNetwork(CNNNetwork{}, {}), {});
+        return std::make_shared(batchedExecNetwork,
+                                mockExecNetwork,
+                                metaDevice,
+                                config,
+                                batched_inputs,
+                                batched_outputs);
     }
 };
@@ -128,7 +129,5 @@ const std::vector batch_size{1, 8, 16, 32, 128, 256};
 INSTANTIATE_TEST_SUITE_P(smoke_AutoBatch_BehaviorTests,
                          CreateInferRequestTest,
-                         ::testing::Combine(
-                             ::testing::ValuesIn(batch_size),
-                             ::testing::ValuesIn(requests_num)),
+                         ::testing::Combine(::testing::ValuesIn(batch_size), ::testing::ValuesIn(requests_num)),
                          CreateInferRequestTest::getTestCaseName);
\ No newline at end of file
diff --git a/src/plugins/auto_batch/tests/unit/exec_network_tests.cpp b/src/plugins/auto_batch/tests/unit/exec_network_tests.cpp
index fe91610a516..f13f706c284 100644
--- a/src/plugins/auto_batch/tests/unit/exec_network_tests.cpp
+++ b/src/plugins/auto_batch/tests/unit/exec_network_tests.cpp
@@ -26,8 +26,7 @@ using ::testing::ReturnRef;
 using ::testing::StrEq;
 using ::testing::StrNe;
 using ::testing::Throw;
-using namespace MockAutoBatchPlugin;
-using namespace MockAutoBatchDevice;
+using namespace ov::mock_autobatch_plugin;
 using namespace InferenceEngine;
 using ExecNetworkParams = std::tuple(_), _)).WillByDefault(Return(mockIExecNet));
         mockPlugin = mockIPluginPtr;
         EXPECT_CALL(*mockIPluginPtr, LoadNetwork(MatcherCast(_), _)).Times(1);
-        mockExecNetwork = ov::SoPtr(mockPlugin->LoadNetwork(CNNNetwork{}, {}), {});
+        mockExecNetwork =
+            ov::SoPtr(mockPlugin->LoadNetwork(CNNNetwork{}, {}), {});
         core = std::shared_ptr>(new NiceMock());
         plugin = std::shared_ptr>(new NiceMock());
         plugin->SetCore(core);
         ON_CALL(*plugin, ParseBatchDevice).WillByDefault([this](const std::string& batchDevice) {
-            return plugin->AutoBatchInferencePlugin::ParseBatchDevice(batchDevice);
+            return plugin->Plugin::ParseBatchDevice(batchDevice);
         });
         ON_CALL(*core, LoadNetwork(MatcherCast(_), MatcherCast(_), _))
             .WillByDefault(Return(mockExecNetwork));
@@ -174,25 +174,25 @@ TEST_P(ExecNetworkTest, ExecNetworkGetConfigMetricTestCase) {
 }
 const std::vector testConfigs = {
-    // Metric
-    ExecNetworkParams{METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS), 0, false},
-    ExecNetworkParams{METRIC_KEY(NETWORK_NAME), 0, false},
-    ExecNetworkParams{METRIC_KEY(SUPPORTED_METRICS), 0, false},
-    ExecNetworkParams{METRIC_KEY(SUPPORTED_CONFIG_KEYS), 0, false},
-    ExecNetworkParams{ov::execution_devices.name(), 0, false},
-    // Config in autobatch
-    ExecNetworkParams{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG), 1, false},
-    ExecNetworkParams{CONFIG_KEY(AUTO_BATCH_TIMEOUT), 1, false},
-    ExecNetworkParams{CONFIG_KEY(CACHE_DIR), 1, false},
-    // Config in dependent plugin
-    ExecNetworkParams{"OPTIMAL_BATCH_SIZE", 1, false},
-    // Incorrect Metric
-    ExecNetworkParams{"INCORRECT_METRIC", 0, true},
-    // Incorrect config
-    ExecNetworkParams{"INCORRECT_CONFIG", 1, true},
-    // Set Config
-    ExecNetworkParams{CONFIG_KEY(AUTO_BATCH_TIMEOUT), 2, false},
-    ExecNetworkParams{"INCORRECT_CONFIG", 2, true},
+    // Metric
+    ExecNetworkParams{METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS), 0, false},
+    ExecNetworkParams{METRIC_KEY(NETWORK_NAME), 0, false},
+    ExecNetworkParams{METRIC_KEY(SUPPORTED_METRICS), 0, false},
+    ExecNetworkParams{METRIC_KEY(SUPPORTED_CONFIG_KEYS), 0, false},
+    ExecNetworkParams{ov::execution_devices.name(), 0, false},
+    // Config in autobatch
+    ExecNetworkParams{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG), 1, false},
+    ExecNetworkParams{CONFIG_KEY(AUTO_BATCH_TIMEOUT), 1, false},
+    ExecNetworkParams{CONFIG_KEY(CACHE_DIR), 1, false},
+    // Config in dependent plugin
ExecNetworkParams{"OPTIMAL_BATCH_SIZE", 1, false}, + // Incorrect Metric + ExecNetworkParams{"INCORRECT_METRIC", 0, true}, + // Incorrect config + ExecNetworkParams{"INCORRECT_CONFIG", 1, true}, + // Set Config + ExecNetworkParams{CONFIG_KEY(AUTO_BATCH_TIMEOUT), 2, false}, + ExecNetworkParams{"INCORRECT_CONFIG", 2, true}, }; INSTANTIATE_TEST_SUITE_P(smoke_AutoBatch_BehaviorTests, diff --git a/src/plugins/auto_batch/tests/unit/load_network_tests.cpp b/src/plugins/auto_batch/tests/unit/load_network_tests.cpp index 4cdd0387aa8..3723a24bd57 100644 --- a/src/plugins/auto_batch/tests/unit/load_network_tests.cpp +++ b/src/plugins/auto_batch/tests/unit/load_network_tests.cpp @@ -28,8 +28,7 @@ using ::testing::ReturnRef; using ::testing::StrEq; using ::testing::StrNe; using ::testing::Throw; -using namespace MockAutoBatchPlugin; -using namespace MockAutoBatchDevice; +using namespace ov::mock_autobatch_plugin; using namespace InferenceEngine; using PluginLoadNetworkParams = std::tuple, // Paramters @@ -79,14 +78,15 @@ public: .WillByDefault(Return(cpuMockIExecNet)); cpuMockPlugin = cpuMockIPluginPtr; EXPECT_CALL(*cpuMockIPluginPtr, LoadNetwork(MatcherCast(_), _)).Times(1); - cpuMockExecNetwork = ov::SoPtr(cpuMockPlugin->LoadNetwork(CNNNetwork{}, {}), {}); + cpuMockExecNetwork = + ov::SoPtr(cpuMockPlugin->LoadNetwork(CNNNetwork{}, {}), {}); core = std::shared_ptr>(new NiceMock()); plugin = std::shared_ptr>(new NiceMock()); plugin->SetCore(core); ON_CALL(*plugin, ParseBatchDevice).WillByDefault([this](const std::string& batchDevice) { - return plugin->AutoBatchInferencePlugin::ParseBatchDevice(batchDevice); + return plugin->Plugin::ParseBatchDevice(batchDevice); }); ON_CALL(*core, LoadNetwork(MatcherCast(_), MatcherCast(_), _)) .WillByDefault(Return(cpuMockExecNetwork)); @@ -257,12 +257,13 @@ const std::vector testConfigs = { {"GPU_DEVICE_TOTAL_MEM_SIZE", "4096000000"}}, {{"AUTO_BATCH_TIMEOUT", "200"}, {"AUTO_BATCH_DEVICE_CONFIG", "CPU"}}, 1}, - //PluginLoadNetworkParams{{{"PERFORMANCE_HINT", "THROUGHPUT"}, + // PluginLoadNetworkParams{{{"PERFORMANCE_HINT", "THROUGHPUT"}, // {"OPTIMAL_BATCH_SIZE", "32"}, // {"PERFORMANCE_HINT_NUM_REQUESTS", "16"}, // {"GPU_MEMORY_STATISTICS", "1024000"}, // {"GPU_DEVICE_TOTAL_MEM_SIZE", "4096000000"}}, - // {{"AUTO_BATCH_TIMEOUT", "200"}, {"AUTO_BATCH_DEVICE_CONFIG", "CPU"}, {"PERFORMANCE_HINT_NUM_REQUESTS", "12"}}, + // {{"AUTO_BATCH_TIMEOUT", "200"}, {"AUTO_BATCH_DEVICE_CONFIG", "CPU"}, + // {"PERFORMANCE_HINT_NUM_REQUESTS", "12"}}, // 12}, // // Case 3: GPU batch size is figured out by diff --git a/src/plugins/auto_batch/tests/unit/mock_auto_batch_plugin.hpp b/src/plugins/auto_batch/tests/unit/mock_auto_batch_plugin.hpp index a896b465fb9..966c781122a 100644 --- a/src/plugins/auto_batch/tests/unit/mock_auto_batch_plugin.hpp +++ b/src/plugins/auto_batch/tests/unit/mock_auto_batch_plugin.hpp @@ -13,10 +13,9 @@ #include "plugin.hpp" #include "sync_infer_request.hpp" -using namespace MockAutoBatchPlugin; -namespace MockAutoBatchDevice { +using namespace ov::mock_autobatch_plugin; -class MockAutoBatchInferencePlugin : public AutoBatchInferencePlugin { +class MockAutoBatchInferencePlugin : public Plugin { public: MOCK_METHOD((DeviceInformation), ParseMetaDevices, @@ -30,10 +29,8 @@ public: (const, override)); }; -class MockAutoBatchExecutableNetwork : public AutoBatchExecutableNetwork { +class MockAutoBatchExecutableNetwork : public CompiledModel { public: MOCK_METHOD((InferenceEngine::Parameter), GetConfig, (const std::string&), (const, override)); 
     MOCK_METHOD((InferenceEngine::Parameter), GetMetric, (const std::string&), (const, override));
 };
-
-} // namespace MockAutoBatchDevice
diff --git a/src/plugins/auto_batch/tests/unit/plugins_tests.cpp b/src/plugins/auto_batch/tests/unit/plugins_tests.cpp
index 3c7fd47a1ed..c81bf71a5a2 100644
--- a/src/plugins/auto_batch/tests/unit/plugins_tests.cpp
+++ b/src/plugins/auto_batch/tests/unit/plugins_tests.cpp
@@ -20,8 +20,7 @@ using ::testing::ReturnRef;
 using ::testing::StrEq;
 using ::testing::StrNe;
 using ::testing::Throw;
-using namespace MockAutoBatchPlugin;
-using namespace MockAutoBatchDevice;
+using namespace ov::mock_autobatch_plugin;
 using BatchDeviceConfigParams = std::tupleSetCore(core);
         ON_CALL(*plugin, ParseBatchDevice).WillByDefault([this](const std::string& batchDevice) {
-            return plugin->AutoBatchInferencePlugin::ParseBatchDevice(batchDevice);
+            return plugin->Plugin::ParseBatchDevice(batchDevice);
         });
     }
 };
@@ -192,7 +191,7 @@ public:
             });
         ON_CALL(*plugin, ParseBatchDevice).WillByDefault([this](const std::string& batchDevice) {
-            return plugin->AutoBatchInferencePlugin::ParseBatchDevice(batchDevice);
+            return plugin->Plugin::ParseBatchDevice(batchDevice);
         });
     }
@@ -223,8 +222,8 @@ TEST_P(ParseMetaDeviceTest, ParseMetaDeviceTestCase) {
         ASSERT_ANY_THROW(plugin->ParseMetaDevice(batch_cfg, config));
     } else {
         auto result = plugin->ParseMetaDevice(batch_cfg, config);
-        EXPECT_EQ(result.deviceName, expected.deviceName);
-        EXPECT_EQ(result.batchForDevice, expected.batchForDevice);
+        EXPECT_EQ(result.device_name, expected.device_name);
+        EXPECT_EQ(result.batch_for_device, expected.batch_for_device);
         EXPECT_TRUE(compare(result.config, expected.config));
     }
 }
@@ -255,7 +254,7 @@ public:
         plugin->SetCore(core);
         ON_CALL(*plugin, ParseBatchDevice).WillByDefault([this](const std::string& batchDevice) {
-            return plugin->AutoBatchInferencePlugin::ParseBatchDevice(batchDevice);
+            return plugin->Plugin::ParseBatchDevice(batchDevice);
         });
     }
 };
@@ -271,8 +270,8 @@ TEST_P(ParseBatchDeviceTest, ParseBatchDeviceTestCase) {
         ASSERT_ANY_THROW(plugin->ParseBatchDevice(batchDevice));
     } else {
        auto result = plugin->ParseBatchDevice(batchDevice);
-        EXPECT_EQ(result.deviceName, deviceName);
-        EXPECT_EQ(result.batchForDevice, batchSize);
+        EXPECT_EQ(result.device_name, deviceName);
+        EXPECT_EQ(result.batch_for_device, batchSize);
     }
 }
@@ -303,7 +302,7 @@ public:
         ON_CALL(*plugin, GetMetric)
             .WillByDefault(
                 [this](const std::string& name, const std::map& options) {
-                    return plugin->AutoBatchInferencePlugin::GetMetric(name, options);
+                    return plugin->Plugin::GetMetric(name, options);
                 });
     }
 };