diff --git a/src/plugins/auto_batch/auto_batch.cpp b/src/plugins/auto_batch/auto_batch.cpp index 3ef9b2bf223..7af705e61b1 100644 --- a/src/plugins/auto_batch/auto_batch.cpp +++ b/src/plugins/auto_batch/auto_batch.cpp @@ -746,7 +746,8 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN requests = static_cast(PerfHintsConfig::CheckPerformanceHintRequestValue(reqs->second)); if (requests) optBatchSize = std::max(1u, std::min(requests, optimalBatchSize)); - metaDevice.batchForDevice = optBatchSize; + if (optBatchSize > 2) // batching is usually inefficient for batch<4 (as batch1 kernels are heavily optimized) + metaDevice.batchForDevice = optBatchSize; } const auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT);