diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp
index fd51e38c483..defda468ddf 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp
@@ -53,6 +53,10 @@ public:
     std::shared_ptr<cldnn::engine> GetEngine() const { return getContextImpl(m_context)->GetEngine(); }
     int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; }
     const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
+    InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_program->GetNetworkInputs(); }  // returned by value
+    InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_program->GetNetworkOutputs(); }  // returned by value
+    std::map<std::string, std::pair<int64_t, int64_t>> GetInputDynBatchDims() const { return m_program->m_input_batch_dim; }  // copy of the program's map
+    std::map<std::string, int64_t> GetOutputDynBatchDims() const { return m_program->m_output_batch_dim; }  // copy of the program's map
     size_t GetNetworksCount() const { return m_networks.size(); }
     std::shared_ptr<cldnn::network> GetNetwork(size_t idx = 0) const;
     InferenceEngine::SizeVector GetOutputSize(std::string outName) const;
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp
index f2cdcdf7c79..d7f888f89a6 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp
@@ -93,6 +93,8 @@ public:
 
     int m_max_batch;
     int m_curBatch;
+    std::map<std::string, std::pair<int64_t, int64_t>> m_input_batch_dim;  // input name -> {batch dim index or -1, max batch}
+    std::map<std::string, int64_t> m_output_batch_dim;  // output name -> index of the dim changed by the max-batch reshape
 
     std::shared_ptr<cldnn::program> GetCompiledProgram(int program_id = 0);
     const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return inputLayouts; }
@@ -104,6 +106,9 @@ public:
     int GetMaxBatchSizeForSingleProgram();
 
     bool IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr<ngraph::Node>& op);
+    bool IsDynBatchModel(const std::shared_ptr<ov::Model>& model,
+                         std::map<std::string, ov::PartialShape>& shapes,
+                         std::map<std::string, std::pair<int64_t, int64_t>>& batch_dim);
 
     // Profiling utils
     void InitProfileInfo(const std::string& layerName,
@@ -170,7 +175,6 @@ private:
                                                  bool createTopologyOnly = false, bool partialBuild = false);
 
     void CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr<ngraph::Node>& op);
-    bool CanProcessDynBatch(std::vector<std::shared_ptr<ngraph::Node>> ops, InferenceEngine::InputsDataMap networkInputs) const;
     void ChangeInputBatch(int batch);
 };
 
diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp
index fa366e1faff..251251e3d42 100644
--- a/src/plugins/intel_gpu/src/plugin/graph.cpp
+++ b/src/plugins/intel_gpu/src/plugin/graph.cpp
@@ -49,6 +49,8 @@ Graph::Graph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context,
     , m_stream_id(stream_id)
     , m_state(0) {
     m_program = std::make_shared<Program>(network, GetEngine(), m_config);
+    if (m_program->m_max_batch > 1)
+        m_config.max_dynamic_batch = m_program->m_max_batch;
     Build();
 }
 
diff --git a/src/plugins/intel_gpu/src/plugin/infer_request.cpp b/src/plugins/intel_gpu/src/plugin/infer_request.cpp
index 5a3be7abbae..216be15290b 100644
--- a/src/plugins/intel_gpu/src/plugin/infer_request.cpp
+++ b/src/plugins/intel_gpu/src/plugin/infer_request.cpp
@@ -171,14 +171,12 @@ bool same_host_mem(cldnn::memory::ptr memPtr, uint8_t* hostPtr) {
     }
     return bufferMem == hostPtr;
 }
-
 }  // namespace
 
 namespace ov {
 namespace runtime {
 namespace intel_gpu {
 
-
 // ----------------------------------------------------------------------------------------- //
 // ---------------------------- IE API impl ------------------------------------------------ //
 // ----------------------------------------------------------------------------------------- //
@@ -188,6 +186,8 @@ Blob::Ptr InferRequest::GetBlob(const std::string& name) {
     InputInfo::Ptr foundInput;
     DataPtr foundOutput;
     bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput);
+    auto node = is_input ? findInputByNodeName(name) : findOutputByNodeName(name);
+    bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic());
 
     if (is_input) {
         // ROI blob is returned only if it was set previously. Otherwise default blob is returned.
@@ -196,11 +196,20 @@ Blob::Ptr InferRequest::GetBlob(const std::string& name) {
             data = it->second->getRoiBlob();
         } else {
             data = _inputs[name];
-            checkInputBlob(data, name, foundInput);
+            if (!isDynamic)
+                checkInputBlob(data, name, foundInput);
         }
     } else {
         data = _outputs[name];
-        checkOutputBlob(data, name, foundOutput);
+        if (isDynamic) {
+            if (m_graph->GetMaxDynamicBatchSize() > 1) {
+                SizeVector outDims = data->getTensorDesc().getDims();
+                outDims[m_graph->GetOutputDynBatchDims()[name]] = m_curBatch;
+                data->getTensorDesc().setDims(outDims);
+            }
+        } else {
+            checkOutputBlob(data, name, foundOutput);
+        }
     }
     return data;
 }
@@ -243,13 +252,16 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
                                            desc.getPrecision().size(),
                                            std::multiplies<size_t>());
     bool preProcResize = false;
+    auto node = is_input ? findInputByNodeName(name) : findOutputByNodeName(name);
+    bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic());
     if (is_input) {
         preProcResize = foundInput->getPreProcess().getResizeAlgorithm() != ResizeAlgorithm::NO_RESIZE;
         const auto inputColorFormat = foundInput->getPreProcess().getColorFormat();
         preProcResize |= (inputColorFormat != ColorFormat::RAW) && (inputColorFormat != ColorFormat::BGR);
     }
 
-    if (dataBinSize != netReqBinSize && !compoundBlobPassed && !preProcResize) {
+    if (!isDynamic &&
+        dataBinSize != netReqBinSize && !compoundBlobPassed && !preProcResize) {
         IE_THROW() << "Incorrect binary data size for " << (is_input ? "input" : "output") <<
                       " blob with name: \'" << name <<  "\' " <<
                       "Current: " << dataBinSize << " Required: " << netReqBinSize;
@@ -292,7 +304,9 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
                         auto y_ptr = nv12_ptr->y()->as<gpu::ClBlob>();
                         if (y_ptr) {
                             auto y_impl = getBlobImpl(y_ptr);
-                            y_impl->allocate();
+                            if (!y_impl->is_allocated()) {
+                                y_impl->allocate();
+                            }
                             _deviceInputs[y_name] = nv12_ptr->y();
                             is_remote = true;
                         }
@@ -300,7 +314,9 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
                         auto uv_ptr = nv12_ptr->uv()->as<gpu::ClBlob>();
                         if (uv_ptr) {
                             auto uv_impl = getBlobImpl(uv_ptr);
-                            uv_impl->allocate();
+                            if (!uv_impl->is_allocated()) {
+                                uv_impl->allocate();
+                            }
                             _deviceInputs[uv_name] = nv12_ptr->uv();
                             is_remote = true;
                         }
@@ -326,12 +342,21 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
                 if (compoundBlobPassed) {
                     IE_THROW(NotImplemented) << cannot_set_compound;
                 }
-                size_t blobSize = desc.getLayout() != SCALAR
-                    ? details::product(desc.getDims())
-                    : 1;
-                if (dataSize != blobSize) {
-                    IE_THROW() << "Input blob size is not equal network input size ("
-                               << dataSize << "!=" << blobSize << ").";
+                if (isDynamic) {
+                    // extract new batch size from blob
+                    if (m_graph->GetMaxDynamicBatchSize() > 1) {
+                        const auto batch_idx = m_graph->GetInputDynBatchDims()[name].first;
+                        if (batch_idx >= 0)
+                            SetBatch(blobDesc.getDims()[batch_idx]);
+                    }
+                } else {
+                    size_t blobSize = desc.getLayout() != SCALAR
+                        ? details::product(desc.getDims())
+                        : 1;
+                    if (dataSize != blobSize) {
+                        IE_THROW() << "Input blob size is not equal to network input size ("
+                            << dataSize << "!=" << blobSize << ").";
+                    }
                 }
 
                 if (data->buffer() == nullptr)
@@ -347,15 +372,17 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
         if (is_remote) {
             _deviceOutputs[name] = data;
         } else {
-            size_t outputSize = desc.getLayout() != SCALAR
-                ? details::product(desc.getDims())
-                : 1;
-            if (dataSize != outputSize) {
-                IE_THROW() << "Output blob size is not equal network output size (" << dataSize
-                           << "!=" << outputSize << ").";
+            if (!isDynamic) {
+                size_t outputSize = desc.getLayout() != SCALAR
+                    ? details::product(desc.getDims())
+                    : 1;
+                if (dataSize != outputSize) {
+                    IE_THROW() << "Output blob size is not equal to network output size (" << dataSize
+                        << "!=" << outputSize << ").";
+                }
+                if (data->buffer() == nullptr)
+                    IE_THROW(NotAllocated) << str_output_not_allocated << " Output name: \'" << name << "\'";
             }
-            if (data->buffer() == nullptr)
-                IE_THROW(NotAllocated) << str_input_not_allocated << " Input name: \'" << name << "\'";
         }
         _outputs[name] = data;
     }
@@ -457,7 +484,10 @@ void InferRequest::checkBlobs() {
         } else {
             IE_THROW(NotFound) << "Failed to find input with name: \'" << input.first << "\'";
         }
-        checkInputBlob(input.second, input.first, foundInput, m_graph->getConfig().nv12_two_inputs);
+        auto node = findInputByNodeName(input.first);
+        bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic());
+        if (!is_dynamic)
+            checkInputBlob(input.second, input.first, foundInput, m_graph->getConfig().nv12_two_inputs);
     }
     for (auto const &output : _outputs) {
         DataPtr foundOutput = nullptr;
@@ -470,7 +500,10 @@ void InferRequest::checkBlobs() {
         } else {
             IE_THROW(NotFound) << "Failed to find output with name: \'" << output.first << "\'";
         }
-        checkOutputBlob(output.second, output.first, foundOutput);
+        auto node = findOutputByNodeName(output.first);
+        bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic());
+        if (!is_dynamic)
+            checkOutputBlob(output.second, output.first, foundOutput);
     }
 }
 
@@ -509,9 +542,12 @@ void InferRequest::SetBatch(int new_batch) {
     batchOutputs.clear();
 
     // tune expected inputs
-    for (auto &input : m_graph->GetInputLayouts()) {
-        cldnn::tensor dims = input.second.size;
-        const SizeVector sz = { 1, size_t(dims.feature[0]), size_t(dims.spatial[1]), size_t(dims.spatial[0]) };
+    for (auto& input : m_graph->GetNetworkInputs()) {
+        auto sz = input.second->getTensorDesc().getDims();
+        const auto batch_idx = m_graph->GetInputDynBatchDims()[input.first].first;
+        if (batch_idx >= 0)
+            sz[batch_idx] = 1;
+
         size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies<size_t>());
         std::vector<buf_info> in_buf;
 
@@ -534,9 +570,11 @@ void InferRequest::SetBatch(int new_batch) {
     }
 
     // tune expected outputs
-    for (auto& no : _networkOutputs) {
-        auto sz = m_graph->GetOutputSize(no.first);
-        sz.front() = 1;
+    for (auto& no : m_graph->GetNetworkOutputs()) {
+        auto sz = no.second->getTensorDesc().getDims();
+        const auto batch_idx = m_graph->GetOutputDynBatchDims()[no.first];  // outputs have their own map; the input map is keyed by input names
+        if (batch_idx >= 0 && static_cast<size_t>(batch_idx) < sz.size())  // never index past the output rank
+            sz[batch_idx] = 1;
         size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies<size_t>());
         std::vector<buf_info> out_buf;
 
@@ -816,6 +854,21 @@ void InferRequest::setup_stream_graph() {
         streamID = streamID % numGraphs;
     }
     m_graph = streamGraphs[streamID];
+    // in case of dynamic batch, check all input blobs and set new batch
+    if (m_graph->GetMaxDynamicBatchSize() > 1) {
+        for (auto& input : _networkInputs) {
+            auto node = findInputByNodeName(input.first);
+            bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic());
+            if (!is_dynamic)
+                continue;
+            // extract new batch size from blob
+            const auto batch_idx = m_graph->GetInputDynBatchDims()[input.first].first;
+            if (batch_idx >= 0) {
+                SetBatch(_inputs[input.first]->getTensorDesc().getDims()[batch_idx]);
+                break;
+            }
+        }
+    }
 }
 
 Blob::Ptr InferRequest::create_host_blob(const TensorDesc& desc, std::shared_ptr<InferenceEngine::IAllocator> alloc) {
@@ -968,16 +1021,9 @@ void InferRequest::allocate_inputs() {
 void InferRequest::allocate_inputs_dynamic() {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_inputs_dynamic");
     // allocate inputs
-    for (auto &input : m_graph->GetInputLayouts()) {
+    for (auto &input : m_graph->GetNetworkInputs()) {
         InputInfo::Ptr ni = _networkInputs.at(input.first);
-        TensorDesc desc = ni->getTensorDesc();
-        SizeVector& dims = desc.getDims();
-
-        if (!dims.empty()) {
-            *dims.begin() = static_cast<size_t>(m_graph->GetMaxDynamicBatchSize());
-        } else {
-            IE_THROW() << "Empty dimensions for input blob " << input.first;
-        }
+        TensorDesc desc = input.second->getTensorDesc();
 
         Blob::Ptr inputBlob = create_host_blob(desc);
         if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) {
@@ -1020,17 +1066,10 @@ void InferRequest::allocate_outputs() {
 void InferRequest::allocate_outputs_dynamic() {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_outputs_dynamic");
     // allocate outputs
-    for (auto& no : _networkOutputs) {
+    for (auto& no : m_graph->GetNetworkOutputs()) {
         std::string outputID = m_graph->MapOutputName(no.first);
         DataPtr oi = no.second;
         TensorDesc desc = oi->getTensorDesc();
-        SizeVector& dims = desc.getDims();
-
-        if (!dims.empty()) {
-            *dims.begin() = static_cast<size_t>(m_graph->GetMaxDynamicBatchSize());
-        } else {
-            IE_THROW() << "Empty dimensions for output blob " << no.first;
-        }
 
         Blob::Ptr outputBlob = create_host_blob(desc);
         _outputs[no.first] = outputBlob;
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
index 9adca79a60a..5a456ec2e26 100644
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -376,16 +376,45 @@ QueryNetworkResult Plugin::QueryNetwork(const CNNNetwork& network,
     }
 
     auto clonedNetwork = CloneAndTransformNetwork(network, conf);
-    auto ops = clonedNetwork.getFunction()->get_ordered_ops();
+    auto func = clonedNetwork.getFunction();
+    auto ops = func->get_ordered_ops();
     std::unordered_set<std::string> supported;
     std::unordered_set<std::string> unsupported;
 
     std::unordered_set<std::string> constantsNames;
     std::vector<std::shared_ptr<ngraph::Node>> constants;
 
+    std::map<std::string, ngraph::PartialShape> shapes;
+    std::map<std::string, std::pair<int64_t, int64_t>> batch_dim;
+    bool dyn_shape_batch_found = prog.IsDynBatchModel(func, shapes, batch_dim);
     auto layerIsSupported = [&](std::shared_ptr<ngraph::Node> node) {
         if (node->is_dynamic()) {
-            return false;
+            if (!dyn_shape_batch_found)
+                return false;
+
+            auto pshape = node->get_output_partial_shape(0);
+            if (pshape.rank().is_dynamic())
+                return false;
+
+            int dynCount = 0;
+            int64_t batch_idx = -1;
+            for (size_t i = 0; i < pshape.size(); i++) {
+                if (pshape[i].is_dynamic()) {
+                    dynCount++;
+                    if (batch_idx < 0) {
+                        batch_idx = i;
+                    }
+                }
+            }
+
+            if (dynCount != 1)
+                return false;  // more than one dimension is dynamic
+
+            int64_t max_batch = pshape[batch_idx].get_max_length();
+            if (max_batch <= 1)
+                return false;
+
+            return true;
         }
         if (ngraph::is_type<const ngraph::op::v0::PriorBox>(node) ||
             ngraph::is_type<const ngraph::op::v0::PriorBoxClustered>(node) ||
@@ -637,6 +666,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string,
         auto closest_pow_of_2 = [] (float x) {
             return pow(2, floor(log(x)/log(2)));
         };
+        GPU_DEBUG_GET_INSTANCE(debug_config);
         auto model_param = options.find("MODEL_PTR");
         if (model_param == options.end()) {
             GPU_DEBUG_IF(debug_config->verbose >= 1) {
diff --git a/src/plugins/intel_gpu/src/plugin/program.cpp b/src/plugins/intel_gpu/src/plugin/program.cpp
index 3f04dbe019c..2d37b86a8b2 100644
--- a/src/plugins/intel_gpu/src/plugin/program.cpp
+++ b/src/plugins/intel_gpu/src/plugin/program.cpp
@@ -5,6 +5,7 @@
 #include "intel_gpu/plugin/program.hpp"
 #include "ngraph/ops.hpp"
 #include "ngraph_ops/nms_ie_internal.hpp"
+#include "openvino/core/graph_util.hpp"
 #include "intel_gpu/plugin/itt.hpp"
 #include "intel_gpu/runtime/debug_configuration.hpp"
 
@@ -56,43 +57,74 @@ void Program::ValidateInputs(const std::shared_ptr<ngraph::Node>& op, std::vecto
                        << " op::v" << op->get_type_info().version << ")";
 }
 
-bool Program::CanProcessDynBatch(std::vector<std::shared_ptr<ngraph::Node>> ops, InferenceEngine::InputsDataMap networkInputs) const {
-    if (networkInputs.empty())
-        return false;
+// File-local helper: the name under which a node is keyed in the shape / batch-dim maps -
+// the first name of its output-0 tensor, or the friendly name when that tensor has no names.
+// NOTE(review): with several tensor names, which one is "first" depends on get_names() ordering - confirm.
+static const auto getParamName = [](const std::shared_ptr<ov::Node>& param) -> std::string {
+    const auto& names = param->get_output_tensor(0).get_names();
+    return names.empty() ? param->get_friendly_name() : *names.begin();
+};
 
-    for (auto op : ops) {
-        // TODO: do we have any other exception cases?
-        if (std::dynamic_pointer_cast<ngraph::op::v1::Reshape>(op)) {
-            if (op->get_input_shape(0)[0] == op->get_output_shape(0)[0])
-                continue;
-        }
-
-        // List of the operations which can lead to invalid dynamic batch processing
-        if (std::dynamic_pointer_cast<ngraph::op::internal::NonMaxSuppressionIEInternal>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v5::NonMaxSuppression>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v4::NonMaxSuppression>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v3::NonMaxSuppression>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v1::NonMaxSuppression>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::PSROIPooling>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::ROIPooling>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::PriorBox>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::DetectionOutput>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v1::Reshape>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::Squeeze>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::Unsqueeze>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v1::Transpose>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v4::Proposal>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::Proposal>(op)) {
+//  Detects the only supported dynamic shape case: parameters with exactly one dynamic dimension whose
+//  get_max_length() is > 1. Fills batch_dim (name -> {dim index, max}) and shapes (name -> shape, detected batch set to 1).
+bool Program::IsDynBatchModel(const std::shared_ptr<ov::Model>& model,
+                              std::map<std::string, ov::PartialShape>& shapes,
+                              std::map<std::string, std::pair<int64_t, int64_t>>& batch_dim) {
+    for (const auto& param : model->get_parameters()) {
+        auto pname = getParamName(param);
+        batch_dim[pname] = { -1, -1 };  // default: no dynamic batch found for this parameter
+        if (param->get_output_partial_shape(0).rank().is_dynamic()) {  // dynamic rank: give up immediately
             return false;
         }
-
-        auto customLayer = m_config.customLayers.find(op->get_type_name());
-        if (customLayer != m_config.customLayers.end()) {
-            return false;
+        ov::PartialShape pshape = param->get_output_partial_shape(0);
+        int dynCount = 0;
+        int64_t batch_idx = -1;  // index of the first dynamic dimension, -1 if there is none
+        for (size_t i = 0; i < pshape.size(); i++) {
+            if (pshape[i].is_dynamic()) {
+                dynCount++;
+                if (batch_idx < 0) {
+                    batch_idx = i;
+                }
+            }
+        }
+        switch (dynCount) {
+            case 1:
+                // exactly one dynamic dim: it is taken as the batch only when get_max_length() > 1
+                {
+                    int64_t max_b = pshape[batch_idx].get_max_length();
+                    if (max_b > 1) {
+                        batch_dim[pname].first = batch_idx;
+                        batch_dim[pname].second = max_b;
+                        pshape[batch_idx] = 1;  // the recorded shape carries batch 1
+                    }
+                }  // no break: falls through so the shape is recorded for this parameter too
+            case 0:
+                // no dynamic dims - possible legacy case (also reached by fall-through from case 1)
+                shapes[pname] = pshape;
+                break;
+            default:  // more than one dynamic dim: nothing is recorded in shapes; NOTE(review): not rejected here - confirm
+                break;
         }
     }
-
-    return true;
+    if (batch_dim.empty())
+        return false;
+    bool dyn_shape_batch_found = false;
+    // the first map entry (std::map order, i.e. by name) provides the reference max batch for all other entries
+    auto bitr = batch_dim.begin();
+    dyn_shape_batch_found = bitr->second.first >= 0;
+    auto batch_val_1st = bitr->second.second;  // NOTE(review): -1 if the first entry is static, so any later dynamic input gives false - confirm intended
+    bitr++;
+    for (; bitr != batch_dim.end(); bitr++) {
+        if (bitr->second.first >= 0) {  // only inputs with a detected batch dim are compared
+            if (bitr->second.second != batch_val_1st) {
+                dyn_shape_batch_found = false;
+                break;
+            } else {
+                dyn_shape_batch_found = true;
+            }
+        }
+    }
+    return dyn_shape_batch_found;
 }
 
 Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::engine> engine, const Config& config,
@@ -112,26 +144,136 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::en
 
     auto ops = func->get_ordered_ops();
 
-    if (m_config.max_dynamic_batch > 1) {
-        // check topology for applicability
-        if (!CanProcessDynBatch(ops, networkInputs)) {
-            IE_THROW() << "Such topology cannot be compiled for dynamic batch!";
+    bool dyn_shape_batch_found = false;
+    std::map<std::string, ngraph::PartialShape> shapes;
+    std::map<std::string, std::pair<int64_t, int64_t>> batch_dim;
+    if (m_config.enableDynamicBatch) {
+        // in case of legacy dynamic batch,
+        // we assume 4D input with 0 batch dim
+        auto param = func->get_parameters().front();
+        auto pname = getParamName(param);
+        shapes[pname] = param->get_output_partial_shape(0);
+        batch_dim[pname].first = 0;
+        batch_dim[pname].second = m_config.max_dynamic_batch;
+    } else {
+        dyn_shape_batch_found = IsDynBatchModel(func, shapes, batch_dim);
+        if (dyn_shape_batch_found) {
+            m_config.max_dynamic_batch = batch_dim.begin()->second.second;
+        } else {
+            if (!batch_dim.empty() && shapes.empty()) {
+                // no parameter shape was recorded: more than one dynamic dim, or dynamic rank
+                IE_THROW() << "Only dynamic batch is supported!";
+            }
         }
     }
 
     int m_bv_sz = GetMaxBatchSizeForSingleProgram();
+    m_max_batch = m_config.max_dynamic_batch;
 
-    m_max_batch = config.max_dynamic_batch;
-
-    if (config.max_dynamic_batch > 1) {
+    if (dyn_shape_batch_found || config.max_dynamic_batch > 1) {
+        // compile log2 networks to serve dynamic batch requests
         for (int b = m_bv_sz - 1; b >= 0; b--) {
             inputLayouts.clear();
             outputDims.clear();
             primitiveIDs.clear();
             blobMemCache.clear();
 
-            ChangeInputBatch(1U << static_cast<unsigned>(b));
-            m_programs.insert(m_programs.begin(), BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild));
+            auto new_batch = 1U << static_cast<unsigned>(b);
+            ChangeInputBatch(new_batch);
+
+            // clone the source model, find the batch dim
+            // and reshape the model to next batch size
+            auto new_func = ov::clone_model(*func);
+            std::map<ov::Output<ov::Node>, ngraph::PartialShape> new_shapes;
+            for (const auto& param : new_func->get_parameters()) {
+                ov::PartialShape pshape = param->get_output_partial_shape(0);
+
+                auto pname = getParamName(param);
+                auto batch_idx = batch_dim[pname].first;
+
+                if (batch_idx >= 0) {
+                    auto pshape = shapes[pname];
+                    pshape[batch_idx] = new_batch;
+                    new_shapes[param->output(0)] = pshape;
+                }
+            }
+            new_func->reshape(new_shapes);
+
+            // reshape network input/output maps accordingly
+            // for correct network compilation
+            for (auto& new_input : new_func->inputs()) {
+                auto iname = new_input.get_node()->get_friendly_name();
+                auto it = networkInputs.find(iname);
+                if (it != networkInputs.end()) {
+                    auto shape = new_input.get_shape();
+                    auto l = it->second->getTensorDesc().getLayout();
+                    it->second->getInputData()->reshape(shape, l);
+                }
+            }
+
+            for (auto& new_output : new_func->outputs()) {
+                auto iname = new_output.get_node_shared_ptr()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name();
+                auto it = networkOutputs.find(iname);
+                if (it != networkOutputs.end()) {
+                    auto shape = new_output.get_shape();
+                    auto l = it->second->getTensorDesc().getLayout();
+                    it->second->reshape(shape, l);
+                }
+            }
+            m_programs.insert(m_programs.begin(), BuildProgram(new_func->get_ordered_ops(), networkInputs, networkOutputs,
+                createTopologyOnly, partialBuild));
+        }
+        {
+            // recompute maximal dynamic batch inputs/outputs for infer request
+            // and store them into internal maps
+            // same operations as above, but for maximum batch
+            auto new_func = ov::clone_model(*func);
+            std::map<ov::Output<ov::Node>, ngraph::PartialShape> new_shapes;
+            for (const auto& param : new_func->get_parameters()) {
+                ov::PartialShape pshape = param->get_output_partial_shape(0);
+
+                auto pname = getParamName(param);
+                auto batch_idx = batch_dim[pname].first;
+
+                if (batch_idx >= 0) {
+                    auto pshape = shapes[pname];
+                    pshape[batch_idx] = m_max_batch;
+                    new_shapes[param->output(0)] = pshape;
+                }
+            }
+            new_func->reshape(new_shapes);
+
+            for (auto& new_input : new_func->inputs()) {
+                auto iname = new_input.get_node()->get_friendly_name();
+                auto it = networkInputs.find(iname);
+                if (it != networkInputs.end()) {
+                    auto shape = new_input.get_shape();
+                    auto l = it->second->getTensorDesc().getLayout();
+                    it->second->getInputData()->reshape(shape, l);
+                }
+            }
+
+            for (auto& new_output : new_func->outputs()) {
+                auto iname = new_output.get_node_shared_ptr()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name();
+                auto it = networkOutputs.find(iname);
+                if (it != networkOutputs.end()) {
+                    auto shape = new_output.get_shape();
+                    auto l = it->second->getTensorDesc().getLayout();
+                    SizeVector old_shape = it->second->getTensorDesc().getDims();
+                    it->second->reshape(shape, l);
+                    // detect changed output batch dimension
+                    SizeVector new_shape = it->second->getTensorDesc().getDims();
+                    for (size_t i = 0; i < old_shape.size() && i < new_shape.size(); i++) {  // unsigned index, bounded by both shapes
+                        if (old_shape[i] != new_shape[i]) {
+                            m_output_batch_dim[iname] = static_cast<int64_t>(i);  // first dim changed by the max-batch reshape
+                            break;
+                        }
+                    }
+                }
+            }
+            m_networkInputs = networkInputs;
+            m_networkOutputs = networkOutputs;
+            m_input_batch_dim = batch_dim;
         }
     } else {
         m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild));
diff --git a/src/tests/functional/plugin/gpu/dynamic_tests/gpu_dyn_batch_shape_tests.cpp b/src/tests/functional/plugin/gpu/dynamic_tests/gpu_dyn_batch_shape_tests.cpp
new file mode 100644
index 00000000000..cc3a6f5caa9
--- /dev/null
+++ b/src/tests/functional/plugin/gpu/dynamic_tests/gpu_dyn_batch_shape_tests.cpp
@@ -0,0 +1,118 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/runtime/core.hpp"
+#include <common_test_utils/test_common.hpp>
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/skip_tests_config.hpp"
+#include "ngraph_functions/subgraph_builders.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+
+using namespace ::testing;
+using namespace ov::test;
+
+using OVDynamicBatchParams = std::tuple<
+    std::vector<InputShape>,                                           // per input: partial (dynamic) shape plus the static shapes to test
+    ElementType,                                                       // element type the model is built with
+    std::string,                                                       // target device name
+    std::map<std::string, std::string>                                 // configuration key/value pairs (printed in the test name)
+>;
+
+// Functional test fixture: builds the makeSplitMultiConvConcat subgraph and reshapes its
+// "input_tensor" input to the (possibly dynamic) shape taken from the test parameters.
+class OVDynamicBatchShape_Tests : public WithParamInterface<OVDynamicBatchParams>,
+    virtual public ov::test::SubgraphBaseTest {
+public:
+    // Composes the test-case name from input shapes, precision, device and config entries.
+    static std::string getTestCaseName(TestParamInfo<OVDynamicBatchParams> obj) {
+        std::vector<InputShape> inputShapes;
+        ElementType netPrecision;
+        std::string targetDevice;
+        std::map<std::string, std::string> configuration;
+        std::tie(inputShapes, netPrecision, targetDevice, configuration) = obj.param;
+
+        std::ostringstream result;
+        result << "IS=";
+        for (const auto& shape : inputShapes) {
+            result << CommonTestUtils::partialShape2str({ shape.first }) << "_";
+        }
+        // TS: the static target shapes listed for every input.
+        result << "TS=";
+        for (const auto& shape : inputShapes) {
+            result << "(";
+            for (const auto& itr : shape.second) {
+                result << CommonTestUtils::vec2str(itr);
+            }
+            result << ")_";
+        }
+        result << "netPRC=" << netPrecision << "_";
+        result << "targetDevice=" << targetDevice;
+        // Each config entry carries its own leading "_" so it never fuses with the device name.
+        for (const auto& configItem : configuration) {
+            result << "_configItem=" << configItem.first << "_" << configItem.second;
+        }
+        return result.str();
+    }
+    // Releases this fixture's reference to the Core object.
+    void TearDown() override {
+        core.reset();
+    }
+protected:
+    // Unpacks the parameters, builds the model and makes its input shape dynamic.
+    void SetUp() override {
+        core.reset();  // reset() on an empty pointer is a no-op, so no null check is needed
+        std::tie(inputShape, netPrecision, targetDevice, configuration) = this->GetParam();
+
+        init_input_shapes(inputShape);
+        // TODO: think how we can switch between several input topologies in the future
+        //       (e.g. ngraph::builder::subgraph::makeSplitConvConcat)
+        function = ngraph::builder::subgraph::makeSplitMultiConvConcat(inputShape.front().first.get_min_shape(), netPrecision);
+
+        // Make the topology dynamic: reshape the input tensor to the parameterized partial shape.
+        std::map<std::string, ov::PartialShape> dynShape;
+        dynShape["input_tensor"] = inputShape.front().first;
+        function->reshape(dynShape);
+    }
+    std::vector<InputShape> inputShape;  // shapes from the test parameters; only the first entry is used to build the model
+    ElementType netPrecision;            // element type passed to the subgraph builder
+};
+
+TEST_P(OVDynamicBatchShape_Tests, InferDynamicBatchBound) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+    core = std::make_shared<ov::runtime::Core>();  // give this test its own Core; TearDown() resets it again
+    run();  // presumably the SubgraphBaseTest pipeline (compile, infer, compare) - confirm in the base class
+}
+
+namespace {
+// No configuration entries: used by the plain GPU instantiation.
+const std::map<std::string, std::string> emptyConfig = {};
+
+// Config for the HETERO instantiation: TARGET_FALLBACK is set to the GPU device.
+const std::map<std::string, std::string> heteroConfig = {{"TARGET_FALLBACK", CommonTestUtils::DEVICE_GPU}};
+
+// One input: first dimension bounded to [1, 19], remaining dimensions fixed to 4x20x20;
+// the static shapes to run use a first dimension of 1, 7 and 17.
+const std::vector<InputShape> dynBatchShapes = {
+    { { {1, 19}, 4, 20, 20}, { {1, 4, 20, 20}, {7, 4, 20, 20}, {17, 4, 20, 20} } }
+};
+
+// Element types the model is built with.
+const std::vector<ElementType> modelPrecisions = {ElementType::f16, ElementType::f32};
+
+INSTANTIATE_TEST_SUITE_P(smoke_GPU_DynBatch, OVDynamicBatchShape_Tests,
+    ::testing::Combine(
+        ::testing::Values(dynBatchShapes),
+        ::testing::ValuesIn(modelPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU),
+        ::testing::Values(emptyConfig)),
+    OVDynamicBatchShape_Tests::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_GPU_DynBatchHetero, OVDynamicBatchShape_Tests,
+    ::testing::Combine(
+        ::testing::Values(dynBatchShapes),
+        ::testing::ValuesIn(modelPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_HETERO),
+        ::testing::Values(heteroConfig)),
+    OVDynamicBatchShape_Tests::getTestCaseName);
+}  // namespace
diff --git a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/subgraph_builders.hpp b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/subgraph_builders.hpp
index 53707514a1c..c9ddae7028b 100644
--- a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/subgraph_builders.hpp
+++ b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/subgraph_builders.hpp
@@ -189,9 +189,11 @@ inline std::shared_ptr<ngraph::Function> makeKSOFunction(std::vector<size_t> inp
     return fnPtr;
 }
 
-inline std::shared_ptr<ngraph::Function> makeSplitMultiConvConcat(std::vector<size_t> inputShape = {1, 4, 20, 20}) {
-    auto ngPrc = ngraph::element::Type_t::f32;
+inline std::shared_ptr<ngraph::Function> makeSplitMultiConvConcat(std::vector<size_t> inputShape = {1, 4, 20, 20},
+                                                                  ngraph::element::Type_t ngPrc = ngraph::element::Type_t::f32) {
     auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
+    params.front()->set_friendly_name("Param_1");
+    params.front()->get_output_tensor(0).set_names({ "input_tensor" });
     auto split = ngraph::builder::makeSplit(params[0], ngPrc, 2, 1);
 
     auto conv1_0 = ngraph::builder::makeConvolution(split->output(0), ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},