diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp index fd51e38c483..defda468ddf 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp @@ -53,6 +53,10 @@ public: std::shared_ptr GetEngine() const { return getContextImpl(m_context)->GetEngine(); } int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; } const std::map& GetInputLayouts() const { return m_program->GetInputLayouts(); } + const InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_program->GetNetworkInputs(); } + const InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_program->GetNetworkOutputs(); } + std::map> GetInputDynBatchDims() { return m_program->m_input_batch_dim; } + std::map GetOutputDynBatchDims() { return m_program->m_output_batch_dim; } size_t GetNetworksCount() const { return m_networks.size(); } std::shared_ptr GetNetwork(size_t idx = 0) const; InferenceEngine::SizeVector GetOutputSize(std::string outName) const; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp index f2cdcdf7c79..d7f888f89a6 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp @@ -93,6 +93,8 @@ public: int m_max_batch; int m_curBatch; + std::map> m_input_batch_dim; + std::map m_output_batch_dim; std::shared_ptr GetCompiledProgram(int program_id = 0); const std::map& GetInputLayouts() const { return inputLayouts; } @@ -104,6 +106,9 @@ public: int GetMaxBatchSizeForSingleProgram(); bool IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr& op); + bool IsDynBatchModel(const std::shared_ptr& model, + std::map& shapes, + std::map>& batch_dim); // Profiling utils void InitProfileInfo(const std::string& layerName, @@ -170,7 +175,6 @@ private: bool createTopologyOnly = false, bool partialBuild = false); void CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr& op); - bool CanProcessDynBatch(std::vector> ops, InferenceEngine::InputsDataMap networkInputs) const; void ChangeInputBatch(int batch); }; diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index fa366e1faff..251251e3d42 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -49,6 +49,8 @@ Graph::Graph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context, , m_stream_id(stream_id) , m_state(0) { m_program = std::make_shared(network, GetEngine(), m_config); + if (m_program->m_max_batch > 1) + m_config.max_dynamic_batch = m_program->m_max_batch; Build(); } diff --git a/src/plugins/intel_gpu/src/plugin/infer_request.cpp b/src/plugins/intel_gpu/src/plugin/infer_request.cpp index 5a3be7abbae..216be15290b 100644 --- a/src/plugins/intel_gpu/src/plugin/infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/infer_request.cpp @@ -171,14 +171,12 @@ bool same_host_mem(cldnn::memory::ptr memPtr, uint8_t* hostPtr) { } return bufferMem == hostPtr; } - } // namespace namespace ov { namespace runtime { namespace intel_gpu { - // ----------------------------------------------------------------------------------------- // // ---------------------------- IE API impl ------------------------------------------------ // // ----------------------------------------------------------------------------------------- // @@ -188,6 +186,8 @@ Blob::Ptr InferRequest::GetBlob(const std::string& name) { InputInfo::Ptr foundInput; DataPtr foundOutput; bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput); + auto node = is_input ? findInputByNodeName(name) : findOutputByNodeName(name); + bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic()); if (is_input) { // ROI blob is returned only if it was set previously. Otherwise default blob is returned. @@ -196,11 +196,20 @@ Blob::Ptr InferRequest::GetBlob(const std::string& name) { data = it->second->getRoiBlob(); } else { data = _inputs[name]; - checkInputBlob(data, name, foundInput); + if (!isDynamic) + checkInputBlob(data, name, foundInput); } } else { data = _outputs[name]; - checkOutputBlob(data, name, foundOutput); + if (isDynamic) { + if (m_graph->GetMaxDynamicBatchSize() > 1) { + SizeVector outDims = data->getTensorDesc().getDims(); + outDims[m_graph->GetOutputDynBatchDims()[name]] = m_curBatch; + data->getTensorDesc().setDims(outDims); + } + } else { + checkOutputBlob(data, name, foundOutput); + } } return data; } @@ -243,13 +252,16 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) { desc.getPrecision().size(), std::multiplies()); bool preProcResize = false; + auto node = is_input ? findInputByNodeName(name) : findOutputByNodeName(name); + bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic()); if (is_input) { preProcResize = foundInput->getPreProcess().getResizeAlgorithm() != ResizeAlgorithm::NO_RESIZE; const auto inputColorFormat = foundInput->getPreProcess().getColorFormat(); preProcResize |= (inputColorFormat != ColorFormat::RAW) && (inputColorFormat != ColorFormat::BGR); } - if (dataBinSize != netReqBinSize && !compoundBlobPassed && !preProcResize) { + if (!isDynamic && + dataBinSize != netReqBinSize && !compoundBlobPassed && !preProcResize) { IE_THROW() << "Incorrect binary data size for " << (is_input ? "input" : "output") << " blob with name: \'" << name << "\' " << "Current: " << dataBinSize << " Required: " << netReqBinSize; @@ -292,7 +304,9 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) { auto y_ptr = nv12_ptr->y()->as(); if (y_ptr) { auto y_impl = getBlobImpl(y_ptr); - y_impl->allocate(); + if (!y_impl->is_allocated()) { + y_impl->allocate(); + } _deviceInputs[y_name] = nv12_ptr->y(); is_remote = true; } @@ -300,7 +314,9 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) { auto uv_ptr = nv12_ptr->uv()->as(); if (uv_ptr) { auto uv_impl = getBlobImpl(uv_ptr); - uv_impl->allocate(); + if (!uv_impl->is_allocated()) { + uv_impl->allocate(); + } _deviceInputs[uv_name] = nv12_ptr->uv(); is_remote = true; } @@ -326,12 +342,21 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) { if (compoundBlobPassed) { IE_THROW(NotImplemented) << cannot_set_compound; } - size_t blobSize = desc.getLayout() != SCALAR - ? details::product(desc.getDims()) - : 1; - if (dataSize != blobSize) { - IE_THROW() << "Input blob size is not equal network input size (" - << dataSize << "!=" << blobSize << ")."; + if (isDynamic) { + // extract new batch size from blob + if (m_graph->GetMaxDynamicBatchSize() > 1) { + const auto batch_idx = m_graph->GetInputDynBatchDims()[name].first; + if (batch_idx >= 0) + SetBatch(blobDesc.getDims()[batch_idx]); + } + } else { + size_t blobSize = desc.getLayout() != SCALAR + ? details::product(desc.getDims()) + : 1; + if (dataSize != blobSize) { + IE_THROW() << "Input blob size is not equal to network input size (" + << dataSize << "!=" << blobSize << ")."; + } } if (data->buffer() == nullptr) @@ -347,15 +372,17 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) { if (is_remote) { _deviceOutputs[name] = data; } else { - size_t outputSize = desc.getLayout() != SCALAR - ? details::product(desc.getDims()) - : 1; - if (dataSize != outputSize) { - IE_THROW() << "Output blob size is not equal network output size (" << dataSize - << "!=" << outputSize << ")."; + if (!isDynamic) { + size_t outputSize = desc.getLayout() != SCALAR + ? details::product(desc.getDims()) + : 1; + if (dataSize != outputSize) { + IE_THROW() << "Output blob size is not equal to network output size (" << dataSize + << "!=" << outputSize << ")."; + } + if (data->buffer() == nullptr) + IE_THROW(NotAllocated) << str_output_not_allocated << " Output name: \'" << name << "\'"; } - if (data->buffer() == nullptr) - IE_THROW(NotAllocated) << str_input_not_allocated << " Input name: \'" << name << "\'"; } _outputs[name] = data; } @@ -457,7 +484,10 @@ void InferRequest::checkBlobs() { } else { IE_THROW(NotFound) << "Failed to find input with name: \'" << input.first << "\'"; } - checkInputBlob(input.second, input.first, foundInput, m_graph->getConfig().nv12_two_inputs); + auto node = findInputByNodeName(input.first); + bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic()); + if (!is_dynamic) + checkInputBlob(input.second, input.first, foundInput, m_graph->getConfig().nv12_two_inputs); } for (auto const &output : _outputs) { DataPtr foundOutput = nullptr; @@ -470,7 +500,10 @@ void InferRequest::checkBlobs() { } else { IE_THROW(NotFound) << "Failed to find output with name: \'" << output.first << "\'"; } - checkOutputBlob(output.second, output.first, foundOutput); + auto node = findOutputByNodeName(output.first); + bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic()); + if (!is_dynamic) + checkOutputBlob(output.second, output.first, foundOutput); } } @@ -509,9 +542,12 @@ void InferRequest::SetBatch(int new_batch) { batchOutputs.clear(); // tune expected inputs - for (auto &input : m_graph->GetInputLayouts()) { - cldnn::tensor dims = input.second.size; - const SizeVector sz = { 1, size_t(dims.feature[0]), size_t(dims.spatial[1]), size_t(dims.spatial[0]) }; + for (auto& input : m_graph->GetNetworkInputs()) { + auto sz = input.second->getTensorDesc().getDims(); + const auto batch_idx = m_graph->GetInputDynBatchDims()[input.first].first; + if (batch_idx >= 0) + sz[batch_idx] = 1; + size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies()); std::vector in_buf; @@ -534,9 +570,11 @@ void InferRequest::SetBatch(int new_batch) { } // tune expected outputs - for (auto& no : _networkOutputs) { - auto sz = m_graph->GetOutputSize(no.first); - sz.front() = 1; + for (auto& no : m_graph->GetNetworkOutputs()) { + auto sz = no.second->getTensorDesc().getDims(); + const auto batch_idx = m_graph->GetInputDynBatchDims()[no.first].first; + if (batch_idx >= 0) + sz[batch_idx] = 1; size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies()); std::vector out_buf; @@ -816,6 +854,21 @@ void InferRequest::setup_stream_graph() { streamID = streamID % numGraphs; } m_graph = streamGraphs[streamID]; + // in case of dynamic batch, check all input blobs and set new batch + if (m_graph->GetMaxDynamicBatchSize() > 1) { + for (auto& input : _networkInputs) { + auto node = findInputByNodeName(input.first); + bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic()); + if (!is_dynamic) + continue; + // extract new batch size from blob + const auto batch_idx = m_graph->GetInputDynBatchDims()[input.first].first; + if (batch_idx >= 0) { + SetBatch(_inputs[input.first]->getTensorDesc().getDims()[batch_idx]); + break; + } + } + } } Blob::Ptr InferRequest::create_host_blob(const TensorDesc& desc, std::shared_ptr alloc) { @@ -968,16 +1021,9 @@ void InferRequest::allocate_inputs() { void InferRequest::allocate_inputs_dynamic() { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_inputs_dynamic"); // allocate inputs - for (auto &input : m_graph->GetInputLayouts()) { + for (auto &input : m_graph->GetNetworkInputs()) { InputInfo::Ptr ni = _networkInputs.at(input.first); - TensorDesc desc = ni->getTensorDesc(); - SizeVector& dims = desc.getDims(); - - if (!dims.empty()) { - *dims.begin() = static_cast(m_graph->GetMaxDynamicBatchSize()); - } else { - IE_THROW() << "Empty dimensions for input blob " << input.first; - } + TensorDesc desc = input.second->getTensorDesc(); Blob::Ptr inputBlob = create_host_blob(desc); if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { @@ -1020,17 +1066,10 @@ void InferRequest::allocate_outputs() { void InferRequest::allocate_outputs_dynamic() { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_outputs_dynamic"); // allocate outputs - for (auto& no : _networkOutputs) { + for (auto& no : m_graph->GetNetworkOutputs()) { std::string outputID = m_graph->MapOutputName(no.first); DataPtr oi = no.second; TensorDesc desc = oi->getTensorDesc(); - SizeVector& dims = desc.getDims(); - - if (!dims.empty()) { - *dims.begin() = static_cast(m_graph->GetMaxDynamicBatchSize()); - } else { - IE_THROW() << "Empty dimensions for output blob " << no.first; - } Blob::Ptr outputBlob = create_host_blob(desc); _outputs[no.first] = outputBlob; diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 9adca79a60a..5a456ec2e26 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -376,16 +376,45 @@ QueryNetworkResult Plugin::QueryNetwork(const CNNNetwork& network, } auto clonedNetwork = CloneAndTransformNetwork(network, conf); - auto ops = clonedNetwork.getFunction()->get_ordered_ops(); + auto func = clonedNetwork.getFunction(); + auto ops = func->get_ordered_ops(); std::unordered_set supported; std::unordered_set unsupported; std::unordered_set constantsNames; std::vector> constants; + std::map shapes; + std::map> batch_dim; + bool dyn_shape_batch_found = prog.IsDynBatchModel(func, shapes, batch_dim); auto layerIsSupported = [&](std::shared_ptr node) { if (node->is_dynamic()) { - return false; + if (!dyn_shape_batch_found) + return false; + + auto pshape = node->get_output_partial_shape(0); + if (pshape.rank().is_dynamic()) + return false; + + int dynCount = 0; + int64_t batch_idx = -1; + for (size_t i = 0; i < pshape.size(); i++) { + if (pshape[i].is_dynamic()) { + dynCount++; + if (batch_idx < 0) { + batch_idx = i; + } + } + } + + if (dynCount != 1) + return false; // more than one dimension is dynamic + + int64_t max_batch = pshape[batch_idx].get_max_length(); + if (max_batch <= 1) + return false; + + return true; } if (ngraph::is_type(node) || ngraph::is_type(node) || @@ -637,6 +666,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::mapverbose >= 1) { diff --git a/src/plugins/intel_gpu/src/plugin/program.cpp b/src/plugins/intel_gpu/src/plugin/program.cpp index 3f04dbe019c..2d37b86a8b2 100644 --- a/src/plugins/intel_gpu/src/plugin/program.cpp +++ b/src/plugins/intel_gpu/src/plugin/program.cpp @@ -5,6 +5,7 @@ #include "intel_gpu/plugin/program.hpp" #include "ngraph/ops.hpp" #include "ngraph_ops/nms_ie_internal.hpp" +#include "openvino/core/graph_util.hpp" #include "intel_gpu/plugin/itt.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" @@ -56,43 +57,74 @@ void Program::ValidateInputs(const std::shared_ptr& op, std::vecto << " op::v" << op->get_type_info().version << ")"; } -bool Program::CanProcessDynBatch(std::vector> ops, InferenceEngine::InputsDataMap networkInputs) const { - if (networkInputs.empty()) - return false; +auto getParamName = [](const std::shared_ptr& param) -> std::string { + const auto& names = param->get_output_tensor(0).get_names(); + if (!names.empty()) + return *names.begin(); + else + return param->get_friendly_name(); +}; - for (auto op : ops) { - // TODO: do we have any other exception cases? - if (std::dynamic_pointer_cast(op)) { - if (op->get_input_shape(0)[0] == op->get_output_shape(0)[0]) - continue; - } - - // List of the operations which can lead to invalid dynamic batch processing - if (std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op)) { +// detect the only supported dynamic shape case - +// exactly one dimension is dynamic in input params with defined min/max interval +bool Program::IsDynBatchModel(const std::shared_ptr& model, + std::map& shapes, + std::map>& batch_dim) { + for (const auto& param : model->get_parameters()) { + auto pname = getParamName(param); + batch_dim[pname] = { -1, -1 }; + if (param->get_output_partial_shape(0).rank().is_dynamic()) { return false; } - - auto customLayer = m_config.customLayers.find(op->get_type_name()); - if (customLayer != m_config.customLayers.end()) { - return false; + ov::PartialShape pshape = param->get_output_partial_shape(0); + int dynCount = 0; + int64_t batch_idx = -1; + for (size_t i = 0; i < pshape.size(); i++) { + if (pshape[i].is_dynamic()) { + dynCount++; + if (batch_idx < 0) { + batch_idx = i; + } + } + } + switch (dynCount) { + case 1: + // exactly one dynamic dim + { + int64_t max_b = pshape[batch_idx].get_max_length(); + if (max_b > 1) { + batch_dim[pname].first = batch_idx; + batch_dim[pname].second = max_b; + pshape[batch_idx] = 1; + } + } + case 0: + // no dynamic dims - possible legacy case + shapes[pname] = pshape; + break; + default: + break; } } - - return true; + if (batch_dim.empty()) + return false; + bool dyn_shape_batch_found = false; + // detect 1st dyn dim, mark it and continue + auto bitr = batch_dim.begin(); + dyn_shape_batch_found = bitr->second.first >= 0; + auto batch_val_1st = bitr->second.second; + bitr++; + for (; bitr != batch_dim.end(); bitr++) { + if (bitr->second.first >= 0) { + if (bitr->second.second != batch_val_1st) { + dyn_shape_batch_found = false; + break; + } else { + dyn_shape_batch_found = true; + } + } + } + return dyn_shape_batch_found; } Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr engine, const Config& config, @@ -112,26 +144,136 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptrget_ordered_ops(); - if (m_config.max_dynamic_batch > 1) { - // check topology for applicability - if (!CanProcessDynBatch(ops, networkInputs)) { - IE_THROW() << "Such topology cannot be compiled for dynamic batch!"; + bool dyn_shape_batch_found = false; + std::map shapes; + std::map> batch_dim; + if (m_config.enableDynamicBatch) { + // in case of legacy dynamic batch, + // we assume 4D input with 0 batch dim + auto param = func->get_parameters().front(); + auto pname = getParamName(param); + shapes[pname] = param->get_output_partial_shape(0); + batch_dim[pname].first = 0; + batch_dim[pname].second = m_config.max_dynamic_batch; + } else { + dyn_shape_batch_found = IsDynBatchModel(func, shapes, batch_dim); + if (dyn_shape_batch_found) { + m_config.max_dynamic_batch = batch_dim.begin()->second.second; + } else { + if (!batch_dim.empty() && shapes.empty()) { + // more than on dynamic dim or dynamic rank + IE_THROW() << "Only dynamic batch is supported!"; + } } } int m_bv_sz = GetMaxBatchSizeForSingleProgram(); + m_max_batch = m_config.max_dynamic_batch; - m_max_batch = config.max_dynamic_batch; - - if (config.max_dynamic_batch > 1) { + if (dyn_shape_batch_found || config.max_dynamic_batch > 1) { + // compile log2 networks to serve dynamic batch requests for (int b = m_bv_sz - 1; b >= 0; b--) { inputLayouts.clear(); outputDims.clear(); primitiveIDs.clear(); blobMemCache.clear(); - ChangeInputBatch(1U << static_cast(b)); - m_programs.insert(m_programs.begin(), BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild)); + auto new_batch = 1U << static_cast(b); + ChangeInputBatch(new_batch); + + // clone the source model, find the batch dim + // and reshape the model to next batch size + auto new_func = ov::clone_model(*func); + std::map, ngraph::PartialShape> new_shapes; + for (const auto& param : new_func->get_parameters()) { + ov::PartialShape pshape = param->get_output_partial_shape(0); + + auto pname = getParamName(param); + auto batch_idx = batch_dim[pname].first; + + if (batch_idx >= 0) { + auto pshape = shapes[pname]; + pshape[batch_idx] = new_batch; + new_shapes[param->output(0)] = pshape; + } + } + new_func->reshape(new_shapes); + + // reshape network input/output maps accordingly + // for correct network compilation + for (auto& new_input : new_func->inputs()) { + auto iname = new_input.get_node()->get_friendly_name(); + auto it = networkInputs.find(iname); + if (it != networkInputs.end()) { + auto shape = new_input.get_shape(); + auto l = it->second->getTensorDesc().getLayout(); + it->second->getInputData()->reshape(shape, l); + } + } + + for (auto& new_output : new_func->outputs()) { + auto iname = new_output.get_node_shared_ptr()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name(); + auto it = networkOutputs.find(iname); + if (it != networkOutputs.end()) { + auto shape = new_output.get_shape(); + auto l = it->second->getTensorDesc().getLayout(); + it->second->reshape(shape, l); + } + } + m_programs.insert(m_programs.begin(), BuildProgram(new_func->get_ordered_ops(), networkInputs, networkOutputs, + createTopologyOnly, partialBuild)); + } + { + // recompute maximal dynamic batch inputs/outputs for infer request + // and store them into internal maps + // same operations as above, but for maximum batch + auto new_func = ov::clone_model(*func); + std::map, ngraph::PartialShape> new_shapes; + for (const auto& param : new_func->get_parameters()) { + ov::PartialShape pshape = param->get_output_partial_shape(0); + + auto pname = getParamName(param); + auto batch_idx = batch_dim[pname].first; + + if (batch_idx >= 0) { + auto pshape = shapes[pname]; + pshape[batch_idx] = m_max_batch; + new_shapes[param->output(0)] = pshape; + } + } + new_func->reshape(new_shapes); + + for (auto& new_input : new_func->inputs()) { + auto iname = new_input.get_node()->get_friendly_name(); + auto it = networkInputs.find(iname); + if (it != networkInputs.end()) { + auto shape = new_input.get_shape(); + auto l = it->second->getTensorDesc().getLayout(); + it->second->getInputData()->reshape(shape, l); + } + } + + for (auto& new_output : new_func->outputs()) { + auto iname = new_output.get_node_shared_ptr()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name(); + auto it = networkOutputs.find(iname); + if (it != networkOutputs.end()) { + auto shape = new_output.get_shape(); + auto l = it->second->getTensorDesc().getLayout(); + SizeVector old_shape = it->second->getTensorDesc().getDims(); + it->second->reshape(shape, l); + // detect changed output batch dimension + SizeVector new_shape = it->second->getTensorDesc().getDims(); + for (int64_t i = 0; i < old_shape.size(); i++) { + if (old_shape[i] != new_shape[i]) { + m_output_batch_dim[iname] = i; + break; + } + } + } + } + m_networkInputs = networkInputs; + m_networkOutputs = networkOutputs; + m_input_batch_dim = batch_dim; } } else { m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild)); diff --git a/src/tests/functional/plugin/gpu/dynamic_tests/gpu_dyn_batch_shape_tests.cpp b/src/tests/functional/plugin/gpu/dynamic_tests/gpu_dyn_batch_shape_tests.cpp new file mode 100644 index 00000000000..cc3a6f5caa9 --- /dev/null +++ b/src/tests/functional/plugin/gpu/dynamic_tests/gpu_dyn_batch_shape_tests.cpp @@ -0,0 +1,118 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/runtime/core.hpp" +#include +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/skip_tests_config.hpp" +#include "ngraph_functions/subgraph_builders.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +using namespace ::testing; +using namespace ov::test; + +using OVDynamicBatchParams = std::tuple< + std::vector, // dynamic and static case sizes + ElementType, // Network precision + std::string, // Device name + std::map // Config +>; + +class OVDynamicBatchShape_Tests : public WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(TestParamInfo obj) { + std::vector inputShapes; + ElementType netPrecision; + std::string targetDevice; + std::map configuration; + std::tie(inputShapes, netPrecision, targetDevice, configuration) = obj.param; + + std::ostringstream result; + result << "IS="; + for (const auto& shape : inputShapes) { + result << CommonTestUtils::partialShape2str({ shape.first }) << "_"; + } + result << "TS="; + for (const auto& shape : inputShapes) { + result << "("; + if (!shape.second.empty()) { + for (const auto& itr : shape.second) { + result << CommonTestUtils::vec2str(itr); + } + } + result << ")_"; + } + result << "netPRC=" << netPrecision << "_"; + result << "targetDevice=" << targetDevice; + if (!configuration.empty()) { + for (auto& configItem : configuration) { + result << "configItem=" << configItem.first << "_" << configItem.second << "_"; + } + } + return result.str(); + } + void TearDown() override { + core.reset(); + } +protected: + void SetUp() override { + if (core) + core.reset(); + std::tie(inputShape, netPrecision, targetDevice, configuration) = this->GetParam(); + + init_input_shapes(inputShape); + //TODO: think how we can switch between several input topologies in the future + // function = ngraph::builder::subgraph::makeSplitConvConcat(inputShape.front().first.get_min_shape(), netPrecision); + function = ngraph::builder::subgraph::makeSplitMultiConvConcat(inputShape.front().first.get_min_shape(), netPrecision); + + // make topology dynamic + std::map dynShape; + dynShape["input_tensor"] = inputShape.front().first; + function->reshape(dynShape); + } + std::shared_ptr src_func; + // std::map configuration; + std::vector inputShape; + ElementType netPrecision; +}; + +TEST_P(OVDynamicBatchShape_Tests, InferDynamicBatchBound) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + core = std::make_shared(); + run(); +} + +namespace { +const std::map config = {}; + +const std::map hetero_config = { + {"TARGET_FALLBACK", CommonTestUtils::DEVICE_GPU} +}; + +const std::vector inputShapes = { + { { {1, 19}, 4, 20, 20}, { {1, 4, 20, 20}, {7, 4, 20, 20}, {17, 4, 20, 20} } } +}; + +const std::vector netPrecisions = { + ElementType::f16, + ElementType::f32 +}; + +INSTANTIATE_TEST_SUITE_P(smoke_GPU_DynBatch, OVDynamicBatchShape_Tests, + ::testing::Combine( + ::testing::Values(inputShapes), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::Values(config)), + OVDynamicBatchShape_Tests::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_GPU_DynBatchHetero, OVDynamicBatchShape_Tests, + ::testing::Combine( + ::testing::Values(inputShapes), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_HETERO), + ::testing::Values(hetero_config)), + OVDynamicBatchShape_Tests::getTestCaseName); +} // namespace diff --git a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/subgraph_builders.hpp b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/subgraph_builders.hpp index 53707514a1c..c9ddae7028b 100644 --- a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/subgraph_builders.hpp +++ b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/subgraph_builders.hpp @@ -189,9 +189,11 @@ inline std::shared_ptr makeKSOFunction(std::vector inp return fnPtr; } -inline std::shared_ptr makeSplitMultiConvConcat(std::vector inputShape = {1, 4, 20, 20}) { - auto ngPrc = ngraph::element::Type_t::f32; +inline std::shared_ptr makeSplitMultiConvConcat(std::vector inputShape = {1, 4, 20, 20}, + ngraph::element::Type_t ngPrc = ngraph::element::Type_t::f32) { auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + params.front()->set_friendly_name("Param_1"); + params.front()->get_output_tensor(0).set_names({ "input_tensor" }); auto split = ngraph::builder::makeSplit(params[0], ngPrc, 2, 1); auto conv1_0 = ngraph::builder::makeConvolution(split->output(0), ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},