[GPU] Implement dynamic shape case support for one dimension only (batch) via legacy dynamic batch functionality (#9314)

Mikhail Letavin 2022-01-17 12:02:52 +03:00, committed by GitHub
parent 98cbaf0f08
commit f2be2c915f
8 changed files with 433 additions and 92 deletions
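
Editor's note: the diff below only touches GPU plugin internals, so here is a minimal usage sketch (not part of the commit) of what the change enables. It assumes the 2022.1-era ov::runtime API that the new test file at the end of this diff also uses; "model.xml" and the tensor name "input_tensor" are placeholders. A model whose batch dimension is declared as a bounded dynamic interval is compiled for "GPU" and then run at several batch sizes, which the plugin serves through its legacy dynamic batch path.

#include <openvino/runtime/core.hpp>
#include <map>
#include <string>

int main() {
    ov::runtime::Core core;
    auto model = core.read_model("model.xml");  // placeholder path

    // Declare the batch dimension as dynamic with an upper bound of 19;
    // only this one dimension may be dynamic, and the bound must be defined.
    std::map<std::string, ov::PartialShape> dyn_shape;
    dyn_shape["input_tensor"] = ov::PartialShape{{1, 19}, 4, 20, 20};
    model->reshape(dyn_shape);

    auto compiled = core.compile_model(model, "GPU");
    auto request = compiled.create_infer_request();

    // Any batch within the declared interval can be fed per request.
    for (size_t batch : {1, 7, 17}) {
        ov::runtime::Tensor input(ov::element::f32, ov::Shape{batch, 4, 20, 20});
        request.set_input_tensor(input);
        request.infer();
    }
    return 0;
}

Models with more than one dynamic dimension or with a dynamic rank are rejected ("Only dynamic batch is supported!"), as the program.cpp changes below show.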

View File

@@ -53,6 +53,10 @@ public:
     std::shared_ptr<cldnn::engine> GetEngine() const { return getContextImpl(m_context)->GetEngine(); }
     int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; }
     const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
+    const InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_program->GetNetworkInputs(); }
+    const InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_program->GetNetworkOutputs(); }
+    std::map<std::string, std::pair<int64_t, int64_t>> GetInputDynBatchDims() { return m_program->m_input_batch_dim; }
+    std::map<std::string, int64_t> GetOutputDynBatchDims() { return m_program->m_output_batch_dim; }
     size_t GetNetworksCount() const { return m_networks.size(); }
     std::shared_ptr<cldnn::network> GetNetwork(size_t idx = 0) const;
     InferenceEngine::SizeVector GetOutputSize(std::string outName) const;

View File

@@ -93,6 +93,8 @@ public:
     int m_max_batch;
     int m_curBatch;
+    std::map<std::string, std::pair<int64_t, int64_t>> m_input_batch_dim;
+    std::map<std::string, int64_t> m_output_batch_dim;
 
     std::shared_ptr<cldnn::program> GetCompiledProgram(int program_id = 0);
     const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return inputLayouts; }
@@ -104,6 +106,9 @@ public:
     int GetMaxBatchSizeForSingleProgram();
 
     bool IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr<ngraph::Node>& op);
+    bool IsDynBatchModel(const std::shared_ptr<ov::Model>& model,
+                         std::map<std::string, ov::PartialShape>& shapes,
+                         std::map<std::string, std::pair<int64_t, int64_t>>& batch_dim);
 
     // Profiling utils
     void InitProfileInfo(const std::string& layerName,
@@ -170,7 +175,6 @@ private:
                       bool createTopologyOnly = false, bool partialBuild = false);
     void CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr<ngraph::Node>& op);
-    bool CanProcessDynBatch(std::vector<std::shared_ptr<ngraph::Node>> ops, InferenceEngine::InputsDataMap networkInputs) const;
     void ChangeInputBatch(int batch);
 };

View File

@@ -49,6 +49,8 @@ Graph::Graph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context,
     , m_stream_id(stream_id)
     , m_state(0) {
     m_program = std::make_shared<Program>(network, GetEngine(), m_config);
+    if (m_program->m_max_batch > 1)
+        m_config.max_dynamic_batch = m_program->m_max_batch;
     Build();
 }

View File

@@ -171,14 +171,12 @@ bool same_host_mem(cldnn::memory::ptr memPtr, uint8_t* hostPtr) {
     }
     return bufferMem == hostPtr;
 }
 }  // namespace
 
 namespace ov {
 namespace runtime {
 namespace intel_gpu {
 
 // ----------------------------------------------------------------------------------------- //
 // ---------------------------- IE API impl ------------------------------------------------ //
 // ----------------------------------------------------------------------------------------- //
@@ -188,6 +186,8 @@ Blob::Ptr InferRequest::GetBlob(const std::string& name) {
     InputInfo::Ptr foundInput;
     DataPtr foundOutput;
     bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput);
+    auto node = is_input ? findInputByNodeName(name) : findOutputByNodeName(name);
+    bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic());
 
     if (is_input) {
         // ROI blob is returned only if it was set previously. Otherwise default blob is returned.
@@ -196,12 +196,21 @@ Blob::Ptr InferRequest::GetBlob(const std::string& name) {
             data = it->second->getRoiBlob();
         } else {
             data = _inputs[name];
+            if (!isDynamic)
                 checkInputBlob(data, name, foundInput);
         }
     } else {
         data = _outputs[name];
+        if (isDynamic) {
+            if (m_graph->GetMaxDynamicBatchSize() > 1) {
+                SizeVector outDims = data->getTensorDesc().getDims();
+                outDims[m_graph->GetOutputDynBatchDims()[name]] = m_curBatch;
+                data->getTensorDesc().setDims(outDims);
+            }
+        } else {
             checkOutputBlob(data, name, foundOutput);
         }
+    }
     return data;
 }
@@ -243,13 +252,16 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
                                          desc.getPrecision().size(),
                                          std::multiplies<size_t>());
     bool preProcResize = false;
+    auto node = is_input ? findInputByNodeName(name) : findOutputByNodeName(name);
+    bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic());
     if (is_input) {
         preProcResize = foundInput->getPreProcess().getResizeAlgorithm() != ResizeAlgorithm::NO_RESIZE;
         const auto inputColorFormat = foundInput->getPreProcess().getColorFormat();
         preProcResize |= (inputColorFormat != ColorFormat::RAW) && (inputColorFormat != ColorFormat::BGR);
     }
-    if (dataBinSize != netReqBinSize && !compoundBlobPassed && !preProcResize) {
+    if (!isDynamic &&
+        dataBinSize != netReqBinSize && !compoundBlobPassed && !preProcResize) {
         IE_THROW() << "Incorrect binary data size for " << (is_input ? "input" : "output") <<
                       " blob with name: \'" << name << "\' " <<
                       "Current: " << dataBinSize << " Required: " << netReqBinSize;
@@ -292,7 +304,9 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
             auto y_ptr = nv12_ptr->y()->as<gpu::ClBlob>();
             if (y_ptr) {
                 auto y_impl = getBlobImpl(y_ptr);
+                if (!y_impl->is_allocated()) {
                     y_impl->allocate();
+                }
                 _deviceInputs[y_name] = nv12_ptr->y();
                 is_remote = true;
             }
@@ -300,7 +314,9 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
             auto uv_ptr = nv12_ptr->uv()->as<gpu::ClBlob>();
             if (uv_ptr) {
                 auto uv_impl = getBlobImpl(uv_ptr);
+                if (!uv_impl->is_allocated()) {
                     uv_impl->allocate();
+                }
                 _deviceInputs[uv_name] = nv12_ptr->uv();
                 is_remote = true;
             }
@@ -326,13 +342,22 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
             if (compoundBlobPassed) {
                 IE_THROW(NotImplemented) << cannot_set_compound;
             }
+            if (isDynamic) {
+                // extract new batch size from blob
+                if (m_graph->GetMaxDynamicBatchSize() > 1) {
+                    const auto batch_idx = m_graph->GetInputDynBatchDims()[name].first;
+                    if (batch_idx >= 0)
+                        SetBatch(blobDesc.getDims()[batch_idx]);
+                }
+            } else {
                 size_t blobSize = desc.getLayout() != SCALAR
                     ? details::product(desc.getDims())
                     : 1;
                 if (dataSize != blobSize) {
-                    IE_THROW() << "Input blob size is not equal network input size ("
+                    IE_THROW() << "Input blob size is not equal to network input size ("
                                << dataSize << "!=" << blobSize << ").";
                 }
+            }
 
             if (data->buffer() == nullptr)
                 IE_THROW(NotAllocated) << str_input_not_allocated << " Input name: \'" << name << "\'";
@@ -347,15 +372,17 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
         if (is_remote) {
             _deviceOutputs[name] = data;
         } else {
+            if (!isDynamic) {
                 size_t outputSize = desc.getLayout() != SCALAR
                     ? details::product(desc.getDims())
                     : 1;
                 if (dataSize != outputSize) {
-                    IE_THROW() << "Output blob size is not equal network output size (" << dataSize
+                    IE_THROW() << "Output blob size is not equal to network output size (" << dataSize
                                << "!=" << outputSize << ").";
                 }
                 if (data->buffer() == nullptr)
-                    IE_THROW(NotAllocated) << str_input_not_allocated << " Input name: \'" << name << "\'";
+                    IE_THROW(NotAllocated) << str_output_not_allocated << " Output name: \'" << name << "\'";
+            }
         }
         _outputs[name] = data;
     }
@@ -457,6 +484,9 @@ void InferRequest::checkBlobs() {
         } else {
             IE_THROW(NotFound) << "Failed to find input with name: \'" << input.first << "\'";
         }
+        auto node = findInputByNodeName(input.first);
+        bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic());
+        if (!is_dynamic)
             checkInputBlob(input.second, input.first, foundInput, m_graph->getConfig().nv12_two_inputs);
     }
     for (auto const &output : _outputs) {
@@ -470,6 +500,9 @@ void InferRequest::checkBlobs() {
         } else {
             IE_THROW(NotFound) << "Failed to find output with name: \'" << output.first << "\'";
         }
+        auto node = findOutputByNodeName(output.first);
+        bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic());
+        if (!is_dynamic)
             checkOutputBlob(output.second, output.first, foundOutput);
     }
 }
@@ -509,9 +542,12 @@ void InferRequest::SetBatch(int new_batch) {
     batchOutputs.clear();
 
     // tune expected inputs
-    for (auto &input : m_graph->GetInputLayouts()) {
-        cldnn::tensor dims = input.second.size;
-        const SizeVector sz = { 1, size_t(dims.feature[0]), size_t(dims.spatial[1]), size_t(dims.spatial[0]) };
+    for (auto& input : m_graph->GetNetworkInputs()) {
+        auto sz = input.second->getTensorDesc().getDims();
+        const auto batch_idx = m_graph->GetInputDynBatchDims()[input.first].first;
+        if (batch_idx >= 0)
+            sz[batch_idx] = 1;
         size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies<size_t>());
         std::vector<buf_info> in_buf;
@@ -534,9 +570,11 @@ void InferRequest::SetBatch(int new_batch) {
     }
 
     // tune expected outputs
-    for (auto& no : _networkOutputs) {
-        auto sz = m_graph->GetOutputSize(no.first);
-        sz.front() = 1;
+    for (auto& no : m_graph->GetNetworkOutputs()) {
+        auto sz = no.second->getTensorDesc().getDims();
+        const auto batch_idx = m_graph->GetInputDynBatchDims()[no.first].first;
+        if (batch_idx >= 0)
+            sz[batch_idx] = 1;
         size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies<size_t>());
         std::vector<buf_info> out_buf;
@@ -816,6 +854,21 @@ void InferRequest::setup_stream_graph() {
         streamID = streamID % numGraphs;
     }
     m_graph = streamGraphs[streamID];
+    // in case of dynamic batch, check all input blobs and set new batch
+    if (m_graph->GetMaxDynamicBatchSize() > 1) {
+        for (auto& input : _networkInputs) {
+            auto node = findInputByNodeName(input.first);
+            bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic());
+            if (!is_dynamic)
+                continue;
+            // extract new batch size from blob
+            const auto batch_idx = m_graph->GetInputDynBatchDims()[input.first].first;
+            if (batch_idx >= 0) {
+                SetBatch(_inputs[input.first]->getTensorDesc().getDims()[batch_idx]);
+                break;
+            }
+        }
+    }
 }
 
 Blob::Ptr InferRequest::create_host_blob(const TensorDesc& desc, std::shared_ptr<InferenceEngine::IAllocator> alloc) {
@@ -968,16 +1021,9 @@ void InferRequest::allocate_inputs() {
 void InferRequest::allocate_inputs_dynamic() {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_inputs_dynamic");
     // allocate inputs
-    for (auto &input : m_graph->GetInputLayouts()) {
+    for (auto &input : m_graph->GetNetworkInputs()) {
         InputInfo::Ptr ni = _networkInputs.at(input.first);
-        TensorDesc desc = ni->getTensorDesc();
-        SizeVector& dims = desc.getDims();
-
-        if (!dims.empty()) {
-            *dims.begin() = static_cast<size_t>(m_graph->GetMaxDynamicBatchSize());
-        } else {
-            IE_THROW() << "Empty dimensions for input blob " << input.first;
-        }
+        TensorDesc desc = input.second->getTensorDesc();
 
         Blob::Ptr inputBlob = create_host_blob(desc);
         if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) {
@@ -1020,17 +1066,10 @@ void InferRequest::allocate_outputs() {
 void InferRequest::allocate_outputs_dynamic() {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_outputs_dynamic");
     // allocate outputs
-    for (auto& no : _networkOutputs) {
+    for (auto& no : m_graph->GetNetworkOutputs()) {
         std::string outputID = m_graph->MapOutputName(no.first);
         DataPtr oi = no.second;
         TensorDesc desc = oi->getTensorDesc();
-        SizeVector& dims = desc.getDims();
-
-        if (!dims.empty()) {
-            *dims.begin() = static_cast<size_t>(m_graph->GetMaxDynamicBatchSize());
-        } else {
-            IE_THROW() << "Empty dimensions for output blob " << no.first;
-        }
 
         Blob::Ptr outputBlob = create_host_blob(desc);
         _outputs[no.first] = outputBlob;

View File

@@ -376,16 +376,45 @@ QueryNetworkResult Plugin::QueryNetwork(const CNNNetwork& network,
     }
 
     auto clonedNetwork = CloneAndTransformNetwork(network, conf);
-    auto ops = clonedNetwork.getFunction()->get_ordered_ops();
+    auto func = clonedNetwork.getFunction();
+    auto ops = func->get_ordered_ops();
     std::unordered_set<std::string> supported;
     std::unordered_set<std::string> unsupported;
 
     std::unordered_set<std::string> constantsNames;
     std::vector<std::shared_ptr<ngraph::Node>> constants;
 
+    std::map<std::string, ngraph::PartialShape> shapes;
+    std::map<std::string, std::pair<int64_t, int64_t>> batch_dim;
+    bool dyn_shape_batch_found = prog.IsDynBatchModel(func, shapes, batch_dim);
+
     auto layerIsSupported = [&](std::shared_ptr<ngraph::Node> node) {
         if (node->is_dynamic()) {
+            if (!dyn_shape_batch_found)
                 return false;
+
+            auto pshape = node->get_output_partial_shape(0);
+            if (pshape.rank().is_dynamic())
+                return false;
+
+            int dynCount = 0;
+            int64_t batch_idx = -1;
+            for (size_t i = 0; i < pshape.size(); i++) {
+                if (pshape[i].is_dynamic()) {
+                    dynCount++;
+                    if (batch_idx < 0) {
+                        batch_idx = i;
+                    }
+                }
+            }
+
+            if (dynCount != 1)
+                return false; // more than one dimension is dynamic
+
+            int64_t max_batch = pshape[batch_idx].get_max_length();
+            if (max_batch <= 1)
+                return false;
+
+            return true;
         }
         if (ngraph::is_type<const ngraph::op::v0::PriorBox>(node) ||
             ngraph::is_type<const ngraph::op::v0::PriorBoxClustered>(node) ||
@@ -637,6 +666,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string,
     auto closest_pow_of_2 = [] (float x) {
         return pow(2, floor(log(x)/log(2)));
     };
+    GPU_DEBUG_GET_INSTANCE(debug_config);
     auto model_param = options.find("MODEL_PTR");
     if (model_param == options.end()) {
         GPU_DEBUG_IF(debug_config->verbose >= 1) {

View File

@@ -5,6 +5,7 @@
 #include "intel_gpu/plugin/program.hpp"
 #include "ngraph/ops.hpp"
 #include "ngraph_ops/nms_ie_internal.hpp"
+#include "openvino/core/graph_util.hpp"
 #include "intel_gpu/plugin/itt.hpp"
 #include "intel_gpu/runtime/debug_configuration.hpp"
@@ -56,43 +57,74 @@ void Program::ValidateInputs(const std::shared_ptr<ngraph::Node>& op, std::vecto
                    << " op::v" << op->get_type_info().version << ")";
 }
 
-bool Program::CanProcessDynBatch(std::vector<std::shared_ptr<ngraph::Node>> ops, InferenceEngine::InputsDataMap networkInputs) const {
-    if (networkInputs.empty())
-        return false;
-
-    for (auto op : ops) {
-        // TODO: do we have any other exception cases?
-        if (std::dynamic_pointer_cast<ngraph::op::v1::Reshape>(op)) {
-            if (op->get_input_shape(0)[0] == op->get_output_shape(0)[0])
-                continue;
-        }
-
-        // List of the operations which can lead to invalid dynamic batch processing
-        if (std::dynamic_pointer_cast<ngraph::op::internal::NonMaxSuppressionIEInternal>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v5::NonMaxSuppression>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v4::NonMaxSuppression>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v3::NonMaxSuppression>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v1::NonMaxSuppression>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::PSROIPooling>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::ROIPooling>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::PriorBox>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::DetectionOutput>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v1::Reshape>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::Squeeze>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::Unsqueeze>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v1::Transpose>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v4::Proposal>(op) ||
-            std::dynamic_pointer_cast<ngraph::op::v0::Proposal>(op)) {
-            return false;
-        }
-
-        auto customLayer = m_config.customLayers.find(op->get_type_name());
-        if (customLayer != m_config.customLayers.end()) {
-            return false;
-        }
-    }
-
-    return true;
-}
+auto getParamName = [](const std::shared_ptr<ov::Node>& param) -> std::string {
+    const auto& names = param->get_output_tensor(0).get_names();
+    if (!names.empty())
+        return *names.begin();
+    else
+        return param->get_friendly_name();
+};
+
+// detect the only supported dynamic shape case -
+// exactly one dimension is dynamic in input params with defined min/max interval
+bool Program::IsDynBatchModel(const std::shared_ptr<ov::Model>& model,
+                              std::map<std::string, ov::PartialShape>& shapes,
+                              std::map<std::string, std::pair<int64_t, int64_t>>& batch_dim) {
+    for (const auto& param : model->get_parameters()) {
+        auto pname = getParamName(param);
+        batch_dim[pname] = { -1, -1 };
+        if (param->get_output_partial_shape(0).rank().is_dynamic()) {
+            return false;
+        }
+        ov::PartialShape pshape = param->get_output_partial_shape(0);
+        int dynCount = 0;
+        int64_t batch_idx = -1;
+        for (size_t i = 0; i < pshape.size(); i++) {
+            if (pshape[i].is_dynamic()) {
+                dynCount++;
+                if (batch_idx < 0) {
+                    batch_idx = i;
+                }
+            }
+        }
+        switch (dynCount) {
+        case 1:
+            // exactly one dynamic dim
+            {
+                int64_t max_b = pshape[batch_idx].get_max_length();
+                if (max_b > 1) {
+                    batch_dim[pname].first = batch_idx;
+                    batch_dim[pname].second = max_b;
+                    pshape[batch_idx] = 1;
+                }
+            }
+        case 0:
+            // no dynamic dims - possible legacy case
+            shapes[pname] = pshape;
+            break;
+        default:
+            break;
+        }
+    }
+    if (batch_dim.empty())
+        return false;
+
+    bool dyn_shape_batch_found = false;
+    // detect 1st dyn dim, mark it and continue
+    auto bitr = batch_dim.begin();
+    dyn_shape_batch_found = bitr->second.first >= 0;
+    auto batch_val_1st = bitr->second.second;
+    bitr++;
+    for (; bitr != batch_dim.end(); bitr++) {
+        if (bitr->second.first >= 0) {
+            if (bitr->second.second != batch_val_1st) {
+                dyn_shape_batch_found = false;
+                break;
+            } else {
+                dyn_shape_batch_found = true;
+            }
+        }
+    }
+    return dyn_shape_batch_found;
+}
 
 Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::engine> engine, const Config& config,
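
Editor's note: a small standalone sketch (not part of the commit) of the shape patterns that IsDynBatchModel() above distinguishes, using only the public ov::PartialShape / ov::Dimension API.

#include <openvino/core/partial_shape.hpp>
#include <iostream>

int main() {
    // Accepted: exactly one dynamic dimension with a defined upper bound > 1.
    // It is treated as the batch dimension; its maximum (19) becomes max_dynamic_batch.
    ov::PartialShape dyn_batch{ov::Dimension(1, 19), 4, 20, 20};

    // Rejected: dynamic rank - the parameter loop above bails out immediately.
    ov::PartialShape dyn_rank = ov::PartialShape::dynamic();

    // Rejected: more than one dynamic dimension (dynCount != 1 in the checks above).
    ov::PartialShape two_dyn{ov::Dimension(1, 19), ov::Dimension(1, 8), 20, 20};

    std::cout << dyn_batch << " | " << dyn_rank << " | " << two_dyn << std::endl;
    return 0;
}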
@@ -112,26 +144,136 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::en
     auto ops = func->get_ordered_ops();
 
-    if (m_config.max_dynamic_batch > 1) {
-        // check topology for applicability
-        if (!CanProcessDynBatch(ops, networkInputs)) {
-            IE_THROW() << "Such topology cannot be compiled for dynamic batch!";
-        }
-    }
+    bool dyn_shape_batch_found = false;
+    std::map<std::string, ngraph::PartialShape> shapes;
+    std::map<std::string, std::pair<int64_t, int64_t>> batch_dim;
+    if (m_config.enableDynamicBatch) {
+        // in case of legacy dynamic batch,
+        // we assume 4D input with 0 batch dim
+        auto param = func->get_parameters().front();
+        auto pname = getParamName(param);
+        shapes[pname] = param->get_output_partial_shape(0);
+        batch_dim[pname].first = 0;
+        batch_dim[pname].second = m_config.max_dynamic_batch;
+    } else {
+        dyn_shape_batch_found = IsDynBatchModel(func, shapes, batch_dim);
+        if (dyn_shape_batch_found) {
+            m_config.max_dynamic_batch = batch_dim.begin()->second.second;
+        } else {
+            if (!batch_dim.empty() && shapes.empty()) {
+                // more than on dynamic dim or dynamic rank
+                IE_THROW() << "Only dynamic batch is supported!";
+            }
+        }
+    }
 
     int m_bv_sz = GetMaxBatchSizeForSingleProgram();
+    m_max_batch = m_config.max_dynamic_batch;
 
-    m_max_batch = config.max_dynamic_batch;
-
-    if (config.max_dynamic_batch > 1) {
+    if (dyn_shape_batch_found || config.max_dynamic_batch > 1) {
+        // compile log2 networks to serve dynamic batch requests
         for (int b = m_bv_sz - 1; b >= 0; b--) {
             inputLayouts.clear();
             outputDims.clear();
             primitiveIDs.clear();
             blobMemCache.clear();
 
-            ChangeInputBatch(1U << static_cast<unsigned>(b));
-            m_programs.insert(m_programs.begin(), BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild));
+            auto new_batch = 1U << static_cast<unsigned>(b);
+            ChangeInputBatch(new_batch);
+
+            // clone the source model, find the batch dim
+            // and reshape the model to next batch size
+            auto new_func = ov::clone_model(*func);
+            std::map<ov::Output<ov::Node>, ngraph::PartialShape> new_shapes;
+            for (const auto& param : new_func->get_parameters()) {
+                ov::PartialShape pshape = param->get_output_partial_shape(0);
+                auto pname = getParamName(param);
+                auto batch_idx = batch_dim[pname].first;
+                if (batch_idx >= 0) {
+                    auto pshape = shapes[pname];
+                    pshape[batch_idx] = new_batch;
+                    new_shapes[param->output(0)] = pshape;
+                }
+            }
+            new_func->reshape(new_shapes);
+
+            // reshape network input/output maps accordingly
+            // for correct network compilation
+            for (auto& new_input : new_func->inputs()) {
+                auto iname = new_input.get_node()->get_friendly_name();
+                auto it = networkInputs.find(iname);
+                if (it != networkInputs.end()) {
+                    auto shape = new_input.get_shape();
+                    auto l = it->second->getTensorDesc().getLayout();
+                    it->second->getInputData()->reshape(shape, l);
+                }
+            }
+
+            for (auto& new_output : new_func->outputs()) {
+                auto iname = new_output.get_node_shared_ptr()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name();
+                auto it = networkOutputs.find(iname);
+                if (it != networkOutputs.end()) {
+                    auto shape = new_output.get_shape();
+                    auto l = it->second->getTensorDesc().getLayout();
+                    it->second->reshape(shape, l);
+                }
+            }
+            m_programs.insert(m_programs.begin(), BuildProgram(new_func->get_ordered_ops(), networkInputs, networkOutputs,
+                                                               createTopologyOnly, partialBuild));
         }
+        {
+            // recompute maximal dynamic batch inputs/outputs for infer request
+            // and store them into internal maps
+            // same operations as above, but for maximum batch
+            auto new_func = ov::clone_model(*func);
+            std::map<ov::Output<ov::Node>, ngraph::PartialShape> new_shapes;
+            for (const auto& param : new_func->get_parameters()) {
+                ov::PartialShape pshape = param->get_output_partial_shape(0);
+                auto pname = getParamName(param);
+                auto batch_idx = batch_dim[pname].first;
+                if (batch_idx >= 0) {
+                    auto pshape = shapes[pname];
+                    pshape[batch_idx] = m_max_batch;
+                    new_shapes[param->output(0)] = pshape;
+                }
+            }
+            new_func->reshape(new_shapes);
+
+            for (auto& new_input : new_func->inputs()) {
+                auto iname = new_input.get_node()->get_friendly_name();
+                auto it = networkInputs.find(iname);
+                if (it != networkInputs.end()) {
+                    auto shape = new_input.get_shape();
+                    auto l = it->second->getTensorDesc().getLayout();
+                    it->second->getInputData()->reshape(shape, l);
+                }
+            }
+
+            for (auto& new_output : new_func->outputs()) {
+                auto iname = new_output.get_node_shared_ptr()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name();
+                auto it = networkOutputs.find(iname);
+                if (it != networkOutputs.end()) {
+                    auto shape = new_output.get_shape();
+                    auto l = it->second->getTensorDesc().getLayout();
+                    SizeVector old_shape = it->second->getTensorDesc().getDims();
+                    it->second->reshape(shape, l);
+                    // detect changed output batch dimension
+                    SizeVector new_shape = it->second->getTensorDesc().getDims();
+                    for (int64_t i = 0; i < old_shape.size(); i++) {
+                        if (old_shape[i] != new_shape[i]) {
+                            m_output_batch_dim[iname] = i;
+                            break;
+                        }
+                    }
+                }
+            }
+            m_networkInputs = networkInputs;
+            m_networkOutputs = networkOutputs;
+            m_input_batch_dim = batch_dim;
+        }
     } else {
         m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild));
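
Editor's note: the "compile log2 networks" loop above builds one program per power-of-two batch size, from 1 up to the largest power of two needed to cover max_dynamic_batch. The standalone sketch below (not the plugin's code) only illustrates that set; it assumes GetMaxBatchSizeForSingleProgram() returns roughly floor(log2(max_batch)) + 1, which is not shown in this diff.

#include <cmath>
#include <cstdio>
#include <vector>

// List the batch sizes a dynamic-batch compilation would cover, mirroring the
// "1U << b" loop above under the assumed m_bv_sz formula.
std::vector<unsigned> compiled_batches(unsigned max_dynamic_batch) {
    int bv_sz = static_cast<int>(std::floor(std::log2(static_cast<double>(max_dynamic_batch)))) + 1;  // assumption
    std::vector<unsigned> batches;
    for (int b = 0; b < bv_sz; b++)
        batches.push_back(1u << b);  // 1, 2, 4, ...
    return batches;
}

int main() {
    // For max_dynamic_batch = 19 this prints: 1 2 4 8 16
    for (unsigned b : compiled_batches(19))
        std::printf("%u ", b);
    std::printf("\n");
    return 0;
}

The per-chunk buf_info bookkeeping in InferRequest::SetBatch() above suggests that a request with, say, batch 17 is then served by splitting it across these precompiled chunk sizes rather than by recompiling.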

View File

@@ -0,0 +1,118 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/runtime/core.hpp"

#include <common_test_utils/test_common.hpp>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/skip_tests_config.hpp"
#include "ngraph_functions/subgraph_builders.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"

using namespace ::testing;
using namespace ov::test;

using OVDynamicBatchParams = std::tuple<
    std::vector<InputShape>,              // dynamic and static case sizes
    ElementType,                          // Network precision
    std::string,                          // Device name
    std::map<std::string, std::string>    // Config
>;

class OVDynamicBatchShape_Tests : public WithParamInterface<OVDynamicBatchParams>,
                                  virtual public ov::test::SubgraphBaseTest {
public:
    static std::string getTestCaseName(TestParamInfo<OVDynamicBatchParams> obj) {
        std::vector<InputShape> inputShapes;
        ElementType netPrecision;
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        std::tie(inputShapes, netPrecision, targetDevice, configuration) = obj.param;

        std::ostringstream result;
        result << "IS=";
        for (const auto& shape : inputShapes) {
            result << CommonTestUtils::partialShape2str({ shape.first }) << "_";
        }
        result << "TS=";
        for (const auto& shape : inputShapes) {
            result << "(";
            if (!shape.second.empty()) {
                for (const auto& itr : shape.second) {
                    result << CommonTestUtils::vec2str(itr);
                }
            }
            result << ")_";
        }
        result << "netPRC=" << netPrecision << "_";
        result << "targetDevice=" << targetDevice;
        if (!configuration.empty()) {
            for (auto& configItem : configuration) {
                result << "configItem=" << configItem.first << "_" << configItem.second << "_";
            }
        }
        return result.str();
    }

    void TearDown() override {
        core.reset();
    }

protected:
    void SetUp() override {
        if (core)
            core.reset();
        std::tie(inputShape, netPrecision, targetDevice, configuration) = this->GetParam();

        init_input_shapes(inputShape);
        //TODO: think how we can switch between several input topologies in the future
        // function = ngraph::builder::subgraph::makeSplitConvConcat(inputShape.front().first.get_min_shape(), netPrecision);
        function = ngraph::builder::subgraph::makeSplitMultiConvConcat(inputShape.front().first.get_min_shape(), netPrecision);

        // make topology dynamic
        std::map<std::string, ov::PartialShape> dynShape;
        dynShape["input_tensor"] = inputShape.front().first;
        function->reshape(dynShape);
    }

    std::shared_ptr<ov::Model> src_func;
    // std::map<std::string, std::string> configuration;
    std::vector<InputShape> inputShape;
    ElementType netPrecision;
};

TEST_P(OVDynamicBatchShape_Tests, InferDynamicBatchBound) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    core = std::make_shared<ov::runtime::Core>();
    run();
}

namespace {
const std::map<std::string, std::string> config = {};
const std::map<std::string, std::string> hetero_config = {
    {"TARGET_FALLBACK", CommonTestUtils::DEVICE_GPU}
};

const std::vector<InputShape> inputShapes = {
    { { {1, 19}, 4, 20, 20}, { {1, 4, 20, 20}, {7, 4, 20, 20}, {17, 4, 20, 20} } }
};

const std::vector<ElementType> netPrecisions = {
    ElementType::f16,
    ElementType::f32
};

INSTANTIATE_TEST_SUITE_P(smoke_GPU_DynBatch, OVDynamicBatchShape_Tests,
    ::testing::Combine(
        ::testing::Values(inputShapes),
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_GPU),
        ::testing::Values(config)),
    OVDynamicBatchShape_Tests::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_GPU_DynBatchHetero, OVDynamicBatchShape_Tests,
    ::testing::Combine(
        ::testing::Values(inputShapes),
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_HETERO),
        ::testing::Values(hetero_config)),
    OVDynamicBatchShape_Tests::getTestCaseName);
}  // namespace

View File

@@ -189,9 +189,11 @@ inline std::shared_ptr<ngraph::Function> makeKSOFunction(std::vector<size_t> inp
     return fnPtr;
 }
 
-inline std::shared_ptr<ngraph::Function> makeSplitMultiConvConcat(std::vector<size_t> inputShape = {1, 4, 20, 20}) {
-    auto ngPrc = ngraph::element::Type_t::f32;
+inline std::shared_ptr<ngraph::Function> makeSplitMultiConvConcat(std::vector<size_t> inputShape = {1, 4, 20, 20},
+                                                                  ngraph::element::Type_t ngPrc = ngraph::element::Type_t::f32) {
     auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
+    params.front()->set_friendly_name("Param_1");
+    params.front()->get_output_tensor(0).set_names({ "input_tensor" });
     auto split = ngraph::builder::makeSplit(params[0], ngPrc, 2, 1);
     auto conv1_0 = ngraph::builder::makeConvolution(split->output(0), ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},