[GPU] Implement dynamic shape case support for one dimension only (batch) via legacy dynamic batch functionality (#9314)
This commit is contained in:
parent
98cbaf0f08
commit
f2be2c915f
@ -53,6 +53,10 @@ public:
|
|||||||
std::shared_ptr<cldnn::engine> GetEngine() const { return getContextImpl(m_context)->GetEngine(); }
|
std::shared_ptr<cldnn::engine> GetEngine() const { return getContextImpl(m_context)->GetEngine(); }
|
||||||
int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; }
|
int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; }
|
||||||
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
|
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
|
||||||
|
// Returns the network inputs map, forwarded from the underlying Program (m_program->GetNetworkInputs()).
const InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_program->GetNetworkInputs(); }
|
||||||
|
// Returns the network outputs map, forwarded from the underlying Program (m_program->GetNetworkOutputs()).
const InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_program->GetNetworkOutputs(); }
|
||||||
|
// Returns a copy of Program::m_input_batch_dim, keyed by input name.
// Call sites read `.first` of an entry as the index of the batch dimension and ignore the entry when it is negative.
// NOTE(review): the map is returned by value from a non-const method, so each call copies it and
// `operator[]` at call sites inserts into a temporary — confirm this is intended.
std::map<std::string, std::pair<int64_t, int64_t>> GetInputDynBatchDims() { return m_program->m_input_batch_dim; }
|
||||||
|
// Returns a copy of Program::m_output_batch_dim, keyed by output name.
// The mapped value is used by callers as an index into the output dims (the position of the batch dimension).
// NOTE(review): returned by value from a non-const method; each call copies the map — confirm this is intended.
std::map<std::string, int64_t> GetOutputDynBatchDims() { return m_program->m_output_batch_dim; }
|
||||||
size_t GetNetworksCount() const { return m_networks.size(); }
|
size_t GetNetworksCount() const { return m_networks.size(); }
|
||||||
std::shared_ptr<cldnn::network> GetNetwork(size_t idx = 0) const;
|
std::shared_ptr<cldnn::network> GetNetwork(size_t idx = 0) const;
|
||||||
InferenceEngine::SizeVector GetOutputSize(std::string outName) const;
|
InferenceEngine::SizeVector GetOutputSize(std::string outName) const;
|
||||||
|
@ -93,6 +93,8 @@ public:
|
|||||||
|
|
||||||
int m_max_batch;
|
int m_max_batch;
|
||||||
int m_curBatch;
|
int m_curBatch;
|
||||||
|
// Per-input dynamic batch info, keyed by input name. Readers treat `.first` as the index of the
// batch dimension (negative means "no dynamic batch dimension").
// NOTE(review): `.second` presumably holds the maximum batch, as in IsDynBatchModel's batch_dim — confirm.
std::map<std::string, std::pair<int64_t, int64_t>> m_input_batch_dim;
|
||||||
|
// Per-output index of the batch dimension, keyed by output name; exposed through Graph::GetOutputDynBatchDims().
std::map<std::string, int64_t> m_output_batch_dim;
|
||||||
|
|
||||||
std::shared_ptr<cldnn::program> GetCompiledProgram(int program_id = 0);
|
std::shared_ptr<cldnn::program> GetCompiledProgram(int program_id = 0);
|
||||||
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return inputLayouts; }
|
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return inputLayouts; }
|
||||||
@ -104,6 +106,9 @@ public:
|
|||||||
int GetMaxBatchSizeForSingleProgram();
|
int GetMaxBatchSizeForSingleProgram();
|
||||||
|
|
||||||
bool IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr<ngraph::Node>& op);
|
bool IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr<ngraph::Node>& op);
|
||||||
|
// Checks whether `model` matches the only supported dynamic-shape case: inputs with exactly one
// dynamic dimension, which is then treated as the batch dimension.
//   model     - model whose parameters are inspected.
//   shapes    - [out] per-parameter partial shape, keyed by parameter name; for a parameter with a
//               detected batch dimension that dimension is replaced by 1.
//   batch_dim - [out] per-parameter {batch dimension index, maximum batch}, keyed by parameter name;
//               {-1, -1} when no batch dimension was detected for that parameter.
// Returns true when a dynamic batch dimension was detected; returns false, among other cases, when a
// parameter has dynamic rank or when parameters disagree on the maximum batch.
bool IsDynBatchModel(const std::shared_ptr<ov::Model>& model,
|
||||||
|
std::map<std::string, ov::PartialShape>& shapes,
|
||||||
|
std::map<std::string, std::pair<int64_t, int64_t>>& batch_dim);
|
||||||
|
|
||||||
// Profiling utils
|
// Profiling utils
|
||||||
void InitProfileInfo(const std::string& layerName,
|
void InitProfileInfo(const std::string& layerName,
|
||||||
@ -170,7 +175,6 @@ private:
|
|||||||
bool createTopologyOnly = false, bool partialBuild = false);
|
bool createTopologyOnly = false, bool partialBuild = false);
|
||||||
|
|
||||||
void CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr<ngraph::Node>& op);
|
void CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr<ngraph::Node>& op);
|
||||||
bool CanProcessDynBatch(std::vector<std::shared_ptr<ngraph::Node>> ops, InferenceEngine::InputsDataMap networkInputs) const;
|
|
||||||
void ChangeInputBatch(int batch);
|
void ChangeInputBatch(int batch);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -49,6 +49,8 @@ Graph::Graph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context,
|
|||||||
, m_stream_id(stream_id)
|
, m_stream_id(stream_id)
|
||||||
, m_state(0) {
|
, m_state(0) {
|
||||||
m_program = std::make_shared<Program>(network, GetEngine(), m_config);
|
m_program = std::make_shared<Program>(network, GetEngine(), m_config);
|
||||||
|
if (m_program->m_max_batch > 1)
|
||||||
|
m_config.max_dynamic_batch = m_program->m_max_batch;
|
||||||
Build();
|
Build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -171,14 +171,12 @@ bool same_host_mem(cldnn::memory::ptr memPtr, uint8_t* hostPtr) {
|
|||||||
}
|
}
|
||||||
return bufferMem == hostPtr;
|
return bufferMem == hostPtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
namespace ov {
|
namespace ov {
|
||||||
namespace runtime {
|
namespace runtime {
|
||||||
namespace intel_gpu {
|
namespace intel_gpu {
|
||||||
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------------------- //
|
// ----------------------------------------------------------------------------------------- //
|
||||||
// ---------------------------- IE API impl ------------------------------------------------ //
|
// ---------------------------- IE API impl ------------------------------------------------ //
|
||||||
// ----------------------------------------------------------------------------------------- //
|
// ----------------------------------------------------------------------------------------- //
|
||||||
@ -188,6 +186,8 @@ Blob::Ptr InferRequest::GetBlob(const std::string& name) {
|
|||||||
InputInfo::Ptr foundInput;
|
InputInfo::Ptr foundInput;
|
||||||
DataPtr foundOutput;
|
DataPtr foundOutput;
|
||||||
bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput);
|
bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput);
|
||||||
|
auto node = is_input ? findInputByNodeName(name) : findOutputByNodeName(name);
|
||||||
|
bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic());
|
||||||
|
|
||||||
if (is_input) {
|
if (is_input) {
|
||||||
// ROI blob is returned only if it was set previously. Otherwise default blob is returned.
|
// ROI blob is returned only if it was set previously. Otherwise default blob is returned.
|
||||||
@ -196,12 +196,21 @@ Blob::Ptr InferRequest::GetBlob(const std::string& name) {
|
|||||||
data = it->second->getRoiBlob();
|
data = it->second->getRoiBlob();
|
||||||
} else {
|
} else {
|
||||||
data = _inputs[name];
|
data = _inputs[name];
|
||||||
|
if (!isDynamic)
|
||||||
checkInputBlob(data, name, foundInput);
|
checkInputBlob(data, name, foundInput);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
data = _outputs[name];
|
data = _outputs[name];
|
||||||
|
if (isDynamic) {
|
||||||
|
if (m_graph->GetMaxDynamicBatchSize() > 1) {
|
||||||
|
SizeVector outDims = data->getTensorDesc().getDims();
|
||||||
|
outDims[m_graph->GetOutputDynBatchDims()[name]] = m_curBatch;
|
||||||
|
data->getTensorDesc().setDims(outDims);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
checkOutputBlob(data, name, foundOutput);
|
checkOutputBlob(data, name, foundOutput);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -243,13 +252,16 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
|
|||||||
desc.getPrecision().size(),
|
desc.getPrecision().size(),
|
||||||
std::multiplies<size_t>());
|
std::multiplies<size_t>());
|
||||||
bool preProcResize = false;
|
bool preProcResize = false;
|
||||||
|
auto node = is_input ? findInputByNodeName(name) : findOutputByNodeName(name);
|
||||||
|
bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic());
|
||||||
if (is_input) {
|
if (is_input) {
|
||||||
preProcResize = foundInput->getPreProcess().getResizeAlgorithm() != ResizeAlgorithm::NO_RESIZE;
|
preProcResize = foundInput->getPreProcess().getResizeAlgorithm() != ResizeAlgorithm::NO_RESIZE;
|
||||||
const auto inputColorFormat = foundInput->getPreProcess().getColorFormat();
|
const auto inputColorFormat = foundInput->getPreProcess().getColorFormat();
|
||||||
preProcResize |= (inputColorFormat != ColorFormat::RAW) && (inputColorFormat != ColorFormat::BGR);
|
preProcResize |= (inputColorFormat != ColorFormat::RAW) && (inputColorFormat != ColorFormat::BGR);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dataBinSize != netReqBinSize && !compoundBlobPassed && !preProcResize) {
|
if (!isDynamic &&
|
||||||
|
dataBinSize != netReqBinSize && !compoundBlobPassed && !preProcResize) {
|
||||||
IE_THROW() << "Incorrect binary data size for " << (is_input ? "input" : "output") <<
|
IE_THROW() << "Incorrect binary data size for " << (is_input ? "input" : "output") <<
|
||||||
" blob with name: \'" << name << "\' " <<
|
" blob with name: \'" << name << "\' " <<
|
||||||
"Current: " << dataBinSize << " Required: " << netReqBinSize;
|
"Current: " << dataBinSize << " Required: " << netReqBinSize;
|
||||||
@ -292,7 +304,9 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
|
|||||||
auto y_ptr = nv12_ptr->y()->as<gpu::ClBlob>();
|
auto y_ptr = nv12_ptr->y()->as<gpu::ClBlob>();
|
||||||
if (y_ptr) {
|
if (y_ptr) {
|
||||||
auto y_impl = getBlobImpl(y_ptr);
|
auto y_impl = getBlobImpl(y_ptr);
|
||||||
|
if (!y_impl->is_allocated()) {
|
||||||
y_impl->allocate();
|
y_impl->allocate();
|
||||||
|
}
|
||||||
_deviceInputs[y_name] = nv12_ptr->y();
|
_deviceInputs[y_name] = nv12_ptr->y();
|
||||||
is_remote = true;
|
is_remote = true;
|
||||||
}
|
}
|
||||||
@ -300,7 +314,9 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
|
|||||||
auto uv_ptr = nv12_ptr->uv()->as<gpu::ClBlob>();
|
auto uv_ptr = nv12_ptr->uv()->as<gpu::ClBlob>();
|
||||||
if (uv_ptr) {
|
if (uv_ptr) {
|
||||||
auto uv_impl = getBlobImpl(uv_ptr);
|
auto uv_impl = getBlobImpl(uv_ptr);
|
||||||
|
if (!uv_impl->is_allocated()) {
|
||||||
uv_impl->allocate();
|
uv_impl->allocate();
|
||||||
|
}
|
||||||
_deviceInputs[uv_name] = nv12_ptr->uv();
|
_deviceInputs[uv_name] = nv12_ptr->uv();
|
||||||
is_remote = true;
|
is_remote = true;
|
||||||
}
|
}
|
||||||
@ -326,13 +342,22 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
|
|||||||
if (compoundBlobPassed) {
|
if (compoundBlobPassed) {
|
||||||
IE_THROW(NotImplemented) << cannot_set_compound;
|
IE_THROW(NotImplemented) << cannot_set_compound;
|
||||||
}
|
}
|
||||||
|
if (isDynamic) {
|
||||||
|
// extract new batch size from blob
|
||||||
|
if (m_graph->GetMaxDynamicBatchSize() > 1) {
|
||||||
|
const auto batch_idx = m_graph->GetInputDynBatchDims()[name].first;
|
||||||
|
if (batch_idx >= 0)
|
||||||
|
SetBatch(blobDesc.getDims()[batch_idx]);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
size_t blobSize = desc.getLayout() != SCALAR
|
size_t blobSize = desc.getLayout() != SCALAR
|
||||||
? details::product(desc.getDims())
|
? details::product(desc.getDims())
|
||||||
: 1;
|
: 1;
|
||||||
if (dataSize != blobSize) {
|
if (dataSize != blobSize) {
|
||||||
IE_THROW() << "Input blob size is not equal network input size ("
|
IE_THROW() << "Input blob size is not equal to network input size ("
|
||||||
<< dataSize << "!=" << blobSize << ").";
|
<< dataSize << "!=" << blobSize << ").";
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (data->buffer() == nullptr)
|
if (data->buffer() == nullptr)
|
||||||
IE_THROW(NotAllocated) << str_input_not_allocated << " Input name: \'" << name << "\'";
|
IE_THROW(NotAllocated) << str_input_not_allocated << " Input name: \'" << name << "\'";
|
||||||
@ -347,15 +372,17 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
|
|||||||
if (is_remote) {
|
if (is_remote) {
|
||||||
_deviceOutputs[name] = data;
|
_deviceOutputs[name] = data;
|
||||||
} else {
|
} else {
|
||||||
|
if (!isDynamic) {
|
||||||
size_t outputSize = desc.getLayout() != SCALAR
|
size_t outputSize = desc.getLayout() != SCALAR
|
||||||
? details::product(desc.getDims())
|
? details::product(desc.getDims())
|
||||||
: 1;
|
: 1;
|
||||||
if (dataSize != outputSize) {
|
if (dataSize != outputSize) {
|
||||||
IE_THROW() << "Output blob size is not equal network output size (" << dataSize
|
IE_THROW() << "Output blob size is not equal to network output size (" << dataSize
|
||||||
<< "!=" << outputSize << ").";
|
<< "!=" << outputSize << ").";
|
||||||
}
|
}
|
||||||
if (data->buffer() == nullptr)
|
if (data->buffer() == nullptr)
|
||||||
IE_THROW(NotAllocated) << str_input_not_allocated << " Input name: \'" << name << "\'";
|
IE_THROW(NotAllocated) << str_output_not_allocated << " Output name: \'" << name << "\'";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_outputs[name] = data;
|
_outputs[name] = data;
|
||||||
}
|
}
|
||||||
@ -457,6 +484,9 @@ void InferRequest::checkBlobs() {
|
|||||||
} else {
|
} else {
|
||||||
IE_THROW(NotFound) << "Failed to find input with name: \'" << input.first << "\'";
|
IE_THROW(NotFound) << "Failed to find input with name: \'" << input.first << "\'";
|
||||||
}
|
}
|
||||||
|
auto node = findInputByNodeName(input.first);
|
||||||
|
bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic());
|
||||||
|
if (!is_dynamic)
|
||||||
checkInputBlob(input.second, input.first, foundInput, m_graph->getConfig().nv12_two_inputs);
|
checkInputBlob(input.second, input.first, foundInput, m_graph->getConfig().nv12_two_inputs);
|
||||||
}
|
}
|
||||||
for (auto const &output : _outputs) {
|
for (auto const &output : _outputs) {
|
||||||
@ -470,6 +500,9 @@ void InferRequest::checkBlobs() {
|
|||||||
} else {
|
} else {
|
||||||
IE_THROW(NotFound) << "Failed to find output with name: \'" << output.first << "\'";
|
IE_THROW(NotFound) << "Failed to find output with name: \'" << output.first << "\'";
|
||||||
}
|
}
|
||||||
|
auto node = findOutputByNodeName(output.first);
|
||||||
|
bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic());
|
||||||
|
if (!is_dynamic)
|
||||||
checkOutputBlob(output.second, output.first, foundOutput);
|
checkOutputBlob(output.second, output.first, foundOutput);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -509,9 +542,12 @@ void InferRequest::SetBatch(int new_batch) {
|
|||||||
batchOutputs.clear();
|
batchOutputs.clear();
|
||||||
|
|
||||||
// tune expected inputs
|
// tune expected inputs
|
||||||
for (auto &input : m_graph->GetInputLayouts()) {
|
for (auto& input : m_graph->GetNetworkInputs()) {
|
||||||
cldnn::tensor dims = input.second.size;
|
auto sz = input.second->getTensorDesc().getDims();
|
||||||
const SizeVector sz = { 1, size_t(dims.feature[0]), size_t(dims.spatial[1]), size_t(dims.spatial[0]) };
|
const auto batch_idx = m_graph->GetInputDynBatchDims()[input.first].first;
|
||||||
|
if (batch_idx >= 0)
|
||||||
|
sz[batch_idx] = 1;
|
||||||
|
|
||||||
size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies<size_t>());
|
size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies<size_t>());
|
||||||
std::vector<buf_info> in_buf;
|
std::vector<buf_info> in_buf;
|
||||||
|
|
||||||
@ -534,9 +570,11 @@ void InferRequest::SetBatch(int new_batch) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// tune expected outputs
|
// tune expected outputs
|
||||||
for (auto& no : _networkOutputs) {
|
for (auto& no : m_graph->GetNetworkOutputs()) {
|
||||||
auto sz = m_graph->GetOutputSize(no.first);
|
auto sz = no.second->getTensorDesc().getDims();
|
||||||
sz.front() = 1;
|
const auto batch_idx = m_graph->GetInputDynBatchDims()[no.first].first;
|
||||||
|
if (batch_idx >= 0)
|
||||||
|
sz[batch_idx] = 1;
|
||||||
size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies<size_t>());
|
size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies<size_t>());
|
||||||
std::vector<buf_info> out_buf;
|
std::vector<buf_info> out_buf;
|
||||||
|
|
||||||
@ -816,6 +854,21 @@ void InferRequest::setup_stream_graph() {
|
|||||||
streamID = streamID % numGraphs;
|
streamID = streamID % numGraphs;
|
||||||
}
|
}
|
||||||
m_graph = streamGraphs[streamID];
|
m_graph = streamGraphs[streamID];
|
||||||
|
// in case of dynamic batch, check all input blobs and set new batch
|
||||||
|
if (m_graph->GetMaxDynamicBatchSize() > 1) {
|
||||||
|
for (auto& input : _networkInputs) {
|
||||||
|
auto node = findInputByNodeName(input.first);
|
||||||
|
bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic());
|
||||||
|
if (!is_dynamic)
|
||||||
|
continue;
|
||||||
|
// extract new batch size from blob
|
||||||
|
const auto batch_idx = m_graph->GetInputDynBatchDims()[input.first].first;
|
||||||
|
if (batch_idx >= 0) {
|
||||||
|
SetBatch(_inputs[input.first]->getTensorDesc().getDims()[batch_idx]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Blob::Ptr InferRequest::create_host_blob(const TensorDesc& desc, std::shared_ptr<InferenceEngine::IAllocator> alloc) {
|
Blob::Ptr InferRequest::create_host_blob(const TensorDesc& desc, std::shared_ptr<InferenceEngine::IAllocator> alloc) {
|
||||||
@ -968,16 +1021,9 @@ void InferRequest::allocate_inputs() {
|
|||||||
void InferRequest::allocate_inputs_dynamic() {
|
void InferRequest::allocate_inputs_dynamic() {
|
||||||
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_inputs_dynamic");
|
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_inputs_dynamic");
|
||||||
// allocate inputs
|
// allocate inputs
|
||||||
for (auto &input : m_graph->GetInputLayouts()) {
|
for (auto &input : m_graph->GetNetworkInputs()) {
|
||||||
InputInfo::Ptr ni = _networkInputs.at(input.first);
|
InputInfo::Ptr ni = _networkInputs.at(input.first);
|
||||||
TensorDesc desc = ni->getTensorDesc();
|
TensorDesc desc = input.second->getTensorDesc();
|
||||||
SizeVector& dims = desc.getDims();
|
|
||||||
|
|
||||||
if (!dims.empty()) {
|
|
||||||
*dims.begin() = static_cast<size_t>(m_graph->GetMaxDynamicBatchSize());
|
|
||||||
} else {
|
|
||||||
IE_THROW() << "Empty dimensions for input blob " << input.first;
|
|
||||||
}
|
|
||||||
|
|
||||||
Blob::Ptr inputBlob = create_host_blob(desc);
|
Blob::Ptr inputBlob = create_host_blob(desc);
|
||||||
if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) {
|
if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) {
|
||||||
@ -1020,17 +1066,10 @@ void InferRequest::allocate_outputs() {
|
|||||||
void InferRequest::allocate_outputs_dynamic() {
|
void InferRequest::allocate_outputs_dynamic() {
|
||||||
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_outputs_dynamic");
|
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_outputs_dynamic");
|
||||||
// allocate outputs
|
// allocate outputs
|
||||||
for (auto& no : _networkOutputs) {
|
for (auto& no : m_graph->GetNetworkOutputs()) {
|
||||||
std::string outputID = m_graph->MapOutputName(no.first);
|
std::string outputID = m_graph->MapOutputName(no.first);
|
||||||
DataPtr oi = no.second;
|
DataPtr oi = no.second;
|
||||||
TensorDesc desc = oi->getTensorDesc();
|
TensorDesc desc = oi->getTensorDesc();
|
||||||
SizeVector& dims = desc.getDims();
|
|
||||||
|
|
||||||
if (!dims.empty()) {
|
|
||||||
*dims.begin() = static_cast<size_t>(m_graph->GetMaxDynamicBatchSize());
|
|
||||||
} else {
|
|
||||||
IE_THROW() << "Empty dimensions for output blob " << no.first;
|
|
||||||
}
|
|
||||||
|
|
||||||
Blob::Ptr outputBlob = create_host_blob(desc);
|
Blob::Ptr outputBlob = create_host_blob(desc);
|
||||||
_outputs[no.first] = outputBlob;
|
_outputs[no.first] = outputBlob;
|
||||||
|
@ -376,16 +376,45 @@ QueryNetworkResult Plugin::QueryNetwork(const CNNNetwork& network,
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto clonedNetwork = CloneAndTransformNetwork(network, conf);
|
auto clonedNetwork = CloneAndTransformNetwork(network, conf);
|
||||||
auto ops = clonedNetwork.getFunction()->get_ordered_ops();
|
auto func = clonedNetwork.getFunction();
|
||||||
|
auto ops = func->get_ordered_ops();
|
||||||
std::unordered_set<std::string> supported;
|
std::unordered_set<std::string> supported;
|
||||||
std::unordered_set<std::string> unsupported;
|
std::unordered_set<std::string> unsupported;
|
||||||
|
|
||||||
std::unordered_set<std::string> constantsNames;
|
std::unordered_set<std::string> constantsNames;
|
||||||
std::vector<std::shared_ptr<ngraph::Node>> constants;
|
std::vector<std::shared_ptr<ngraph::Node>> constants;
|
||||||
|
|
||||||
|
std::map<std::string, ngraph::PartialShape> shapes;
|
||||||
|
std::map<std::string, std::pair<int64_t, int64_t>> batch_dim;
|
||||||
|
bool dyn_shape_batch_found = prog.IsDynBatchModel(func, shapes, batch_dim);
|
||||||
auto layerIsSupported = [&](std::shared_ptr<ngraph::Node> node) {
|
auto layerIsSupported = [&](std::shared_ptr<ngraph::Node> node) {
|
||||||
if (node->is_dynamic()) {
|
if (node->is_dynamic()) {
|
||||||
|
if (!dyn_shape_batch_found)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
auto pshape = node->get_output_partial_shape(0);
|
||||||
|
if (pshape.rank().is_dynamic())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
int dynCount = 0;
|
||||||
|
int64_t batch_idx = -1;
|
||||||
|
for (size_t i = 0; i < pshape.size(); i++) {
|
||||||
|
if (pshape[i].is_dynamic()) {
|
||||||
|
dynCount++;
|
||||||
|
if (batch_idx < 0) {
|
||||||
|
batch_idx = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dynCount != 1)
|
||||||
|
return false; // more than one dimension is dynamic
|
||||||
|
|
||||||
|
int64_t max_batch = pshape[batch_idx].get_max_length();
|
||||||
|
if (max_batch <= 1)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
if (ngraph::is_type<const ngraph::op::v0::PriorBox>(node) ||
|
if (ngraph::is_type<const ngraph::op::v0::PriorBox>(node) ||
|
||||||
ngraph::is_type<const ngraph::op::v0::PriorBoxClustered>(node) ||
|
ngraph::is_type<const ngraph::op::v0::PriorBoxClustered>(node) ||
|
||||||
@ -637,6 +666,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string,
|
|||||||
auto closest_pow_of_2 = [] (float x) {
|
auto closest_pow_of_2 = [] (float x) {
|
||||||
return pow(2, floor(log(x)/log(2)));
|
return pow(2, floor(log(x)/log(2)));
|
||||||
};
|
};
|
||||||
|
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||||
auto model_param = options.find("MODEL_PTR");
|
auto model_param = options.find("MODEL_PTR");
|
||||||
if (model_param == options.end()) {
|
if (model_param == options.end()) {
|
||||||
GPU_DEBUG_IF(debug_config->verbose >= 1) {
|
GPU_DEBUG_IF(debug_config->verbose >= 1) {
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
#include "intel_gpu/plugin/program.hpp"
|
#include "intel_gpu/plugin/program.hpp"
|
||||||
#include "ngraph/ops.hpp"
|
#include "ngraph/ops.hpp"
|
||||||
#include "ngraph_ops/nms_ie_internal.hpp"
|
#include "ngraph_ops/nms_ie_internal.hpp"
|
||||||
|
#include "openvino/core/graph_util.hpp"
|
||||||
#include "intel_gpu/plugin/itt.hpp"
|
#include "intel_gpu/plugin/itt.hpp"
|
||||||
#include "intel_gpu/runtime/debug_configuration.hpp"
|
#include "intel_gpu/runtime/debug_configuration.hpp"
|
||||||
|
|
||||||
@ -56,43 +57,74 @@ void Program::ValidateInputs(const std::shared_ptr<ngraph::Node>& op, std::vecto
|
|||||||
<< " op::v" << op->get_type_info().version << ")";
|
<< " op::v" << op->get_type_info().version << ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Program::CanProcessDynBatch(std::vector<std::shared_ptr<ngraph::Node>> ops, InferenceEngine::InputsDataMap networkInputs) const {
|
auto getParamName = [](const std::shared_ptr<ov::Node>& param) -> std::string {
|
||||||
if (networkInputs.empty())
|
const auto& names = param->get_output_tensor(0).get_names();
|
||||||
return false;
|
if (!names.empty())
|
||||||
|
return *names.begin();
|
||||||
|
else
|
||||||
|
return param->get_friendly_name();
|
||||||
|
};
|
||||||
|
|
||||||
for (auto op : ops) {
|
// detect the only supported dynamic shape case -
|
||||||
// TODO: do we have any other exception cases?
|
// exactly one dimension is dynamic in input params with defined min/max interval
|
||||||
if (std::dynamic_pointer_cast<ngraph::op::v1::Reshape>(op)) {
|
bool Program::IsDynBatchModel(const std::shared_ptr<ov::Model>& model,
|
||||||
if (op->get_input_shape(0)[0] == op->get_output_shape(0)[0])
|
std::map<std::string, ov::PartialShape>& shapes,
|
||||||
continue;
|
std::map<std::string, std::pair<int64_t, int64_t>>& batch_dim) {
|
||||||
}
|
for (const auto& param : model->get_parameters()) {
|
||||||
|
auto pname = getParamName(param);
|
||||||
// List of the operations which can lead to invalid dynamic batch processing
|
batch_dim[pname] = { -1, -1 };
|
||||||
if (std::dynamic_pointer_cast<ngraph::op::internal::NonMaxSuppressionIEInternal>(op) ||
|
if (param->get_output_partial_shape(0).rank().is_dynamic()) {
|
||||||
std::dynamic_pointer_cast<ngraph::op::v5::NonMaxSuppression>(op) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::v4::NonMaxSuppression>(op) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::v3::NonMaxSuppression>(op) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::v1::NonMaxSuppression>(op) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::v0::PSROIPooling>(op) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::v0::ROIPooling>(op) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::v0::PriorBox>(op) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::v0::DetectionOutput>(op) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::v1::Reshape>(op) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::v0::Squeeze>(op) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::v0::Unsqueeze>(op) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::v1::Transpose>(op) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::v4::Proposal>(op) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::v0::Proposal>(op)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
ov::PartialShape pshape = param->get_output_partial_shape(0);
|
||||||
auto customLayer = m_config.customLayers.find(op->get_type_name());
|
int dynCount = 0;
|
||||||
if (customLayer != m_config.customLayers.end()) {
|
int64_t batch_idx = -1;
|
||||||
|
for (size_t i = 0; i < pshape.size(); i++) {
|
||||||
|
if (pshape[i].is_dynamic()) {
|
||||||
|
dynCount++;
|
||||||
|
if (batch_idx < 0) {
|
||||||
|
batch_idx = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
switch (dynCount) {
|
||||||
|
case 1:
|
||||||
|
// exactly one dynamic dim
|
||||||
|
{
|
||||||
|
int64_t max_b = pshape[batch_idx].get_max_length();
|
||||||
|
if (max_b > 1) {
|
||||||
|
batch_dim[pname].first = batch_idx;
|
||||||
|
batch_dim[pname].second = max_b;
|
||||||
|
pshape[batch_idx] = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case 0:
|
||||||
|
// no dynamic dims - possible legacy case (case 1 above falls through to here so its shape is recorded as well)
|
||||||
|
shapes[pname] = pshape;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (batch_dim.empty())
|
||||||
return false;
|
return false;
|
||||||
|
bool dyn_shape_batch_found = false;
|
||||||
|
// detect 1st dyn dim, mark it and continue
|
||||||
|
auto bitr = batch_dim.begin();
|
||||||
|
dyn_shape_batch_found = bitr->second.first >= 0;
|
||||||
|
auto batch_val_1st = bitr->second.second;
|
||||||
|
bitr++;
|
||||||
|
for (; bitr != batch_dim.end(); bitr++) {
|
||||||
|
if (bitr->second.first >= 0) {
|
||||||
|
if (bitr->second.second != batch_val_1st) {
|
||||||
|
dyn_shape_batch_found = false;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
dyn_shape_batch_found = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return true;
|
return dyn_shape_batch_found;
|
||||||
}
|
}
|
||||||
|
|
||||||
Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::engine> engine, const Config& config,
|
Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::engine> engine, const Config& config,
|
||||||
@ -112,26 +144,136 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::en
|
|||||||
|
|
||||||
auto ops = func->get_ordered_ops();
|
auto ops = func->get_ordered_ops();
|
||||||
|
|
||||||
if (m_config.max_dynamic_batch > 1) {
|
bool dyn_shape_batch_found = false;
|
||||||
// check topology for applicability
|
std::map<std::string, ngraph::PartialShape> shapes;
|
||||||
if (!CanProcessDynBatch(ops, networkInputs)) {
|
std::map<std::string, std::pair<int64_t, int64_t>> batch_dim;
|
||||||
IE_THROW() << "Such topology cannot be compiled for dynamic batch!";
|
if (m_config.enableDynamicBatch) {
|
||||||
|
// in case of legacy dynamic batch,
|
||||||
|
// we assume a 4D input whose batch dimension is at index 0
|
||||||
|
auto param = func->get_parameters().front();
|
||||||
|
auto pname = getParamName(param);
|
||||||
|
shapes[pname] = param->get_output_partial_shape(0);
|
||||||
|
batch_dim[pname].first = 0;
|
||||||
|
batch_dim[pname].second = m_config.max_dynamic_batch;
|
||||||
|
} else {
|
||||||
|
dyn_shape_batch_found = IsDynBatchModel(func, shapes, batch_dim);
|
||||||
|
if (dyn_shape_batch_found) {
|
||||||
|
m_config.max_dynamic_batch = batch_dim.begin()->second.second;
|
||||||
|
} else {
|
||||||
|
if (!batch_dim.empty() && shapes.empty()) {
|
||||||
|
// more than one dynamic dim or dynamic rank
|
||||||
|
IE_THROW() << "Only dynamic batch is supported!";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int m_bv_sz = GetMaxBatchSizeForSingleProgram();
|
int m_bv_sz = GetMaxBatchSizeForSingleProgram();
|
||||||
|
m_max_batch = m_config.max_dynamic_batch;
|
||||||
|
|
||||||
m_max_batch = config.max_dynamic_batch;
|
if (dyn_shape_batch_found || config.max_dynamic_batch > 1) {
|
||||||
|
// compile log2 networks to serve dynamic batch requests
|
||||||
if (config.max_dynamic_batch > 1) {
|
|
||||||
for (int b = m_bv_sz - 1; b >= 0; b--) {
|
for (int b = m_bv_sz - 1; b >= 0; b--) {
|
||||||
inputLayouts.clear();
|
inputLayouts.clear();
|
||||||
outputDims.clear();
|
outputDims.clear();
|
||||||
primitiveIDs.clear();
|
primitiveIDs.clear();
|
||||||
blobMemCache.clear();
|
blobMemCache.clear();
|
||||||
|
|
||||||
ChangeInputBatch(1U << static_cast<unsigned>(b));
|
auto new_batch = 1U << static_cast<unsigned>(b);
|
||||||
m_programs.insert(m_programs.begin(), BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild));
|
ChangeInputBatch(new_batch);
|
||||||
|
|
||||||
|
// clone the source model, find the batch dim
|
||||||
|
// and reshape the model to next batch size
|
||||||
|
auto new_func = ov::clone_model(*func);
|
||||||
|
std::map<ov::Output<ov::Node>, ngraph::PartialShape> new_shapes;
|
||||||
|
for (const auto& param : new_func->get_parameters()) {
|
||||||
|
ov::PartialShape pshape = param->get_output_partial_shape(0);
|
||||||
|
|
||||||
|
auto pname = getParamName(param);
|
||||||
|
auto batch_idx = batch_dim[pname].first;
|
||||||
|
|
||||||
|
if (batch_idx >= 0) {
|
||||||
|
auto pshape = shapes[pname];
|
||||||
|
pshape[batch_idx] = new_batch;
|
||||||
|
new_shapes[param->output(0)] = pshape;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
new_func->reshape(new_shapes);
|
||||||
|
|
||||||
|
// reshape network input/output maps accordingly
|
||||||
|
// for correct network compilation
|
||||||
|
for (auto& new_input : new_func->inputs()) {
|
||||||
|
auto iname = new_input.get_node()->get_friendly_name();
|
||||||
|
auto it = networkInputs.find(iname);
|
||||||
|
if (it != networkInputs.end()) {
|
||||||
|
auto shape = new_input.get_shape();
|
||||||
|
auto l = it->second->getTensorDesc().getLayout();
|
||||||
|
it->second->getInputData()->reshape(shape, l);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto& new_output : new_func->outputs()) {
|
||||||
|
auto iname = new_output.get_node_shared_ptr()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name();
|
||||||
|
auto it = networkOutputs.find(iname);
|
||||||
|
if (it != networkOutputs.end()) {
|
||||||
|
auto shape = new_output.get_shape();
|
||||||
|
auto l = it->second->getTensorDesc().getLayout();
|
||||||
|
it->second->reshape(shape, l);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m_programs.insert(m_programs.begin(), BuildProgram(new_func->get_ordered_ops(), networkInputs, networkOutputs,
|
||||||
|
createTopologyOnly, partialBuild));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
// recompute maximal dynamic batch inputs/outputs for infer request
|
||||||
|
// and store them into internal maps
|
||||||
|
// same operations as above, but for maximum batch
|
||||||
|
auto new_func = ov::clone_model(*func);
|
||||||
|
std::map<ov::Output<ov::Node>, ngraph::PartialShape> new_shapes;
|
||||||
|
for (const auto& param : new_func->get_parameters()) {
|
||||||
|
ov::PartialShape pshape = param->get_output_partial_shape(0);
|
||||||
|
|
||||||
|
auto pname = getParamName(param);
|
||||||
|
auto batch_idx = batch_dim[pname].first;
|
||||||
|
|
||||||
|
if (batch_idx >= 0) {
|
||||||
|
auto pshape = shapes[pname];
|
||||||
|
pshape[batch_idx] = m_max_batch;
|
||||||
|
new_shapes[param->output(0)] = pshape;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
new_func->reshape(new_shapes);
|
||||||
|
|
||||||
|
for (auto& new_input : new_func->inputs()) {
|
||||||
|
auto iname = new_input.get_node()->get_friendly_name();
|
||||||
|
auto it = networkInputs.find(iname);
|
||||||
|
if (it != networkInputs.end()) {
|
||||||
|
auto shape = new_input.get_shape();
|
||||||
|
auto l = it->second->getTensorDesc().getLayout();
|
||||||
|
it->second->getInputData()->reshape(shape, l);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto& new_output : new_func->outputs()) {
|
||||||
|
auto iname = new_output.get_node_shared_ptr()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name();
|
||||||
|
auto it = networkOutputs.find(iname);
|
||||||
|
if (it != networkOutputs.end()) {
|
||||||
|
auto shape = new_output.get_shape();
|
||||||
|
auto l = it->second->getTensorDesc().getLayout();
|
||||||
|
SizeVector old_shape = it->second->getTensorDesc().getDims();
|
||||||
|
it->second->reshape(shape, l);
|
||||||
|
// detect changed output batch dimension
|
||||||
|
SizeVector new_shape = it->second->getTensorDesc().getDims();
|
||||||
|
for (int64_t i = 0; i < old_shape.size(); i++) {
|
||||||
|
if (old_shape[i] != new_shape[i]) {
|
||||||
|
m_output_batch_dim[iname] = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m_networkInputs = networkInputs;
|
||||||
|
m_networkOutputs = networkOutputs;
|
||||||
|
m_input_batch_dim = batch_dim;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild));
|
m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild));
|
||||||
|
@ -0,0 +1,118 @@
|
|||||||
|
// Copyright (C) 2018-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "openvino/runtime/core.hpp"
|
||||||
|
#include <common_test_utils/test_common.hpp>
|
||||||
|
#include "common_test_utils/common_utils.hpp"
|
||||||
|
#include "functional_test_utils/skip_tests_config.hpp"
|
||||||
|
#include "ngraph_functions/subgraph_builders.hpp"
|
||||||
|
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||||
|
|
||||||
|
using namespace ::testing;
|
||||||
|
using namespace ov::test;
|
||||||
|
|
||||||
|
// Parameter tuple consumed by OVDynamicBatchShape_Tests; it is unpacked with
// std::tie in both getTestCaseName() and SetUp(), in the order listed below.
using OVDynamicBatchParams = std::tuple<
|
||||||
|
std::vector<InputShape>, // dynamic and static case sizes
|
||||||
|
ElementType, // Network precision
|
||||||
|
std::string, // Device name
|
||||||
|
std::map<std::string, std::string> // Config
|
||||||
|
>;
|
||||||
|
|
||||||
|
// Value-parameterized fixture for the dynamic-batch shape tests.
// SetUp() builds a model with makeSplitMultiConvConcat() from the test parameters
// and then reshapes its "input_tensor" input to the parameterized partial shape.
class OVDynamicBatchShape_Tests : public WithParamInterface<OVDynamicBatchParams>,
|
||||||
|
virtual public ov::test::SubgraphBaseTest {
|
||||||
|
public:
|
||||||
|
// Builds the test-case name from the parameter tuple. The resulting string is
// "IS=<partial shapes>_TS=(<static shapes>)_netPRC=<precision>_targetDevice=<device>"
// followed by one "configItem=<key>_<value>_" chunk per configuration entry.
static std::string getTestCaseName(TestParamInfo<OVDynamicBatchParams> obj) {
|
||||||
|
std::vector<InputShape> inputShapes;
|
||||||
|
ElementType netPrecision;
|
||||||
|
std::string targetDevice;
|
||||||
|
std::map<std::string, std::string> configuration;
|
||||||
|
std::tie(inputShapes, netPrecision, targetDevice, configuration) = obj.param;
|
||||||
|
|
||||||
|
std::ostringstream result;
|
||||||
|
// "IS=" section: the first member of every InputShape, each followed by "_".
result << "IS=";
|
||||||
|
for (const auto& shape : inputShapes) {
|
||||||
|
result << CommonTestUtils::partialShape2str({ shape.first }) << "_";
|
||||||
|
}
|
||||||
|
// "TS=" section: for every InputShape, all entries of its second member,
// wrapped in one pair of parentheses and followed by "_".
result << "TS=";
|
||||||
|
for (const auto& shape : inputShapes) {
|
||||||
|
result << "(";
|
||||||
|
// NOTE(review): the emptiness check is redundant; the range-for below
// simply does nothing for an empty container.
if (!shape.second.empty()) {
|
||||||
|
for (const auto& itr : shape.second) {
|
||||||
|
result << CommonTestUtils::vec2str(itr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result << ")_";
|
||||||
|
}
|
||||||
|
result << "netPRC=" << netPrecision << "_";
|
||||||
|
// NOTE(review): no "_" is written after the device name, so the first
// "configItem=" chunk (if any) is appended directly to it.
result << "targetDevice=" << targetDevice;
|
||||||
|
if (!configuration.empty()) {
|
||||||
|
for (auto& configItem : configuration) {
|
||||||
|
result << "configItem=" << configItem.first << "_" << configItem.second << "_";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result.str();
|
||||||
|
}
|
||||||
|
// Resets `core` after each test. `core` is not declared in this class;
// presumably it is inherited from the base fixture - verify.
void TearDown() override {
|
||||||
|
core.reset();
|
||||||
|
}
|
||||||
|
protected:
|
||||||
|
// Unpacks the test parameters, builds the model and makes its input dynamic.
void SetUp() override {
|
||||||
|
// Drop any `core` instance that is still set before configuring the test.
if (core)
|
||||||
|
core.reset();
|
||||||
|
// targetDevice and configuration are assigned here but not declared in this
// class; presumably they are base-class members - verify.
std::tie(inputShape, netPrecision, targetDevice, configuration) = this->GetParam();
|
||||||
|
|
||||||
|
init_input_shapes(inputShape);
|
||||||
|
//TODO: think how we can switch between several input topologies in the future
|
||||||
|
// function = ngraph::builder::subgraph::makeSplitConvConcat(inputShape.front().first.get_min_shape(), netPrecision);
|
||||||
|
// The model is first built with a static shape: get_min_shape() of the first
// input's partial shape.
function = ngraph::builder::subgraph::makeSplitMultiConvConcat(inputShape.front().first.get_min_shape(), netPrecision);
|
||||||
|
|
||||||
|
// make topology dynamic
|
||||||
|
// The map key "input_tensor" has to match the tensor name of the model's input.
std::map<std::string, ov::PartialShape> dynShape;
|
||||||
|
dynShape["input_tensor"] = inputShape.front().first;
|
||||||
|
function->reshape(dynShape);
|
||||||
|
}
|
||||||
|
// NOTE(review): src_func is not referenced anywhere inside this class.
std::shared_ptr<ov::Model> src_func;
|
||||||
|
// std::map<std::string, std::string> configuration;
|
||||||
|
// Test parameters stored by SetUp().
std::vector<InputShape> inputShape;
|
||||||
|
ElementType netPrecision;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Parameterized test body: after the skip-list check it assigns a newly created
// ov::runtime::Core to `core` and calls run().
// run() is not defined in this file; presumably it comes from SubgraphBaseTest - verify.
TEST_P(OVDynamicBatchShape_Tests, InferDynamicBatchBound) {
|
||||||
|
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||||
|
core = std::make_shared<ov::runtime::Core>();
|
||||||
|
run();
|
||||||
|
}
|
||||||
|
|
||||||
|
// File-local parameter values and the test-suite instantiations for
// OVDynamicBatchShape_Tests.
namespace {
|
||||||
|
// Empty plugin configuration used by the plain GPU instantiation.
const std::map<std::string, std::string> config = {};
|
||||||
|
|
||||||
|
// Configuration used by the HETERO instantiation: sets TARGET_FALLBACK to the GPU device name.
const std::map<std::string, std::string> hetero_config = {
|
||||||
|
{"TARGET_FALLBACK", CommonTestUtils::DEVICE_GPU}
|
||||||
|
};
|
||||||
|
|
||||||
|
// One input description: a partial shape whose leading dimension is written as
// {1, 19} (the other dimensions are 4, 20, 20), followed by three concrete shapes
// whose leading dimension is 1, 7 and 17.
const std::vector<InputShape> inputShapes = {
|
||||||
|
{ { {1, 19}, 4, 20, 20}, { {1, 4, 20, 20}, {7, 4, 20, 20}, {17, 4, 20, 20} } }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Element types every instantiation is run with.
const std::vector<ElementType> netPrecisions = {
|
||||||
|
ElementType::f16,
|
||||||
|
ElementType::f32
|
||||||
|
};
|
||||||
|
|
||||||
|
// GPU instantiation. Values(inputShapes) passes the whole vector as a single
// parameter value, which matches the std::vector<InputShape> tuple element;
// ValuesIn(netPrecisions) expands to one test per element type.
INSTANTIATE_TEST_SUITE_P(smoke_GPU_DynBatch, OVDynamicBatchShape_Tests,
|
||||||
|
::testing::Combine(
|
||||||
|
::testing::Values(inputShapes),
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||||
|
::testing::Values(config)),
|
||||||
|
OVDynamicBatchShape_Tests::getTestCaseName);
|
||||||
|
|
||||||
|
// Same parameter grid, but targeting the HETERO device with hetero_config.
INSTANTIATE_TEST_SUITE_P(smoke_GPU_DynBatchHetero, OVDynamicBatchShape_Tests,
|
||||||
|
::testing::Combine(
|
||||||
|
::testing::Values(inputShapes),
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_HETERO),
|
||||||
|
::testing::Values(hetero_config)),
|
||||||
|
OVDynamicBatchShape_Tests::getTestCaseName);
|
||||||
|
} // namespace
|
@ -189,9 +189,11 @@ inline std::shared_ptr<ngraph::Function> makeKSOFunction(std::vector<size_t> inp
|
|||||||
return fnPtr;
|
return fnPtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::shared_ptr<ngraph::Function> makeSplitMultiConvConcat(std::vector<size_t> inputShape = {1, 4, 20, 20}) {
|
inline std::shared_ptr<ngraph::Function> makeSplitMultiConvConcat(std::vector<size_t> inputShape = {1, 4, 20, 20},
|
||||||
auto ngPrc = ngraph::element::Type_t::f32;
|
ngraph::element::Type_t ngPrc = ngraph::element::Type_t::f32) {
|
||||||
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
|
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
|
||||||
|
params.front()->set_friendly_name("Param_1");
|
||||||
|
params.front()->get_output_tensor(0).set_names({ "input_tensor" });
|
||||||
auto split = ngraph::builder::makeSplit(params[0], ngPrc, 2, 1);
|
auto split = ngraph::builder::makeSplit(params[0], ngPrc, 2, 1);
|
||||||
|
|
||||||
auto conv1_0 = ngraph::builder::makeConvolution(split->output(0), ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
|
auto conv1_0 = ngraph::builder::makeConvolution(split->output(0), ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
|
||||||
|
Loading…
Reference in New Issue
Block a user