Auto batching improved tests (#11179)

* wip remote tests2, fixed smoke_canInferOnUserContext

* completed the OV 1.0 tests for remote blobs

* updated OV 2.0 tests for remote blobs with auto-batching (using the ngraph func that is reshape-able by the batch)

* re-using the DetectionOutput-based ngraph func that is 100% batch-reshapable
This commit is contained in:
Maxim Shevtsov
2022-03-24 16:23:00 +03:00
committed by GitHub
parent b5dbabe41d
commit 7dc1d0935c
5 changed files with 64 additions and 85 deletions

View File

@@ -19,6 +19,7 @@
#include <transformations/common_optimizations/divide_fusion.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
#include "ngraph_functions/subgraph_builders.hpp"
using namespace testing;
@@ -239,48 +240,8 @@ TEST(TransformationTests, AutoBatch_FindBatch_NegativeTracking) {
}
TEST(TransformationTests, AutoBatch_FindBatch_AutoBatch_LabelPropagation_DO_detachment) {
const auto& data = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{1, 4, 10, 10});
const auto& constant_0 = std::make_shared<ov::opset1::Constant>(ov::element::f32, ov::Shape{1, 1, 1, 1});
const auto& mul_0 = std::make_shared<ov::opset1::Multiply>(data, constant_0);
const auto& filters = std::make_shared<ov::opset1::Constant>(ov::element::f32, ov::Shape{1, 4, 1, 1});
const auto& conv = std::make_shared<ov::opset1::Convolution>(
mul_0, filters, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0}, ov::Strides{1, 1});
const auto& box_logits_reshape = std::make_shared<ov::opset1::Constant>(
ov::element::i64, ov::Shape{2}, std::vector<int64_t>{0, -1});
const auto& box_logits = std::make_shared<ov::opset1::Reshape>(conv, box_logits_reshape, true);
const auto& four_times = std::make_shared<ov::opset1::Tile>(box_logits, std::make_shared<ov::opset1::Constant>(
ov::element::i64, ov::Shape{2}, std::vector<int64_t>{1, 4}));
const auto& third_input_reshape = std::make_shared<ov::opset1::Constant>(
ov::element::i64, ov::Shape{3}, std::vector<int64_t>{0, 1, -1});
const auto& third_input = std::make_shared<ov::opset1::Reshape>(four_times, third_input_reshape, true);
ngraph::op::DetectionOutput::Attributes attr;
attr.num_classes = 4;
attr.background_label_id = 0;
attr.top_k = 75;
attr.variance_encoded_in_target = true;
attr.keep_top_k = {50};
attr.code_type = std::string{"caffe.PriorBoxParameter.CORNER"};
attr.share_location = true;
attr.nms_threshold = 0.5f;
attr.confidence_threshold = 0.5f;
attr.clip_after_nms = false;
attr.clip_before_nms = false;
attr.decrease_label_id = false;
attr.normalized = true;
attr.input_height = 1;
attr.input_width = 1;
attr.objectness_score = 0.4f;
const auto& detection = std::make_shared<ov::opset1::DetectionOutput>(four_times, four_times, third_input, attr);
const auto& convert = std::make_shared<ov::opset1::Convert>(detection, ov::element::f32);
const auto& f = std::make_shared<ov::Model>(ov::NodeVector{convert}, ov::ParameterVector{data});
auto f = ngraph::builder::subgraph::makeDetectionOutput();
auto & data = f->get_parameters()[0];
ov::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();

View File

@@ -15,6 +15,7 @@
#include <common_test_utils/test_common.hpp>
#include <functional_test_utils/plugin_cache.hpp>
#include "base/ov_behavior_test_utils.hpp"
#include "ngraph_functions/subgraph_builders.hpp"
#include "functional_test_utils/blob_utils.hpp"
@@ -30,13 +31,16 @@ protected:
public:
void SetUp() override {
fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
deviceName = CommonTestUtils::DEVICE_GPU;
auto with_auto_batching = this->GetParam();
if (with_auto_batching) { // BATCH:GPU
deviceName = std::string(CommonTestUtils::DEVICE_BATCH) + ":" + deviceName;
config = {{CONFIG_KEY(ALLOW_AUTO_BATCHING), CONFIG_VALUE(YES)}};
}
config =
{{CONFIG_KEY(PERFORMANCE_HINT) , CONFIG_VALUE(THROUGHPUT)},
// immediate timeout to avoid increasing the test time
{CONFIG_KEY(AUTO_BATCH_TIMEOUT) , "0"},
};
}
fn_ptr = ov::test::behavior::getDefaultNGraphFunctionForTheDevice(with_auto_batching ? CommonTestUtils::DEVICE_BATCH : deviceName);
}
static std::string getTestCaseName(const testing::TestParamInfo<bool>& obj) {
auto with_auto_batch = obj.param;
@@ -55,7 +59,7 @@ TEST_P(RemoteBlob_Test, smoke_canInputUserBlob) {
// TODO: Issue: investigate issue with IECore
auto ie = InferenceEngine::Core();
auto exec_net = ie.LoadNetwork(net, deviceName);
auto exec_net = ie.LoadNetwork(net, deviceName, config);
// regular inference
auto inf_req_regular = exec_net.CreateInferRequest();
@@ -169,7 +173,7 @@ TEST_P(RemoteBlob_Test, smoke_canInputPluginRemoteBlob) {
// TODO: Issue: investigate issue with IECore
auto ie = InferenceEngine::Core();
auto exec_net = ie.LoadNetwork(net, deviceName);
auto exec_net = ie.LoadNetwork(net, deviceName, config);
// regular inference
auto inf_req_regular = exec_net.CreateInferRequest();
@@ -213,7 +217,6 @@ TEST_P(RemoteBlob_Test, smoke_canInputPluginRemoteBlob) {
TEST_P(RemoteBlob_Test, smoke_canInferOnUserContext) {
auto fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
CNNNetwork net(fn_ptr);
net.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
@@ -237,7 +240,7 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserContext) {
auto remote_context = make_shared_context(*ie, deviceName, ocl_instance->_context.get());
// since there is no way to enable the Auto-Batching thru the device name when loading with the RemoteContext
// (as the device name is deduced from the context, which is the "GPU")
// the only-way to test the auto-batching is explicit config with ALLOW_AUTO_BATCHING set to YES
// the only-way to test the auto-batching is explicit config with perf hint set to THROUGHPUT
auto exec_net_shared = ie->LoadNetwork(net, remote_context, config);
auto inf_req_shared = exec_net_shared.CreateInferRequest();
inf_req_shared.SetBlob(net.getInputsInfo().begin()->first, fakeImageData);
@@ -258,7 +261,6 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_out_of_order) {
#if defined _WIN32
GTEST_SKIP();
#endif
auto fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
CNNNetwork net(fn_ptr);
net.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
@@ -291,7 +293,7 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_out_of_order) {
// In this scenario we create shared OCL queue and run simple pre-process action and post-process action (buffer copies in both cases)
// without calling thread blocks
auto remote_context = make_shared_context(*ie, deviceName, ocl_instance->_queue.get());
auto exec_net_shared = ie->LoadNetwork(net, remote_context);
auto exec_net_shared = ie->LoadNetwork(net, remote_context); // no auto-batching support, so no config is passed
auto inf_req_shared = exec_net_shared.CreateInferRequest();
// Allocate shared buffers for input and output data which will be set to infer request
@@ -350,7 +352,6 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_in_order) {
#if defined _WIN32
GTEST_SKIP();
#endif
auto fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
CNNNetwork net(fn_ptr);
net.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
@@ -384,7 +385,7 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_in_order) {
// In this scenario we create shared OCL queue and run simple pre-process action and post-process action (buffer copies in both cases)
// without calling thread blocks
auto remote_context = make_shared_context(*ie, deviceName, ocl_instance->_queue.get());
auto exec_net_shared = ie->LoadNetwork(net, remote_context);
auto exec_net_shared = ie->LoadNetwork(net, remote_context); // no auto-batching support, so no config is passed
auto inf_req_shared = exec_net_shared.CreateInferRequest();
// Allocate shared buffers for input and output data which will be set to infer request

View File

@@ -14,6 +14,7 @@
#include <remote_blob_tests/remote_blob_helpers.hpp>
#include <common_test_utils/test_common.hpp>
#include <functional_test_utils/plugin_cache.hpp>
#include "base/ov_behavior_test_utils.hpp"
#include "ngraph_functions/subgraph_builders.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "openvino/core/preprocess/pre_post_process.hpp"
@@ -62,16 +63,22 @@ class OVRemoteTensorInputBlob_Test : public OVRemoteTensor_Test,
protected:
std::shared_ptr<ngraph::Function> fn_ptr;
std::string deviceName;
ov::AnyMap config;
public:
void SetUp() override {
fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
deviceName = CommonTestUtils::DEVICE_GPU;
RemoteTensorSharingType sharing_type;
bool with_auto_batching;
std::tie(sharing_type, with_auto_batching) = this->GetParam();
if (with_auto_batching) // BATCH:GPU
deviceName = std::string(CommonTestUtils::DEVICE_BATCH) + ":" + deviceName;
if (with_auto_batching) {
config =
{ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
// immediate timeout to avoid increasing the test time
ov::auto_batch_timeout(0)
};
}
fn_ptr = ov::test::behavior::getDefaultNGraphFunctionForTheDevice(with_auto_batching ? CommonTestUtils::DEVICE_BATCH : deviceName);
}
static std::string getTestCaseName(const testing::TestParamInfo<RemoteTensorSharingTestOptionsParams>& obj) {
RemoteTensorSharingType sharing_type;
@@ -160,7 +167,7 @@ TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputRemoteTensor) {
|| RemoteTensorSharingType::PLUGIN_USM_DEVICE_TENSOR == sharing_type))
GTEST_SKIP();
auto exec_net = ie.compile_model(function, deviceName);
auto exec_net = ie.compile_model(function, deviceName, config);
// regular inference
auto inf_req_regular = exec_net.create_infer_request();
@@ -343,10 +350,14 @@ public:
fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
deviceName = CommonTestUtils::DEVICE_GPU;
auto with_auto_batching = this->GetParam();
if (with_auto_batching) { // BATCH:GPU
deviceName = std::string(CommonTestUtils::DEVICE_BATCH) + ":" + deviceName;
config = {{CONFIG_KEY(ALLOW_AUTO_BATCHING), CONFIG_VALUE(YES)}};
if (with_auto_batching) {
config =
{ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
// immediate timeout to avoid increasing the test time
ov::auto_batch_timeout(0)
};
}
fn_ptr = ov::test::behavior::getDefaultNGraphFunctionForTheDevice(with_auto_batching ? CommonTestUtils::DEVICE_BATCH : deviceName);
}
static std::string getTestCaseName(const testing::TestParamInfo<bool>& obj) {
auto with_auto_batch = obj.param;
@@ -478,7 +489,7 @@ TEST_P(OVRemoteTensor_TestsWithContext, smoke_canInferOnUserQueue_out_of_order)
cl::Buffer shared_output_buffer(ocl_instance->_context, CL_MEM_READ_WRITE, out_size, NULL, &err);
auto remote_context = ov::intel_gpu::ocl::ClContext(ie, ocl_instance->_queue.get());
auto exec_net_shared = ie.compile_model(function, remote_context);
auto exec_net_shared = ie.compile_model(function, remote_context); // no auto-batching support, so no config is passed
auto gpu_context = exec_net_shared.get_context().as<ov::intel_gpu::ocl::ClContext>();
auto gpu_in_tensor = gpu_context.create_tensor(input->get_output_element_type(0), input->get_output_shape(0), shared_input_buffer);
@@ -558,7 +569,7 @@ TEST_P(OVRemoteTensor_TestsWithContext, smoke_canInferOnUserQueue_in_order) {
cl::Buffer shared_output_buffer(ocl_instance->_context, CL_MEM_READ_WRITE, out_size, NULL, &err);
auto remote_context = ov::intel_gpu::ocl::ClContext(ie, ocl_instance->_queue.get());
auto exec_net_shared = ie.compile_model(function, remote_context);
auto exec_net_shared = ie.compile_model(function, remote_context); // no auto-batching support, so no config is passed
auto gpu_context = exec_net_shared.get_context().as<ov::intel_gpu::ocl::ClContext>();
auto gpu_in_tensor = gpu_context.create_tensor(input->get_output_element_type(0), input->get_output_shape(0), shared_input_buffer);

View File

@@ -143,8 +143,8 @@ class AutoBatching_Test_DetectionOutput : public AutoBatching_Test {
public:
void SetUp() override {
std::tie(device_name, use_get_blob, num_streams, num_requests, num_batch) = this->GetParam();
fn_ptrs = {ngraph::builder::subgraph::makeEltwisePlusDetectionOutput(),
ngraph::builder::subgraph::makeEltwisePlusDetectionOutput()};
fn_ptrs = {ngraph::builder::subgraph::makeDetectionOutput(),
ngraph::builder::subgraph::makeDetectionOutput()};
};
static std::string getTestCaseName(const testing::TestParamInfo<AutoBatchTwoNetsParams> &obj) {

View File

@@ -329,23 +329,29 @@ inline std::shared_ptr<ngraph::Function> makeSingleConv(std::vector<size_t> inpu
return fn_ptr;
}
inline std::shared_ptr<ngraph::Function> makeEltwisePlusDetectionOutput(std::vector<std::vector<size_t>> inShapes =
{{1, 60}, {1, 165}, {1, 1, 75}},
ngraph::element::Type_t type = ngraph::element::Type_t::f32) {
// adding Eltwise so that we can tests Auto-Batching's HETERO code-path that splits the DetectionOutput and the rest of the network
auto params = ngraph::builder::makeParams(ngraph::element::f32, inShapes);
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::opset3::Parameter>(params));
ngraph::OutputVector outs;
for (size_t i = 0; i < inShapes.size(); i++) {
auto shape = inShapes[i];
auto p = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{shape});
auto add = ngraph::builder::makeEltwise(paramOuts[i], p, ngraph::helpers::EltwiseTypes::ADD);
params.push_back(p);
outs.push_back(add->output(0));
}
inline std::shared_ptr<ngraph::Function> makeDetectionOutput(ngraph::element::Type_t type = ngraph::element::Type_t::f32) {
const auto& data = std::make_shared<ngraph::opset1::Parameter>(type, ngraph::Shape{1, 4, 10, 10});
const auto& constant_0 = std::make_shared<ngraph::opset1::Constant>(type, ngraph::Shape{1, 1, 1, 1});
const auto& mul_0 = std::make_shared<ngraph::opset1::Multiply>(data, constant_0);
const auto& filters = std::make_shared<ngraph::opset1::Constant>(type, ngraph::Shape{1, 4, 1, 1});
const auto& conv = std::make_shared<ngraph::opset1::Convolution>(
mul_0, filters, ngraph::Strides{1, 1}, ngraph::CoordinateDiff{0, 0}, ngraph::CoordinateDiff{0, 0}, ngraph::Strides{1, 1});
const auto& box_logits_reshape = std::make_shared<ngraph::opset1::Constant>(
ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{0, -1});
const auto& box_logits = std::make_shared<ngraph::opset1::Reshape>(conv, box_logits_reshape, true);
const auto& four_times = std::make_shared<ngraph::opset1::Tile>(box_logits, std::make_shared<ngraph::opset1::Constant>(
ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{1, 4}));
const auto& third_input_reshape = std::make_shared<ngraph::opset1::Constant>(
ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{0, 1, -1});
const auto& third_input = std::make_shared<ngraph::opset1::Reshape>(four_times, third_input_reshape, true);
ngraph::op::DetectionOutput::Attributes attr;
attr.num_classes = 11;
attr.num_classes = 4;
attr.background_label_id = 0;
attr.top_k = 75;
attr.variance_encoded_in_target = true;
@@ -357,14 +363,14 @@ inline std::shared_ptr<ngraph::Function> makeEltwisePlusDetectionOutput(std::vec
attr.clip_after_nms = false;
attr.clip_before_nms = false;
attr.decrease_label_id = false;
attr.normalized = false;
attr.normalized = true;
attr.input_height = 1;
attr.input_width = 1;
attr.objectness_score = 0.4f;
const auto& detection = std::make_shared<ngraph::opset1::DetectionOutput>(four_times, four_times, third_input, attr);
const auto& convert = std::make_shared<ngraph::opset1::Convert>(detection, type);
auto detOut = ngraph::builder::makeDetectionOutput(outs, attr);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
return std::make_shared<ngraph::Function>(results, params, "EltWiseWithDetectionOutput");
return std::make_shared<ov::Model>(ov::NodeVector{convert}, ov::ParameterVector{data}, "SplitableDetectionOutput");
}
inline std::shared_ptr<ngraph::Function> makeMultiSingleConv(std::vector<size_t> inputShape = {1, 3, 24, 24},