Auto batching improved tests (#11179)

* wip remote tests2, fixed smoke_canInferOnUserContext

* completed the OV 1.0 tests for remote blobs

* updated OV 2.0 tests for remote blobs with auto-batching (using the ngraph func that is reshape-able by the batch)

* re-using the DetectionOutput-based ngraph func that is 100% batch-reshapable
This commit is contained in:
Maxim Shevtsov
2022-03-24 16:23:00 +03:00
committed by GitHub
parent b5dbabe41d
commit 7dc1d0935c
5 changed files with 64 additions and 85 deletions

View File

@@ -19,6 +19,7 @@
#include <transformations/common_optimizations/divide_fusion.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
#include "ngraph_functions/subgraph_builders.hpp"
using namespace testing;
@@ -239,48 +240,8 @@ TEST(TransformationTests, AutoBatch_FindBatch_NegativeTracking) {
}
TEST(TransformationTests, AutoBatch_FindBatch_AutoBatch_LabelPropagation_DO_detachment) {
const auto& data = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{1, 4, 10, 10});
const auto& constant_0 = std::make_shared<ov::opset1::Constant>(ov::element::f32, ov::Shape{1, 1, 1, 1});
const auto& mul_0 = std::make_shared<ov::opset1::Multiply>(data, constant_0);
const auto& filters = std::make_shared<ov::opset1::Constant>(ov::element::f32, ov::Shape{1, 4, 1, 1});
const auto& conv = std::make_shared<ov::opset1::Convolution>(
mul_0, filters, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0}, ov::Strides{1, 1});
const auto& box_logits_reshape = std::make_shared<ov::opset1::Constant>(
ov::element::i64, ov::Shape{2}, std::vector<int64_t>{0, -1});
const auto& box_logits = std::make_shared<ov::opset1::Reshape>(conv, box_logits_reshape, true);
const auto& four_times = std::make_shared<ov::opset1::Tile>(box_logits, std::make_shared<ov::opset1::Constant>(
ov::element::i64, ov::Shape{2}, std::vector<int64_t>{1, 4}));
const auto& third_input_reshape = std::make_shared<ov::opset1::Constant>(
ov::element::i64, ov::Shape{3}, std::vector<int64_t>{0, 1, -1});
const auto& third_input = std::make_shared<ov::opset1::Reshape>(four_times, third_input_reshape, true);
ngraph::op::DetectionOutput::Attributes attr;
attr.num_classes = 4;
attr.background_label_id = 0;
attr.top_k = 75;
attr.variance_encoded_in_target = true;
attr.keep_top_k = {50};
attr.code_type = std::string{"caffe.PriorBoxParameter.CORNER"};
attr.share_location = true;
attr.nms_threshold = 0.5f;
attr.confidence_threshold = 0.5f;
attr.clip_after_nms = false;
attr.clip_before_nms = false;
attr.decrease_label_id = false;
attr.normalized = true;
attr.input_height = 1;
attr.input_width = 1;
attr.objectness_score = 0.4f;
const auto& detection = std::make_shared<ov::opset1::DetectionOutput>(four_times, four_times, third_input, attr);
const auto& convert = std::make_shared<ov::opset1::Convert>(detection, ov::element::f32);
const auto& f = std::make_shared<ov::Model>(ov::NodeVector{convert}, ov::ParameterVector{data});
auto f = ngraph::builder::subgraph::makeDetectionOutput();
auto & data = f->get_parameters()[0];
ov::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();

View File

@@ -15,6 +15,7 @@
#include <common_test_utils/test_common.hpp>
#include <functional_test_utils/plugin_cache.hpp>
#include "base/ov_behavior_test_utils.hpp"
#include "ngraph_functions/subgraph_builders.hpp"
#include "functional_test_utils/blob_utils.hpp"
@@ -30,13 +31,16 @@ protected:
public:
void SetUp() override {
fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
deviceName = CommonTestUtils::DEVICE_GPU;
auto with_auto_batching = this->GetParam();
if (with_auto_batching) { // BATCH:GPU
deviceName = std::string(CommonTestUtils::DEVICE_BATCH) + ":" + deviceName;
config = {{CONFIG_KEY(ALLOW_AUTO_BATCHING), CONFIG_VALUE(YES)}};
}
config =
{{CONFIG_KEY(PERFORMANCE_HINT) , CONFIG_VALUE(THROUGHPUT)},
// immediate timeout to avoid increasing the test time
{CONFIG_KEY(AUTO_BATCH_TIMEOUT) , "0"},
};
}
fn_ptr = ov::test::behavior::getDefaultNGraphFunctionForTheDevice(with_auto_batching ? CommonTestUtils::DEVICE_BATCH : deviceName);
}
static std::string getTestCaseName(const testing::TestParamInfo<bool>& obj) {
auto with_auto_batch = obj.param;
@@ -55,7 +59,7 @@ TEST_P(RemoteBlob_Test, smoke_canInputUserBlob) {
// TODO: Issue: investigate issue with IECore
auto ie = InferenceEngine::Core();
auto exec_net = ie.LoadNetwork(net, deviceName);
auto exec_net = ie.LoadNetwork(net, deviceName, config);
// regular inference
auto inf_req_regular = exec_net.CreateInferRequest();
@@ -169,7 +173,7 @@ TEST_P(RemoteBlob_Test, smoke_canInputPluginRemoteBlob) {
// TODO: Issue: investigate issue with IECore
auto ie = InferenceEngine::Core();
auto exec_net = ie.LoadNetwork(net, deviceName);
auto exec_net = ie.LoadNetwork(net, deviceName, config);
// regular inference
auto inf_req_regular = exec_net.CreateInferRequest();
@@ -213,7 +217,6 @@ TEST_P(RemoteBlob_Test, smoke_canInputPluginRemoteBlob) {
TEST_P(RemoteBlob_Test, smoke_canInferOnUserContext) {
auto fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
CNNNetwork net(fn_ptr);
net.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
@@ -237,7 +240,7 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserContext) {
auto remote_context = make_shared_context(*ie, deviceName, ocl_instance->_context.get());
// since there is no way to enable the Auto-Batching thru the device name when loading with the RemoteContext
// (as the device name is deduced from the context, which is the "GPU")
// the only-way to test the auto-batching is explicit config with ALLOW_AUTO_BATCHING set to YES
// the only-way to test the auto-batching is explicit config with perf hint set to THROUGHPUT
auto exec_net_shared = ie->LoadNetwork(net, remote_context, config);
auto inf_req_shared = exec_net_shared.CreateInferRequest();
inf_req_shared.SetBlob(net.getInputsInfo().begin()->first, fakeImageData);
@@ -258,7 +261,6 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_out_of_order) {
#if defined _WIN32
GTEST_SKIP();
#endif
auto fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
CNNNetwork net(fn_ptr);
net.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
@@ -291,7 +293,7 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_out_of_order) {
// In this scenario we create shared OCL queue and run simple pre-process action and post-process action (buffer copies in both cases)
// without calling thread blocks
auto remote_context = make_shared_context(*ie, deviceName, ocl_instance->_queue.get());
auto exec_net_shared = ie->LoadNetwork(net, remote_context);
auto exec_net_shared = ie->LoadNetwork(net, remote_context); // no auto-batching support, so no config is passed
auto inf_req_shared = exec_net_shared.CreateInferRequest();
// Allocate shared buffers for input and output data which will be set to infer request
@@ -350,7 +352,6 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_in_order) {
#if defined _WIN32
GTEST_SKIP();
#endif
auto fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
CNNNetwork net(fn_ptr);
net.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
@@ -384,7 +385,7 @@ TEST_P(RemoteBlob_Test, smoke_canInferOnUserQueue_in_order) {
// In this scenario we create shared OCL queue and run simple pre-process action and post-process action (buffer copies in both cases)
// without calling thread blocks
auto remote_context = make_shared_context(*ie, deviceName, ocl_instance->_queue.get());
auto exec_net_shared = ie->LoadNetwork(net, remote_context);
auto exec_net_shared = ie->LoadNetwork(net, remote_context); // no auto-batching support, so no config is passed
auto inf_req_shared = exec_net_shared.CreateInferRequest();
// Allocate shared buffers for input and output data which will be set to infer request

View File

@@ -14,6 +14,7 @@
#include <remote_blob_tests/remote_blob_helpers.hpp>
#include <common_test_utils/test_common.hpp>
#include <functional_test_utils/plugin_cache.hpp>
#include "base/ov_behavior_test_utils.hpp"
#include "ngraph_functions/subgraph_builders.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "openvino/core/preprocess/pre_post_process.hpp"
@@ -62,16 +63,22 @@ class OVRemoteTensorInputBlob_Test : public OVRemoteTensor_Test,
protected:
std::shared_ptr<ngraph::Function> fn_ptr;
std::string deviceName;
ov::AnyMap config;
public:
void SetUp() override {
fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
deviceName = CommonTestUtils::DEVICE_GPU;
RemoteTensorSharingType sharing_type;
bool with_auto_batching;
std::tie(sharing_type, with_auto_batching) = this->GetParam();
if (with_auto_batching) // BATCH:GPU
deviceName = std::string(CommonTestUtils::DEVICE_BATCH) + ":" + deviceName;
if (with_auto_batching) {
config =
{ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
// immediate timeout to avoid increasing the test time
ov::auto_batch_timeout(0)
};
}
fn_ptr = ov::test::behavior::getDefaultNGraphFunctionForTheDevice(with_auto_batching ? CommonTestUtils::DEVICE_BATCH : deviceName);
}
static std::string getTestCaseName(const testing::TestParamInfo<RemoteTensorSharingTestOptionsParams>& obj) {
RemoteTensorSharingType sharing_type;
@@ -160,7 +167,7 @@ TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputRemoteTensor) {
|| RemoteTensorSharingType::PLUGIN_USM_DEVICE_TENSOR == sharing_type))
GTEST_SKIP();
auto exec_net = ie.compile_model(function, deviceName);
auto exec_net = ie.compile_model(function, deviceName, config);
// regular inference
auto inf_req_regular = exec_net.create_infer_request();
@@ -343,10 +350,14 @@ public:
fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
deviceName = CommonTestUtils::DEVICE_GPU;
auto with_auto_batching = this->GetParam();
if (with_auto_batching) { // BATCH:GPU
deviceName = std::string(CommonTestUtils::DEVICE_BATCH) + ":" + deviceName;
config = {{CONFIG_KEY(ALLOW_AUTO_BATCHING), CONFIG_VALUE(YES)}};
if (with_auto_batching) {
config =
{ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
// immediate timeout to avoid increasing the test time
ov::auto_batch_timeout(0)
};
}
fn_ptr = ov::test::behavior::getDefaultNGraphFunctionForTheDevice(with_auto_batching ? CommonTestUtils::DEVICE_BATCH : deviceName);
}
static std::string getTestCaseName(const testing::TestParamInfo<bool>& obj) {
auto with_auto_batch = obj.param;
@@ -478,7 +489,7 @@ TEST_P(OVRemoteTensor_TestsWithContext, smoke_canInferOnUserQueue_out_of_order)
cl::Buffer shared_output_buffer(ocl_instance->_context, CL_MEM_READ_WRITE, out_size, NULL, &err);
auto remote_context = ov::intel_gpu::ocl::ClContext(ie, ocl_instance->_queue.get());
auto exec_net_shared = ie.compile_model(function, remote_context);
auto exec_net_shared = ie.compile_model(function, remote_context); // no auto-batching support, so no config is passed
auto gpu_context = exec_net_shared.get_context().as<ov::intel_gpu::ocl::ClContext>();
auto gpu_in_tensor = gpu_context.create_tensor(input->get_output_element_type(0), input->get_output_shape(0), shared_input_buffer);
@@ -558,7 +569,7 @@ TEST_P(OVRemoteTensor_TestsWithContext, smoke_canInferOnUserQueue_in_order) {
cl::Buffer shared_output_buffer(ocl_instance->_context, CL_MEM_READ_WRITE, out_size, NULL, &err);
auto remote_context = ov::intel_gpu::ocl::ClContext(ie, ocl_instance->_queue.get());
auto exec_net_shared = ie.compile_model(function, remote_context);
auto exec_net_shared = ie.compile_model(function, remote_context); // no auto-batching support, so no config is passed
auto gpu_context = exec_net_shared.get_context().as<ov::intel_gpu::ocl::ClContext>();
auto gpu_in_tensor = gpu_context.create_tensor(input->get_output_element_type(0), input->get_output_shape(0), shared_input_buffer);

View File

@@ -143,8 +143,8 @@ class AutoBatching_Test_DetectionOutput : public AutoBatching_Test {
public:
void SetUp() override {
std::tie(device_name, use_get_blob, num_streams, num_requests, num_batch) = this->GetParam();
fn_ptrs = {ngraph::builder::subgraph::makeEltwisePlusDetectionOutput(),
ngraph::builder::subgraph::makeEltwisePlusDetectionOutput()};
fn_ptrs = {ngraph::builder::subgraph::makeDetectionOutput(),
ngraph::builder::subgraph::makeDetectionOutput()};
};
static std::string getTestCaseName(const testing::TestParamInfo<AutoBatchTwoNetsParams> &obj) {

View File

@@ -329,23 +329,29 @@ inline std::shared_ptr<ngraph::Function> makeSingleConv(std::vector<size_t> inpu
return fn_ptr;
}
inline std::shared_ptr<ngraph::Function> makeEltwisePlusDetectionOutput(std::vector<std::vector<size_t>> inShapes =
{{1, 60}, {1, 165}, {1, 1, 75}},
ngraph::element::Type_t type = ngraph::element::Type_t::f32) {
// adding Eltwise so that we can tests Auto-Batching's HETERO code-path that splits the DetectionOutput and the rest of the network
auto params = ngraph::builder::makeParams(ngraph::element::f32, inShapes);
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::opset3::Parameter>(params));
ngraph::OutputVector outs;
for (size_t i = 0; i < inShapes.size(); i++) {
auto shape = inShapes[i];
auto p = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{shape});
auto add = ngraph::builder::makeEltwise(paramOuts[i], p, ngraph::helpers::EltwiseTypes::ADD);
params.push_back(p);
outs.push_back(add->output(0));
}
inline std::shared_ptr<ngraph::Function> makeDetectionOutput(ngraph::element::Type_t type = ngraph::element::Type_t::f32) {
const auto& data = std::make_shared<ngraph::opset1::Parameter>(type, ngraph::Shape{1, 4, 10, 10});
const auto& constant_0 = std::make_shared<ngraph::opset1::Constant>(type, ngraph::Shape{1, 1, 1, 1});
const auto& mul_0 = std::make_shared<ngraph::opset1::Multiply>(data, constant_0);
const auto& filters = std::make_shared<ngraph::opset1::Constant>(type, ngraph::Shape{1, 4, 1, 1});
const auto& conv = std::make_shared<ngraph::opset1::Convolution>(
mul_0, filters, ngraph::Strides{1, 1}, ngraph::CoordinateDiff{0, 0}, ngraph::CoordinateDiff{0, 0}, ngraph::Strides{1, 1});
const auto& box_logits_reshape = std::make_shared<ngraph::opset1::Constant>(
ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{0, -1});
const auto& box_logits = std::make_shared<ngraph::opset1::Reshape>(conv, box_logits_reshape, true);
const auto& four_times = std::make_shared<ngraph::opset1::Tile>(box_logits, std::make_shared<ngraph::opset1::Constant>(
ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{1, 4}));
const auto& third_input_reshape = std::make_shared<ngraph::opset1::Constant>(
ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{0, 1, -1});
const auto& third_input = std::make_shared<ngraph::opset1::Reshape>(four_times, third_input_reshape, true);
ngraph::op::DetectionOutput::Attributes attr;
attr.num_classes = 11;
attr.num_classes = 4;
attr.background_label_id = 0;
attr.top_k = 75;
attr.variance_encoded_in_target = true;
@@ -357,14 +363,14 @@ inline std::shared_ptr<ngraph::Function> makeEltwisePlusDetectionOutput(std::vec
attr.clip_after_nms = false;
attr.clip_before_nms = false;
attr.decrease_label_id = false;
attr.normalized = false;
attr.normalized = true;
attr.input_height = 1;
attr.input_width = 1;
attr.objectness_score = 0.4f;
const auto& detection = std::make_shared<ngraph::opset1::DetectionOutput>(four_times, four_times, third_input, attr);
const auto& convert = std::make_shared<ngraph::opset1::Convert>(detection, type);
auto detOut = ngraph::builder::makeDetectionOutput(outs, attr);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
return std::make_shared<ngraph::Function>(results, params, "EltWiseWithDetectionOutput");
return std::make_shared<ov::Model>(ov::NodeVector{convert}, ov::ParameterVector{data}, "SplitableDetectionOutput");
}
inline std::shared_ptr<ngraph::Function> makeMultiSingleConv(std::vector<size_t> inputShape = {1, 3, 24, 24},