benchmark_app with dynamic reshapes and API 2.0 (#8609)

* API 2.0 changes

* stylefix

* Update samples/cpp/benchmark_app/main.cpp

Co-authored-by: Nadezhda Ageeva <nkogteva@gmail.com>

* Update samples/cpp/benchmark_app/infer_request_wrap.hpp

Co-authored-by: Ilya Churaev <ilyachur@gmail.com>

* Update samples/cpp/benchmark_app/utils.cpp

Co-authored-by: Ilya Churaev <ilyachur@gmail.com>

* fixes

* fix for: GPU headers were moved to another folder... yet again

* fix for mac build paranoia

* rename functions, classes, and files; change logic to work with inputs()

* stylefix

* 2nd portion of fixes

* stylefix

* Batch warnings

Co-authored-by: Nadezhda Ageeva <nkogteva@gmail.com>
Co-authored-by: Ilya Churaev <ilyachur@gmail.com>
Author: Fedor Zharinov
Date: 2021-12-30 19:09:12 +03:00
Committed by: GitHub
Parent: e52c96389d
Commit: 4dbc9ae2e7
16 changed files with 1284 additions and 939 deletions
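
For context, the API 2.0 flow this commit moves benchmark_app onto looks roughly like the sketch below. This is a minimal illustration rather than code from the commit: the model path and input name are placeholders, and the ov::runtime namespace matches this snapshot of the 2022.1-era API (later releases shortened it to ov::).

#include <openvino/openvino.hpp>

int main() {
    ov::runtime::Core core;
    // "model.xml" is a placeholder; mark the batch dimension as dynamic
    auto model = core.read_model("model.xml");
    model->reshape(ov::PartialShape{ov::Dimension::dynamic(), 3, 224, 224});
    auto compiled = core.compile_model(model, "CPU");
    auto request = compiled.create_infer_request();
    // Pick a concrete shape for each iteration; "data" is a hypothetical input name
    request.get_tensor("data").set_shape(ov::Shape{1, 3, 224, 224});
    request.infer();
    return 0;
}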

samples/cpp/benchmark_app/infer_request_wrap.hpp

@@ -11,14 +11,14 @@
 #include <map>
 #include <memory>
 #include <mutex>
+#include <openvino/openvino.hpp>
 #include <queue>
 #include <string>
 #include <vector>
 // clang-format off
-#include "inference_engine.hpp"
-#include "remote_blobs_filling.hpp"
+#include "remote_tensors_filling.hpp"
 #include "statistics_report.hpp"
 #include "utils.hpp"
 // clang-format on
@@ -33,13 +33,14 @@ public:
     ~InferReqWrap() = default;
-    explicit InferReqWrap(InferenceEngine::ExecutableNetwork& net, size_t id, QueueCallbackFunction callbackQueue)
-        : _request(net.CreateInferRequest()),
+    explicit InferReqWrap(ov::runtime::CompiledModel& model, size_t id, QueueCallbackFunction callbackQueue)
+        : _request(model.create_infer_request()),
           _id(id),
           _lat_group_id(0),
           _callbackQueue(callbackQueue),
           outputClBuffer() {
-        _request.SetCompletionCallback([&]() {
+        _request.set_callback([&](const std::exception_ptr& ptr) {
+            // TODO: Add exception ptr rethrow in proper thread
             _endTime = Time::now();
             _callbackQueue(_id, _lat_group_id, getExecutionTimeInMilliseconds());
         });
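
The TODO above notes that the std::exception_ptr handed to the new set_callback signature is currently dropped. A hedged sketch of one way to surface it, not what this commit does: stash the pointer in a hypothetical _exception member inside InferReqWrap and rethrow it on the thread that calls wait().

// Inside InferReqWrap; _exception is a hypothetical std::exception_ptr member
_request.set_callback([&](const std::exception_ptr& ptr) {
    _endTime = Time::now();
    _exception = ptr;  // remember the failure, if any
    _callbackQueue(_id, _lat_group_id, getExecutionTimeInMilliseconds());
});

void wait() {
    _request.wait();
    if (_exception)
        std::rethrow_exception(_exception);  // surface it where the caller can catch
}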
@@ -47,30 +48,35 @@ public:
     void startAsync() {
         _startTime = Time::now();
-        _request.StartAsync();
+        _request.start_async();
     }
     void wait() {
-        _request.Wait(InferenceEngine::InferRequest::RESULT_READY);
+        _request.wait();
     }
     void infer() {
         _startTime = Time::now();
-        _request.Infer();
+        _request.infer();
         _endTime = Time::now();
         _callbackQueue(_id, _lat_group_id, getExecutionTimeInMilliseconds());
     }
-    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> getPerformanceCounts() {
-        return _request.GetPerformanceCounts();
+    std::vector<ov::runtime::ProfilingInfo> getPerformanceCounts() {
+        return _request.get_profiling_info();
     }
-    InferenceEngine::Blob::Ptr getBlob(const std::string& name) {
-        return _request.GetBlob(name);
+    void setShape(const std::string& name, const ov::Shape& dims) {
+        // TODO check return status
+        _request.get_tensor(name).set_shape(dims);
     }
-    void setBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) {
-        _request.SetBlob(name, data);
+    ov::runtime::Tensor getTensor(const std::string& name) {
+        return _request.get_tensor(name);
+    }
+    void setTensor(const std::string& name, const ov::runtime::Tensor& data) {
+        _request.set_tensor(name, data);
     }
     double getExecutionTimeInMilliseconds() const {
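
The getBlob/setBlob pair becomes tensor accessors, and the new setShape helper is what lets benchmark_app resize inputs between iterations when the compiled model has dynamic dimensions. A hedged usage sketch, assuming a compiledModel and an idleCallback already exist; the input name and shape are hypothetical:

InferReqWrap request(compiledModel, 0, idleCallback);
request.setShape("data", ov::Shape{1, 3, 300, 300});    // concrete shape for this iteration
ov::runtime::Tensor input = request.getTensor("data");  // fill input data in place
request.infer();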
@@ -90,7 +96,7 @@ public:
     }
 private:
-    InferenceEngine::InferRequest _request;
+    ov::runtime::InferRequest _request;
     Time::time_point _startTime;
     Time::time_point _endTime;
     size_t _id;
@@ -101,13 +107,10 @@ private:
 class InferRequestsQueue final {
 public:
-    InferRequestsQueue(InferenceEngine::ExecutableNetwork& net,
-                       size_t nireq,
-                       size_t lat_group_n,
-                       bool enable_lat_groups)
+    InferRequestsQueue(ov::runtime::CompiledModel& model, size_t nireq, size_t lat_group_n, bool enable_lat_groups)
         : enable_lat_groups(enable_lat_groups) {
         for (size_t id = 0; id < nireq; id++) {
-            requests.push_back(std::make_shared<InferReqWrap>(net,
+            requests.push_back(std::make_shared<InferReqWrap>(model,
                                                               id,
                                                               std::bind(&InferRequestsQueue::putIdleRequest,
                                                                         this,
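
For reference, the queue built here drives benchmark_app's async loop roughly as sketched below; getIdleRequest() and waitAll() are assumed from the surrounding benchmark_app sources rather than shown in this hunk.

InferRequestsQueue queue(compiledModel, nireq, lat_group_n, enable_lat_groups);
for (size_t iter = 0; iter < niter; ++iter) {
    auto request = queue.getIdleRequest();  // blocks until some request is idle
    request->startAsync();
}
queue.waitAll();  // drain in-flight requests before collecting latency stats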