Enable some tests for Template plugin (#16832)

* Remove the skip of template plugin tests
* Enable some skipped tests for template plugin
* Added cancel callback, collect per-layer statistics, fixed tests
* Fixed template tests
* Rename internal API terminate to cancel
* Fixed Windows tests
* Fixed logic with performance counters
2023-04-12 01:02:28 +04:00
parent 209db8a29b
commit 4a43753e02
20 changed files with 273 additions and 107 deletions
--- a/docs/IE_PLUGIN_DG/AsyncInferRequest.md
+++ b/docs/IE_PLUGIN_DG/AsyncInferRequest.md
@@ -16,6 +16,7 @@ OpenVINO Runtime Plugin API provides the base ov::IAsyncInferRequest class for a

 ### Class Fields

+- `m_cancel_callback` - a callback that interrupts the execution of the infer request
 - `m_wait_executor` - a task executor that waits for a response from a device about device tasks completion

 > **NOTE**: If a plugin can work with several instances of a device, `m_wait_executor` must be device-specific. Otherwise, having a single task executor for several devices does not allow them to work in parallel.
@@ -43,3 +44,9 @@ The stages are distributed among two task executors in the following way:
 In the asynchronous request destructor, it is necessary to wait for a pipeline to finish. It can be done using the ov::IAsyncInferRequest::stop_and_wait method of the base class.

@snippet src/async_infer_request.cpp async_infer_request:dtor
+
+### cancel()
+
+This method cancels the execution of the infer request:
+
+@snippet src/async_infer_request.cpp async_infer_request:cancel
--- a/docs/IE_PLUGIN_DG/InferRequest.md
+++ b/docs/IE_PLUGIN_DG/InferRequest.md
@@ -89,4 +89,10 @@ The method returns the profiling info which was measured during pipeline stages

@snippet src/sync_infer_request.cpp infer_request:get_profiling_info

+### cancel()
+
+This plugin-specific method allows the AsyncInferRequest to interrupt the synchronous execution:
+
+@snippet src/sync_infer_request.cpp infer_request:cancel
+
 The next step in the plugin library implementation is the [Asynchronous Inference Request](@ref openvino_docs_ov_plugin_dg_async_infer_request) class.
--- a/src/plugins/hetero/executable_network.cpp
+++ b/src/plugins/hetero/executable_network.cpp
@@ -457,10 +457,12 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::CNNNetwo

 HeteroExecutableNetwork::HeteroExecutableNetwork(std::istream& heteroModel,
                                                 const Configs& user_config,
-                                                 Engine* heteroPlugin)
+                                                 Engine* heteroPlugin,
+                                                 bool fromCache)
    : _heteroPlugin(heteroPlugin),
      _hetero_config{},
-      _device_config{} {
+      _device_config{},
+      _loadedFromCache(fromCache) {
    std::string heteroXmlStr;
    std::getline(heteroModel, heteroXmlStr);

@@ -809,12 +811,14 @@ InferenceEngine::Parameter HeteroExecutableNetwork::GetMetric(const std::string&
            ov::PropertyName{ov::model_name.name(), ov::PropertyMutability::RO},
            ov::PropertyName{ov::optimal_number_of_infer_requests.name(), ov::PropertyMutability::RO},
            ov::PropertyName{ov::execution_devices.name(), ov::PropertyMutability::RO},
+            ov::PropertyName{ov::loaded_from_cache.name(), ov::PropertyMutability::RO},
            ov::PropertyName{ov::device::properties.name(), ov::PropertyMutability::RO},
            ov::PropertyName{ov::device::priorities.name(), ov::PropertyMutability::RO}};
    } else if (EXEC_NETWORK_METRIC_KEY(SUPPORTED_METRICS) == name) {
        std::vector<std::string> heteroMetrics = {ov::model_name.name(),
                                                  METRIC_KEY(SUPPORTED_METRICS),
                                                  METRIC_KEY(SUPPORTED_CONFIG_KEYS),
+                                                  ov::loaded_from_cache.name(),
                                                  ov::optimal_number_of_infer_requests.name(),
                                                  ov::execution_devices.name()};
        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, heteroMetrics);
@@ -843,6 +847,8 @@ InferenceEngine::Parameter HeteroExecutableNetwork::GetMetric(const std::string&
        return all_devices;
    } else if (ov::model_name == name) {
        return decltype(ov::model_name)::value_type{_name};
+    } else if (ov::loaded_from_cache == name) {
+        return decltype(ov::loaded_from_cache)::value_type{_loadedFromCache};
    } else if (ov::optimal_number_of_infer_requests == name) {
        unsigned int value = 0u;
        for (auto&& desc : _networks) {
--- a/src/plugins/hetero/executable_network.hpp
+++ b/src/plugins/hetero/executable_network.hpp
@@ -34,7 +34,10 @@ public:
    typedef std::shared_ptr<HeteroExecutableNetwork> Ptr;

    HeteroExecutableNetwork(const InferenceEngine::CNNNetwork& network, const Configs& user_config, Engine* plugin);
-    HeteroExecutableNetwork(std::istream& heteroModel, const Configs& user_config, Engine* plugin);
+    HeteroExecutableNetwork(std::istream& heteroModel,
+                            const Configs& user_config,
+                            Engine* plugin,
+                            bool from_cache = false);

    InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(
        InferenceEngine::InputsDataMap networkInputs,
@@ -65,6 +68,7 @@ private:
    Configs _hetero_config;
    Configs _device_config;
    std::unordered_map<std::string, std::string> _blobNameMap;
+    bool _loadedFromCache = false;
 };

 }  // namespace HeteroPlugin
--- a/src/plugins/hetero/plugin.cpp
+++ b/src/plugins/hetero/plugin.cpp
@@ -165,7 +165,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(cons
 InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(
    std::istream& heteroModel,
    const std::map<std::string, std::string>& user_config) {
-    return std::make_shared<HeteroExecutableNetwork>(heteroModel, user_config, this);
+    return std::make_shared<HeteroExecutableNetwork>(heteroModel, user_config, this, true);
 }

 Engine::DeviceMetaInformationMap Engine::GetDevicePlugins(const std::string& targetFallback,
--- a/src/plugins/template/backend/executable.hpp
+++ b/src/plugins/template/backend/executable.hpp
@@ -21,16 +21,24 @@ public:

    /// \param outputs vector of runtime::Tensor used as outputs
    /// \param inputs vector of runtime::Tensor used as inputs
+    /// \param collect_performance Enables collection of per-operation performance statistics
    /// \returns true if iteration is successful, false otherwise
-    virtual bool call(std::vector<ov::Tensor>& outputs, const std::vector<ov::Tensor>& inputs) = 0;
+    virtual bool call(std::vector<ov::Tensor>& outputs,
+                      const std::vector<ov::Tensor>& inputs,
+                      bool collect_performance = false) = 0;

    /// \param outputs vector of runtime::Tensor used as outputs
    /// \param inputs vector of runtime::Tensor used as inputs
    /// \param context Evaluation context
+    /// \param collect_performance Enables collection of per-operation performance statistics
    /// \returns true if iteration is successful, false otherwise
    virtual bool call(std::vector<ov::Tensor>& outputs,
                      const std::vector<ov::Tensor>& inputs,
-                      const ov::EvaluationContext& context) = 0;
+                      const ov::EvaluationContext& context,
+                      bool collect_performance = false) = 0;
+
+    /// \brief Cancel and terminate the current execution
+    virtual void cancel() = 0;

    /// \brief Executes a single iteration of a Function.
    /// \param outputs vector of runtime::Tensor used as outputs
--- a/src/plugins/template/backend/int_executable.cpp
+++ b/src/plugins/template/backend/int_executable.cpp
@@ -9,9 +9,11 @@
 #include <openvino/op/util/variable_context.hpp>

 #include "evaluates_map.hpp"
+#include "openvino/core/except.hpp"
 #include "openvino/op/parameter.hpp"
 #include "openvino/op/result.hpp"
 #include "openvino/op/util/op_types.hpp"
+#include "perf_counter.hpp"
 #include "tensor_conversion_util.hpp"

 NGRAPH_SUPPRESS_DEPRECATED_START
@@ -107,8 +109,13 @@ ov::runtime::interpreter::INTExecutable::INTExecutable(const std::shared_ptr<ov:
    set_parameters_and_results(*m_model);
 }

+void ov::runtime::interpreter::INTExecutable::cancel() {
+    m_cancel_execution = true;
+}
+
 bool ov::runtime::interpreter::INTExecutable::call(std::vector<ov::Tensor>& outputs,
-                                                   const std::vector<ov::Tensor>& inputs) {
+                                                   const std::vector<ov::Tensor>& inputs,
+                                                   bool collect_performance) {
    EvaluationContext eval_context;
    ov::op::util::VariableContext variable_context;
    eval_context.emplace("VariableContext", variable_context);
@@ -127,12 +134,21 @@ bool ov::runtime::interpreter::INTExecutable::call(std::vector<ov::Tensor>& outp
        }
    }

-    return call(outputs, inputs, eval_context);
+    return call(outputs, inputs, eval_context, collect_performance);
 }

 bool ov::runtime::interpreter::INTExecutable::call(std::vector<ov::Tensor>& outputs,
                                                   const std::vector<ov::Tensor>& inputs,
-                                                   const ov::EvaluationContext& context) {
+                                                   const ov::EvaluationContext& context,
+                                                   bool collect_performance) {
+#define CHECK_TERMINATE()                                                      \
+    /* exchange() reads and clears the cancel flag in one atomic operation, */ \
+    /* so a pending cancel request is consumed by exactly one check.        */ \
+    if (m_cancel_execution.exchange(false)) {                                  \
+        return false;                                                          \
+    }
+
+    CHECK_TERMINATE()
    // map function params -> ov::Tensor
    std::unordered_map<std::shared_ptr<ov::descriptor::Tensor>, ov::Tensor> tensor_map;
    size_t input_count = 0;
@@ -155,6 +171,7 @@ bool ov::runtime::interpreter::INTExecutable::call(std::vector<ov::Tensor>& outp

    // for each ordered op in the graph
    for (const auto& op : m_nodes) {
+        CHECK_TERMINATE()
        if (std::dynamic_pointer_cast<ov::op::v0::Parameter>(op)) {
            continue;
        }
@@ -165,19 +182,13 @@ bool ov::runtime::interpreter::INTExecutable::call(std::vector<ov::Tensor>& outp
            op_inputs.push_back(tensor_map.at(tensor));
        }

-        OutputVector output_ports;
-        for (size_t i = 0; i < op->inputs().size(); ++i) {
-            output_ports.push_back(op->get_input_source_output(i));
-        }
-        auto cloned_node = op->clone_with_new_inputs(output_ports);
-
        // get op outputs from map or create
        std::vector<ov::Tensor> op_outputs;
        for (size_t i = 0; i < op->get_output_size(); ++i) {
            auto tensor = op->output(i).get_tensor_ptr();
            ov::Tensor host_tensor;
            auto it = tensor_map.find(tensor);
-            auto output = cloned_node->output(i);
+            auto output = op->output(i);
            if (op::util::is_output(op) || it == tensor_map.end() || !it->second) {
                host_tensor = ov::Tensor(output.get_element_type(),
                                         output.get_partial_shape().is_dynamic()
@@ -189,10 +200,13 @@ bool ov::runtime::interpreter::INTExecutable::call(std::vector<ov::Tensor>& outp
            op_outputs.push_back(host_tensor);
        }

-        // Call evaluate for cloned_node with static shapes
-        if (!cloned_node->evaluate(op_outputs, op_inputs, context)) {
-            // TODO: extend evaluate map for the context
-            evaluate_node(cloned_node, op_outputs, op_inputs);
+        {
+            PERF(op, collect_performance);
+            // Call evaluate directly on the original op
+            if (!op->evaluate(op_outputs, op_inputs, context)) {
+                // TODO: extend evaluate map for the context
+                evaluate_node(op, op_outputs, op_inputs);
+            }
        }
        // Update tensors in tensor map
        for (size_t i = 0; i < op->get_output_size(); ++i) {
@@ -214,13 +228,13 @@ bool ov::runtime::interpreter::INTExecutable::call(std::vector<ov::Tensor>& outp

 std::shared_ptr<ov::op::v0::Parameter> ov::runtime::interpreter::INTExecutable::get_parameter(size_t index) const {
    const ParameterVector& parameters = get_parameters();
-    NGRAPH_CHECK(index < parameters.size(), "create_tensor for input out of bounds");
+    OPENVINO_ASSERT(index < parameters.size(), "create_tensor for input out of bounds");
    return parameters[index];
 }

 std::shared_ptr<ov::op::v0::Result> ov::runtime::interpreter::INTExecutable::get_result(size_t index) const {
    const ResultVector& results = get_results();
-    NGRAPH_CHECK(index < results.size(), "create_tensor for input out of bounds");
+    OPENVINO_ASSERT(index < results.size(), "create_tensor for output out of bounds");  // index refers to a Result
    return results[index];
 }
 ov::Tensor ov::runtime::interpreter::INTExecutable::create_input_tensor(size_t input_index) {
@@ -265,17 +279,12 @@ bool ov::runtime::interpreter::INTExecutable::evaluate_node(const std::shared_pt
    bool res = false;
    const auto tensor_inputs = create_tmp_tensors(inputs);
    auto tensor_outputs = create_tmp_tensors(outputs);
-    if (it != map.end()) {
-        res = it->second(node, tensor_outputs, tensor_inputs);
-        if (!res) {
-            throw ngraph::ngraph_error(std::string("Running evaluate method for OP ") + node->get_type_info().name +
-                                       std::string(" failed!"));
-        }
-        update_output_tensors(outputs, tensor_outputs);
-    } else {
-        throw ngraph::unsupported_op(std::string("Interpreter backend doesn't implement evaluate method for OP ") +
-                                     node->get_type_info().name);
-    }
+    OPENVINO_ASSERT(it != map.end(),
+                    "Interpreter backend doesn't implement evaluate method for OP ",
+                    node->get_type_info().name);
+    res = it->second(node, tensor_outputs, tensor_inputs);
+    OPENVINO_ASSERT(res, "Running evaluate method for OP ", node->get_type_info().name, " failed!");
+    update_output_tensors(outputs, tensor_outputs);
    return res;
 }

--- a/src/plugins/template/backend/int_executable.hpp
+++ b/src/plugins/template/backend/int_executable.hpp
@@ -28,10 +28,15 @@ class INTExecutable : public Executable {
 public:
    INTExecutable(const std::shared_ptr<ov::Model>& model);

-    bool call(std::vector<ov::Tensor>& outputs, const std::vector<ov::Tensor>& inputs) override;
+    void cancel() override;
+
    bool call(std::vector<ov::Tensor>& outputs,
              const std::vector<ov::Tensor>& inputs,
-              const ov::EvaluationContext& context) override;
+              bool collect_performance = false) override;
+    bool call(std::vector<ov::Tensor>& outputs,
+              const std::vector<ov::Tensor>& inputs,
+              const ov::EvaluationContext& context,
+              bool collect_performance = false) override;

    ov::Tensor create_input_tensor(size_t input_index) override;

@@ -52,6 +57,8 @@ protected:
    bool m_is_compiled = false;
    std::shared_ptr<ov::Model> m_model;
    std::vector<std::shared_ptr<Node>> m_nodes;
+    std::atomic_bool m_cancel_execution{false};
+    std::mutex m_mutex;

    struct InfoForNMS5 {
        int64_t max_output_boxes_per_class;
--- a/src/plugins/template/backend/perf_counter.hpp
+++ b/src/plugins/template/backend/perf_counter.hpp
@@ -0,0 +1,78 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <chrono>
+#include <memory>
+#include <ratio>
+
+#include "openvino/core/except.hpp"
+#include "openvino/core/node.hpp"
+
+namespace ov {
+namespace runtime {
+namespace interpreter {
+
+static const char PERF_COUNTER_NAME[] = "template_perf_counter";
+
+class PerfCounter {  // Accumulates the execution time of a single operation over repeated iterations
+    uint64_t total_duration;  // Sum of all measured iterations, in microseconds
+    uint32_t num;             // Number of finished iterations
+
+    std::chrono::high_resolution_clock::time_point start_time = {};   // Begin of the latest iteration
+    std::chrono::high_resolution_clock::time_point finish_time = {};  // End of the latest iteration
+
+public:
+    PerfCounter() : total_duration(0), num(0) {}
+
+    std::chrono::duration<double, std::milli> duration() const {  // Latest iteration, in milliseconds
+        return finish_time - start_time;
+    }
+
+    uint64_t avg() const {  // Mean iteration time in whole microseconds; 0 if nothing was measured
+        return (num == 0) ? 0 : total_duration / num;
+    }
+    uint32_t count() const {  // Number of finished iterations
+        return num;
+    }
+
+private:
+    void start_itr() {  // Called by PerfHelper when a measured scope begins
+        start_time = std::chrono::high_resolution_clock::now();
+    }
+
+    void finish_itr() {  // Called by PerfHelper when the measured scope ends
+        finish_time = std::chrono::high_resolution_clock::now();
+        total_duration += std::chrono::duration_cast<std::chrono::microseconds>(finish_time - start_time).count();
+        num++;
+    }
+
+    friend class PerfHelper;
+};
+
+class PerfHelper {  // Scope guard: times one iteration of a node's PerfCounter from construction to destruction
+    std::shared_ptr<PerfCounter> counter;
+
+public:
+    explicit PerfHelper(const std::shared_ptr<ov::Node>& node) {
+        auto& info = node->get_rt_info();  // bind by reference: no copy of the whole rt_info map per evaluated op
+        const auto it = info.find(ov::runtime::interpreter::PERF_COUNTER_NAME);
+        OPENVINO_ASSERT(it != info.end(), "Operation ", node, " doesn't contain performance counter");
+        counter = it->second.as<std::shared_ptr<ov::runtime::interpreter::PerfCounter>>();
+        OPENVINO_ASSERT(counter, "Performance counter is empty");
+        counter->start_itr();
+    }
+
+    ~PerfHelper() {  // Records the iteration when the guard goes out of scope
+        counter->finish_itr();
+    }
+};
+
+}  // namespace interpreter
+}  // namespace runtime
+}  // namespace ov
+
+#define GET_PERF(node)   std::unique_ptr<ov::runtime::interpreter::PerfHelper>(new ov::runtime::interpreter::PerfHelper(node))
+#define PERF(node, need) auto pc = (need) ? GET_PERF(node) : nullptr  // times `node` until `pc` leaves scope if `need`
--- a/src/plugins/template/src/async_infer_request.cpp
+++ b/src/plugins/template/src/async_infer_request.cpp
@@ -23,6 +23,9 @@ ov::template_plugin::AsyncInferRequest::AsyncInferRequest(
    // and waiting tasks. Waiting tasks can lock execution thread so they use separate threads from other executor.
    constexpr const auto remoteDevice = false;

+    m_cancel_callback = [request] {
+        request->cancel();
+    };
    if (remoteDevice) {
        m_pipeline = {{task_executor,
                       [this, request] {
@@ -51,3 +54,10 @@ ov::template_plugin::AsyncInferRequest::~AsyncInferRequest() {
    ov::IAsyncInferRequest::stop_and_wait();
 }
 // ! [async_infer_request:dtor]
+
+// ! [async_infer_request:cancel]
+void ov::template_plugin::AsyncInferRequest::cancel() {
+    ov::IAsyncInferRequest::cancel();
+    m_cancel_callback();
+}
+// ! [async_infer_request:cancel]
--- a/src/plugins/template/src/async_infer_request.hpp
+++ b/src/plugins/template/src/async_infer_request.hpp
@@ -22,8 +22,10 @@ public:
                      const std::shared_ptr<ov::threading::ITaskExecutor>& callback_executor);

    ~AsyncInferRequest();
+    void cancel() override;

 private:
+    std::function<void()> m_cancel_callback;
    std::shared_ptr<ov::threading::ITaskExecutor> m_wait_executor;
 };
 // ! [async_infer_request:header]
--- a/src/plugins/template/src/compiled_model.cpp
+++ b/src/plugins/template/src/compiled_model.cpp
@@ -10,8 +10,12 @@
 #include "ie_ngraph_utils.hpp"
 #include "ie_plugin_config.hpp"
 #include "itt.hpp"
+#include "openvino/op/util/op_types.hpp"
+#include "openvino/runtime/exec_model_info.hpp"
 #include "openvino/runtime/properties.hpp"
+#include "perf_counter.hpp"
 #include "plugin.hpp"
+#include "transformations/rt_info/fused_names_attribute.hpp"
 #include "transformations/utils/utils.hpp"

 // ! [compiled_model:ctor]
@@ -46,10 +50,17 @@ ov::template_plugin::CompiledModel::CompiledModel(const std::shared_ptr<ov::Mode
 void transform_model(const std::shared_ptr<ov::Model>& model);

 void ov::template_plugin::CompiledModel::compile_model(const std::shared_ptr<ov::Model>& model) {
-    if (m_cfg.disable_transformations)
-        return;
    // apply plugins transformations
-    transform_model(model);
+    if (!m_cfg.disable_transformations)
+        transform_model(model);
+
+    // Integrate performance counters to the compiled model
+    for (const auto& op : model->get_ops()) {
+        auto& rt_info = op->get_rt_info();
+        rt_info[ov::runtime::interpreter::PERF_COUNTER_NAME] =
+            std::make_shared<ov::runtime::interpreter::PerfCounter>();
+    }
+
    // Perform any other steps like allocation and filling backend specific memory handles and so on
 }
 // ! [compiled_model:compile_model]
@@ -76,13 +87,37 @@ std::shared_ptr<ov::IAsyncInferRequest> ov::template_plugin::CompiledModel::crea

 // ! [compiled_model:set_property]
 void ov::template_plugin::CompiledModel::set_property(const ov::AnyMap& properties) {
-    OPENVINO_NOT_IMPLEMENTED;
+    m_cfg = Configuration{properties, m_cfg};
 }
 // ! [compiled_model:set_property]

 // ! [compiled_model:get_runtime_model]
 std::shared_ptr<const ov::Model> ov::template_plugin::CompiledModel::get_runtime_model() const {
-    return m_model;
+    auto model = m_model->clone();  // the annotations below are written to this clone, which is what gets returned
+    // Add execution information into the model
+    size_t exec_order = 0;
+    for (const auto& op : model->get_ordered_ops()) {
+        auto& info = op->get_rt_info();
+        const auto& it = info.find(ov::runtime::interpreter::PERF_COUNTER_NAME);
+        OPENVINO_ASSERT(it != info.end(), "Operation ", op, " doesn't contain performance counter");
+        auto perf_count = it->second.as<std::shared_ptr<ov::runtime::interpreter::PerfCounter>>();
+        OPENVINO_ASSERT(perf_count, "Performance counter is empty");
+        info[ov::exec_model_info::LAYER_TYPE] = op->get_type_info().name;
+        info[ov::exec_model_info::EXECUTION_ORDER] = std::to_string(exec_order++);  // index within get_ordered_ops()
+        info[ov::exec_model_info::IMPL_TYPE] = "ref";
+        info[ov::exec_model_info::PERF_COUNTER] = m_cfg.perf_count && perf_count && perf_count->avg() != 0
+                                                      ? std::to_string(perf_count->avg())
+                                                      : "not_executed";  // NOTE(review): avg() is in whole us, so sub-1us ops land here too
+
+        std::string original_names = ov::getFusedNames(op);
+        if (original_names.empty()) {  // no fused names recorded: use the op's own friendly name
+            original_names = op->get_friendly_name();
+        } else if (original_names.find(op->get_friendly_name()) == std::string::npos) {  // substring search over the list
+            original_names = op->get_friendly_name() + "," + original_names;
+        }
+        info[ov::exec_model_info::ORIGINAL_NAMES] = original_names;
+    }
+    return model;
 }
 // ! [compiled_model:get_runtime_model]

--- a/src/plugins/template/src/plugin.cpp
+++ b/src/plugins/template/src/plugin.cpp
@@ -201,7 +201,7 @@ ov::SupportedOpsMap ov::template_plugin::Plugin::query_model(const std::shared_p
    // 3. Produce the result
    ov::SupportedOpsMap res;
    for (auto&& layerName : supported) {
-        res.emplace(layerName, get_device_name());
+        res.emplace(layerName, get_device_name() + "." + std::to_string(m_cfg.device_id));
    }

    return res;
--- a/src/plugins/template/src/sync_infer_request.cpp
+++ b/src/plugins/template/src/sync_infer_request.cpp
@@ -206,7 +206,10 @@ void ov::template_plugin::InferRequest::infer_preprocess() {
 void ov::template_plugin::InferRequest::start_pipeline() {
    OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, m_profiling_task[StartPipeline])
    auto start = Time::now();
-    m_executable->call(m_backend_output_tensors, m_backend_input_tensors, m_eval_context);
+    m_executable->call(m_backend_output_tensors,
+                       m_backend_input_tensors,
+                       m_eval_context,
+                       get_template_model()->m_cfg.perf_count);
    m_durations[StartPipeline] = Time::now() - start;
 }
 // ! [infer_request:start_pipeline]
@@ -268,3 +271,9 @@ std::vector<ov::ProfilingInfo> ov::template_plugin::InferRequest::get_profiling_
    return info;
 }
 // ! [infer_request:get_profiling_info]
+
+// ! [infer_request:cancel]
+void ov::template_plugin::InferRequest::cancel() {
+    m_executable->cancel();
+}
+// ! [infer_request:cancel]
--- a/src/plugins/template/src/sync_infer_request.hpp
+++ b/src/plugins/template/src/sync_infer_request.hpp
@@ -39,6 +39,7 @@ public:
    void start_pipeline();
    void wait_pipeline();
    void infer_postprocess();
+    void cancel();

    void set_tensors_impl(const ov::Output<const ov::Node> port, const std::vector<ov::Tensor>& tensors) override;

--- a/src/plugins/template/tests/functional/shared_tests_instances/behavior/executable_network/get_metric.cpp
+++ b/src/plugins/template/tests/functional/shared_tests_instances/behavior/executable_network/get_metric.cpp
@@ -2,12 +2,12 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-#include <utility>
-#include <string>
-#include <vector>
-
 #include "behavior/executable_network/get_metric.hpp"

+#include <string>
+#include <utility>
+#include <vector>
+
 using namespace BehaviorTestsDefinitions;

 namespace {
@@ -15,58 +15,54 @@ namespace {
 // Executable Network GetMetric
 //

-INSTANTIATE_TEST_SUITE_P(
-        smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS,
-        ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE, "MULTI:TEMPLATE", "HETERO:TEMPLATE"));
+INSTANTIATE_TEST_SUITE_P(smoke_IEClassExecutableNetworkGetMetricTest,
+                         IEClassExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS,
+                         ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE, "MULTI:TEMPLATE", "HETERO:TEMPLATE"));

-INSTANTIATE_TEST_SUITE_P(
-        smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_SUPPORTED_METRICS,
-        ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE, "MULTI:TEMPLATE", "HETERO:TEMPLATE"));
+INSTANTIATE_TEST_SUITE_P(smoke_IEClassExecutableNetworkGetMetricTest,
+                         IEClassExecutableNetworkGetMetricTest_SUPPORTED_METRICS,
+                         ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE, "MULTI:TEMPLATE", "HETERO:TEMPLATE"));

-INSTANTIATE_TEST_SUITE_P(
-        smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_NETWORK_NAME,
-        ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE, "MULTI:TEMPLATE", "HETERO:TEMPLATE"));
+INSTANTIATE_TEST_SUITE_P(smoke_IEClassExecutableNetworkGetMetricTest,
+                         IEClassExecutableNetworkGetMetricTest_NETWORK_NAME,
+                         ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE, "MULTI:TEMPLATE", "HETERO:TEMPLATE"));

-INSTANTIATE_TEST_SUITE_P(
-        smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
-        ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE, "MULTI:TEMPLATE", "HETERO:TEMPLATE"));
+INSTANTIATE_TEST_SUITE_P(smoke_IEClassExecutableNetworkGetMetricTest,
+                         IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
+                         ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE, "MULTI:TEMPLATE", "HETERO:TEMPLATE"));

-INSTANTIATE_TEST_SUITE_P(
-        smoke_IEClassExecutableNetworkGetMetricTest_ThrowsUnsupported, IEClassExecutableNetworkGetMetricTest,
-        ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE, "MULTI:TEMPLATE", "HETERO:TEMPLATE"));
+INSTANTIATE_TEST_SUITE_P(smoke_IEClassExecutableNetworkGetMetricTest_ThrowsUnsupported,
+                         IEClassExecutableNetworkGetMetricTest,
+                         ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE, "MULTI:TEMPLATE", "HETERO:TEMPLATE"));
 //
 // Executable Network GetConfig / SetConfig
 //

-INSTANTIATE_TEST_SUITE_P(
-        smoke_IEClassExecutableNetworkGetConfigTest, IEClassExecutableNetworkGetConfigTest,
-        ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));
+INSTANTIATE_TEST_SUITE_P(smoke_IEClassExecutableNetworkGetConfigTest,
+                         IEClassExecutableNetworkGetConfigTest,
+                         ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));

-INSTANTIATE_TEST_SUITE_P(
-        smoke_IEClassExecutableNetworkSetConfigTest, IEClassExecutableNetworkSetConfigTest,
-        ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));
+INSTANTIATE_TEST_SUITE_P(smoke_IEClassExecutableNetworkSetConfigTest,
+                         IEClassExecutableNetworkSetConfigTest,
+                         ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));

 //
 // Hetero Executable Network GetMetric
 //

-#ifdef ENABLE_INTEL_CPU
+INSTANTIATE_TEST_SUITE_P(smoke_IEClassHeteroExecutableNetworlGetMetricTest,
+                         IEClassHeteroExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS,
+                         ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));

-INSTANTIATE_TEST_SUITE_P(
-        smoke_IEClassHeteroExecutableNetworlGetMetricTest, IEClassHeteroExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS,
-        ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));
+INSTANTIATE_TEST_SUITE_P(smoke_IEClassHeteroExecutableNetworlGetMetricTest,
+                         IEClassHeteroExecutableNetworkGetMetricTest_SUPPORTED_METRICS,
+                         ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));

-INSTANTIATE_TEST_SUITE_P(
-        smoke_IEClassHeteroExecutableNetworlGetMetricTest, IEClassHeteroExecutableNetworkGetMetricTest_SUPPORTED_METRICS,
-        ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));
+INSTANTIATE_TEST_SUITE_P(smoke_IEClassHeteroExecutableNetworlGetMetricTest,
+                         IEClassHeteroExecutableNetworkGetMetricTest_NETWORK_NAME,
+                         ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));

-INSTANTIATE_TEST_SUITE_P(
-        smoke_IEClassHeteroExecutableNetworlGetMetricTest, IEClassHeteroExecutableNetworkGetMetricTest_NETWORK_NAME,
-        ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));
-
-INSTANTIATE_TEST_SUITE_P(
-        smoke_IEClassHeteroExecutableNetworlGetMetricTest, IEClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK,
-        ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));
-
-#endif  // ENABLE_INTEL_CPU
-} // namespace
+INSTANTIATE_TEST_SUITE_P(smoke_IEClassHeteroExecutableNetworlGetMetricTest,
+                         IEClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK,
+                         ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));
+}  // namespace
--- a/src/plugins/template/tests/functional/shared_tests_instances/behavior/ov_executable_network/get_metric.cpp
+++ b/src/plugins/template/tests/functional/shared_tests_instances/behavior/ov_executable_network/get_metric.cpp
@@ -100,7 +100,7 @@ INSTANTIATE_TEST_SUITE_P(
       ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));
 INSTANTIATE_TEST_SUITE_P(
        smoke_OVClassHeteroExecutableNetworkGetMetricTest, OVClassHeteroExecutableNetworkGetMetricTest_EXEC_DEVICES,
-        ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE));
+        ::testing::Values("TEMPLATE.0"));
 //////////////////////////////////////////////////////////////////////////////////////////

 } // namespace
--- a/src/plugins/template/tests/functional/skip_tests_config.cpp
+++ b/src/plugins/template/tests/functional/skip_tests_config.cpp
@@ -9,10 +9,6 @@

 std::vector<std::string> disabledTestPatterns() {
    std::vector<std::string> retVector{
-        // CVS-66280
-        R"(.*canLoadCorrectNetworkAndCheckConfig.*)",
-        R"(.*canSetCorrectConfigLoadNetworkAndCheckConfig.*)",
-        //
        R"(.*ExclusiveAsyncRequests.*)",
        R"(.*ReusableCPUStreamsExecutor.*)",
        R"(.*SplitLayerTest.*numSplits=30.*)",
@@ -20,17 +16,8 @@ std::vector<std::string> disabledTestPatterns() {
        R"(.*InferRequestPreprocessConversionTest.*oLT=(NHWC|NCHW).*)",
        R"(.*InferRequestPreprocessDynamicallyInSetBlobTest.*oPRC=0.*oLT=1.*)",
        // Not Implemented
-        R"(.*Behavior.*ExecutableNetworkBaseTest.*(canSetConfigToExecNet|canSetConfigToExecNetAndCheckConfigAndCheck).*)",
-        R"(.*OVCompiledModelBaseTest.*(CanSetConfigToExecNet|canSetConfigToExecNetAndCheckConfigAndCheck).*)",
-        R"(.*Behavior.*ExecutableNetworkBaseTest.*(CheckExecGraphInfoBeforeExecution|CheckExecGraphInfoAfterExecution|CheckExecGraphInfoSerialization).*)",
-        R"(.*Behavior.*OVCompiledModelBaseTest.*(CheckExecGraphInfoBeforeExecution|CheckExecGraphInfoAfterExecution).*)",
-        R"(.*Behavior.*ExecutableNetworkBaseTest.*canExport.*)",
-        R"(.*Behavior.*OVCompiledModelBaseTest.*canExport.*)",
-        R"(.*Behavior.*ExecutableNetworkBaseTest.*(CanCreateTwoExeNetworksAndCheckFunction).*)",
-        R"(.*Behavior.*OVCompiledModelBaseTest.*(CanCreateTwoExeNetworksAndCheckFunction).*)",
-        R"(.*Behavior.*ExecutableNetworkBaseTest.*(checkGetExecGraphInfoIsNotNullptr).*)",
-        R"(.*Behavior.*OVCompiledModelBaseTest.*(checkGetExecGraphInfoIsNotNullptr).*)",
-        R"(.*LoadNetworkCreateDefaultExecGraphResult.*)",
+        R"(.*(Multi|Auto|Hetero).*Behavior.*OVCompiledModelBaseTest.*(CheckExecGraphInfoBeforeExecution|CheckExecGraphInfoAfterExecution).*)",
+        R"(.*(Multi|Auto|Hetero).*Behavior.*OVCompiledModelBaseTest.*(checkGetExecGraphInfoIsNotNullptr).*)",
        R"(.*OVClassExecutableNetworkGetMetricTest_EXEC_DEVICES.*CanGetExecutionDeviceInfo.*)",
        R"(.*OVClassHeteroExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS.*GetMetricNoThrow.*)",
        R"(.*OVClassHeteroExecutableNetworkGetMetricTest_SUPPORTED_METRICS.*GetMetricNoThrow.*)",
--- a/src/tests/functional/plugin/shared/include/behavior/compiled_model/compiled_model_base.hpp
+++ b/src/tests/functional/plugin/shared/include/behavior/compiled_model/compiled_model_base.hpp
@@ -77,7 +77,9 @@ protected:
    ov::AnyMap configuration;
    std::shared_ptr<ov::Model> function;

-    void set_api_entity() override { api_entity = ov::test::utils::ov_entity::ov_compiled_model; }
+    void set_api_entity() override {
+        api_entity = ov::test::utils::ov_entity::ov_compiled_model;
+    }
 };

 using OVAutoExecutableNetworkTest = OVCompiledModelBaseTest;
@@ -147,8 +149,6 @@ TEST(OVCompiledModelBaseTest, canCompileModelToDefaultDevice) {
    EXPECT_NO_THROW(auto execNet = core->compile_model(function));
 }

-
-
 TEST_P(OVCompiledModelBaseTestOptional, canCompileModelAndCreateInferRequest) {
    auto execNet = core->compile_model(function, target_device, configuration);
    EXPECT_NO_THROW(auto req = execNet.create_infer_request());
@@ -201,8 +201,7 @@ TEST_P(OVCompiledModelBaseTest, CanGetOutputsInfoAndCheck) {
    }
    auto results = function->get_results();
    for (const auto& param : results) {
-        EXPECT_NE(std::find(resVec.begin(), resVec.end(), param->get_output_tensor(0).get_any_name()),
-                  resVec.end());
+        EXPECT_NE(std::find(resVec.begin(), resVec.end(), param->get_output_tensor(0).get_any_name()), resVec.end());
    }
 }

@@ -218,7 +217,7 @@ TEST_P(OVCompiledModelBaseTestOptional, CheckExecGraphInfoBeforeExecution) {
    int constCnt = 0;

    std::shared_ptr<const ngraph::Function> getFunction = std::dynamic_pointer_cast<const ngraph::Function>(execGraph);
-    EXPECT_NE(getFunction, nullptr);
+    ASSERT_NE(getFunction, nullptr);

    for (const auto& op : getFunction->get_ops()) {
        const ov::RTMap& rtInfo = op->get_rt_info();
@@ -260,6 +259,7 @@ TEST_P(OVCompiledModelBaseTestOptional, CheckExecGraphInfoAfterExecution) {
    std::shared_ptr<const ov::Model> execGraph;
    // Load CNNNetwork to target plugins
    auto execNet = core->compile_model(function, target_device, configuration);
+    execNet.create_infer_request().infer();
    EXPECT_NO_THROW(execGraph = execNet.get_runtime_model());
    std::map<std::string, int> originalLayersMap;
    for (const auto& layer : function->get_ops()) {
@@ -269,7 +269,7 @@ TEST_P(OVCompiledModelBaseTestOptional, CheckExecGraphInfoAfterExecution) {
    // Store all the layers from the executable graph information represented as CNNNetwork
    bool hasOpWithValidTime = false;
    auto getFunction = std::dynamic_pointer_cast<const ngraph::Function>(execGraph);
-    EXPECT_NE(nullptr, getFunction);
+    ASSERT_NE(nullptr, getFunction);

    for (const auto& op : getFunction->get_ops()) {
        const auto& rtInfo = op->get_rt_info();
--- a/src/tests/functional/plugin/shared/include/behavior/executable_network/exec_network_base.hpp
+++ b/src/tests/functional/plugin/shared/include/behavior/executable_network/exec_network_base.hpp
@@ -211,6 +211,7 @@ TEST_P(ExecutableNetworkBaseTest, CheckExecGraphInfoAfterExecution) {
    InferenceEngine::CNNNetwork execGraph;
    // Load CNNNetwork to target plugins
    auto execNet = ie->LoadNetwork(cnnNet, target_device, configuration);
+    execNet.CreateInferRequest().Infer();
    ASSERT_NO_THROW(execGraph = execNet.GetExecGraphInfo());
    std::map<std::string, int> originalLayersMap;
    for (const auto &layer : function->get_ops()) {