Expand time tests to cover CPU caching feature (#8938)
* add performance hint to time infer
* add options for VPU, CPU cache, and perf hint
* change VPUX condition
* add perf hint verification
* update perf hint usage; set as not required
parent 893dd4531e
commit 55f64a505a
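For orientation, a minimal sketch of how the extended runner could be invoked once this change lands. The script and model paths are placeholders; the flag spelling follows the argparse options added below (`-p` for the performance hint, `-c` for CPU model caching, `-v` for the VPUX compiler type, with `-c` and `-v` mutually exclusive):

# Hypothetical invocation of the extended runner; all paths are placeholders.
import subprocess
import sys

cmd = [
    sys.executable, "run_timetest.py",   # scripts/run_timetest.py from this change
    "./timetest_infer",                  # timetest binary (placeholder path)
    "-m", "model.xml",                   # IR to measure (placeholder)
    "-d", "CPU",
    "-niter", "3",
    "-p", "LATENCY",                     # performance hint added by this commit
    "-c",                                # enable CPU model caching (mutually exclusive with -v)
]
subprocess.run(cmd, check=True)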
@@ -30,3 +30,243 @@
    precision: FP16-INT8
    framework: caffe2
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/faster_rcnn_resnet101_coco/tf/FP16/faster_rcnn_resnet101_coco.xml
    name: faster_rcnn_resnet101_coco
    precision: FP16
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/faster_rcnn_resnet101_coco/tf/FP16-INT8/faster_rcnn_resnet101_coco.xml
    name: faster_rcnn_resnet101_coco
    precision: FP16-INT8
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml
    name: faster-rcnn-resnet101-coco-sparse-60-0001
    precision: FP16
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml
    name: faster-rcnn-resnet101-coco-sparse-60-0001
    precision: FP16-INT8
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml
    name: googlenet-v1
    precision: FP16
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml
    name: googlenet-v1
    precision: FP16-INT8
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml
    name: googlenet-v3
    precision: FP16
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml
    name: googlenet-v3
    precision: FP16-INT8
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml
    name: ssd512
    precision: FP16
    framework: caffe
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml
    name: ssd512
    precision: FP16-INT8
    framework: caffe
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml
    name: yolo-v2-ava-0001
    precision: FP16
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml
    name: yolo-v2-ava-0001
    precision: FP16-INT8
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml
    name: yolo-v2-ava-sparse-35-0001
    precision: FP16
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml
    name: yolo-v2-ava-sparse-35-0001
    precision: FP16-INT8
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml
    name: yolo-v2-ava-sparse-70-0001
    precision: FP16
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml
    name: yolo-v2-ava-sparse-70-0001
    precision: FP16-INT8
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml
    name: yolo-v2-tiny-ava-0001
    precision: FP16
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml
    name: yolo-v2-tiny-ava-0001
    precision: FP16-INT8
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml
    name: yolo-v2-tiny-ava-sparse-30-0001
    precision: FP16
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml
    name: yolo-v2-tiny-ava-sparse-30-0001
    precision: FP16-INT8
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml
    name: yolo-v2-tiny-ava-sparse-60-0001
    precision: FP16
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml
    name: yolo-v2-tiny-ava-sparse-60-0001
    precision: FP16-INT8
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe2/FP16/squeezenet1.1.xml
    name: squeezenet1.1
    precision: FP16
    framework: caffe2
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe2/FP16-INT8/squeezenet1.1.xml
    name: squeezenet1.1
    precision: FP16-INT8
    framework: caffe2
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml
    name: icnet-camvid-ava-0001
    precision: FP16
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml
    name: icnet-camvid-ava-0001
    precision: FP16-INT8
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml
    name: icnet-camvid-ava-sparse-30-0001
    precision: FP16
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml
    name: icnet-camvid-ava-sparse-30-0001
    precision: FP16-INT8
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml
    name: icnet-camvid-ava-sparse-60-0001
    precision: FP16
    framework: tf
  use_model_cache: true
- device:
    name: CPU
  model:
    path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml
    name: icnet-camvid-ava-sparse-60-0001
    precision: FP16-INT8
    framework: tf
  use_model_cache: true
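These entries are plain YAML, so the runner can expand them into per-test parameters. A rough sketch of that expansion follows; the file name and the exact nesting of `use_model_cache` are assumptions based on the entries above:

# Rough sketch: expand the test config into per-model parameters.
# File name and key layout are assumptions based on the snippet above.
import os
import yaml  # PyYAML

with open("test_config.yml") as f:
    instances = yaml.safe_load(f)

for instance in instances:
    device = instance["device"]["name"]
    model = instance["model"]
    path = os.path.expandvars(model["path"])          # resolves ${VPUX_MODELS_PKG}
    use_cache = instance.get("use_model_cache", False)
    print(device, model["precision"], path, "cache" if use_cache else "no-cache")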
@@ -58,15 +58,33 @@ def aggregate_stats(stats: dict):

def prepare_executable_cmd(args: dict):
    """Generate common part of cmd from arguments to execute"""
    return [str(args["executable"].resolve(strict=True)),
            "-m", str(args["model"].resolve(strict=True)),
            "-d", args["device"]]
    return [
        str(args["executable"].resolve(strict=True)),
        "-m", str(args["model"].resolve(strict=True)),
        "-d", args["device"],
        "-p", args["perf_hint"],
        "-v" if args["vpu_compiler"] else "", args['vpu_compiler'] if args["vpu_compiler"] else "",
        "-c" if args["cpu_cache"] else "",
    ]


def get_cache_stats(flatten_data):
    """Update statistics for run with models cache"""
    data_cache = {
        "full_run_using_cache": flatten_data["full_run"],
        "time_to_inference_using_cache": flatten_data["time_to_inference"],
        "load_plugin": flatten_data["load_plugin"],
        "load_network_using_cache": flatten_data["load_network"],
        "first_inference": flatten_data["first_inference"],
        "fill_inputs": flatten_data["fill_inputs"],
    }
    return data_cache


def run_timetest(args: dict, log=None):
    """Run provided executable several times and aggregate collected statistics"""
    if log is None:
        log = logging.getLogger('run_timetest')
        log = logging.getLogger("run_timetest")

    cmd_common = prepare_executable_cmd(args)

@@ -90,6 +108,9 @@ def run_timetest(args: dict, log=None):
        flatten_data = {}
        parse_stats(raw_data[0], flatten_data)

        if run_iter > 0 and args["cpu_cache"]:
            flatten_data = get_cache_stats(flatten_data)

        log.debug(f"Statistics after run of executable #{run_iter}: {flatten_data}")

    # Combine statistics from several runs
@@ -108,29 +129,45 @@ def run_timetest(args: dict, log=None):

def cli_parser():
    """parse command-line arguments"""
    parser = argparse.ArgumentParser(description='Run timetest executable')
    parser.add_argument('executable',
    parser = argparse.ArgumentParser(description="Run timetest executable")
    parser.add_argument("executable",
                        type=Path,
                        help='binary to execute')
    parser.add_argument('-m',
                        help="Binary to execute")
    parser.add_argument("-m",
                        required=True,
                        dest="model",
                        type=Path,
                        help='path to an .xml/.onnx file with a trained model or'
                             ' to a .blob files with a trained compiled model')
    parser.add_argument('-d',
                        help="Path to an .xml/.onnx file with a trained model or"
                             " to a .blob files with a trained compiled model")
    parser.add_argument("-d",
                        required=True,
                        dest="device",
                        type=str,
                        help='target device to infer on')
    parser.add_argument('-niter',
                        help="Target device to infer on")
    parser.add_argument("-niter",
                        default=10,
                        type=check_positive_int,
                        help='number of times to execute binary to aggregate statistics of')
    parser.add_argument('-s',
                        help="Number of times to execute binary to aggregate statistics of")
    parser.add_argument("-s",
                        dest="stats_path",
                        type=Path,
                        help='path to a file to save aggregated statistics')
                        help="path to a file to save aggregated statistics")
    parser.add_argument("-p",
                        dest="perf_hint",
                        choices=["LATENCY", "THROUGHPUT"],
                        default="LATENCY",
                        type=str,
                        help="Enables performance hint for specified device. Default hint is LATENCY")
    exclusive_group = parser.add_mutually_exclusive_group(required=False)
    exclusive_group.add_argument("-c",
                                 dest="cpu_cache",
                                 action="store_true",
                                 help="Enable CPU model cache usage")
    exclusive_group.add_argument("-v",
                                 dest="vpu_compiler",
                                 choices=["MCM", "MLIR"],
                                 type=str,
                                 help="Change VPUX compiler type")

    args = parser.parse_args()

@@ -143,6 +180,12 @@ if __name__ == "__main__":
    logging.basicConfig(format="[ %(levelname)s ] %(message)s",
                        level=logging.DEBUG, stream=sys.stdout)

    assert not (args.cpu_cache and args.device != "CPU"), \
        "The cache option is used only for the CPU device."

    assert not (args.vpu_compiler and "VPUX" not in args.device), \
        "The VPUX compiler option is used only for the VPUX device."

    exit_code, _, aggr_stats, _ = run_timetest(
        dict(args._get_kwargs()), log=logging)  # pylint: disable=protected-access
    if args.stats_path:
@@ -159,15 +202,15 @@ if __name__ == "__main__":

def test_timetest_parser():
    # Example of timetest yml file
    raw_data_example = [{'full_run': [1, {'first_inference_latency': [2, {'load_plugin': [3]}, {
        'create_exenetwork': [4, {'read_network': [5]}, {'load_network': [6]}]}]},
        {'first_inference': [7, {'fill_inputs': [8]}]}]}]
    raw_data_example = [{"full_run": [1, {"first_inference_latency": [2, {"load_plugin": [3]}, {
        "create_exenetwork": [4, {"read_network": [5]}, {"load_network": [6]}]}]},
        {"first_inference": [7, {"fill_inputs": [8]}]}]}]

    # Refactoring raw data from yml
    flatten_dict = {}
    parse_stats(raw_data_example, flatten_dict)

    expected_result = {'full_run': 1, 'first_inference_latency': 2, 'load_plugin': 3, 'create_exenetwork': 4,
                       'read_network': 5, 'load_network': 6, 'first_inference': 7, 'fill_inputs': 8}
    expected_result = {"full_run": 1, "first_inference_latency": 2, "load_plugin": 3, "create_exenetwork": 4,
                       "read_network": 5, "load_network": 6, "first_inference": 7, "fill_inputs": 8}

    assert flatten_dict == expected_result, "Statistics parsing is performed incorrectly!"
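To make the new flags concrete, here is a small self-contained sketch of what the extended command assembly produces for a typical argument dictionary. The helper mirrors prepare_executable_cmd from the diff (without path resolution), and the paths are placeholders; the empty strings come from the conditional -v/-c entries when those options are unset:

# Self-contained illustration of the extended command assembly (placeholder paths).
def build_cmd(args: dict) -> list:
    """Mirror of prepare_executable_cmd from the diff, without path resolution."""
    return [
        str(args["executable"]),
        "-m", str(args["model"]),
        "-d", args["device"],
        "-p", args["perf_hint"],
        "-v" if args["vpu_compiler"] else "", args["vpu_compiler"] if args["vpu_compiler"] else "",
        "-c" if args["cpu_cache"] else "",
    ]

example = {
    "executable": "./timetest_infer",   # placeholder binary path
    "model": "model.xml",               # placeholder IR path
    "device": "CPU",
    "perf_hint": "LATENCY",
    "cpu_cache": True,
    "vpu_compiler": "",
}
print(build_cmd(example))
# ['./timetest_infer', '-m', 'model.xml', '-d', 'CPU', '-p', 'LATENCY', '', '', '-c']

The empty strings are harmless when passed to a subprocess, but keeping the sketch faithful to the diff makes the behaviour easy to compare.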
@@ -17,51 +17,87 @@ using namespace InferenceEngine;
 * main(). The function should not throw any exceptions and responsible for
 * handling it by itself.
 */
int runPipeline(const std::string &model, const std::string &device) {
    auto pipeline = [](const std::string &model, const std::string &device) {
int runPipeline(const std::string &model, const std::string &device, const std::string &performanceHint,
                const bool isCacheEnabled, const std::string &vpuCompiler) {
    auto pipeline = [](const std::string &model, const std::string &device, const std::string &performanceHint,
                       const bool isCacheEnabled, const std::string &vpuCompiler) {
        Core ie;
        CNNNetwork cnnNetwork;
        ExecutableNetwork exeNetwork;
        InferRequest inferRequest;
        size_t batchSize = 0;

        if (!performanceHint.empty()) {
            std::vector<std::string> supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));

            // enables performance hint for specified device
            std::string performanceConfig;
            if (performanceHint == "THROUGHPUT")
                performanceConfig = CONFIG_VALUE(THROUGHPUT);
            else if (performanceHint == "LATENCY")
                performanceConfig = CONFIG_VALUE(LATENCY);

            if (std::find(supported_config_keys.begin(), supported_config_keys.end(), "PERFORMANCE_HINT") ==
                supported_config_keys.end()) {
                std::cerr << "Device " << device << " doesn't support config key 'PERFORMANCE_HINT'!\n"
                          << "Performance config was not set.";
            }
            else
                ie.SetConfig({{CONFIG_KEY(PERFORMANCE_HINT), performanceConfig}}, device);
        }

        // set config for VPUX device
        std::map<std::string, std::string> vpuConfig = {};
        if (vpuCompiler == "MCM")
            vpuConfig = {{"VPUX_COMPILER_TYPE", "MCM"}};
        else if (vpuCompiler == "MLIR")
            vpuConfig = {{"VPUX_COMPILER_TYPE", "MLIR"}};

        // first_inference_latency = time_to_inference + first_inference
        {
            SCOPED_TIMER(first_inference_latency);
            SCOPED_TIMER(time_to_inference);
            {
                SCOPED_TIMER(load_plugin);
                ie.GetVersions(device);
                // enables performance hint for specified device
                ie.SetConfig({{CONFIG_KEY(PERFORMANCE_HINT), CONFIG_VALUE(LATENCY)}}, device);

                if (isCacheEnabled)
                    ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "models_cache"}});
            }
            {
                SCOPED_TIMER(create_exenetwork);
                if (TimeTest::fileExt(model) == "blob") {
                    SCOPED_TIMER(import_network);
                    exeNetwork = ie.ImportNetwork(model, device);
                if (!isCacheEnabled) {
                    SCOPED_TIMER(create_exenetwork);

                    if (TimeTest::fileExt(model) == "blob") {
                        SCOPED_TIMER(import_network);
                        exeNetwork = ie.ImportNetwork(model, device);
                    }
                    else {
                        {
                            SCOPED_TIMER(read_network);
                            cnnNetwork = ie.ReadNetwork(model);
                            batchSize = cnnNetwork.getBatchSize();
                        }

                        {
                            SCOPED_TIMER(load_network);
                            exeNetwork = ie.LoadNetwork(cnnNetwork, device, vpuConfig);
                        }
                    }
                }
                else {
                {
                    SCOPED_TIMER(read_network);
                    cnnNetwork = ie.ReadNetwork(model);
                    batchSize = cnnNetwork.getBatchSize();
                }

                {
                    SCOPED_TIMER(load_network);
                    exeNetwork = ie.LoadNetwork(cnnNetwork, device);
                }
                    SCOPED_TIMER(load_network);
                    exeNetwork = ie.LoadNetwork(model, device);
                }
            }
            inferRequest = exeNetwork.CreateInferRequest();
        }

        {
            SCOPED_TIMER(first_inference);
            inferRequest = exeNetwork.CreateInferRequest();

            {
                SCOPED_TIMER(fill_inputs)
                batchSize = batchSize != 0 ? batchSize : 1;
                SCOPED_TIMER(fill_inputs);
                const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
                batchSize = batchSize != 0 ? batchSize : 1;
                fillBlobs(inferRequest, inputsInfo, batchSize);
            }
            inferRequest.Infer();
@@ -69,7 +105,7 @@ int runPipeline(const std::string &model, const std::string &device) {
    };

    try {
        pipeline(model, device);
        pipeline(model, device, performanceHint, isCacheEnabled, vpuCompiler);
    } catch (const InferenceEngine::Exception &iex) {
        std::cerr
            << "Inference Engine pipeline failed with Inference Engine exception:\n"
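The control flow above (query the device's supported config keys, then either set PERFORMANCE_HINT or warn) can be summarised in a few lines. The following is a stubbed sketch of that guard in Python, not the Inference Engine API itself; the device query and config setter are passed in as stand-ins:

# Stubbed sketch of the PERFORMANCE_HINT guard from the C++ pipeline above.
def apply_perf_hint(device: str, hint: str, supported_keys: list, set_config) -> None:
    """Set the hint only when the device reports support for PERFORMANCE_HINT."""
    if not hint:
        return
    if "PERFORMANCE_HINT" not in supported_keys:
        print(f"Device {device} doesn't support config key 'PERFORMANCE_HINT'! "
              "Performance config was not set.")
        return
    set_config({"PERFORMANCE_HINT": hint}, device)

# Example with a fake device that does support the key.
apply_perf_hint("CPU", "LATENCY", ["PERFORMANCE_HINT", "CPU_THREADS_NUM"],
                lambda cfg, dev: print("SetConfig", cfg, "on", dev))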
@@ -1,68 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <inference_engine.hpp>
#include <ie_plugin_config.hpp>
#include <iostream>

#include "common_utils.h"
#include "timetests_helper/timer.h"
#include "timetests_helper/utils.h"
using namespace InferenceEngine;


/**
 * @brief Function that contain executable pipeline which will be called from
 * main(). The function should not throw any exceptions and responsible for
 * handling it by itself.
 */
int runPipeline(const std::string &model, const std::string &device) {
    auto pipeline = [](const std::string &model, const std::string &device) {
        Core ie;
        CNNNetwork cnnNetwork;
        ExecutableNetwork exeNetwork;
        InferRequest inferRequest;

        {
            SCOPED_TIMER(first_inference_latency);
            {
                SCOPED_TIMER(load_plugin);
                ie.GetVersions(device);
            }
            {
                SCOPED_TIMER(load_network);
                // enables cache
                ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "models_cache"}});
                exeNetwork = ie.LoadNetwork(model, device);
            }
            {
                SCOPED_TIMER(first_inference);
                inferRequest = exeNetwork.CreateInferRequest();
                {
                    SCOPED_TIMER(fill_inputs)
                    const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
                    fillBlobs(inferRequest, inputsInfo, 1);
                }
                inferRequest.Infer();
            }
        }
    };

    try {
        pipeline(model, device);
    } catch (const InferenceEngine::Exception &iex) {
        std::cerr
            << "Inference Engine pipeline failed with Inference Engine exception:\n"
            << iex.what();
        return 1;
    } catch (const std::exception &ex) {
        std::cerr << "Inference Engine pipeline failed with exception:\n"
                  << ex.what();
        return 2;
    } catch (...) {
        std::cerr << "Inference Engine pipeline failed\n";
        return 3;
    }
    return 0;
}
@@ -1,84 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <inference_engine.hpp>
#include <iostream>

#include "common_utils.h"
#include "timetests_helper/timer.h"
#include "timetests_helper/utils.h"
using namespace InferenceEngine;


/**
 * @brief Function that contain executable pipeline which will be called from
 * main(). The function should not throw any exceptions and responsible for
 * handling it by itself.
 */
int runPipeline(const std::string &model, const std::string &device) {
    auto pipeline = [](const std::string &model, const std::string &device) {
        Core ie;
        CNNNetwork cnnNetwork;
        ExecutableNetwork exeNetwork;
        InferRequest inferRequest;
        size_t batchSize = 0;

        {
            SCOPED_TIMER(first_inference_latency);
            {
                SCOPED_TIMER(load_plugin);
                ie.GetVersions(device);
            }
            {
                SCOPED_TIMER(create_exenetwork);
                if (TimeTest::fileExt(model) == "blob") {
                    SCOPED_TIMER(import_network);
                    exeNetwork = ie.ImportNetwork(model, device);
                }
                else {
                    {
                        SCOPED_TIMER(read_network);
                        cnnNetwork = ie.ReadNetwork(model);
                        batchSize = cnnNetwork.getBatchSize();
                    }

                    {
                        SCOPED_TIMER(load_network);
                        exeNetwork = ie.LoadNetwork(cnnNetwork, device, {{"VPUX_COMPILER_TYPE", "MLIR"}});
                    }
                }
            }
        }

        {
            SCOPED_TIMER(first_inference);
            inferRequest = exeNetwork.CreateInferRequest();

            {
                SCOPED_TIMER(fill_inputs)
                batchSize = batchSize != 0 ? batchSize : 1;
                const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
                fillBlobs(inferRequest, inputsInfo, batchSize);
            }
            inferRequest.Infer();
        }
    };

    try {
        pipeline(model, device);
    } catch (const InferenceEngine::Exception &iex) {
        std::cerr
            << "Inference Engine pipeline failed with Inference Engine exception:\n"
            << iex.what();
        return 1;
    } catch (const std::exception &ex) {
        std::cerr << "Inference Engine pipeline failed with exception:\n"
                  << ex.what();
        return 2;
    } catch (...) {
        std::cerr << "Inference Engine pipeline failed\n";
        return 3;
    }
    return 0;
}
@@ -26,6 +26,18 @@ static const char target_device_message[] =
    "plugin. "
    "The application looks for a suitable plugin for the specified device.";

/// @brief message for performance hint argument
static const char performance_hint_message[] =
    "Not required. Enables performance hint for specified device. Available hints are LATENCY and THROUGHPUT.";

/// @brief message for cache argument
static const char cpu_cache_message[] =
    "Not required. Use this key to run timetests with CPU models caching.";

/// @brief message for vpu argument
static const char vpu_compiler_message[] =
    "Not required. Use this key to run timetests using MLIR or MCM VPUX compiler type.";

/// @brief message for statistics path argument
static const char statistics_path_message[] =
    "Required. Path to a file to write statistics.";
@@ -44,6 +56,18 @@ DEFINE_string(m, "", model_message);
/// It is a required parameter
DEFINE_string(d, "", target_device_message);

/// @brief Define parameter for set performance hint for target device <br>
/// It is a non-required parameter
DEFINE_string(p, "", performance_hint_message);

/// @brief Define parameter for set CPU models caching <br>
/// It is a non-required parameter
DEFINE_bool(c, false, cpu_cache_message);

/// @brief Define parameter VPU compiler type <br>
/// It is a non-required parameter
DEFINE_string(v, "", vpu_compiler_message);

/// @brief Define parameter for set path to a file to write statistics <br>
/// It is a required parameter
DEFINE_string(s, "", statistics_path_message);
@@ -56,10 +80,13 @@ static void showUsage() {
    std::cout << "TimeTests [OPTION]" << std::endl;
    std::cout << "Options:" << std::endl;
    std::cout << std::endl;
    std::cout << "    -h, --help               " << help_message << std::endl;
    std::cout << "    -h, --help                " << help_message << std::endl;
    std::cout << "    -m \"<path>\"             " << model_message << std::endl;
    std::cout << "    -d \"<device>\"           " << target_device_message
              << std::endl;
    std::cout << "    -s \"<path>\"             " << statistics_path_message
              << std::endl;
    std::cout << "    -p \"<perf_hint>\"        " << performance_hint_message << std::endl;
    std::cout << "    -c                        " << cpu_cache_message << std::endl;
    std::cout << "    -v \"<compiler_type>\"    " << vpu_compiler_message << std::endl;
}
@@ -8,7 +8,8 @@

#include <iostream>

int runPipeline(const std::string &model, const std::string &device);
int runPipeline(const std::string &model, const std::string &device, const std::string &performanceHint,
                const bool isCacheEnabled, const std::string &vpuCompiler);

/**
 * @brief Parses command line and check required arguments
@@ -40,7 +41,7 @@ bool parseAndCheckCommandLine(int argc, char **argv) {
 */
int _runPipeline() {
    SCOPED_TIMER(full_run);
    return runPipeline(FLAGS_m, FLAGS_d);
    return runPipeline(FLAGS_m, FLAGS_d, FLAGS_p, FLAGS_c, FLAGS_v);
}

/**
@@ -54,4 +55,4 @@ int main(int argc, char **argv) {
    StatisticsWriter::Instance().setFile(FLAGS_s);
    StatisticsWriter::Instance().write();
    return status;
}
}
@@ -43,7 +43,7 @@ def pytest_addoption(parser):
    test_args_parser.addoption(
        "--test_conf",
        type=Path,
        help="path to a test config",
        help="Path to a test config",
        default=Path(__file__).parent / "test_config.yml"
    )
    test_args_parser.addoption(
@@ -51,20 +51,38 @@ def pytest_addoption(parser):
        required=True,
        dest="executable",
        type=Path,
        help="path to a timetest binary to execute"
        help="Path to a timetest binary to execute"
    )
    test_args_parser.addoption(
        "--niter",
        type=check_positive_int,
        help="number of iterations to run executable and aggregate results",
        help="Number of iterations to run executable and aggregate results",
        default=3
    )
    test_args_parser.addoption(
        "--cpu_cache",
        action='store_true',
        help="Enable model CPU cache usage",
    )
    test_args_parser.addoption(
        "--perf_hint",
        choices=['LATENCY', 'THROUGHPUT'],
        default='LATENCY',
        type=str,
        help='Enables performance hint for specified device. Default hint is LATENCY'
    )
    test_args_parser.addoption(
        "--vpu_compiler",
        choices=["MCM", "MLIR"],
        type=str,
        help="Change VPUX compiler type",
    )
    db_args_parser = parser.getgroup("timetest database use")
    db_args_parser.addoption(
        '--db_submit',
        metavar="RUN_ID",
        type=str,
        help='submit results to the database. ' \
        help='Submit results to the database. ' \
             '`RUN_ID` should be a string uniquely identifying the run' \
             ' (like Jenkins URL or time)'
    )
@@ -79,19 +97,21 @@ def pytest_addoption(parser):
        '--db_collection',
        type=str,
        required=is_db_used,
        help='collection name in database',
        help='Collection name in database',
        choices=DB_COLLECTIONS
    )
    db_args_parser.addoption(
        '--db_metadata',
        type=str,
        default=None,
        help='path to JSON-formatted file to extract additional information')
        help='Path to JSON-formatted file to extract additional information'
    )
    db_args_parser.addoption(
        '--manifest',
        type=Path,
        required=is_db_used,
        help='path to build manifest to extract commit information')
        help='Path to build manifest to extract commit information'
    )


@pytest.fixture(scope="session")
@@ -112,8 +132,26 @@ def niter(request):
    return request.config.getoption('niter')


@pytest.fixture(scope="session")
def cpu_cache(request):
    """Fixture function for command-line option."""
    return request.config.getoption('cpu_cache')


@pytest.fixture(scope="session")
def perf_hint(request):
    """Fixture function for command-line option."""
    return request.config.getoption('perf_hint')


@pytest.fixture(scope="session")
def vpu_compiler(request):
    """Fixture function for command-line option."""
    return request.config.getoption('vpu_compiler')

# -------------------- CLI options --------------------


@pytest.fixture(scope="function")
def temp_dir(pytestconfig):
    """Create temporary directory for test purposes.
@@ -34,14 +34,17 @@ from scripts.run_timetest import run_timetest
REFS_FACTOR = 1.2  # 120%


def test_timetest(instance, executable, niter, cl_cache_dir, model_cache_dir, test_info, temp_dir, validate_test_case,
                  prepare_db_info):
def test_timetest(instance, executable, niter, cl_cache_dir, cpu_cache, vpu_compiler, perf_hint, model_cache_dir,
                  test_info, temp_dir, validate_test_case, prepare_db_info):
    """Parameterized test.

    :param instance: test instance. Should not be changed during test run
    :param executable: timetest executable to run
    :param niter: number of times to run executable
    :param cl_cache_dir: directory to store OpenCL cache
    :param cpu_cache: flag to enable model CPU cache
    :param vpu_compiler: flag to change VPUX compiler type
    :param perf_hint: performance hint (optimize device for latency or throughput settings)
    :param model_cache_dir: directory to store IE model cache
    :param test_info: custom `test_info` field of built-in `request` pytest fixture
    :param temp_dir: path to a temporary directory. Will be cleaned up after test run
@@ -63,7 +66,10 @@ def test_timetest(instance, executable, niter, cl_cache_dir, model_cache_dir, te
        "executable": Path(executable),
        "model": Path(model_path),
        "device": instance["device"]["name"],
        "niter": niter
        "niter": niter,
        "perf_hint": perf_hint,
        "cpu_cache": cpu_cache,
        "vpu_compiler": vpu_compiler if vpu_compiler else ""
    }
    logging.info("Run timetest once to generate any cache")
    retcode, msg, _, _ = run_timetest({**exe_args, "niter": 1}, log=logging)
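Putting the pieces together, a rough sketch of the warm-up-then-measure pattern the test follows when caching is on. The function names come from the scripts shown earlier in this diff; the binary and model paths are placeholders, and the import assumes the test is executed from the timetests test directory:

# Rough sketch of the cache warm-up pattern used by the test (placeholder paths).
import logging
from pathlib import Path

from scripts.run_timetest import run_timetest  # helper shown earlier in this diff

exe_args = {
    "executable": Path("./timetest_infer"),   # placeholder binary path
    "model": Path("model.xml"),               # placeholder IR path
    "device": "CPU",
    "niter": 3,
    "perf_hint": "LATENCY",
    "cpu_cache": True,
    "vpu_compiler": "",
}

# One warm-up run populates the models_cache directory ...
run_timetest({**exe_args, "niter": 1}, log=logging)
# ... then, within the measured run, iterations after the first are relabelled
# to *_using_cache statistics (see get_cache_stats above).
retcode, msg, aggr_stats, raw = run_timetest(exe_args, log=logging)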