[TF Hub][Perf Tests] Implement performance tests in precommit to compare read_model and convert_model paths (#21023)

* begin

* fixes

* measure time and compare

* fix bug + update precommit models

* cleanup

* add wget to install requirements; add to linux.yml

* fixes + improvements

* output results to html

* remove unneeded code

* fix

* code review fixes

* store downloaded models in cache

* use 1000 runs

* use model paths from tf-hub tests

* use tf hub api to download instead of wget; measure some time stats

* small fixes

* remove unneeded files

* use own list of models

* remove unneeded if

* remove unstable models

* remove unstable models

* fix requirements

* code review fixes

* Update .github/workflows/linux.yml

Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>

* fix round_num function

* remove unstable network

* Update tests/model_hub_tests/models_hub_common/test_performance_model.py

Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>

* code review fixes

* build fix

* code review fixes

* code review fixes

* code review fixes

* Update tests/model_hub_tests/models_hub_common/test_performance_model.py

Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Update tests/model_hub_tests/performance_tests/test_tf_hub_perfomance_model.py

Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Update tests/model_hub_tests/performance_tests/test_tf_hub_perfomance_model.py

Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Update tests/model_hub_tests/models_hub_common/test_performance_model.py

Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>

* code review fixes: use autoflake

* remove unneeded tensorflow_text

* Update .github/workflows/linux.yml

* Update linux.yml

Added TensorFlow_Hub_Performance_Models_Tests to final status check

---------

Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>
Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
Evgeny Kotov 2023-11-24 15:13:44 +01:00 committed by GitHub
parent e087ed083c
commit 965313ba0b
8 changed files with 426 additions and 1 deletion
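At a high level, the new harness (test_performance_model.py in the diffs below) compiles each model twice, once via ov.convert_model and once via core.read_model, runs warm-up and timed inference on both, and compares the mean latencies. The following is a minimal sketch of that idea only; the helper names measure_mean_latency and compare_paths, and the fixed run counts, are illustrative and not part of the PR's actual API.

import time
import numpy as np
import openvino as ov

def measure_mean_latency(compiled_model, inputs, warmup=100, runs=500):
    # warm-up iterations are executed but not measured
    for _ in range(warmup):
        compiled_model(inputs)
    samples = []
    for _ in range(runs):
        t0 = time.time()
        compiled_model(inputs)
        samples.append(time.time() - t0)
    return float(np.mean(samples))

def compare_paths(model_path, inputs, device='CPU', threshold_ratio=0.1):
    # compile the same model through both frontend paths
    core = ov.Core()
    converted = core.compile_model(ov.convert_model(model_path), device)
    read = core.compile_model(core.read_model(model_path), device)
    ratio = measure_mean_latency(converted, inputs) / measure_mean_latency(read, inputs)
    # the check passes when both paths perform within threshold_ratio of each other
    return abs(ratio - 1.0) <= threshold_ratio, ratio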

View File

@@ -1357,6 +1357,91 @@ jobs:
${{ env.INSTALL_TEST_DIR }}/TEST*.html
if-no-files-found: 'error'
TensorFlow_Hub_Performance_Models_Tests:
name: TensorFlow Hub Performance Models tests
needs: [Build, Smart_CI]
defaults:
run:
shell: bash
runs-on: ${{ github.event_name == 'schedule' && 'ubuntu-20.04-16-cores' || 'ubuntu-20.04-8-cores'}}
timeout-minutes: ${{ github.event_name == 'schedule' && 400 || 5 }}
env:
OPENVINO_REPO: ${{ github.workspace }}/openvino
INSTALL_DIR: ${{ github.workspace }}/install
INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests
if: fromJSON(needs.smart_ci.outputs.affected_components).TF_FE.test ||
fromJSON(needs.smart_ci.outputs.affected_components).TFL_FE.test
steps:
- name: Check sudo
run: if [ "$(id -u)" -eq 0 ]; then apt update && apt --assume-yes install sudo; fi
- name: Download OpenVINO package
uses: actions/download-artifact@v3
with:
name: openvino_package
path: ${{ env.INSTALL_DIR }}
- name: Download OpenVINO tests package
uses: actions/download-artifact@v3
with:
name: openvino_tests
path: ${{ env.INSTALL_TEST_DIR }}
- name: Extract OpenVINO packages
run: |
pushd ${INSTALL_DIR}
tar -xzf openvino_package.tar.gz -C ${INSTALL_DIR}
popd
pushd ${INSTALL_TEST_DIR}
tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR}
popd
- name: Fetch setup_python action
uses: actions/checkout@v4
with:
sparse-checkout: |
.github/actions/setup_python/action.yml
sparse-checkout-cone-mode: false
path: 'openvino'
- name: Setup Python ${{ env.PYTHON_VERSION }}
uses: ./openvino/.github/actions/setup_python
with:
version: ${{ env.PYTHON_VERSION }}
should-setup-pip-paths: 'false'
self-hosted-runner: 'false'
- name: Install OpenVINO Python wheels
run: python3 -m pip install ${INSTALL_DIR}/tools/openvino-*
- name: Install TF Hub tests requirements
run: |
python3 -m pip install -r ${MODEL_HUB_TESTS_INSTALL_DIR}/tf_hub_tests/requirements.txt
- name: Install Hub Performance tests requirements
run: |
python3 -m pip install -r ${MODEL_HUB_TESTS_INSTALL_DIR}/performance_tests/requirements.txt
- name: Performance Hub Tests
run: |
export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH
python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/performance_tests/ -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-tf_hub_performance.html --self-contained-html -v
env:
TYPE: ${{ github.event_name == 'schedule' && 'nightly' || 'precommit'}}
TEST_DEVICE: CPU
- name: Upload Test Results
uses: actions/upload-artifact@v3
if: ${{ !cancelled() }}
with:
name: test-results-tensorflow-hub-performance-models
path: |
${{ env.INSTALL_TEST_DIR }}/TEST*.html
if-no-files-found: 'error'
PyTorch_Models_Tests:
name: PyTorch Models tests
needs: [Build, Smart_CI]
@@ -1590,7 +1675,7 @@ jobs:
Overall_Status:
name: ci/gha_overall_status
needs: [Smart_CI, Build, Debian_Packages, Samples, Conformance, ONNX_Runtime, CXX_Unit_Tests, Python_Unit_Tests,
CPU_Functional_Tests, TensorFlow_Hub_Models_Tests, TensorFlow_Hub_Performance_Models_Tests, PyTorch_Models_Tests, NVIDIA_Plugin]
if: ${{ always() }}
runs-on: ubuntu-latest
steps:
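For local debugging, the "Performance Hub Tests" step above can be approximated with a short Python driver. This is a sketch only; the install/tests layout, device, and marker below are assumptions mirroring the workflow's environment variables.

import os
import sys
import pytest

# assumed extracted-package layout, mirroring MODEL_HUB_TESTS_INSTALL_DIR above
tests_dir = os.path.abspath('install/tests/model_hub_tests')
sys.path.insert(0, tests_dir)            # stands in for the PYTHONPATH export
os.environ['TEST_DEVICE'] = 'CPU'
sys.exit(pytest.main([
    os.path.join(tests_dir, 'performance_tests'),
    '-m', 'precommit',                   # the workflow selects 'nightly' on scheduled runs
    '--html=TEST-tf_hub_performance.html',
    '--self-contained-html',
    '-v',
]))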

View File

@@ -9,8 +9,10 @@ tf_hub_cache_dir = os.environ.get('TFHUB_CACHE_DIR',
os.path.join(tempfile.gettempdir(), "tfhub_modules"))
os.environ['TFHUB_CACHE_DIR'] = tf_hub_cache_dir
no_clean_cache_dir = False
hf_hub_cache_dir = tempfile.gettempdir()
if os.environ.get('USE_SYSTEM_CACHE', 'True') == 'False':
no_clean_cache_dir = True
os.environ['HUGGINGFACE_HUB_CACHE'] = hf_hub_cache_dir
# supported_devices : CPU, GPU, GNA

View File

@@ -0,0 +1,218 @@
# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import sys
import time
from enum import Enum
import traceback
import pytest
from openvino.runtime.utils.types import openvino_to_numpy_types_map
import numpy as np
from models_hub_common.multiprocessing_utils import multiprocessing_run
import openvino as ov
# noinspection PyUnresolvedReferences
# set seed to have deterministic input data generation
# to avoid sporadic issues in inference results
rng = np.random.default_rng(seed=56190)
def get_numpy_type(ov_type):
np_type = next(
(np_type_value for (ov_type_value, np_type_value) in openvino_to_numpy_types_map if ov_type_value == ov_type),
None,
)
if not np_type:
raise Exception('no numpy type for type {} found'.format(ov_type))
return np_type
class Status(Enum):
OK = 0
LARGE_INFER_TIME_DIFF = 1
LOAD_MODEL = 2
GET_INPUTS_INFO = 3
PREPARE_INPUTS = 4
GET_CONVERTED_MODEL = 5
GET_READ_MODEL = 6
INFER_CONVERTED_MODEL = 7
INFER_READ_MODEL = 8
LARGE_INFER_TIME_DIFF_WITH_LARGE_VAR = 9
class Results:
def __init__(self):
self.converted_infer_time = 0.0
self.converted_model_time_variance = 0.0
self.read_model_infer_time = 0.0
self.read_model_infer_time_variance = 0.0
self.infer_time_ratio = 0.0
self.error_message = ''
self.status = None
def wrap_timer(func, args):
t0 = time.time()
retval = func(*args)
t1 = time.time()
return retval, t1 - t0
class TestModelPerformance:
infer_timeout = 600
threshold_ratio = 0.1
num_heat_runs = 100
num_measure_runs = 500
threshold_var = 10.0
def load_model(self, model_name, model_link):
raise NotImplementedError("load_model is not implemented")
def prepare_input(self, input_shape, input_type):
if input_type in [ov.Type.f32, ov.Type.f64]:
return 2.0 * rng.random(size=input_shape, dtype=get_numpy_type(input_type))
elif input_type in [ov.Type.u8, ov.Type.u16, ov.Type.i8, ov.Type.i16, ov.Type.i32, ov.Type.i64]:
return rng.integers(0, 5, size=input_shape).astype(get_numpy_type(input_type))
elif input_type in [str]:
return np.broadcast_to("Some string", input_shape)
elif input_type in [bool]:
return rng.integers(0, 2, size=input_shape).astype(get_numpy_type(input_type))
else:
assert False, "Unsupported type {}".format(input_type)
def prepare_inputs(self, inputs_info):
if len(inputs_info) > 0 and inputs_info[0] == 'list':
inputs = []
inputs_info = inputs_info[1:]
for input_name, input_shape, input_type in inputs_info:
inputs.append(self.prepare_input(input_shape, input_type))
else:
inputs = {}
for input_name, input_shape, input_type in inputs_info:
inputs[input_name] = self.prepare_input(input_shape, input_type)
return inputs
def get_inputs_info(self, model_path: str):
inputs_info = []
core = ov.Core()
model = core.read_model(model=model_path)
for param in model.inputs:
input_shape = []
param_shape = param.get_node().get_output_partial_shape(0)
shape_special_dims = [ov.Dimension(), ov.Dimension(), ov.Dimension(), ov.Dimension(3)]
if param_shape == ov.PartialShape(shape_special_dims) and param.get_element_type() == ov.Type.f32:
# image classification case, let us imitate an image
# that helps to avoid compute output size issue
input_shape = [1, 200, 200, 3]
else:
for dim in param_shape:
if dim.is_dynamic:
input_shape.append(1)
else:
input_shape.append(dim.get_length())
inputs_info.append((param.get_node().get_friendly_name(), input_shape, param.get_element_type()))
return inputs_info
def get_converted_model(self, model_path: str):
return ov.convert_model(model_path)
def get_read_model(self, model_path: str):
core = ov.Core()
return core.read_model(model=model_path)
def infer_model(self, ov_model, inputs):
infer_step_t0 = time.time()
# heat run
for _ in range(0, TestModelPerformance.num_heat_runs):
ov_model(inputs)
# measure
results = []
for _ in range(0, TestModelPerformance.num_measure_runs):
t0 = time.time()
ov_model(inputs)
t1 = time.time()
results.append(t1 - t0)
mean = np.mean(results)
var = np.std(results, ddof=1) * 100 / mean
infer_step_t1 = time.time()
print('inference measurement done in {} secs'.format(infer_step_t1 - infer_step_t0))
return mean, var
def compile_model(self, model, ie_device):
core = ov.Core()
return core.compile_model(model, ie_device)
def _run(self, model_name, model_link, ie_device):
results = Results()
results.status = None
try:
print("Load the model {} (url: {})".format(model_name, model_link))
results.status = Status.LOAD_MODEL
model_obj, timedelta = wrap_timer(self.load_model, (model_name, model_link))
print('Model {} loaded in {} secs'.format(model_name, timedelta))
print("Retrieve inputs info")
results.status = Status.GET_INPUTS_INFO
inputs_info, timedelta = wrap_timer(self.get_inputs_info, (model_obj,))
print('Got inputs info in {} secs'.format(timedelta))
print("Prepare input data")
results.status = Status.PREPARE_INPUTS
inputs = self.prepare_inputs(inputs_info)
print("Convert the model into ov::Model")
results.status = Status.GET_CONVERTED_MODEL
converted_model = self.compile_model(self.get_converted_model(model_obj), ie_device)
print("read the model into ov::Model")
results.status = Status.GET_READ_MODEL
read_model = self.compile_model(self.get_read_model(model_obj), ie_device)
print("Infer the converted model")
results.status = Status.INFER_CONVERTED_MODEL
converted_model_time, converted_model_time_variance = self.infer_model(converted_model, inputs)
print('converted model time infer {}'.format(converted_model_time))
print('converted model time infer var {}'.format(converted_model_time_variance))
print("Infer read model")
results.status = Status.INFER_READ_MODEL
read_model_time, read_model_time_variance = self.infer_model(read_model, inputs)
print('read model time infer {}'.format(read_model_time))
print('read model time infer var {}'.format(read_model_time_variance))
infer_time_ratio = converted_model_time/read_model_time
results.converted_infer_time = converted_model_time
results.converted_model_time_variance = converted_model_time_variance
results.read_model_infer_time = read_model_time
results.read_model_infer_time_variance = read_model_time_variance
results.infer_time_ratio = infer_time_ratio
if abs(infer_time_ratio - 1) > TestModelPerformance.threshold_ratio:
if (read_model_time_variance > TestModelPerformance.threshold_var
or converted_model_time_variance > TestModelPerformance.threshold_var):
results.status = Status.LARGE_INFER_TIME_DIFF_WITH_LARGE_VAR
results.error_message = "too large ratio {} with large variance".format(infer_time_ratio)
else:
results.status = Status.LARGE_INFER_TIME_DIFF
results.error_message = "too large ratio {}".format(infer_time_ratio)
else:
results.status = Status.OK
except:
ex_type, ex_value, tb = sys.exc_info()
results.error_message = "{tb}\n{ex_type}: {ex_value}".format(tb=''.join(traceback.format_tb(tb)),
ex_type=ex_type.__name__, ex_value=ex_value)
return results
def run(self, model_name, model_link, ie_device):
self.result = Results()
t0 = time.time()
self.result = multiprocessing_run(self._run, [model_name, model_link, ie_device], model_name, self.infer_timeout)
t1 = time.time()
print('test running time {}'.format(t1 - t0))
if self.result.status == Status.OK:
return
err_message = "\n{func} running failed: \n{msg}".format(func=model_name, msg=self.result.error_message)
if self.result.status == Status.LARGE_INFER_TIME_DIFF_WITH_LARGE_VAR:
pytest.xfail(err_message)
else:
pytest.fail(err_message)
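A note on the pass/fail rule above: the ratio converted_model_time/read_model_time must stay within threshold_ratio (10%) of 1.0. When it does not, the case only hard-fails if both measurements were stable, i.e. their per-run deviation (std/mean, in percent) is below threshold_var (10.0); otherwise run() reports it as xfail so that noisy models do not break precommit. The same decision, condensed into an illustrative helper (not part of the PR's API):

def classify(ratio, conv_var_pct, read_var_pct, threshold_ratio=0.1, threshold_var=10.0):
    if abs(ratio - 1.0) <= threshold_ratio:
        return 'OK'                                    # within tolerance
    if conv_var_pct > threshold_var or read_var_pct > threshold_var:
        return 'LARGE_INFER_TIME_DIFF_WITH_LARGE_VAR'  # reported via pytest.xfail
    return 'LARGE_INFER_TIME_DIFF'                     # reported via pytest.fail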

View File

@@ -13,6 +13,7 @@ def get_models_list(file_name: str):
models = []
with open(file_name) as f:
for model_info in f:
model_info = model_info.strip()
# skip comment in model scope file
if model_info.startswith('#'):
continue

View File

@@ -0,0 +1,52 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import inspect
import pytest
from py.xml import html
from models_hub_common.utils import get_params
def pytest_generate_tests(metafunc):
test_gen_attrs_names = list(inspect.signature(get_params).parameters)
params = get_params()
metafunc.parametrize(test_gen_attrs_names, params, scope="function")
@pytest.mark.hookwrapper
def pytest_runtest_makereport(item, call):
outcome = yield
report = outcome.get_result()
if call.when == 'teardown' and getattr(item.obj.__self__, 'result', None) is not None:
results = item.obj.__self__.result
report._results = results
@pytest.mark.optionalhook
def pytest_html_results_table_header(cells):
cells.insert(2, html.th('status', class_="sortable"))
cells.insert(3, html.th('converted model infer time'))
cells.insert(4, html.th('converted model infer time variance'))
cells.insert(5, html.th('read model infer time'))
cells.insert(6, html.th('read model infer time variance'))
cells.insert(7, html.th('model infer time ratio converted_model_time/read_model_time'))
def round_num(n: float) -> str:
s = '{:.4E}'.format(n)
if s.endswith('E+00'):
return s[:-4]
return s
@pytest.mark.optionalhook
def pytest_html_results_table_row(report, cells):
if not getattr(report, '_results', None):
return
cells.insert(2, html.td(report._results.status))
cells.insert(3, html.td(round_num(report._results.converted_infer_time)))
cells.insert(4, html.td(round_num(report._results.converted_model_time_variance)))
cells.insert(5, html.td(round_num(report._results.read_model_infer_time)))
cells.insert(6, html.td(round_num(report._results.read_model_infer_time_variance)))
cells.insert(7, html.td(round_num(report._results.infer_time_ratio)))
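For reference, round_num prints values in scientific notation with a mantissa of four decimal places and drops the redundant 'E+00' exponent for values in [1, 10). A couple of assumed examples, with a local copy of the helper so the snippet is self-contained:

def round_num(n: float) -> str:                 # copy of the helper above, for illustration only
    s = '{:.4E}'.format(n)
    return s[:-4] if s.endswith('E+00') else s

assert round_num(0.00012345) == '1.2345E-04'    # exponent kept for small values
assert round_num(1.5) == '1.5000'               # 'E+00' suffix stripped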

View File

@@ -0,0 +1,10 @@
vision/embedder/fungi_V2,https://tfhub.dev/svampeatlas/vision/embedder/fungi_V2/1?tf-hub-format=compressed,skip,Model is not available
movenet/singlepose/lightning,https://www.kaggle.com/models/google/movenet/frameworks/tensorFlow2/variations/singlepose-lightning/versions/4
imagenet/resnet_v2_50/feature_vector,https://www.kaggle.com/models/google/resnet-v2/frameworks/tensorFlow2/variations/50-feature-vector/versions/2
movenet/singlepose/thunder,https://www.kaggle.com/models/google/movenet/frameworks/tensorFlow2/variations/singlepose-thunder/versions/4
movenet/multipose/lightning,https://www.kaggle.com/models/google/movenet/frameworks/tensorFlow2/variations/multipose-lightning/versions/1
imagenet/efficientnet_v2_imagenet1k_b0/feature_vector,https://www.kaggle.com/models/google/efficientnet-v2/frameworks/tensorFlow2/variations/imagenet1k-b0-feature-vector/versions/2
small_bert/bert_en_uncased_L-4_H-256_A-4,https://www.kaggle.com/models/tensorflow/bert/frameworks/tensorFlow2/variations/bert-en-uncased-l-4-h-256-a-4/versions/2
# secure notebook models
unet/industrial/class_1,https://tfhub.dev/nvidia/unet/industrial/class_1/1?tf-hub-format=compressed,skip,Model is not available
movenet/singlepose/thunder,https://www.kaggle.com/models/google/movenet/frameworks/tensorFlow2/variations/singlepose-thunder/versions/4
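Each row above follows the format model_name,model_link[,mark,reason]: lines starting with '#' are comments, and a 'skip' mark with a reason excludes a model from the run (see test_convert_model_precommit below). An illustrative parser assuming that format follows; the real parsing is done by models_hub_common.utils.get_models_list.

def parse_model_row(line: str):
    line = line.strip()
    if not line or line.startswith('#'):
        return None                                    # comments and blank lines are skipped
    fields = line.split(',')
    name, link = fields[0], fields[1]
    mark = fields[2] if len(fields) > 2 else None      # e.g. 'skip'
    reason = fields[3] if len(fields) > 3 else None    # e.g. 'Model is not available'
    return name, link, mark, reason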

View File

@@ -0,0 +1,6 @@
-c ../../constraints.txt
numpy
pytest
pytest-html
tensorflow-hub
py

View File

@@ -0,0 +1,51 @@
# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import shutil
import gc
import pytest
import tensorflow_hub as hub
# noinspection PyUnresolvedReferences
from models_hub_common.test_performance_model import TestModelPerformance
from models_hub_common.utils import get_models_list
from models_hub_common.constants import tf_hub_cache_dir
from models_hub_common.constants import no_clean_cache_dir
def clean_cache():
if not os.path.exists(tf_hub_cache_dir):
return
for file_name in os.listdir(tf_hub_cache_dir):
file_path = os.path.join(tf_hub_cache_dir, file_name)
try:
if os.path.isfile(file_path) or os.path.islink(file_path):
os.unlink(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)
except Exception:
pass
class TestTFPerformanceModel(TestModelPerformance):
def load_model(self, model_name, model_link):
hub.load(model_link)
return hub.resolve(model_link)
def teardown_method(self):
if not no_clean_cache_dir:
clean_cache()
# deallocate memory after each test case
gc.collect()
@pytest.mark.parametrize("model_name,model_link,mark,reason",
get_models_list(os.path.join(os.path.dirname(__file__), "precommit_models")))
@pytest.mark.precommit
def test_convert_model_precommit(self, model_name, model_link, mark, reason, ie_device):
assert mark is None or mark == 'skip', "Incorrect test case: {}, {}".format(model_name, model_link)
if mark == 'skip':
pytest.skip(reason)
self.run(model_name, model_link, ie_device)