From 965313ba0bef153463f1b05cbeb84c2a4cf3a0f2 Mon Sep 17 00:00:00 2001
From: Evgeny Kotov
Date: Fri, 24 Nov 2023 15:13:44 +0100
Subject: [PATCH] [TF Hub][Perf Tests] Implement performance tests in
 precommit to compare read_model and convert_model paths (#21023)

* begin
* fixes
* measure time and compare
* fix bug + update precommit models
* cleanup
* add wget to install requirements; add to linux.yml
* fixes + improvements
* output results to html
* remove unneeded code
* fix
* code review fixes
* store downloaded models in cache
* use 1000 runs
* use model paths from tf-hub tests
* use tf hub api to download instead of wget; measure some time stats
* small fixes
* remove unneeded files
* use own list of models
* remove unneeded if
* remove unstable models
* remove unstable models
* fix requirements
* code review fixes
* Update .github/workflows/linux.yml

Co-authored-by: Roman Kazantsev

* fix round_num function
* remove unstable network
* Update tests/model_hub_tests/models_hub_common/test_performance_model.py

Co-authored-by: Roman Kazantsev

* code review fixes
* build fix
* code review fixes
* code review fixes
* code review fixes
* Update tests/model_hub_tests/models_hub_common/test_performance_model.py

Co-authored-by: Roman Kazantsev

* Update tests/model_hub_tests/performance_tests/test_tf_hub_perfomance_model.py

Co-authored-by: Roman Kazantsev

* Update tests/model_hub_tests/performance_tests/test_tf_hub_perfomance_model.py

Co-authored-by: Roman Kazantsev

* Update tests/model_hub_tests/models_hub_common/test_performance_model.py

Co-authored-by: Roman Kazantsev

* code review fixes: use autoflake
* remove unneeded tensorflow_text
* Update .github/workflows/linux.yml
* Update linux.yml

Added TensorFlow_Hub_Performance_Models_Tests to final status check

---------

Co-authored-by: Roman Kazantsev
Co-authored-by: Ilya Lavrenov
---
 .github/workflows/linux.yml                   |  87 ++++++-
 .../models_hub_common/constants.py            |   2 +
 .../test_performance_model.py                 | 218 ++++++++++++++++++
 .../models_hub_common/utils.py                |   1 +
 .../performance_tests/conftest.py             |  52 +++++
 .../performance_tests/precommit_models        |  10 +
 .../performance_tests/requirements.txt        |   6 +
 .../test_tf_hub_perfomance_model.py           |  51 ++++
 8 files changed, 426 insertions(+), 1 deletion(-)
 create mode 100644 tests/model_hub_tests/models_hub_common/test_performance_model.py
 create mode 100644 tests/model_hub_tests/performance_tests/conftest.py
 create mode 100644 tests/model_hub_tests/performance_tests/precommit_models
 create mode 100644 tests/model_hub_tests/performance_tests/requirements.txt
 create mode 100644 tests/model_hub_tests/performance_tests/test_tf_hub_perfomance_model.py
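Reviewer note: the essence of the change, as a minimal sketch. Both code paths
load the same TF Hub SavedModel; the model path below is a placeholder, and CPU
is the device the precommit job uses:

    # Sketch of the two code paths this PR benchmarks against each other.
    import openvino as ov

    core = ov.Core()
    model_path = "/tmp/tfhub_modules/some-model"  # placeholder for a cached SavedModel

    # Path 1: explicit conversion to ov.Model, then compilation.
    converted = core.compile_model(ov.convert_model(model_path), "CPU")

    # Path 2: direct read of the same SavedModel, then compilation.
    read = core.compile_model(core.read_model(model=model_path), "CPU")

    # The harness added below times inference on both compiled models and
    # fails the test when the mean-latency ratio drifts too far from 1.
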
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 01556b97d69..f0eaaa0636e 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -1357,6 +1357,91 @@ jobs:
             ${{ env.INSTALL_TEST_DIR }}/TEST*.html
           if-no-files-found: 'error'
 
+  TensorFlow_Hub_Performance_Models_Tests:
+    name: TensorFlow Hub Performance Models tests
+    needs: [Build, Smart_CI]
+    defaults:
+      run:
+        shell: bash
+    runs-on: ${{ github.event_name == 'schedule' && 'ubuntu-20.04-16-cores' || 'ubuntu-20.04-8-cores'}}
+    timeout-minutes: ${{ github.event_name == 'schedule' && 400 || 5 }}
+    env:
+      OPENVINO_REPO: ${{ github.workspace }}/openvino
+      INSTALL_DIR: ${{ github.workspace }}/install
+      INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
+      MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests
+    if: fromJSON(needs.smart_ci.outputs.affected_components).TF_FE.test ||
+        fromJSON(needs.smart_ci.outputs.affected_components).TFL_FE.test
+
+    steps:
+      - name: Check sudo
+        run: if [ "$(id -u)" -eq 0 ]; then apt update && apt --assume-yes install sudo; fi
+
+      - name: Download OpenVINO package
+        uses: actions/download-artifact@v3
+        with:
+          name: openvino_package
+          path: ${{ env.INSTALL_DIR }}
+
+      - name: Download OpenVINO tests package
+        uses: actions/download-artifact@v3
+        with:
+          name: openvino_tests
+          path: ${{ env.INSTALL_TEST_DIR }}
+
+      - name: Extract OpenVINO packages
+        run: |
+          pushd ${INSTALL_DIR}
+          tar -xzf openvino_package.tar.gz -C ${INSTALL_DIR}
+          popd
+
+          pushd ${INSTALL_TEST_DIR}
+          tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR}
+          popd
+
+      - name: Fetch setup_python action
+        uses: actions/checkout@v4
+        with:
+          sparse-checkout: |
+            .github/actions/setup_python/action.yml
+          sparse-checkout-cone-mode: false
+          path: 'openvino'
+
+      - name: Setup Python ${{ env.PYTHON_VERSION }}
+        uses: ./openvino/.github/actions/setup_python
+        with:
+          version: ${{ env.PYTHON_VERSION }}
+          should-setup-pip-paths: 'false'
+          self-hosted-runner: 'false'
+
+      - name: Install OpenVINO Python wheels
+        run: python3 -m pip install ${INSTALL_DIR}/tools/openvino-*
+
+      - name: Install TF Hub tests requirements
+        run: |
+          python3 -m pip install -r ${MODEL_HUB_TESTS_INSTALL_DIR}/tf_hub_tests/requirements.txt
+
+      - name: Install Hub Performance tests requirements
+        run: |
+          python3 -m pip install -r ${MODEL_HUB_TESTS_INSTALL_DIR}/performance_tests/requirements.txt
+
+      - name: Performance Hub Tests
+        run: |
+          export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH
+          python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/performance_tests/ -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-tf_hub_performance.html --self-contained-html -v
+        env:
+          TYPE: ${{ github.event_name == 'schedule' && 'nightly' || 'precommit'}}
+          TEST_DEVICE: CPU
+
+      - name: Upload Test Results
+        uses: actions/upload-artifact@v3
+        if: ${{ !cancelled() }}
+        with:
+          name: test-results-tensorflow-hub-performance-models
+          path: |
+            ${{ env.INSTALL_TEST_DIR }}/TEST*.html
+          if-no-files-found: 'error'
+
   PyTorch_Models_Tests:
     name: PyTorch Models tests
     needs: [Build, Smart_CI]
@@ -1590,7 +1675,7 @@ jobs:
   Overall_Status:
     name: ci/gha_overall_status
     needs: [Smart_CI, Build, Debian_Packages, Samples, Conformance, ONNX_Runtime, CXX_Unit_Tests, Python_Unit_Tests,
-            CPU_Functional_Tests, TensorFlow_Hub_Models_Tests, PyTorch_Models_Tests, NVIDIA_Plugin]
+            CPU_Functional_Tests, TensorFlow_Hub_Models_Tests, TensorFlow_Hub_Performance_Models_Tests, PyTorch_Models_Tests, NVIDIA_Plugin]
     if: ${{ always() }}
     runs-on: ubuntu-latest
     steps:
diff --git a/tests/model_hub_tests/models_hub_common/constants.py b/tests/model_hub_tests/models_hub_common/constants.py
index 7e406f89d4d..5c9d1c600c9 100644
--- a/tests/model_hub_tests/models_hub_common/constants.py
+++ b/tests/model_hub_tests/models_hub_common/constants.py
@@ -9,8 +9,10 @@
 tf_hub_cache_dir = os.environ.get('TFHUB_CACHE_DIR', os.path.join(tempfile.gettempdir(), "tfhub_modules"))
 os.environ['TFHUB_CACHE_DIR'] = tf_hub_cache_dir
 
+no_clean_cache_dir = False
 hf_hub_cache_dir = tempfile.gettempdir()
 if os.environ.get('USE_SYSTEM_CACHE', 'True') == 'False':
+    no_clean_cache_dir = True
     os.environ['HUGGINGFACE_HUB_CACHE'] = hf_hub_cache_dir
 
 # supported_devices : CPU, GPU, GNA
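The next file is the shared performance harness. Its acceptance rule, distilled
here for readability (the thresholds come from TestModelPerformance below:
threshold_ratio = 0.1, threshold_var = 10.0; this helper function is
illustrative only and is not part of the patch):

    # Distillation of the pass/fail rule implemented in _run()/run() below.
    def verdict(converted_mean, read_mean, converted_cv, read_cv,
                threshold_ratio=0.1, threshold_var=10.0):
        ratio = converted_mean / read_mean
        if abs(ratio - 1) <= threshold_ratio:
            return 'OK'      # both paths perform comparably
        if read_cv > threshold_var or converted_cv > threshold_var:
            return 'xfail'   # measurements too noisy to blame either path
        return 'fail'        # stable measurements with a real gap
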
diff --git a/tests/model_hub_tests/models_hub_common/test_performance_model.py b/tests/model_hub_tests/models_hub_common/test_performance_model.py
new file mode 100644
index 00000000000..05f1fa7f1a4
--- /dev/null
+++ b/tests/model_hub_tests/models_hub_common/test_performance_model.py
@@ -0,0 +1,218 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import sys
+import time
+import traceback
+from enum import Enum
+
+import numpy as np
+import pytest
+
+import openvino as ov
+from openvino.runtime.utils.types import openvino_to_numpy_types_map
+
+from models_hub_common.multiprocessing_utils import multiprocessing_run
+
+# set seed to have deterministic input data generation
+# to avoid sporadic issues in inference results
+rng = np.random.default_rng(seed=56190)
+
+
+def get_numpy_type(ov_type):
+    np_type = next(
+        (np_type_value for (ov_type_value, np_type_value) in openvino_to_numpy_types_map if ov_type_value == ov_type),
+        None,
+    )
+
+    if not np_type:
+        raise Exception('no numpy type for type {} found'.format(ov_type))
+
+    return np_type
+
+
+class Status(Enum):
+    OK = 0
+    LARGE_INFER_TIME_DIFF = 1
+    LOAD_MODEL = 2
+    GET_INPUTS_INFO = 3
+    PREPARE_INPUTS = 4
+    GET_CONVERTED_MODEL = 5
+    GET_READ_MODEL = 6
+    INFER_CONVERTED_MODEL = 7
+    INFER_READ_MODEL = 8
+    LARGE_INFER_TIME_DIFF_WITH_LARGE_VAR = 9
+
+
+class Results:
+    def __init__(self):
+        self.converted_infer_time = 0.0
+        self.converted_model_time_variance = 0.0
+        self.read_model_infer_time = 0.0
+        self.read_model_infer_time_variance = 0.0
+        self.infer_time_ratio = 0.0
+        self.error_message = ''
+        self.status = None
+
+
+def wrap_timer(func, args):
+    t0 = time.time()
+    retval = func(*args)
+    t1 = time.time()
+    return retval, t1 - t0
+
+
+class TestModelPerformance:
+    infer_timeout = 600
+    threshold_ratio = 0.1
+    num_heat_runs = 100
+    num_measure_runs = 500
+    threshold_var = 10.0
+
+    def load_model(self, model_name, model_link):
+        raise NotImplementedError("load_model is not implemented")
+
+    def prepare_input(self, input_shape, input_type):
+        if input_type in [ov.Type.f32, ov.Type.f64]:
+            return 2.0 * rng.random(size=input_shape, dtype=get_numpy_type(input_type))
+        elif input_type in [ov.Type.u8, ov.Type.u16, ov.Type.i8, ov.Type.i16, ov.Type.i32, ov.Type.i64]:
+            return rng.integers(0, 5, size=input_shape).astype(get_numpy_type(input_type))
+        elif input_type in [str]:
+            return np.broadcast_to("Some string", input_shape)
+        elif input_type in [bool]:
+            return rng.integers(0, 2, size=input_shape).astype(get_numpy_type(input_type))
+        else:
+            assert False, "Unsupported type {}".format(input_type)
+
+    def prepare_inputs(self, inputs_info):
+        if len(inputs_info) > 0 and inputs_info[0] == 'list':
+            inputs = []
+            inputs_info = inputs_info[1:]
+            for input_name, input_shape, input_type in inputs_info:
+                inputs.append(self.prepare_input(input_shape, input_type))
+        else:
+            inputs = {}
+            for input_name, input_shape, input_type in inputs_info:
+                inputs[input_name] = self.prepare_input(input_shape, input_type)
+        return inputs
+
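+    # Input metadata is taken from the serialized model itself: dynamic
+    # dimensions are pinned to 1, and a fully dynamic NHWC f32 input is widened
+    # to 1x200x200x3 so spatially-dependent ops get a realistic image size.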
+    def get_inputs_info(self, model_path: str):
+        inputs_info = []
+        core = ov.Core()
+        model = core.read_model(model=model_path)
+        for param in model.inputs:
+            input_shape = []
+            param_shape = param.get_node().get_output_partial_shape(0)
+            shape_special_dims = [ov.Dimension(), ov.Dimension(), ov.Dimension(), ov.Dimension(3)]
+            if param_shape == ov.PartialShape(shape_special_dims) and param.get_element_type() == ov.Type.f32:
+                # image classification case, let us imitate an image
+                # that helps to avoid compute output size issue
+                input_shape = [1, 200, 200, 3]
+            else:
+                for dim in param_shape:
+                    if dim.is_dynamic:
+                        input_shape.append(1)
+                    else:
+                        input_shape.append(dim.get_length())
+            inputs_info.append((param.get_node().get_friendly_name(), input_shape, param.get_element_type()))
+        return inputs_info
+
+    def get_converted_model(self, model_path: str):
+        return ov.convert_model(model_path)
+
+    def get_read_model(self, model_path: str):
+        core = ov.Core()
+        return core.read_model(model=model_path)
+
+    def infer_model(self, ov_model, inputs):
+        infer_step_t0 = time.time()
+        # warm-up runs
+        for _ in range(0, TestModelPerformance.num_heat_runs):
+            ov_model(inputs)
+        # measured runs
+        results = []
+        for _ in range(0, TestModelPerformance.num_measure_runs):
+            t0 = time.time()
+            ov_model(inputs)
+            t1 = time.time()
+            results.append(t1 - t0)
+        mean = np.mean(results)
+        # sample standard deviation as a percentage of the mean (coefficient of variation)
+        var = np.std(results, ddof=1) * 100 / mean
+        infer_step_t1 = time.time()
+        print('inference measurement done in {} secs'.format(infer_step_t1 - infer_step_t0))
+        return mean, var
+
+    def compile_model(self, model, ie_device):
+        core = ov.Core()
+        return core.compile_model(model, ie_device)
+
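+    # _run() executes in a separate worker process (see multiprocessing_run in
+    # run() below); results.status is advanced before each stage so that a
+    # crash or timeout is attributed to the last stage that was attempted.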
+    def _run(self, model_name, model_link, ie_device):
+        results = Results()
+        results.status = None
+        try:
+            print("Load the model {} (url: {})".format(model_name, model_link))
+            results.status = Status.LOAD_MODEL
+            model_obj, timedelta = wrap_timer(self.load_model, (model_name, model_link))
+            print('Model {} loaded in {} secs'.format(model_name, timedelta))
+            print("Retrieve inputs info")
+            results.status = Status.GET_INPUTS_INFO
+            inputs_info, timedelta = wrap_timer(self.get_inputs_info, (model_obj,))
+            print('Got inputs info in {} secs'.format(timedelta))
+            print("Prepare input data")
+            results.status = Status.PREPARE_INPUTS
+            inputs = self.prepare_inputs(inputs_info)
+            print("Convert the model into ov::Model")
+            results.status = Status.GET_CONVERTED_MODEL
+            converted_model = self.compile_model(self.get_converted_model(model_obj), ie_device)
+            print("Read the model into ov::Model")
+            results.status = Status.GET_READ_MODEL
+            read_model = self.compile_model(self.get_read_model(model_obj), ie_device)
+            print("Infer the converted model")
+            results.status = Status.INFER_CONVERTED_MODEL
+            converted_model_time, converted_model_time_variance = self.infer_model(converted_model, inputs)
+            print('converted model infer time {}'.format(converted_model_time))
+            print('converted model infer time var {}'.format(converted_model_time_variance))
+            print("Infer the read model")
+            results.status = Status.INFER_READ_MODEL
+            read_model_time, read_model_time_variance = self.infer_model(read_model, inputs)
+            print('read model infer time {}'.format(read_model_time))
+            print('read model infer time var {}'.format(read_model_time_variance))
+
+            infer_time_ratio = converted_model_time / read_model_time
+
+            results.converted_infer_time = converted_model_time
+            results.converted_model_time_variance = converted_model_time_variance
+            results.read_model_infer_time = read_model_time
+            results.read_model_infer_time_variance = read_model_time_variance
+            results.infer_time_ratio = infer_time_ratio
+
+            if abs(infer_time_ratio - 1) > TestModelPerformance.threshold_ratio:
+                if (read_model_time_variance > TestModelPerformance.threshold_var
+                        or converted_model_time_variance > TestModelPerformance.threshold_var):
+                    results.status = Status.LARGE_INFER_TIME_DIFF_WITH_LARGE_VAR
+                    results.error_message = "too large ratio {} with large variance".format(infer_time_ratio)
+                else:
+                    results.status = Status.LARGE_INFER_TIME_DIFF
+                    results.error_message = "too large ratio {}".format(infer_time_ratio)
+            else:
+                results.status = Status.OK
+        except:
+            ex_type, ex_value, tb = sys.exc_info()
+            results.error_message = "{tb}\n{ex_type}: {ex_value}".format(tb=''.join(traceback.format_tb(tb)),
+                                                                         ex_type=ex_type.__name__,
+                                                                         ex_value=ex_value)
+        return results
+
+    def run(self, model_name, model_link, ie_device):
+        self.result = Results()
+        t0 = time.time()
+        self.result = multiprocessing_run(self._run, [model_name, model_link, ie_device], model_name, self.infer_timeout)
+        t1 = time.time()
+        print('test running time {} secs'.format(t1 - t0))
+        if self.result.status == Status.OK:
+            return
+        err_message = "\n{func} running failed: \n{msg}".format(func=model_name, msg=self.result.error_message)
+        if self.result.status == Status.LARGE_INFER_TIME_DIFF_WITH_LARGE_VAR:
+            pytest.xfail(err_message)
+        else:
+            pytest.fail(err_message)
diff --git a/tests/model_hub_tests/models_hub_common/utils.py b/tests/model_hub_tests/models_hub_common/utils.py
index 948ebfa14fb..18016086158 100644
--- a/tests/model_hub_tests/models_hub_common/utils.py
+++ b/tests/model_hub_tests/models_hub_common/utils.py
@@ -13,6 +13,7 @@ def get_models_list(file_name: str):
     models = []
     with open(file_name) as f:
         for model_info in f:
+            model_info = model_info.strip()
             # skip comment in model scope file
             if model_info.startswith('#'):
                 continue
diff --git a/tests/model_hub_tests/performance_tests/conftest.py b/tests/model_hub_tests/performance_tests/conftest.py
new file mode 100644
index 00000000000..56810a8aa42
--- /dev/null
+++ b/tests/model_hub_tests/performance_tests/conftest.py
@@ -0,0 +1,52 @@
+# Copyright (C) 2018-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import inspect
+
+import pytest
+from py.xml import html
+
+from models_hub_common.utils import get_params
+
+
+def pytest_generate_tests(metafunc):
+    test_gen_attrs_names = list(inspect.signature(get_params).parameters)
+    params = get_params()
+    metafunc.parametrize(test_gen_attrs_names, params, scope="function")
+
+
+@pytest.mark.hookwrapper
+def pytest_runtest_makereport(item, call):
+    outcome = yield
+    report = outcome.get_result()
+    if call.when == 'teardown' and getattr(item.obj.__self__, 'result', None) is not None:
+        results = item.obj.__self__.result
+        report._results = results
+
+
+@pytest.mark.optionalhook
+def pytest_html_results_table_header(cells):
+    cells.insert(2, html.th('status', class_="sortable"))
+    cells.insert(3, html.th('converted model infer time'))
+    cells.insert(4, html.th('converted model infer time variance'))
+    cells.insert(5, html.th('read model infer time'))
+    cells.insert(6, html.th('read model infer time variance'))
+    cells.insert(7, html.th('model infer time ratio converted_model_time/read_model_time'))
+
+
+def round_num(n: float) -> str:
+    s = '{:.4E}'.format(n)
+    if s.endswith('E+00'):
+        return s[:-4]
+    return s
+
+
+@pytest.mark.optionalhook
+def pytest_html_results_table_row(report, cells):
+    if not getattr(report, '_results', None):
+        return
+    cells.insert(2, html.td(report._results.status))
+    cells.insert(3, html.td(round_num(report._results.converted_infer_time)))
+    cells.insert(4, html.td(round_num(report._results.converted_model_time_variance)))
+    cells.insert(5, html.td(round_num(report._results.read_model_infer_time)))
+    cells.insert(6, html.td(round_num(report._results.read_model_infer_time_variance)))
+    cells.insert(7, html.td(round_num(report._results.infer_time_ratio)))
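The model scope file that follows holds one comma-separated record per line;
get_models_list() (patched above to strip whitespace) feeds these records
straight into pytest.mark.parametrize. A hedged illustration of the layout —
the four-field split is inferred from how the test consumes mark and reason:

    # Record layout of precommit_models, as consumed by the tests:
    #   model_name,model_link[,mark,reason]
    row = "unet/industrial/class_1,https://tfhub.dev/nvidia/unet/industrial/class_1/1?tf-hub-format=compressed,skip,Model is not available"
    fields = row.split(',')
    model_name, model_link = fields[0], fields[1]
    mark = fields[2] if len(fields) > 2 else None    # only 'skip' is accepted
    reason = fields[3] if len(fields) > 3 else None  # shown by pytest.skip()
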
diff --git a/tests/model_hub_tests/performance_tests/precommit_models b/tests/model_hub_tests/performance_tests/precommit_models
new file mode 100644
index 00000000000..9b4435c90ad
--- /dev/null
+++ b/tests/model_hub_tests/performance_tests/precommit_models
@@ -0,0 +1,10 @@
+vision/embedder/fungi_V2,https://tfhub.dev/svampeatlas/vision/embedder/fungi_V2/1?tf-hub-format=compressed,skip,Model is not available
+movenet/singlepose/lightning,https://www.kaggle.com/models/google/movenet/frameworks/tensorFlow2/variations/singlepose-lightning/versions/4
+imagenet/resnet_v2_50/feature_vector,https://www.kaggle.com/models/google/resnet-v2/frameworks/tensorFlow2/variations/50-feature-vector/versions/2
+movenet/singlepose/thunder,https://www.kaggle.com/models/google/movenet/frameworks/tensorFlow2/variations/singlepose-thunder/versions/4
+movenet/multipose/lightning,https://www.kaggle.com/models/google/movenet/frameworks/tensorFlow2/variations/multipose-lightning/versions/1
+imagenet/efficientnet_v2_imagenet1k_b0/feature_vector,https://www.kaggle.com/models/google/efficientnet-v2/frameworks/tensorFlow2/variations/imagenet1k-b0-feature-vector/versions/2
+small_bert/bert_en_uncased_L-4_H-256_A-4,https://www.kaggle.com/models/tensorflow/bert/frameworks/tensorFlow2/variations/bert-en-uncased-l-4-h-256-a-4/versions/2
+# secure notebook models
+unet/industrial/class_1,https://tfhub.dev/nvidia/unet/industrial/class_1/1?tf-hub-format=compressed,skip,Model is not available
+movenet/singlepose/thunder,https://www.kaggle.com/models/google/movenet/frameworks/tensorFlow2/variations/singlepose-thunder/versions/4
\ No newline at end of file
diff --git a/tests/model_hub_tests/performance_tests/requirements.txt b/tests/model_hub_tests/performance_tests/requirements.txt
new file mode 100644
index 00000000000..9e237ce76b3
--- /dev/null
+++ b/tests/model_hub_tests/performance_tests/requirements.txt
@@ -0,0 +1,6 @@
+-c ../../constraints.txt
+numpy
+pytest
+pytest-html
+tensorflow-hub
+py
diff --git a/tests/model_hub_tests/performance_tests/test_tf_hub_perfomance_model.py b/tests/model_hub_tests/performance_tests/test_tf_hub_perfomance_model.py
new file mode 100644
index 00000000000..f5dddaf2081
--- /dev/null
+++ b/tests/model_hub_tests/performance_tests/test_tf_hub_perfomance_model.py
@@ -0,0 +1,51 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import gc
+import os
+import shutil
+
+import pytest
+import tensorflow_hub as hub
+
+from models_hub_common.constants import no_clean_cache_dir
+from models_hub_common.constants import tf_hub_cache_dir
+from models_hub_common.test_performance_model import TestModelPerformance
+from models_hub_common.utils import get_models_list
+
+
+def clean_cache():
+    if not os.path.exists(tf_hub_cache_dir):
+        return
+    for file_name in os.listdir(tf_hub_cache_dir):
+        file_path = os.path.join(tf_hub_cache_dir, file_name)
+        try:
+            if os.path.isfile(file_path) or os.path.islink(file_path):
+                os.unlink(file_path)
+            elif os.path.isdir(file_path):
+                shutil.rmtree(file_path)
+        except Exception:
+            pass
+
+
+class TestTFPerformanceModel(TestModelPerformance):
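+    # hub.load() downloads the model into TFHUB_CACHE_DIR on first use;
+    # hub.resolve() then returns the local path of that cached SavedModel,
+    # which is the single artifact both convert_model and read_model consume.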
{}".format(model_name, model_link) + if mark == 'skip': + pytest.skip(reason) + self.run(model_name, model_link, ie_device)