[DOCS] Add ability to build notebooks from local files (#17797)

* Add build notebooks from local files
* Add local notebook files v0.1.0-latest/20230529220816
bstankix 2023-05-30 16:11:14 +02:00 committed by GitHub
parent f6141ccc89
commit 2f2f0b850a
287 changed files with 46947 additions and 4 deletions

View File

@@ -77,7 +77,7 @@ function(build_docs)
if(ENABLE_OPENVINO_NOTEBOOKS)
set(NBDOC_SCRIPT "${DOCS_SOURCE_DIR}/nbdoc/nbdoc.py")
list(APPEND commands
COMMAND ${PYTHON_EXECUTABLE} "${NBDOC_SCRIPT}" "${RST_OUTPUT}/notebooks"
COMMAND ${PYTHON_EXECUTABLE} "${NBDOC_SCRIPT}" "${DOCS_SOURCE_DIR}/notebooks" "${RST_OUTPUT}/notebooks"
)
endif()

View File

@@ -1,4 +1,5 @@
import argparse
import shutil
from pathlib import Path
from utils import (
create_content,
@@ -155,12 +156,18 @@ class NbProcessor:
def main():
parser = argparse.ArgumentParser()
parser.add_argument('sourcedir', type=Path)
parser.add_argument('outdir', type=Path)
parser.add_argument('-d', '--download', action='store_true')
args = parser.parse_args()
sourcedir = args.sourcedir
outdir = args.outdir
if args.download:
outdir.mkdir(parents=True, exist_ok=True)
# Step 2. Run default pipeline for downloading
NbTravisDownloader.download_from_jenkins(outdir)
else:
shutil.copytree(sourcedir, outdir)
# Step 3. Run processing on downloaded file
nbp = NbProcessor(outdir)
buttons_list = nbp.fetch_binder_list('txt')

View File

@@ -0,0 +1,83 @@
Hello Image Classification
==========================
This basic introduction to OpenVINO™ shows how to do inference with an
image classification model.
A pre-trained `MobileNetV3
model <https://docs.openvino.ai/latest/omz_models_model_mobilenet_v3_small_1_0_224_tf.html>`__
from `Open Model
Zoo <https://github.com/openvinotoolkit/open_model_zoo/>`__ is used in
this tutorial. For more information about how OpenVINO IR models are
created, refer to the `TensorFlow to
OpenVINO <101-tensorflow-to-openvino-with-output.html>`__
tutorial.
Imports
-------
.. code:: ipython3
import cv2
import matplotlib.pyplot as plt
import numpy as np
from openvino.runtime import Core
Load the Model
--------------
.. code:: ipython3
ie = Core()
model = ie.read_model(model="model/v3-small_224_1.0_float.xml")
compiled_model = ie.compile_model(model=model, device_name="CPU")
output_layer = compiled_model.output(0)
Load an Image
-------------
.. code:: ipython3
# The MobileNet model expects images in RGB format.
image = cv2.cvtColor(cv2.imread(filename="../data/image/coco.jpg"), code=cv2.COLOR_BGR2RGB)
# Resize to MobileNet image shape.
input_image = cv2.resize(src=image, dsize=(224, 224))
# Reshape to model input shape.
input_image = np.expand_dims(input_image, 0)
plt.imshow(image);
.. image:: 001-hello-world-with-output_files/001-hello-world-with-output_6_0.png
Do Inference
------------
.. code:: ipython3
result_infer = compiled_model([input_image])[output_layer]
result_index = np.argmax(result_infer)
.. code:: ipython3
# Convert the inference result to a class name.
imagenet_classes = open("../data/datasets/imagenet/imagenet_2012.txt").read().splitlines()
# The model description states that for this model, class 0 is a background.
# Therefore, a background must be added at the beginning of imagenet_classes.
imagenet_classes = ['background'] + imagenet_classes
imagenet_classes[result_index]
.. parsed-literal::
'n02099267 flat-coated retriever'

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7511b8a4e5b047600d5fed14fbc7e9653a868bc5253abf1e0c3ef649b47bc408
size 387941

View File

@@ -0,0 +1,7 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/001-hello-world-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/001-hello-world-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="001-hello-world-with-output_6_0.png">001-hello-world-with-output_6_0.png</a> 30-May-2023 00:09 387941
</pre><hr></body>
</html>

View File

@@ -0,0 +1,694 @@
OpenVINO™ Runtime API Tutorial
==============================
This notebook explains the basics of the OpenVINO Runtime API. It
covers:
- `Loading OpenVINO Runtime and Showing
Info <#Loading-OpenVINO-Runtime-and-Showing-Info>`__
- `Loading a Model <#Loading-a-Model>`__
- `OpenVINO IR Model <#OpenVINO-IR-Model>`__
- `ONNX Model <#ONNX-Model>`__
- `PaddlePaddle Model <#PaddlePaddle-Model>`__
- `TensorFlow Model <#TensorFlow-Model>`__
- `Getting Information about a
Model <#Getting-Information-about-a-Model>`__
- `Model Inputs <#Model-Inputs>`__
- `Model Outputs <#Model-Outputs>`__
- `Doing Inference on a Model <#Doing-Inference-on-a-Model>`__
- `Reshaping and Resizing <#Reshaping-and-Resizing>`__
- `Change Image Size <#Change-Image-Size>`__
- `Change Batch Size <#Change-Batch-Size>`__
- `Caching a Model <#Caching-a-Model>`__
The notebook is divided into sections with headers. Each section is
standalone and does not depend on previous sections. A segmentation and
classification OpenVINO IR model and a segmentation ONNX model are
provided as examples. These model files can be replaced with your own
models. The exact outputs will be different, but the process is the
same.
Loading OpenVINO Runtime and Showing Info
-----------------------------------------
Initialize OpenVINO Runtime with Core()
.. code:: ipython3
from openvino.runtime import Core
ie = Core()
OpenVINO Runtime can load a network on a device. A device in this
context means a CPU, an Intel GPU, a Neural Compute Stick 2, etc. The
``available_devices`` property shows the available devices in your
system. The “FULL_DEVICE_NAME” option to ``ie.get_property()`` shows the
name of the device.
In this notebook, the CPU device is used. To use an integrated GPU, use
``device_name="GPU"`` instead. Be aware that loading a network on GPU
will be slower than loading a network on CPU, but inference will likely
be faster.
.. code:: ipython3
devices = ie.available_devices
for device in devices:
device_name = ie.get_property(device, "FULL_DEVICE_NAME")
print(f"{device}: {device_name}")
.. parsed-literal::
CPU: Intel(R) Core(TM) i9-10920X CPU @ 3.50GHz
Loading a Model
---------------
After initializing OpenVINO Runtime, first read the model file with
``read_model()``, then compile it to the specified device with the
``compile_model()`` method.
`OpenVINO™ supports several model
formats <https://docs.openvino.ai/latest/Supported_Model_Formats.html#doxid-supported-model-formats>`__
and enables developers to convert them to its own OpenVINO IR format
using a tool dedicated to this task.
OpenVINO IR Model
~~~~~~~~~~~~~~~~~
An OpenVINO IR (Intermediate Representation) model consists of an
``.xml`` file, containing information about network topology, and a
``.bin`` file, containing the weights and biases binary data. Models in
OpenVINO IR format are produced by the Model Optimizer tool. The
``read_model()`` function expects the ``.bin`` weights file to have the
same filename and be located in the same directory as the ``.xml`` file:
``model_weights_file == Path(model_xml).with_suffix(".bin")``. If this
is the case, specifying the weights file is optional. If the weights
file has a different filename, it can be specified using the ``weights``
parameter in ``read_model()``.
The OpenVINO `Model
Optimizer <https://docs.openvino.ai/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html#doxid-openvino-docs-m-o-d-g-deep-learning-model-optimizer-dev-guide>`__
tool is used to convert models to OpenVINO IR format. Model Optimizer
reads the original model and creates an OpenVINO IR model (.xml and .bin
files) so inference can be performed without delays due to format
conversion. Optionally, Model Optimizer can adjust the model to be more
suitable for inference, for example, by altering input shapes,
embedding preprocessing, and cutting off training parts. For information
on how to convert your existing TensorFlow, PyTorch or ONNX model to
OpenVINO IR format with Model Optimizer, refer to the
`tensorflow-to-openvino <101-tensorflow-to-openvino-with-output.html>`__
and
`pytorch-onnx-to-openvino <102-pytorch-onnx-to-openvino-with-output.html>`__
notebooks.
.. code:: ipython3
from openvino.runtime import Core
ie = Core()
classification_model_xml = "model/classification.xml"
model = ie.read_model(model=classification_model_xml)
compiled_model = ie.compile_model(model=model, device_name="CPU")
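If the ``.bin`` weights file does not share the ``.xml`` filename, a minimal sketch of passing it explicitly looks as follows. The weights path used here is hypothetical, for illustration only:
.. code:: ipython3

    from openvino.runtime import Core

    ie = Core()
    # Hypothetical file names for illustration: the weights file does not match
    # the .xml name, so it is passed through the `weights` argument of read_model().
    model = ie.read_model(
        model="model/classification.xml",
        weights="model/classification_weights.bin",
    )
    compiled_model = ie.compile_model(model=model, device_name="CPU")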
ONNX Model
~~~~~~~~~~
`ONNX <https://onnx.ai/>`__ is an open format built to represent machine
learning models. ONNX defines a common set of operators - the building
blocks of machine learning and deep learning models - and a common file
format to enable AI developers to use models with a variety of
frameworks, tools, runtimes, and compilers. OpenVINO supports reading
models in ONNX format directly, which means they can be used with
OpenVINO Runtime without any prior conversion.
Reading and loading an ONNX model, which is a single ``.onnx`` file,
works the same way as with an OpenVINO IR model. The ``model`` argument
points to the filename of an ONNX model.
.. code:: ipython3
from openvino.runtime import Core
ie = Core()
onnx_model_path = "model/segmentation.onnx"
model_onnx = ie.read_model(model=onnx_model_path)
compiled_model_onnx = ie.compile_model(model=model_onnx, device_name="CPU")
The ONNX model can be exported to OpenVINO IR with ``serialize()``:
.. code:: ipython3
from openvino.runtime import serialize
serialize(model_onnx, xml_path="model/exported_onnx_model.xml")
PaddlePaddle Model
~~~~~~~~~~~~~~~~~~
`PaddlePaddle <https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html>`__
models saved for inference can also be passed to OpenVINO Runtime
without any conversion step. Pass the filename with its extension to
``read_model`` and export an OpenVINO IR with ``serialize``.
.. code:: ipython3
from openvino.runtime import Core
ie = Core()
paddle_model_path = "model/inference.pdmodel"
model_paddle = ie.read_model(model=paddle_model_path)
compiled_model_paddle = ie.compile_model(model=model_paddle, device_name="CPU")
.. code:: ipython3
from openvino.runtime import serialize
serialize(model_paddle, xml_path="model/exported_paddle_model.xml")
TensorFlow Model
~~~~~~~~~~~~~~~~
TensorFlow models saved in frozen graph format can also be passed to
``read_model`` starting in OpenVINO 2022.3. **NOTE**: Directly loading
TensorFlow models is available as a preview feature in the OpenVINO
2022.3 release. Fully functional support will be provided in the
upcoming 2023 releases. Currently, support is limited to the frozen
graph inference format only. Other TensorFlow model formats must be converted
to OpenVINO IR using `Model
Optimizer <https://docs.openvino.ai/latest/openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_TensorFlow.html>`__.
.. code:: ipython3
from openvino.runtime import Core
ie = Core()
tf_model_path = "model/classification.pb"
model_tf = ie.read_model(model=tf_model_path)
compiled_model_tf = ie.compile_model(model=model_tf, device_name="CPU")
.. code:: ipython3
from openvino.runtime import serialize
serialize(model_tf, xml_path="model/exported_tf_model.xml")
Getting Information about a Model
---------------------------------
The OpenVINO Model instance stores information about the model.
Information about the inputs and outputs of the model is stored in
``model.inputs`` and ``model.outputs``. These are also properties of the
CompiledModel instance. While the cells below use ``model.inputs`` and
``model.outputs``, you can also use
``compiled_model.inputs`` and ``compiled_model.outputs``.
Model Inputs
~~~~~~~~~~~~
Information about all input layers is stored in ``model.inputs``.
.. code:: ipython3
from openvino.runtime import Core
ie = Core()
classification_model_xml = "model/classification.xml"
model = ie.read_model(model=classification_model_xml)
model.inputs
.. parsed-literal::
[<Output: names[input, input:0] shape[1,3,224,224] type: f32>]
The cell above shows that the loaded model expects one input with the
name *input*. If you loaded a different model, you may see a different
input layer name, and you may see more inputs. You may also obtain info
about each input layer using ``model.input(index)``, where index is a
numeric index of the input layers in the model. If a model has only one
input, index can be omitted.
.. code:: ipython3
input_layer = model.input(0)
It is often useful to have a reference to the name of the first input
layer. For a model with one input, ``model.input(0).any_name`` gets this
name.
.. code:: ipython3
input_layer.any_name
.. parsed-literal::
'input'
The next cell prints the input precision and shape.
.. code:: ipython3
print(f"input precision: {input_layer.element_type}")
print(f"input shape: {input_layer.shape}")
.. parsed-literal::
input precision: <Type: 'float32'>
input shape: [1,3,224,224]
This cell shows that the model expects inputs with a shape of
[1,3,224,224], and that this is in the ``NCHW`` layout. This means that
the model expects input data with the batch size of 1 (``N``), 3
channels (``C``), and images with a height (``H``) and width (``W``)
equal to 224. The input data is expected to be of ``FP32`` (floating
point) precision.
Model Outputs
~~~~~~~~~~~~~
.. code:: ipython3
from openvino.runtime import Core
ie = Core()
classification_model_xml = "model/classification.xml"
model = ie.read_model(model=classification_model_xml)
model.outputs
.. parsed-literal::
[<Output: names[MobilenetV3/Predictions/Softmax] shape[1,1001] type: f32>]
Model output info is stored in ``model.outputs``. The cell above shows
that the model returns one output, with the
``MobilenetV3/Predictions/Softmax`` name. Loading a different model will
result in a different output layer name, and more outputs might be
returned. Similar to the input, you may also obtain information about
each output separately using ``model.output(index)``.
Since this model has one output, follow the same method as for the input
layer to get its name.
.. code:: ipython3
output_layer = model.output(0)
output_layer.any_name
.. parsed-literal::
'MobilenetV3/Predictions/Softmax'
Getting the output precision and shape is similar to getting the input
precision and shape.
.. code:: ipython3
print(f"output precision: {output_layer.element_type}")
print(f"output shape: {output_layer.shape}")
.. parsed-literal::
output precision: <Type: 'float32'>
output shape: [1,1001]
This cell shows that the model returns outputs with a shape of [1,
1001], where 1 is the batch size (``N``) and 1001 is the number of
classes (``C``). The output is returned as 32-bit floating point.
Doing Inference on a Model
--------------------------
**NOTE**: This notebook demonstrates only the basic synchronous
inference API. For an async inference example, please refer to the `Async
API notebook <115-async-api-with-output.html>`__.
The diagram below shows a typical inference pipeline with OpenVINO.
.. figure:: https://docs.openvino.ai/latest/_images/IMPLEMENT_PIPELINE_with_API_C.svg
:alt: image.png
image.png
Creating the OpenVINO Core and compiling the model were covered in the
previous steps. The next step is preparing an inference request. To do
inference on a model, first create an inference request by calling the
``create_infer_request()`` method of the ``CompiledModel`` object,
``compiled_model``, that was produced by ``compile_model()``. Then, call
the ``infer()`` method of ``InferRequest``. It expects one argument:
``inputs``. This is either a dictionary that maps input layer names to
input data, or a list of input data in np.ndarray format, where the
position of the input tensor corresponds to the input index. If a model
has a single input, wrapping it in a dictionary or list can be omitted.
**Load the network**
.. code:: ipython3
from openvino.runtime import Core
ie = Core()
classification_model_xml = "model/classification.xml"
model = ie.read_model(model=classification_model_xml)
compiled_model = ie.compile_model(model=model, device_name="CPU")
input_layer = compiled_model.input(0)
output_layer = compiled_model.output(0)
**Load an image and convert to the input shape**
To propagate an image through the network, it needs to be loaded into an
array, resized to the shape that the network expects, and converted to
the input layout of the network.
.. code:: ipython3
import cv2
image_filename = "../data/image/coco_hollywood.jpg"
image = cv2.imread(image_filename)
image.shape
.. parsed-literal::
(663, 994, 3)
The image has a shape of (663,994,3). It is 663 pixels in height, 994
pixels in width, and has 3 color channels. A reference to the height and
width expected by the network is obtained and the image is resized to
these dimensions.
.. code:: ipython3
# N,C,H,W = batch size, number of channels, height, width.
N, C, H, W = input_layer.shape
# OpenCV resize expects the destination size as (width, height).
resized_image = cv2.resize(src=image, dsize=(W, H))
resized_image.shape
.. parsed-literal::
(224, 224, 3)
Now, the image has the width and height that the network expects. This
is still in ``HWC`` format and must be changed to ``NCHW`` format.
First, call the ``np.transpose()`` method to change to ``CHW`` and then
add the ``N`` dimension (where ``N``\ = 1) by calling the
``np.expand_dims()`` method. Next, convert the data to ``FP32`` with
the ``astype()`` method.
.. code:: ipython3
import numpy as np
input_data = np.expand_dims(np.transpose(resized_image, (2, 0, 1)), 0).astype(np.float32)
input_data.shape
.. parsed-literal::
(1, 3, 224, 224)
**Do inference**
Now that the input data is in the right shape, run inference. The
CompiledModel inference result is a dictionary where keys are the Output
class instances (the same keys in ``compiled_model.outputs`` that can
also be obtained with ``compiled_model.output(index)``) and the values
are the predicted results in np.ndarray format.
.. code:: ipython3
# for single input models only
result = compiled_model(input_data)[output_layer]
# for multiple inputs in a list
result = compiled_model([input_data])[output_layer]
# or using a dictionary, where the key is input tensor name or index
result = compiled_model({input_layer.any_name: input_data})[output_layer]
You can also create an ``InferRequest`` and run the ``infer`` method on
it.
.. code:: ipython3
request = compiled_model.create_infer_request()
request.infer(inputs={input_layer.any_name: input_data})
result = request.get_output_tensor(output_layer.index).data
The ``.infer()`` method fills the output tensors, which can be accessed
with ``get_output_tensor()``. Since this network returns one output, and
the reference to the output layer is in the ``output_layer.index``
parameter, you can get the data with
``request.get_output_tensor(output_layer.index)``. To get a numpy array
from the output, use the ``.data`` property.
.. code:: ipython3
result.shape
.. parsed-literal::
(1, 1001)
The output shape is (1,1001), which is the expected output shape. This
shape indicates that the network returns probabilities for 1001 classes.
To learn more about interpreting these results, refer to the `hello world
notebook <001-hello-world-with-output.html>`__.
Reshaping and Resizing
----------------------
Change Image Size
~~~~~~~~~~~~~~~~~
Instead of reshaping the image to fit the model, it is also possible to
reshape the model to fit the image. Be aware that not all models support
reshaping, and models that do, may not support all input shapes. The
model accuracy may also suffer if you reshape the model input shape.
First check the input shape of the model, then reshape it to the new
input shape.
.. code:: ipython3
from openvino.runtime import Core, PartialShape
ie = Core()
segmentation_model_xml = "model/segmentation.xml"
segmentation_model = ie.read_model(model=segmentation_model_xml)
segmentation_input_layer = segmentation_model.input(0)
segmentation_output_layer = segmentation_model.output(0)
print("~~~~ ORIGINAL MODEL ~~~~")
print(f"input shape: {segmentation_input_layer.shape}")
print(f"output shape: {segmentation_output_layer.shape}")
new_shape = PartialShape([1, 3, 544, 544])
segmentation_model.reshape({segmentation_input_layer.any_name: new_shape})
segmentation_compiled_model = ie.compile_model(model=segmentation_model, device_name="CPU")
# help(segmentation_compiled_model)
print("~~~~ RESHAPED MODEL ~~~~")
print(f"model input shape: {segmentation_input_layer.shape}")
print(
f"compiled_model input shape: "
f"{segmentation_compiled_model.input(index=0).shape}"
)
print(f"compiled_model output shape: {segmentation_output_layer.shape}")
.. parsed-literal::
~~~~ ORIGINAL MODEL ~~~~
input shape: [1,3,512,512]
output shape: [1,1,512,512]
~~~~ RESHAPED MODEL ~~~~
model input shape: [1,3,544,544]
compiled_model input shape: [1,3,544,544]
compiled_model output shape: [1,1,544,544]
The input shape for the segmentation network is [1,3,512,512], with the
``NCHW`` layout: the network expects 3-channel images with a width and
height of 512 and a batch size of 1. Reshape the network with the
``.reshape()`` method of the model to make it accept input images
with a width and height of 544. This segmentation network always returns
arrays with the input width and height of equal value. Therefore,
setting the input dimensions to 544x544 also modifies the output
dimensions. After reshaping, compile the network once again.
Change Batch Size
~~~~~~~~~~~~~~~~~
Use the ``.reshape()`` method to set the batch size, by increasing the
first element of ``new_shape``. For example, to set a batch size of two,
set ``new_shape = (2,3,544,544)`` in the cell above.
.. code:: ipython3
from openvino.runtime import Core, PartialShape
ie = Core()
segmentation_model_xml = "model/segmentation.xml"
segmentation_model = ie.read_model(model=segmentation_model_xml)
segmentation_input_layer = segmentation_model.input(0)
segmentation_output_layer = segmentation_model.output(0)
new_shape = PartialShape([2, 3, 544, 544])
segmentation_model.reshape({segmentation_input_layer.any_name: new_shape})
segmentation_compiled_model = ie.compile_model(model=segmentation_model, device_name="CPU")
print(f"input shape: {segmentation_input_layer.shape}")
print(f"output shape: {segmentation_output_layer.shape}")
.. parsed-literal::
input shape: [2,3,544,544]
output shape: [2,1,544,544]
The output shows that by setting the batch size to 2, the first element
(``N``) of the input and output shape has a value of 2. Propagate the
input image through the network to see the result:
.. code:: ipython3
import numpy as np
from openvino.runtime import Core, PartialShape
ie = Core()
segmentation_model_xml = "model/segmentation.xml"
segmentation_model = ie.read_model(model=segmentation_model_xml)
segmentation_input_layer = segmentation_model.input(0)
segmentation_output_layer = segmentation_model.output(0)
new_shape = PartialShape([2, 3, 544, 544])
segmentation_model.reshape({segmentation_input_layer.any_name: new_shape})
segmentation_compiled_model = ie.compile_model(model=segmentation_model, device_name="CPU")
input_data = np.random.rand(2, 3, 544, 544)
output = segmentation_compiled_model([input_data])
print(f"input data shape: {input_data.shape}")
print(f"result data data shape: {segmentation_output_layer.shape}")
.. parsed-literal::
input data shape: (2, 3, 544, 544)
result data data shape: [2,1,544,544]
Caching a Model
---------------
For some devices, like GPU, loading a model can take some time. Model
Caching solves this issue by caching the model in a cache directory. If
``CACHE_DIR`` is set in the ``config`` dictionary passed to
``ie.compile_model(model=model, device_name=device_name, config=config_dict)``,
caching will be used. This option checks if a model exists in
the cache. If so, it loads it from the cache. If not, it loads the model
regularly and stores it in the cache, so that the next time the model is
loaded with this option set, it will be loaded from the cache.
In the cell below, we create a *model_cache* directory as a subdirectory
of *model*, where the model will be cached for the specified device. The
model will be loaded to the GPU. After running this cell once, the model
will be cached, so subsequent runs of this cell will load the model from
the cache.
*Note: Model Caching is also available on CPU devices*
.. code:: ipython3
import time
from pathlib import Path
from openvino.runtime import Core
ie = Core()
device_name = "GPU"
if device_name in ie.available_devices:
cache_path = Path("model/model_cache")
cache_path.mkdir(exist_ok=True)
# Enable caching for OpenVINO Runtime. To disable caching set enable_caching = False
enable_caching = True
config_dict = {"CACHE_DIR": str(cache_path)} if enable_caching else {}
classification_model_xml = "model/classification.xml"
model = ie.read_model(model=classification_model_xml)
start_time = time.perf_counter()
compiled_model = ie.compile_model(model=model, device_name=device_name, config=config_dict)
end_time = time.perf_counter()
print(f"Loading the network to the {device_name} device took {end_time-start_time:.2f} seconds.")
After running the previous cell, we know the model exists in the cache
directory. Then, we delete the compiled model, load it again, and
measure how long it takes this time.
.. code:: ipython3
if device_name in ie.available_devices:
del compiled_model
start_time = time.perf_counter()
compiled_model = ie.compile_model(model=model, device_name=device_name, config=config_dict)
end_time = time.perf_counter()
print(f"Loading the network to the {device_name} device took {end_time-start_time:.2f} seconds.")

View File

@@ -0,0 +1,146 @@
Hello Image Segmentation
========================
A very basic introduction to using segmentation models with OpenVINO™.
In this tutorial, a pre-trained
`road-segmentation-adas-0001 <https://docs.openvino.ai/latest/omz_models_model_road_segmentation_adas_0001.html>`__
model from the `Open Model
Zoo <https://github.com/openvinotoolkit/open_model_zoo/>`__ is used.
ADAS stands for Advanced Driver Assistance Systems. The model
recognizes four classes: background, road, curb and mark.
Imports
-------
.. code:: ipython3
import cv2
import matplotlib.pyplot as plt
import numpy as np
import sys
from openvino.runtime import Core
sys.path.append("../utils")
from notebook_utils import segmentation_map_to_image
Load the Model
--------------
.. code:: ipython3
ie = Core()
model = ie.read_model(model="model/road-segmentation-adas-0001.xml")
compiled_model = ie.compile_model(model=model, device_name="CPU")
input_layer_ir = compiled_model.input(0)
output_layer_ir = compiled_model.output(0)
Load an Image
-------------
A sample image from the `Mapillary
Vistas <https://www.mapillary.com/dataset/vistas>`__ dataset is
provided.
.. code:: ipython3
# The segmentation network expects images in BGR format.
image = cv2.imread("../data/image/empty_road_mapillary.jpg")
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image_h, image_w, _ = image.shape
# N,C,H,W = batch size, number of channels, height, width.
N, C, H, W = input_layer_ir.shape
# OpenCV resize expects the destination size as (width, height).
resized_image = cv2.resize(image, (W, H))
# Reshape to the network input shape.
input_image = np.expand_dims(
resized_image.transpose(2, 0, 1), 0
)
plt.imshow(rgb_image)
.. parsed-literal::
<matplotlib.image.AxesImage at 0x7f02b820d640>
.. image:: 003-hello-segmentation-with-output_files/003-hello-segmentation-with-output_6_1.png
Do Inference
------------
.. code:: ipython3
# Run the inference.
result = compiled_model([input_image])[output_layer_ir]
# Prepare data for visualization.
segmentation_mask = np.argmax(result, axis=1)
plt.imshow(segmentation_mask.transpose(1, 2, 0))
.. parsed-literal::
<matplotlib.image.AxesImage at 0x7f02b80ef6d0>
.. image:: 003-hello-segmentation-with-output_files/003-hello-segmentation-with-output_8_1.png
Prepare Data for Visualization
------------------------------
.. code:: ipython3
# Define colormap, each color represents a class.
colormap = np.array([[68, 1, 84], [48, 103, 141], [53, 183, 120], [199, 216, 52]])
# Define the transparency of the segmentation mask on the photo.
alpha = 0.3
# Use function from notebook_utils.py to transform mask to an RGB image.
mask = segmentation_map_to_image(segmentation_mask, colormap)
resized_mask = cv2.resize(mask, (image_w, image_h))
# Create an image with mask.
image_with_mask = cv2.addWeighted(resized_mask, alpha, rgb_image, 1 - alpha, 0)
Visualize data
--------------
.. code:: ipython3
# Define titles with images.
data = {"Base Photo": rgb_image, "Segmentation": mask, "Masked Photo": image_with_mask}
# Create a subplot to visualize images.
fig, axs = plt.subplots(1, len(data.items()), figsize=(15, 10))
# Fill the subplot.
for ax, (name, image) in zip(axs, data.items()):
ax.axis('off')
ax.set_title(name)
ax.imshow(image)
# Display an image.
plt.show(fig)
.. image:: 003-hello-segmentation-with-output_files/003-hello-segmentation-with-output_12_0.png

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6652ac31dc4e31bcb618c0c13795dac83fa49291a436aea8058847f179b4095d
size 260045

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:76113c575caa9c8a8aca45d3ec6ebd7a4b513dadffd8e9e63861a7a041d7e5de
size 249032

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d9d40b840eea67857e42a78dd89b65799215e806e4f8d23b0523b0c163cb453b
size 20550

View File

@@ -0,0 +1,9 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/003-hello-segmentation-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/003-hello-segmentation-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="003-hello-segmentation-with-output_12_0.png">003-hello-segmentation-with-output_12_0.png</a> 30-May-2023 00:08 260045
<a href="003-hello-segmentation-with-output_6_1.png">003-hello-segmentation-with-output_6_1.png</a> 30-May-2023 00:08 249032
<a href="003-hello-segmentation-with-output_8_1.png">003-hello-segmentation-with-output_8_1.png</a> 30-May-2023 00:08 20550
</pre><hr></body>
</html>

View File

@@ -0,0 +1,137 @@
Hello Object Detection
======================
A very basic introduction to using object detection models with
OpenVINO™.
The
`horizontal-text-detection-0001 <https://docs.openvino.ai/latest/omz_models_model_horizontal_text_detection_0001.html>`__
model from `Open Model
Zoo <https://github.com/openvinotoolkit/open_model_zoo/>`__ is used. It
detects horizontal text in images and returns a blob of data in the
shape of ``[100, 5]``. Each detected text box is stored in the
``[x_min, y_min, x_max, y_max, conf]`` format, where the
``(x_min, y_min)`` are the coordinates of the top left bounding box
corner, ``(x_max, y_max)`` are the coordinates of the bottom right
bounding box corner and ``conf`` is the confidence for the predicted
class.
Imports
-------
.. code:: ipython3
import cv2
import matplotlib.pyplot as plt
import numpy as np
from openvino.runtime import Core
Load the Model
--------------
.. code:: ipython3
ie = Core()
model = ie.read_model(model="model/horizontal-text-detection-0001.xml")
compiled_model = ie.compile_model(model=model, device_name="CPU")
input_layer_ir = compiled_model.input(0)
output_layer_ir = compiled_model.output("boxes")
Load an Image
-------------
.. code:: ipython3
# Text detection models expect an image in BGR format.
image = cv2.imread("../data/image/intel_rnb.jpg")
# N,C,H,W = batch size, number of channels, height, width.
N, C, H, W = input_layer_ir.shape
# Resize the image to meet network expected input sizes.
resized_image = cv2.resize(image, (W, H))
# Reshape to the network input shape.
input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0)
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB));
.. image:: 004-hello-detection-with-output_files/004-hello-detection-with-output_6_0.png
Do Inference
------------
.. code:: ipython3
# Run inference on the input image.
boxes = compiled_model([input_image])[output_layer_ir]
# Remove zero only boxes.
boxes = boxes[~np.all(boxes == 0, axis=1)]
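Each remaining row follows the ``[x_min, y_min, x_max, y_max, conf]`` layout described above, in the coordinate space of the resized input image. A minimal sketch of inspecting the raw detections (variable names follow the cell above):
.. code:: ipython3

    # Each detection is [x_min, y_min, x_max, y_max, conf] in resized-image coordinates.
    for x_min, y_min, x_max, y_max, conf in boxes:
        print(f"box ({x_min:.0f}, {y_min:.0f}) -> ({x_max:.0f}, {y_max:.0f}), confidence {conf:.2f}")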
Visualize Results
-----------------
.. code:: ipython3
# For each detection, the description is in the [x_min, y_min, x_max, y_max, conf] format:
# The image passed here is in BGR format with changed width and height. To display it in colors expected by matplotlib, use cvtColor function
def convert_result_to_image(bgr_image, resized_image, boxes, threshold=0.3, conf_labels=True):
# Define colors for boxes and descriptions.
colors = {"red": (255, 0, 0), "green": (0, 255, 0)}
# Fetch the image shapes to calculate a ratio.
(real_y, real_x), (resized_y, resized_x) = bgr_image.shape[:2], resized_image.shape[:2]
ratio_x, ratio_y = real_x / resized_x, real_y / resized_y
# Convert the base image from BGR to RGB format.
rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
# Iterate through non-zero boxes.
for box in boxes:
# Pick a confidence factor from the last place in an array.
conf = box[-1]
if conf > threshold:
# Convert float to int and multiply corner position of each box by x and y ratio.
# If the bounding box is found at the top of the image,
# position the upper box bar little lower to make it visible on the image.
(x_min, y_min, x_max, y_max) = [
int(max(corner_position * ratio_y, 10)) if idx % 2
else int(corner_position * ratio_x)
for idx, corner_position in enumerate(box[:-1])
]
# Draw a box based on the position, parameters in rectangle function are: image, start_point, end_point, color, thickness.
rgb_image = cv2.rectangle(rgb_image, (x_min, y_min), (x_max, y_max), colors["green"], 3)
# Add text to the image based on position and confidence.
# Parameters in text function are: image, text, bottom-left_corner_textfield, font, font_scale, color, thickness, line_type.
if conf_labels:
rgb_image = cv2.putText(
rgb_image,
f"{conf:.2f}",
(x_min, y_min - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.8,
colors["red"],
1,
cv2.LINE_AA,
)
return rgb_image
.. code:: ipython3
plt.figure(figsize=(10, 6))
plt.axis("off")
plt.imshow(convert_result_to_image(image, resized_image, boxes, conf_labels=False));
.. image:: 004-hello-detection-with-output_files/004-hello-detection-with-output_11_0.png

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:05843bab5f2457b274c4178bfa173fc55b6403bbae29d10d4a7e91fddc818bd5
size 457214

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:69513616afb235a4e034b385ef3d3c9ab047e73c0ea58bd8c055e7da01b3bda0
size 305482

View File

@@ -0,0 +1,8 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/004-hello-detection-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/004-hello-detection-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="004-hello-detection-with-output_11_0.png">004-hello-detection-with-output_11_0.png</a> 30-May-2023 00:09 457214
<a href="004-hello-detection-with-output_6_0.png">004-hello-detection-with-output_6_0.png</a> 30-May-2023 00:09 305482
</pre><hr></body>
</html>

View File

@@ -0,0 +1,273 @@
Convert a TensorFlow Model to OpenVINO™
=======================================
This short tutorial shows how to convert a TensorFlow
`MobileNetV3 <https://docs.openvino.ai/latest/omz_models_model_mobilenet_v3_small_1_0_224_tf.html>`__
image classification model to OpenVINO `Intermediate
Representation <https://docs.openvino.ai/latest/openvino_docs_MO_DG_IR_and_opsets.html>`__
(OpenVINO IR) format, using `Model
Optimizer <https://docs.openvino.ai/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html>`__.
After creating the OpenVINO IR, load the model in `OpenVINO
Runtime <https://docs.openvino.ai/latest/openvino_docs_IE_DG_Deep_Learning_Inference_Engine_DevGuide.html>`__
and do inference with a sample image.
Imports
-------
.. code:: ipython3
import time
from pathlib import Path
import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from IPython.display import Markdown
from openvino.runtime import Core
.. parsed-literal::
2023-05-29 22:25:46.984005: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-29 22:25:47.018849: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-29 22:25:47.534965: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
Settings
--------
.. code:: ipython3
# The paths of the source and converted models.
model_dir = Path("model")
model_dir.mkdir(exist_ok=True)
model_path = Path("model/v3-small_224_1.0_float")
ir_path = Path("model/v3-small_224_1.0_float.xml")
Download model
--------------
Load model using `tf.keras.applications
api <https://www.tensorflow.org/api_docs/python/tf/keras/applications/MobileNetV3Small>`__
and save it to the disk.
.. code:: ipython3
model = tf.keras.applications.MobileNetV3Small()
model.save(model_path)
.. parsed-literal::
WARNING:tensorflow:`input_shape` is undefined or non-square, or `rows` is not 224. Weights for input shape (224, 224) will be loaded as the default.
.. parsed-literal::
2023-05-29 22:25:48.396946: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
.. parsed-literal::
WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.
.. parsed-literal::
2023-05-29 22:25:52.606419: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,1,1,1024]
[[{{node inputs}}]]
2023-05-29 22:25:55.768588: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,1,1,1024]
[[{{node inputs}}]]
WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 54). These functions will not be directly callable after loading.
.. parsed-literal::
INFO:tensorflow:Assets written to: model/v3-small_224_1.0_float/assets
.. parsed-literal::
INFO:tensorflow:Assets written to: model/v3-small_224_1.0_float/assets
Convert a Model to OpenVINO IR Format
-------------------------------------
Convert a TensorFlow Model to OpenVINO IR Format
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use Model Optimizer to convert a TensorFlow model to OpenVINO IR with
``FP16`` precision. The models are saved to the current directory. Mean
values can be embedded into the model with ``--mean_values``, and the
output scaled with the standard deviation with ``--scale_values``. With
these options, it is not necessary to normalize input data before
propagating it through the network. The original model expects input
images in ``RGB`` format. The converted model also expects images in
``RGB`` format. If you want the converted model to work with ``BGR``
images, use the ``--reverse_input_channels`` option. For more
information about Model
Optimizer, including a description of the command-line options, see the
`Model Optimizer Developer
Guide <https://docs.openvino.ai/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html>`__.
For information about the model, including input shape, expected color
order and mean values, refer to the `model
documentation <https://docs.openvino.ai/latest/omz_models_model_mobilenet_v3_small_1_0_224_tf.html>`__.
First construct the command for Model Optimizer, and then execute this
command in the notebook by prepending the command with an ``!``. There
may be some errors or warnings in the output. When model optimization is
successful, the last lines of the output will include
``[ SUCCESS ] Generated IR version 11 model.``
.. code:: ipython3
# Construct the command for Model Optimizer.
mo_command = f"""mo
--saved_model_dir "{model_path}"
--input_shape "[1,224,224,3]"
--model_name "{model_path.name}"
--compress_to_fp16
--output_dir "{model_path.parent}"
"""
mo_command = " ".join(mo_command.split())
print("Model Optimizer command to convert TensorFlow to OpenVINO:")
display(Markdown(f"`{mo_command}`"))
.. parsed-literal::
Model Optimizer command to convert TensorFlow to OpenVINO:
``mo --saved_model_dir "model/v3-small_224_1.0_float" --input_shape "[1,224,224,3]" --model_name "v3-small_224_1.0_float" --compress_to_fp16 --output_dir "model"``
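As a hedged variant, the preprocessing options discussed above can be added to the same command. The mean and scale values below are placeholders used for illustration; the correct values for this model are listed in the model documentation linked above:
.. code:: ipython3

    # Illustrative only: embed mean/scale preprocessing and swap the colour channels
    # at conversion time. The numeric values are placeholders, not taken from this notebook.
    mo_command_preproc = f"""mo
                             --saved_model_dir "{model_path}"
                             --input_shape "[1,224,224,3]"
                             --mean_values "[127.5,127.5,127.5]"
                             --scale_values "[127.5,127.5,127.5]"
                             --reverse_input_channels
                             --compress_to_fp16
                             --output_dir "{model_path.parent}"
                             """
    mo_command_preproc = " ".join(mo_command_preproc.split())
    print(mo_command_preproc)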
.. code:: ipython3
# Run Model Optimizer if the IR model file does not exist
if not ir_path.exists():
print("Exporting TensorFlow model to IR... This may take a few minutes.")
! $mo_command
else:
print(f"IR model {ir_path} already exists.")
.. parsed-literal::
Exporting TensorFlow model to IR... This may take a few minutes.
Check for a new version of Intel(R) Distribution of OpenVINO(TM) toolkit here https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit/download.html?cid=other&source=prod&campid=ww_2023_bu_IOTG_OpenVINO-2022-3&content=upg_all&medium=organic or on https://github.com/openvinotoolkit/openvino
[ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11.
Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html
[ SUCCESS ] Generated IR version 11 model.
[ SUCCESS ] XML file: /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/101-tensorflow-to-openvino/model/v3-small_224_1.0_float.xml
[ SUCCESS ] BIN file: /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/101-tensorflow-to-openvino/model/v3-small_224_1.0_float.bin
Test Inference on the Converted Model
-------------------------------------
Load the Model
~~~~~~~~~~~~~~
.. code:: ipython3
ie = Core()
model = ie.read_model(ir_path)
compiled_model = ie.compile_model(model=model, device_name="CPU")
Get Model Information
~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
input_key = compiled_model.input(0)
output_key = compiled_model.output(0)
network_input_shape = input_key.shape
Load an Image
~~~~~~~~~~~~~
Load an image, resize it, and convert it to the input shape of the
network.
.. code:: ipython3
# The MobileNet network expects images in RGB format.
image = cv2.cvtColor(cv2.imread(filename="../data/image/coco.jpg"), code=cv2.COLOR_BGR2RGB)
# Resize the image to the network input shape.
resized_image = cv2.resize(src=image, dsize=(224, 224))
# Transpose the image to the network input shape.
input_image = np.expand_dims(resized_image, 0)
plt.imshow(image);
.. image:: 101-tensorflow-to-openvino-with-output_files/101-tensorflow-to-openvino-with-output_16_0.png
Do Inference
~~~~~~~~~~~~
.. code:: ipython3
result = compiled_model(input_image)[output_key]
result_index = np.argmax(result)
.. code:: ipython3
# Convert the inference result to a class name.
imagenet_classes = open("../data/datasets/imagenet/imagenet_2012.txt").read().splitlines()
imagenet_classes[result_index]
.. parsed-literal::
'n02099267 flat-coated retriever'
Timing
------
Measure the time it takes to do inference on a thousand images. This gives
an indication of performance. For more accurate benchmarking, use the
`Benchmark
Tool <https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html>`__
in OpenVINO. Note that many optimizations are possible to improve the
performance.
.. code:: ipython3
num_images = 1000
start = time.perf_counter()
for _ in range(num_images):
compiled_model([input_image])
end = time.perf_counter()
time_ir = end - start
print(
f"IR model in OpenVINO Runtime/CPU: {time_ir/num_images:.4f} "
f"seconds per image, FPS: {num_images/time_ir:.2f}"
)
.. parsed-literal::
IR model in OpenVINO Runtime/CPU: 0.0010 seconds per image, FPS: 1032.55
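For a more thorough measurement, the Benchmark Tool mentioned above can also be run directly from the notebook. A minimal sketch (the device and duration are illustrative):
.. code:: ipython3

    # Illustrative: benchmark the converted IR model on CPU for 15 seconds.
    ! benchmark_app -m $ir_path -d CPU -t 15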

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7511b8a4e5b047600d5fed14fbc7e9653a868bc5253abf1e0c3ef649b47bc408
size 387941

View File

@@ -0,0 +1,7 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/101-tensorflow-to-openvino-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/101-tensorflow-to-openvino-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="101-tensorflow-to-openvino-with-output_16_0.png">101-tensorflow-to-openvino-with-output_16_0.png</a> 30-May-2023 00:08 387941
</pre><hr></body>
</html>

View File

@@ -0,0 +1,502 @@
Convert a PyTorch Model to ONNX and OpenVINO™ IR
================================================
This tutorial provides step-by-step instructions on how to do
inference on a PyTorch semantic segmentation model, using OpenVINO
Runtime.
First, the PyTorch model is exported in `ONNX <https://onnx.ai/>`__
format and then converted to OpenVINO IR. Then the respective ONNX and
OpenVINO IR models are loaded into OpenVINO Runtime to show model
predictions. In this tutorial, we will use the LR-ASPP model with a
MobileNetV3 backbone.
According to the paper, `Searching for
MobileNetV3 <https://arxiv.org/pdf/1905.02244.pdf>`__, LR-ASPP or Lite
Reduced Atrous Spatial Pyramid Pooling has a lightweight and efficient
segmentation decoder architecture. The diagram below illustrates the
model architecture:
.. figure:: https://user-images.githubusercontent.com/29454499/207099169-48dca3dc-a8eb-4e11-be92-40cebeec7a88.png
:alt: image
image
The model is pre-trained on the `MS
COCO <https://cocodataset.org/#home>`__ dataset. Instead of training on
all 80 classes, the segmentation model has been trained on 20 classes
from the `PASCAL VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`__
dataset: **background, aeroplane, bicycle, bird, boat, bottle, bus, car,
cat, chair, cow, dining table, dog, horse, motorbike, person, potted
plant, sheep, sofa, train, tvmonitor**
More information about the model is available in the `torchvision
documentation <https://pytorch.org/vision/main/models/lraspp.html>`__
Preparation
-----------
Imports
~~~~~~~
.. code:: ipython3
import sys
import time
import warnings
from pathlib import Path
import cv2
import numpy as np
import torch
from IPython.display import Markdown, display
from torchvision.models.segmentation import lraspp_mobilenet_v3_large, LRASPP_MobileNet_V3_Large_Weights
from openvino.runtime import Core
sys.path.append("../utils")
from notebook_utils import segmentation_map_to_image, viz_result_image, SegmentationMap, Label, download_file
Settings
~~~~~~~~
Set a name for the model, then define the width and height of the image that
will be used by the network during inference. According to the input
transforms function, the model is pre-trained on images with a height of
520 and width of 780.
.. code:: ipython3
IMAGE_WIDTH = 780
IMAGE_HEIGHT = 520
DIRECTORY_NAME = "model"
BASE_MODEL_NAME = DIRECTORY_NAME + "/lraspp_mobilenet_v3_large"
weights_path = Path(BASE_MODEL_NAME + ".pt")
# Paths where ONNX and OpenVINO IR models will be stored.
onnx_path = weights_path.with_suffix('.onnx')
if not onnx_path.parent.exists():
onnx_path.parent.mkdir()
ir_path = onnx_path.with_suffix(".xml")
Load Model
~~~~~~~~~~
Generally, a PyTorch model is an instance of the ``torch.nn.Module``
class, initialized by a state dictionary containing the model weights.
The typical steps for getting a pre-trained model are: 1. Create an
instance of the model class. 2. Load the checkpoint state dict, which
contains the pre-trained model weights. 3. Switch the model to
evaluation mode to turn some operations to inference mode.
The ``torchvision`` module provides a ready-to-use set of functions for
model class initialization. We will use
``torchvision.models.segmentation.lraspp_mobilenet_v3_large``. You can
directly pass pre-trained model weights to the model initialization
function using the weights enum
``LRASPP_MobileNet_V3_Large_Weights.COCO_WITH_VOC_LABELS_V1`` (a short
sketch of this one-step approach is shown after the next cell). However,
for demonstration purposes, we will create it separately. Download the
pre-trained weights and load the model. This may take some time if you
have not downloaded the model before.
.. code:: ipython3
print("Downloading the LRASPP MobileNetV3 model (if it has not been downloaded already)...")
download_file(LRASPP_MobileNet_V3_Large_Weights.COCO_WITH_VOC_LABELS_V1.url, filename=weights_path.name, directory=weights_path.parent)
# create model object
model = lraspp_mobilenet_v3_large()
# read state dict, use map_location argument to avoid a situation where weights are saved in cuda (which may not be available on the system)
state_dict = torch.load(weights_path, map_location='cpu')
# load state dict to model
model.load_state_dict(state_dict)
# switch model from training to inference mode
model.eval()
print("Loaded PyTorch LRASPP MobileNetV3 model")
.. parsed-literal::
Downloading the LRASPP MobileNetV3 model (if it has not been downloaded already)...
.. parsed-literal::
model/lraspp_mobilenet_v3_large.pt: 0%| | 0.00/12.5M [00:00<?, ?B/s]
.. parsed-literal::
Loaded PyTorch LRASPP MobileNetV3 model
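For comparison, here is a minimal sketch of the one-step alternative mentioned above, in which ``torchvision`` downloads and loads the pre-trained weights directly (not used further in this notebook):
.. code:: ipython3

    # Alternative: create the model with pre-trained weights in a single call.
    model_direct = lraspp_mobilenet_v3_large(
        weights=LRASPP_MobileNet_V3_Large_Weights.COCO_WITH_VOC_LABELS_V1
    )
    model_direct.eval()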
ONNX Model Conversion
---------------------
Convert PyTorch model to ONNX
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
OpenVINO supports PyTorch models that are exported in ONNX format. We
will use the ``torch.onnx.export`` function to obtain the ONNX model;
you can learn more about this feature in the `PyTorch
documentation <https://pytorch.org/docs/stable/onnx.html>`__. We need to
provide a model object, example input for model tracing, and a path where
the model will be saved. When providing example input, it is not
necessary to use real data; dummy input data with the specified shape is
sufficient. Optionally, we can provide a target ONNX opset for
conversion and/or other parameters specified in the documentation
(e.g. input and output names or dynamic shapes); a sketch of these
options is shown after the export cell below.
Sometimes a warning will be shown, but in most cases it is harmless, so
let us just filter it out. When the conversion is successful, the last
line of the output will read:
``ONNX model exported to model/lraspp_mobilenet_v3_large.onnx.``
.. code:: ipython3
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
if not onnx_path.exists():
dummy_input = torch.randn(1, 3, IMAGE_HEIGHT, IMAGE_WIDTH)
torch.onnx.export(
model,
dummy_input,
onnx_path,
)
print(f"ONNX model exported to {onnx_path}.")
else:
print(f"ONNX model {onnx_path} already exists.")
.. parsed-literal::
ONNX model exported to model/lraspp_mobilenet_v3_large.onnx.
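For reference, a hedged sketch of the optional export parameters mentioned above (an explicit opset version, a named input, and a dynamic batch axis). The output path and tensor name are illustrative and not used elsewhere in this notebook:
.. code:: ipython3

    # Illustrative only: export with an explicit opset, a named input tensor,
    # and a dynamic batch dimension.
    torch.onnx.export(
        model,
        torch.randn(1, 3, IMAGE_HEIGHT, IMAGE_WIDTH),
        "model/lraspp_mobilenet_v3_large_dynamic.onnx",  # hypothetical output path
        opset_version=11,
        input_names=["input"],
        dynamic_axes={"input": {0: "batch"}},
    )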
Convert ONNX Model to OpenVINO IR Format
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use Model Optimizer to convert the ONNX model to OpenVINO IR with
``FP16`` precision. The models are saved inside the current directory.
For more information about Model Optimizer, see the `Model Optimizer
Developer
Guide <https://docs.openvino.ai/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html>`__.
Executing this command may take a while. There may be some errors or
warnings in the output. When Model Optimization is successful, the last
lines of the output will include:
``[ SUCCESS ] Generated IR version 11 model.``
.. code:: ipython3
# Construct the command for Model Optimizer.
mo_command = f"""mo
--input_model "{onnx_path}"
--compress_to_fp16
--output_dir "{ir_path.parent}"
"""
mo_command = " ".join(mo_command.split())
print("Model Optimizer command to convert the ONNX model to OpenVINO:")
display(Markdown(f"`{mo_command}`"))
.. parsed-literal::
Model Optimizer command to convert the ONNX model to OpenVINO:
``mo --input_model "model/lraspp_mobilenet_v3_large.onnx" --compress_to_fp16 --output_dir "model"``
.. code:: ipython3
if not ir_path.exists():
print("Exporting ONNX model to IR... This may take a few minutes.")
mo_result = %sx $mo_command
print("\n".join(mo_result))
else:
print(f"IR model {ir_path} already exists.")
.. parsed-literal::
Exporting ONNX model to IR... This may take a few minutes.
Check for a new version of Intel(R) Distribution of OpenVINO(TM) toolkit here https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit/download.html?cid=other&source=prod&campid=ww_2023_bu_IOTG_OpenVINO-2022-3&content=upg_all&medium=organic or on https://github.com/openvinotoolkit/openvino
[ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11.
Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html
[ SUCCESS ] Generated IR version 11 model.
[ SUCCESS ] XML file: /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/102-pytorch-onnx-to-openvino/model/lraspp_mobilenet_v3_large.xml
[ SUCCESS ] BIN file: /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/102-pytorch-onnx-to-openvino/model/lraspp_mobilenet_v3_large.bin
Show Results
------------
Confirm that the segmentation results look as expected by comparing
model predictions on the ONNX, OpenVINO IR and PyTorch models.
Load and Preprocess an Input Image
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Images need to be normalized before propagating through the network.
.. code:: ipython3
def normalize(image: np.ndarray) -> np.ndarray:
"""
Normalize the image to the given mean and standard deviation
for CityScapes models.
"""
image = image.astype(np.float32)
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
image /= 255.0
image -= mean
image /= std
return image
.. code:: ipython3
image_filename = "../data/image/coco.jpg"
image = cv2.cvtColor(cv2.imread(image_filename), cv2.COLOR_BGR2RGB)
resized_image = cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT))
normalized_image = normalize(resized_image)
# Convert the resized images to network input shape.
input_image = np.expand_dims(np.transpose(resized_image, (2, 0, 1)), 0)
normalized_input_image = np.expand_dims(np.transpose(normalized_image, (2, 0, 1)), 0)
Load the OpenVINO IR Network and Run Inference on the ONNX model
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
OpenVINO Runtime can load ONNX models directly. First, load the ONNX
model, do inference and show the results. Then, load the model that was
converted to OpenVINO Intermediate Representation (OpenVINO IR) with
Model Optimizer and do inference on that model, and show the results on
an image.
1. ONNX Model in OpenVINO Runtime
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. code:: ipython3
# Load the network to OpenVINO Runtime.
ie = Core()
model_onnx = ie.read_model(model=onnx_path)
compiled_model_onnx = ie.compile_model(model=model_onnx, device_name="CPU")
output_layer_onnx = compiled_model_onnx.output(0)
# Run inference on the input image.
res_onnx = compiled_model_onnx([normalized_input_image])[output_layer_onnx]
The model predicts probabilities for how well each pixel corresponds to
a specific label. To get the label with the highest probability for each
pixel, the argmax operation should be applied. After that, color coding
can be applied to each label for more convenient visualization.
.. code:: ipython3
voc_labels = [
Label(index=0, color=(0, 0, 0), name="background"),
Label(index=1, color=(128, 0, 0), name="aeroplane"),
Label(index=2, color=(0, 128, 0), name="bicycle"),
Label(index=3, color=(128, 128, 0), name="bird"),
Label(index=4, color=(0, 0, 128), name="boat"),
Label(index=5, color=(128, 0, 128), name="bottle"),
Label(index=6, color=(0, 128, 128), name="bus"),
Label(index=7, color=(128, 128, 128), name="car"),
Label(index=8, color=(64, 0, 0), name="cat"),
Label(index=9, color=(192, 0, 0), name="chair"),
Label(index=10, color=(64, 128, 0), name="cow"),
Label(index=11, color=(192, 128, 0), name="dining table"),
Label(index=12, color=(64, 0, 128), name="dog"),
Label(index=13, color=(192, 0, 128), name="horse"),
Label(index=14, color=(64, 128, 128), name="motorbike"),
Label(index=15, color=(192, 128, 128), name="person"),
Label(index=16, color=(0, 64, 0), name="potted plant"),
Label(index=17, color=(128, 64, 0), name="sheep"),
Label(index=18, color=(0, 192, 0), name="sofa"),
Label(index=19, color=(128, 192, 0), name="train"),
Label(index=20, color=(0, 64, 128), name="tv monitor")
]
VOCLabels = SegmentationMap(voc_labels)
# Convert the network result to a segmentation map and display the result.
result_mask_onnx = np.squeeze(np.argmax(res_onnx, axis=1)).astype(np.uint8)
viz_result_image(
image,
segmentation_map_to_image(result_mask_onnx, VOCLabels.get_colormap()),
resize=True,
)
.. image:: 102-pytorch-onnx-to-openvino-with-output_files/102-pytorch-onnx-to-openvino-with-output_20_0.png
2. OpenVINO IR Model in OpenVINO Runtime
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. code:: ipython3
# Load the network in OpenVINO Runtime.
ie = Core()
model_ir = ie.read_model(model=ir_path)
compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU")
# Get input and output layers.
output_layer_ir = compiled_model_ir.output(0)
# Run inference on the input image.
res_ir = compiled_model_ir([normalized_input_image])[output_layer_ir]
.. code:: ipython3
result_mask_ir = np.squeeze(np.argmax(res_ir, axis=1)).astype(np.uint8)
viz_result_image(
image,
segmentation_map_to_image(result=result_mask_ir, colormap=VOCLabels.get_colormap()),
resize=True,
)
.. image:: 102-pytorch-onnx-to-openvino-with-output_files/102-pytorch-onnx-to-openvino-with-output_23_0.png
PyTorch Comparison
------------------
Do inference on the PyTorch model to verify that the output visually
looks the same as the output on the ONNX/OpenVINO IR models.
.. code:: ipython3
model.eval()
with torch.no_grad():
result_torch = model(torch.as_tensor(normalized_input_image).float())
result_mask_torch = torch.argmax(result_torch['out'], dim=1).squeeze(0).numpy().astype(np.uint8)
viz_result_image(
image,
segmentation_map_to_image(result=result_mask_torch, colormap=VOCLabels.get_colormap()),
resize=True,
)
.. image:: 102-pytorch-onnx-to-openvino-with-output_files/102-pytorch-onnx-to-openvino-with-output_25_0.png
Performance Comparison
----------------------
Measure the time it takes to do inference on 100 images. This gives
an indication of performance. For more accurate benchmarking, use the
`Benchmark
Tool <https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html>`__.
Keep in mind that many optimizations are possible to improve the
performance.
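As a rough cross-check of the figures reported below, 0.031 seconds per
image corresponds to roughly 1 / 0.031 ≈ 32 FPS.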
.. code:: ipython3
num_images = 100
with torch.no_grad():
start = time.perf_counter()
for _ in range(num_images):
model(torch.as_tensor(input_image).float())
end = time.perf_counter()
time_torch = end - start
print(
f"PyTorch model on CPU: {time_torch/num_images:.3f} seconds per image, "
f"FPS: {num_images/time_torch:.2f}"
)
start = time.perf_counter()
for _ in range(num_images):
compiled_model_onnx([normalized_input_image])
end = time.perf_counter()
time_onnx = end - start
print(
f"ONNX model in OpenVINO Runtime/CPU: {time_onnx/num_images:.3f} "
f"seconds per image, FPS: {num_images/time_onnx:.2f}"
)
start = time.perf_counter()
for _ in range(num_images):
compiled_model_ir([input_image])
end = time.perf_counter()
time_ir = end - start
print(
f"OpenVINO IR model in OpenVINO Runtime/CPU: {time_ir/num_images:.3f} "
f"seconds per image, FPS: {num_images/time_ir:.2f}"
)
if "GPU" in ie.available_devices:
compiled_model_onnx_gpu = ie.compile_model(model=model_onnx, device_name="GPU")
start = time.perf_counter()
for _ in range(num_images):
compiled_model_onnx_gpu([input_image])
end = time.perf_counter()
time_onnx_gpu = end - start
print(
f"ONNX model in OpenVINO/GPU: {time_onnx_gpu/num_images:.3f} "
f"seconds per image, FPS: {num_images/time_onnx_gpu:.2f}"
)
compiled_model_ir_gpu = ie.compile_model(model=model_ir, device_name="GPU")
start = time.perf_counter()
for _ in range(num_images):
compiled_model_ir_gpu([input_image])
end = time.perf_counter()
time_ir_gpu = end - start
print(
f"IR model in OpenVINO/GPU: {time_ir_gpu/num_images:.3f} "
f"seconds per image, FPS: {num_images/time_ir_gpu:.2f}"
)
.. parsed-literal::
PyTorch model on CPU: 0.039 seconds per image, FPS: 25.80
ONNX model in OpenVINO Runtime/CPU: 0.031 seconds per image, FPS: 32.10
OpenVINO IR model in OpenVINO Runtime/CPU: 0.031 seconds per image, FPS: 32.29
**Show Device Information**
.. code:: ipython3
devices = ie.available_devices
for device in devices:
device_name = ie.get_property(device, "FULL_DEVICE_NAME")
print(f"{device}: {device_name}")
.. parsed-literal::
CPU: Intel(R) Core(TM) i9-10920X CPU @ 3.50GHz
References
----------
- `Torchvision <https://pytorch.org/vision/stable/index.html>`__
- `Pytorch ONNX
Documentation <https://pytorch.org/docs/stable/onnx.html>`__
- `PIP install openvino-dev <https://pypi.org/project/openvino-dev/>`__
- `OpenVINO ONNX
support <https://docs.openvino.ai/2021.4/openvino_docs_IE_DG_ONNX_Support.html>`__
- `Model Optimizer
Documentation <https://docs.openvino.ai/latest/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model_General.html>`__
- `Model Optimizer Pytorch conversion
guide <https://docs.openvino.ai/latest/openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_PyTorch.html>`__

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2c48d5afdee0d49838332deac5fea09e48ea455371a5bc88fd865b070fcffbc3
size 465692

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eb50dbc3d8a370e69c72cedea217cb413f04852f64580e0c7b54aa495b6c5ad2
size 465695

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2c48d5afdee0d49838332deac5fea09e48ea455371a5bc88fd865b070fcffbc3
size 465692

View File

@ -0,0 +1,9 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/102-pytorch-onnx-to-openvino-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/102-pytorch-onnx-to-openvino-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="102-pytorch-onnx-to-openvino-with-output_20_0.png">102-pytorch-onnx-to-openvino-with-output_20_0.png</a> 30-May-2023 00:09 465692
<a href="102-pytorch-onnx-to-openvino-with-output_23_0.png">102-pytorch-onnx-to-openvino-with-output_23_0.png</a> 30-May-2023 00:09 465695
<a href="102-pytorch-onnx-to-openvino-with-output_25_0.png">102-pytorch-onnx-to-openvino-with-output_25_0.png</a> 30-May-2023 00:09 465692
</pre><hr></body>
</html>

View File

@ -0,0 +1,464 @@
Convert a PaddlePaddle Model to OpenVINO™ IR
============================================
This notebook shows how to convert a MobileNetV3 model from
`PaddleHub <https://github.com/PaddlePaddle/PaddleHub>`__, pre-trained
on the `ImageNet <https://www.image-net.org>`__ dataset, to OpenVINO IR.
It also shows how to perform classification inference on a sample image,
using `OpenVINO
Runtime <https://docs.openvino.ai/latest/openvino_docs_IE_DG_Deep_Learning_Inference_Engine_DevGuide.html>`__
and compares the results of the
`PaddlePaddle <https://github.com/PaddlePaddle/Paddle>`__ model with the
IR model.
Source of the
`model <https://www.paddlepaddle.org.cn/hubdetail?name=mobilenet_v3_large_imagenet_ssld&en_category=ImageClassification>`__.
Preparation
-----------
Imports
~~~~~~~
.. code:: ipython3
!pip install -q "paddlepaddle==2.5.0rc0"
.. code:: ipython3
!pip install -q paddleclas --no-deps
!pip install -q "prettytable" "ujson" "visualdl>=2.2.0" "faiss-cpu>=1.7.1"
.. parsed-literal::
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
paddleclas 2.5.1 requires easydict, which is not installed.
paddleclas 2.5.1 requires faiss-cpu==1.7.1.post2, but you have faiss-cpu 1.7.4 which is incompatible.
paddleclas 2.5.1 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible.
.. code:: ipython3
import time
import tarfile
from pathlib import Path
import sys
import matplotlib.pyplot as plt
import numpy as np
from paddleclas import PaddleClas
from IPython.display import Markdown, display
from PIL import Image
from openvino.runtime import Core
sys.path.append("../utils")
from notebook_utils import download_file
.. parsed-literal::
2023-05-29 22:27:09 INFO: Loading faiss with AVX2 support.
2023-05-29 22:27:09 INFO: Successfully loaded faiss with AVX2 support.
Settings
~~~~~~~~
Set ``IMAGE_FILENAME`` to the filename of an image to use. Set
``MODEL_NAME`` to the PaddlePaddle model to download from PaddleHub.
``MODEL_NAME`` will also be the base name for the IR model. The notebook
is tested with the
`mobilenet_v3_large_x1_0 <https://github.com/PaddlePaddle/PaddleClas/blob/release/2.5/docs/en/models/Mobile_en.md>`__
model. Other models may use different preprocessing methods and
therefore require some modification to get the same results on the
original and converted model.
First of all, we need to download and unpack model files. The first time
you run this notebook, the PaddlePaddle model is downloaded from
PaddleHub. This may take a while.
.. code:: ipython3
IMAGE_FILENAME = "../data/image/coco_close.png"
MODEL_NAME = "MobileNetV3_large_x1_0"
MODEL_DIR = Path("model")
if not MODEL_DIR.exists():
MODEL_DIR.mkdir()
MODEL_URL = 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/{}_infer.tar'.format(MODEL_NAME)
download_file(MODEL_URL, directory=MODEL_DIR)
file = tarfile.open(MODEL_DIR / '{}_infer.tar'.format(MODEL_NAME))
res = file.extractall(MODEL_DIR)
if not res:
print(f"Model Extracted to \"./{MODEL_DIR}\".")
else:
print("Error Extracting the model. Please check the network.")
.. parsed-literal::
model/MobileNetV3_large_x1_0_infer.tar: 0%| | 0.00/19.5M [00:00<?, ?B/s]
.. parsed-literal::
Model Extracted to "./model".
Show Inference on PaddlePaddle Model
------------------------------------
In the next cell, we load the model, load and display an image, do
inference on that image, and then show the top three prediction results.
.. code:: ipython3
classifier = PaddleClas(inference_model_dir=MODEL_DIR / '{}_infer'.format(MODEL_NAME))
result = next(classifier.predict(IMAGE_FILENAME))
class_names = result[0]['label_names']
scores = result[0]['scores']
image = Image.open(IMAGE_FILENAME)
plt.imshow(image)
for class_name, softmax_probability in zip(class_names, scores):
print(f"{class_name}, {softmax_probability:.5f}")
.. parsed-literal::
[2023/05/29 22:27:44] ppcls WARNING: The current running environment does not support the use of GPU. CPU has been used instead.
.. parsed-literal::
W0529 22:27:44.770324 2561117 analysis_config.cc:971] It is detected that mkldnn and memory_optimize_pass are enabled at the same time, but they are not supported yet. Currently, memory_optimize_pass is explicitly disabled
.. parsed-literal::
Labrador retriever, 0.75138
German short-haired pointer, 0.02373
Great Dane, 0.01848
Rottweiler, 0.01435
flat-coated retriever, 0.01144
.. image:: 103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_8_3.png
``classifier.predict()`` takes an image file name, reads the image,
preprocesses the input, then returns the class labels and scores of the
image. Preprocessing the image is done behind the scenes. The
classification model returns an array with floating point values for
each of the 1000 ImageNet classes. The higher a value, the more
confident the network is that the class with that index in the output
array is the correct class for the image.
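As a minimal sketch of that idea (using a synthetic score vector rather
than the actual classifier output), the top three class indices can be
recovered from such an array with ``np.argsort``:

.. code:: ipython3

    # Illustration only: a synthetic 1000-element score vector stands in for
    # the classifier output. The three largest scores give the top-3 indices.
    synthetic_scores = np.random.rand(1000)
    top3_indices = np.argsort(synthetic_scores)[-3:][::-1]
    print(top3_indices, synthetic_scores[top3_indices])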
To see PaddlePaddle's implementation of the classification function and
of data loading and preprocessing, uncomment the next two cells.
.. code:: ipython3
# classifier??
.. code:: ipython3
# classifier.get_config()
The ``classifier.get_config()`` method shows the preprocessing
configuration for the model. It should show that images are normalized,
resized, and cropped, and that the BGR image is converted to RGB before
it is propagated through the network. In the next cell, we get the
``classifier.predictor.preprocess_ops`` property, which returns the list
of preprocessing operations, so that inference on the OpenVINO IR model
can use the same preprocessing.
.. code:: ipython3
preprocess_ops = classifier.predictor.preprocess_ops
def process_image(image):
for op in preprocess_ops:
image = op(image)
return image
It is useful to show the output of the ``process_image()`` function, to
see the effect of cropping and resizing. Because of the normalization,
the colors will look strange, and matplotlib will warn about clipping
values.
.. code:: ipython3
pil_image = Image.open(IMAGE_FILENAME)
processed_image = process_image(np.array(pil_image))
print(f"Processed image shape: {processed_image.shape}")
# Processed image is in (C,H,W) format, convert to (H,W,C) to show the image
plt.imshow(np.transpose(processed_image, (1, 2, 0)))
.. parsed-literal::
2023-05-29 22:27:45 WARNING: Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
.. parsed-literal::
Processed image shape: (3, 224, 224)
.. parsed-literal::
<matplotlib.image.AxesImage at 0x7f68a9799eb0>
.. image:: 103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_15_3.png
To decode the labels predicted by the model to names of classes, we need
to have a mapping between them. The model config contains information
about ``class_id_map_file``, which stores such mapping. The code below
shows how to parse the mapping into a dictionary to use with the
OpenVINO model.
.. code:: ipython3
class_id_map_file = classifier.get_config()['PostProcess']['Topk']['class_id_map_file']
class_id_map = {}
with open(class_id_map_file, "r") as fin:
lines = fin.readlines()
for line in lines:
partition = line.split("\n")[0].partition(" ")
class_id_map[int(partition[0])] = str(partition[-1])
Convert the Model to OpenVINO IR Format
---------------------------------------
Call the OpenVINO Model Optimizer tool to convert the PaddlePaddle model
to OpenVINO IR, with FP32 precision. The models are saved to the current
directory. You can add the mean values to the model with
``--mean_values`` and scale the output with the standard deviation with
``--scale_values``. With these options, it is not necessary to normalize
input data before propagating it through the network. However, to get
the exact same output as the PaddlePaddle model, it is necessary to
preprocess the image in the same way. Therefore, for this tutorial, you
do not add the mean and scale values to the model. Instead, you use the
``process_image`` function, as described in the previous section, to
ensure that both the IR and the PaddlePaddle model use the same
preprocessing methods. See the `PyTorch/ONNX to
OpenVINO <102-pytorch-onnx-to-openvino-with-output.html>`__
notebook for an example where these Model Optimizer options are used.
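For reference, if you did want to embed the preprocessing into the IR,
the Model Optimizer call could look like the sketch below. The mean and
scale values shown are the standard ImageNet ones and are an assumption
here; check ``classifier.get_config()`` for the values this model
actually uses before relying on them.

.. code:: ipython3

    # Sketch only: embed ImageNet-style mean/scale values into the IR instead
    # of normalizing in Python. The values below are assumptions, not taken
    # from this model's configuration.
    mo_command_with_preprocessing = (
        "mo --input_model model/MobileNetV3_large_x1_0_infer/inference.pdmodel "
        f"--model_name {MODEL_NAME} "
        "--mean_values [123.675,116.28,103.53] "
        "--scale_values [58.395,57.12,57.375]"
    )
    print(mo_command_with_preprocessing)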
Run ``! mo --help`` in a code cell to show an overview of command line
options for Model Optimizer. See the `Model Optimizer Developer
Guide <https://docs.openvino.ai/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html>`__
for more information about Model Optimizer.
In the next cell, we first construct the command for Model Optimizer,
and then execute this command in the notebook by prepending it
with a ``!``. When model conversion is successful, the last lines of
the output include ``[ SUCCESS ] Generated IR version 11 model``.
.. code:: ipython3
model_xml = Path(MODEL_NAME).with_suffix('.xml')
if not model_xml.exists():
mo_command = f'mo --input_model model/MobileNetV3_large_x1_0_infer/inference.pdmodel --model_name {MODEL_NAME}'
display(Markdown(f"Model Optimizer command to convert the ONNX model to IR: `{mo_command}`"))
display(Markdown("_Converting model to IR. This may take a few minutes..._"))
! $mo_command
else:
print(f"{model_xml} already exists.")
Model Optimizer command to convert the PaddlePaddle model to OpenVINO IR:
``mo --input_model model/MobileNetV3_large_x1_0_infer/inference.pdmodel --model_name MobileNetV3_large_x1_0``
*Converting model to IR. This may take a few minutes…*
.. parsed-literal::
Check for a new version of Intel(R) Distribution of OpenVINO(TM) toolkit here https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit/download.html?cid=other&source=prod&campid=ww_2023_bu_IOTG_OpenVINO-2022-3&content=upg_all&medium=organic or on https://github.com/openvinotoolkit/openvino
[ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11.
Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html
[ SUCCESS ] Generated IR version 11 model.
[ SUCCESS ] XML file: /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/103-paddle-to-openvino/MobileNetV3_large_x1_0.xml
[ SUCCESS ] BIN file: /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/103-paddle-to-openvino/MobileNetV3_large_x1_0.bin
Show Inference on OpenVINO Model
--------------------------------
Load the IR model, get model information, load the image, do inference,
convert the inference to a meaningful result, and show the output. See
the `OpenVINO Runtime API
Notebook <002-openvino-api-with-output.html>`__ for more
information.
.. code:: ipython3
# Load OpenVINO Runtime and OpenVINO IR model
ie = Core()
model = ie.read_model(model_xml)
compiled_model = ie.compile_model(model=model, device_name="CPU")
# Get model output
output_layer = compiled_model.output(0)
# Read, show, and preprocess input image
# See the "Show Inference on PaddlePaddle Model" section for source of process_image
image = Image.open(IMAGE_FILENAME)
plt.imshow(image)
input_image = process_image(np.array(image))[None,]
# Do inference
ie_result = compiled_model([input_image])[output_layer][0]
# find the top three values
top_indices = np.argsort(ie_result)[-3:][::-1]
top_scores = ie_result[top_indices]
# Convert the inference results to class names, using the same labels as the PaddlePaddle classifier
for index, softmax_probability in zip(top_indices, top_scores):
print(f"{class_id_map[index]}, {softmax_probability:.5f}")
.. parsed-literal::
Labrador retriever, 0.75138
German short-haired pointer, 0.02373
Great Dane, 0.01848
.. image:: 103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_21_1.png
Timing and Comparison
---------------------
Measure the time it takes to do inference on fifty images and compare
the result. The timing information gives an indication of performance.
For a fair comparison, we include the time it takes to process the
image. For more accurate benchmarking, use the `OpenVINO benchmark
tool <https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html>`__.
Note that many optimizations are possible to improve the performance.
.. code:: ipython3
num_images = 50
image = Image.open(fp=IMAGE_FILENAME)
.. code:: ipython3
# Show CPU information
ie = Core()
print(f"CPU: {ie.get_property('CPU', 'FULL_DEVICE_NAME')}")
.. parsed-literal::
CPU: Intel(R) Core(TM) i9-10920X CPU @ 3.50GHz
.. code:: ipython3
# Show inference speed on PaddlePaddle model
start = time.perf_counter()
for _ in range(num_images):
result = next(classifier.predict(np.array(image)))
end = time.perf_counter()
time_ir = end - start
print(
f"PaddlePaddle model on CPU: {time_ir/num_images:.4f} "
f"seconds per image, FPS: {num_images/time_ir:.2f}\n"
)
print("PaddlePaddle result:")
class_names = result[0]['label_names']
scores = result[0]['scores']
for class_name, softmax_probability in zip(class_names, scores):
print(f"{class_name}, {softmax_probability:.5f}")
plt.imshow(image);
.. parsed-literal::
PaddlePaddle model on CPU: 0.0067 seconds per image, FPS: 148.81
PaddlePaddle result:
Labrador retriever, 0.75138
German short-haired pointer, 0.02373
Great Dane, 0.01848
Rottweiler, 0.01435
flat-coated retriever, 0.01144
.. image:: 103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_25_1.png
.. code:: ipython3
# Show inference speed on OpenVINO IR model
compiled_model = ie.compile_model(model=model, device_name="CPU")
output_layer = compiled_model.output(0)
start = time.perf_counter()
input_image = process_image(np.array(image))[None,]
for _ in range(num_images):
ie_result = compiled_model([input_image])[output_layer][0]
top_indices = np.argsort(ie_result)[-5:][::-1]
top_softmax = ie_result[top_indices]
end = time.perf_counter()
time_ir = end - start
print(
f"OpenVINO IR model in OpenVINO Runtime (CPU): {time_ir/num_images:.4f} "
f"seconds per image, FPS: {num_images/time_ir:.2f}"
)
print()
print("OpenVINO result:")
for index, softmax_probability in zip(top_indices, top_softmax):
print(f"{class_id_map[index]}, {softmax_probability:.5f}")
plt.imshow(image);
.. parsed-literal::
OpenVINO IR model in OpenVINO Runtime (CPU): 0.0029 seconds per image, FPS: 342.28
OpenVINO result:
Labrador retriever, 0.75138
German short-haired pointer, 0.02373
Great Dane, 0.01848
Rottweiler, 0.01435
flat-coated retriever, 0.01144
.. image:: 103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_26_1.png
References
----------
- `PaddleClas <https://github.com/PaddlePaddle/PaddleClas>`__
- `OpenVINO PaddlePaddle
support <https://docs.openvino.ai/latest/openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Paddle.html>`__
- `OpenVINO Model Optimizer
Documentation <https://docs.openvino.ai/latest/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model_General.html>`__

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:306bb006db6e8ef4b71e12f1007cf62a782408867b1c5e2af981d0dcde7d50e7
size 120883

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aaa7ff0a118fe7ac95479e0467f34f793d1013d972c5c850c610e39f6983ee3c
size 224886

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aaa7ff0a118fe7ac95479e0467f34f793d1013d972c5c850c610e39f6983ee3c
size 224886

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aaa7ff0a118fe7ac95479e0467f34f793d1013d972c5c850c610e39f6983ee3c
size 224886

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aaa7ff0a118fe7ac95479e0467f34f793d1013d972c5c850c610e39f6983ee3c
size 224886

View File

@ -0,0 +1,11 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/103-paddle-to-openvino-classification-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/103-paddle-to-openvino-classification-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="103-paddle-to-openvino-classification-with-output_15_3.png">103-paddle-to-openvino-classification-with-outp..&gt;</a> 30-May-2023 00:09 120883
<a href="103-paddle-to-openvino-classification-with-output_21_1.png">103-paddle-to-openvino-classification-with-outp..&gt;</a> 30-May-2023 00:09 224886
<a href="103-paddle-to-openvino-classification-with-output_25_1.png">103-paddle-to-openvino-classification-with-outp..&gt;</a> 30-May-2023 00:09 224886
<a href="103-paddle-to-openvino-classification-with-output_26_1.png">103-paddle-to-openvino-classification-with-outp..&gt;</a> 30-May-2023 00:09 224886
<a href="103-paddle-to-openvino-classification-with-output_8_3.png">103-paddle-to-openvino-classification-with-outp..&gt;</a> 30-May-2023 00:09 224886
</pre><hr></body>
</html>

View File

@ -0,0 +1,538 @@
Working with Open Model Zoo Models
==================================
This tutorial shows how to download a model from `Open Model
Zoo <https://github.com/openvinotoolkit/open_model_zoo>`__, convert it
to OpenVINO™ IR format, show information about the model, and benchmark
the model.
OpenVINO and Open Model Zoo Tools
---------------------------------
OpenVINO and Open Model Zoo tools are listed in the table below.
+------------------+-----------------+------------------------------------------------+
| Tool             | Command         | Description                                    |
+==================+=================+================================================+
| Model Downloader | omz_downloader  | Download models from Open Model Zoo.           |
+------------------+-----------------+------------------------------------------------+
| Model Converter  | omz_converter   | Convert Open Model Zoo models to OpenVINO IR   |
|                  |                 | format.                                        |
+------------------+-----------------+------------------------------------------------+
| Info Dumper      | omz_info_dumper | Print information about Open Model Zoo models. |
+------------------+-----------------+------------------------------------------------+
| Benchmark Tool   | benchmark_app   | Benchmark model performance by computing       |
|                  |                 | inference time.                                |
+------------------+-----------------+------------------------------------------------+
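The notebook builds and runs these commands step by step in the
following sections. As a quick orientation only, typical invocations
look like the sketch below, where ``<model_name>`` and ``<model.xml>``
are placeholders rather than values used in this notebook.

.. code:: ipython3

    # Placeholder invocations of the tools listed above; the real commands are
    # constructed later in this notebook from the selected model name.
    # !omz_downloader --name <model_name> --output_dir model --cache_dir cache
    # !omz_converter --name <model_name> --precisions FP16 --download_dir model --output_dir model
    # !omz_info_dumper --name <model_name>
    # !benchmark_app -m <model.xml> -d CPU -t 15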
Preparation
-----------
Model Name
~~~~~~~~~~
Set ``model_name`` to the name of the Open Model Zoo model to use in
this notebook. Refer to the list of
`public <https://github.com/openvinotoolkit/open_model_zoo/blob/master/models/public/index.md>`__
and
`Intel <https://github.com/openvinotoolkit/open_model_zoo/blob/master/models/intel/index.md>`__
pre-trained models for a full list of models that can be used.
.. code:: ipython3
# model_name = "resnet-50-pytorch"
model_name = "mobilenet-v2-pytorch"
Imports
~~~~~~~
.. code:: ipython3
import json
import sys
from pathlib import Path
from IPython.display import Markdown, display
from openvino.runtime import Core
sys.path.append("../utils")
from notebook_utils import DeviceNotFoundAlert, NotebookAlert
Settings and Configuration
~~~~~~~~~~~~~~~~~~~~~~~~~~
Set the file and directory paths. By default, this notebook downloads
models from Open Model Zoo into the ``model`` folder next to the
notebook and uses ``cache`` as the Open Model Zoo cache folder. To
change these locations, change ``base_model_dir`` and ``omz_cache_dir``
in the cell below.
The following settings can be changed:
- ``base_model_dir``: Models will be downloaded into the ``intel`` and
``public`` folders in this directory.
- ``omz_cache_dir``: Cache folder for Open Model Zoo. Specifying a
cache directory is not required for Model Downloader and Model
Converter, but it speeds up subsequent downloads.
- ``precision``: If specified, only models with this precision will be
downloaded and converted.
.. code:: ipython3
base_model_dir = Path("model")
omz_cache_dir = Path("cache")
precision = "FP16"
# Check if an iGPU is available on this system to use with Benchmark App.
ie = Core()
gpu_available = "GPU" in ie.available_devices
print(
f"base_model_dir: {base_model_dir}, omz_cache_dir: {omz_cache_dir}, gpu_availble: {gpu_available}"
)
.. parsed-literal::
    base_model_dir: model, omz_cache_dir: cache, gpu_available: False
Download a Model from Open Model Zoo
------------------------------------
.. code:: ipython3
import shutil
if Path("open_model_zoo").exists():
shutil.rmtree("open_model_zoo")
!git clone https://github.com/openvinotoolkit/open_model_zoo.git
%cd open_model_zoo
!git checkout aa02473c20c1b62763de5385229ffde476e8119c
%cd tools/model_tools
!pip install .
%cd ../../../
.. parsed-literal::
Cloning into 'open_model_zoo'...
remote: Enumerating objects: 103165, done.
remote: Counting objects: 100% (889/889), done.
remote: Compressing objects: 100% (525/525), done.
remote: Total 103165 (delta 291), reused 825 (delta 268), pack-reused 102276
Receiving objects: 100% (103165/103165), 303.37 MiB | 3.87 MiB/s, done.
Resolving deltas: 100% (70208/70208), done.
/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/104-model-tools/open_model_zoo
Note: switching to 'aa02473c20c1b62763de5385229ffde476e8119c'.
You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by switching back to a branch.
If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -c with the switch command. Example:
git switch -c <new-branch-name>
Or undo this operation with:
git switch -
Turn off this advice by setting config variable advice.detachedHead to false
HEAD is now at aa02473c2 Merge pull request #3621 from eaidova/ea/fix_imports_order
/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/104-model-tools/open_model_zoo/tools/model_tools
Processing /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/104-model-tools/open_model_zoo/tools/model_tools
Installing build dependencies ... - \ | done
Getting requirements to build wheel ... - done
Preparing metadata (pyproject.toml) ... - done
Requirement already satisfied: openvino-telemetry>=2022.1.0 in /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from omz-tools==1.0.2) (2022.3.0)
Requirement already satisfied: pyyaml>=5.4.1 in /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from omz-tools==1.0.2) (6.0)
Requirement already satisfied: requests>=2.25.1 in /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from omz-tools==1.0.2) (2.31.0)
Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->omz-tools==1.0.2) (3.1.0)
Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->omz-tools==1.0.2) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->omz-tools==1.0.2) (1.26.16)
Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->omz-tools==1.0.2) (2023.5.7)
Building wheels for collected packages: omz-tools
Building wheel for omz-tools (pyproject.toml) ... - \ | / - \ done
Created wheel for omz-tools: filename=omz_tools-1.0.2-py3-none-any.whl size=3364798 sha256=3180da2c121c85b9de8ca19bcdd1b0d774d203a28f153a893b993431c328c30b
Stored in directory: /tmp/pip-ephem-wheel-cache-tlfkktz2/wheels/fa/a1/e1/1a73fbbacdfff3d1371ea61c64d2882179d286cc1c8521f629
Successfully built omz-tools
Installing collected packages: omz-tools
Successfully installed omz-tools-1.0.2
/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/104-model-tools
Specify, display and run the Model Downloader command to download the
model.
.. code:: ipython3
## Uncomment the next line to show help in omz_downloader which explains the command-line options.
# !omz_downloader --help
.. code:: ipython3
download_command = (
f"omz_downloader --name {model_name} --output_dir {base_model_dir} --cache_dir {omz_cache_dir}"
)
display(Markdown(f"Download command: `{download_command}`"))
display(Markdown(f"Downloading {model_name}..."))
! $download_command
Download command:
``omz_downloader --name mobilenet-v2-pytorch --output_dir model --cache_dir cache``
Downloading mobilenet-v2-pytorch…
.. parsed-literal::
################|| Downloading mobilenet-v2-pytorch ||################
========== Downloading model/public/mobilenet-v2-pytorch/mobilenet_v2-b0353104.pth
Convert a Model to OpenVINO IR format
-------------------------------------
Specify, display and run the Model Converter command to convert the
model to OpenVINO IR format. Model conversion may take a while. The
output of the Model Converter command will be displayed. When the
conversion is successful, the last lines of the output will include:
``[ SUCCESS ] Generated IR version 11 model.`` For downloaded models
that are already in OpenVINO IR format, conversion will be skipped.
.. code:: ipython3
## Uncomment the next line to show Help in omz_converter which explains the command-line options.
# !omz_converter --help
.. code:: ipython3
convert_command = f"omz_converter --name {model_name} --precisions {precision} --download_dir {base_model_dir} --output_dir {base_model_dir}"
display(Markdown(f"Convert command: `{convert_command}`"))
display(Markdown(f"Converting {model_name}..."))
! $convert_command
Convert command:
``omz_converter --name mobilenet-v2-pytorch --precisions FP16 --download_dir model --output_dir model``
Converting mobilenet-v2-pytorch…
.. parsed-literal::
========== Converting mobilenet-v2-pytorch to ONNX
Conversion to ONNX command: /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/internal_scripts/pytorch_to_onnx.py --model-name=mobilenet_v2 --weights=model/public/mobilenet-v2-pytorch/mobilenet_v2-b0353104.pth --import-module=torchvision.models --input-shape=1,3,224,224 --output-file=model/public/mobilenet-v2-pytorch/mobilenet-v2.onnx --input-names=data --output-names=prob
Traceback (most recent call last):
File "/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/internal_scripts/pytorch_to_onnx.py", line 187, in <module>
main()
File "/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/internal_scripts/pytorch_to_onnx.py", line 179, in main
model = load_model(args.model_name, args.weights,
File "/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/internal_scripts/pytorch_to_onnx.py", line 126, in load_model
with prepend_to_path(model_paths):
File "/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/internal_scripts/pytorch_to_onnx.py", line 49, in __enter__
sys.path = self._preprended_paths + sys.path
TypeError: unsupported operand type(s) for +: 'NoneType' and 'list'
FAILED:
mobilenet-v2-pytorch
Get Model Information
---------------------
The Info Dumper prints the following information for Open Model Zoo
models:
- Model name
- Description
- Framework that was used to train the model
- License URL
- Precisions supported by the model
- Subdirectory: the location of the downloaded model
- Task type
This information can be shown by running
``omz_info_dumper --name model_name`` in a terminal. The information can
also be parsed and used in scripts.
In the next cell, run Info Dumper and use ``json`` to load the
information into a dictionary.
.. code:: ipython3
model_info_output = %sx omz_info_dumper --name $model_name
model_info = json.loads(model_info_output.get_nlstr())
if len(model_info) > 1:
NotebookAlert(
f"There are multiple IR files for the {model_name} model. The first model in the "
"omz_info_dumper output will be used for benchmarking. Change "
"`selected_model_info` in the cell below to select a different model from the list.",
"warning",
)
model_info
.. parsed-literal::
[{'name': 'mobilenet-v2-pytorch',
'composite_model_name': None,
'description': 'MobileNet V2 is image classification model pre-trained on ImageNet dataset. This is a PyTorch* implementation of MobileNetV2 architecture as described in the paper "Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation" <https://arxiv.org/abs/1801.04381>.\nThe model input is a blob that consists of a single image of "1, 3, 224, 224" in "RGB" order.\nThe model output is typical object classifier for the 1000 different classifications matching with those in the ImageNet database.',
'framework': 'pytorch',
'license_url': 'https://raw.githubusercontent.com/pytorch/vision/master/LICENSE',
'accuracy_config': '/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/models/public/mobilenet-v2-pytorch/accuracy-check.yml',
'model_config': '/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/models/public/mobilenet-v2-pytorch/model.yml',
'precisions': ['FP16', 'FP32'],
'quantization_output_precisions': ['FP16-INT8', 'FP32-INT8'],
'subdirectory': 'public/mobilenet-v2-pytorch',
'task_type': 'classification',
'input_info': [{'name': 'data',
'shape': [1, 3, 224, 224],
'layout': 'NCHW'}],
'model_stages': []}]
Having the model information in JSON format makes it possible to extract
the path to the model directory and to build the path to the OpenVINO IR
file.
.. code:: ipython3
selected_model_info = model_info[0]
model_path = (
base_model_dir
/ Path(selected_model_info["subdirectory"])
/ Path(f"{precision}/{selected_model_info['name']}.xml")
)
print(model_path, "exists:", model_path.exists())
.. parsed-literal::
model/public/mobilenet-v2-pytorch/FP16/mobilenet-v2-pytorch.xml exists: False
Run Benchmark Tool
------------------
By default, Benchmark Tool runs inference for 60 seconds in asynchronous
mode on CPU. It returns inference speed as latency (milliseconds per
image) and throughput values (frames per second).
.. code:: ipython3
## Uncomment the next line to show Help in benchmark_app which explains the command-line options.
# !benchmark_app --help
.. code:: ipython3
benchmark_command = f"benchmark_app -m {model_path} -t 15"
display(Markdown(f"Benchmark command: `{benchmark_command}`"))
display(Markdown(f"Benchmarking {model_name} on CPU with async inference for 15 seconds..."))
! $benchmark_command
Benchmark command:
``benchmark_app -m model/public/mobilenet-v2-pytorch/FP16/mobilenet-v2-pytorch.xml -t 15``
Benchmarking mobilenet-v2-pytorch on CPU with async inference for 15
seconds…
.. parsed-literal::
[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ] Device info:
[ INFO ] CPU
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ]
[Step 3/11] Setting device configuration
[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to THROUGHPUT.
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ ERROR ] Model file /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/104-model-tools/model/public/mobilenet-v2-pytorch/FP16/mobilenet-v2-pytorch.xml cannot be opened!
Traceback (most recent call last):
File "/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/main.py", line 368, in main
model = benchmark.read_model(args.path_to_model)
File "/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/benchmark.py", line 69, in read_model
return self.core.read_model(model_filename, weights_filename)
RuntimeError: Model file /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/104-model-tools/model/public/mobilenet-v2-pytorch/FP16/mobilenet-v2-pytorch.xml cannot be opened!
Benchmark with Different Settings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The ``benchmark_app`` tool displays logging information that is not
always necessary. A more compact result is achieved when only the
relevant lines of the output are kept, as the ``benchmark_model()``
function defined below does.
The following cells show some examples of ``benchmark_app`` with
different parameters. Below are some useful parameters:
- ``-d`` A device to use for inference. For example: CPU, GPU, MULTI.
Default: CPU.
- ``-t`` Time expressed in number of seconds to run inference. Default:
60.
- ``-api`` Use asynchronous (async) or synchronous (sync) inference.
Default: async.
- ``-b`` Batch size. Default: 1.
Run ``! benchmark_app --help`` to get an overview of all possible
command-line parameters.
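For example, a synchronous CPU run with a batch size of 4 for 30 seconds
could be launched as in the sketch below; the parameter values are only
an illustration, and ``model_path`` is the IR path built earlier in this
notebook.

.. code:: ipython3

    ## Uncomment the next line to try a synchronous run with batch size 4 for 30 seconds.
    # ! benchmark_app -m {model_path} -d CPU -t 30 -api sync -b 4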
In the next cell, define the ``benchmark_model()`` function that calls
``benchmark_app``. This makes it easy to try different combinations. In
the cell below that, you display available devices on the system.
**Note**: In this notebook, ``benchmark_app`` runs for 15 seconds to
give a quick indication of performance. For more accurate
performance, it is recommended to run inference for at least one
minute by setting the ``t`` parameter to 60 or higher, and run
``benchmark_app`` in a terminal/command prompt after closing other
applications. Copy the **benchmark command** and paste it in a
command prompt where you have activated the ``openvino_env``
environment.
.. code:: ipython3
def benchmark_model(model_xml, device="CPU", seconds=60, api="async", batch=1):
ie = Core()
model_path = Path(model_xml)
if ("GPU" in device) and ("GPU" not in ie.available_devices):
DeviceNotFoundAlert("GPU")
else:
benchmark_command = f"benchmark_app -m {model_path} -d {device} -t {seconds} -api {api} -b {batch}"
display(Markdown(f"**Benchmark {model_path.name} with {device} for {seconds} seconds with {api} inference**"))
display(Markdown(f"Benchmark command: `{benchmark_command}`"))
benchmark_output = %sx $benchmark_command
print("command ended")
benchmark_result = [line for line in benchmark_output
if not (line.startswith(r"[") or line.startswith(" ") or line == "")]
print("\n".join(benchmark_result))
.. code:: ipython3
ie = Core()
# Show devices available for OpenVINO Runtime
for device in ie.available_devices:
device_name = ie.get_property(device, "FULL_DEVICE_NAME")
print(f"{device}: {device_name}")
.. parsed-literal::
CPU: Intel(R) Core(TM) i9-10920X CPU @ 3.50GHz
.. code:: ipython3
benchmark_model(model_path, device="CPU", seconds=15, api="async")
**Benchmark mobilenet-v2-pytorch.xml with CPU for 15 seconds with async
inference**
Benchmark command:
``benchmark_app -m model/public/mobilenet-v2-pytorch/FP16/mobilenet-v2-pytorch.xml -d CPU -t 15 -api async -b 1``
.. parsed-literal::
command ended
Traceback (most recent call last):
File "/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/main.py", line 368, in main
model = benchmark.read_model(args.path_to_model)
File "/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/benchmark.py", line 69, in read_model
return self.core.read_model(model_filename, weights_filename)
RuntimeError: Model file /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/104-model-tools/model/public/mobilenet-v2-pytorch/FP16/mobilenet-v2-pytorch.xml cannot be opened!
.. code:: ipython3
benchmark_model(model_path, device="AUTO", seconds=15, api="async")
**Benchmark mobilenet-v2-pytorch.xml with AUTO for 15 seconds with async
inference**
Benchmark command:
``benchmark_app -m model/public/mobilenet-v2-pytorch/FP16/mobilenet-v2-pytorch.xml -d AUTO -t 15 -api async -b 1``
.. parsed-literal::
command ended
Traceback (most recent call last):
File "/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/main.py", line 368, in main
model = benchmark.read_model(args.path_to_model)
File "/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/benchmark.py", line 69, in read_model
return self.core.read_model(model_filename, weights_filename)
RuntimeError: Model file /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/104-model-tools/model/public/mobilenet-v2-pytorch/FP16/mobilenet-v2-pytorch.xml cannot be opened!
.. code:: ipython3
benchmark_model(model_path, device="GPU", seconds=15, api="async")
.. raw:: html
<div class="alert alert-warning">Running this cell requires a GPU device, which is not available on this system. The following device is available: CPU
.. code:: ipython3
benchmark_model(model_path, device="MULTI:CPU,GPU", seconds=15, api="async")
.. raw:: html
<div class="alert alert-warning">Running this cell requires a GPU device, which is not available on this system. The following device is available: CPU

View File

@ -0,0 +1,597 @@
Quantize NLP models with Post-Training Quantization in NNCF
============================================================
This tutorial demonstrates how to apply ``INT8`` quantization to the
Natural Language Processing model known as
`BERT <https://en.wikipedia.org/wiki/BERT_(language_model)>`__, using
the `Post-Training Quantization
API <https://docs.openvino.ai/latest/nncf_ptq_introduction.html>`__
(NNCF library). A fine-tuned `HuggingFace
BERT <https://huggingface.co/transformers/model_doc/bert.html>`__
`PyTorch <https://pytorch.org/>`__ model, trained on the `Microsoft
Research Paraphrase Corpus
(MRPC) <https://www.microsoft.com/en-us/download/details.aspx?id=52398>`__,
will be used. The tutorial is designed to be extendable to custom models
and datasets. It consists of the following steps:
- Download and prepare the BERT model and MRPC dataset.
- Define data loading and accuracy validation functionality.
- Prepare the model for quantization.
- Run optimization pipeline.
- Load and test quantized model.
- Compare the performance of the original, converted and quantized
models.
.. code:: ipython3
!pip install -q nncf datasets evaluate
Imports
-------
.. code:: ipython3
import os
import sys
import time
from pathlib import Path
from zipfile import ZipFile
from typing import Iterable
from typing import Any
import numpy as np
import torch
from openvino import runtime as ov
from openvino.tools import mo
from openvino.runtime import serialize, Model
import nncf
from nncf.parameters import ModelType
from transformers import BertForSequenceClassification, BertTokenizer
import datasets
import evaluate
sys.path.append("../utils")
from notebook_utils import download_file
.. parsed-literal::
/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/offline_transformations/__init__.py:10: FutureWarning: The module is private and following namespace `offline_transformations` will be removed in the future, use `openvino.runtime.passes` instead!
warnings.warn(
.. parsed-literal::
INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino
Settings
--------
.. code:: ipython3
# Set the data and model directories, source URL and the filename of the model.
DATA_DIR = "data"
MODEL_DIR = "model"
MODEL_LINK = "https://download.pytorch.org/tutorial/MRPC.zip"
FILE_NAME = MODEL_LINK.split("/")[-1]
PRETRAINED_MODEL_DIR = os.path.join(MODEL_DIR, "MRPC")
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
Prepare the Model
-----------------
Perform the following steps:

- Download and unpack the pre-trained PyTorch BERT model for MRPC.
- Convert the model to ONNX.
- Run Model Optimizer to convert the model from the ONNX representation
  to the OpenVINO Intermediate Representation (OpenVINO IR).
.. code:: ipython3
download_file(MODEL_LINK, directory=MODEL_DIR, show_progress=True)
with ZipFile(f"{MODEL_DIR}/{FILE_NAME}", "r") as zip_ref:
zip_ref.extractall(MODEL_DIR)
.. parsed-literal::
model/MRPC.zip: 0%| | 0.00/387M [00:00<?, ?B/s]
Convert the original PyTorch model to the ONNX representation.
.. code:: ipython3
BATCH_SIZE = 1
MAX_SEQ_LENGTH = 128
def export_model_to_onnx(model, path):
with torch.no_grad():
default_input = torch.ones(1, MAX_SEQ_LENGTH, dtype=torch.int64)
inputs = {
"input_ids": default_input,
"attention_mask": default_input,
"token_type_ids": default_input,
}
torch.onnx.export(
model,
(inputs["input_ids"], inputs["attention_mask"], inputs["token_type_ids"]),
path,
opset_version=11,
input_names=["input_ids", "attention_mask", "token_type_ids"],
output_names=["output"]
)
print("ONNX model saved to {}".format(path))
torch_model = BertForSequenceClassification.from_pretrained(PRETRAINED_MODEL_DIR)
onnx_model_path = Path(MODEL_DIR) / "bert_mrpc.onnx"
if not onnx_model_path.exists():
export_model_to_onnx(torch_model, onnx_model_path)
.. parsed-literal::
ONNX model saved to model/bert_mrpc.onnx
Convert the ONNX Model to OpenVINO IR
-------------------------------------
Use Model Optimizer Python API to convert the model to OpenVINO IR with
``FP32`` precision. For more information about Model Optimizer Python
API, see the `Model Optimizer Developer
Guide <https://docs.openvino.ai/latest/openvino_docs_MO_DG_Python_API.html>`__.
.. code:: ipython3
ir_model_xml = onnx_model_path.with_suffix(".xml")
# Convert the ONNX model to OpenVINO IR FP32.
if not ir_model_xml.exists():
model = mo.convert_model(onnx_model_path)
serialize(model, str(ir_model_xml))
Prepare the Dataset
-------------------
We download the General Language Understanding Evaluation (GLUE) dataset
for the MRPC task from HuggingFace datasets. Then, we tokenize the data
with a pre-trained BERT tokenizer from HuggingFace.
.. code:: ipython3
def create_data_source():
raw_dataset = datasets.load_dataset('glue', 'mrpc', split='validation')
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_DIR)
def _preprocess_fn(examples):
texts = (examples['sentence1'], examples['sentence2'])
result = tokenizer(*texts, padding='max_length', max_length=MAX_SEQ_LENGTH, truncation=True)
result['labels'] = examples['label']
return result
processed_dataset = raw_dataset.map(_preprocess_fn, batched=True, batch_size=1)
return processed_dataset
data_source = create_data_source()
.. parsed-literal::
[ WARNING ] Found cached dataset glue (/opt/home/k8sworker/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
.. parsed-literal::
[ WARNING ] Found cached dataset glue (/opt/home/k8sworker/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
.. parsed-literal::
[ WARNING ] Loading cached processed dataset at /opt/home/k8sworker/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-b5f4c739eb2a4a9f.arrow
.. parsed-literal::
[ WARNING ] Loading cached processed dataset at /opt/home/k8sworker/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-b5f4c739eb2a4a9f.arrow
Optimize model using NNCF Post-training Quantization API
--------------------------------------------------------
`NNCF <https://github.com/openvinotoolkit/nncf>`__ provides a suite of
advanced algorithms for Neural Networks inference optimization in
OpenVINO with minimal accuracy drop. We will use 8-bit quantization in
post-training mode (without the fine-tuning pipeline) to optimize BERT.
**Note**: NNCF Post-training Quantization is available as a preview
feature in OpenVINO 2022.3 release. Fully functional support will be
provided in the next releases.
The optimization process contains the following steps:
1. Create a dataset for quantization.
2. Run ``nncf.quantize`` to obtain an optimized model.
3. Serialize the OpenVINO IR model using the ``openvino.runtime.serialize``
   function.
.. code:: ipython3
# Load the network in OpenVINO Runtime.
core = ov.Core()
model = core.read_model(ir_model_xml)
INPUT_NAMES = [x.any_name for x in model.inputs]
def transform_fn(data_item):
"""
Extract the model's input from the data item.
The data item here is the data item that is returned from the data source per iteration.
This function should be passed when the data item cannot be used as model's input.
"""
inputs = {
name: np.asarray(data_item[name], dtype=np.int64) for name in INPUT_NAMES
}
return inputs
calibration_dataset = nncf.Dataset(data_source, transform_fn)
# Quantize the model. By specifying model_type, we specify additional transformer patterns in the model.
quantized_model = nncf.quantize(model, calibration_dataset,
model_type=ModelType.TRANSFORMER)
.. parsed-literal::
INFO:openvino.tools.pot.pipeline.pipeline:Inference Engine version: 2022.3.0-9052-9752fafe8eb-releases/2022/3
INFO:openvino.tools.pot.pipeline.pipeline:Model Optimizer version: 2022.3.0-9052-9752fafe8eb-releases/2022/3
INFO:openvino.tools.pot.pipeline.pipeline:Post-Training Optimization Tool version: 2022.3.0-9052-9752fafe8eb-releases/2022/3
INFO:openvino.tools.pot.statistics.collector:Start computing statistics for algorithms : DefaultQuantization
INFO:openvino.tools.pot.statistics.collector:Computing statistics finished
INFO:openvino.tools.pot.pipeline.pipeline:Start algorithm: DefaultQuantization
INFO:openvino.tools.pot.algorithms.quantization.default.algorithm:Start computing statistics for algorithm : ActivationChannelAlignment
INFO:openvino.tools.pot.algorithms.quantization.default.algorithm:Computing statistics finished
INFO:openvino.tools.pot.algorithms.quantization.default.algorithm:Start computing statistics for algorithms : MinMaxQuantization,FastBiasCorrection
INFO:openvino.tools.pot.algorithms.quantization.default.algorithm:Computing statistics finished
INFO:openvino.tools.pot.pipeline.pipeline:Finished: DefaultQuantization
===========================================================================
.. code:: ipython3
compressed_model_xml = 'quantized_bert_mrpc.xml'
ov.serialize(quantized_model, compressed_model_xml)
Load and Test OpenVINO Model
----------------------------
To load and test the converted model, perform the following:

- Load the model and compile it for CPU.
- Prepare the input.
- Run the inference.
- Get the answer from the model output.
.. code:: ipython3
core = ov.Core()
# Read the model from files.
model = core.read_model(model=compressed_model_xml)
# Assign dynamic shapes to every input layer.
for input_layer in model.inputs:
input_shape = input_layer.partial_shape
input_shape[1] = -1
model.reshape({input_layer: input_shape})
# Compile the model for a specific device.
compiled_model_int8 = core.compile_model(model=model, device_name="CPU")
output_layer = compiled_model_int8.outputs[0]
The data source returns a pair of sentences (selected by
``sample_idx``), and the model compares these sentences and outputs
whether their meaning is the same. You can test other sentences by
changing ``sample_idx`` to another value (from 0 to 407).
.. code:: ipython3
sample_idx = 5
sample = data_source[sample_idx]
inputs = {k: torch.unsqueeze(torch.tensor(sample[k]), 0) for k in ['input_ids', 'token_type_ids', 'attention_mask']}
result = compiled_model_int8(inputs)[output_layer]
result = np.argmax(result)
print(f"Text 1: {sample['sentence1']}")
print(f"Text 2: {sample['sentence2']}")
print(f"The same meaning: {'yes' if result == 1 else 'no'}")
.. parsed-literal::
Text 1: Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed .
Text 2: It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status .
The same meaning: yes
Compare F1-score of FP32 and INT8 models
----------------------------------------
.. code:: ipython3
def validate(model: Model, dataset: Iterable[Any]) -> float:
"""
Evaluate the model on GLUE dataset.
Returns F1 score metric.
"""
compiled_model = core.compile_model(model, device_name='CPU')
output_layer = compiled_model.output(0)
metric = evaluate.load('glue', 'mrpc')
INPUT_NAMES = [x.any_name for x in compiled_model.inputs]
for batch in dataset:
inputs = [
np.expand_dims(np.asarray(batch[key], dtype=np.int64), 0) for key in INPUT_NAMES
]
outputs = compiled_model(inputs)[output_layer]
predictions = outputs[0].argmax(axis=-1)
metric.add_batch(predictions=[predictions], references=[batch['labels']])
metrics = metric.compute()
f1_score = metrics['f1']
return f1_score
print('Checking the accuracy of the original model:')
metric = validate(model, data_source)
print(f'F1 score: {metric:.4f}')
print('Checking the accuracy of the quantized model:')
metric = validate(quantized_model, data_source)
print(f'F1 score: {metric:.4f}')
.. parsed-literal::
Checking the accuracy of the original model:
F1 score: 0.8927
Checking the accuracy of the quantized model:
F1 score: 0.9014
Compare Performance of the Original, Converted and Quantized Models
-------------------------------------------------------------------
Compare the original PyTorch model with the converted and quantized
OpenVINO models (``FP32``, ``INT8``) to see the difference in
performance. Performance is expressed in Sentences Per Second (SPS),
which is the equivalent of Frames Per Second (FPS) for images.
.. code:: ipython3
model = core.read_model(model=ir_model_xml)
# Assign dynamic shapes to every input layer.
dynamic_shapes = {}
for input_layer in model.inputs:
input_shape = input_layer.partial_shape
input_shape[1] = -1
dynamic_shapes[input_layer] = input_shape
model.reshape(dynamic_shapes)
# Compile the model for a specific device.
compiled_model_fp32 = core.compile_model(model=model, device_name="CPU")
.. code:: ipython3
num_samples = 50
sample = data_source[0]
inputs = {k: torch.unsqueeze(torch.tensor(sample[k]), 0) for k in ['input_ids', 'token_type_ids', 'attention_mask']}
with torch.no_grad():
start = time.perf_counter()
for _ in range(num_samples):
torch_model(torch.vstack(list(inputs.values())))
end = time.perf_counter()
time_torch = end - start
print(
f"PyTorch model on CPU: {time_torch / num_samples:.3f} seconds per sentence, "
f"SPS: {num_samples / time_torch:.2f}"
)
start = time.perf_counter()
for _ in range(num_samples):
compiled_model_fp32(inputs)
end = time.perf_counter()
time_ir = end - start
print(
f"IR FP32 model in OpenVINO Runtime/CPU: {time_ir / num_samples:.3f} "
f"seconds per sentence, SPS: {num_samples / time_ir:.2f}"
)
start = time.perf_counter()
for _ in range(num_samples):
compiled_model_int8(inputs)
end = time.perf_counter()
time_ir = end - start
print(
f"OpenVINO IR INT8 model in OpenVINO Runtime/CPU: {time_ir / num_samples:.3f} "
f"seconds per sentence, SPS: {num_samples / time_ir:.2f}"
)
.. parsed-literal::
PyTorch model on CPU: 0.072 seconds per sentence, SPS: 13.80
IR FP32 model in OpenVINO Runtime/CPU: 0.020 seconds per sentence, SPS: 50.10
OpenVINO IR INT8 model in OpenVINO Runtime/CPU: 0.009 seconds per sentence, SPS: 113.99
Finally, measure the inference performance of OpenVINO ``FP32`` and
``INT8`` models. For this purpose, use `Benchmark
Tool <https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html>`__
in OpenVINO.
**Note**: The ``benchmark_app`` tool is able to measure the
performance of the OpenVINO Intermediate Representation (OpenVINO IR)
models only. For more accurate performance, run ``benchmark_app`` in
a terminal/command prompt after closing other applications. Run
``benchmark_app -m model.xml -d CPU`` to benchmark async inference on
CPU for one minute. Change ``CPU`` to ``GPU`` to benchmark on GPU.
Run ``benchmark_app --help`` to see an overview of all command-line
options.
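If you only need a quick sanity check before the full one-minute runs
below, you can limit the benchmark duration with the ``-t`` option. The
15-second limit in this sketch is an arbitrary choice, so the line is
left commented out.

.. code:: ipython3

    # Optional: a shorter benchmark of the FP32 model (15 seconds instead of the default 60).
    # ! benchmark_app -m $ir_model_xml -d CPU -api sync -t 15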
.. code:: ipython3
# Inference FP32 model (OpenVINO IR)
! benchmark_app -m $ir_model_xml -d CPU -api sync
.. parsed-literal::
[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ] Device info:
[ INFO ] CPU
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ]
[Step 3/11] Setting device configuration
[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to LATENCY.
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ INFO ] Read model took 170.14 ms
[ INFO ] Original model I/O parameters:
[ INFO ] Model inputs:
[ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,128]
[ INFO ] attention_mask (node: attention_mask) : i64 / [...] / [1,128]
[ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,128]
[ INFO ] Model outputs:
[ INFO ] output (node: output) : f32 / [...] / [1,2]
[Step 5/11] Resizing model to match image sizes and given batch
[ INFO ] Model batch size: 1
[Step 6/11] Configuring input of the model
[ INFO ] Model inputs:
[ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,128]
[ INFO ] attention_mask (node: attention_mask) : i64 / [...] / [1,128]
[ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,128]
[ INFO ] Model outputs:
[ INFO ] output (node: output) : f32 / [...] / [1,2]
[Step 7/11] Loading the model to the device
[ INFO ] Compile model took 216.41 ms
[Step 8/11] Querying optimal runtime parameters
[ INFO ] Model:
[ INFO ] NETWORK_NAME: torch_jit
[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 1
[ INFO ] NUM_STREAMS: 1
[ INFO ] AFFINITY: Affinity.CORE
[ INFO ] INFERENCE_NUM_THREADS: 12
[ INFO ] PERF_COUNT: False
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float32'>
[ INFO ] PERFORMANCE_HINT: PerformanceMode.LATENCY
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0
[Step 9/11] Creating infer requests and preparing input tensors
[ WARNING ] No input files were given for input 'input_ids'!. This input will be filled with random values!
[ WARNING ] No input files were given for input 'attention_mask'!. This input will be filled with random values!
[ WARNING ] No input files were given for input 'token_type_ids'!. This input will be filled with random values!
[ INFO ] Fill input 'input_ids' with random values
[ INFO ] Fill input 'attention_mask' with random values
[ INFO ] Fill input 'token_type_ids' with random values
[Step 10/11] Measuring performance (Start inference synchronously, limits: 60000 ms duration)
[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).
[ INFO ] First inference took 30.61 ms
[Step 11/11] Dumping statistics report
[ INFO ] Count: 3076 iterations
[ INFO ] Duration: 60012.39 ms
[ INFO ] Latency:
[ INFO ] Median: 19.42 ms
[ INFO ] Average: 19.42 ms
[ INFO ] Min: 18.70 ms
[ INFO ] Max: 22.31 ms
[ INFO ] Throughput: 51.49 FPS
.. code:: ipython3
# Inference INT8 model (OpenVINO IR)
! benchmark_app -m $compressed_model_xml -d CPU -api sync
.. parsed-literal::
[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ] Device info:
[ INFO ] CPU
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ]
[Step 3/11] Setting device configuration
[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to LATENCY.
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ INFO ] Read model took 130.18 ms
[ INFO ] Original model I/O parameters:
[ INFO ] Model inputs:
[ INFO ] input_ids , input_ids:0 (node: input_ids) : i64 / [...] / [1,128]
[ INFO ] attention_mask (node: attention_mask) : i64 / [...] / [1,128]
[ INFO ] token_type_ids:0 , token_type_ids (node: token_type_ids) : i64 / [...] / [1,128]
[ INFO ] Model outputs:
[ INFO ] output (node: output) : f32 / [...] / [1,2]
[Step 5/11] Resizing model to match image sizes and given batch
[ INFO ] Model batch size: 1
[Step 6/11] Configuring input of the model
[ INFO ] Model inputs:
[ INFO ] input_ids , input_ids:0 (node: input_ids) : i64 / [...] / [1,128]
[ INFO ] attention_mask (node: attention_mask) : i64 / [...] / [1,128]
[ INFO ] token_type_ids:0 , token_type_ids (node: token_type_ids) : i64 / [...] / [1,128]
[ INFO ] Model outputs:
[ INFO ] output (node: output) : f32 / [...] / [1,2]
[Step 7/11] Loading the model to the device
[ INFO ] Compile model took 387.50 ms
[Step 8/11] Querying optimal runtime parameters
[ INFO ] Model:
[ INFO ] NETWORK_NAME: torch_jit
[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 1
[ INFO ] NUM_STREAMS: 1
[ INFO ] AFFINITY: Affinity.CORE
[ INFO ] INFERENCE_NUM_THREADS: 12
[ INFO ] PERF_COUNT: False
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float32'>
[ INFO ] PERFORMANCE_HINT: PerformanceMode.LATENCY
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0
[Step 9/11] Creating infer requests and preparing input tensors
[ WARNING ] No input files were given for input 'input_ids'!. This input will be filled with random values!
[ WARNING ] No input files were given for input 'attention_mask'!. This input will be filled with random values!
[ WARNING ] No input files were given for input 'token_type_ids'!. This input will be filled with random values!
[ INFO ] Fill input 'input_ids' with random values
[ INFO ] Fill input 'attention_mask' with random values
[ INFO ] Fill input 'token_type_ids' with random values
[Step 10/11] Measuring performance (Start inference synchronously, limits: 60000 ms duration)
[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).
[ INFO ] First inference took 14.96 ms
[Step 11/11] Dumping statistics report
[ INFO ] Count: 6830 iterations
[ INFO ] Duration: 60003.55 ms
[ INFO ] Latency:
[ INFO ] Median: 8.74 ms
[ INFO ] Average: 8.70 ms
[ INFO ] Min: 7.54 ms
[ INFO ] Max: 11.06 ms
[ INFO ] Throughput: 114.43 FPS

View File

@ -0,0 +1,638 @@
Automatic Device Selection with OpenVINO™
=========================================
The `Auto
device <https://docs.openvino.ai/latest/openvino_docs_OV_UG_supported_plugins_AUTO.html>`__
(or AUTO in short) selects the most suitable device for inference by
considering the model precision, power efficiency and processing
capability of the available `compute
devices <https://docs.openvino.ai/latest/openvino_docs_OV_UG_supported_plugins_Supported_Devices.html>`__.
The model precision (such as ``FP32``, ``FP16``, ``INT8``, etc.) is the
first consideration to filter out the devices that cannot run the
network efficiently.
Next, if dedicated accelerators are available, these devices are
preferred (for example, integrated and discrete
`GPU <https://docs.openvino.ai/latest/openvino_docs_OV_UG_supported_plugins_GPU.html#doxid-openvino-docs-o-v-u-g-supported-plugins-g-p-u>`__
or
`VPU <https://docs.openvino.ai/latest/openvino_docs_OV_UG_supported_plugins_VPU.html>`__).
`CPU <https://docs.openvino.ai/latest/openvino_docs_OV_UG_supported_plugins_CPU.html>`__
is used as the default “fallback device”. Keep in mind that AUTO makes
this selection only once, during the loading of a model.
When using accelerator devices such as GPUs, loading a model to them
may take a long time. To address this challenge for applications that
require a fast first-inference response, AUTO starts inference
immediately on the CPU and then transparently shifts inference to the
GPU once it is ready. This dramatically reduces the time to the first
inference.
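If you want to restrict or order the devices that AUTO may choose from,
you can pass a device candidate list, for example ``AUTO:GPU,CPU``. The
short sketch below only builds such a string and is written defensively
for machines without a GPU; the actual model compilation happens later
in this notebook.

.. code:: ipython3

    from openvino.runtime import Core

    core = Core()
    # Prefer the GPU and fall back to the CPU when building the AUTO candidate list.
    device_candidates = "AUTO:GPU,CPU" if "GPU" in core.available_devices else "AUTO:CPU"
    print(f"AUTO candidate list for this machine: {device_candidates}")
    # Later in this notebook, a model could be compiled with, for example:
    # compiled_model = core.compile_model(model, device_name=device_candidates)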
Download and convert the model
------------------------------
This tutorial uses the
`bvlc_googlenet <https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet>`__
model, the first of the
`Inception <https://github.com/tensorflow/tpu/tree/master/models/experimental/inception>`__
family of models designed to perform image classification. Like other
Inception models, bvlc_googlenet was pre-trained on the
`ImageNet <https://image-net.org/>`__ data set. For more details about
this family of models, see the `research
paper <https://arxiv.org/abs/1512.00567>`__.
.. code:: ipython3
import sys
from pathlib import Path
from openvino.tools import mo
from openvino.runtime import serialize
from IPython.display import Markdown, display
sys.path.append("../utils")
import notebook_utils as utils
base_model_dir = Path("./model").expanduser()
model_name = "bvlc_googlenet"
caffemodel_name = f'{model_name}.caffemodel'
prototxt_name = f'{model_name}.prototxt'
caffemodel_path = base_model_dir / caffemodel_name
prototxt_path = base_model_dir / prototxt_name
if not caffemodel_path.exists() or not prototxt_path.exists():
caffemodel_url = "https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/2022.1/googlenet-v1/bvlc_googlenet.caffemodel"
prototxt_url = "https://raw.githubusercontent.com/BVLC/caffe/88c96189bcbf3853b93e2b65c7b5e4948f9d5f67/models/bvlc_googlenet/deploy.prototxt"
utils.download_file(caffemodel_url, caffemodel_name, base_model_dir)
utils.download_file(prototxt_url, prototxt_name, base_model_dir)
else:
print(f'{caffemodel_name} and {prototxt_name} already downloaded to {base_model_dir}')
# postprocessing of model
text = prototxt_path.read_text()
text = text.replace('dim: 10', 'dim: 1')
res = prototxt_path.write_text(text)
.. parsed-literal::
model/bvlc_googlenet.caffemodel: 0%| | 0.00/51.1M [00:00<?, ?B/s]
.. parsed-literal::
model/bvlc_googlenet.prototxt: 0%| | 0.00/2.19k [00:00<?, ?B/s]
Import modules and create Core
------------------------------
.. code:: ipython3
import cv2
import matplotlib.pyplot as plt
import numpy as np
from openvino.runtime import Core, CompiledModel, AsyncInferQueue, InferRequest
import sys
import time
ie = Core()
if "GPU" not in ie.available_devices:
display(Markdown('<div class="alert alert-block alert-danger"><b>Warning: </b> A GPU device is not available. This notebook requires GPU device to have meaningful results. </div>'))
.. container:: alert alert-block alert-danger
Warning: A GPU device is not available. This notebook requires GPU
device to have meaningful results.
Convert the model to OpenVINO IR format
---------------------------------------
Use Model Optimizer to convert the Caffe model to OpenVINO IR with
``FP16`` precision. The models are saved to the ``model/ir_model/``
directory. For more information about Model Optimizer, see the `Model
Optimizer Developer
Guide <https://docs.openvino.ai/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html>`__.
.. code:: ipython3
ir_model_path = base_model_dir / 'ir_model' / f'{model_name}.xml'
model = None
if not ir_model_path.exists():
model = mo.convert_model(input_model=base_model_dir / caffemodel_name,
input_proto=base_model_dir / prototxt_name,
input_shape=[1, 3, 224, 224],
layout="NCHW",
mean_values=[104.0,117.0,123.0],
output="prob",
compress_to_fp16=True)
serialize(model, str(ir_model_path))
print("IR model saved to {}".format(ir_model_path))
else:
print("Read IR model from {}".format(ir_model_path))
model = ie.read_model(ir_model_path)
.. parsed-literal::
/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/numpy/lib/function_base.py:959: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
return array(a, order=order, subok=subok, copy=True)
.. parsed-literal::
IR model saved to model/ir_model/bvlc_googlenet.xml
(1) Simplify selection logic
----------------------------
Default behavior of Core::compile_model API without device_name
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
By default, the ``compile_model`` API selects **AUTO** as the
``device_name`` if no device is specified.
.. code:: ipython3
# Set LOG_LEVEL to LOG_INFO.
ie.set_property("AUTO", {"LOG_LEVEL":"LOG_INFO"})
# Load the model onto the target device.
compiled_model = ie.compile_model(model=model)
if isinstance(compiled_model, CompiledModel):
print("Successfully compiled model without a device_name.")
.. parsed-literal::
[22:35:25.7084]I[plugin.cpp:402][AUTO] load with CNN network
[22:35:25.7136]I[plugin.cpp:422][AUTO] device:CPU, config:EXCLUSIVE_ASYNC_REQUESTS=NO
[22:35:25.7137]I[plugin.cpp:422][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY
[22:35:25.7137]I[plugin.cpp:422][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0
[22:35:25.7137]I[plugin.cpp:422][AUTO] device:CPU, config:PERF_COUNT=NO
[22:35:25.7137]I[plugin.cpp:435][AUTO] device:CPU, priority:0
[22:35:25.7141]I[auto_schedule.cpp:103][AUTO] ExecutableNetwork start
[22:35:25.7145]I[auto_schedule.cpp:146][AUTO] select device:CPU
[22:35:25.8945]I[auto_schedule.cpp:188][AUTO] device:CPU loading Network finished
Successfully compiled model without a device_name.
.. code:: ipython3
# Deleting the compiled model waits until compilation on the selected device is complete.
del compiled_model
print("Deleted compiled_model")
.. parsed-literal::
[22:35:25.9051]I[auto_schedule.cpp:509][AUTO] ExecutableNetwork end
[22:35:25.9052]I[multi_schedule.cpp:254][AUTO] CPU:infer:0
Deleted compiled_model
Explicitly pass AUTO as device_name to Core::compile_model API
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It is optional, but passing AUTO explicitly as ``device_name`` may
improve readability of your code.
.. code:: ipython3
# Set LOG_LEVEL to LOG_NONE.
ie.set_property("AUTO", {"LOG_LEVEL":"LOG_NONE"})
compiled_model = ie.compile_model(model=model, device_name="AUTO")
if isinstance(compiled_model, CompiledModel):
print("Successfully compiled model using AUTO.")
.. parsed-literal::
Successfully compiled model using AUTO.
.. code:: ipython3
# Deleting the compiled model waits until compilation on the selected device is complete.
del compiled_model
print("Deleted compiled_model")
.. parsed-literal::
Deleted compiled_model
(2) Improve the first inference latency
---------------------------------------
One of the benefits of using AUTO device selection is reducing FIL
(first inference latency). FIL is the model compilation time combined
with the first inference execution time. Using the CPU device explicitly
will produce the shortest first inference latency, as the OpenVINO graph
representation loads quickly on CPU, using just-in-time (JIT)
compilation. The challenge is with GPU devices, since OpenCL graph
compilation to GPU-optimized kernels takes a few seconds to complete.
This initialization time may be intolerable for some applications. To
avoid this delay, AUTO transparently uses the CPU as the first inference
device until the GPU is ready.

Load an Image
~~~~~~~~~~~~~
.. code:: ipython3
# For demonstration purposes, load the model to CPU and get inputs for buffer preparation.
compiled_model = ie.compile_model(model=model, device_name="CPU")
input_layer_ir = next(iter(compiled_model.inputs))
# Read image in BGR format.
image = cv2.imread("../data/image/coco.jpg")
# N, C, H, W = batch size, number of channels, height, width.
N, C, H, W = input_layer_ir.shape
# Resize image to the input size expected by the model.
resized_image = cv2.resize(image, (W, H))
# Reshape to match the input shape expected by the model.
input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0)
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
del compiled_model
.. image:: 106-auto-device-with-output_files/106-auto-device-with-output_14_0.png
Load the model to GPU device and perform inference
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
if "GPU" not in ie.available_devices:
print(f"A GPU device is not available. Available devices are: {ie.available_devices}")
else :
# Start time.
gpu_load_start_time = time.perf_counter()
compiled_model = ie.compile_model(model=model, device_name="GPU") # load to GPU
# Get input and output nodes.
input_layer = compiled_model.input(0)
output_layer = compiled_model.output(0)
# Execute the first inference.
results = compiled_model([input_image])[output_layer]
# Measure time to the first inference.
gpu_fil_end_time = time.perf_counter()
gpu_fil_span = gpu_fil_end_time - gpu_load_start_time
print(f"Time to load model on GPU device and get first inference: {gpu_fil_end_time-gpu_load_start_time:.2f} seconds.")
del compiled_model
.. parsed-literal::
A GPU device is not available. Available devices are: ['CPU']
Load the model using AUTO device and do inference
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When GPU is the best available device, the first few inferences will be
executed on CPU until GPU is ready.
.. code:: ipython3
# Start time.
auto_load_start_time = time.perf_counter()
compiled_model = ie.compile_model(model=model) # The device_name is AUTO by default.
# Get input and output nodes.
input_layer = compiled_model.input(0)
output_layer = compiled_model.output(0)
# Execute the first inference.
results = compiled_model([input_image])[output_layer]
# Measure time to the first inference.
auto_fil_end_time = time.perf_counter()
auto_fil_span = auto_fil_end_time - auto_load_start_time
print(f"Time to load model using AUTO device and get first inference: {auto_fil_end_time-auto_load_start_time:.2f} seconds.")
.. parsed-literal::
Time to load model using AUTO device and get first inference: 0.15 seconds.
.. code:: ipython3
# Deleting the compiled model waits for compilation on the selected device to complete.
del compiled_model
(3) Achieve different performance for different targets
-------------------------------------------------------
It is an advantage to define **performance hints** when using Automatic
Device Selection. By specifying a **THROUGHPUT** or **LATENCY** hint,
AUTO optimizes the performance based on the desired metric. The
**THROUGHPUT** hint delivers higher frames per second (FPS) than the
**LATENCY** hint, which delivers lower latency. The performance hints do
not require any device-specific settings, and they are completely
portable between devices, meaning AUTO can configure the performance
hint on whichever device is being used.
For more information, refer to the `Performance
Hints <https://docs.openvino.ai/latest/openvino_docs_OV_UG_supported_plugins_AUTO.html#performance-hints>`__
section of `Automatic Device
Selection <https://docs.openvino.ai/latest/openvino_docs_OV_UG_supported_plugins_AUTO.html>`__
article.
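A quick way to see how a hint changes the plugin configuration is to
query the compiled model for its optimal number of infer requests. The
sketch below reuses the ``ie`` Core object and the ``model`` loaded
earlier in this notebook; the exact numbers printed depend on your
hardware and OpenVINO version.

.. code:: ipython3

    # Compare how each performance hint affects the suggested number of infer requests.
    for hint in ("LATENCY", "THROUGHPUT"):
        hinted_model = ie.compile_model(model=model, device_name="AUTO", config={"PERFORMANCE_HINT": hint})
        optimal_requests = hinted_model.get_property("OPTIMAL_NUMBER_OF_INFER_REQUESTS")
        print(f"{hint}: optimal number of infer requests = {optimal_requests}")
        del hinted_model  # release resources before compiling with the next hint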
Class and callback definition
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
class PerformanceMetrics:
"""
Record the latest performance metrics (fps and latency), update the metrics every @interval seconds
:member: fps: Frames per second, indicates the average number of inferences executed each second during the last @interval seconds.
:member: latency: Average latency of inferences executed in the last @interval seconds.
:member: start_time: Record the start timestamp of the ongoing @interval seconds duration.
:member: latency_list: Record the latency of each inference execution over @interval seconds duration.
:member: interval: The metrics will be updated every @interval seconds
"""
def __init__(self, interval):
"""
Create and initialize one instance of class PerformanceMetrics.
:param: interval: The metrics will be updated every @interval seconds
:returns:
Instance of PerformanceMetrics
"""
self.fps = 0
self.latency = 0
self.start_time = time.perf_counter()
self.latency_list = []
self.interval = interval
def update(self, infer_request: InferRequest) -> bool:
"""
Update the metrics if current ongoing @interval seconds duration is expired. Record the latency only if it is not expired.
:param: infer_request: InferRequest returned from inference callback, which includes the result of inference request.
:returns:
True, if metrics are updated.
False, if @interval seconds duration is not expired and metrics are not updated.
"""
self.latency_list.append(infer_request.latency)
exec_time = time.perf_counter() - self.start_time
if exec_time >= self.interval:
# Update the performance metrics.
self.start_time = time.perf_counter()
self.fps = len(self.latency_list) / exec_time
self.latency = sum(self.latency_list) / len(self.latency_list)
print(f"throughput: {self.fps: .2f}fps, latency: {self.latency: .2f}ms, time interval:{exec_time: .2f}s")
sys.stdout.flush()
self.latency_list = []
return True
else :
return False
class InferContext:
"""
Inference context. Record and update performance metrics via @metrics, set @feed_inference to False once @remaining_update_num <= 0
:member: metrics: instance of class PerformanceMetrics
:member: remaining_update_num: the remaining number of performance metrics updates.
:member: feed_inference: if feed inference request is required or not.
"""
def __init__(self, update_interval, num):
"""
Create and initialize one instance of class InferContext.
:param: update_interval: The performance metrics will be updated every @update_interval seconds. This parameter will be passed to class PerformanceMetrics directly.
:param: num: The number of times performance metrics are updated.
:returns:
Instance of InferContext.
"""
self.metrics = PerformanceMetrics(update_interval)
self.remaining_update_num = num
self.feed_inference = True
def update(self, infer_request: InferRequest):
"""
Update the context. Set @feed_inference to False if the number of remaining performance metric updates (@remaining_update_num) reaches 0
:param: infer_request: InferRequest returned from inference callback, which includes the result of inference request.
:returns: None
"""
if self.remaining_update_num <= 0 :
self.feed_inference = False
if self.metrics.update(infer_request) :
self.remaining_update_num = self.remaining_update_num - 1
if self.remaining_update_num <= 0 :
self.feed_inference = False
def completion_callback(infer_request: InferRequest, context) -> None:
"""
callback for the inference request, pass the @infer_request to @context for updating
:param: infer_request: InferRequest returned for the callback, which includes the result of inference request.
:param: context: user data which is passed as the second parameter to AsyncInferQueue:start_async()
:returns: None
"""
context.update(infer_request)
# Performance metrics update interval (seconds) and number of times.
metrics_update_interval = 10
metrics_update_num = 6
Inference with THROUGHPUT hint
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Loop for inference and update the FPS/Latency every
@metrics_update_interval seconds.
.. code:: ipython3
THROUGHPUT_hint_context = InferContext(metrics_update_interval, metrics_update_num)
print("Compiling Model for AUTO device with THROUGHPUT hint")
sys.stdout.flush()
compiled_model = ie.compile_model(model=model, config={"PERFORMANCE_HINT":"THROUGHPUT"})
infer_queue = AsyncInferQueue(compiled_model, 0) # Setting to 0 will query optimal number by default.
infer_queue.set_callback(completion_callback)
print(f"Start inference, {metrics_update_num: .0f} groups of FPS/latency will be measured over {metrics_update_interval: .0f}s intervals")
sys.stdout.flush()
while THROUGHPUT_hint_context.feed_inference:
infer_queue.start_async({input_layer_ir.any_name: input_image}, THROUGHPUT_hint_context)
infer_queue.wait_all()
# Take the FPS and latency of the latest period.
THROUGHPUT_hint_fps = THROUGHPUT_hint_context.metrics.fps
THROUGHPUT_hint_latency = THROUGHPUT_hint_context.metrics.latency
print("Done")
del compiled_model
.. parsed-literal::
Compiling Model for AUTO device with THROUGHPUT hint
Start inference, 6 groups of FPS/latency will be measured over 10s intervals
throughput: 461.74fps, latency: 24.68ms, time interval: 10.01s
throughput: 470.76fps, latency: 24.89ms, time interval: 10.00s
throughput: 470.13fps, latency: 24.96ms, time interval: 10.01s
throughput: 470.19fps, latency: 24.89ms, time interval: 10.00s
throughput: 471.13fps, latency: 24.87ms, time interval: 10.00s
throughput: 469.51fps, latency: 24.92ms, time interval: 10.00s
Done
Inference with LATENCY hint
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Loop for inference and update the FPS/Latency every
@metrics_update_interval seconds.
.. code:: ipython3
LATENCY_hint_context = InferContext(metrics_update_interval, metrics_update_num)
print("Compiling Model for AUTO Device with LATENCY hint")
sys.stdout.flush()
compiled_model = ie.compile_model(model=model, config={"PERFORMANCE_HINT":"LATENCY"})
# Setting to 0 will query optimal number by default.
infer_queue = AsyncInferQueue(compiled_model, 0)
infer_queue.set_callback(completion_callback)
print(f"Start inference, {metrics_update_num: .0f} groups fps/latency will be out with {metrics_update_interval: .0f}s interval")
sys.stdout.flush()
while LATENCY_hint_context.feed_inference:
infer_queue.start_async({input_layer_ir.any_name: input_image}, LATENCY_hint_context)
infer_queue.wait_all()
# Take the FPS and latency of the latest period.
LATENCY_hint_fps = LATENCY_hint_context.metrics.fps
LATENCY_hint_latency = LATENCY_hint_context.metrics.latency
print("Done")
del compiled_model
.. parsed-literal::
Compiling Model for AUTO Device with LATENCY hint
Start inference, 6 groups fps/latency will be out with 10s interval
throughput: 250.83fps, latency: 3.62ms, time interval: 10.00s
throughput: 253.12fps, latency: 3.70ms, time interval: 10.00s
throughput: 250.90fps, latency: 3.73ms, time interval: 10.00s
throughput: 249.98fps, latency: 3.74ms, time interval: 10.00s
throughput: 248.29fps, latency: 3.77ms, time interval: 10.00s
throughput: 255.00fps, latency: 3.67ms, time interval: 10.00s
Done
Difference in FPS and latency
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
TPUT = 0
LAT = 1
labels = ["THROUGHPUT hint", "LATENCY hint"]
fig1, ax1 = plt.subplots(1, 1)
fig1.patch.set_visible(False)
ax1.axis('tight')
ax1.axis('off')
cell_text = []
cell_text.append(['%.2f%s' % (THROUGHPUT_hint_fps," FPS"), '%.2f%s' % (THROUGHPUT_hint_latency, " ms")])
cell_text.append(['%.2f%s' % (LATENCY_hint_fps," FPS"), '%.2f%s' % (LATENCY_hint_latency, " ms")])
table = ax1.table(cellText=cell_text, colLabels=["FPS (Higher is better)", "Latency (Lower is better)"], rowLabels=labels,
rowColours=["deepskyblue"] * 2, colColours=["deepskyblue"] * 2,
cellLoc='center', loc='upper left')
table.auto_set_font_size(False)
table.set_fontsize(18)
table.auto_set_column_width(0)
table.auto_set_column_width(1)
table.scale(1, 3)
fig1.tight_layout()
plt.show()
.. image:: 106-auto-device-with-output_files/106-auto-device-with-output_27_0.png
.. code:: ipython3
# Output the difference.
width = 0.4
fontsize = 14
plt.rc('font', size=fontsize)
fig, ax = plt.subplots(1,2, figsize=(10, 8))
rects1 = ax[0].bar([0], THROUGHPUT_hint_fps, width, label=labels[TPUT], color='#557f2d')
rects2 = ax[0].bar([width], LATENCY_hint_fps, width, label=labels[LAT])
ax[0].set_ylabel("frames per second")
ax[0].set_xticks([width / 2])
ax[0].set_xticklabels(["FPS"])
ax[0].set_xlabel("Higher is better")
rects1 = ax[1].bar([0], THROUGHPUT_hint_latency, width, label=labels[TPUT], color='#557f2d')
rects2 = ax[1].bar([width], LATENCY_hint_latency, width, label=labels[LAT])
ax[1].set_ylabel("milliseconds")
ax[1].set_xticks([width / 2])
ax[1].set_xticklabels(["Latency (ms)"])
ax[1].set_xlabel("Lower is better")
fig.suptitle('Performance Hints')
fig.legend(labels, fontsize=fontsize)
fig.tight_layout()
plt.show()
.. image:: 106-auto-device-with-output_files/106-auto-device-with-output_28_0.png

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7511b8a4e5b047600d5fed14fbc7e9653a868bc5253abf1e0c3ef649b47bc408
size 387941

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b7098141604597456849e891e131217de0721f817667cb44993edff0ad8d20da
size 26785

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7a8f5540b29277e803adfa05426264388f2d5872cde0830dbace8ed9e0dc114c
size 37899

View File

@ -0,0 +1,9 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/106-auto-device-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/106-auto-device-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="106-auto-device-with-output_14_0.png">106-auto-device-with-output_14_0.png</a> 30-May-2023 00:09 387941
<a href="106-auto-device-with-output_27_0.png">106-auto-device-with-output_27_0.png</a> 30-May-2023 00:09 26785
<a href="106-auto-device-with-output_28_0.png">106-auto-device-with-output_28_0.png</a> 30-May-2023 00:09 37899
</pre><hr></body>
</html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,617 @@
Performance tricks in OpenVINO for latency mode
===============================================
The goal of this notebook is to provide a step-by-step tutorial for
improving performance for inferencing in a latency mode. Low latency is
especially desired in real-time applications when the results are needed
as soon as possible after the data appears. This notebook assumes a
computer vision workflow and uses the
`YOLOv5n <https://github.com/ultralytics/yolov5>`__ model. We will
simulate a camera application that provides frames one by one.
The performance tips applied in this notebook can be summarized in the
following figure. Some of the steps below can be applied to any device
at any stage, e.g., “shared_memory”; some apply only to specific
devices, e.g., “inference num threads” for the CPU. As the number of
potential configurations is vast, we recommend looking at the steps
below and then applying a trial-and-error approach. You can incorporate
many hints simultaneously, like more inference threads + shared memory.
It should give even better performance, but we recommend testing it
anyway.
**NOTE**: We especially recommend trying
``OpenVINO IR model + CPU + shared memory in latency mode`` or
``OpenVINO IR model + CPU + shared memory + more inference threads``.
The quantization and pre-post-processing API are not included here as
they change the precision (quantization) or processing graph
(prepostprocessor). You can find examples of how to apply them to
optimize performance on OpenVINO IR files in
`111-detection-quantization <../111-detection-quantization>`__ and
`118-optimize-preprocessing <../118-optimize-preprocessing>`__.
|image0|
**NOTE**: Many of the steps presented below will give you better
performance. However, some of them may not change anything if they
are strongly dependent on either the hardware or the model. Please
run this notebook on your computer with your model to learn which of
them makes sense in your case.
Prerequisites
-------------
.. |image0| image:: https://user-images.githubusercontent.com/4547501/229120774-01f4f972-424d-4280-8395-220dd432985a.png
.. code:: ipython3
!pip install -q seaborn ultralytics
.. code:: ipython3
import os
import sys
import time
from pathlib import Path
from typing import Any, List, Tuple
sys.path.append("../utils")
import notebook_utils as utils
Data
----
We will use the same image of the dog sitting on a bicycle for all
experiments below. The image is resized and preprocessed to fulfill the
requirements of this particular object detection model.
.. code:: ipython3
import numpy as np
import cv2
IMAGE_WIDTH = 640
IMAGE_HEIGHT = 480
# load image
image = utils.load_image("../data/image/coco_bike.jpg")
image = cv2.resize(image, dsize=(IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_AREA)
# preprocess it for YOLOv5
input_image = image / 255.0
input_image = np.transpose(input_image, axes=(2, 0, 1))
input_image = np.expand_dims(input_image, axis=0)
# show the image
utils.show_array(image)
.. image:: 109-latency-tricks-with-output_files/109-latency-tricks-with-output_4_0.jpg
.. parsed-literal::
<DisplayHandle display_id=6f2409e74123f40cdda4989b2a2a5922>
Model
-----
We decided to go with
`YOLOv5n <https://github.com/ultralytics/yolov5>`__, one of the
state-of-the-art object detection models, easily available through the
PyTorch Hub and small enough to see the difference in performance.
.. code:: ipython3
import torch
from IPython.utils import io
# directory for all models
base_model_dir = Path("model")
model_name = "yolov5n"
model_path = base_model_dir / model_name
# load YOLOv5n from PyTorch Hub
pytorch_model = torch.hub.load("ultralytics/yolov5", "custom", path=model_path, device="cpu", skip_validation=True)
# don't print full model architecture
with io.capture_output():
pytorch_model.eval()
.. parsed-literal::
Using cache found in /opt/home/k8sworker/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2023-4-21 Python-3.8.10 torch-1.13.1+cpu CPU
.. parsed-literal::
requirements: /opt/home/k8sworker/.cache/torch/hub/requirements.txt not found, check failed.
.. parsed-literal::
Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5n.pt to model/yolov5n.pt...
.. parsed-literal::
0%| | 0.00/3.87M [00:00<?, ?B/s]
.. parsed-literal::
Fusing layers...
YOLOv5n summary: 213 layers, 1867405 parameters, 0 gradients
Adding AutoShape...
Hardware
--------
The code below lists the available hardware we will use in the
benchmarking process.
**NOTE**: The hardware you have is probably completely different from
ours. It means you can see completely different results.
.. code:: ipython3
import openvino.runtime as ov
# initialize OpenVINO
core = ov.Core()
# print available devices
for device in core.available_devices:
device_name = core.get_property(device, "FULL_DEVICE_NAME")
print(f"{device}: {device_name}")
.. parsed-literal::
CPU: Intel(R) Core(TM) i9-10920X CPU @ 3.50GHz
Helper functions
----------------
We are defining a benchmark model function to use for all optimized
models below. It runs inference 1000 times, averages the latency time,
and prints two measures: seconds per image and frames per second (FPS).
.. code:: ipython3
INFER_NUMBER = 1000
def benchmark_model(model: Any, input_data: np.ndarray, benchmark_name: str, device_name: str = "CPU") -> float:
"""
Helper function for benchmarking the model. It measures the time and prints results.
"""
# measure the first inference separately - it may be slower as it contains also initialization
start = time.perf_counter()
model(input_data)
end = time.perf_counter()
first_infer_time = end - start
print(f"{benchmark_name} on {device_name}. First inference time: {first_infer_time :.4f} seconds")
# benchmarking
start = time.perf_counter()
for _ in range(INFER_NUMBER):
model(input_data)
end = time.perf_counter()
# elapsed time
infer_time = end - start
# print second per image and FPS
mean_infer_time = infer_time / INFER_NUMBER
mean_fps = INFER_NUMBER / infer_time
print(f"{benchmark_name} on {device_name}: {mean_infer_time :.4f} seconds per image ({mean_fps :.2f} FPS)")
return mean_infer_time
The following functions aim to post-process results and draw boxes on
the image.
.. code:: ipython3
# https://gist.github.com/AruniRC/7b3dadd004da04c80198557db5da4bda
classes = [
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
"stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
"giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
"baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork",
"knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
"cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
"cell phone", "microwave", "oven", "oaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"
]
# Colors for the classes above (Rainbow Color Map).
colors = cv2.applyColorMap(
src=np.arange(0, 255, 255 / len(classes), dtype=np.float32).astype(np.uint8),
colormap=cv2.COLORMAP_RAINBOW,
).squeeze()
def postprocess(detections: np.ndarray) -> List[Tuple]:
"""
Postprocess the raw results from the model.
"""
# candidates - probability > 0.25
detections = detections[detections[..., 4] > 0.25]
boxes = []
labels = []
scores = []
for obj in detections:
xmin, ymin, ww, hh = obj[:4]
score = obj[4]
label = np.argmax(obj[5:])
# Convert the box from center format (x_center, y_center, width, height) to top-left corner format.
boxes.append(
tuple(map(int, (xmin - ww // 2, ymin - hh // 2, ww, hh)))
)
labels.append(int(label))
scores.append(float(score))
# Apply non-maximum suppression to get rid of many overlapping entities.
# See https://paperswithcode.com/method/non-maximum-suppression
# This algorithm returns indices of objects to keep.
indices = cv2.dnn.NMSBoxes(
bboxes=boxes, scores=scores, score_threshold=0.25, nms_threshold=0.5
)
# If there are no boxes.
if len(indices) == 0:
return []
# Filter detected objects.
return [(labels[idx], scores[idx], boxes[idx]) for idx in indices.flatten()]
def draw_boxes(img: np.ndarray, boxes):
"""
Draw detected boxes on the image.
"""
for label, score, box in boxes:
# Choose color for the label.
color = tuple(map(int, colors[label]))
# Draw a box.
x2 = box[0] + box[2]
y2 = box[1] + box[3]
cv2.rectangle(img=img, pt1=box[:2], pt2=(x2, y2), color=color, thickness=2)
# Draw a label name inside the box.
cv2.putText(
img=img,
text=f"{classes[label]} {score:.2f}",
org=(box[0] + 10, box[1] + 20),
fontFace=cv2.FONT_HERSHEY_COMPLEX,
fontScale=img.shape[1] / 1200,
color=color,
thickness=1,
lineType=cv2.LINE_AA,
)
def show_result(results: np.ndarray):
"""
Postprocess the raw results, draw boxes and show the image.
"""
output_img = image.copy()
detections = postprocess(results)
draw_boxes(output_img, detections)
utils.show_array(output_img)
Optimizations
-------------
Below, we present the performance tricks for faster inference in the
latency mode. We release resources after every benchmark to make sure
the same amount of resources is available for every experiment.
PyTorch model
~~~~~~~~~~~~~
First, we are benchmarking the original PyTorch model without any
optimizations applied. We will treat it as our baseline.
.. code:: ipython3
import torch
with torch.no_grad():
result = pytorch_model(torch.as_tensor(input_image)).detach().numpy()[0]
show_result(result)
pytorch_infer_time = benchmark_model(pytorch_model, input_data=torch.as_tensor(input_image).float(), benchmark_name="PyTorch model")
.. image:: 109-latency-tricks-with-output_files/109-latency-tricks-with-output_14_0.jpg
.. parsed-literal::
PyTorch model on CPU. First inference time: 0.0288 seconds
PyTorch model on CPU: 0.0205 seconds per image (48.90 FPS)
ONNX model
~~~~~~~~~~
The first optimization is exporting the PyTorch model to ONNX and
running it in OpenVINO. It is possible, thanks to the ONNX frontend. It
means we do not necessarily have to convert the model to Intermediate
Representation (IR) to leverage the OpenVINO Runtime.
.. code:: ipython3
onnx_path = base_model_dir / Path(f"{model_name}_{IMAGE_WIDTH}_{IMAGE_HEIGHT}").with_suffix(".onnx")
# export PyTorch model to ONNX if it doesn't already exist
if not onnx_path.exists():
dummy_input = torch.randn(1, 3, IMAGE_HEIGHT, IMAGE_WIDTH)
torch.onnx.export(pytorch_model, dummy_input, onnx_path)
# load and compile in OpenVINO
onnx_model = core.read_model(onnx_path)
onnx_model = core.compile_model(onnx_model, device_name="CPU")
.. parsed-literal::
/opt/home/k8sworker/.cache/torch/hub/ultralytics_yolov5_master/models/common.py:514: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
/opt/home/k8sworker/.cache/torch/hub/ultralytics_yolov5_master/models/yolo.py:64: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
.. code:: ipython3
result = onnx_model(input_image)[onnx_model.output(0)][0]
show_result(result)
onnx_infer_time = benchmark_model(model=onnx_model, input_data=input_image, benchmark_name="ONNX model")
del onnx_model # release resources
.. image:: 109-latency-tricks-with-output_files/109-latency-tricks-with-output_17_0.jpg
.. parsed-literal::
ONNX model on CPU. First inference time: 0.0165 seconds
ONNX model on CPU: 0.0091 seconds per image (109.62 FPS)
OpenVINO IR model
~~~~~~~~~~~~~~~~~
Let's convert the ONNX model to OpenVINO Intermediate Representation
(IR) FP16 and run it. Reducing the precision is one of the well-known
methods for faster inference, provided the hardware supports lower
precision, such as FP16 or even INT8. If the hardware does not support
lower precisions, the model will be inferred in FP32 automatically. We
could also use quantization (INT8), but we may experience a small
accuracy drop. That's why we skip that step in this notebook.
.. code:: ipython3
from openvino.tools import mo
ov_model = mo.convert_model(onnx_path, compress_to_fp16=True)
# save the model on disk
ov.serialize(ov_model, xml_path=str(onnx_path.with_suffix(".xml")))
ov_cpu_model = core.compile_model(ov_model, device_name="CPU")
result = ov_cpu_model(input_image)[ov_cpu_model.output(0)][0]
show_result(result)
ov_cpu_infer_time = benchmark_model(model=ov_cpu_model, input_data=input_image, benchmark_name="OpenVINO model")
del ov_cpu_model # release resources
.. image:: 109-latency-tricks-with-output_files/109-latency-tricks-with-output_19_0.jpg
.. parsed-literal::
OpenVINO model on CPU. First inference time: 0.0152 seconds
OpenVINO model on CPU: 0.0092 seconds per image (109.13 FPS)
OpenVINO IR model on GPU
~~~~~~~~~~~~~~~~~~~~~~~~
Usually, a GPU device is faster than a CPU, so let's run the above model
on the GPU. Please note you need to have an Intel GPU and `install
drivers <https://github.com/openvinotoolkit/openvino_notebooks/wiki/Ubuntu#1-install-python-git-and-gpu-drivers-optional>`__
to be able to run this step. In addition, offloading to the GPU helps
reduce CPU load and memory consumption, allowing it to be left for
routine processes. If you cannot observe a faster inference on GPU, it
may be because the model is too light to benefit from massive parallel
execution.
.. code:: ipython3
ov_gpu_infer_time = 0.0
if "GPU" in core.available_devices:
ov_gpu_model = core.compile_model(ov_model, device_name="GPU")
result = ov_gpu_model(input_image)[ov_gpu_model.output(0)][0]
show_result(result)
ov_gpu_infer_time = benchmark_model(model=ov_gpu_model, input_data=input_image, benchmark_name="OpenVINO model", device_name="GPU")
del ov_gpu_model # release resources
OpenVINO IR model + more inference threads
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It is possible to pass a config for any device (CPU in this case). We
will increase the number of inference threads to match the number of
our cores. It should help us a lot. There are `more
options <https://docs.openvino.ai/latest/groupov_runtime_cpp_prop_api.html>`__
to be changed, so it's worth playing with them to see what works best
in our case; a short sketch after the benchmark below shows how to list
the available CPU properties.
.. code:: ipython3
num_cores = os.cpu_count()
ov_cpu_config_model = core.compile_model(ov_model, device_name="CPU", config={"INFERENCE_NUM_THREADS": num_cores})
result = ov_cpu_config_model(input_image)[ov_cpu_config_model.output(0)][0]
show_result(result)
ov_cpu_config_infer_time = benchmark_model(model=ov_cpu_config_model, input_data=input_image, benchmark_name="OpenVINO model + more threads")
del ov_cpu_config_model # release resources
.. image:: 109-latency-tricks-with-output_files/109-latency-tricks-with-output_23_0.jpg
.. parsed-literal::
OpenVINO model + more threads on CPU. First inference time: 0.0143 seconds
OpenVINO model + more threads on CPU: 0.0094 seconds per image (106.13 FPS)
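The sketch below lists the properties the CPU plugin reports as
supported, which is a convenient starting point for further
experiments; the exact set of names depends on your OpenVINO version.

.. code:: ipython3

    # List the configuration properties exposed by the CPU device.
    for prop in core.get_property("CPU", "SUPPORTED_PROPERTIES"):
        print(prop)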
OpenVINO IR model in latency mode
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
OpenVINO offers a virtual device called
`AUTO <https://docs.openvino.ai/latest/openvino_docs_OV_UG_supported_plugins_AUTO.html>`__,
which can select the best device for us based on a performance hint.
There are three different hints: ``LATENCY``, ``THROUGHPUT``, and
``CUMULATIVE_THROUGHPUT``. As this notebook is focused on the latency
mode, we will use ``LATENCY``. The above hints can be used with other
devices as well.
.. code:: ipython3
ov_auto_model = core.compile_model(ov_model, device_name="AUTO", config={"PERFORMANCE_HINT": "LATENCY"})
result = ov_auto_model(input_image)[ov_auto_model.output(0)][0]
show_result(result)
ov_auto_infer_time = benchmark_model(model=ov_auto_model, input_data=input_image, benchmark_name="OpenVINO model", device_name="AUTO")
.. image:: 109-latency-tricks-with-output_files/109-latency-tricks-with-output_25_0.jpg
.. parsed-literal::
OpenVINO model on AUTO. First inference time: 0.0134 seconds
OpenVINO model on AUTO: 0.0095 seconds per image (105.46 FPS)
OpenVINO IR model in latency mode + shared memory
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
OpenVINO is a C++ toolkit with Python wrappers (API). The default
behavior in the Python API is to copy the input to an additional buffer
and then run processing in C++, which prevents many
multiprocessing-related issues. However, it also adds overhead. We can
create a tensor with shared memory enabled (keeping in mind that we
cannot overwrite our input), skip the copy, and improve performance.
.. code:: ipython3
# it must be assigned to a variable, not to be garbage collected
c_input_image = np.ascontiguousarray(input_image, dtype=np.float32)
input_tensor = ov.Tensor(c_input_image, shared_memory=True)
result = ov_auto_model(input_tensor)[ov_auto_model.output(0)][0]
show_result(result)
ov_auto_shared_infer_time = benchmark_model(model=ov_auto_model, input_data=input_tensor, benchmark_name="OpenVINO model + shared memory", device_name="AUTO")
del ov_auto_model # release resources
.. image:: 109-latency-tricks-with-output_files/109-latency-tricks-with-output_27_0.jpg
.. parsed-literal::
OpenVINO model + shared memory on AUTO. First inference time: 0.0140 seconds
OpenVINO model + shared memory on AUTO: 0.0055 seconds per image (181.99 FPS)
Other tricks
~~~~~~~~~~~~
There are other tricks for performance improvement, especially
quantization and prepostprocessing. To get even more from your model,
please visit
`111-detection-quantization <../111-detection-quantization>`__ and
`118-optimize-preprocessing <../118-optimize-preprocessing>`__.
Performance comparison
----------------------
The following graphical comparison is valid only for the selected model
and hardware combination. If you cannot see any improvement between
some steps, just skip them.
.. code:: ipython3
%matplotlib inline
.. code:: ipython3
from matplotlib import pyplot as plt
labels = ["PyTorch model", "ONNX model", "OpenVINO IR model", "OpenVINO IR model on GPU", "OpenVINO IR model + more inference threads",
"OpenVINO IR model in latency mode", "OpenVINO IR model in latency mode + shared memory"]
# make them milliseconds
times = list(map(lambda x: 1000 * x, [pytorch_infer_time, onnx_infer_time, ov_cpu_infer_time, ov_gpu_infer_time, ov_cpu_config_infer_time,
ov_auto_infer_time, ov_auto_shared_infer_time]))
bar_colors = colors[::10] / 255.0
fig, ax = plt.subplots(figsize=(16, 8))
ax.bar(labels, times, color=bar_colors)
ax.set_ylabel("Inference time [ms]")
ax.set_title("Performance difference")
plt.xticks(rotation='vertical')
plt.show()
.. image:: 109-latency-tricks-with-output_files/109-latency-tricks-with-output_30_0.png
Conclusions
-----------
We have shown the steps needed to improve the performance of an object
detection model. Even if you experience much better performance after
running this notebook, please note that this may not hold for every
hardware configuration or every model. For the most accurate results, please use
``benchmark_app`` `command-line
tool <https://docs.openvino.ai/latest/openvino_inference_engine_samples_benchmark_app_README.html>`__.
Note that ``benchmark_app`` cannot measure the impact of some tricks
above, e.g., shared memory.
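For reference, a minimal invocation for the IR model exported earlier
in this notebook could look like the commented-out line below. The path
follows the naming used in the export cell above, and the 15-second
time limit is only an illustrative choice.

.. code:: ipython3

    # Example benchmark_app run for the exported IR model (synchronous API, 15-second run):
    # ! benchmark_app -m model/yolov5n_640_480.xml -d CPU -api sync -t 15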

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:84e4f91c248768c2ea746240e307041396099f0d52fdb89b0179fa72e353894a
size 162715

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:84e4f91c248768c2ea746240e307041396099f0d52fdb89b0179fa72e353894a
size 162715

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fcd7da923bb0f72430eaf7b4770175320f1f3219aaca2d460c54fa9ef07e51c2
size 162756

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fcd7da923bb0f72430eaf7b4770175320f1f3219aaca2d460c54fa9ef07e51c2
size 162756

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fcd7da923bb0f72430eaf7b4770175320f1f3219aaca2d460c54fa9ef07e51c2
size 162756

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fcd7da923bb0f72430eaf7b4770175320f1f3219aaca2d460c54fa9ef07e51c2
size 162756

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0244df28a761b0553005731eb6cc5dbae3cfff25cc66f4ce0cd4335882bf8120
size 57110

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:41c502fdff24ada81c63ccfca7d9153ea368b1eb3330caa03afa3281c35e4484
size 155828

View File

@ -0,0 +1,14 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/109-latency-tricks-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/109-latency-tricks-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="109-latency-tricks-with-output_14_0.jpg">109-latency-tricks-with-output_14_0.jpg</a> 30-May-2023 00:08 162715
<a href="109-latency-tricks-with-output_17_0.jpg">109-latency-tricks-with-output_17_0.jpg</a> 30-May-2023 00:08 162715
<a href="109-latency-tricks-with-output_19_0.jpg">109-latency-tricks-with-output_19_0.jpg</a> 30-May-2023 00:08 162756
<a href="109-latency-tricks-with-output_23_0.jpg">109-latency-tricks-with-output_23_0.jpg</a> 30-May-2023 00:08 162756
<a href="109-latency-tricks-with-output_25_0.jpg">109-latency-tricks-with-output_25_0.jpg</a> 30-May-2023 00:08 162756
<a href="109-latency-tricks-with-output_27_0.jpg">109-latency-tricks-with-output_27_0.jpg</a> 30-May-2023 00:08 162756
<a href="109-latency-tricks-with-output_30_0.png">109-latency-tricks-with-output_30_0.png</a> 30-May-2023 00:08 57110
<a href="109-latency-tricks-with-output_4_0.jpg">109-latency-tricks-with-output_4_0.jpg</a> 30-May-2023 00:08 155828
</pre><hr></body>
</html>

View File

@ -0,0 +1,380 @@
Live Inference and Benchmark CT-scan Data with OpenVINO™
========================================================
Kidney Segmentation with PyTorch Lightning and OpenVINO™ - Part 4
-----------------------------------------------------------------
This tutorial is a part of a series on how to train, optimize, quantize
and show live inference on a medical segmentation model. The goal is to
accelerate inference on a kidney segmentation model. The
`UNet <https://arxiv.org/abs/1505.04597>`__ model is trained from
scratch, and the data is from
`Kits19 <https://github.com/neheller/kits19>`__.
This tutorial shows how to benchmark the performance of the model and
show live inference with the async API and the MULTI plugin in OpenVINO.
This notebook needs a quantized OpenVINO IR model and images from the
`KiTS-19 <https://github.com/neheller/kits19>`__ dataset, converted to
2D images. (To learn how the model is quantized, see the `Convert and
Quantize a UNet Model and Show Live
Inference <110-ct-segmentation-quantize-nncf.ipynb>`__ tutorial.)
This notebook provides a pre-trained model, trained for 20 epochs with
the full KiTS-19 frames dataset, which has an F1 score on the validation
set of 0.9. The training code is available in the `PyTorch Monai
Training <110-ct-segmentation-quantize-with-output.html>`__
notebook.
For demonstration purposes, this tutorial will download one converted CT
scan to use for inference.
.. code:: ipython3
!pip install -q "monai>=0.9.1,<1.0.0"
Imports
-------
.. code:: ipython3
import os
import sys
import zipfile
from pathlib import Path
import numpy as np
from monai.transforms import LoadImage
from openvino.runtime import Core
from custom_segmentation import SegmentationModel
sys.path.append("../utils")
from notebook_utils import download_file
Settings
--------
To use the pre-trained quantized model, keep ``MODEL_PATH`` set to
``"pretrained_model/quantized_unet_kits19.xml"``; to use the FP16 model
instead, set it to ``"pretrained_model/unet_kits19.xml"``, as shown in
the cell below. To use a model that you trained or optimized yourself,
adjust the model path.
.. code:: ipython3
# The directory that contains the IR model (xml and bin) files.
MODEL_PATH = "pretrained_model/quantized_unet_kits19.xml"
# Uncomment the next line to use the FP16 model instead of the quantized model.
# MODEL_PATH = "pretrained_model/unet_kits19.xml"
Benchmark Model Performance
---------------------------
To measure the inference performance of the IR model, use `Benchmark
Tool <https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html>`__
- an inference performance measurement tool in OpenVINO. Benchmark tool
is a command-line application that can be run in the notebook with
``! benchmark_app`` or ``%sx benchmark_app`` commands.
**Note**: The ``benchmark_app`` tool is able to measure the
performance of the OpenVINO Intermediate Representation (OpenVINO IR)
models only. For more accurate performance, run ``benchmark_app`` in
a terminal/command prompt after closing other applications. Run
``benchmark_app -m model.xml -d CPU`` to benchmark async inference on
CPU for one minute. Change ``CPU`` to ``GPU`` to benchmark on GPU.
Run ``benchmark_app --help`` to see an overview of all command-line
options.
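For reference, the commands mentioned in the note would look as follows for the model used in this notebook. They are shown commented out and are not executed here; ``$MODEL_PATH`` is the variable defined in the Settings section above.

.. code:: ipython3

   # Default asynchronous benchmark (roughly one minute) on CPU:
   # ! benchmark_app -m $MODEL_PATH -d CPU
   # The same run on GPU:
   # ! benchmark_app -m $MODEL_PATH -d GPU
   # Overview of all command-line options:
   # ! benchmark_app --help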
.. code:: ipython3
ie = Core()
# By default, benchmark on MULTI:CPU,GPU if a GPU is available, otherwise on CPU.
device = "MULTI:CPU,GPU" if "GPU" in ie.available_devices else "CPU"
# Uncomment one of the options below to benchmark on other devices.
# device = "GPU"
# device = "CPU"
# device = "AUTO"
.. code:: ipython3
# Benchmark model
! benchmark_app -m $MODEL_PATH -d $device -t 15 -api sync
.. parsed-literal::
[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ] Device info:
[ INFO ] CPU
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ]
[Step 3/11] Setting device configuration
[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to LATENCY.
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ INFO ] Read model took 31.42 ms
[ INFO ] Original model I/O parameters:
[ INFO ] Model inputs:
[ INFO ] input.1 (node: input.1) : f32 / [...] / [1,1,512,512]
[ INFO ] Model outputs:
[ INFO ] 153 (node: 153) : f32 / [...] / [1,1,512,512]
[Step 5/11] Resizing model to match image sizes and given batch
[ INFO ] Model batch size: 1
[Step 6/11] Configuring input of the model
[ INFO ] Model inputs:
[ INFO ] input.1 (node: input.1) : f32 / [N,C,H,W] / [1,1,512,512]
[ INFO ] Model outputs:
[ INFO ] 153 (node: 153) : f32 / [...] / [1,1,512,512]
[Step 7/11] Loading the model to the device
[ INFO ] Compile model took 209.10 ms
[Step 8/11] Querying optimal runtime parameters
[ INFO ] Model:
[ INFO ] NETWORK_NAME: pretrained_unet_kits19
[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 1
[ INFO ] NUM_STREAMS: 1
[ INFO ] AFFINITY: Affinity.CORE
[ INFO ] INFERENCE_NUM_THREADS: 12
[ INFO ] PERF_COUNT: False
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float32'>
[ INFO ] PERFORMANCE_HINT: PerformanceMode.LATENCY
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0
[Step 9/11] Creating infer requests and preparing input tensors
[ WARNING ] No input files were given for input 'input.1'!. This input will be filled with random values!
[ INFO ] Fill input 'input.1' with random values
[Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration)
[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).
[ INFO ] First inference took 23.91 ms
[Step 11/11] Dumping statistics report
[ INFO ] Count: 1437 iterations
[ INFO ] Duration: 15010.38 ms
[ INFO ] Latency:
[ INFO ] Median: 10.20 ms
[ INFO ] Average: 10.25 ms
[ INFO ] Min: 9.93 ms
[ INFO ] Max: 13.01 ms
[ INFO ] Throughput: 98.02 FPS
Download and Prepare Data
-------------------------
Download one validation video for live inference.
This tutorial reuses the ``KitsDataset`` class that is also used in the
training and quantization notebook, which will be released later.
The data is expected in ``BASEDIR``. The ``BASEDIR`` directory should
contain the ``case_00000`` to ``case_00299`` subdirectories. If the data
for the case specified above does not already exist, it will be
downloaded and extracted in the next cell.
.. code:: ipython3
# Directory that contains the CT scan data. This directory should contain subdirectories
# case_00XXX where XXX is between 000 and 299.
BASEDIR = Path("kits19_frames_1")
# The CT scan case number. For example: 16 for data from the case_00016 directory.
# Currently only 117 is supported.
CASE = 117
case_path = BASEDIR / f"case_{CASE:05d}"
if not case_path.exists():
filename = download_file(
f"https://storage.openvinotoolkit.org/data/test_data/openvino_notebooks/kits19/case_{CASE:05d}.zip"
)
with zipfile.ZipFile(filename, "r") as zip_ref:
zip_ref.extractall(path=BASEDIR)
os.remove(filename) # remove zipfile
print(f"Downloaded and extracted data for case_{CASE:05d}")
else:
print(f"Data for case_{CASE:05d} exists")
.. parsed-literal::
case_00117.zip: 0%| | 0.00/5.48M [00:00<?, ?B/s]
.. parsed-literal::
Downloaded and extracted data for case_00117
Show Live Inference
-------------------
To show live inference on the model in the notebook, use the
asynchronous processing feature of OpenVINO Runtime.
If you use a GPU device, for example ``device="GPU"`` or
``device="MULTI:CPU,GPU"``, to do inference on an integrated graphics
card, model loading will be slow the first time you run this code. The
model will be cached, so after the first time model loading will be
faster. For more information on OpenVINO Runtime, including Model
Caching, refer to the `OpenVINO API
tutorial <002-openvino-api-with-output.html>`__.
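As a side note, model caching can be switched on through a runtime property before the model is compiled. The snippet below is a minimal sketch, not part of the original notebook; the ``model_cache`` directory name is an arbitrary choice.

.. code:: ipython3

   from openvino.runtime import Core

   core = Core()
   # "model_cache" is a hypothetical directory name chosen for this sketch.
   core.set_property({"CACHE_DIR": "model_cache"})
   # Any subsequent compile_model() call on a caching-capable device (such
   # as GPU) will now store and reuse a compiled blob under model_cache/.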
We will use
`AsyncInferQueue <https://docs.openvino.ai/latest/openvino_docs_OV_UG_Python_API_exclusives.html#asyncinferqueue>`__
to perform asynchronous inference. It can be instantiated with a compiled
model and a number of jobs - parallel execution threads. If you don't
pass a number of jobs, or pass ``0``, OpenVINO will pick the optimal
number based on your device and heuristics. After acquiring the
inference queue, there are two jobs to do:

- Preprocess the data and push it to the inference queue. The
  preprocessing steps will remain the same.
- Tell the inference queue what to do with the model output after the
  inference is finished. This is represented by the ``callback`` Python
  function that takes the inference result and the data that we passed
  to the inference queue along with the prepared input data.

Everything else will be handled by the ``AsyncInferQueue`` instance.
Load Model and List of Image Files
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Load the segmentation model to OpenVINO Runtime with
``SegmentationModel``, based on the Model API from `Open Model
Zoo <https://github.com/openvinotoolkit/open_model_zoo/>`__. This model
implementation includes pre and post processing for the model. For
``SegmentationModel`` this includes the code to create an overlay of the
segmentation mask on the original image/frame. Uncomment the next cell
to see the implementation.
.. code:: ipython3
ie = Core()
segmentation_model = SegmentationModel(
ie=ie, model_path=Path(MODEL_PATH), sigmoid=True, rotate_and_flip=True
)
image_paths = sorted(case_path.glob("imaging_frames/*jpg"))
print(f"{case_path.name}, {len(image_paths)} images")
.. parsed-literal::
case_00117, 69 images
Prepare images
~~~~~~~~~~~~~~
Use the ``reader = LoadImage()`` function to read the images in the same
way as in the
`training <110-ct-segmentation-quantize-with-output.html>`__
tutorial.
.. code:: ipython3
framebuf = []
next_frame_id = 0
reader = LoadImage(image_only=True, dtype=np.uint8)
while next_frame_id < len(image_paths) - 1:
image_path = image_paths[next_frame_id]
image = reader(str(image_path))
framebuf.append(image)
next_frame_id += 1
Specify device
~~~~~~~~~~~~~~
.. code:: ipython3
# Possible options for device include "CPU", "GPU", "AUTO", "MULTI".
device = "MULTI:CPU,GPU" if "GPU" in ie.available_devices else "CPU"
Setting callback function
~~~~~~~~~~~~~~~~~~~~~~~~~
When ``callback`` is set, any job that ends the inference, calls the
Python function. The ``callback`` function must have two arguments: one
is the request that calls the ``callback``, which provides the
InferRequest API; the other is called “userdata”, which provides the
possibility of passing runtime values.
The ``callback`` function will show the results of inference.
.. code:: ipython3
import cv2
import copy
from IPython import display
from typing import Dict, Any
from openvino.runtime import InferRequest
# Define a callback function that runs every time the asynchronous pipeline completes inference on a frame
def completion_callback(infer_request: InferRequest, user_data: Dict[str, Any],) -> None:
preprocess_meta = user_data['preprocess_meta']
raw_outputs = {out.any_name: copy.deepcopy(res.data) for out, res in zip(infer_request.model_outputs, infer_request.output_tensors)}
frame = segmentation_model.postprocess(raw_outputs, preprocess_meta)
_, encoded_img = cv2.imencode(".jpg", frame, params=[cv2.IMWRITE_JPEG_QUALITY, 90])
# Create IPython image
i = display.Image(data=encoded_img)
# Display the image in this notebook
display.clear_output(wait=True)
display.display(i)
Create asynchronous inference queue and perform it
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
import time
from openvino.runtime import AsyncInferQueue
load_start_time = time.perf_counter()
compiled_model = ie.compile_model(segmentation_model.net, device)
# Create asynchronous inference queue with optimal number of infer requests
infer_queue = AsyncInferQueue(compiled_model)
infer_queue.set_callback(completion_callback)
load_end_time = time.perf_counter()
results = [None] * len(framebuf)
frame_number = 0
# Perform inference on every frame in the framebuffer
start_time = time.time()
for i, input_frame in enumerate(framebuf):
inputs, preprocessing_meta = segmentation_model.preprocess({segmentation_model.net.input(0): input_frame})
infer_queue.start_async(inputs, {'preprocess_meta': preprocessing_meta})
# Wait until all inference requests in the AsyncInferQueue are completed
infer_queue.wait_all()
stop_time = time.time()
# Calculate total inference time and FPS
total_time = stop_time - start_time
fps = len(framebuf) / total_time
time_per_frame = 1 / fps
print(f"Loaded model to {device} in {load_end_time-load_start_time:.2f} seconds.")
print(f'Total time to infer all frames: {total_time:.3f}s')
print(f'Time per frame: {time_per_frame:.6f}s ({fps:.3f} FPS)')
.. image:: 110-ct-scan-live-inference-with-output_files/110-ct-scan-live-inference-with-output_21_0.png
.. parsed-literal::
Loaded model to CPU in 0.20 seconds.
Total time to infer all frames: 3.085s
Time per frame: 0.045371s (22.040 FPS)

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c11bf88f1b96b85ae9d8f26f04113364bdde7d822323cee25f6cb5b01bf4d93a
size 48780

View File

@ -0,0 +1,7 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/110-ct-scan-live-inference-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/110-ct-scan-live-inference-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="110-ct-scan-live-inference-with-output_21_0.png">110-ct-scan-live-inference-with-output_21_0.png</a> 30-May-2023 00:09 48780
</pre><hr></body>
</html>

View File

@ -0,0 +1,871 @@
Quantize a Segmentation Model and Show Live Inference
=====================================================
Kidney Segmentation with PyTorch Lightning and OpenVINO™ - Part 3
-----------------------------------------------------------------
This tutorial is a part of a series on how to train, optimize, quantize
and show live inference on a medical segmentation model. The goal is to
accelerate inference on a kidney segmentation model. The
`UNet <https://arxiv.org/abs/1505.04597>`__ model is trained from
scratch; the data is from
`Kits19 <https://github.com/neheller/kits19>`__.
This third tutorial in the series shows how to:
- Convert an Original model to OpenVINO IR with `Model
Optimizer <https://docs.openvino.ai/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html>`__,
using `Model Optimizer Python
API <https://docs.openvino.ai/latest/openvino_docs_MO_DG_Python_API.html>`__
- Quantize a PyTorch model with NNCF
- Evaluate the F1 score metric of the original model and the quantized
model
- Benchmark performance of the FP32 model and the INT8 quantized model
- Show live inference with OpenVINO's async API
All notebooks in this series:
- `Data Preparation for 2D Segmentation of 3D Medical
Data <data-preparation-ct-scan.ipynb>`__
- `Train a 2D-UNet Medical Imaging Model with PyTorch
Lightning <pytorch-monai-training.ipynb>`__
- Convert and Quantize a Segmentation Model and Show Live Inference
(this notebook)
- `Live Inference and Benchmark CT-scan
data <110-ct-scan-live-inference.ipynb>`__
Instructions
------------
This notebook needs a trained UNet model. We provide a pre-trained
model, trained for 20 epochs with the full
`Kits-19 <https://github.com/neheller/kits19>`__ frames dataset, which
has an F1 score on the validation set of 0.9. The training code is
available in `this notebook <pytorch-monai-training.ipynb>`__.
NNCF for PyTorch models requires a C++ compiler. On Windows, install
`Microsoft Visual Studio
2019 <https://docs.microsoft.com/en-us/visualstudio/releases/2019/release-notes>`__.
During installation, choose Desktop development with C++ in the
Workloads tab. On macOS, run ``xcode-select --install`` from a Terminal.
On Linux, install gcc.
Running this notebook with the full dataset will take a long time. For
demonstration purposes, this tutorial will download one converted CT
scan and use that scan for quantization and inference. For production
purposes, use a representative dataset for quantizing the model.
.. code:: ipython3
!pip install -q "monai>=0.9.1,<1.0.0" "torchmetrics>=0.11.0"
Imports
-------
.. code:: ipython3
# On Windows, try to find the directory that contains x64 cl.exe and add it to the PATH to enable PyTorch
# to find the required C++ tools. This code assumes that Visual Studio is installed in the default
# directory. If you have a different C++ compiler, please add the correct path to os.environ["PATH"]
# directly. Note that the C++ Redistributable is not enough to run this notebook.
# Adding the path to os.environ["LIB"] is not always required - it depends on the system's configuration
import sys
if sys.platform == "win32":
import distutils.command.build_ext
import os
from pathlib import Path
if sys.getwindowsversion().build >= 20000: # Windows 11
search_path = "**/Hostx64/x64/cl.exe"
else:
search_path = "**/Hostx86/x64/cl.exe"
VS_INSTALL_DIR_2019 = r"C:/Program Files (x86)/Microsoft Visual Studio"
VS_INSTALL_DIR_2022 = r"C:/Program Files/Microsoft Visual Studio"
cl_paths_2019 = sorted(list(Path(VS_INSTALL_DIR_2019).glob(search_path)))
cl_paths_2022 = sorted(list(Path(VS_INSTALL_DIR_2022).glob(search_path)))
cl_paths = cl_paths_2019 + cl_paths_2022
if len(cl_paths) == 0:
raise ValueError(
"Cannot find Visual Studio. This notebook requires an x64 C++ compiler. If you installed "
"a C++ compiler, please add the directory that contains cl.exe to `os.environ['PATH']`."
)
else:
# If multiple versions of MSVC are installed, get the most recent version
cl_path = cl_paths[-1]
vs_dir = str(cl_path.parent)
os.environ["PATH"] += f"{os.pathsep}{vs_dir}"
# Code for finding the library dirs from
# https://stackoverflow.com/questions/47423246/get-pythons-lib-path
d = distutils.core.Distribution()
b = distutils.command.build_ext.build_ext(d)
b.finalize_options()
os.environ["LIB"] = os.pathsep.join(b.library_dirs)
print(f"Added {vs_dir} to PATH")
.. code:: ipython3
import logging
import os
import random
import sys
import time
import warnings
import zipfile
from pathlib import Path
warnings.filterwarnings("ignore", category=UserWarning)
import cv2
import matplotlib.pyplot as plt
import monai
import numpy as np
import torch
import nncf
from monai.transforms import LoadImage
from nncf.common.logging.logger import set_log_level
from openvino.runtime import Core
from torchmetrics import F1Score as F1
from openvino.tools import mo
from openvino.runtime import serialize
set_log_level(logging.ERROR) # Disables all NNCF info and warning messages
from custom_segmentation import SegmentationModel
from async_pipeline import show_live_inference
sys.path.append("../utils")
from notebook_utils import download_file
.. parsed-literal::
2023-05-29 22:49:24.630818: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-29 22:49:24.666920: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-29 22:49:25.225999: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
.. parsed-literal::
INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino
.. parsed-literal::
/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/offline_transformations/__init__.py:10: FutureWarning: The module is private and following namespace `offline_transformations` will be removed in the future, use `openvino.runtime.passes` instead!
warnings.warn(
Settings
--------
By default, this notebook will download one CT scan from the KITS19
dataset that will be used for quantization. To use the full dataset, set
``BASEDIR`` to the path of the dataset, as prepared according to the
`Data Preparation <data-preparation-ct-scan.ipynb>`__ notebook.
.. code:: ipython3
BASEDIR = Path("kits19_frames_1")
# Uncomment the line below to use the full dataset, as prepared in the data preparation notebook
# BASEDIR = Path("~/kits19/kits19_frames").expanduser()
MODEL_DIR = Path("model")
MODEL_DIR.mkdir(exist_ok=True)
Load PyTorch Model
------------------
Download the pre-trained model weights, load the PyTorch model and the
``state_dict`` that was saved after training. The model used in this
notebook is a
`BasicUnet <https://docs.monai.io/en/stable/networks.html#basicunet>`__
model from `MONAI <https://monai.io>`__. We provide a pre-trained
checkpoint. To see how this model performs, check out the `training
notebook <pytorch-monai-training.ipynb>`__.
.. code:: ipython3
state_dict_url = "https://github.com/helena-intel/openvino_notebooks/raw/110-nncf/notebooks/110-ct-segmentation-quantize/pretrained_model/unet_kits19_state_dict.pth"
state_dict_file = download_file(state_dict_url, directory="pretrained_model")
state_dict = torch.load(state_dict_file, map_location=torch.device("cpu"))
new_state_dict = {}
for k, v in state_dict.items():
new_key = k.replace("_model.", "")
new_state_dict[new_key] = v
new_state_dict.pop("loss_function.pos_weight")
model = monai.networks.nets.BasicUNet(spatial_dims=2, in_channels=1, out_channels=1).eval()
model.load_state_dict(new_state_dict)
.. parsed-literal::
pretrained_model/unet_kits19_state_dict.pth: 0%| | 0.00/7.58M [00:00<?, ?B/s]
.. parsed-literal::
BasicUNet features: (32, 32, 64, 128, 256, 32).
.. parsed-literal::
<All keys matched successfully>
Download CT-scan Data
---------------------
.. code:: ipython3
# The CT scan case number. For example: 2 for data from the case_00002 directory
# Currently only 117 is supported
CASE = 117
if not (BASEDIR / f"case_{CASE:05d}").exists():
BASEDIR.mkdir(exist_ok=True)
filename = download_file(
f"https://storage.openvinotoolkit.org/data/test_data/openvino_notebooks/kits19/case_{CASE:05d}.zip"
)
with zipfile.ZipFile(filename, "r") as zip_ref:
zip_ref.extractall(path=BASEDIR)
os.remove(filename) # remove zipfile
print(f"Downloaded and extracted data for case_{CASE:05d}")
else:
print(f"Data for case_{CASE:05d} exists")
.. parsed-literal::
Data for case_00117 exists
Configuration
-------------
Dataset
~~~~~~~
The KitsDataset class in the next cell expects images and masks in the
*basedir* directory, in a folder per patient. It is a simplified version
of the DataSet class in the `training
notebook <pytorch-monai-training.ipynb>`__.
Images are loaded with MONAI's
```LoadImage`` <https://docs.monai.io/en/stable/transforms.html#loadimage>`__,
to align with the image loading method in the training notebook. This
method rotates and flips the images. We define a ``rotate_and_flip``
method to display the images in the expected orientation:
.. code:: ipython3
def rotate_and_flip(image):
"""Rotate `image` by 90 degrees and flip horizontally"""
return cv2.flip(cv2.rotate(image, rotateCode=cv2.ROTATE_90_CLOCKWISE), flipCode=1)
class KitsDataset:
def __init__(self, basedir: str):
"""
Dataset class for prepared Kits19 data, for binary segmentation (background/kidney)
Source data should exist in basedir, in subdirectories case_00000 until case_00210,
with each subdirectory containing directories imaging_frames, with jpg images, and
segmentation_frames with segmentation masks as png files.
See https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/110-ct-segmentation-quantize/data-preparation-ct-scan.ipynb
:param basedir: Directory that contains the prepared CT scans
"""
masks = sorted(BASEDIR.glob("case_*/segmentation_frames/*png"))
self.basedir = basedir
self.dataset = masks
print(
f"Created dataset with {len(self.dataset)} items. "
f"Base directory for data: {basedir}"
)
def __getitem__(self, index):
"""
Get an item from the dataset at the specified index.
:return: (image, segmentation_mask)
"""
mask_path = self.dataset[index]
image_path = str(mask_path.with_suffix(".jpg")).replace(
"segmentation_frames", "imaging_frames"
)
# Load images with MONAI's LoadImage to match data loading in training notebook
mask = LoadImage(image_only=True, dtype=np.uint8)(str(mask_path)).numpy()
img = LoadImage(image_only=True, dtype=np.float32)(str(image_path)).numpy()
if img.shape[:2] != (512, 512):
img = cv2.resize(img.astype(np.uint8), (512, 512)).astype(np.float32)
mask = cv2.resize(mask, (512, 512))
input_image = np.expand_dims(img, axis=0)
return input_image, mask
def __len__(self):
return len(self.dataset)
To test whether the data loader returns the expected output, we show an
image and a mask. The image and the mask are returned by the dataloader,
after resizing and preprocessing. Since this dataset contains a lot of
slices without kidneys, we select a slice that contains at least 5000
kidney pixels to verify that the annotations look correct:
.. code:: ipython3
dataset = KitsDataset(BASEDIR)
# Find a slice that contains kidney annotations
# item[0] is the annotation: (id, annotation_data)
image_data, mask = next(item for item in dataset if np.count_nonzero(item[1]) > 5000)
# Remove extra image dimension and rotate and flip the image for visualization
image = rotate_and_flip(image_data.squeeze())
# The dataset returns the mask in shape (H, W)
mask = rotate_and_flip(mask)
fig, ax = plt.subplots(1, 2, figsize=(12, 6))
ax[0].imshow(image, cmap="gray")
ax[1].imshow(mask, cmap="gray");
.. parsed-literal::
Created dataset with 69 items. Base directory for data: kits19_frames_1
.. image:: 110-ct-segmentation-quantize-nncf-with-output_files/110-ct-segmentation-quantize-nncf-with-output_15_1.png
Metric
~~~~~~
Define a metric to determine the performance of the model.
For this demo, we use the `F1
score <https://en.wikipedia.org/wiki/F-score>`__, or Dice coefficient,
from the
`TorchMetrics <https://torchmetrics.readthedocs.io/en/stable/>`__
library.
.. code:: ipython3
from typing import Union
from openvino.runtime.ie_api import CompiledModel
def compute_f1(model: Union[torch.nn.Module, CompiledModel], dataset: KitsDataset):
"""
Compute binary F1 score of `model` on `dataset`
F1 score metric is provided by the torchmetrics library
`model` is expected to be a binary segmentation model, images in the
dataset are expected in (N,C,H,W) format where N==C==1
"""
metric = F1(ignore_index=0, task="binary", average="macro")
with torch.no_grad():
for image, target in dataset:
input_image = torch.as_tensor(image).unsqueeze(0)
if isinstance(model, CompiledModel):
output_layer = model.output(0)
output = model(input_image)[output_layer]
output = torch.from_numpy(output)
else:
output = model(input_image)
label = torch.as_tensor(target.squeeze()).long()
prediction = torch.sigmoid(output.squeeze()).round().long()
metric.update(label.flatten(), prediction.flatten())
return metric.compute()
Quantization
------------
Before quantizing the model, we compute the F1 score on the ``FP32``
model, for comparison:
.. code:: ipython3
fp32_f1 = compute_f1(model, dataset)
print(f"FP32 F1: {fp32_f1:.3f}")
.. parsed-literal::
FP32 F1: 0.999
We convert the PyTorch model to OpenVINO IR and serialize it for
comparing the performance of the ``FP32`` and ``INT8`` model later in
this notebook.
.. code:: ipython3
fp32_ir_path = MODEL_DIR / Path('unet_kits19_fp32.xml')
fp32_ir_model = mo.convert_model(model, input_shape=(1, 1, 512, 512))
serialize(fp32_ir_model, str(fp32_ir_path))
.. parsed-literal::
WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11.
[ WARNING ] Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11.
.. parsed-literal::
[ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s.
`NNCF <https://github.com/openvinotoolkit/nncf>`__ provides a suite of
advanced algorithms for Neural Networks inference optimization in
OpenVINO with minimal accuracy drop.

**Note**: NNCF Post-training Quantization is available in the OpenVINO
2023.0 release.

Create a quantized model from the pre-trained ``FP32`` model and the
calibration dataset. The optimization process contains the following
steps:

1. Create a Dataset for quantization.
2. Run ``nncf.quantize`` to get an optimized model.
3. Export the quantized model to ONNX and then convert it to an OpenVINO IR model.
4. Serialize the INT8 model, using the ``openvino.runtime.serialize`` function, for benchmarking.
.. code:: ipython3
def transform_fn(data_item):
"""
Extract the model's input from the data item.
The data item here is the data item that is returned from the data source per iteration.
This function should be passed when the data item cannot be used as model's input.
"""
images, _ = data_item
return images
data_loader = torch.utils.data.DataLoader(dataset)
calibration_dataset = nncf.Dataset(data_loader, transform_fn)
quantized_model = nncf.quantize(
model,
calibration_dataset,
# Do not quantize LeakyReLU activations to allow the INT8 model to run on Intel GPU
ignored_scope=nncf.IgnoredScope(patterns=[".*LeakyReLU.*"])
)
.. parsed-literal::
No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'
Export the quantized model to ONNX and then convert it to OpenVINO IR
model and save it.
.. code:: ipython3
dummy_input = torch.randn(1, 1, 512, 512)
int8_onnx_path = MODEL_DIR / "unet_kits19_int8.onnx"
int8_ir_path = Path(int8_onnx_path).with_suffix(".xml")
torch.onnx.export(quantized_model, dummy_input, int8_onnx_path)
int8_ir_model = mo.convert_model(input_model=int8_onnx_path)
serialize(int8_ir_model, str(int8_ir_path))
This notebook demonstrates post-training quantization with NNCF.
NNCF also supports quantization-aware training, and other algorithms
than quantization. See the `NNCF
documentation <https://github.com/openvinotoolkit/nncf/>`__ in the NNCF
repository for more information.
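For illustration only, a minimal quantization-aware training setup with NNCF could look roughly like the sketch below. It is not executed in this notebook; it assumes the same ``model`` and ``data_loader`` objects defined in the cells above, and the NNCF documentation remains the authoritative reference for this API.

.. code:: ipython3

   # Sketch only: NNCF quantization-aware training (QAT) setup.
   # Assumes `model` (the PyTorch BasicUNet) and `data_loader` from above.
   from nncf import NNCFConfig
   from nncf.torch import create_compressed_model, register_default_init_args

   nncf_config = NNCFConfig.from_dict({
       "input_info": {"sample_size": [1, 1, 512, 512]},
       "compression": {"algorithm": "quantization"},
   })
   nncf_config = register_default_init_args(nncf_config, data_loader)

   # Wrap the model with quantization operations; training then proceeds
   # with a regular PyTorch training loop on `qat_model`.
   compression_ctrl, qat_model = create_compressed_model(model, nncf_config)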
Compare FP32 and INT8 Model
---------------------------
Compare File Size
~~~~~~~~~~~~~~~~~
.. code:: ipython3
fp32_ir_model_size = fp32_ir_path.with_suffix(".bin").stat().st_size / 1024
quantized_model_size = int8_ir_path.with_suffix(".bin").stat().st_size / 1024
print(f"FP32 IR model size: {fp32_ir_model_size:.2f} KB")
print(f"INT8 model size: {quantized_model_size:.2f} KB")
.. parsed-literal::
FP32 IR model size: 7728.27 KB
INT8 model size: 1953.49 KB
Compare Metrics of the original and quantized models to be sure that there is no degradation.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
core = Core()
int8_compiled_model = core.compile_model(int8_ir_model)
int8_f1 = compute_f1(int8_compiled_model, dataset)
print(f"FP32 F1: {fp32_f1:.3f}")
print(f"INT8 F1: {int8_f1:.3f}")
.. parsed-literal::
FP32 F1: 0.999
INT8 F1: 0.999
Compare Performance of the FP32 IR Model and Quantized Models
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To measure the inference performance of the ``FP32`` and ``INT8``
models, we use `Benchmark
Tool <https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html>`__
- OpenVINO's inference performance measurement tool. Benchmark tool is a
command line application, part of OpenVINO development tools, that can
be run in the notebook with ``! benchmark_app`` or
``%sx benchmark_app``.
**NOTE**: For the most accurate performance estimation, it is
recommended to run ``benchmark_app`` in a terminal/command prompt
after closing other applications. Run
``benchmark_app -m model.xml -d CPU`` to benchmark async inference on
CPU for one minute. Change ``CPU`` to ``GPU`` to benchmark on GPU.
Run ``benchmark_app --help`` to see all command line options.
.. code:: ipython3
# ! benchmark_app --help
.. code:: ipython3
device = "CPU"
.. code:: ipython3
# Benchmark FP32 model
! benchmark_app -m $fp32_ir_path -d $device -t 15 -api sync
.. parsed-literal::
[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ] Device info:
[ INFO ] CPU
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ]
[Step 3/11] Setting device configuration
[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to LATENCY.
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ INFO ] Read model took 10.98 ms
[ INFO ] Original model I/O parameters:
[ INFO ] Model inputs:
[ INFO ] input_0 (node: input_0) : f32 / [...] / [1,1,512,512]
[ INFO ] Model outputs:
[ INFO ] 238 (node: 238) : f32 / [...] / [1,1,512,512]
[Step 5/11] Resizing model to match image sizes and given batch
[ INFO ] Model batch size: 1
[Step 6/11] Configuring input of the model
[ INFO ] Model inputs:
[ INFO ] input_0 (node: input_0) : f32 / [N,C,H,W] / [1,1,512,512]
[ INFO ] Model outputs:
[ INFO ] 238 (node: 238) : f32 / [...] / [1,1,512,512]
[Step 7/11] Loading the model to the device
[ INFO ] Compile model took 88.50 ms
[Step 8/11] Querying optimal runtime parameters
[ INFO ] Model:
[ INFO ] NETWORK_NAME: torch_jit
[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 1
[ INFO ] NUM_STREAMS: 1
[ INFO ] AFFINITY: Affinity.CORE
[ INFO ] INFERENCE_NUM_THREADS: 12
[ INFO ] PERF_COUNT: False
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float32'>
[ INFO ] PERFORMANCE_HINT: PerformanceMode.LATENCY
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0
[Step 9/11] Creating infer requests and preparing input tensors
[ WARNING ] No input files were given for input 'input_0'!. This input will be filled with random values!
[ INFO ] Fill input 'input_0' with random values
[Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration)
[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).
[ INFO ] First inference took 52.29 ms
[Step 11/11] Dumping statistics report
[ INFO ] Count: 427 iterations
[ INFO ] Duration: 15001.20 ms
[ INFO ] Latency:
[ INFO ] Median: 34.91 ms
[ INFO ] Average: 34.93 ms
[ INFO ] Min: 34.61 ms
[ INFO ] Max: 37.31 ms
[ INFO ] Throughput: 28.64 FPS
.. code:: ipython3
# Benchmark INT8 model
! benchmark_app -m $int8_ir_path -d $device -t 15 -api sync
.. parsed-literal::
[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ] Device info:
[ INFO ] CPU
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ]
[Step 3/11] Setting device configuration
[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to LATENCY.
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ INFO ] Read model took 24.51 ms
[ INFO ] Original model I/O parameters:
[ INFO ] Model inputs:
[ INFO ] x.1 (node: x.1) : f32 / [...] / [1,1,512,512]
[ INFO ] Model outputs:
[ INFO ] 578 (node: 578) : f32 / [...] / [1,1,512,512]
[Step 5/11] Resizing model to match image sizes and given batch
[ INFO ] Model batch size: 1
[Step 6/11] Configuring input of the model
[ INFO ] Model inputs:
[ INFO ] x.1 (node: x.1) : f32 / [N,C,H,W] / [1,1,512,512]
[ INFO ] Model outputs:
[ INFO ] 578 (node: 578) : f32 / [...] / [1,1,512,512]
[Step 7/11] Loading the model to the device
[ INFO ] Compile model took 149.51 ms
[Step 8/11] Querying optimal runtime parameters
[ INFO ] Model:
[ INFO ] NETWORK_NAME: torch_jit
[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 1
[ INFO ] NUM_STREAMS: 1
[ INFO ] AFFINITY: Affinity.CORE
[ INFO ] INFERENCE_NUM_THREADS: 12
[ INFO ] PERF_COUNT: False
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float32'>
[ INFO ] PERFORMANCE_HINT: PerformanceMode.LATENCY
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0
[Step 9/11] Creating infer requests and preparing input tensors
[ WARNING ] No input files were given for input 'x.1'!. This input will be filled with random values!
[ INFO ] Fill input 'x.1' with random values
[Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration)
[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).
[ INFO ] First inference took 29.36 ms
[Step 11/11] Dumping statistics report
[ INFO ] Count: 995 iterations
[ INFO ] Duration: 15002.63 ms
[ INFO ] Latency:
[ INFO ] Median: 14.84 ms
[ INFO ] Average: 14.88 ms
[ INFO ] Min: 14.49 ms
[ INFO ] Max: 15.59 ms
[ INFO ] Throughput: 67.38 FPS
Visually Compare Inference Results
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Visualize the results of the model on four slices of the validation set.
Compare the results of the ``FP32`` IR model with the results of the
quantized ``INT8`` model and the reference segmentation annotation.
Medical imaging datasets tend to be very imbalanced: most of the slices
in a CT scan do not contain kidney data. The segmentation model should
be good at finding kidneys where they exist (in medical terms: have good
sensitivity) but also not find spurious kidneys that do not exist (have
good specificity). In the next cell, there are four slices: two slices
that have no kidney data, and two slices that contain kidney data. For
this example, a slice has kidney data if at least 50 pixels in the
slices are annotated as kidney.
Run this cell again to show results on a different subset. The random
seed is displayed to enable reproducing specific runs of this cell.
**NOTE**: the images are shown after optional augmenting and
resizing. In the Kits19 dataset all but one of the cases has the
``(512, 512)`` input shape.
.. code:: ipython3
# The sigmoid function is used to transform the result of the network
# to binary segmentation masks
def sigmoid(x):
return np.exp(-np.logaddexp(0, -x))
num_images = 4
colormap = "gray"
# Load FP32 and INT8 models
core = Core()
fp_model = core.read_model(fp32_ir_path)
int8_model = core.read_model(int8_ir_path)
compiled_model_fp = core.compile_model(fp_model, device_name="CPU")
compiled_model_int8 = core.compile_model(int8_model, device_name="CPU")
output_layer_fp = compiled_model_fp.output(0)
output_layer_int8 = compiled_model_int8.output(0)
# Create subset of dataset
background_slices = (item for item in dataset if np.count_nonzero(item[1]) == 0)
kidney_slices = (item for item in dataset if np.count_nonzero(item[1]) > 50)
data_subset = random.sample(list(background_slices), 2) + random.sample(list(kidney_slices), 2)
# Set seed to current time. To reproduce specific results, copy the printed seed
# and manually set `seed` to that value.
seed = int(time.time())
random.seed(seed)
print(f"Visualizing results with seed {seed}")
fig, ax = plt.subplots(nrows=num_images, ncols=4, figsize=(24, num_images * 4))
for i, (image, mask) in enumerate(data_subset):
display_image = rotate_and_flip(image.squeeze())
target_mask = rotate_and_flip(mask).astype(np.uint8)
# Add batch dimension to image and do inference on FP and INT8 models
input_image = np.expand_dims(image, 0)
res_fp = compiled_model_fp([input_image])
res_int8 = compiled_model_int8([input_image])
# Process inference outputs and convert to binary segmentation masks
result_mask_fp = sigmoid(res_fp[output_layer_fp]).squeeze().round().astype(np.uint8)
result_mask_int8 = sigmoid(res_int8[output_layer_int8]).squeeze().round().astype(np.uint8)
result_mask_fp = rotate_and_flip(result_mask_fp)
result_mask_int8 = rotate_and_flip(result_mask_int8)
# Display images, annotations, FP32 result and INT8 result
ax[i, 0].imshow(display_image, cmap=colormap)
ax[i, 1].imshow(target_mask, cmap=colormap)
ax[i, 2].imshow(result_mask_fp, cmap=colormap)
ax[i, 3].imshow(result_mask_int8, cmap=colormap)
ax[i, 2].set_title("Prediction on FP32 model")
ax[i, 3].set_title("Prediction on INT8 model")
.. parsed-literal::
Visualizing results with seed 1685393451
.. image:: 110-ct-segmentation-quantize-nncf-with-output_files/110-ct-segmentation-quantize-nncf-with-output_37_1.png
Show Live Inference
-------------------
To show live inference on the model in the notebook, we will use the
asynchronous processing feature of OpenVINO.
We use the ``show_live_inference`` function from `Notebook
Utils <utils-with-output.html>`__ to show live inference. This
function uses `Open Model
Zoo <https://github.com/openvinotoolkit/open_model_zoo/>`__'s
AsyncPipeline and Model API to perform asynchronous inference. After
inference on the specified CT scan has completed, the total time and
throughput (fps), including preprocessing and displaying, will be
printed.
**NOTE**: If you experience flickering on Firefox, consider using
Chrome or Edge to run this notebook.
Load Model and List of Image Files
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We load the segmentation model to OpenVINO Runtime with
``SegmentationModel``, based on the `Open Model
Zoo <https://github.com/openvinotoolkit/open_model_zoo/>`__ Model API.
This model implementation includes pre and post processing for the
model. For ``SegmentationModel``, this includes the code to create an
overlay of the segmentation mask on the original image/frame.
.. code:: ipython3
CASE = 117
segmentation_model = SegmentationModel(
ie=core, model_path=int8_ir_path, sigmoid=True, rotate_and_flip=True
)
case_path = BASEDIR / f"case_{CASE:05d}"
image_paths = sorted(case_path.glob("imaging_frames/*jpg"))
print(f"{case_path.name}, {len(image_paths)} images")
.. parsed-literal::
case_00117, 69 images
Show Inference
~~~~~~~~~~~~~~
In the next cell, we run the ``show_live_inference`` function, which
loads the ``segmentation_model`` to the specified ``device`` (using
caching for faster model loading on GPU devices), loads the images,
performs inference, and displays the results on the frames loaded in
``images`` in real-time.
.. code:: ipython3
# Possible options for device include "CPU", "GPU", "AUTO", "MULTI:CPU,GPU"
device = "CPU"
reader = LoadImage(image_only=True, dtype=np.uint8)
show_live_inference(
ie=core, image_paths=image_paths, model=segmentation_model, device=device, reader=reader
)
.. image:: 110-ct-segmentation-quantize-nncf-with-output_files/110-ct-segmentation-quantize-nncf-with-output_42_0.jpg
.. parsed-literal::
Loaded model to CPU in 0.14 seconds.
Total time for 68 frames: 2.89 seconds, fps:23.84
References
----------
**OpenVINO**

- `NNCF Repository <https://github.com/openvinotoolkit/nncf/>`__
- `Neural Network Compression Framework for fast model inference <https://arxiv.org/abs/2002.08679>`__
- `OpenVINO API Tutorial <002-openvino-api-with-output.html>`__
- `OpenVINO PyPI (pip install openvino-dev) <https://pypi.org/project/openvino-dev/>`__

**Kits19 Data**

- `Kits19 Challenge Homepage <https://kits19.grand-challenge.org/>`__
- `Kits19 Github Repository <https://github.com/neheller/kits19>`__
- `The KiTS19 Challenge Data: 300 Kidney Tumor Cases with Clinical Context, CT Semantic Segmentations, and Surgical Outcomes <https://arxiv.org/abs/1904.00445>`__
- `The state of the art in kidney and kidney tumor segmentation in contrast-enhanced CT imaging: Results of the KiTS19 challenge <https://www.sciencedirect.com/science/article/pii/S1361841520301857>`__

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eb2418bea000be01c041f45ac09c14c701e09a7b1ffbc551fbc9f7ce4c4fb6fd
size 158997

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:45c8a85092a351c7636d6f17427ac38f29e9951137de1237e839411d503e019d
size 395128

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7a32b8f52591f926f1cc9afda5f199e189afed948f93c4c01a486cc43f7bdfd9
size 73812

View File

@ -0,0 +1,9 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/110-ct-segmentation-quantize-nncf-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/110-ct-segmentation-quantize-nncf-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="110-ct-segmentation-quantize-nncf-with-output_15_1.png">110-ct-segmentation-quantize-nncf-with-output_1..&gt;</a> 30-May-2023 00:09 158997
<a href="110-ct-segmentation-quantize-nncf-with-output_37_1.png">110-ct-segmentation-quantize-nncf-with-output_3..&gt;</a> 30-May-2023 00:09 395128
<a href="110-ct-segmentation-quantize-nncf-with-output_42_0.jpg">110-ct-segmentation-quantize-nncf-with-output_4..&gt;</a> 30-May-2023 00:09 73812
</pre><hr></body>
</html>

File diff suppressed because it is too large

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4f425198a6bd63de3965dd680133d01367400db27a6532b337e669e226a9de10
size 32366

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7e203ecda608c2cd3be4c3e43cfee86ec29fa538b3efc951e2dda7858fc057c6
size 720871

View File

@ -0,0 +1,8 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/111-yolov5-quantization-migration-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/111-yolov5-quantization-migration-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="111-yolov5-quantization-migration-with-output_33_0.png">111-yolov5-quantization-migration-with-output_3..&gt;</a> 30-May-2023 00:08 32366
<a href="111-yolov5-quantization-migration-with-output_39_0.png">111-yolov5-quantization-migration-with-output_3..&gt;</a> 30-May-2023 00:08 720871
</pre><hr></body>
</html>

View File

@ -0,0 +1,663 @@
Post-Training Quantization of PyTorch models with NNCF
======================================================
The goal of this tutorial is to demonstrate how to use the NNCF (Neural
Network Compression Framework) 8-bit quantization in post-training mode
(without the fine-tuning pipeline) to optimize a PyTorch model for the
high-speed inference via OpenVINO™ Toolkit. The optimization process
contains the following steps:
1. Evaluate the original model.
2. Transform the original model to a quantized one.
3. Export optimized and original models to OpenVINO IR.
4. Compare performance of the obtained ``FP32`` and ``INT8`` models.
This tutorial uses a ResNet-50 model, pre-trained on Tiny ImageNet,
which contains 100000 images of 200 classes (500 for each class)
downsized to 64×64 colored images. The tutorial will demonstrate that
only a tiny part of the dataset is needed for the post-training
quantization, not demanding the fine-tuning of the model.
**NOTE**: This notebook requires that a C++ compiler is accessible on
the default binary search path of the OS on which you are running the
notebook.
Preparations
------------
.. code:: ipython3
# On Windows, this script adds the directory that contains cl.exe to the PATH to enable PyTorch to find the
# required C++ tools. This code assumes that Visual Studio 2019 is installed in the default
# directory. If you have a different C++ compiler, add the correct path to os.environ["PATH"]
# directly.
# Adding the path to os.environ["LIB"] is not always required - it depends on the system configuration.
import sys
if sys.platform == "win32":
import distutils.command.build_ext
import os
from pathlib import Path
VS_INSTALL_DIR = r"C:/Program Files (x86)/Microsoft Visual Studio"
cl_paths = sorted(list(Path(VS_INSTALL_DIR).glob("**/Hostx86/x64/cl.exe")))
if len(cl_paths) == 0:
raise ValueError(
"Cannot find Visual Studio. This notebook requires C++. If you installed "
"a C++ compiler, please add the directory that contains cl.exe to "
"`os.environ['PATH']`"
)
else:
# If multiple versions of MSVC are installed, get the most recent one.
cl_path = cl_paths[-1]
vs_dir = str(cl_path.parent)
os.environ["PATH"] += f"{os.pathsep}{vs_dir}"
# The code for finding the library dirs is from
# https://stackoverflow.com/questions/47423246/get-pythons-lib-path
d = distutils.core.Distribution()
b = distutils.command.build_ext.build_ext(d)
b.finalize_options()
os.environ["LIB"] = os.pathsep.join(b.library_dirs)
print(f"Added {vs_dir} to PATH")
Imports
~~~~~~~
.. code:: ipython3
import os
import time
import zipfile
from pathlib import Path
from typing import List, Tuple
import nncf
from openvino.runtime import Core, serialize
from openvino.tools import mo
import torch
from torchvision.datasets import ImageFolder
from torchvision.models import resnet50
import torchvision.transforms as transforms
sys.path.append("../utils")
from notebook_utils import download_file
.. parsed-literal::
2023-05-29 22:55:16.336185: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-29 22:55:16.370677: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-29 22:55:16.922906: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/offline_transformations/__init__.py:10: FutureWarning: The module is private and following namespace `offline_transformations` will be removed in the future, use `openvino.runtime.passes` instead!
warnings.warn(
.. parsed-literal::
INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino
Settings
~~~~~~~~
.. code:: ipython3
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device")
MODEL_DIR = Path("model")
OUTPUT_DIR = Path("output")
BASE_MODEL_NAME = "resnet50"
IMAGE_SIZE = [64, 64]
OUTPUT_DIR.mkdir(exist_ok=True)
MODEL_DIR.mkdir(exist_ok=True)
# Paths where PyTorch and OpenVINO IR models will be stored.
fp32_checkpoint_filename = Path(BASE_MODEL_NAME + "_fp32").with_suffix(".pth")
fp32_onnx_path = OUTPUT_DIR / Path(BASE_MODEL_NAME + "_fp32").with_suffix(".onnx")
fp32_ir_path = OUTPUT_DIR / Path(BASE_MODEL_NAME + "_fp32").with_suffix(".xml")
int8_onnx_path = OUTPUT_DIR / Path(BASE_MODEL_NAME + "_int8").with_suffix(".onnx")
int8_ir_path = OUTPUT_DIR / Path(BASE_MODEL_NAME + "_int8").with_suffix(".xml")
fp32_pth_url = "https://storage.openvinotoolkit.org/repositories/nncf/openvino_notebook_ckpts/304_resnet50_fp32.pth"
download_file(fp32_pth_url, directory=MODEL_DIR, filename=fp32_checkpoint_filename)
.. parsed-literal::
Using cpu device
.. parsed-literal::
model/resnet50_fp32.pth: 0%| | 0.00/91.5M [00:00<?, ?B/s]
.. parsed-literal::
PosixPath('/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/112-pytorch-post-training-quantization-nncf/model/resnet50_fp32.pth')
Download and Prepare Tiny ImageNet dataset
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- 100k images of shape 3x64x64,
- 200 different classes: snake, spider, cat, truck, grasshopper, gull,
etc.
.. code:: ipython3
def download_tiny_imagenet_200(
output_dir: Path,
url: str = "http://cs231n.stanford.edu/tiny-imagenet-200.zip",
tarname: str = "tiny-imagenet-200.zip",
):
archive_path = output_dir / tarname
download_file(url, directory=output_dir, filename=tarname)
zip_ref = zipfile.ZipFile(archive_path, "r")
zip_ref.extractall(path=output_dir)
zip_ref.close()
print(f"Successfully downloaded and extracted dataset to: {output_dir}")
def create_validation_dir(dataset_dir: Path):
VALID_DIR = dataset_dir / "val"
val_img_dir = VALID_DIR / "images"
fp = open(VALID_DIR / "val_annotations.txt", "r")
data = fp.readlines()
val_img_dict = {}
for line in data:
words = line.split("\t")
val_img_dict[words[0]] = words[1]
fp.close()
for img, folder in val_img_dict.items():
newpath = val_img_dir / folder
if not newpath.exists():
os.makedirs(newpath)
if (val_img_dir / img).exists():
os.rename(val_img_dir / img, newpath / img)
DATASET_DIR = OUTPUT_DIR / "tiny-imagenet-200"
if not DATASET_DIR.exists():
download_tiny_imagenet_200(OUTPUT_DIR)
create_validation_dir(DATASET_DIR)
.. parsed-literal::
output/tiny-imagenet-200.zip: 0%| | 0.00/237M [00:00<?, ?B/s]
.. parsed-literal::
Successfully downloaded and extracted dataset to: output
Helper classes and functions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The code below helps to compute accuracy and to visualize the
validation process.
.. code:: ipython3
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self, name: str, fmt: str = ":f"):
self.name = name
self.fmt = fmt
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val: float, n: int = 1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
def __str__(self):
fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})"
return fmtstr.format(**self.__dict__)
class ProgressMeter(object):
"""Displays the progress of validation process"""
def __init__(self, num_batches: int, meters: List[AverageMeter], prefix: str = ""):
self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
self.meters = meters
self.prefix = prefix
def display(self, batch: int):
entries = [self.prefix + self.batch_fmtstr.format(batch)]
entries += [str(meter) for meter in self.meters]
print("\t".join(entries))
def _get_batch_fmtstr(self, num_batches: int):
num_digits = len(str(num_batches // 1))
fmt = "{:" + str(num_digits) + "d}"
return "[" + fmt + "/" + fmt.format(num_batches) + "]"
def accuracy(output: torch.Tensor, target: torch.Tensor, topk: Tuple[int] = (1,)):
"""Computes the accuracy over the k top predictions for the specified values of k"""
with torch.no_grad():
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
res.append(correct_k.mul_(100.0 / batch_size))
return res
Validation function
~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
from typing import Union
from openvino.runtime.ie_api import CompiledModel
def validate(val_loader: torch.utils.data.DataLoader, model: Union[torch.nn.Module, CompiledModel]):
"""Compute the metrics using data from val_loader for the model"""
batch_time = AverageMeter("Time", ":3.3f")
top1 = AverageMeter("Acc@1", ":2.2f")
top5 = AverageMeter("Acc@5", ":2.2f")
progress = ProgressMeter(len(val_loader), [batch_time, top1, top5], prefix="Test: ")
start_time = time.time()
# Switch to evaluate mode.
if not isinstance(model, CompiledModel):
model.eval()
model.to(device)
with torch.no_grad():
end = time.time()
for i, (images, target) in enumerate(val_loader):
images = images.to(device)
target = target.to(device)
# Compute the output.
if isinstance(model, CompiledModel):
output_layer = model.output(0)
output = model(images)[output_layer]
output = torch.from_numpy(output)
else:
output = model(images)
# Measure accuracy and record loss.
acc1, acc5 = accuracy(output, target, topk=(1, 5))
top1.update(acc1[0], images.size(0))
top5.update(acc5[0], images.size(0))
# Measure elapsed time.
batch_time.update(time.time() - end)
end = time.time()
print_frequency = 10
if i % print_frequency == 0:
progress.display(i)
print(
" * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f} Total time: {total_time:.3f}".format(top1=top1, top5=top5, total_time=end - start_time)
)
return top1.avg
Create and load original uncompressed model
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ResNet-50 from the `torchvision
repository <https://github.com/pytorch/vision>`__ is pre-trained on
ImageNet with more prediction classes than Tiny ImageNet, so the model
is adjusted by swapping the last FC layer to one with fewer output
values.
.. code:: ipython3
def create_model(model_path: Path):
"""Creates the ResNet-50 model and loads the pretrained weights"""
model = resnet50()
# Update the last FC layer for Tiny ImageNet number of classes.
NUM_CLASSES = 200
model.fc = torch.nn.Linear(in_features=2048, out_features=NUM_CLASSES, bias=True)
model.to(device)
if model_path.exists():
checkpoint = torch.load(str(model_path), map_location="cpu")
model.load_state_dict(checkpoint["state_dict"], strict=True)
else:
raise RuntimeError("There is no checkpoint to load")
return model
model = create_model(MODEL_DIR / fp32_checkpoint_filename)
Create train and validation dataloaders
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
def create_dataloaders(batch_size: int = 128):
"""Creates train dataloader that is used for quantization initialization and validation dataloader for computing the model accruacy"""
train_dir = DATASET_DIR / "train"
val_dir = DATASET_DIR / "val" / "images"
normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
)
train_dataset = ImageFolder(
train_dir,
transforms.Compose(
[
transforms.Resize(IMAGE_SIZE),
transforms.ToTensor(),
normalize,
]
),
)
val_dataset = ImageFolder(
val_dir,
transforms.Compose(
[transforms.Resize(IMAGE_SIZE), transforms.ToTensor(), normalize]
),
)
train_loader = torch.utils.data.DataLoader(
train_dataset,
batch_size=batch_size,
shuffle=True,
num_workers=0,
pin_memory=True,
sampler=None,
)
val_loader = torch.utils.data.DataLoader(
val_dataset,
batch_size=batch_size,
shuffle=False,
num_workers=0,
pin_memory=True,
)
return train_loader, val_loader
train_loader, val_loader = create_dataloaders()
Model quantization and benchmarking
-----------------------------------
With the validation pipeline, model files, and data-loading procedures
for model calibration now prepared, it's time to proceed with the actual
post-training quantization using NNCF.
I. Evaluate the loaded model
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
acc1 = validate(val_loader, model)
print(f"Test accuracy of FP32 model: {acc1:.3f}")
.. parsed-literal::
Test: [ 0/79] Time 0.257 (0.257) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19)
Test: [10/79] Time 0.230 (0.235) Acc@1 56.25 (66.97) Acc@5 86.72 (87.50)
Test: [20/79] Time 0.231 (0.236) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35)
Test: [30/79] Time 0.230 (0.239) Acc@1 53.12 (62.37) Acc@5 77.34 (85.33)
Test: [40/79] Time 0.232 (0.239) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51)
Test: [50/79] Time 0.231 (0.238) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42)
Test: [60/79] Time 0.253 (0.237) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79)
Test: [70/79] Time 0.232 (0.236) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33)
* Acc@1 60.740 Acc@5 83.960 Total time: 18.431
Test accuracy of FP32 model: 60.740
II. Create and initialize quantization
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NNCF enables post-training quantization by adding the quantization
layers into the model graph and then using a subset of the training
dataset to initialize the parameters of these additional quantization
layers. The framework is designed so that modifications to your original
training code are minor. Quantization is the simplest scenario and
requires a few modifications. For more information about NNCF Post
Training Quantization (PTQ) API, refer to the `Basic Quantization Flow
Guide <https://docs.openvino.ai/latest/basic_qauntization_flow.html#doxid-basic-qauntization-flow>`__.
1. Create a transformation function that accepts a sample from the
dataset and returns data suitable for model inference. This enables
the creation of an instance of the nncf.Dataset class, which
represents the calibration dataset (based on the training dataset)
necessary for post-training quantization.
.. code:: ipython3
def transform_fn(data_item):
images, _ = data_item
return images
calibration_dataset = nncf.Dataset(train_loader, transform_fn)
2. Create a quantized model from the pre-trained ``FP32`` model and the
calibration dataset.
.. code:: ipython3
quantized_model = nncf.quantize(model, calibration_dataset)
.. parsed-literal::
No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'
.. parsed-literal::
INFO:nncf:Collecting tensor statistics |█████ | 1 / 3
INFO:nncf:Collecting tensor statistics |██████████ | 2 / 3
INFO:nncf:Collecting tensor statistics |████████████████| 3 / 3
INFO:nncf:Compiling and loading torch extension: quantized_functions_cpu...
INFO:nncf:Finished loading torch extension: quantized_functions_cpu
3. Evaluate the new model on the validation set after initialization of
quantization. The accuracy should be close to the accuracy of the
floating-point ``FP32`` model for a simple case like the one being
demonstrated now.
.. code:: ipython3
acc1 = validate(val_loader, quantized_model)
print(f"Accuracy of initialized INT8 model: {acc1:.3f}")
.. parsed-literal::
Test: [ 0/79] Time 0.385 (0.385) Acc@1 80.47 (80.47) Acc@5 91.41 (91.41)
Test: [10/79] Time 0.380 (0.380) Acc@1 52.34 (66.48) Acc@5 85.94 (87.50)
Test: [20/79] Time 0.377 (0.379) Acc@1 68.75 (63.95) Acc@5 85.94 (87.20)
Test: [30/79] Time 0.382 (0.379) Acc@1 51.56 (62.22) Acc@5 73.44 (85.23)
Test: [40/79] Time 0.380 (0.379) Acc@1 67.19 (60.63) Acc@5 89.84 (84.34)
Test: [50/79] Time 0.377 (0.379) Acc@1 61.72 (60.66) Acc@5 87.50 (84.24)
Test: [60/79] Time 0.372 (0.379) Acc@1 64.84 (60.32) Acc@5 85.94 (83.71)
Test: [70/79] Time 0.380 (0.378) Acc@1 50.78 (60.00) Acc@5 79.69 (83.27)
* Acc@1 60.570 Acc@5 83.850 Total time: 29.644
Accuracy of initialized INT8 model: 60.570
It should be noted that the inference time for the quantized PyTorch
model is longer than that of the original model, as fake quantizers are
added to the model by NNCF. However, the model's performance will
improve significantly once it is converted to the OpenVINO Intermediate
Representation (IR) format.
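As a rough, purely illustrative check of this overhead (not part of the
original notebook), you could time a single dummy batch on both PyTorch
models. The sketch below assumes the ``model``, ``quantized_model``,
``device``, and ``IMAGE_SIZE`` objects defined earlier in this notebook
and only gives a ballpark comparison, not a proper benchmark.

.. code:: ipython3

    # Illustrative sketch only: roughly compare single-batch latency of the
    # original FP32 model and the fake-quantized PyTorch model.
    import time

    dummy_batch = torch.randn(1, 3, *IMAGE_SIZE).to(device)

    with torch.no_grad():
        for tag, m in [
            ("FP32 PyTorch", model),
            ("INT8 fake-quantized PyTorch", quantized_model),
        ]:
            m.eval()
            start = time.perf_counter()
            for _ in range(10):
                m(dummy_batch)
            avg_ms = (time.perf_counter() - start) / 10 * 1000
            print(f"{tag}: {avg_ms:.1f} ms per batch")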
III. Convert the models to OpenVINO Intermediate Representation (OpenVINO IR)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use the Model Optimizer Python API to convert the PyTorch models to
OpenVINO IR. The models will be saved to the OUTPUT directory for later
benchmarking.
For more information about Model Optimizer, refer to the `Model
Optimizer Developer
Guide <https://docs.openvino.ai/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html>`__.
Before converting the models, export them to ONNX. Executing the
following command may take a while.
.. code:: ipython3
dummy_input = torch.randn(128, 3, *IMAGE_SIZE)
torch.onnx.export(model, dummy_input, fp32_onnx_path)
model_ir = mo.convert_model(input_model=fp32_onnx_path, input_shape=[-1, 3, *IMAGE_SIZE])
serialize(model_ir, str(fp32_ir_path))
.. code:: ipython3
torch.onnx.export(quantized_model, dummy_input, int8_onnx_path)
quantized_model_ir = mo.convert_model(input_model=int8_onnx_path, input_shape=[-1, 3, *IMAGE_SIZE])
serialize(quantized_model_ir, str(int8_ir_path))
Evaluate the FP32 and INT8 models.
.. code:: ipython3
core = Core()
fp32_compiled_model = core.compile_model(model_ir)
acc1 = validate(val_loader, fp32_compiled_model)
print(f"Accuracy of FP32 IR model: {acc1:.3f}")
.. parsed-literal::
Test: [ 0/79] Time 0.168 (0.168) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19)
Test: [10/79] Time 0.118 (0.122) Acc@1 56.25 (66.97) Acc@5 86.72 (87.50)
Test: [20/79] Time 0.116 (0.120) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35)
Test: [30/79] Time 0.118 (0.119) Acc@1 53.12 (62.37) Acc@5 77.34 (85.33)
Test: [40/79] Time 0.117 (0.119) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51)
Test: [50/79] Time 0.117 (0.119) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42)
Test: [60/79] Time 0.118 (0.119) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79)
Test: [70/79] Time 0.118 (0.119) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33)
* Acc@1 60.740 Acc@5 83.960 Total time: 9.280
Accuracy of FP32 IR model: 60.740
.. code:: ipython3
int8_compiled_model = core.compile_model(quantized_model_ir)
acc1 = validate(val_loader, int8_compiled_model)
print(f"Accuracy of INT8 IR model: {acc1:.3f}")
.. parsed-literal::
Test: [ 0/79] Time 0.116 (0.116) Acc@1 80.47 (80.47) Acc@5 91.41 (91.41)
Test: [10/79] Time 0.076 (0.082) Acc@1 54.69 (66.83) Acc@5 85.94 (87.71)
Test: [20/79] Time 0.077 (0.079) Acc@1 69.53 (63.95) Acc@5 85.94 (87.28)
Test: [30/79] Time 0.078 (0.079) Acc@1 51.56 (62.17) Acc@5 73.44 (85.26)
Test: [40/79] Time 0.079 (0.079) Acc@1 68.75 (60.75) Acc@5 89.84 (84.30)
Test: [50/79] Time 0.078 (0.078) Acc@1 60.94 (60.71) Acc@5 87.50 (84.15)
Test: [60/79] Time 0.078 (0.078) Acc@1 64.84 (60.35) Acc@5 85.94 (83.64)
Test: [70/79] Time 0.077 (0.078) Acc@1 51.56 (60.05) Acc@5 79.69 (83.24)
* Acc@1 60.580 Acc@5 83.830 Total time: 6.118
Accuracy of INT8 IR model: 60.580
IV. Compare performance of INT8 model and FP32 model in OpenVINO
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Finally, measure the inference performance of the ``FP32`` and ``INT8``
models, using `Benchmark
Tool <https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html>`__
- an inference performance measurement tool in OpenVINO. By default,
Benchmark Tool runs inference for 60 seconds in asynchronous mode on
CPU. It returns inference speed as latency (milliseconds per image) and
throughput (frames per second) values.
**NOTE**: This notebook runs benchmark_app for 15 seconds to give a
quick indication of performance. For more accurate performance, it is
recommended to run benchmark_app in a terminal/command prompt after
closing other applications. Run ``benchmark_app -m model.xml -d CPU``
to benchmark async inference on CPU for one minute. Change CPU to GPU
to benchmark on GPU. Run ``benchmark_app --help`` to see an overview
of all command-line options.
.. code:: ipython3
def parse_benchmark_output(benchmark_output: str):
"""Prints the output from benchmark_app in human-readable format"""
parsed_output = [line for line in benchmark_output if 'FPS' in line]
print(*parsed_output, sep='\n')
print('Benchmark FP32 model (OpenVINO IR)')
benchmark_output = ! benchmark_app -m "$fp32_ir_path" -d CPU -api async -t 15 -shape "[1, 3, 512, 512]"
parse_benchmark_output(benchmark_output)
print('Benchmark INT8 model (OpenVINO IR)')
benchmark_output = ! benchmark_app -m "$int8_ir_path" -d CPU -api async -t 15 -shape "[1, 3, 512, 512]"
parse_benchmark_output(benchmark_output)
print('Benchmark FP32 model (OpenVINO IR) synchronously')
benchmark_output = ! benchmark_app -m "$fp32_ir_path" -d CPU -api sync -t 15 -shape "[1, 3, 512, 512]"
parse_benchmark_output(benchmark_output)
print('Benchmark INT8 model (OpenVINO IR) synchronously')
benchmark_output = ! benchmark_app -m "$int8_ir_path" -d CPU -api sync -t 15 -shape "[1, 3, 512, 512]"
parse_benchmark_output(benchmark_output)
.. parsed-literal::
Benchmark FP32 model (OpenVINO IR)
[ INFO ] Throughput: 37.57 FPS
Benchmark INT8 model (OpenVINO IR)
[ INFO ] Throughput: 157.46 FPS
Benchmark FP32 model (OpenVINO IR) synchronously
[ INFO ] Throughput: 38.73 FPS
Benchmark INT8 model (OpenVINO IR) synchronously
[ INFO ] Throughput: 140.53 FPS
Show CPU Information for reference:
.. code:: ipython3
ie = Core()
ie.get_property("CPU", "FULL_DEVICE_NAME")
.. parsed-literal::
'Intel(R) Core(TM) i9-10920X CPU @ 3.50GHz'
View File
@ -0,0 +1,562 @@
Quantization of Image Classification Models
===========================================
This tutorial demonstrates how to apply ``INT8`` quantization to an Image
Classification model using the `Post-training Optimization Tool
API <../../compression/api/README.md>`__. It also assumes that OpenVINO™
is already installed and uses the MobileNet V2 model, trained on the
CIFAR10 dataset. The code is designed to be extendable to custom models
and datasets.
This tutorial consists of the following steps:

- Prepare the model for quantization.
- Define data loading and accuracy validation functionality.
- Run the optimization pipeline.
- Compare accuracy of the original and quantized models.
- Compare performance of the original and quantized models.
- Compare results on four pictures.
.. code:: ipython3
import os
from pathlib import Path
import sys
import matplotlib.pyplot as plt
import numpy as np
from openvino.tools.pot.api import DataLoader, Metric
from openvino.tools.pot.engines.ie_engine import IEEngine
from openvino.tools.pot.graph import load_model, save_model
from openvino.tools.pot.graph.model_utils import compress_model_weights
from openvino.tools.pot.pipeline.initializer import create_pipeline
from openvino.runtime import Core
from torchvision import transforms
from torchvision.datasets import CIFAR10
.. parsed-literal::
/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/offline_transformations/__init__.py:10: FutureWarning: The module is private and following namespace `offline_transformations` will be removed in the future, use `openvino.runtime.passes` instead!
warnings.warn(
.. code:: ipython3
# Set the data and model directories
DATA_DIR = '../data/datasets/cifar10'
MODEL_DIR = 'model'
model_repo = 'pytorch-cifar-models'
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
Prepare the Model
-----------------
Model preparation stage has the following steps:

- Download a PyTorch model from the Torchvision repository
- Convert it to ONNX format
- Run Model Optimizer to convert ONNX to OpenVINO Intermediate
  Representation (OpenVINO IR)
.. code:: ipython3
if not Path(model_repo).exists():
!git clone https://github.com/chenyaofo/pytorch-cifar-models.git
sys.path.append(model_repo)
.. parsed-literal::
Cloning into 'pytorch-cifar-models'...
remote: Enumerating objects: 282, done.
remote: Counting objects: 100% (281/281), done.
remote: Compressing objects: 100% (95/95), done.
remote: Total 282 (delta 136), reused 269 (delta 129), pack-reused 1
Receiving objects: 100% (282/282), 9.22 MiB | 4.06 MiB/s, done.
Resolving deltas: 100% (136/136), done.
.. code:: ipython3
from pytorch_cifar_models import cifar10_mobilenetv2_x1_0
model = cifar10_mobilenetv2_x1_0(pretrained=True)
.. code:: ipython3
import torch
model.eval()
dummy_input = torch.randn(1, 3, 32, 32)
onnx_model_path = Path(MODEL_DIR) / 'mobilenet_v2.onnx'
ir_model_xml = onnx_model_path.with_suffix('.xml')
ir_model_bin = onnx_model_path.with_suffix('.bin')
torch.onnx.export(model, dummy_input, onnx_model_path)
# Run Model Optimizer to convert ONNX to OpenVINO IR.
!mo --compress_to_fp16 -m $onnx_model_path --output_dir $MODEL_DIR
.. parsed-literal::
Check for a new version of Intel(R) Distribution of OpenVINO(TM) toolkit here https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit/download.html?cid=other&source=prod&campid=ww_2023_bu_IOTG_OpenVINO-2022-3&content=upg_all&medium=organic or on https://github.com/openvinotoolkit/openvino
[ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11.
Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html
[ SUCCESS ] Generated IR version 11 model.
[ SUCCESS ] XML file: /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/113-image-classification-quantization/model/mobilenet_v2.xml
[ SUCCESS ] BIN file: /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/113-image-classification-quantization/model/mobilenet_v2.bin
Define Data Loader
------------------
In this step, the ``DataLoader`` interface from POT API is implemented.
.. code:: ipython3
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))])
dataset = CIFAR10(root=DATA_DIR, train=False, transform=transform, download=True)
.. parsed-literal::
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/datasets/cifar10/cifar-10-python.tar.gz
.. parsed-literal::
0%| | 0/170498071 [00:00<?, ?it/s]
.. parsed-literal::
Extracting ../data/datasets/cifar10/cifar-10-python.tar.gz to ../data/datasets/cifar10
.. code:: ipython3
# Create a DataLoader from a CIFAR10 dataset.
class CifarDataLoader(DataLoader):
def __init__(self, config):
"""
Initialize config and dataset.
:param config: created config with DATA_DIR path.
"""
super().__init__(config)
self.dataset = dataset
def __len__(self):
return len(self.dataset)
def __getitem__(self, index):
"""
Return one sample of index, label and picture.
:param index: index of the taken sample.
"""
image, label = self.dataset[index]
return (index, label), image.numpy()
def load_data(self, dataset):
"""
Load dataset in needed format.
:param dataset: downloaded dataset.
"""
pictures, labels, indexes = [], [], []
for idx, sample in enumerate(dataset):
pictures.append(sample[0])
labels.append(sample[1])
indexes.append(idx)
return indexes, pictures, labels
Define Accuracy Metric Calculation
----------------------------------
In this step, the ``Metric`` interface for the accuracy Top-1 metric is
implemented. It is used for validating the accuracy of the quantized
model.
.. code:: ipython3
# Custom implementation of classification accuracy metric.
class Accuracy(Metric):
# Required methods
def __init__(self, top_k=1):
super().__init__()
self._top_k = top_k
self._name = 'accuracy@top{}'.format(self._top_k)
self._matches = []
@property
def value(self):
""" Returns accuracy metric value for the last model output. """
return {self._name: self._matches[-1]}
@property
def avg_value(self):
""" Returns accuracy metric value for all model outputs. """
return {self._name: np.ravel(self._matches).mean()}
def update(self, output, target):
""" Updates prediction matches.
:param output: model output
:param target: annotations
"""
if len(output) > 1:
raise Exception('The accuracy metric cannot be calculated '
'for a model with multiple outputs')
if isinstance(target, dict):
target = list(target.values())
predictions = np.argsort(output[0], axis=1)[:, -self._top_k:]
match = [float(t in predictions[i]) for i, t in enumerate(target)]
self._matches.append(match)
def reset(self):
""" Resets collected matches """
self._matches = []
def get_attributes(self):
"""
Returns a dictionary of metric attributes {metric_name: {attribute_name: value}}.
Required attributes: 'direction': 'higher-better' or 'higher-worse'
'type': metric type
"""
return {self._name: {'direction': 'higher-better',
'type': 'accuracy'}}
Run Quantization Pipeline and compare the accuracy of the original and quantized models
---------------------------------------------------------------------------------------
In this step, define a configuration for the quantization pipeline and
run it.
**NOTE**: Use the built-in ``IEEngine`` implementation of the ``Engine``
interface from the POT API for model inference. ``IEEngine`` is built
on top of the OpenVINO Python API for inference and provides basic
functionality for inference of simple models. If you have a more
complicated inference flow for your model/models, you should create
your own implementation of the ``Engine`` interface, for example, by
inheriting from ``IEEngine`` and extending it; a minimal sketch of this
idea follows.
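The sketch below is purely illustrative and is not executed in this
tutorial: it only shows the general shape of such a subclass. The
``postprocess_output`` hook name and signature are assumed from the POT
API samples and should be verified against the POT version you use.

.. code:: ipython3

    # Hypothetical sketch of a custom engine built on top of IEEngine.
    # Not used in this tutorial; method name/signature assumed from POT samples.
    class MyCustomEngine(IEEngine):
        def postprocess_output(self, outputs, metadata):
            # `outputs` holds raw model outputs and `metadata` the per-sample
            # information supplied by the data loader. A real pipeline could
            # decode, reshape, or filter the outputs here; this sketch simply
            # returns them unchanged.
            return outputs

    # It would then replace IEEngine in Step 4 below:
    # engine = MyCustomEngine(engine_config, data_loader, metric)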
.. code:: ipython3
model_config = {
'model_name': 'mobilenet_v2',
'model': ir_model_xml,
'weights': ir_model_bin
}
engine_config = {'device': 'CPU'}
dataset_config = {
'data_source': DATA_DIR
}
algorithms = [
{
'name': 'DefaultQuantization',
'params': {
'target_device': 'CPU',
'preset': 'performance',
'stat_subset_size': 300
}
}
]
# Steps 1-7: Model optimization
# Step 1: Load the model.
model = load_model(model_config)
# Step 2: Initialize the data loader.
data_loader = CifarDataLoader(dataset_config)
# Step 3 (Optional. Required for AccuracyAwareQuantization): Initialize the metric.
metric = Accuracy(top_k=1)
# Step 4: Initialize the engine for metric calculation and statistics collection.
engine = IEEngine(engine_config, data_loader, metric)
# Step 5: Create a pipeline of compression algorithms.
pipeline = create_pipeline(algorithms, engine)
# Step 6: Execute the pipeline.
compressed_model = pipeline.run(model)
# Step 7 (Optional): Compress model weights to quantized precision
# in order to reduce the size of the final .bin file.
compress_model_weights(compressed_model)
# Step 8: Save the compressed model to the desired path.
compressed_model_paths = save_model(
    model=compressed_model, save_path=MODEL_DIR, model_name="quantized_mobilenet_v2"
)
compressed_model_xml = compressed_model_paths[0]["model"]
compressed_model_bin = Path(compressed_model_paths[0]["model"]).with_suffix(".bin")
# Step 9: Compare accuracy of the original and quantized models.
metric_results = pipeline.evaluate(model)
if metric_results:
for name, value in metric_results.items():
print(f"Accuracy of the original model: {name}: {value}")
metric_results = pipeline.evaluate(compressed_model)
if metric_results:
for name, value in metric_results.items():
print(f"Accuracy of the optimized model: {name}: {value}")
.. parsed-literal::
Accuracy of the original model: accuracy@top1: 0.9348
Accuracy of the optimized model: accuracy@top1: 0.9348
Compare Performance of the Original and Quantized Models
--------------------------------------------------------
Finally, measure the inference performance of the ``FP32`` and ``INT8``
models, using `Benchmark
Tool <https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html>`__
- an inference performance measurement tool in OpenVINO.
**NOTE**: For more accurate performance, it is recommended to run
benchmark_app in a terminal/command prompt after closing other
applications. Run ``benchmark_app -m model.xml -d CPU`` to benchmark
async inference on CPU for one minute. Change CPU to GPU to benchmark
on GPU. Run ``benchmark_app --help`` to see an overview of all
command-line options.
.. code:: ipython3
# Inference FP16 model (OpenVINO IR)
!benchmark_app -m $ir_model_xml -d CPU -api async
.. parsed-literal::
[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ] Device info:
[ INFO ] CPU
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ]
[Step 3/11] Setting device configuration
[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to THROUGHPUT.
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ INFO ] Read model took 31.89 ms
[ INFO ] Original model I/O parameters:
[ INFO ] Model inputs:
[ INFO ] input.1 (node: input.1) : f32 / [...] / [1,3,32,32]
[ INFO ] Model outputs:
[ INFO ] 536 (node: 536) : f32 / [...] / [1,10]
[Step 5/11] Resizing model to match image sizes and given batch
[ INFO ] Model batch size: 1
[Step 6/11] Configuring input of the model
[ INFO ] Model inputs:
[ INFO ] input.1 (node: input.1) : u8 / [N,C,H,W] / [1,3,32,32]
[ INFO ] Model outputs:
[ INFO ] 536 (node: 536) : f32 / [...] / [1,10]
[Step 7/11] Loading the model to the device
[ INFO ] Compile model took 176.27 ms
[Step 8/11] Querying optimal runtime parameters
[ INFO ] Model:
[ INFO ] NETWORK_NAME: torch_jit
[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12
[ INFO ] NUM_STREAMS: 12
[ INFO ] AFFINITY: Affinity.CORE
[ INFO ] INFERENCE_NUM_THREADS: 24
[ INFO ] PERF_COUNT: False
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float32'>
[ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0
[Step 9/11] Creating infer requests and preparing input tensors
[ WARNING ] No input files were given for input 'input.1'!. This input will be filled with random values!
[ INFO ] Fill input 'input.1' with random values
[Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 60000 ms duration)
[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).
[ INFO ] First inference took 3.08 ms
[Step 11/11] Dumping statistics report
[ INFO ] Count: 338496 iterations
[ INFO ] Duration: 60002.33 ms
[ INFO ] Latency:
[ INFO ] Median: 1.97 ms
[ INFO ] Average: 1.97 ms
[ INFO ] Min: 1.11 ms
[ INFO ] Max: 20.45 ms
[ INFO ] Throughput: 5641.38 FPS
.. code:: ipython3
# Inference INT8 model (OpenVINO IR)
!benchmark_app -m $compressed_model_xml -d CPU -api async
.. parsed-literal::
[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ] Device info:
[ INFO ] CPU
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ]
[Step 3/11] Setting device configuration
[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to THROUGHPUT.
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ INFO ] Read model took 18.32 ms
[ INFO ] Original model I/O parameters:
[ INFO ] Model inputs:
[ INFO ] input.1 (node: input.1) : f32 / [...] / [1,3,32,32]
[ INFO ] Model outputs:
[ INFO ] 536 (node: 536) : f32 / [...] / [1,10]
[Step 5/11] Resizing model to match image sizes and given batch
[ INFO ] Model batch size: 1
[Step 6/11] Configuring input of the model
[ INFO ] Model inputs:
[ INFO ] input.1 (node: input.1) : u8 / [N,C,H,W] / [1,3,32,32]
[ INFO ] Model outputs:
[ INFO ] 536 (node: 536) : f32 / [...] / [1,10]
[Step 7/11] Loading the model to the device
[ INFO ] Compile model took 256.00 ms
[Step 8/11] Querying optimal runtime parameters
[ INFO ] Model:
[ INFO ] NETWORK_NAME: torch_jit
[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12
[ INFO ] NUM_STREAMS: 12
[ INFO ] AFFINITY: Affinity.CORE
[ INFO ] INFERENCE_NUM_THREADS: 24
[ INFO ] PERF_COUNT: False
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float32'>
[ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0
[Step 9/11] Creating infer requests and preparing input tensors
[ WARNING ] No input files were given for input 'input.1'!. This input will be filled with random values!
[ INFO ] Fill input 'input.1' with random values
[Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 60000 ms duration)
[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).
[ INFO ] First inference took 1.56 ms
[Step 11/11] Dumping statistics report
[ INFO ] Count: 761820 iterations
[ INFO ] Duration: 60001.53 ms
[ INFO ] Latency:
[ INFO ] Median: 0.90 ms
[ INFO ] Average: 0.92 ms
[ INFO ] Min: 0.62 ms
[ INFO ] Max: 5.38 ms
[ INFO ] Throughput: 12696.68 FPS
Compare results on four pictures.
---------------------------------
.. code:: ipython3
ie = Core()
# Read and load a float model.
float_model = ie.read_model(
model=ir_model_xml, weights=ir_model_bin
)
float_compiled_model = ie.compile_model(model=float_model, device_name="CPU")
# Read and load a quantized model.
quantized_model = ie.read_model(
model=compressed_model_xml, weights=compressed_model_bin
)
quantized_compiled_model = ie.compile_model(model=quantized_model, device_name="CPU")
.. code:: ipython3
# Define all possible labels from the CIFAR10 dataset.
labels_names = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
all_pictures = []
all_labels = []
# Get all pictures and their labels.
for i, batch in enumerate(data_loader):
all_pictures.append(batch[1])
all_labels.append(batch[0][1])
.. code:: ipython3
def plot_pictures(indexes: list, all_pictures=all_pictures, all_labels=all_labels):
"""Plot 4 pictures.
:param indexes: a list of indexes of pictures to be displayed.
:param all_pictures: a list of pictures from the dataset.
:param all_labels: a list of labels for each picture.
"""
images, labels = [], []
num_pics = len(indexes)
assert num_pics == 4, f'Not enough indexes for pictures to be displayed, got {num_pics}'
for idx in indexes:
assert idx < 10000, 'Cannot get such index, there are only 10000'
pic = np.rollaxis(all_pictures[idx].squeeze(), 0, 3)
images.append(pic)
labels.append(labels_names[all_labels[idx]])
f, axarr = plt.subplots(1, 4)
axarr[0].imshow(images[0])
axarr[0].set_title(labels[0])
axarr[1].imshow(images[1])
axarr[1].set_title(labels[1])
axarr[2].imshow(images[2])
axarr[2].set_title(labels[2])
axarr[3].imshow(images[3])
axarr[3].set_title(labels[3])
.. code:: ipython3
def infer_on_pictures(model, indexes: list, all_pictures=all_pictures):
""" Run inference on a few pictures.
:param model: the model on which to run inference
:param indexes: a list of indexes of pictures to infer on
"""
output_key = model.output(0)
predicted_labels = []
for idx in indexes:
assert idx < 10000, 'Cannot get such index, there are only 10000'
result = model([all_pictures[idx][None,]])[output_key]
result = labels_names[np.argmax(result[0])]
predicted_labels.append(result)
return predicted_labels
.. code:: ipython3
indexes_to_infer = [7, 12, 15, 20] # To plot, specify 4 indexes.
plot_pictures(indexes_to_infer)
results_float = infer_on_pictures(float_compiled_model, indexes_to_infer)
results_quanized = infer_on_pictures(quantized_compiled_model, indexes_to_infer)
print(f"Labels for picture from float model : {results_float}.")
print(f"Labels for picture from quantized model : {results_quanized}.")
.. parsed-literal::
Labels for picture from float model : ['frog', 'dog', 'ship', 'horse'].
Labels for picture from quantized model : ['frog', 'dog', 'ship', 'horse'].
.. image:: 113-image-classification-quantization-with-output_files/113-image-classification-quantization-with-output_22_1.png
View File
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b843161ca6e7220e4a7e33d700810df2dcc71bd501dec5ed138aa08a3bc6ebee
size 14855
View File
@ -0,0 +1,7 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/113-image-classification-quantization-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/113-image-classification-quantization-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="113-image-classification-quantization-with-output_22_1.png">113-image-classification-quantization-with-outp..&gt;</a> 30-May-2023 00:09 14855
</pre><hr></body>
</html>
View File
@ -0,0 +1,418 @@
INT8 Quantization with Post-training Optimization Tool (POT) in Simplified Mode tutorial
========================================================================================
This tutorial shows how to quantize a
`ResNet20 <https://github.com/chenyaofo/pytorch-cifar-models>`__ image
classification model, trained on
`CIFAR10 <http://pytorch.org/vision/main/generated/torchvision.datasets.CIFAR10.html>`__
dataset, using the Post-Training Optimization Tool (POT) in Simplified
Mode.
Simplified Mode is designed to make the data preparation step easier,
before model optimization. The mode is represented by an implementation
of the engine interface in the POT API in OpenVINO™. It enables reading
data from an arbitrary folder specified by the user. Currently,
Simplified Mode is available only for image data in PNG or JPEG formats,
stored in a single folder.
**NOTE:** This mode cannot be used with the accuracy-aware method. It
is not possible to control accuracy after optimization using this
mode. However, Simplified Mode can be useful for estimating
performance improvements when optimizing models.
This tutorial includes the following steps:
- Downloading and saving the CIFAR10 dataset.
- Preparing the model for quantization.
- Compressing the prepared model.
- Measuring and comparing the performance of the original and quantized
models.
- Demonstrating the use of the quantized model for image
classification.
.. code:: ipython3
import os
from pathlib import Path
import warnings
import torch
from torchvision import transforms as T
from torchvision.datasets import CIFAR10
import matplotlib.pyplot as plt
import numpy as np
from openvino.runtime import Core, Tensor
warnings.filterwarnings("ignore")
# Set the data and model directories
MODEL_DIR = 'model'
CALIB_DIR = 'calib'
CIFAR_DIR = '../data/datasets/cifar10'
CALIB_SET_SIZE = 300
MODEL_NAME = 'resnet20'
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(CALIB_DIR, exist_ok=True)
Prepare the calibration dataset
-------------------------------
The following steps are required to prepare the calibration dataset:

- Download the CIFAR10 dataset from the `Torchvision.datasets
  repository <https://pytorch.org/vision/stable/datasets.html>`__.
- Save the selected number of elements from this dataset as ``.png``
  images in a separate folder.
.. code:: ipython3
transform = T.Compose([T.ToTensor()])
dataset = CIFAR10(root=CIFAR_DIR, train=False, transform=transform, download=True)
.. parsed-literal::
Files already downloaded and verified
.. code:: ipython3
pil_converter = T.ToPILImage(mode="RGB")
for idx, info in enumerate(dataset):
im = info[0]
if idx >= CALIB_SET_SIZE:
break
label = info[1]
pil_converter(im.squeeze(0)).save(Path(CALIB_DIR) / f'{label}_{idx}.png')
Prepare the Model
-----------------
Model preparation includes the following steps:

- Download the PyTorch model from the Torchvision repository.
- Convert the model to ONNX format.
- Run Model Optimizer to convert ONNX to OpenVINO Intermediate
  Representation (OpenVINO IR).
.. code:: ipython3
model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar10_resnet20", pretrained=True, skip_validation=True)
dummy_input = torch.randn(1, 3, 32, 32)
onnx_model_path = Path(MODEL_DIR) / '{}.onnx'.format(MODEL_NAME)
ir_model_xml = onnx_model_path.with_suffix('.xml')
ir_model_bin = onnx_model_path.with_suffix('.bin')
torch.onnx.export(model, dummy_input, onnx_model_path)
.. parsed-literal::
Using cache found in /opt/home/k8sworker/.cache/torch/hub/chenyaofo_pytorch-cifar-models_master
Now, convert this model into the OpenVINO IR using Model Optimizer:
.. code:: ipython3
!mo -m $onnx_model_path --output_dir $MODEL_DIR
.. parsed-literal::
Check for a new version of Intel(R) Distribution of OpenVINO(TM) toolkit here https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit/download.html?cid=other&source=prod&campid=ww_2023_bu_IOTG_OpenVINO-2022-3&content=upg_all&medium=organic or on https://github.com/openvinotoolkit/openvino
[ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11.
Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html
[ SUCCESS ] Generated IR version 11 model.
[ SUCCESS ] XML file: /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/114-quantization-simplified-mode/model/resnet20.xml
[ SUCCESS ] BIN file: /opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/notebooks/114-quantization-simplified-mode/model/resnet20.bin
Compression stage
-----------------
Compress the model with the following command:
``pot -q default -m <path_to_xml> -w <path_to_bin> --engine simplified --data-source <path_to_data>``
.. code:: ipython3
!pot -q default -m $ir_model_xml -w $ir_model_bin --engine simplified --data-source $CALIB_DIR --output-dir compressed --direct-dump --name $MODEL_NAME
.. parsed-literal::
/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/offline_transformations/__init__.py:10: FutureWarning: The module is private and following namespace `offline_transformations` will be removed in the future, use `openvino.runtime.passes` instead!
warnings.warn(
INFO:openvino.tools.pot.app.run:Output log dir: compressed
INFO:openvino.tools.pot.app.run:Creating pipeline:
Algorithm: DefaultQuantization
Parameters:
preset : performance
stat_subset_size : 300
target_device : ANY
model_type : None
dump_intermediate_model : False
inplace_statistics : True
exec_log_dir : compressed
===========================================================================
INFO:openvino.tools.pot.data_loaders.image_loader:Layout value is set [N,C,H,W]
INFO:openvino.tools.pot.pipeline.pipeline:Inference Engine version: 2022.3.0-9052-9752fafe8eb-releases/2022/3
INFO:openvino.tools.pot.pipeline.pipeline:Model Optimizer version: 2022.3.0-9052-9752fafe8eb-releases/2022/3
INFO:openvino.tools.pot.pipeline.pipeline:Post-Training Optimization Tool version: 2022.3.0-9052-9752fafe8eb-releases/2022/3
INFO:openvino.tools.pot.statistics.collector:Start computing statistics for algorithms : DefaultQuantization
INFO:openvino.tools.pot.statistics.collector:Computing statistics finished
INFO:openvino.tools.pot.pipeline.pipeline:Start algorithm: DefaultQuantization
INFO:openvino.tools.pot.algorithms.quantization.default.algorithm:Start computing statistics for algorithm : ActivationChannelAlignment
INFO:openvino.tools.pot.algorithms.quantization.default.algorithm:Computing statistics finished
INFO:openvino.tools.pot.algorithms.quantization.default.algorithm:Start computing statistics for algorithms : MinMaxQuantization,FastBiasCorrection
INFO:openvino.tools.pot.algorithms.quantization.default.algorithm:Computing statistics finished
INFO:openvino.tools.pot.pipeline.pipeline:Finished: DefaultQuantization
===========================================================================
Compare Performance of the Original and Quantized Models
--------------------------------------------------------
Finally, measure the inference performance of the ``FP32`` and ``INT8``
models, using `Benchmark
Tool <https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html>`__
- an inference performance measurement tool in OpenVINO.
**NOTE**: For more accurate performance, it is recommended to run
benchmark_app in a terminal/command prompt after closing other
applications. Run ``benchmark_app -m model.xml -d CPU`` to benchmark
async inference on CPU for one minute. Change CPU to GPU to benchmark
on GPU. Run ``benchmark_app --help`` to see an overview of all
command-line options.
.. code:: ipython3
optimized_model_path = Path('compressed/optimized')
optimized_model_xml = optimized_model_path / '{}.xml'.format(MODEL_NAME)
optimized_model_bin = optimized_model_path / '{}.bin'.format(MODEL_NAME)
.. code:: ipython3
# Inference FP32 model (OpenVINO IR)
!benchmark_app -m $ir_model_xml -d CPU -api async
.. parsed-literal::
[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ] Device info:
[ INFO ] CPU
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ]
[Step 3/11] Setting device configuration
[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to THROUGHPUT.
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ INFO ] Read model took 6.18 ms
[ INFO ] Original model I/O parameters:
[ INFO ] Model inputs:
[ INFO ] input.1 (node: input.1) : f32 / [...] / [1,3,32,32]
[ INFO ] Model outputs:
[ INFO ] 208 (node: 208) : f32 / [...] / [1,10]
[Step 5/11] Resizing model to match image sizes and given batch
[ INFO ] Model batch size: 1
[Step 6/11] Configuring input of the model
[ INFO ] Model inputs:
[ INFO ] input.1 (node: input.1) : u8 / [N,C,H,W] / [1,3,32,32]
[ INFO ] Model outputs:
[ INFO ] 208 (node: 208) : f32 / [...] / [1,10]
[Step 7/11] Loading the model to the device
[ INFO ] Compile model took 75.31 ms
[Step 8/11] Querying optimal runtime parameters
[ INFO ] Model:
[ INFO ] NETWORK_NAME: torch_jit
[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12
[ INFO ] NUM_STREAMS: 12
[ INFO ] AFFINITY: Affinity.CORE
[ INFO ] INFERENCE_NUM_THREADS: 24
[ INFO ] PERF_COUNT: False
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float32'>
[ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0
[Step 9/11] Creating infer requests and preparing input tensors
[ WARNING ] No input files were given for input 'input.1'!. This input will be filled with random values!
[ INFO ] Fill input 'input.1' with random values
[Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 60000 ms duration)
[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).
[ INFO ] First inference took 1.08 ms
[Step 11/11] Dumping statistics report
[ INFO ] Count: 969060 iterations
[ INFO ] Duration: 60000.75 ms
[ INFO ] Latency:
[ INFO ] Median: 0.69 ms
[ INFO ] Average: 0.71 ms
[ INFO ] Min: 0.41 ms
[ INFO ] Max: 12.50 ms
[ INFO ] Throughput: 16150.80 FPS
.. code:: ipython3
# Inference INT8 model (OpenVINO IR)
!benchmark_app -m $optimized_model_xml -d CPU -api async
.. parsed-literal::
[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ] Device info:
[ INFO ] CPU
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ]
[Step 3/11] Setting device configuration
[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to THROUGHPUT.
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ INFO ] Read model took 9.50 ms
[ INFO ] Original model I/O parameters:
[ INFO ] Model inputs:
[ INFO ] input.1 (node: input.1) : f32 / [...] / [1,3,32,32]
[ INFO ] Model outputs:
[ INFO ] 208 (node: 208) : f32 / [...] / [1,10]
[Step 5/11] Resizing model to match image sizes and given batch
[ INFO ] Model batch size: 1
[Step 6/11] Configuring input of the model
[ INFO ] Model inputs:
[ INFO ] input.1 (node: input.1) : u8 / [N,C,H,W] / [1,3,32,32]
[ INFO ] Model outputs:
[ INFO ] 208 (node: 208) : f32 / [...] / [1,10]
[Step 7/11] Loading the model to the device
[ INFO ] Compile model took 117.27 ms
[Step 8/11] Querying optimal runtime parameters
[ INFO ] Model:
[ INFO ] NETWORK_NAME: torch_jit
[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12
[ INFO ] NUM_STREAMS: 12
[ INFO ] AFFINITY: Affinity.CORE
[ INFO ] INFERENCE_NUM_THREADS: 24
[ INFO ] PERF_COUNT: False
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float32'>
[ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0
[Step 9/11] Creating infer requests and preparing input tensors
[ WARNING ] No input files were given for input 'input.1'!. This input will be filled with random values!
[ INFO ] Fill input 'input.1' with random values
[Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 60000 ms duration)
[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).
[ INFO ] First inference took 0.68 ms
[Step 11/11] Dumping statistics report
[ INFO ] Count: 1587024 iterations
[ INFO ] Duration: 60000.59 ms
[ INFO ] Latency:
[ INFO ] Median: 0.35 ms
[ INFO ] Average: 0.36 ms
[ INFO ] Min: 0.22 ms
[ INFO ] Max: 13.28 ms
[ INFO ] Throughput: 26450.14 FPS
Demonstration of the results
----------------------------
This section demonstrates how to use the compressed model by running the
optimized model on a subset of images from the CIFAR10 dataset and
showing its predictions.
The first step is to load the model:
.. code:: ipython3
ie = Core()
compiled_model = ie.compile_model(str(optimized_model_xml))
.. code:: ipython3
# Define all possible labels from the CIFAR10 dataset.
labels_names = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
all_images = []
all_labels = []
# Get all images and their labels.
for batch in dataset:
all_images.append(torch.unsqueeze(batch[0], 0))
all_labels.append(batch[1])
The code below defines the function that shows the images and their
labels, using the indexes and two lists created in the previous step:
.. code:: ipython3
def plot_pictures(indexes: list, images=all_images, labels=all_labels):
"""Plot images with the specified indexes.
:param indexes: a list of indexes of images to be displayed.
:param images: a list of images from the dataset.
:param labels: a list of labels for each image.
"""
num_pics = len(indexes)
_, axarr = plt.subplots(1, num_pics)
for idx, im_idx in enumerate(indexes):
assert im_idx < 10000, 'Cannot get such index, there are only 10000'
pic = np.rollaxis(images[im_idx].squeeze().numpy(), 0, 3)
axarr[idx].imshow(pic)
axarr[idx].set_title(labels_names[labels[im_idx]])
Use the code below to define a function that uses the optimized model
to obtain predictions for the selected images:
.. code:: ipython3
def infer_on_images(net, indexes: list, images=all_images):
""" Run inference on a set of images.
:param net: the model on which to run inference
:param indexes: a list of indexes of images to infer on.
:param images: a list of images from the dataset.
"""
predicted_labels = []
infer_request = net.create_infer_request()
for idx in indexes:
assert idx < 10000, 'Cannot get such index, there are only 10000'
input_tensor = Tensor(array=images[idx].detach().numpy(), shared_memory=True)
infer_request.set_input_tensor(input_tensor)
infer_request.start_async()
infer_request.wait()
output = infer_request.get_output_tensor()
result = list(output.data)
result = labels_names[np.argmax(result[0])]
predicted_labels.append(result)
return predicted_labels
.. code:: ipython3
indexes_to_infer = [0, 1, 2] # to plot specify indexes
plot_pictures(indexes_to_infer)
results_quanized = infer_on_images(compiled_model, indexes_to_infer)
print(f"Image labels using the quantized model : {results_quanized}.")
.. parsed-literal::
Image labels using the quantized model : ['cat', 'ship', 'ship'].
.. image:: 114-quantization-simplified-mode-with-output_files/114-quantization-simplified-mode-with-output_22_1.png
View File
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bb6fad57e6e22313d4c934b4fdf1f6dc41ddf15d28a0f347cb194d1698433d26
size 18766
View File
@ -0,0 +1,7 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/114-quantization-simplified-mode-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/114-quantization-simplified-mode-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="114-quantization-simplified-mode-with-output_22_1.png">114-quantization-simplified-mode-with-output_22..&gt;</a> 30-May-2023 00:08 18766
</pre><hr></body>
</html>
View File
@ -0,0 +1,511 @@
Asynchronous Inference with OpenVINO™
=====================================
This notebook demonstrates how to use the `Async
API <https://docs.openvino.ai/nightly/openvino_docs_deployment_optimization_guide_common.html>`__
for asynchronous execution with OpenVINO.
OpenVINO Runtime supports inference in either synchronous or
asynchronous mode. The key advantage of the Async API is that when a
device is busy with inference, the application can perform other tasks
in parallel (for example, populating inputs or scheduling other
requests) rather than wait for the current inference to complete first.
Imports
-------
.. code:: ipython3
import sys
import cv2
import time
import numpy as np
from openvino.runtime import Core, AsyncInferQueue
import openvino.runtime as ov
from IPython import display
import matplotlib.pyplot as plt
sys.path.append("../utils")
import notebook_utils as utils
Prepare model and data processing
---------------------------------
Download test model
~~~~~~~~~~~~~~~~~~~
We use a pre-trained model from OpenVINO's `Open Model
Zoo <https://docs.openvino.ai/nightly/model_zoo.html>`__ to start the
test. In this case, the model will be executed to detect people in
each frame of the video.
.. code:: ipython3
# directory where model will be downloaded
base_model_dir = "model"
# model name as named in Open Model Zoo
model_name = "person-detection-0202"
precision = "FP16"
model_path = (
f"model/intel/{model_name}/{precision}/{model_name}.xml"
)
download_command = f"omz_downloader " \
f"--name {model_name} " \
f"--precision {precision} " \
f"--output_dir {base_model_dir} " \
f"--cache_dir {base_model_dir}"
! $download_command
.. parsed-literal::
################|| Downloading person-detection-0202 ||################
========== Downloading model/intel/person-detection-0202/FP16/person-detection-0202.xml
========== Downloading model/intel/person-detection-0202/FP16/person-detection-0202.bin
Load the model
~~~~~~~~~~~~~~
.. code:: ipython3
# initialize OpenVINO runtime
ie = Core()
# read the network and corresponding weights from file
model = ie.read_model(model=model_path)
# compile the model for the CPU (you can choose manually CPU, GPU, MYRIAD etc.)
# or let the engine choose the best available device (AUTO)
compiled_model = ie.compile_model(model=model, device_name="CPU")
# get input node
input_layer_ir = model.input(0)
N, C, H, W = input_layer_ir.shape
shape = (H, W)
Create functions for data processing
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
def preprocess(image):
"""
Define the preprocess function for input data
:param: image: the original input frame
:returns:
resized_image: the image processed
"""
resized_image = cv2.resize(image, shape)
resized_image = cv2.cvtColor(np.array(resized_image), cv2.COLOR_BGR2RGB)
resized_image = resized_image.transpose((2, 0, 1))
resized_image = np.expand_dims(resized_image, axis=0).astype(np.float32)
return resized_image
def postprocess(result, image, fps):
"""
Define the postprocess function for output data
:param: result: the inference results
image: the original input frame
fps: average throughput calculated for each frame
:returns:
image: the image with bounding box and fps message
"""
detections = result.reshape(-1, 7)
for i, detection in enumerate(detections):
_, image_id, confidence, xmin, ymin, xmax, ymax = detection
if confidence > 0.5:
xmin = int(max((xmin * image.shape[1]), 10))
ymin = int(max((ymin * image.shape[0]), 10))
xmax = int(min((xmax * image.shape[1]), image.shape[1] - 10))
ymax = int(min((ymax * image.shape[0]), image.shape[0] - 10))
cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
cv2.putText(image, str(round(fps, 2)) + " fps", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 3)
return image
Get the test video
~~~~~~~~~~~~~~~~~~
.. code:: ipython3
video_path = "../data/video/CEO Pat Gelsinger on Leading Intel.mp4"
How to improve the throughput of video processing
-------------------------------------------------
Below, we compare the performance of the synchronous and async-based
approaches:
Sync Mode (default)
~~~~~~~~~~~~~~~~~~~
Let us see how video processing works with the default approach. Using
the synchronous approach, the frame is captured with OpenCV and then
immediately processed:
::
while(true) {
// capture frame
// populate CURRENT InferRequest
// Infer CURRENT InferRequest
//this call is synchronous
// display CURRENT result
}
.. code:: ipython3
def sync_api(source, flip, fps, use_popup, skip_first_frames):
"""
Define the main function for video processing in sync mode
:param: source: the video path or the ID of your webcam
:returns:
sync_fps: the inference throughput in sync mode
"""
frame_number = 0
infer_request = compiled_model.create_infer_request()
player = None
try:
# Create a video player
player = utils.VideoPlayer(source, flip=flip, fps=fps, skip_first_frames=skip_first_frames)
# Start capturing
start_time = time.time()
player.start()
if use_popup:
title = "Press ESC to Exit"
cv2.namedWindow(title, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE)
while True:
frame = player.next()
if frame is None:
print("Source ended")
break
resized_frame = preprocess(frame)
infer_request.set_tensor(input_layer_ir, ov.Tensor(resized_frame))
# Start the inference request in synchronous mode
infer_request.infer()
res = infer_request.get_output_tensor(0).data
stop_time = time.time()
total_time = stop_time - start_time
frame_number = frame_number + 1
sync_fps = frame_number / total_time
frame = postprocess(res, frame, sync_fps)
# Display the results
if use_popup:
cv2.imshow(title, frame)
key = cv2.waitKey(1)
# escape = 27
if key == 27:
break
else:
# Encode numpy array to jpg
_, encoded_img = cv2.imencode(".jpg", frame, params=[cv2.IMWRITE_JPEG_QUALITY, 90])
# Create IPython image
i = display.Image(data=encoded_img)
# Display the image in this notebook
display.clear_output(wait=True)
display.display(i)
# ctrl-c
except KeyboardInterrupt:
print("Interrupted")
# Any different error
except RuntimeError as e:
print(e)
finally:
if use_popup:
cv2.destroyAllWindows()
if player is not None:
# stop capturing
player.stop()
return sync_fps
Test performance in Sync Mode
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
sync_fps = sync_api(source=video_path, flip=False, fps=30, use_popup=False, skip_first_frames=800)
print(f"average throughput in sync mode: {sync_fps:.2f} fps")
.. image:: 115-async-api-with-output_files/115-async-api-with-output_14_0.png
.. parsed-literal::
Source ended
average throughput in sync mode: 41.15 fps
Async Mode
~~~~~~~~~~
Let us see how the OpenVINO Async API can improve the overall frame rate
of an application. The key advantage of the Async approach is as
follows: while a device is busy with the inference, the application can
do other things in parallel (for example, populating inputs or
scheduling other requests) rather than wait for the current inference to
complete first.
In the example below, inference is applied to the results of the video
decoding. So it is possible to keep multiple infer requests, and while
the current request is processed, the input frame for the next is being
captured. This essentially hides the latency of capturing, so that the
overall frame rate is determined only by the slowest part of the
pipeline (decoding vs. inference) and not by the sum of the stages.
::
while(true) {
// capture frame
// populate NEXT InferRequest
// start NEXT InferRequest
// this call is async and returns immediately
// wait for the CURRENT InferRequest
// display CURRENT result
// swap CURRENT and NEXT InferRequests
}
.. code:: ipython3
def async_api(source, flip, fps, use_popup, skip_first_frames):
"""
Define the main function for video processing in async mode
:param: source: the video path or the ID of your webcam
:returns:
async_fps: the inference throughput in async mode
"""
frame_number = 0
# Create 2 infer requests
curr_request = compiled_model.create_infer_request()
next_request = compiled_model.create_infer_request()
player = None
try:
# Create a video player
player = utils.VideoPlayer(source, flip=flip, fps=fps, skip_first_frames=skip_first_frames)
# Start capturing
start_time = time.time()
player.start()
if use_popup:
title = "Press ESC to Exit"
cv2.namedWindow(title, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE)
# Capture CURRENT frame
frame = player.next()
resized_frame = preprocess(frame)
curr_request.set_tensor(input_layer_ir, ov.Tensor(resized_frame))
# Start the CURRENT inference request
curr_request.start_async()
while True:
# Capture NEXT frame
next_frame = player.next()
if next_frame is None:
print("Source ended")
break
resized_frame = preprocess(next_frame)
next_request.set_tensor(input_layer_ir, ov.Tensor(resized_frame))
# Start the NEXT inference request
next_request.start_async()
# Waiting for CURRENT inference result
if curr_request.wait_for(-1) == 1:
res = curr_request.get_output_tensor(0).data
stop_time = time.time()
total_time = stop_time - start_time
frame_number = frame_number + 1
async_fps = frame_number / total_time
frame = postprocess(res, frame, async_fps)
# Display the results
if use_popup:
cv2.imshow(title, frame)
key = cv2.waitKey(1)
# escape = 27
if key == 27:
break
else:
# Encode numpy array to jpg
_, encoded_img = cv2.imencode(".jpg", frame, params=[cv2.IMWRITE_JPEG_QUALITY, 90])
# Create IPython image
i = display.Image(data=encoded_img)
# Display the image in this notebook
display.clear_output(wait=True)
display.display(i)
# Swap CURRENT and NEXT frames
frame = next_frame
# Swap CURRENT and NEXT infer requests
curr_request, next_request = next_request, curr_request
# ctrl-c
except KeyboardInterrupt:
print("Interrupted")
# Any different error
except RuntimeError as e:
print(e)
finally:
if use_popup:
cv2.destroyAllWindows()
if player is not None:
# stop capturing
player.stop()
return async_fps
Test the performance in Async Mode
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
async_fps = async_api(source=video_path, flip=False, fps=30, use_popup=False, skip_first_frames=800)
print(f"average throughput in async mode: {async_fps:.2f} fps")
.. image:: 115-async-api-with-output_files/115-async-api-with-output_18_0.png
.. parsed-literal::
Source ended
average throughput in async mode: 71.19 fps
Compare the performance
~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
width = 0.4
fontsize = 14
plt.rc('font', size=fontsize)
fig, ax = plt.subplots(1, 1, figsize=(10, 8))
rects1 = ax.bar([0], sync_fps, width, color='#557f2d')
rects2 = ax.bar([width], async_fps, width)
ax.set_ylabel("frames per second")
ax.set_xticks([0, width])
ax.set_xticklabels(["Sync mode", "Async mode"])
ax.set_xlabel("Higher is better")
fig.suptitle('Sync mode VS Async mode')
fig.tight_layout()
plt.show()
.. image:: 115-async-api-with-output_files/115-async-api-with-output_20_0.png
AsyncInferQueue
---------------
Asynchronous mode pipelines can be supported with the
`AsyncInferQueue <https://docs.openvino.ai/latest/openvino_docs_OV_UG_Python_API_exclusives.html#asyncinferqueue>`__
wrapper class. This class automatically spawns the pool of InferRequest
objects (also called “jobs”) and provides synchronization mechanisms to
control the flow of the pipeline. It is a simpler way to manage the
infer request queue in Asynchronous mode.
Setting Callback
~~~~~~~~~~~~~~~~
When ``callback`` is set, any job that finishes inference calls the
Python function. The ``callback`` function must have two arguments: one
is the request that triggered the ``callback``, which provides the
InferRequest API; the other is called “userdata”, which makes it
possible to pass runtime values.
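As a minimal, self-contained illustration of this mechanism (reusing the
``compiled_model``, ``input_layer_ir``, and input shape defined earlier,
and feeding a single random frame), a sketch could look like the
following; the full video-processing callback used in this notebook is
defined right after it.

.. code:: ipython3

    # Minimal illustration of AsyncInferQueue: create a small pool of jobs,
    # attach a callback, submit work asynchronously, and wait for completion.
    def simple_callback(infer_request, userdata):
        # The first output tensor holds the detections; `userdata` carries
        # whatever was passed to start_async() (here, a frame id).
        detections = infer_request.get_output_tensor(0).data
        print(f"frame {userdata}: output shape {detections.shape}")

    demo_queue = AsyncInferQueue(compiled_model, 2)
    demo_queue.set_callback(simple_callback)

    random_frame = np.random.rand(N, C, H, W).astype(np.float32)
    demo_queue.start_async({input_layer_ir.any_name: random_frame}, 0)
    demo_queue.wait_all()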
.. code:: ipython3
def callback(infer_request, info) -> None:
"""
Define the callback function for postprocessing
:param: infer_request: the infer_request object
info: a tuple that includes the original frame and the start time
:returns:
None
"""
global frame_number
global total_time
global inferqueue_fps
stop_time = time.time()
frame, start_time = info
total_time = stop_time - start_time
frame_number = frame_number + 1
inferqueue_fps = frame_number / total_time
res = infer_request.get_output_tensor(0).data[0]
frame = postprocess(res, frame, inferqueue_fps)
# Encode numpy array to jpg
_, encoded_img = cv2.imencode(".jpg", frame, params=[cv2.IMWRITE_JPEG_QUALITY, 90])
# Create IPython image
i = display.Image(data=encoded_img)
# Display the image in this notebook
display.clear_output(wait=True)
display.display(i)
.. code:: ipython3
def inferqueue(source, flip, fps, skip_first_frames) -> None:
"""
Define the main function for video processing with async infer queue
:param source: the video path or the ID of your webcam
:returns:
None
"""
# Create infer requests queue
infer_queue = AsyncInferQueue(compiled_model, 2)
infer_queue.set_callback(callback)
player = None
try:
# Create a video player
player = utils.VideoPlayer(source, flip=flip, fps=fps, skip_first_frames=skip_first_frames)
# Start capturing
start_time = time.time()
player.start()
while True:
# Capture frame
frame = player.next()
if frame is None:
print("Source ended")
break
resized_frame = preprocess(frame)
# Start the inference request with async infer queue
infer_queue.start_async({input_layer_ir.any_name: resized_frame}, (frame, start_time))
except KeyboardInterrupt:
print("Interrupted")
# Any different error
except RuntimeError as e:
print(e)
finally:
infer_queue.wait_all()
player.stop()
Test the performance with AsyncInferQueue
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
frame_number = 0
total_time = 0
inferqueue(source=video_path, flip=False, fps=30, skip_first_frames=800)
print(f"average throughput in async mode with async infer queue: {inferqueue_fps:.2f} fps")
.. image:: 115-async-api-with-output_files/115-async-api-with-output_26_0.png
.. parsed-literal::
average throughput in async mode with async infer queue: 111.22 fps
View File
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8ad51650644959fe669787d637d49fe35cfc85c9c8f5637470403470f56f6abb
size 4307
View File
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8ad51650644959fe669787d637d49fe35cfc85c9c8f5637470403470f56f6abb
size 4307
View File
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ff21ce3891ef2571506610d671a32b99a694bb6afa66d47820cac5d4476b6b41
size 30403
View File
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8ad51650644959fe669787d637d49fe35cfc85c9c8f5637470403470f56f6abb
size 4307
View File
@ -0,0 +1,10 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/115-async-api-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/115-async-api-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="115-async-api-with-output_14_0.png">115-async-api-with-output_14_0.png</a> 30-May-2023 00:08 4307
<a href="115-async-api-with-output_18_0.png">115-async-api-with-output_18_0.png</a> 30-May-2023 00:08 4307
<a href="115-async-api-with-output_20_0.png">115-async-api-with-output_20_0.png</a> 30-May-2023 00:08 30403
<a href="115-async-api-with-output_26_0.png">115-async-api-with-output_26_0.png</a> 30-May-2023 00:08 4307
</pre><hr></body>
</html>
View File
@ -0,0 +1,351 @@
Accelerate Inference of Sparse Transformer Models with OpenVINO™ and 4th Gen Intel® Xeon® Scalable Processors
=============================================================================================================
This tutorial demonstrates how to improve performance of sparse
Transformer models with `OpenVINO <https://docs.openvino.ai/>`__ on 4th
Gen Intel® Xeon® Scalable processors.
The tutorial downloads `a BERT-base
model <https://huggingface.co/OpenVINO/bert-base-uncased-sst2-int8-unstructured80>`__
which has been quantized, sparsified, and tuned for `SST2
datasets <https://huggingface.co/datasets/sst2>`__ using
`Optimum-Intel <https://github.com/huggingface/optimum-intel>`__. It
demonstrates the inference performance advantage on 4th Gen Intel® Xeon®
Scalable Processors by running it with `Sparse Weight
Decompression <https://docs.openvino.ai/latest/openvino_docs_OV_UG_supported_plugins_CPU.html#sparse-weights-decompression>`__,
a runtime option that exploits model sparsity for efficiency. The notebook
consists of the following steps:
- Install prerequisites
- Download and quantize sparse public BERT model, using the OpenVINO
integration with Hugging Face Optimum.
- Compare sparse 8-bit vs. dense 8-bit inference performance.
Prerequisites
-------------
.. code:: ipython3
!pip install -q "git+https://github.com/huggingface/optimum-intel.git" datasets onnx onnxruntime
.. parsed-literal::
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.12.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.2 which is incompatible.
Imports
-------
.. code:: ipython3
import shutil
from pathlib import Path
from optimum.intel.openvino import OVModelForSequenceClassification
from transformers import AutoTokenizer, pipeline
from huggingface_hub import hf_hub_download
.. parsed-literal::
2023-05-29 23:05:52.938123: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-29 23:05:52.973374: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-29 23:05:53.524698: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
/opt/home/k8sworker/cibuilds/ov-notebook/OVNotebookOps-416/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/offline_transformations/__init__.py:10: FutureWarning: The module is private and following namespace `offline_transformations` will be removed in the future, use `openvino.runtime.passes` instead!
warnings.warn(
.. parsed-literal::
INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino
.. parsed-literal::
No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'
Download, quantize and sparsify the model, using Hugging Face Optimum API
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The first step is to download a quantized, sparse transformer model that has
already been converted to OpenVINO IR. Then, it is run on a simple
classification example as a quick validation that the downloaded model works. To
find out how the model was quantized and sparsified, refer to the
`OpenVINO/bert-base-uncased-sst2-int8-unstructured80 <https://huggingface.co/OpenVINO/bert-base-uncased-sst2-int8-unstructured80>`__
model card on Hugging Face.
.. code:: ipython3
# The following model has been quantized and sparsified using Optimum-Intel 1.7, which is powered by OpenVINO and NNCF.
# For reproducibility, refer to https://huggingface.co/OpenVINO/bert-base-uncased-sst2-int8-unstructured80
model_id = "OpenVINO/bert-base-uncased-sst2-int8-unstructured80"
# The following two steps will set up the model and download them to HF Cache folder
ov_model = OVModelForSequenceClassification.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Let's take the model for a spin!
sentiment_classifier = pipeline("text-classification", model=ov_model, tokenizer=tokenizer)
text = "He's a dreadful magician."
outputs = sentiment_classifier(text)
print(outputs)
.. parsed-literal::
Compiling the model and creating the inference request ...
Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
.. parsed-literal::
[{'label': 'negative', 'score': 0.9981877207756042}]
For benchmarking, we will use OpenVINO's benchmark application (``benchmark_app``) and put
the IR files into a single folder.
.. code:: ipython3
# create a folder
quantized_sparse_dir = Path("bert_80pc_sparse_quantized_ir")
quantized_sparse_dir.mkdir(parents=True, exist_ok=True)
# The following calls return the path to the specified file in the Hugging Face cache folder
ov_ir_xml_path = hf_hub_download(repo_id=model_id, filename="openvino_model.xml")
ov_ir_bin_path = hf_hub_download(repo_id=model_id, filename="openvino_model.bin")
# copy IRs to the folder
shutil.copy(ov_ir_xml_path, quantized_sparse_dir)
shutil.copy(ov_ir_bin_path, quantized_sparse_dir)
.. parsed-literal::
'bert_80pc_sparse_quantized_ir/openvino_model.bin'
Benchmark quantized dense inference performance
-----------------------------------------------
Benchmark dense inference performance using parallel execution on four
CPU cores to simulate a small instance in the cloud infrastructure.
Sequence length depends on the use case: 16 is common for
conversational AI, while 160 is typical for question answering. It is set to 64
here as an example; it is recommended to tune it for your application.
.. code:: ipython3
# Dump benchmarking config for dense inference
with (quantized_sparse_dir / "perf_config.json").open("w") as outfile:
outfile.write(
"""
{
"CPU": {"NUM_STREAMS": 4, "INFERENCE_NUM_THREADS": 4}
}
"""
)
.. code:: ipython3
!benchmark_app -m $quantized_sparse_dir/openvino_model.xml -shape "input_ids[1,64],attention_mask[1,64],token_type_ids[1,64]" -load_config $quantized_sparse_dir/perf_config.json
.. parsed-literal::
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
To disable this warning, you can either:
- Avoid using `tokenizers` before the fork if possible
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ] Device info:
[ INFO ] CPU
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ]
[Step 3/11] Setting device configuration
[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to THROUGHPUT.
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ INFO ] Read model took 192.47 ms
[ INFO ] Original model I/O parameters:
[ INFO ] Model inputs:
[ INFO ] input_ids (node: input_ids) : i64 / [...] / [?,?]
[ INFO ] attention_mask (node: attention_mask) : i64 / [...] / [?,?]
[ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [?,?]
[ INFO ] Model outputs:
[ INFO ] logits (node: logits) : f32 / [...] / [?,2]
[Step 5/11] Resizing model to match image sizes and given batch
[ INFO ] Model batch size: 1
[ INFO ] Reshaping model: 'input_ids': [1,64], 'attention_mask': [1,64], 'token_type_ids': [1,64]
[ INFO ] Reshape model took 33.52 ms
[Step 6/11] Configuring input of the model
[ INFO ] Model inputs:
[ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,64]
[ INFO ] attention_mask (node: attention_mask) : i64 / [...] / [1,64]
[ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,64]
[ INFO ] Model outputs:
[ INFO ] logits (node: logits) : f32 / [...] / [1,2]
[Step 7/11] Loading the model to the device
[ INFO ] Compile model took 1506.31 ms
[Step 8/11] Querying optimal runtime parameters
[ INFO ] Model:
[ INFO ] NETWORK_NAME: torch_jit
[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 4
[ INFO ] NUM_STREAMS: 4
[ INFO ] AFFINITY: Affinity.CORE
[ INFO ] INFERENCE_NUM_THREADS: 4
[ INFO ] PERF_COUNT: False
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float32'>
[ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0
[Step 9/11] Creating infer requests and preparing input tensors
[ WARNING ] No input files were given for input 'input_ids'!. This input will be filled with random values!
[ WARNING ] No input files were given for input 'attention_mask'!. This input will be filled with random values!
[ WARNING ] No input files were given for input 'token_type_ids'!. This input will be filled with random values!
[ INFO ] Fill input 'input_ids' with random values
[ INFO ] Fill input 'attention_mask' with random values
[ INFO ] Fill input 'token_type_ids' with random values
[Step 10/11] Measuring performance (Start inference asynchronously, 4 inference requests, limits: 60000 ms duration)
[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).
[ INFO ] First inference took 33.84 ms
[Step 11/11] Dumping statistics report
[ INFO ] Count: 8796 iterations
[ INFO ] Duration: 60041.91 ms
[ INFO ] Latency:
[ INFO ] Median: 27.14 ms
[ INFO ] Average: 27.19 ms
[ INFO ] Min: 26.12 ms
[ INFO ] Max: 40.25 ms
[ INFO ] Throughput: 146.50 FPS
Benchmark quantized sparse inference performance
------------------------------------------------
To enable the sparse weight decompression feature, add it to the
runtime config as shown below. ``CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE``
takes values between 0.5 and 1.0. It is a layer-level sparsity threshold:
the optimization is enabled only for layers whose sparsity is at or above this rate.
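The same property can also be set when compiling the model directly from Python, without ``benchmark_app``. The snippet below is only a sketch of that approach; it reuses the IR copied to ``quantized_sparse_dir`` above and the same configuration keys that are dumped to ``perf_config_sparse.json`` in the next cell.

.. code:: ipython3

    from openvino.runtime import Core

    core = Core()
    sparse_model = core.read_model(str(quantized_sparse_dir / "openvino_model.xml"))
    # Enable sparse weight decompression for layers with at least 75% weight sparsity.
    compiled_sparse = core.compile_model(
        sparse_model,
        device_name="CPU",
        config={
            "NUM_STREAMS": "4",
            "INFERENCE_NUM_THREADS": "4",
            "CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE": "0.75",
        },
    )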
.. code:: ipython3
# Dump benchmarking config for sparse inference
# "CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE" controls minimum sparsity rate for weights to consider
# for sparse optimization at the runtime.
with (quantized_sparse_dir / "perf_config_sparse.json").open("w") as outfile:
outfile.write(
"""
{
"CPU": {"NUM_STREAMS": 4, "INFERENCE_NUM_THREADS": 4, "CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE": 0.75}
}
"""
)
.. code:: ipython3
!benchmark_app -m $quantized_sparse_dir/openvino_model.xml -shape "input_ids[1,64],attention_mask[1,64],token_type_ids[1,64]" -load_config $quantized_sparse_dir/perf_config_sparse.json
.. parsed-literal::
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
To disable this warning, you can either:
- Avoid using `tokenizers` before the fork if possible
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ] Device info:
[ INFO ] CPU
[ INFO ] Build ................................. 2022.3.0-9052-9752fafe8eb-releases/2022/3
[ INFO ]
[ INFO ]
[Step 3/11] Setting device configuration
[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to THROUGHPUT.
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ INFO ] Read model took 194.14 ms
[ INFO ] Original model I/O parameters:
[ INFO ] Model inputs:
[ INFO ] input_ids (node: input_ids) : i64 / [...] / [?,?]
[ INFO ] attention_mask (node: attention_mask) : i64 / [...] / [?,?]
[ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [?,?]
[ INFO ] Model outputs:
[ INFO ] logits (node: logits) : f32 / [...] / [?,2]
[Step 5/11] Resizing model to match image sizes and given batch
[ INFO ] Model batch size: 1
[ INFO ] Reshaping model: 'input_ids': [1,64], 'attention_mask': [1,64], 'token_type_ids': [1,64]
[ INFO ] Reshape model took 33.32 ms
[Step 6/11] Configuring input of the model
[ INFO ] Model inputs:
[ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,64]
[ INFO ] attention_mask (node: attention_mask) : i64 / [...] / [1,64]
[ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,64]
[ INFO ] Model outputs:
[ INFO ] logits (node: logits) : f32 / [...] / [1,2]
[Step 7/11] Loading the model to the device
[ INFO ] Compile model took 1517.33 ms
[Step 8/11] Querying optimal runtime parameters
[ INFO ] Model:
[ INFO ] NETWORK_NAME: torch_jit
[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 4
[ INFO ] NUM_STREAMS: 4
[ INFO ] AFFINITY: Affinity.CORE
[ INFO ] INFERENCE_NUM_THREADS: 4
[ INFO ] PERF_COUNT: False
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float32'>
[ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0
[Step 9/11] Creating infer requests and preparing input tensors
[ WARNING ] No input files were given for input 'input_ids'!. This input will be filled with random values!
[ WARNING ] No input files were given for input 'attention_mask'!. This input will be filled with random values!
[ WARNING ] No input files were given for input 'token_type_ids'!. This input will be filled with random values!
[ INFO ] Fill input 'input_ids' with random values
[ INFO ] Fill input 'attention_mask' with random values
[ INFO ] Fill input 'token_type_ids' with random values
[Step 10/11] Measuring performance (Start inference asynchronously, 4 inference requests, limits: 60000 ms duration)
[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).
[ INFO ] First inference took 29.59 ms
[Step 11/11] Dumping statistics report
[ INFO ] Count: 8796 iterations
[ INFO ] Duration: 60033.65 ms
[ INFO ] Latency:
[ INFO ] Median: 27.01 ms
[ INFO ] Average: 27.06 ms
[ INFO ] Min: 24.65 ms
[ INFO ] Max: 40.45 ms
[ INFO ] Throughput: 146.52 FPS
When this might be helpful
--------------------------
This feature can improve inference performance for models with sparse
weights in scenarios where the model is deployed to handle multiple
requests in parallel asynchronously. It is especially helpful with small
sequence lengths, for example, 32 and lower.
For more details about asynchronous inference with OpenVINO, refer to
the following documentation: - `Deployment Optimization
Guide <https://docs.openvino.ai/latest/openvino_docs_deployment_optimization_guide_common.html#doxid-openvino-docs-deployment-optimization-guide-common-1async-api>`__
- `Inference Request
API <https://docs.openvino.ai/latest/openvino_docs_OV_UG_Infer_request.html#doxid-openvino-docs-o-v-u-g-infer-request-1in-out-tensors>`__
View File
@ -0,0 +1,849 @@
Hello Model Server
==================
Introduction to OpenVINO™ Model Server (OVMS).
What is Model Serving?
----------------------
A model server hosts models and makes them accessible to software
components over standard network protocols. A client sends a request to
the model server, which performs inference and sends a response back to
the client. Model serving offers many advantages for efficient model
deployment:
- Remote inference enables using lightweight clients with only the
necessary functions to perform API calls to edge or cloud
deployments.
- Applications are independent of the model framework, hardware device,
and infrastructure.
- Client applications in any programming language that supports REST or
gRPC calls can be used to run inference remotely on the model server.
- Clients require fewer updates since client libraries change very
rarely.
- Model topology and weights are not exposed directly to client
applications, making it easier to control access to the model.
- Ideal architecture for microservices-based applications and
deployments in cloud environments, including Kubernetes and
OpenShift clusters.
- Efficient resource utilization with horizontal and vertical inference
scaling.
.. figure:: https://user-images.githubusercontent.com/91237924/215658773-4720df00-3b95-4a84-85a2-40f06138e914.png
:alt: ovms_diagram
ovms_diagram
Serving with OpenVINO Model Server
----------------------------------
OpenVINO Model Server (OVMS) is a high-performance system for serving
models. Implemented in C++ for scalability and optimized for deployment
on Intel architectures, the model server uses the same architecture and
API as TensorFlow Serving and KServe while applying OpenVINO for
inference execution. Inference service is provided via gRPC or REST API,
making deploying new algorithms and AI experiments easy.
.. figure:: https://user-images.githubusercontent.com/91237924/215658767-0e0fc221-aed0-4db1-9a82-6be55f244dba.png
:alt: ovms_high_level
ovms_high_level
To quickly start using OpenVINO™ Model Server, follow these steps:
Step 1: Prepare Docker
----------------------
Install `Docker Engine <https://docs.docker.com/engine/install/>`__,
including its
`post-installation <https://docs.docker.com/engine/install/linux-postinstall/>`__
steps, on your development system. To verify the installation, run the
following command. When Docker is ready, it will pull a test
image and display a confirmation message.
.. code:: ipython3
!docker run hello-world
.. parsed-literal::
Hello from Docker!
This message shows that your installation appears to be working correctly.
To generate this message, Docker took the following steps:
1. The Docker client contacted the Docker daemon.
2. The Docker daemon pulled the "hello-world" image from the Docker Hub.
(amd64)
3. The Docker daemon created a new container from that image which runs the
executable that produces the output you are currently reading.
4. The Docker daemon streamed that output to the Docker client, which sent it
to your terminal.
To try something more ambitious, you can run an Ubuntu container with:
$ docker run -it ubuntu bash
Share images, automate workflows, and more with a free Docker ID:
https://hub.docker.com/
For more examples and ideas, visit:
https://docs.docker.com/get-started/
Step 2: Preparing a Model Repository
------------------------------------
The models need to be placed and mounted in a particular directory
structure, according to the following rules:
::
tree models/
models/
├── model1
│ ├── 1
│ │ ├── ir_model.bin
│ │ └── ir_model.xml
│ └── 2
│ ├── ir_model.bin
│ └── ir_model.xml
├── model2
│ └── 1
│ ├── ir_model.bin
│ ├── ir_model.xml
│ └── mapping_config.json
├── model3
│ └── 1
│ └── model.onnx
├── model4
│ └── 1
│ ├── model.pdiparams
│ └── model.pdmodel
└── model5
└── 1
└── TF_frozen_model.pb
- Each model should be stored in a dedicated directory, for example,
model1 and model2.
- Each model directory should include a sub-folder for each of its
versions (1,2, etc). The versions and their folder names should be
positive integer values.
- Note that in execution, the versions are enabled according to a
pre-defined version policy. If the client does not specify the
version number in parameters, by default, the latest version is
served.
- Every version folder must include model files, that is, ``.bin`` and
``.xml`` for OpenVINO IR, ``.onnx`` for ONNX, ``.pdiparams`` and
``.pdmodel`` for PaddlePaddle, and ``.pb`` for TensorFlow. The file
name can be arbitrary.
.. code:: ipython3
import os
import shutil
dedicated_dir = "models"
model_name = "detection"
model_version = "1"
MODEL_DIR = f"{dedicated_dir}/{model_name}/{model_version}"
XML_PATH = "../004-hello-detection/model/horizontal-text-detection-0001.xml"
BIN_PATH = "../004-hello-detection/model/horizontal-text-detection-0001.bin"
os.makedirs(MODEL_DIR, exist_ok=True)
shutil.copy(XML_PATH, MODEL_DIR)
shutil.copy(BIN_PATH, MODEL_DIR)
print(f"Model Copied to \"./{MODEL_DIR}\".")
.. parsed-literal::
Model Copied to "./models/detection/1".
Step 3: Start the Model Server Container
----------------------------------------
Pull and start the container:
.. code:: ipython3
!docker run -d --rm --name="ovms" -v $(pwd)/models:/models -p 9000:9000 openvino/model_server:latest --model_path /models/detection/ --model_name detection --port 9000
.. parsed-literal::
7bf50596c18d5ad93d131eb9e435439dfb3cedf994518c5e89cc7727f5d3530e
Check whether the OVMS container is running normally:
.. code:: ipython3
!docker ps | grep ovms
.. parsed-literal::
7bf50596c18d openvino/model_server:latest "/ovms/bin/ovms --mo…" Less than a second ago Up Less than a second 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp ovms
The required Model Server parameters are listed below. For additional
configuration options, see the `Model Server Parameters
section <https://docs.openvino.ai/latest/ovms_docs_parameters.html#doxid-ovms-docs-parameters>`__.
.. raw:: html
<table class="table">
.. raw:: html
<colgroup>
.. raw:: html
<col style="width: 20%" />
.. raw:: html
<col style="width: 80%" />
.. raw:: html
</colgroup>
.. raw:: html
<tbody>
.. raw:: html
<tr class="row-odd">
.. raw:: html
<td>
.. raw:: html
<p>
rm
.. raw:: html
</p>
.. raw:: html
</td>
.. raw:: html
<td>
.. container:: line-block
.. container:: line
removes the container automatically when it exits
.. raw:: html
</td>
.. raw:: html
</tr>
.. raw:: html
<tr class="row-even">
.. raw:: html
<td>
.. raw:: html
<p>
-d
.. raw:: html
</p>
.. raw:: html
</td>
.. raw:: html
<td>
.. container:: line-block
.. container:: line
runs the container in the background
.. raw:: html
</td>
.. raw:: html
</tr>
.. raw:: html
<tr class="row-odd">
.. raw:: html
<td>
.. raw:: html
<p>
-v
.. raw:: html
</p>
.. raw:: html
</td>
.. raw:: html
<td>
.. container:: line-block
.. container:: line
defines how to mount the model folder in the Docker container
.. raw:: html
</td>
.. raw:: html
</tr>
.. raw:: html
<tr class="row-even">
.. raw:: html
<td>
.. raw:: html
<p>
-p
.. raw:: html
</p>
.. raw:: html
</td>
.. raw:: html
<td>
.. container:: line-block
.. container:: line
exposes the model serving port outside the Docker container
.. raw:: html
</td>
.. raw:: html
</tr>
.. raw:: html
<tr class="row-odd">
.. raw:: html
<td>
.. raw:: html
<p>
openvino/model_server:latest
.. raw:: html
</p>
.. raw:: html
</td>
.. raw:: html
<td>
.. container:: line-block
.. container:: line
represents the image name; the ovms binary is the Docker entry
point
.. container:: line
varies by tag and build process - see tags:
https://hub.docker.com/r/openvino/model_server/tags/ for a full
tag list.
.. raw:: html
</td>
.. raw:: html
</tr>
.. raw:: html
<tr class="row-even">
.. raw:: html
<td>
.. raw:: html
<p>
model_path
.. raw:: html
</p>
.. raw:: html
</td>
.. raw:: html
<td>
.. container:: line-block
.. container:: line
model location, which can be:
.. container:: line
a Docker container path that is mounted during start-up
.. container:: line
a Google Cloud Storage path gs://<bucket>/<model_path>
.. container:: line
an AWS S3 path s3://<bucket>/<model_path>
.. container:: line
an Azure blob path az://<container>/<model_path>
.. raw:: html
</td>
.. raw:: html
</tr>
.. raw:: html
<tr class="row-odd">
.. raw:: html
<td>
.. raw:: html
<p>
model_name
.. raw:: html
</p>
.. raw:: html
</td>
.. raw:: html
<td>
.. container:: line-block
.. container:: line
the name of the model in the model_path
.. raw:: html
</td>
.. raw:: html
</tr>
.. raw:: html
<tr class="row-even">
.. raw:: html
<td>
.. raw:: html
<p>
port
.. raw:: html
</p>
.. raw:: html
</td>
.. raw:: html
<td>
.. container:: line-block
.. container:: line
the gRPC server port
.. raw:: html
</td>
.. raw:: html
</tr>
.. raw:: html
<tr class="row-odd">
.. raw:: html
<td>
.. raw:: html
<p>
rest_port
.. raw:: html
</p>
.. raw:: html
</td>
.. raw:: html
<td>
.. container:: line-block
.. container:: line
the REST server port
.. raw:: html
</td>
.. raw:: html
</tr>
.. raw:: html
</tbody>
.. raw:: html
</table>
If the serving port ``9000`` is already in use, please switch it to
another available port on your system. For example:\ ``-p 9020:9000``
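For reference, a variant of the command that additionally exposes the REST API could look like the cell below. This variant is only an illustration and is not used in the rest of this notebook; ``9001`` is an arbitrarily chosen host port.

.. code:: ipython3

    !docker run -d --rm --name="ovms" -v $(pwd)/models:/models -p 9000:9000 -p 9001:9001 openvino/model_server:latest --model_path /models/detection/ --model_name detection --port 9000 --rest_port 9001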
Step 4: Prepare the Example Client Components
---------------------------------------------
OpenVINO Model Server exposes two sets of inference APIs: one compatible with
``TensorFlow Serving`` and one compatible with ``KServe``.
Both APIs work over ``gRPC`` and ``REST`` interfaces.
Supporting two sets of APIs makes OpenVINO Model Server easier to plug
into existing systems that already leverage one of these APIs for
inference. This example demonstrates how to write a TensorFlow
Serving API client for object detection.
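The same TensorFlow Serving API is also available over REST. The sketch below is illustrative only: it assumes the container was started with the ``--rest_port 9001`` variant shown earlier, and it reuses an ``input_image`` array prepared the same way as in the gRPC example later in this section.

.. code:: ipython3

    import requests

    # Base URL of the TensorFlow Serving compatible REST API (hypothetical port mapping).
    url = "http://localhost:9001/v1/models/detection"

    # Request the model status over REST.
    print(requests.get(url).json())

    # Request a prediction: the input tensor is sent as a nested list under "instances".
    payload = {"instances": input_image.tolist()}
    response = requests.post(f"{url}:predict", json=payload)
    print(response.json().keys())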
Prerequisites
~~~~~~~~~~~~~
Install necessary packages.
.. code:: ipython3
!pip install -q ovmsclient
.. parsed-literal::
Collecting ovmsclient
Downloading ovmsclient-2022.3-py3-none-any.whl (163 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 164.0/164.0 KB 2.1 MB/s eta 0:00:00
Requirement already satisfied: numpy>=1.16.6 in /home/adrian/repos/openvino_notebooks_adrian/venv/lib/python3.9/site-packages (from ovmsclient) (1.23.4)
Requirement already satisfied: requests>=2.27.1 in /home/adrian/repos/openvino_notebooks_adrian/venv/lib/python3.9/site-packages (from ovmsclient) (2.27.1)
Collecting grpcio>=1.47.0
Downloading grpcio-1.51.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.8 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.8/4.8 MB 5.6 MB/s eta 0:00:00
Requirement already satisfied: protobuf>=3.19.4 in /home/adrian/repos/openvino_notebooks_adrian/venv/lib/python3.9/site-packages (from ovmsclient) (3.19.6)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/adrian/repos/openvino_notebooks_adrian/venv/lib/python3.9/site-packages (from requests>=2.27.1->ovmsclient) (1.26.9)
Requirement already satisfied: idna<4,>=2.5 in /home/adrian/repos/openvino_notebooks_adrian/venv/lib/python3.9/site-packages (from requests>=2.27.1->ovmsclient) (3.3)
Requirement already satisfied: certifi>=2017.4.17 in /home/adrian/repos/openvino_notebooks_adrian/venv/lib/python3.9/site-packages (from requests>=2.27.1->ovmsclient) (2021.10.8)
Requirement already satisfied: charset-normalizer~=2.0.0 in /home/adrian/repos/openvino_notebooks_adrian/venv/lib/python3.9/site-packages (from requests>=2.27.1->ovmsclient) (2.0.12)
Installing collected packages: grpcio, ovmsclient
Attempting uninstall: grpcio
Found existing installation: grpcio 1.34.1
Uninstalling grpcio-1.34.1:
Successfully uninstalled grpcio-1.34.1
Successfully installed grpcio-1.51.3 ovmsclient-2022.3
WARNING: You are using pip version 22.0.4; however, version 23.0.1 is available.
You should consider upgrading via the '/home/adrian/repos/openvino_notebooks_adrian/venv/bin/python -m pip install --upgrade pip' command.
Imports
~~~~~~~
.. code:: ipython3
import cv2
import numpy as np
import matplotlib.pyplot as plt
from ovmsclient import make_grpc_client
Request Model Status
~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
address = "localhost:9000"
# Bind the grpc address to the client object
client = make_grpc_client(address)
model_status = client.get_model_status(model_name=model_name)
print(model_status)
.. parsed-literal::
{1: {'state': 'AVAILABLE', 'error_code': 0, 'error_message': 'OK'}}
Request Model Metadata
~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
model_metadata = client.get_model_metadata(model_name=model_name)
print(model_metadata)
.. parsed-literal::
{'model_version': 1, 'inputs': {'image': {'shape': [1, 3, 704, 704], 'dtype': 'DT_FLOAT'}}, 'outputs': {'1469_1470.0': {'shape': [-1], 'dtype': 'DT_FLOAT'}, '1078_1079.0': {'shape': [1000], 'dtype': 'DT_FLOAT'}, '1330_1331.0': {'shape': [36], 'dtype': 'DT_FLOAT'}, 'labels': {'shape': [-1], 'dtype': 'DT_INT32'}, '1267_1268.0': {'shape': [121], 'dtype': 'DT_FLOAT'}, '1141_1142.0': {'shape': [1000], 'dtype': 'DT_FLOAT'}, '1204_1205.0': {'shape': [484], 'dtype': 'DT_FLOAT'}, 'boxes': {'shape': [-1, 5], 'dtype': 'DT_FLOAT'}}}
Load input image
~~~~~~~~~~~~~~~~
.. code:: ipython3
# Text detection models expect an image in BGR format.
image = cv2.imread("../data/image/intel_rnb.jpg")
fp_image = image.astype("float32")
# Resize the image to meet network expected input sizes.
input_shape = model_metadata['inputs']['image']['shape']
height, width = input_shape[2], input_shape[3]
resized_image = cv2.resize(fp_image, (height, width))
# Reshape to the network input shape.
input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0)
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
.. parsed-literal::
<matplotlib.image.AxesImage at 0x7fee22d6ecd0>
.. image:: 117-model-server-with-output_files/117-model-server-with-output_20_1.png
Request Prediction on a Numpy Array
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
inputs = {"image": input_image}
# Run inference on model server and receive the result data
boxes = client.predict(inputs=inputs, model_name=model_name)['boxes']
# Remove zero only boxes.
boxes = boxes[~np.all(boxes == 0, axis=1)]
print(boxes)
.. parsed-literal::
[[3.9992419e+02 8.1032524e+01 5.6187299e+02 1.3619952e+02 5.3706491e-01]
[2.6189725e+02 6.8310547e+01 3.8541251e+02 1.2095630e+02 4.7559953e-01]
[6.1644586e+02 2.8008759e+02 6.6627545e+02 3.1178854e+02 4.4982004e-01]
[2.0762042e+02 6.2798470e+01 2.3444728e+02 1.0706525e+02 3.7216505e-01]
[5.1742780e+02 5.5603595e+02 5.4927539e+02 5.8736023e+02 3.2588077e-01]
[2.2261986e+01 4.5406548e+01 1.8868817e+02 1.0225631e+02 3.0407205e-01]]
Visualization
~~~~~~~~~~~~~
.. code:: ipython3
# For each detection, the description is in the [x_min, y_min, x_max, y_max, conf] format:
# The image passed here is in BGR format with changed width and height. To display it in colors expected by matplotlib, use cvtColor function
def convert_result_to_image(bgr_image, resized_image, boxes, threshold=0.3, conf_labels=True):
# Define colors for boxes and descriptions.
colors = {"red": (255, 0, 0), "green": (0, 255, 0)}
# Fetch the image shapes to calculate a ratio.
(real_y, real_x), (resized_y, resized_x) = bgr_image.shape[:2], resized_image.shape[:2]
ratio_x, ratio_y = real_x / resized_x, real_y / resized_y
# Convert the base image from BGR to RGB format.
rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
# Iterate through non-zero boxes.
for box in boxes:
# Pick a confidence factor from the last place in an array.
conf = box[-1]
if conf > threshold:
# Convert float to int and multiply corner position of each box by x and y ratio.
# If the bounding box is found at the top of the image,
# position the upper box bar little lower to make it visible on the image.
(x_min, y_min, x_max, y_max) = [
int(max(corner_position * ratio_y, 10)) if idx % 2
else int(corner_position * ratio_x)
for idx, corner_position in enumerate(box[:-1])
]
# Draw a box based on the position, parameters in rectangle function are: image, start_point, end_point, color, thickness.
rgb_image = cv2.rectangle(rgb_image, (x_min, y_min), (x_max, y_max), colors["green"], 3)
# Add text to the image based on position and confidence.
# Parameters in text function are: image, text, bottom-left_corner_textfield, font, font_scale, color, thickness, line_type.
if conf_labels:
rgb_image = cv2.putText(
rgb_image,
f"{conf:.2f}",
(x_min, y_min - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.8,
colors["red"],
1,
cv2.LINE_AA,
)
return rgb_image
.. code:: ipython3
plt.figure(figsize=(10, 6))
plt.axis("off")
plt.imshow(convert_result_to_image(image, resized_image, boxes, conf_labels=False))
.. parsed-literal::
<matplotlib.image.AxesImage at 0x7fee219e4df0>
.. image:: 117-model-server-with-output_files/117-model-server-with-output_25_1.png
To stop and remove the model server container, you can use the following
command:
.. code:: ipython3
!docker stop ovms
.. parsed-literal::
ovms
References
----------
1. `OpenVINO™ Model
Server <https://docs.openvino.ai/latest/ovms_what_is_openvino_model_server.html>`__
2. `openvinotoolkit/model_server <https://github.com/openvinotoolkit/model_server/>`__
View File
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e20e0cc48768c8a971c0e0cd72666c63f0bd5940256a1d43b0a2de2f7874eeef
size 112408
View File
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:df4dbdbd56ef78ad7619bfb7fcfca6c96dccce598b3a5263486387b186e8d08f
size 232667
View File
@ -0,0 +1,8 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/117-model-server-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/117-model-server-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="117-model-server-with-output_20_1.png">117-model-server-with-output_20_1.png</a> 30-May-2023 00:08 112408
<a href="117-model-server-with-output_25_1.png">117-model-server-with-output_25_1.png</a> 30-May-2023 00:08 232667
</pre><hr></body>
</html>
View File
@ -0,0 +1,624 @@
Optimize Preprocessing
======================
When input data does not fit the model input tensor perfectly,
additional operations/steps are needed to transform the data to the
format expected by the model. This tutorial demonstrates how it could be
performed with Preprocessing API. Preprocessing API is an easy-to-use
instrument, that enables integration of preprocessing steps into an
execution graph and performing it on a selected device, which can
improve device utilization. For more information about Preprocessing
API, see this
`overview <https://docs.openvino.ai/latest/openvino_docs_OV_UG_Preprocessing_Overview.html#>`__
and
`details <https://docs.openvino.ai/latest/openvino_docs_OV_UG_Preprocessing_Details.html>`__
This tutorial include following steps: - Downloading the model. - Setup
preprocessing with ModelOptimizer, loading the model and inference with
original image. - Setup preprocessing with Preprocessing API, loading
the model and inference with original image. - Fitting image to the
model input type and inference with prepared image. - Comparing results
on one picture. - Comparing performance.
Settings
--------
Imports
-------
.. code:: ipython3
import cv2
import time
import numpy as np
import tensorflow as tf
from pathlib import Path
from openvino.tools import mo
import matplotlib.pyplot as plt
from openvino.runtime import Core, serialize
.. parsed-literal::
2023-05-29 23:08:10.880625: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-29 23:08:10.915307: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-29 23:08:11.460713: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
Setup image and device
~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
image_path = "../data/image/coco.jpg"
device = "CPU"
# device = "GPU"
Downloading the model
~~~~~~~~~~~~~~~~~~~~~
This tutorial uses the
`InceptionResNetV2 <https://www.tensorflow.org/api_docs/python/tf/keras/applications/inception_resnet_v2>`__.
The InceptionResNetV2 model is the second of the
`Inception <https://github.com/tensorflow/tpu/tree/master/models/experimental/inception>`__
family of models designed to perform image classification. Like other
Inception models, InceptionResNetV2 has been pre-trained on the
`ImageNet <https://image-net.org/>`__ data set. For more details about
this family of models, see the `research
paper <https://arxiv.org/abs/1602.07261>`__.
Load the model by using `tf.keras.applications
api <https://www.tensorflow.org/api_docs/python/tf/keras/applications/inception_resnet_v2>`__
and save it to the disk.
.. code:: ipython3
model_name = "InceptionResNetV2"
model_dir = Path("model")
model_dir.mkdir(exist_ok=True)
model_path = model_dir / model_name
model = tf.keras.applications.InceptionResNetV2()
model.save(model_path)
.. parsed-literal::
2023-05-29 23:08:12.767813: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
.. parsed-literal::
WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.
.. parsed-literal::
WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 94). These functions will not be directly callable after loading.
.. parsed-literal::
INFO:tensorflow:Assets written to: model/InceptionResNetV2/assets
.. parsed-literal::
INFO:tensorflow:Assets written to: model/InceptionResNetV2/assets
Create core
~~~~~~~~~~~
.. code:: ipython3
core = Core()
Check the original parameters of image
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
image = cv2.imread(image_path)
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB));
print(f"The original shape of the image is {image.shape}")
print(f"The original data type of the image is {image.dtype}")
.. parsed-literal::
The original shape of the image is (577, 800, 3)
The original data type of the image is uint8
.. image:: 118-optimize-preprocessing-with-output_files/118-optimize-preprocessing-with-output_11_1.png
Convert model to OpenVINO IR and setup preprocessing steps with Model Optimizer
-------------------------------------------------------------------------------
Use Model Optimizer to convert a TensorFlow model to OpenVINO IR.
The ``mo.convert_model`` Python function will be used to convert the model
with `OpenVINO Model
Optimizer <https://docs.openvino.ai/latest/openvino_docs_MO_DG_Python_API.html>`__.
The function returns an instance of the OpenVINO Model class, which is ready to
use in the Python interface but can also be serialized to the OpenVINO IR format
for future execution using ``openvino.runtime.serialize``. The models
will be saved to the ``./model/ir_model/`` directory.
In this step, some conversions can be set up to reduce the amount
of work needed to process the input data before propagating it through the
network. These conversions will be inserted as additional input
pre-processing sub-graphs into the converted model.
Set up the following conversions:
- mean normalization with the ``mean_values`` parameter.
- scaling with the ``scale_values`` parameter.
- color conversion: the color format of the example image is ``BGR``, but the model requires ``RGB``, so ``reverse_input_channels=True`` is added to convert the image into the expected format.
The layout conversion can also be specified with the ``layout`` option.
More information and parameters are described in the `Embedding
Preprocessing Computation
article <https://docs.openvino.ai/latest/openvino_docs_MO_DG_Additional_Optimization_Use_Cases.html#embedding-preprocessing-computation>`__.
.. code:: ipython3
ir_path_mo_preprocess = model_dir / "ir_model" / f"{model_name}_mo_preproc.xml"
ov_model_mo_preprocess = None
if ir_path_mo_preprocess.exists():
ov_model_mo_preprocess = core.read_model(model=ir_path_mo_preprocess)
print(f"Model in OpenVINO format already exists: {ir_path_mo_preprocess}")
else:
ov_model_mo_preprocess = mo.convert_model(saved_model_dir=model_path,
model_name=model_path.name,
mean_values=[127.5,127.5,127.5],
scale_values=[127.5,127.5,127.5],
reverse_input_channels=True,
input_shape=[1,299,299,3])
serialize(ov_model_mo_preprocess, str(ir_path_mo_preprocess))
.. parsed-literal::
2023-05-29 23:08:40.329004: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 2
2023-05-29 23:08:40.329124: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2023-05-29 23:08:40.467499: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
Prepare image
~~~~~~~~~~~~~
.. code:: ipython3
def prepare_image_mo_preprocess(image_path, model):
img = cv2.imread(filename=image_path)
input_layer_ir = next(iter(model.inputs))
# N, H, W, C = batch size, height, width, number of channels
N, H, W, C = input_layer_ir.shape
# Resize image to the input size expected by the model.
img = cv2.resize(img, (H, W))
# Fit image data type to expected by the model value
img = np.float32(img)
# Reshape to match the input shape expected by the model.
input_tensor = np.expand_dims(img, axis=0)
return input_tensor
mo_pp_input_tensor = prepare_image_mo_preprocess(image_path, ov_model_mo_preprocess)
print(f"The shape of the image is {mo_pp_input_tensor.shape}")
print(f"The data type of the image is {mo_pp_input_tensor.dtype}")
.. parsed-literal::
The shape of the image is (1, 299, 299, 3)
The data type of the image is float32
Compile the model and perform inference
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
compiled_model_mo_pp = core.compile_model(model=ov_model_mo_preprocess, device_name=device)
output_layer = compiled_model_mo_pp.output(0)
result = compiled_model_mo_pp(mo_pp_input_tensor)[output_layer]
Setup preprocessing steps with Preprocessing API and perform inference
----------------------------------------------------------------------
Intuitively, the preprocessing API consists of the following parts (a compact preview follows the operations list below):
- Tensor - declares the user data format: shape, layout, precision, and color format of the actual user data.
- Steps - describes the sequence of preprocessing steps that need to be applied to the user data.
- Model - specifies the model data format. Usually, precision and shape are already known for the model, and only additional information, such as layout, can be specified.
Graph modifications of a model must be performed after the model is
read from a drive and before it is loaded on the actual device.
Preprocessing supports the following operations (for more details, see
`here <https://docs.openvino.ai/latest/classov_1_1preprocess_1_1PreProcessSteps.html#doxid-classov-1-1preprocess-1-1-pre-process-steps-1aeacaf406d72a238e31a359798ebdb3b7>`__):
- Mean/scale normalization
- Converting precision
- Converting layout (transposing)
- Resizing the image
- Color conversion
- Custom operations
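As a quick preview of how these three parts fit together before the step-by-step walkthrough below, a minimal sketch could look like the cell below. Here, ``model`` is a placeholder for any single-input image model returned by ``Core.read_model``, and the normalization values are examples only.

.. code:: ipython3

    from openvino.preprocess import PrePostProcessor, ResizeAlgorithm
    from openvino.runtime import Layout, Type

    ppp_sketch = PrePostProcessor(model)
    # Tensor: describe the user data (dynamically sized U8 images in NHWC layout).
    ppp_sketch.input().tensor() \
        .set_element_type(Type.u8) \
        .set_spatial_dynamic_shape() \
        .set_layout(Layout("NHWC"))
    # Model: only the layout of the model input needs to be declared.
    ppp_sketch.input().model().set_layout(Layout("NHWC"))
    # Steps: convert precision, resize to the model size, then normalize.
    ppp_sketch.input().preprocess() \
        .convert_element_type(Type.f32) \
        .resize(ResizeAlgorithm.RESIZE_LINEAR) \
        .mean(127.5) \
        .scale(127.5)
    # Embed the preprocessing into the model graph.
    model_with_ppp = ppp_sketch.build()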
Convert model to OpenVINO IR with Model Optimizer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This time, no preprocessing options are specified during conversion.
.. code:: ipython3
ir_path = model_dir / "ir_model" / f"{model_name}.xml"
ppp_model = None
if ir_path.exists():
ppp_model = core.read_model(model=ir_path)
print(f"Model in OpenVINO format already exists: {ir_path}")
else:
ppp_model = mo.convert_model(saved_model_dir=model_path,
input_shape=[1,299,299,3])
serialize(ppp_model, str(ir_path))
.. parsed-literal::
2023-05-29 23:09:07.322003: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 2
2023-05-29 23:09:07.322139: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2023-05-29 23:09:07.323643: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
Create PrePostProcessor Object
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The
`PrePostProcessor() <https://docs.openvino.ai/latest/classov_1_1preprocess_1_1PrePostProcessor.html#doxid-classov-1-1preprocess-1-1-pre-post-processor>`__
class enables specifying the preprocessing and postprocessing steps for
a model.
.. code:: ipython3
from openvino.preprocess import PrePostProcessor
ppp = PrePostProcessor(ppp_model)
Declare User's Data Format
~~~~~~~~~~~~~~~~~~~~~~~~~~
To address a particular input of a model/preprocessor, use the
``PrePostProcessor.input(input_name)`` method. If the model has only one
input, a simple ``PrePostProcessor.input()`` call will get a reference to the
pre-processing builder for this input (a tensor, the steps, a model). In
general, when a model has multiple inputs/outputs, each one can be
addressed by a tensor name or by its index. By default, information
about the user's input tensor is initialized to the same data
(type/shape/etc.) as the model's input parameter. A user application can
override particular parameters according to the application's data. Refer to
the following
the following
`page <https://docs.openvino.ai/latest/classov_1_1preprocess_1_1InputTensorInfo.html#doxid-classov-1-1preprocess-1-1-input-tensor-info-1a98fb73ff9178c8c71d809ddf8927faf5>`__
for more information about parameters for overriding.
Below is all the specified input information:
- Precision is ``U8`` (unsigned 8-bit integer).
- Size is non-fixed; one specific shape could be set with ``.set_shape([1, 577, 800, 3])``.
- Layout is ``"NHWC"``, which means, for example: height=577, width=800, channels=3.
The height and width are necessary for resizing, and channels are needed
for mean/scale normalization.
.. code:: ipython3
from openvino.runtime import Type, Layout
# set up the format of the input data
ppp.input().tensor().set_element_type(Type.u8)\
.set_spatial_dynamic_shape()\
.set_layout(Layout('NHWC'))
.. parsed-literal::
<openvino._pyopenvino.preprocess.InputTensorInfo at 0x7f1eb04c0630>
Declaring Model Layout
~~~~~~~~~~~~~~~~~~~~~~
Model input already has information about precision and shape.
Preprocessing API is not intended to modify this. The only thing that
may be specified is input data
`layout <https://docs.openvino.ai/latest/openvino_docs_OV_UG_Layout_Overview.html#doxid-openvino-docs-o-v-u-g-layout-overview>`__.
.. code:: ipython3
input_layer_ir = next(iter(ppp_model.inputs))
print(f"The input shape of the model is {input_layer_ir.shape}")
ppp.input().model().set_layout(Layout('NHWC'))
.. parsed-literal::
The input shape of the model is [1,299,299,3]
.. parsed-literal::
<openvino._pyopenvino.preprocess.InputModelInfo at 0x7f1ff40f9e70>
Preprocessing Steps
~~~~~~~~~~~~~~~~~~~
Now, the sequence of preprocessing steps can be defined. For more
information about preprocessing steps, see
`here <https://docs.openvino.ai/latest/api/ie_python_api/_autosummary/openvino.preprocess.PreProcessSteps.html>`__.
Perform the following:
- Convert ``U8`` to ``FP32`` precision.
- Resize to the height/width of the model. Be aware that if a model accepts a dynamic size, for example, ``{?, 3, ?, ?}``, resize will not know how to resize the picture. Therefore, in this case, the target height/width should be specified. For more details, see also `PreProcessSteps.resize() <https://docs.openvino.ai/latest/classov_1_1preprocess_1_1PreProcessSteps.html#doxid-classov-1-1preprocess-1-1-pre-process-steps-1a40dab78be1222fee505ed6a13400efe6>`__.
- Subtract the mean from each channel.
- Divide each pixel value by the appropriate scale value.
There is no need to specify a layout conversion. If the layouts differ,
the conversion step will be added automatically.
.. code:: ipython3
from openvino.preprocess import ResizeAlgorithm
ppp.input().preprocess().convert_element_type(Type.f32) \
.resize(ResizeAlgorithm.RESIZE_LINEAR)\
.mean([127.5,127.5,127.5])\
.scale([127.5,127.5,127.5])
.. parsed-literal::
<openvino._pyopenvino.preprocess.PreProcessSteps at 0x7f1ea3068cb0>
Integrating Steps into a Model
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Once the preprocessing steps have been defined, the model can be
built. It is also possible to display the PrePostProcessor configuration
for debugging purposes.
.. code:: ipython3
print(f'Dump preprocessor: {ppp}')
model_with_preprocess = ppp.build()
.. parsed-literal::
Dump preprocessor: Input "Func/StatefulPartitionedCall/input/_0:0":
User's input tensor: [1,?,?,3], [N,H,W,C], u8
Model's expected tensor: [1,299,299,3], [N,H,W,C], f32
Pre-processing steps (4):
convert type (f32): ([1,?,?,3], [N,H,W,C], u8) -> ([1,?,?,3], [N,H,W,C], f32)
resize to model width/height: ([1,?,?,3], [N,H,W,C], f32) -> ([1,299,299,3], [N,H,W,C], f32)
mean (127.5,127.5,127.5): ([1,299,299,3], [N,H,W,C], f32) -> ([1,299,299,3], [N,H,W,C], f32)
scale (127.5,127.5,127.5): ([1,299,299,3], [N,H,W,C], f32) -> ([1,299,299,3], [N,H,W,C], f32)
Load model and perform inference
--------------------------------
.. code:: ipython3
def prepare_image_api_preprocess(image_path, model=None):
image = cv2.imread(image_path)
input_tensor = np.expand_dims(image, 0)
return input_tensor
compiled_model_with_preprocess_api = core.compile_model(model=ppp_model, device_name=device)
ppp_output_layer = compiled_model_with_preprocess_api.output(0)
ppp_input_tensor = prepare_image_api_preprocess(image_path)
results = compiled_model_with_preprocess_api(ppp_input_tensor)[ppp_output_layer][0]
Fit image manually and perform inference
----------------------------------------
Load the model
~~~~~~~~~~~~~~
.. code:: ipython3
model = core.read_model(model=ir_path)
compiled_model = core.compile_model(model=model, device_name=device)
Load image and fit it to model input
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
def manual_image_preprocessing(path_to_image, compiled_model):
input_layer_ir = next(iter(compiled_model.inputs))
# N, H, W, C = batch size, height, width, number of channels
N, H, W, C = input_layer_ir.shape
# load image, image will be resized to model input size and converted to RGB
img = tf.keras.preprocessing.image.load_img(image_path, target_size=(H, W), color_mode='rgb')
x = tf.keras.preprocessing.image.img_to_array(img)
x = np.expand_dims(x, axis=0)
# will scale input pixels between -1 and 1
input_tensor = tf.keras.applications.inception_resnet_v2.preprocess_input(x)
return input_tensor
input_tensor = manual_image_preprocessing(image_path, compiled_model)
print(f"The shape of the image is {input_tensor.shape}")
print(f"The data type of the image is {input_tensor.dtype}")
.. parsed-literal::
The shape of the image is (1, 299, 299, 3)
The data type of the image is float32
Perform inference
~~~~~~~~~~~~~~~~~
.. code:: ipython3
output_layer = compiled_model.output(0)
result = compiled_model(input_tensor)[output_layer]
Compare results
---------------
Compare results on one image
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
def check_results(input_tensor, compiled_model, imagenet_classes):
output_layer = compiled_model.output(0)
results = compiled_model(input_tensor)[output_layer][0]
top_indices = np.argsort(results)[-5:][::-1]
top_softmax = results[top_indices]
for index, softmax_probability in zip(top_indices, top_softmax):
print(f"{imagenet_classes[index]}, {softmax_probability:.5f}")
return top_indices, top_softmax
# Convert the inference result to a class name.
imagenet_classes = open("../data/datasets/imagenet/imagenet_2012.txt").read().splitlines()
imagenet_classes = ['background'] + imagenet_classes
# get result for inference with Model Optimizer preprocessing
print("Result of inference for preprocessing with ModelOptimizer:")
res = check_results(mo_pp_input_tensor, compiled_model_mo_pp, imagenet_classes)
print("\n")
# get result for inference with preprocessing api
print("Result of inference with Preprocessing API:")
res = check_results(ppp_input_tensor, compiled_model_with_preprocess_api, imagenet_classes)
print("\n")
# get result for inference with the manual preparing of the image
print("Result of inference with manual image setup:")
res = check_results(input_tensor, compiled_model, imagenet_classes)
.. parsed-literal::
Result of inference for preprocessing with ModelOptimizer:
n02099601 golden retriever, 0.78978
n02098413 Lhasa, Lhasa apso, 0.11520
n02108915 French bulldog, 0.01851
n02111129 Leonberg, 0.00819
n02097047 miniature schnauzer, 0.00293
Result of inference with Preprocessing API:
n02099601 golden retriever, 0.80560
n02098413 Lhasa, Lhasa apso, 0.10039
n02108915 French bulldog, 0.01915
n02111129 Leonberg, 0.00825
n02097047 miniature schnauzer, 0.00294
Result of inference with manual image setup:
n02098413 Lhasa, Lhasa apso, 0.76848
n02099601 golden retriever, 0.19304
n02111129 Leonberg, 0.00725
n02097047 miniature schnauzer, 0.00290
n02100877 Irish setter, red setter, 0.00116
Compare performance
~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
def check_performance(compiled_model, preprocessing_function=None):
num_images = 1000
start = time.perf_counter()
for _ in range(num_images):
input_tensor = preprocessing_function(image_path, compiled_model)
compiled_model(input_tensor)
end = time.perf_counter()
time_ir = end - start
return time_ir, num_images
time_ir, num_images = check_performance(compiled_model_mo_pp, prepare_image_mo_preprocess)
print(
f"IR model in OpenVINO Runtime/CPU with preprocessing API: {time_ir/num_images:.4f} "
f"seconds per image, FPS: {num_images/time_ir:.2f}"
)
time_ir, num_images = check_performance(compiled_model, manual_image_preprocessing)
print(
f"IR model in OpenVINO Runtime/CPU with preprocessing API: {time_ir/num_images:.4f} "
f"seconds per image, FPS: {num_images/time_ir:.2f}"
)
time_ir, num_images = check_performance(compiled_model_with_preprocess_api, prepare_image_api_preprocess)
print(
f"IR model in OpenVINO Runtime/CPU with preprocessing API: {time_ir/num_images:.4f} "
f"seconds per image, FPS: {num_images/time_ir:.2f}"
)
.. parsed-literal::
IR model in OpenVINO Runtime/CPU with preprocessing built with Model Optimizer: 0.0202 seconds per image, FPS: 49.59
IR model in OpenVINO Runtime/CPU with manual image preprocessing: 0.0157 seconds per image, FPS: 63.85
IR model in OpenVINO Runtime/CPU with the Preprocessing API: 0.0193 seconds per image, FPS: 51.92

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7511b8a4e5b047600d5fed14fbc7e9653a868bc5253abf1e0c3ef649b47bc408
size 387941

View File

@ -0,0 +1,7 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/118-optimize-preprocessing-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/118-optimize-preprocessing-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="118-optimize-preprocessing-with-output_11_1.png">118-optimize-preprocessing-with-output_11_1.png</a> 30-May-2023 00:08 387941
</pre><hr></body>
</html>

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9c5b82e40b1bf5b8f0dbc2c4ffabd21207bbc1b59c558aace759c3ab5ca6787f
size 959858

View File

@ -0,0 +1,7 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/201-vision-monodepth-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/201-vision-monodepth-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="201-vision-monodepth-with-output_14_0.png">201-vision-monodepth-with-output_14_0.png</a> 30-May-2023 00:09 959858
</pre><hr></body>
</html>

View File

@ -0,0 +1,675 @@
Single Image Super Resolution with OpenVINO™
============================================
Super Resolution is the process of enhancing the quality of an image by
increasing the pixel count using deep learning. This notebook shows
Single Image Super Resolution (SISR), which takes just one low-resolution
image. A model called
`single-image-super-resolution-1032 <https://docs.openvino.ai/latest/omz_models_model_single_image_super_resolution_1032.html>`__,
which is available in Open Model Zoo, is used in this tutorial. It is
based on the research paper cited below.
Y. Liu et al., `“An Attention-Based Approach for Single Image Super
Resolution,” <https://arxiv.org/abs/1807.06779>`__ 2018 24th
International Conference on Pattern Recognition (ICPR), 2018,
pp. 2777-2784, doi: 10.1109/ICPR.2018.8545760.
Preparation
-----------
Imports
~~~~~~~
.. code:: ipython3
import os
import time
import requests
from pathlib import Path
import cv2
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import HTML, FileLink
from IPython.display import Image as DisplayImage
from IPython.display import Pretty, ProgressBar, clear_output, display
from PIL import Image
from openvino.runtime import Core
Settings
~~~~~~~~
.. code:: ipython3
# Device to use for inference. For example, "CPU", or "GPU".
DEVICE = "CPU"
# 1032: 4x superresolution, 1033: 3x superresolution
MODEL_FILE = "model/single-image-super-resolution-1032.xml"
model_name = os.path.basename(MODEL_FILE)
model_xml_path = Path(MODEL_FILE)
Functions
~~~~~~~~~
.. code:: ipython3
def write_text_on_image(image: np.ndarray, text: str) -> np.ndarray:
"""
Write the specified text in the top left corner of the image
as white text with a black border.
:param image: image as numpy array with HWC shape, RGB or BGR
:param text: text to write
:return: image with written text, as numpy array
"""
font = cv2.FONT_HERSHEY_PLAIN
org = (20, 20)
font_scale = 4
font_color = (255, 255, 255)
line_type = 1
font_thickness = 2
text_color_bg = (0, 0, 0)
x, y = org
image = cv2.UMat(image)
(text_w, text_h), _ = cv2.getTextSize(text, font, font_scale, font_thickness)
result_im = cv2.rectangle(image, org, (x + text_w, y + text_h), text_color_bg, -1)
textim = cv2.putText(
result_im,
text,
(x, y + text_h + font_scale - 1),
font,
font_scale,
font_color,
font_thickness,
line_type,
)
return textim.get()
def load_image(path: str) -> np.ndarray:
"""
Loads an image from `path` and returns it as BGR numpy array.
:param path: path to an image filename or url
:return: image as numpy array, with BGR channel order
"""
if path.startswith("http"):
# Set User-Agent to Mozilla because some websites block requests
# with User-Agent Python.
response = requests.get(path, headers={"User-Agent": "Mozilla/5.0"})
array = np.asarray(bytearray(response.content), dtype="uint8")
image = cv2.imdecode(array, -1) # Loads the image as BGR.
else:
image = cv2.imread(path)
return image
def convert_result_to_image(result) -> np.ndarray:
"""
Convert network result of floating point numbers to image with integer
values from 0-255. Values outside this range are clipped to 0 and 255.
:param result: a single superresolution network result in N,C,H,W shape
"""
result = result.squeeze(0).transpose(1, 2, 0)
result *= 255
result[result < 0] = 0
result[result > 255] = 255
result = result.astype(np.uint8)
return result
def to_rgb(image_data) -> np.ndarray:
"""
Convert image_data from BGR to RGB
"""
return cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB)
Load the Superresolution Model
------------------------------
The Super Resolution model expects two inputs: the input image and a
bicubic interpolation of the input image to the target size of
1920x1080. It returns the super resolution version of the image in
1920x1080 (for the default superresolution model (1032)).
Load the model in OpenVINO Runtime with ``ie.read_model``, compile it
for the specified device with ``ie.compile_model``, and get information
about the network inputs and outputs.
.. code:: ipython3
ie = Core()
model = ie.read_model(model=model_xml_path)
compiled_model = ie.compile_model(model=model, device_name=DEVICE)
# Network inputs and outputs are dictionaries. Get the keys for the
# dictionaries.
original_image_key, bicubic_image_key = compiled_model.inputs
output_key = compiled_model.output(0)
# Get the expected input and target shapes. The last two dimensions of the
# input shapes are the height and width.
input_height, input_width = list(original_image_key.shape)[2:]
target_height, target_width = list(bicubic_image_key.shape)[2:]
upsample_factor = int(target_height / input_height)
print(f"The network expects inputs with a width of {input_width}, " f"height of {input_height}")
print(f"The network returns images with a width of {target_width}, " f"height of {target_height}")
print(
f"The image sides are upsampled by a factor of {upsample_factor}. "
f"The new image is {upsample_factor**2} times as large as the "
"original image"
)
.. parsed-literal::
The network expects inputs with a width of 480, height of 270
The network returns images with a width of 1920, height of 1080
The image sides are upsampled by a factor of 4. The new image is 16 times as large as the original image
Load and Show the Input Image
-----------------------------
**NOTE**: For the best results, use raw images (like TIFF, BMP or
PNG). Compressed images (like JPEG) may appear distorted after
processing with the super resolution model.
.. code:: ipython3
IMAGE_PATH = Path("../data/image/tower.jpg")
OUTPUT_PATH = Path("output/")
os.makedirs(str(OUTPUT_PATH), exist_ok=True)
full_image = load_image(str(IMAGE_PATH))
# Uncomment these lines to load a raw image as BGR.
# import rawpy
# with rawpy.imread(IMAGE_PATH) as raw:
# full_image = raw.postprocess()[:,:,(2,1,0)]
plt.imshow(to_rgb(full_image))
print(f"Showing full image with width {full_image.shape[1]} " f"and height {full_image.shape[0]}")
.. parsed-literal::
Showing full image with width 5976 and height 3770
.. image:: 202-vision-superresolution-image-with-output_files/202-vision-superresolution-image-with-output_10_1.png
Superresolution on a Crop of the Image
--------------------------------------
Crop the Input Image once.
~~~~~~~~~~~~~~~~~~~~~~~~~~
Crop to the network input size. Give the X (width) and Y (height)
coordinates for the top left corner of the crop. Set the ``CROP_FACTOR``
variable to 2 to make a crop that is larger than the network input size
(this only works with the ``single-image-super-resolution-1032`` model).
The crop will be downsampled before being propagated to the network. This
is useful for very high resolution images, where a crop of the network
input size is too small to show enough information. It can also improve
the result. Keep in mind that with a ``CROP_FACTOR`` of 2 the net
upsampling factor is halved. If the superresolution network
increases the side lengths of the image by a factor of 4, it upsamples a
480x270 crop to 1920x1080. With a ``CROP_FACTOR`` of 2, a 960x540 crop
is upsampled to the same 1920x1080: the side lengths are twice as large
as the crop size.
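As a quick check, the effect of ``CROP_FACTOR`` on the effective upsampling
can be computed directly. The minimal sketch below only uses the input and
output sizes reported above (480x270 and 1920x1080 for the 1032 model):

.. code:: ipython3

    # A minimal sketch: effective upsampling for the two supported crop factors,
    # assuming the 1032 model with a 480x270 input and a 1920x1080 output.
    for crop_factor in (1, 2):
        crop_width, crop_height = 480 * crop_factor, 270 * crop_factor
        effective_factor = 4 // crop_factor
        print(f"CROP_FACTOR={crop_factor}: a {crop_width}x{crop_height} crop is upsampled "
              f"to 1920x1080 ({effective_factor}x per side)")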
.. code:: ipython3
# Set `CROP_FACTOR` to 2 to crop with twice the input width and height
# This only works with the 1032 (4x) superresolution model!
# Set it to 1 to crop the image with the exact input size.
CROP_FACTOR = 2
adjusted_upsample_factor = upsample_factor // CROP_FACTOR
image_id = "flag" # A tag to recognize the saved images.
starty = 3200
startx = 0
# Perform the crop.
image_crop = full_image[
starty : starty + input_height * CROP_FACTOR,
startx : startx + input_width * CROP_FACTOR,
]
# Show the cropped image.
print(f"Showing image crop with width {image_crop.shape[1]} and " f"height {image_crop.shape[0]}.")
plt.imshow(to_rgb(image_crop));
.. parsed-literal::
Showing image crop with width 960 and height 540.
.. image:: 202-vision-superresolution-image-with-output_files/202-vision-superresolution-image-with-output_12_1.png
Reshape/Resize Crop for Model Input
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The input image is resized to a network input size, and reshaped to
(N,C,H,W) (N=number of images, C=number of channels, H=height, W=width).
The image is also resized to the network output size, with bicubic
interpolation. This bicubic image is the second input to the network.
.. code:: ipython3
# Resize the image to the target shape with bicubic interpolation.
bicubic_image = cv2.resize(
src=image_crop, dsize=(target_width, target_height), interpolation=cv2.INTER_CUBIC
)
# If required, resize the image to the input image shape.
if CROP_FACTOR > 1:
image_crop = cv2.resize(src=image_crop, dsize=(input_width, input_height))
# Reshape the images from (H,W,C) to (N,C,H,W).
input_image_original = np.expand_dims(image_crop.transpose(2, 0, 1), axis=0)
input_image_bicubic = np.expand_dims(bicubic_image.transpose(2, 0, 1), axis=0)
Do Inference
~~~~~~~~~~~~
Do inference and convert the inference result to an ``RGB`` image.
.. code:: ipython3
result = compiled_model(
{
original_image_key.any_name: input_image_original,
bicubic_image_key.any_name: input_image_bicubic,
}
)[output_key]
# Get inference result as numpy array and reshape to image shape and data type
result_image = convert_result_to_image(result)
Show and Save Results
~~~~~~~~~~~~~~~~~~~~~
Show the bicubic image and the enhanced superresolution image.
.. code:: ipython3
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(30, 15))
ax[0].imshow(to_rgb(bicubic_image))
ax[1].imshow(to_rgb(result_image))
ax[0].set_title("Bicubic")
ax[1].set_title("Superresolution")
.. parsed-literal::
Text(0.5, 1.0, 'Superresolution')
.. image:: 202-vision-superresolution-image-with-output_files/202-vision-superresolution-image-with-output_18_1.png
Save Superresolution and Bicubic Image Crop
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. code:: ipython3
# Add a text with "SUPER" or "BICUBIC" to the superresolution or bicubic image.
image_super = write_text_on_image(image=result_image, text="SUPER")
image_bicubic = write_text_on_image(image=bicubic_image, text="BICUBIC")
# Store the image and the results.
crop_image_path = Path(f"{OUTPUT_PATH.stem}/{image_id}_{adjusted_upsample_factor}x_crop.png")
superres_image_path = Path(
f"{OUTPUT_PATH.stem}/{image_id}_{adjusted_upsample_factor}x_crop_superres.png"
)
bicubic_image_path = Path(
f"{OUTPUT_PATH.stem}/{image_id}_{adjusted_upsample_factor}x_crop_bicubic.png"
)
cv2.imwrite(filename=str(crop_image_path), img=image_crop, params=[cv2.IMWRITE_PNG_COMPRESSION, 0])
cv2.imwrite(
filename=str(superres_image_path), img=image_super, params=[cv2.IMWRITE_PNG_COMPRESSION, 0]
)
cv2.imwrite(
filename=str(bicubic_image_path), img=image_bicubic, params=[cv2.IMWRITE_PNG_COMPRESSION, 0]
)
print(f"Images written to directory: {OUTPUT_PATH}")
.. parsed-literal::
Images written to directory: output
Write Animated GIF with Bicubic/Superresolution Comparison
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. code:: ipython3
print(image_bicubic.shape)
print(image_super.shape)
result_pil = Image.fromarray(to_rgb(image_super))
bicubic_pil = Image.fromarray(to_rgb(image_bicubic))
gif_image_path = Path(f"{OUTPUT_PATH.stem}/{image_id}_comparison_{adjusted_upsample_factor}x.gif")
result_pil.save(
fp=str(gif_image_path),
format="GIF",
append_images=[bicubic_pil],
save_all=True,
duration=1000,
loop=0,
)
# The `DisplayImage(str(gif_image_path))` function does not work in Colab.
DisplayImage(data=open(gif_image_path, "rb").read(), width=1920 // 2)
.. parsed-literal::
(1080, 1920, 3)
(1080, 1920, 3)
.. image:: 202-vision-superresolution-image-with-output_files/202-vision-superresolution-image-with-output_22_1.png
:width: 960px
Create a Video with Sliding Bicubic/Superresolution Comparison
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This may take a while. For the video, the superresolution and bicubic
image are resized by a factor of 2 to improve processing speed. This
gives an indication of the superresolution effect. The video is saved as
an ``.avi`` file. You can click on the link to download the video, or
open it directly from the output directory and play it locally.
.. code:: ipython3
FOURCC = cv2.VideoWriter_fourcc(*"MJPG")
result_video_path = Path(
f"{OUTPUT_PATH.stem}/{image_id}_crop_comparison_{adjusted_upsample_factor}x.avi"
)
video_target_height, video_target_width = (
result_image.shape[0] // 2,
result_image.shape[1] // 2,
)
out_video = cv2.VideoWriter(
filename=str(result_video_path),
fourcc=FOURCC,
fps=90,
frameSize=(video_target_width, video_target_height),
)
resized_result_image = cv2.resize(src=result_image, dsize=(video_target_width, video_target_height))
resized_bicubic_image = cv2.resize(
src=bicubic_image, dsize=(video_target_width, video_target_height)
)
progress_bar = ProgressBar(total=video_target_width)
progress_bar.display()
for i in range(video_target_width):
# Create a frame where the left part (until i pixels width) contains the
# superresolution image, and the right part (from i pixels width) contains
# the bicubic image.
comparison_frame = np.hstack(
(
resized_result_image[:, :i, :],
resized_bicubic_image[:, i:, :],
)
)
# Create a small black border line between the superresolution
# and bicubic part of the image.
comparison_frame[:, i - 1 : i + 1, :] = 0
out_video.write(image=comparison_frame)
progress_bar.progress = i
progress_bar.update()
out_video.release()
clear_output()
video_link = FileLink(result_video_path)
video_link.html_link_str = "<a href='%s' download>%s</a>"
display(HTML(f"The video has been saved to {video_link._repr_html_()}"))
.. raw:: html
The video has been saved to output/flag_crop_comparison_2x.avi<br>
Superresolution on the Full Input Image
----------------------------------------
Superresolution on the full image is done by dividing the image into
patches of equal size, doing superresolution on each patch, and then
stitching the resulting patches together again. For this demo, patches
near the border of the image are ignored.
Adjust the ``CROPLINES`` setting in the next cell if you see boundary
effects.
Compute patches
~~~~~~~~~~~~~~~
.. code:: ipython3
# Set the number of lines to crop from the network result to prevent
# boundary effects. The value of `CROPLINES` should be an integer >= 1.
CROPLINES = 10
# See Superresolution on one crop of the image for description of `CROP_FACTOR`.
CROP_FACTOR = 2
full_image_height, full_image_width = full_image.shape[:2]
# Compute x and y coordinates of left top of image tiles.
x_coords = list(range(0, full_image_width, input_width * CROP_FACTOR - CROPLINES * 2))
while full_image_width - x_coords[-1] < input_width * CROP_FACTOR:
x_coords.pop(-1)
y_coords = list(range(0, full_image_height, input_height * CROP_FACTOR - CROPLINES * 2))
while full_image_height - y_coords[-1] < input_height * CROP_FACTOR:
y_coords.pop(-1)
# Compute the width and height to crop the full image. The full image is
# cropped at the border to tiles of the input size.
crop_width = x_coords[-1] + input_width * CROP_FACTOR
crop_height = y_coords[-1] + input_height * CROP_FACTOR
# Compute the width and height of the target superresolution image.
new_width = (
x_coords[-1] * (upsample_factor // CROP_FACTOR)
+ target_width
- CROPLINES * 2 * (upsample_factor // CROP_FACTOR)
)
new_height = (
y_coords[-1] * (upsample_factor // CROP_FACTOR)
+ target_height
- CROPLINES * 2 * (upsample_factor // CROP_FACTOR)
)
print(f"The output image will have a width of {new_width} " f"and a height of {new_height}")
.. parsed-literal::
The output image will have a width of 11280 and a height of 7280
Do Inference
~~~~~~~~~~~~
The code below reads one patch of the image at a time. Each patch is
reshaped to the network input shape and upsampled with bicubic
interpolation to the target shape. Both the original and the bicubic
images are propagated through the network. The network result is a numpy
array with floating point values, with a shape of ``(1,3,1080,1920)``.
This array is converted to an 8-bit image with the ``(1080,1920,3)``
shape and written to a ``full_superresolution_image``. The bicubic image
is written to a ``full_bicubic_image`` for comparison. A progress bar
shows the progress of the process. Inference time is measured, as well
as total time to process each patch.
.. code:: ipython3
start_time = time.perf_counter()
patch_nr = 0
num_patches = len(x_coords) * len(y_coords)
progress_bar = ProgressBar(total=num_patches)
progress_bar.display()
# Crop image to fit tiles of the input size.
full_image_crop = full_image.copy()[:crop_height, :crop_width, :]
# Create an empty array of the target size.
full_superresolution_image = np.empty((new_height, new_width, 3), dtype=np.uint8)
# Create a bicubic upsampled image of the target size for comparison.
full_bicubic_image = cv2.resize(
src=full_image_crop[CROPLINES:-CROPLINES, CROPLINES:-CROPLINES, :],
dsize=(new_width, new_height),
interpolation=cv2.INTER_CUBIC,
)
total_inference_duration = 0
for y in y_coords:
for x in x_coords:
patch_nr += 1
# Crop the input image.
image_crop = full_image_crop[
y : y + input_height * CROP_FACTOR,
x : x + input_width * CROP_FACTOR,
]
# Resize the images to the target shape with bicubic interpolation
bicubic_image = cv2.resize(
src=image_crop,
dsize=(target_width, target_height),
interpolation=cv2.INTER_CUBIC,
)
if CROP_FACTOR > 1:
image_crop = cv2.resize(src=image_crop, dsize=(input_width, input_height))
input_image_original = np.expand_dims(image_crop.transpose(2, 0, 1), axis=0)
input_image_bicubic = np.expand_dims(bicubic_image.transpose(2, 0, 1), axis=0)
# Do inference.
inference_start_time = time.perf_counter()
result = compiled_model(
{
original_image_key.any_name: input_image_original,
bicubic_image_key.any_name: input_image_bicubic,
}
)[output_key]
inference_stop_time = time.perf_counter()
inference_duration = inference_stop_time - inference_start_time
total_inference_duration += inference_duration
# Reshape an inference result to the image shape and the data type.
result_image = convert_result_to_image(result)
# Add the inference result of this patch to the full superresolution
# image.
adjusted_upsample_factor = upsample_factor // CROP_FACTOR
new_y = y * adjusted_upsample_factor
new_x = x * adjusted_upsample_factor
full_superresolution_image[
new_y : new_y + target_height - CROPLINES * adjusted_upsample_factor * 2,
new_x : new_x + target_width - CROPLINES * adjusted_upsample_factor * 2,
] = result_image[
CROPLINES * adjusted_upsample_factor : -CROPLINES * adjusted_upsample_factor,
CROPLINES * adjusted_upsample_factor : -CROPLINES * adjusted_upsample_factor,
:,
]
progress_bar.progress = patch_nr
progress_bar.update()
if patch_nr % 10 == 0:
clear_output(wait=True)
progress_bar.display()
display(
Pretty(
f"Processed patch {patch_nr}/{num_patches}. "
f"Inference time: {inference_duration:.2f} seconds "
f"({1/inference_duration:.2f} FPS)"
)
)
end_time = time.perf_counter()
duration = end_time - start_time
clear_output(wait=True)
print(
f"Processed {num_patches} patches in {duration:.2f} seconds. "
f"Total patches per second (including processing): "
f"{num_patches/duration:.2f}.\nInference patches per second: "
f"{num_patches/total_inference_duration:.2f} "
)
.. parsed-literal::
Processed 42 patches in 4.63 seconds. Total patches per second (including processing): 9.08.
Inference patches per second: 20.79
Save superresolution image and the bicubic image
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
full_superresolution_image_path = Path(
f"{OUTPUT_PATH.stem}/full_superres_{adjusted_upsample_factor}x.jpg"
)
full_bicubic_image_path = Path(f"{OUTPUT_PATH.stem}/full_bicubic_{adjusted_upsample_factor}x.jpg")
cv2.imwrite(str(full_superresolution_image_path), full_superresolution_image)
cv2.imwrite(str(full_bicubic_image_path), full_bicubic_image);
.. code:: ipython3
bicubic_link = FileLink(full_bicubic_image_path)
image_link = FileLink(full_superresolution_image_path)
bicubic_link.html_link_str = "<a href='%s' download>%s</a>"
image_link.html_link_str = "<a href='%s' download>%s</a>"
display(
HTML(
"The images are saved in the images directory. You can also download "
"them by clicking on these links:"
f"<ul><li>{image_link._repr_html_()}<li>{bicubic_link._repr_html_()}"
)
)
.. raw:: html
The images are saved in the images directory. You can also download them by clicking on these links:<ul><li>output/full_bicubic_2x.jpg<br>

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c5c1332401a4fe7d1d22a5cdd4eed046ff8cb16e87d465b65b77600e402450dd
size 272963

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cdeb3cb86e2423a803844582cf965e5bc777eb8c288170ce117b664682e63147
size 356735

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:463999d9642365bff6e17a7bea49339b4cd2cd629a2782cf9cf4c298ae9db2b2
size 2896276

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:339272b851882ca236dc1d735637eb3dd2dda810c6dd2c71cfc935872287a144
size 3207711

View File

@ -0,0 +1,10 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/202-vision-superresolution-image-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/202-vision-superresolution-image-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="202-vision-superresolution-image-with-output_10_1.png">202-vision-superresolution-image-with-output_10..&gt;</a> 30-May-2023 00:09 272963
<a href="202-vision-superresolution-image-with-output_12_1.png">202-vision-superresolution-image-with-output_12..&gt;</a> 30-May-2023 00:08 356735
<a href="202-vision-superresolution-image-with-output_18_1.png">202-vision-superresolution-image-with-output_18..&gt;</a> 30-May-2023 00:09 2896276
<a href="202-vision-superresolution-image-with-output_22_1.png">202-vision-superresolution-image-with-output_22..&gt;</a> 30-May-2023 00:09 3207711
</pre><hr></body>
</html>

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,721 @@
Industrial Meter Reader
=======================
This notebook shows how to create an industrial meter reader with
OpenVINO Runtime. We use the pre-trained
`PPYOLOv2 <https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyolo>`__
PaddlePaddle model and
`DeepLabV3P <https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.5/configs/deeplabv3p>`__
to build a multi-task inference pipeline:
1. Run the detection model to find the meters and crop them from the original photo.
2. Run the segmentation model on the cropped meters to get the pointer and scale instances.
3. Find the location of the pointer in the scale map.
.. figure:: https://user-images.githubusercontent.com/91237924/166137115-67284fa5-f703-4468-98f4-c43d2c584763.png
:alt: workflow
workflow
Import
------
.. code:: ipython3
import os
import sys
from pathlib import Path
import numpy as np
import math
import cv2
import tarfile
import matplotlib.pyplot as plt
import openvino.runtime as ov
sys.path.append("../utils")
from notebook_utils import download_file, segmentation_map_to_image
Prepare the Model and Test Image
--------------------------------
Download PPYolov2 and DeepLabV3P pre-trained models from PaddlePaddle
community.
.. code:: ipython3
MODEL_DIR = "model"
DATA_DIR = "data"
DET_MODEL_LINK = "https://bj.bcebos.com/paddlex/examples2/meter_reader/meter_det_model.tar.gz"
SEG_MODEL_LINK = "https://bj.bcebos.com/paddlex/examples2/meter_reader/meter_seg_model.tar.gz"
DET_FILE_NAME = DET_MODEL_LINK.split("/")[-1]
SEG_FILE_NAME = SEG_MODEL_LINK.split("/")[-1]
IMG_LINK = "https://user-images.githubusercontent.com/91237924/170696219-f68699c6-1e82-46bf-aaed-8e2fc3fa5f7b.jpg"
IMG_FILE_NAME = IMG_LINK.split("/")[-1]
IMG_PATH = Path(f"{DATA_DIR}/{IMG_FILE_NAME}")
os.makedirs(MODEL_DIR, exist_ok=True)
download_file(DET_MODEL_LINK, directory=MODEL_DIR, show_progress=True)
file = tarfile.open(f"model/{DET_FILE_NAME}")
res = file.extractall("model")
if not res:
print(f"Detection Model Extracted to \"./{MODEL_DIR}\".")
else:
print("Error Extracting the Detection model. Please check the network.")
download_file(SEG_MODEL_LINK, directory=MODEL_DIR, show_progress=True)
file = tarfile.open(f"model/{SEG_FILE_NAME}")
res = file.extractall("model")
if not res:
print(f"Segmentation Model Extracted to \"./{MODEL_DIR}\".")
else:
print("Error Extracting the Segmentation model. Please check the network.")
download_file(IMG_LINK, directory=DATA_DIR, show_progress=True)
if IMG_PATH.is_file():
print(f"Test Image Saved to \"./{DATA_DIR}\".")
else:
print("Error Downloading the Test Image. Please check the network.")
.. parsed-literal::
model/meter_det_model.tar.gz: 0%| | 0.00/192M [00:00<?, ?B/s]
.. parsed-literal::
Detection Model Extracted to "./model".
.. parsed-literal::
model/meter_seg_model.tar.gz: 0%| | 0.00/94.9M [00:00<?, ?B/s]
.. parsed-literal::
Segmentation Model Extracted to "./model".
.. parsed-literal::
data/170696219-f68699c6-1e82-46bf-aaed-8e2fc3fa5f7b.jpg: 0%| | 0.00/183k [00:00<?, ?B/s]
.. parsed-literal::
Test Image Saved to "./data".
Configuration
-------------
Add parameter configuration for reading calculation.
.. code:: ipython3
METER_SHAPE = [512, 512]
CIRCLE_CENTER = [256, 256]
CIRCLE_RADIUS = 250
PI = math.pi
RECTANGLE_HEIGHT = 120
RECTANGLE_WIDTH = 1570
TYPE_THRESHOLD = 40
COLORMAP = np.array([[28, 28, 28], [238, 44, 44], [250, 250, 250]])
# There are 2 types of meters in test image datasets
METER_CONFIG = [{
'scale_interval_value': 25.0 / 50.0,
'range': 25.0,
'unit': "(MPa)"
}, {
'scale_interval_value': 1.6 / 32.0,
'range': 1.6,
'unit': "(MPa)"
}]
SEG_LABEL = {'background': 0, 'pointer': 1, 'scale': 2}
Load the Models
---------------
Define a common class for model loading and inference
.. code:: ipython3
# Initialize OpenVINO Runtime
ie_core = ov.Core()
class Model:
"""
This class represents an OpenVINO model object.
"""
def __init__(self, model_path, new_shape):
"""
Initialize the model object
Param:
model_path (string): path of inference model
new_shape (dict): new shape of model input
"""
self.model = ie_core.read_model(model=model_path)
self.model.reshape(new_shape)
self.compiled_model = ie_core.compile_model(model=self.model, device_name="CPU")
self.output_layer = self.compiled_model.output(0)
def predict(self, input_image):
"""
Run inference
Param:
input_image (np.array): input data
Returns:
result (np.array): model output data
"""
result = self.compiled_model(input_image)[self.output_layer]
return result
Data Process
------------
Including the preprocessing and postprocessing tasks of each model.
.. code:: ipython3
def det_preprocess(input_image, target_size):
"""
Preprocessing the input data for detection task
Param:
input_image (np.array): input data
target_size (int): the image size required by the model input layer
Returns:
img (np.array): preprocessed image
"""
img = cv2.resize(input_image, (target_size, target_size))
img = np.transpose(img, [2, 0, 1]) / 255
img = np.expand_dims(img, 0)
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
img -= img_mean
img /= img_std
return img.astype(np.float32)
def filter_bboxes(det_results, score_threshold):
"""
Filter out the detection results with low confidence
Param
det_results (list[dict]): detection results
score_threshold (float): confidence threshold
Returns
filtered_results (list[dict]): filtered detection results
"""
filtered_results = []
for i in range(len(det_results)):
if det_results[i, 1] > score_threshold:
filtered_results.append(det_results[i])
return filtered_results
def roi_crop(image, results, scale_x, scale_y):
"""
Crop the areas of the detected meters from the original image
Param
image (np.array): original image
results (list[dict]): detection results
scale_x (float): the scale value in x axis
scale_y (float): the scale value in y axis
Returns
roi_imgs (list[np.array]): the list of meter images
loc (list[int]): the list of meter locations
"""
roi_imgs = []
loc = []
for result in results:
bbox = result[2:]
xmin, ymin, xmax, ymax = [int(bbox[0] * scale_x), int(bbox[1] * scale_y), int(bbox[2] * scale_x), int(bbox[3] * scale_y)]
sub_img = image[ymin:(ymax + 1), xmin:(xmax + 1), :]
roi_imgs.append(sub_img)
loc.append([xmin, ymin, xmax, ymax])
return roi_imgs, loc
def roi_process(input_images, target_size, interp=cv2.INTER_LINEAR):
"""
Prepare the ROI images from the detection results and preprocess them
as input data for the segmentation task
Param
input_images (list[np.array]): the list of meter images
target_size (list|tuple): height and width of the resized image, e.g. [height, width]
interp (int): the interpolation method for image resizing
Returns
img_list (list[np.array]): the list of processed images
resize_list (list[np.array]): resized images, for visualization
"""
img_list = list()
resize_list = list()
for img in input_images:
img_shape = img.shape
scale_x = float(target_size[1]) / float(img_shape[1])
scale_y = float(target_size[0]) / float(img_shape[0])
resize_img = cv2.resize(img, None, None, fx=scale_x, fy=scale_y, interpolation=interp)
resize_list.append(resize_img)
resize_img = resize_img.transpose(2, 0, 1) / 255
img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1))
img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1))
resize_img -= img_mean
resize_img /= img_std
img_list.append(resize_img)
return img_list, resize_list
def erode(seg_results, erode_kernel):
"""
Erode the segmentation results to get clearer instances of the pointer and scale
Param
seg_results (list[dict]): segmentation results
erode_kernel (int): size of the erode kernel
Return
eroded_results (list[dict]): the eroded label maps
"""
kernel = np.ones((erode_kernel, erode_kernel), np.uint8)
eroded_results = seg_results
for i in range(len(seg_results)):
eroded_results[i] = cv2.erode(seg_results[i].astype(np.uint8), kernel)
return eroded_results
def circle_to_rectangle(seg_results):
"""
Switch the shape of label_map from circle to rectangle
Param
seg_results (list[dict]): segmentation results
Return
rectangle_meters (list[np.array]): the label maps unwrapped into rectangles
"""
rectangle_meters = list()
for i, seg_result in enumerate(seg_results):
label_map = seg_result
# The size of rectangle_meter is determined by RECTANGLE_HEIGHT and RECTANGLE_WIDTH
rectangle_meter = np.zeros((RECTANGLE_HEIGHT, RECTANGLE_WIDTH), dtype=np.uint8)
for row in range(RECTANGLE_HEIGHT):
for col in range(RECTANGLE_WIDTH):
theta = PI * 2 * (col + 1) / RECTANGLE_WIDTH
# The radius of meter circle will be mapped to the height of rectangle image
rho = CIRCLE_RADIUS - row - 1
y = int(CIRCLE_CENTER[0] + rho * math.cos(theta) + 0.5)
x = int(CIRCLE_CENTER[1] - rho * math.sin(theta) + 0.5)
rectangle_meter[row, col] = label_map[y, x]
rectangle_meters.append(rectangle_meter)
return rectangle_meters
def rectangle_to_line(rectangle_meters):
"""
Switch the dimension of the rectangular label maps from 2D to 1D
Param
rectangle_meters (list[np.array]): 2D rectangular label maps
Return
line_scales (list[np.array]): the list of scale values
line_pointers (list[np.array]): the list of pointer values
"""
line_scales = list()
line_pointers = list()
for rectangle_meter in rectangle_meters:
height, width = rectangle_meter.shape[0:2]
line_scale = np.zeros((width), dtype=np.uint8)
line_pointer = np.zeros((width), dtype=np.uint8)
for col in range(width):
for row in range(height):
if rectangle_meter[row, col] == SEG_LABEL['pointer']:
line_pointer[col] += 1
elif rectangle_meter[row, col] == SEG_LABEL['scale']:
line_scale[col] += 1
line_scales.append(line_scale)
line_pointers.append(line_pointer)
return line_scales, line_pointers
def mean_binarization(data_list):
"""
Binarize the data
Param
data_list (list[np.array]): input data
Return
binaried_data_list (list[np.array]): binarized output data
"""
batch_size = len(data_list)
binaried_data_list = data_list
for i in range(batch_size):
mean_data = np.mean(data_list[i])
width = data_list[i].shape[0]
for col in range(width):
if data_list[i][col] < mean_data:
binaried_data_list[i][col] = 0
else:
binaried_data_list[i][col] = 1
return binaried_data_list
def locate_scale(line_scales):
"""
Find the location of the center of each scale
Param
line_scales (list[np.array]): the list of binarized scale values
Return
scale_locations (list[list]): the locations of the scales for each meter
"""
batch_size = len(line_scales)
scale_locations = list()
for i in range(batch_size):
line_scale = line_scales[i]
width = line_scale.shape[0]
find_start = False
one_scale_start = 0
one_scale_end = 0
locations = list()
for j in range(width - 1):
if line_scale[j] > 0 and line_scale[j + 1] > 0:
if not find_start:
one_scale_start = j
find_start = True
if find_start:
if line_scale[j] == 0 and line_scale[j + 1] == 0:
one_scale_end = j - 1
one_scale_location = (one_scale_start + one_scale_end) / 2
locations.append(one_scale_location)
one_scale_start = 0
one_scale_end = 0
find_start = False
scale_locations.append(locations)
return scale_locations
def locate_pointer(line_pointers):
"""
Find the location of the center of the pointer
Param
line_pointers (list[np.array]): the list of binarized pointer values
Return
pointer_locations (list): the location of the pointer for each meter
"""
batch_size = len(line_pointers)
pointer_locations = list()
for i in range(batch_size):
line_pointer = line_pointers[i]
find_start = False
pointer_start = 0
pointer_end = 0
location = 0
width = line_pointer.shape[0]
for j in range(width - 1):
if line_pointer[j] > 0 and line_pointer[j + 1] > 0:
if not find_start:
pointer_start = j
find_start = True
if find_start:
if line_pointer[j] == 0 and line_pointer[j + 1] == 0 :
pointer_end = j - 1
location = (pointer_start + pointer_end) / 2
find_start = False
break
pointer_locations.append(location)
return pointer_locations
def get_relative_location(scale_locations, pointer_locations):
"""
Match the location of the pointer with the scales
Param
scale_locations (list[list]): the locations of the scales for each meter
pointer_locations (list): the location of the pointer for each meter
Return
pointed_scales (list[dict]): a list of dicts with:
'num_scales': total number of scales
'pointed_scale': predicted number of scales
"""
pointed_scales = list()
for scale_location, pointer_location in zip(scale_locations,
pointer_locations):
num_scales = len(scale_location)
pointed_scale = -1
if num_scales > 0:
for i in range(num_scales - 1):
if scale_location[i] <= pointer_location < scale_location[i + 1]:
pointed_scale = i + (pointer_location - scale_location[i]) / (scale_location[i + 1] - scale_location[i] + 1e-05) + 1
result = {'num_scales': num_scales, 'pointed_scale': pointed_scale}
pointed_scales.append(result)
return pointed_scales
def calculate_reading(pointed_scales):
"""
Calculate the value of each meter according to its type
Param
pointed_scales (list[dict]): the predicted scale position for each meter
Return
readings (list[float]): the list of values read from the meters
"""
readings = list()
batch_size = len(pointed_scales)
for i in range(batch_size):
pointed_scale = pointed_scales[i]
# Find the type of meter according to the total number of scales
if pointed_scale['num_scales'] > TYPE_THRESHOLD:
reading = pointed_scale['pointed_scale'] * METER_CONFIG[0]['scale_interval_value']
else:
reading = pointed_scale['pointed_scale'] * METER_CONFIG[1]['scale_interval_value']
readings.append(reading)
return readings
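To make the conversion from a pointed scale to a reading concrete, here is a
small worked example with made-up pointer positions (the values below are
illustrative only and do not come from the test image):

.. code:: ipython3

    # A minimal sketch with made-up inputs: how a pointed scale position is
    # converted into a reading for each meter type defined in METER_CONFIG.
    example_pointed_scales = [
        {'num_scales': 50, 'pointed_scale': 12.0},  # > TYPE_THRESHOLD -> 25 MPa meter, 12.0 * 25/50
        {'num_scales': 32, 'pointed_scale': 12.0},  # <= TYPE_THRESHOLD -> 1.6 MPa meter, 12.0 * 1.6/32
    ]
    print(calculate_reading(example_pointed_scales))  # approximately [6.0, 0.6]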
Main Function
-------------
Initialize the model and parameters.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The number of meters returned by the detection network can be arbitrary in
some scenarios, which means that the batch size of the segmentation network
input is a `dynamic
dimension <https://docs.openvino.ai/latest/openvino_docs_OV_UG_DynamicShapes.html>`__.
It should be specified as ``-1`` or ``ov::Dimension()`` instead
of a positive number used for static dimensions. In this case, to optimize
memory consumption, we can specify the lower and/or upper
bounds of the input batch size.
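For reference, the sketch below shows two equivalent ways to request a dynamic
batch dimension for the segmentation model: a fully dynamic dimension with
``-1``, and a bounded dimension with ``ov.Dimension(lower, upper)``, which is
the variant used in the next cell. The model path matches the one used below;
this cell is an illustration only and the reshaped model is not reused:

.. code:: ipython3

    # A minimal sketch: two ways to make the batch dimension of the
    # segmentation model dynamic. The bounded variant lets the plugin
    # optimize memory consumption.
    sketch_model = ie_core.read_model(model=f"{MODEL_DIR}/meter_seg_model/model.pdmodel")

    # Fully dynamic batch size.
    sketch_model.reshape({'image': [-1, 3, 512, 512]})
    print(sketch_model.input('image').get_partial_shape())

    # Batch size with lower and upper bounds (as used in the next cell).
    sketch_model.reshape({'image': [ov.Dimension(1, 2), 3, 512, 512]})
    print(sketch_model.input('image').get_partial_shape())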
.. code:: ipython3
img_file = f"{DATA_DIR}/{IMG_FILE_NAME}"
det_model_path = f"{MODEL_DIR}/meter_det_model/model.pdmodel"
det_model_shape = {'image': [1, 3, 608, 608], 'im_shape': [1, 2], 'scale_factor': [1, 2]}
seg_model_path = f"{MODEL_DIR}/meter_seg_model/model.pdmodel"
seg_model_shape = {'image': [ov.Dimension(1, 2), 3, 512, 512]}
erode_kernel = 4
score_threshold = 0.5
seg_batch_size = 2
input_shape = 608
# Initialize the model objects
detector = Model(det_model_path, det_model_shape)
segmenter = Model(seg_model_path, seg_model_shape)
# Visualize the original input photo
image = cv2.imread(img_file)
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.imshow(rgb_image)
.. parsed-literal::
<matplotlib.image.AxesImage at 0x7f33fc644490>
.. image:: 203-meter-reader-with-output_files/203-meter-reader-with-output_12_1.png
Run meter detection model
~~~~~~~~~~~~~~~~~~~~~~~~~
Detect the location of the meter and prepare the ROI images for
segmentation.
.. code:: ipython3
# Prepare the input data for meter detection model
im_shape = np.array([[input_shape, input_shape]]).astype('float32')
scale_factor = np.array([[1, 2]]).astype('float32')
input_image = det_preprocess(image, input_shape)
inputs_dict = {'image': input_image, "im_shape": im_shape, "scale_factor": scale_factor}
# Run meter detection model
det_results = detector.predict(inputs_dict)
# Filter out the bounding box with low confidence
filtered_results = filter_bboxes(det_results, score_threshold)
# Prepare the input data for meter segmentation model
scale_x = image.shape[1] / input_shape * 2
scale_y = image.shape[0] / input_shape
# Create the individual picture for each detected meter
roi_imgs, loc = roi_crop(image, filtered_results, scale_x, scale_y)
roi_imgs, resize_imgs = roi_process(roi_imgs, METER_SHAPE)
# Create the pictures of detection results
roi_stack = np.hstack(resize_imgs)
if cv2.imwrite(f"{DATA_DIR}/detection_results.jpg", roi_stack):
print("The detection result image has been saved as \"detection_results.jpg\" in data")
plt.imshow(cv2.cvtColor(roi_stack, cv2.COLOR_BGR2RGB))
.. parsed-literal::
The detection result image has been saved as "detection_results.jpg" in data
.. image:: 203-meter-reader-with-output_files/203-meter-reader-with-output_14_1.png
Run meter segmentation model
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Get the results of the segmentation task on the detected ROIs.
.. code:: ipython3
seg_results = list()
mask_list = list()
num_imgs = len(roi_imgs)
# Run meter segmentation model on all detected meters
for i in range(0, num_imgs, seg_batch_size):
batch = roi_imgs[i : min(num_imgs, i + seg_batch_size)]
seg_result = segmenter.predict({"image": np.array(batch)})
seg_results.extend(seg_result)
results = []
for i in range(len(seg_results)):
results.append(np.argmax(seg_results[i], axis=0))
seg_results = erode(results, erode_kernel)
# Create the pictures of segmentation results
for i in range(len(seg_results)):
mask_list.append(segmentation_map_to_image(seg_results[i], COLORMAP))
mask_stack = np.hstack(mask_list)
if cv2.imwrite(f"{DATA_DIR}/segmentation_results.jpg", cv2.cvtColor(mask_stack, cv2.COLOR_RGB2BGR)):
print("The segmentation result image has been saved as \"segmentation_results.jpg\" in data")
plt.imshow(mask_stack)
.. parsed-literal::
The segmentation result image has been saved as "segmentation_results.jpg" in data
.. image:: 203-meter-reader-with-output_files/203-meter-reader-with-output_16_1.png
Postprocess the model results and calculate the final readings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use the functions defined above to find the location of the pointer in the scale map.
.. code:: ipython3
# Find the pointer location in the scale map and calculate the meter readings
rectangle_meters = circle_to_rectangle(seg_results)
line_scales, line_pointers = rectangle_to_line(rectangle_meters)
binaried_scales = mean_binarization(line_scales)
binaried_pointers = mean_binarization(line_pointers)
scale_locations = locate_scale(binaried_scales)
pointer_locations = locate_pointer(binaried_pointers)
pointed_scales = get_relative_location(scale_locations, pointer_locations)
meter_readings = calculate_reading(pointed_scales)
rectangle_list = list()
# Plot the rectangle meters
for i in range(len(rectangle_meters)):
rectangle_list.append(segmentation_map_to_image(rectangle_meters[i], COLORMAP))
rectangle_meters_stack = np.hstack(rectangle_list)
if cv2.imwrite(f"{DATA_DIR}/rectangle_meters.jpg", cv2.cvtColor(rectangle_meters_stack, cv2.COLOR_RGB2BGR)):
print("The rectangle_meters result image has been saved as \"rectangle_meters.jpg\" in data")
plt.imshow(rectangle_meters_stack)
.. parsed-literal::
The rectangle_meters result image has been saved as "rectangle_meters.jpg" in data
.. image:: 203-meter-reader-with-output_files/203-meter-reader-with-output_18_1.png
Get the reading result on the meter picture
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code:: ipython3
# Create a final result photo with reading
for i in range(len(meter_readings)):
print("Meter {}: {:.3f}".format(i + 1, meter_readings[i]))
result_image = image.copy()
for i in range(len(loc)):
cv2.rectangle(result_image,(loc[i][0], loc[i][1]), (loc[i][2], loc[i][3]), (0, 150, 0), 3)
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.rectangle(result_image, (loc[i][0], loc[i][1]), (loc[i][0] + 100, loc[i][1] + 40), (0, 150, 0), -1)
cv2.putText(result_image, "#{:.3f}".format(meter_readings[i]), (loc[i][0],loc[i][1] + 25), font, 0.8, (255, 255, 255), 2, cv2.LINE_AA)
if cv2.imwrite(f"{DATA_DIR}/reading_results.jpg", result_image):
print("The reading results image has been saved as \"reading_results.jpg\" in data")
plt.imshow(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB))
.. parsed-literal::
Meter 1: 1.100
Meter 2: 6.185
The reading results image has been saved as "reading_results.jpg" in data
.. image:: 203-meter-reader-with-output_files/203-meter-reader-with-output_20_1.png
Try it with your meter photos!
--------------------------------

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e4e63f932cdaa94469f0f610ed8aba4aa921ac15de1e812710eed39e62773d07
size 170121

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8421fa74049d206099a84bca6e053b0cb22993138dc270ee31c7d3f7a1f45c83
size 190271

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:19a39e28d96ff64b2cee0dd685beadb67d1277d521c46bcc868c9fb58f14b3de
size 26914

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f31361d4f4a0fa08f795fce4c8d13ee8b954b43ad1d16542efe3afe56fbe1961
size 8966

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6a6ba3a2718bb7cde43661e132c804bf37f980491e0779f67a270ebb29e9bd79
size 170338

View File

@ -0,0 +1,11 @@
<html>
<head><title>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/203-meter-reader-with-output_files/</title></head>
<body bgcolor="white">
<h1>Index of /projects/ov-notebook/0.1.0-latest/20230529220816/dist/rst_files/203-meter-reader-with-output_files/</h1><hr><pre><a href="../">../</a>
<a href="203-meter-reader-with-output_12_1.png">203-meter-reader-with-output_12_1.png</a> 30-May-2023 00:08 170121
<a href="203-meter-reader-with-output_14_1.png">203-meter-reader-with-output_14_1.png</a> 30-May-2023 00:08 190271
<a href="203-meter-reader-with-output_16_1.png">203-meter-reader-with-output_16_1.png</a> 30-May-2023 00:08 26914
<a href="203-meter-reader-with-output_18_1.png">203-meter-reader-with-output_18_1.png</a> 30-May-2023 00:08 8966
<a href="203-meter-reader-with-output_20_1.png">203-meter-reader-with-output_20_1.png</a> 30-May-2023 00:08 170338
</pre><hr></body>
</html>

Some files were not shown because too many files have changed in this diff