From 30884a8161315fe9ead9ef08470e5f2ac4bfc3ae Mon Sep 17 00:00:00 2001 From: RavirajSitaram Date: Mon, 28 Mar 2022 14:24:11 +0530 Subject: [PATCH 1/9] Fix -Winfinite-recursion error reported by compiler (#11247) Signed-off-by: Raviraj P Sitaram --- src/core/include/ngraph/opsets/opset.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/include/ngraph/opsets/opset.hpp b/src/core/include/ngraph/opsets/opset.hpp index 100b3abfc33..8a8142d0a15 100644 --- a/src/core/include/ngraph/opsets/opset.hpp +++ b/src/core/include/ngraph/opsets/opset.hpp @@ -26,7 +26,7 @@ public: OpSet() = default; /// \brief Insert an op into the opset with a particular name and factory void insert(const std::string& name, const NodeTypeInfo& type_info, FactoryRegistry::Factory factory) { - return insert(name, type_info, std::move(factory)); + return ov::OpSet::insert(name, type_info, std::move(factory)); } /// \brief Insert OP_TYPE into the opset with a special name and the default factory template From 10698abc29d2c1cdfd015a894aa1cf525cc8d415 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Mon, 28 Mar 2022 12:18:29 +0300 Subject: [PATCH 2/9] Revert vpu custom kernel master (#11228) * Added original VPU custom kernel doc * Moved to new API * Added links from introduction * Fixed intro --- docs/Extensibility_UG/Intro.md | 3 +- docs/Extensibility_UG/VPU_Extensibility.md | 619 +++++++++++++++++++++ docs/snippets/CMakeLists.txt | 1 + docs/snippets/vpu/custom_op.cpp | 12 + 4 files changed, 634 insertions(+), 1 deletion(-) create mode 100644 docs/Extensibility_UG/VPU_Extensibility.md create mode 100644 docs/snippets/vpu/custom_op.cpp diff --git a/docs/Extensibility_UG/Intro.md b/docs/Extensibility_UG/Intro.md index 47de4c1f907..39f1c145460 100644 --- a/docs/Extensibility_UG/Intro.md +++ b/docs/Extensibility_UG/Intro.md @@ -9,6 +9,7 @@ openvino_docs_Extensibility_UG_add_openvino_ops openvino_docs_Extensibility_UG_Frontend_Extensions openvino_docs_Extensibility_UG_GPU + 
openvino_docs_IE_DG_Extensibility_DG_VPU_Kernel openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Customize_Model_Optimizer @endsphinxdirective @@ -28,7 +29,7 @@ Importing models with such operations requires additional steps. This guide illu Defining a new custom operation basically consist of two parts: -1. Definition of operation semantics in OpenVINO, the code that describes how this operation should be inferred consuming input tensor(s) and producing output tensor(s). +1. Definition of operation semantics in OpenVINO, the code that describes how this operation should be inferred consuming input tensor(s) and producing output tensor(s). How to implement execution kernels for [GPU](./GPU_Extensibility.md) and [VPU](./VPU_Extensibility.md) is described in separate guides. 2. Mapping rule that facilitates conversion of framework operation representation to OpenVINO defined operation semantics. diff --git a/docs/Extensibility_UG/VPU_Extensibility.md b/docs/Extensibility_UG/VPU_Extensibility.md new file mode 100644 index 00000000000..3b45e150140 --- /dev/null +++ b/docs/Extensibility_UG/VPU_Extensibility.md @@ -0,0 +1,619 @@ +# How to Implement Custom Layers for VPU (Intel® Neural Compute Stick 2) {#openvino_docs_IE_DG_Extensibility_DG_VPU_Kernel} + +To enable operations not supported by OpenVINO™ out of the box, you need a custom extension for Model Optimizer, a custom nGraph operation set, and a custom kernel for the device you will target. This page describes custom kernel support for one VPU, the Intel® Neural Compute Stick 2 device, which uses the MYRIAD device plugin. + +> **NOTES:** > * OpenCL\* custom layer support is available in the preview mode. > * This section assumes you are familiar with developing kernels using OpenCL. To customize your topology with an OpenCL layer, carry out the tasks described on this page: 1. Write and compile your OpenCL code with the standalone offline OpenCL compiler (`clc`). 2. 
Write a configuration file to bind the OpenCL kernel to the topology file (`.xml`) of the model IR. +3. Pass the configuration file to the OpenVINO™ Runtime with the model IR. + +## Compile OpenCL code for VPU (Intel® Neural Compute Stick 2) + +> **NOTE**: OpenCL compiler, targeting Intel® Neural Compute Stick 2 for the SHAVE* processor only, is redistributed with OpenVINO. +OpenCL support is provided by ComputeAorta* and is distributed under a license agreement between Intel® and Codeplay* Software Ltd. +The OpenCL toolchain for the Intel® Neural Compute Stick 2 supports offline compilation only, so first compile OpenCL C code using the standalone `clc` compiler. You can find the compiler binary at `/tools/cl_compiler`. + +> **NOTE**: By design, custom OpenCL layers support any OpenCL kernels written assuming OpenCL version 1.2. It also supports half float extension and is optimized for this type, because it is a native type for Intel® Movidius™ VPUs. +1. Prior to running a compilation, make sure that the following variables are set: + * `SHAVE_MA2X8XLIBS_DIR=/tools/cl_compiler/lib/` + * `SHAVE_LDSCRIPT_DIR=/tools/cl_compiler/ldscripts/` + * `SHAVE_MYRIAD_LD_DIR=/tools/cl_compiler/bin/` + * `SHAVE_MOVIASM_DIR=/tools/cl_compiler/bin/` +2. Run the compilation with the command below. You should use `--strip-binary-header` to make an OpenCL runtime-agnostic binary runnable with the OpenVINO™ Runtime. + ```bash + cd /tools/cl_compiler/bin + ./clc --strip-binary-header custom_layer.cl -o custom_layer.bin + ``` + +## Write a Configuration File + +To tie the topology IR for a layer you customize, prepare a configuration file, so that the OpenVINO™ Runtime can find parameters for your kernel and the execution work grid is described. 
+For example, consider the following OpenCL kernel signature: +```cpp +__kernel void reorg_nhwc(__global const half *src, __global half *out, int w, int h, int c, int stride); +``` +A configuration file for this kernel might be the following: +```xml + + + + + + + + + + + + + + +``` +Each custom layer is described with the `CustomLayer` node. It has the following nodes and attributes: + - Root node `CustomLayer` contains the following attributes: + - `name` – (Required) The name of the OpenVINO™ Runtime layer to bind the kernel with. + - `type` and `version` – (Required) Reserved for future use. Set them to `MVCL` and `1` respectively. + - `max-shaves` – (Optional) The maximum number of SHAVE cores that should be dedicated for the layer. It is useful for debugging concurrency issues or for resource saving that memory bound kernel does not scale well with the number of cores, so more resources can be left for the rest of a topology. + - Sub-node `Kernel` must contain the following attributes: + - `entry` – The name of your kernel function as you defined it in a source file. In the example above, it is `reorg_nhwc`. + - Node `Source` must contain the following attributes: + - `filename` – The path to a compiled binary relative to the XML configuration file. + - Sub-node `Parameters` – Describes parameters bindings. For more information, see the description below. + - Sub-node `WorkSizes` – Describes local and global work group sizes and the source for dimension deduction as a pair `direction,port`. In the example above, the work group is described relatively to the dimension of the input tensor that comes through port 0 in the IR. `global` and `local` work group configurations support any simple math expressions with +,-,\*,/, and () from `B`(batch), `Y`(height), `X`(width) and `F`(channels). + - Sub-node `Where` – Allows to customize bindings with the `key="value"` attribute. For example, to substitute only 3x3 convolutions, write `` in the binding xml. 
+ + Parameter description supports `Tensor` of one of tensor types such as `input`, `output`, `input_buffer`, `output_buffer` or `data`, `Scalar`, or `Data` nodes and has the following format: + - Each `Tensor` node of `input` or `output` type must contain the following attributes: + - `arg-name` – The name of a kernel parameter in the kernel signature. + - `type` – Node type: `input` or `output` as specified in the IR. + - `port-index` – A number of input/output ports as specified in the IR. + - `format` – The channel order in the tensor. Optional conversion layers are generated if the custom layer format is not compatible with formats of neighboring layers. `BFXY`, `BYXF`, and `ANY` formats are supported currently. + - Each `Tensor` node of `input_buffer` or `output_buffer` type must contain the following attributes: + - `arg-name` – The name of a kernel parameter in the kernel signature. + - `type` – Node type: `input_buffer` or `output_buffer`. Use the appropriate type to bind multiple kernels that correspond to different stages of the same layer. + - `port-index` – The unique identifier to bind by. + - `dim` – The dim source with the same `direction,port` format used for `WorkSizes` bindings. + - `size` – Amount of bytes needed. Current expression syntax supports only expression over dimensions of over selected input/output tensor or constants and might be expended in the future. + + Here is an example of multi-stage MVN layer binding: + ```xml + + + + + + + + + + + + + + + + + + + + + + + + + + ``` + - Each `Tensor` node that has the type `data` must contain the following attributes: + - `source` – A name of the blob as it is in the IR. Typical example is `weights` for convolution. + - `format` – Specifies the channel order in the tensor. Optional conversion layers are generated if the custom layer format is not. 
+ ```xml + + + + + + + + + + + + + ``` + - Each `Scalar` node must contain the following attributes: + - `arg-name` – The name of a kernel parameter in the kernel signature. + - `type` – `int` or `float` value. It is used for correct argument extraction from IR parameters. + - `source` – Contains the name of the parameter in the IR file or input/output (`I`/`O`, `In`/`On`, where `n` is a port number) + followed by dimension `B`(batch), `Y`(height), `X`(width), or `F`(channels). + + - Each `Data` node must contain the following attributes: + - `arg-name` – The name of a kernel parameter in the kernel signature. + - `type` – Node type. Currently, `local_data` is the only supported value, which defines buffer allocated in fast local on-chip memory. It is limited to 100KB for all `__local` and + `__private` arrays defined inside the kernel as well as all `__local` parameters passed to the kernel. Note that a manual-DMA extension requires double buffering. + If the custom layer is detected to run out of local memory, the inference fails. + - `dim` – The dim source with the same `direction,port` format used for `WorkSizes` bindings. + - `size` – Amount of bytes needed. The current expression syntax supports only expression over dimensions of over selected input/output tensor or constants and may be extended in the future. + The example binding below illustrates a kernel with two local buffers passed to the kernel. + ```xml + + + + + + + + + + + + + + +``` + +## Pass Configuration File to OpenVINO™ Runtime + +> **NOTE**: If both native and custom layer implementations are present, the custom kernel has a priority over the native one. 
Before loading the network that features the custom layers, provide a separate configuration file and load it using the ov::Core::set_property() method with the "CONFIG_KEY" key and the configuration file name as a value before loading the network that uses custom operations to the plugin: +
+@snippet docs/snippets/vpu/custom_op.cpp part0 +
+## Optimizing Kernels with OpenCL for VPU (Intel® Neural Compute Stick 2) +
+This section provides optimization guidelines on writing custom layers with OpenCL for VPU devices. Knowledge about general OpenCL
programming model and OpenCL kernel language is assumed and not a subject of this section. The OpenCL model mapping to VPU is described in the table below. +
+| OpenCL Model | VPU Mapping|
|-----|----|
| Device code | Executed on SHAVE cores |
| Private memory | Mapped to CMX internal memory, limited to 100KB per work group, valid only while the work group is executed |
| Local memory | Mapped to CMX internal memory, limited to 100KB per work group, valid only while the work group is executed |
| Global memory | Mapped to DDR, used to pass execution preserved parameters for inputs, outputs, and blobs |
| Work group | Executed on a single SHAVE core iterating over multiple work items |

Note that by the OpenCL specification, the work group execution order is not specified. This means that it is your
responsibility to ensure that race conditions among work groups are not introduced. Custom layer runtime splits the
work grid evenly among available compute resources and executes them in an arbitrary order. This static scheduling approach works best if the load is evenly spread out across work groups, which is a typical case for Deep Learning kernels. The following guidelines are recommended for work group partitioning:

1. Split work evenly across work groups.
2. Adjust work group granularity to maintain equal workload for all compute cores.
3. 
Set the maximum number of cores using the `max-shaves` attribute for the `CustomLayer` node. This keeps more resources for the rest of topology. It is also useful if the kernel scalability reached its limits, which may happen while optimizing memory bound kernels or kernels with poor parallelization. +4. Try an alternate data layout (`BFXY`/`BYXF`) for the kernel if it improves work group partitioning or data access patterns. +Consider not just specific layer boost, but full topology performance because data conversion layers would be automatically inserted +as appropriate. + +Offline OpenCL compiler (`clc`) features automatic vectorization over `get_global_id(0)` usage, if uniform access is detected. +For example, the kernel below could be automatically vectorized: +```cpp +__kernel void cvtf32f16(__global float* restrict inImage, __global half* restrict outImage, + float scale, float bais) +{ + int idx = get_global_id(0) + get_global_id(1) * get_global_size(0) + get_global_id(2) * get_global_size(0) * get_global_size(1); + outImage[idx] = convert_half(inImage[idx]*scale+bais); +} +``` +However, this work-group based vectorizer (WGV) conflicts with the default LLVM vectorizer based on superword level parallelism +(SLP) for the current compiler version. Manual vectorization is recommended to provide the best performance for non-uniform code +patterns. WGV works if and only if vector types are not used in the code. + +Here is a short list of optimization tips: + +1. Help auto-vectorizer ensure non-aliasing pointers for kernel parameters by putting `restrict` where possible. + - This can give a performance boost, especially for kernels with unrolling, like `ocl_grn` from the example below. + - Place `restrict` markers for kernels with manually vectorized codes. In the `ocl_grn` kernel below, the unrolled version without `restrict` is up to 20% slower than the most optimal one, which combines unrolling and `restrict`. +2. Put `#‍pragma unroll N` to your loop header. 
The compiler does not trigger unrolling by default, so it is your responsibility to +annotate the code with pragmas as appropriate. The `ocl_grn` version with `#‍pragma unroll 4` is up to 50% faster, most of which comes from unrolling the first loop, because LLVM, in general, is better in scheduling 3-stage loops (load-compute-store), while the fist loop + `variance += (float)(src_data[c*H*W + y*W + x] * src_data[c*H*W + y*W + x]);` is only 2-stage (load-compute). Pay +attention to unrolling such cases first. Unrolling factor is loop-dependent. Choose the smallest number that +still improves performance as an optimum between the kernel size and execution speed. For this specific kernel, changing the unroll factor from `4` to `6` results in the same performance, so unrolling factor equal to 4 is an optimum. For Intel® Neural Compute Stick 2, unrolling is conjugated with the automatic software pipelining for load, store, and compute stages: +```cpp +__kernel void ocl_grn(__global const half* restrict src_data, __global half* restrict dst_data, int C, float bias) +{ + int x = get_global_id(0); + int W = get_global_size(0); + int y = get_global_id(1); + int H = get_global_size(1); + float variance = bias + 1e-9f; + #pragma unroll 4 + for (int c = 0; c < C; c++) + variance += (float)(src_data[c*H*W + y*W + x] * src_data[c*H*W + y*W + x]); + variance = 1.f / native_sqrt(variance); + #pragma unroll 4 + for (int c = 0; c < C; c++) + dst_data[c*H*W + y*W + x] = (half)((float)src_data[c*H*W + y*W + x] * variance); +} +``` +To check the efficiency of WGV, you can compare performance of the kernel above with the kernel below, which is manually vectorized over width: +```cpp +__kernel void ocl_grn_line(__global const half* restrict src_data, __global half* restrict dst_data, int C, int W, float bias) +{ + int y = get_global_id(1); + int H = get_global_size(1); + for (int x = 0; x < W/8; x++) + { + float8 variance = (float8)(bias+1e-9f); + #pragma unroll 4 + for (int c = 0; c < 
C; c++) + { + __global const half8* restrict src_line = ((__global const half8 * restrict)(src_data + c*H*W + y*W)); + half8 sh = src_line[x]; + variance += convert_float8(sh*sh); + } + variance = 1.f/native_sqrt(variance); + #pragma unroll 4 + for (int c = 0; c < C; c++) + { + __global const half8* restrict src_line = ((__global const half8 * restrict)(src_data + c*H*W + y*W)); + __global half8* restrict dst_line = ((__global half8 * restrict)(dst_data + c*H*W + y*W)); + dst_line[x] = convert_half8(convert_float8(src_line[x])*variance); + } + } + for (int x = W/8*8; x < W; x++) + { + float variance = bias+1e-9f; + #pragma unroll 4 + for (int c = 0; c < C; c++) + variance += (float)(src_data[c*H*W + y*W + x]*src_data[c*H*W + y*W + x]); + variance = 1.f/native_sqrt(variance); + #pragma unroll 4 + for (int c = 0; c < C; c++) + dst_data[c*H*W + y*W + x] = (float)src_data[c*H*W + y*W + x]*variance; + } +} +``` +Both versions perform the same, but the second one has more complex code. + +3. If it is easy to predict the work group size, you can also use the `reqd_work_group_size` kernel attribute to ask the compiler +to unroll the code up to the local size of the work group. Note that if the kernel is actually executed with the +different work group configuration, the result is undefined. + +4. Prefer to use the `half` compute if it keeps reasonable accuracy. 16-bit float is a native type for Intel® Neural Compute Stick 2, most of the functions `half_*` are mapped to a single hardware instruction. +Use the standard `native_*` function for the rest of types. + +5. Prefer to use the `convert_half` function over `vstore_half` if conversion to 32-bit float is required. `convert_half` is mapped to a single hardware instruction. For the `cvtf32f16` kernel above, the line `outImage[idx] = convert_half(inImage[idx]*scale+bais);` is eight times slower than the code with `vstore_half`. + +6. Mind early exits. 
Early exit can be extremely costly for the current version of the `clc` compiler due to conflicts with the +auto-vectorizer. The generic advice would be to setup local size by `x` dimension equal to inputs or/and outputs width. +If it is impossible to define the work grid that exactly matches inputs or/and outputs to eliminate checks, for example, +`if (get_global_id(0) >= width) return`, use line-wise kernel variant with manual vectorization. +The kernel example below demonstrates the impact of early exits on kernel performance. + ```cpp + // Initial version + __kernel void reorg(const __global half* restrict src, __global half* restrict out, int stride) + { + int w = get_global_id(0); + int W = get_global_size(0); + int h = get_global_id(1); + int H = get_global_size(1); + int c = get_global_id(2); + int C = get_global_size(2); + int C2 = C/(stride*stride); + int offset = c / C2; + int c2 = c - C2 * offset; + int H2 = H*stride; + int W2 = W*stride; + int h2 = h*stride + offset / stride; + int w2 = w*stride + offset - stride * (offset / stride); + out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2]; + } + ``` +This `reorg` kernel is auto-vectorizable, but an input for YOLO v2 topology is `NCHW=<1,64,26,26>` and it is not multiple of vector width, which is `8` for `half` data type. As a result, the Inference Engine does not select the auto-vectorized kernel. +To compare performance of auto-vectorized and scalar version of the kernel, change the input size to`NCHW=<1,64,26,32>`. This enables the auto-vectorized version to be selected by the Inference Engine and can give you about 30% uplift. +Since the auto-vectorized version is faster, it makes sense to enable it for the YOLO v2 topology input size by setting the local size multiple of vector, for example, 32, and adjust global sizes accordingly. As a result, the execution work grid exceeds actual input dimension, so out-of-bound checks should be inserted. 
See the updated kernel version below:
   ```cpp
   // Version with out-of-bound checks added
   __kernel void reorg(const __global half* restrict src, __global half* restrict out, int W, int stride)
   {
       int w = get_global_id(0);
       w = min(w, W-1);
       int h = get_global_id(1);
       int H = get_global_size(1);
       int c = get_global_id(2);
       int C = get_global_size(2);
       int C2 = C/(stride*stride);
       int offset = c / C2;
       int c2 = c - C2 * offset;
       int H2 = H*stride;
       int W2 = W*stride;
       int h2 = h*stride + offset / stride;
       int w2 = w*stride + offset - stride * (offset / stride);
       out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
   }
   ```
This code performs the same as the initial kernel above (scalar) due to branching overhead. If you replace min/max expression `w = min(w, W-1);` with `if (w >= W) return;`, runtime increases up to 2x compared to the code without branching (initial version).
If branching is inevitable for your element-based kernel, it is recommended to change the scheme to line-based. See the kernel variant below:
```cpp
// Line-wise version
__kernel void reorg(const __global half* restrict src, __global half* restrict out, int H, int W, int stride)
{
    int h = min((int)get_global_id(0), H-1);
    int c = get_global_id(1);
    int C = get_global_size(1);
    int C2 = C/(stride*stride);
    int offset = c / C2;
    int c2 = c - C2 * offset;
    int H2 = H*stride;
    int W2 = W*stride;
    for (int w = 0; w < W; ++w)
    {
        int h2 = h*stride + offset / stride;
        int w2 = w*stride + offset - stride * (offset / stride);
        out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
    }
}
```
This decreases the execution time up to 40% against the best performing vectorized kernel without early exits (initial version).
7. Reuse computations among work items by using line-based kernels or sharing values through `__local` memory.
8. Improve data access locality. Most of custom kernels are memory bound while convolution and fully connected layers are hardware-implemented. The code below demonstrates a further optimized version of the `reorg` kernel unrolled by `stride`:
   ```cpp
   // Unrolled line-wise version
   __kernel void reorg_unrolled_by_stride(const __global half* restrict src, __global half* restrict dst,
                                          int H, int W, int stride)
   {
       int h = min((int)get_global_id(0), H-1);
       int c2 = get_global_id(1);
       int C2 = get_global_size(1);
       int C = C2*stride*stride;
       int H2 = H*stride;
       int W2 = W*stride;
       for (int stride_y = 0; stride_y < stride; stride_y++)
           for (int stride_x = 0; stride_x < stride; stride_x++)
               for (int w2 = 0, w = 0; w < W; w2 += stride, w++)
                   dst[W*H*C2*(stride_y*stride+stride_x) + W*H*c2 + W*h + w] = src[W2*H2*c2 + W2*h*stride + W2*stride_y + w2 + stride_x];
   }
   ```
`src` data in this case is loaded only once. As a result, the cycle count drops up to 45% against the line-wise version.

9. 
Copy data from `__global` to `__local` or `__private` memory if the data is accessed more than once. Access to
`__global` memory is orders of magnitude slower than access to `__local`/`__private` due to statically scheduled pipeline, which
stalls completely on memory access without any prefetch. The same recommendation is applicable for scalar load/store
from/to a `__global` pointer since work-group copying could be done in a vector fashion.

10. Use a manual DMA extension. Local (on-chip) memory throughput is up to 24x higher than DDR throughput. Starting from OpenVINO™ 2020.1, VPU OpenCL features manual-DMA kernel extension to copy sub-tensor used by work group into local memory and performing compute without DDR involved. Here is the simple GRN kernel implementation that runs over DDR. Local size is in the form (width of the input tensor, 1, 1) to define a large enough work group to get code automatically vectorized and unrolled, while global size is (width of the input tensor, height of the input tensor, 1):
   ```cpp
   __kernel void grn_NCHW(
       __global const half* restrict src_data,
       __global half* restrict dst_data,
       int C,
       float bias)
   {
       float variance = bias + 1e-9f;
       #pragma unroll 4
       for (int c = 0; c < C; c++)
       {
           float val = (float) src_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)];
           variance += val*val;
       }
       half hvariance = (half)(native_rsqrt((half)(variance/16.f))*0.25f);
       #pragma unroll 4
       for (int c = 0; c < C; c++)
       {
           dst_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)]
           = src_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)] * hvariance;
       }
   }
   ```

This kernel can be rewritten to introduce special data binding `__dma_preload` and `__dma_postwrite` intrinsics. 
This means that instead of one kernel, a group of three kernels should be implemented: `kernelName`, `__dma_preload_kernelName`, and `__dma_postwrite_kernelName`. `__dma_preload_kernelName` for a particular work group `n` is guaranteed to be executed before the `n`-th work group itself, while `__dma_postwrite_kernelName` is guaranteed to be executed after a corresponding work group. You can define one of those functions that are intended to be used to copy data from-to `__global` and `__local` memory. The syntactics requires exact functional signature match. The example below illustrates how to prepare your kernel for manual-DMA. + + ```cpp + __kernel void __dma_preload_grn_NCHW( + __global const half* restrict src, + __global half* restrict dst, + __local half* restrict local_src, + __local half* restrict local_dst, + int C, + float bias) + { + // ToDO: copy required piece of src tensor into local_src + } + + __kernel void __dma_postwrite_grn_NCHW( + __global const half* restrict src, + __global half* restrict dst, + __local const half* restrict local_src, + __local half* restrict local_dst, + int C, + float bias) + { + // ToDO: copy back computed piece of local_dst into dst + } + + __kernel void grn_NCHW( + __global const half* restrict src_data, + __global half* restrict dst_data, + __local half* restrict src, + __local half* restrict dst, + int C, + float bias) + { + // same as the example above + } + ``` +The GRN kernel operates on channel-major tensors to compute average over full channel range and then normalizes input elements to produce the output. +As a part of the manual DMA extension, a group of work group copy functions are introduced in addition to `async_work_group_copy`, which is also mapped to a DMA call. 
+ +Here is the list of supported functions: +```cpp +// 2D sub-tensor copy +event_t WorkGroupDmaCreateStrideTransaction( + const local T *src, + global T *dst, + size_t src_width, // width of the line of source in bytes + size_t dst_width, // width of the line of destination in bytes + size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes + size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes + size_t size, // total number of bytes loaded for all lines from source to destination + event_t event) __OVERLOAD; +event_t WorkGroupDmaCreateStrideTransaction( + const global T *src, + local T *dst, + size_t src_width, // width of the line of source in bytes + size_t dst_width, // width of the line of destination in bytes + size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes + size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes + size_t size, // total number of bytes loaded for all lines from source to destination + event_t event) __OVERLOAD; +// 3D sub-tensor copy +event_t WorkGroupDmaCreate3DTransaction( + const local T *src, + global T *dst, + size_t src_width, // width of the line of source in bytes + size_t dst_width, // width of the line of destination in bytes + size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes + size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes + size_t num_planes, // number of planes to be copied + size_t src_plane_stride, // stride between corresponding 2 consecutive planes of source in bytes + size_t dst_plane_stride, // stride between corresponding 2 consecutive planes of destination in bytes + size_t size, // size of the loaded plane in bytes, analogues to the size in 2D case + event_t event) __OVERLOAD; +event_t WorkGroupDmaCreate3DTransaction( + const global T *src, + local T *dst, + size_t src_width, 
// width of the line of source in bytes + size_t dst_width, // width of the line of destination in bytes + size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes + size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes + size_t num_planes, // number of planes to be copied + size_t src_plane_stride, // stride between corresponding 2 consecutive planes of source in bytes + size_t dst_plane_stride, // stride between corresponding 2 consecutive planes of destination in bytes + size_t size, // size of the loaded plane in bytes, analogues to the size in 2D case + event_t event) __OVERLOAD; +``` +where `T` can be `uchar`, `char`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `half` or `float`. + +Modified version of the GRN kernel could be the following: +```cpp +__kernel void __dma_preload_grn_NCHW( + __global const half* restrict src, + __global half* restrict dst, + __local half* restrict local_src, + __local half* restrict local_dst, + int C, + float bias) +{ + WorkGroupDmaCreate3DTransaction( + src + get_group_id(0)*get_local_size(0) + + get_group_id(1)*get_local_size(1)*get_global_size(0), // src + local_src, // dst + get_local_size(0) * sizeof(half), // src width + get_local_size(0) * sizeof(half), // dst width + get_global_size(0) * sizeof(half), // src stride + get_local_size(0) * sizeof(half), // dst stride + C, // num planes + get_global_size(0) * get_global_size(1) * sizeof(half), // src plane stride + get_local_size(0) * get_local_size(1) * sizeof(half), // dst plane stride + get_local_size(0) * get_local_size(1) * sizeof(half), // plane size + 0); +} +__kernel void __dma_postwrite_grn_NCHW( + __global const half* restrict src, + __global half* restrict dst, + __local const half* restrict local_src, + __local half* restrict local_dst, + int C, + float bias) +{ + WorkGroupDmaCreate3DTransaction( + local_dst, // src + dst + get_group_id(0)*get_local_size(0) + + 
get_group_id(1)*get_local_size(1)*get_global_size(0), // dst + get_local_size(0) * sizeof(half), // src width + get_local_size(0) * sizeof(half), // dst width + get_local_size(0) * sizeof(half), // src stride + get_global_size(0) * sizeof(half), // dst stride + C, // num planes + get_local_size(0) * get_local_size(1) * sizeof(half), // src plane stride + get_global_size(0) * get_global_size(1) * sizeof(half), // dst plane stride + get_local_size(0) * get_local_size(1) * sizeof(half), // plane size + 0); +} +__kernel void grn_NCHW( + __global const half* restrict src_data, + __global half* restrict dst_data, + __local half* restrict src, + __local half* restrict dst, + int C, + float bias) +{ + float variance = bias + 1e-9f; + #pragma unroll 8 + for (int c = 0; c < C; c++) + { + float val = (float) src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)]; + variance += val*val; + } + half hvariance = (half)(native_rsqrt((half)(variance/16.f))*0.25f); + #pragma unroll 8 + for (int c = 0; c < C; c++) + { + dst[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)] + = src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)] * hvariance; + } +} +``` + +Note the `get_local_size` and `get_local_id` usage inside the kernel. 21x speedup is expected for a kernel on enet-curbs setup because it was completely limited by memory usage. + +An alternative method to using DMA is to use work item copy extension. Those functions are executed inside a kernel and requires work groups equal to single work item. 
+ +Here is the list of supported work item functions: +```cpp +item_dma_event_t WorkItemDmaCreateTransaction( + const global T *src, + private T *dst, + size_t size, + item_dma_event_t event) __OVERLOAD; +item_dma_event_t WorkItemDmaCreateTransaction( + const private T *src, + global T *dst, + size_t size, + item_dma_event_t event) __OVERLOAD; +item_dma_event_t WorkItemDmaCreateStrideTransaction( + const global T *src, + private T *dst, + size_t src_width, + size_t dst_width, + size_t src_stride, + size_t dst_stride, + size_t size, + item_dma_event_t event) __OVERLOAD; +item_dma_event_t WorkItemDmaCreateStrideTransaction( + const private T *src, + global T *dst, + size_t src_width, + size_t dst_width, + size_t src_stride, + size_t dst_stride, + size_t size, + item_dma_event_t event) __OVERLOAD; +item_dma_event_t WorkItemDmaCreate3DTransaction( + const global T *src, + private T *dst, + size_t src_width, + size_t dst_width, + size_t src_stride, + size_t dst_stride, + size_t num_planes, + size_t src_plane_stride, + size_t dst_plane_stride, + size_t size, + item_dma_event_t event) __OVERLOAD; +item_dma_event_t WorkItemDmaCreate3DTransaction( + const private T *src, + global T *dst, + size_t src_width, + size_t dst_width, + size_t src_stride, + size_t dst_stride, + size_t num_planes, + size_t src_plane_stride, + size_t dst_plane_stride, + size_t size, + item_dma_event_t event) __OVERLOAD; +``` +where `T` can be `uchar`, `char`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `half` or `float`. 
diff --git a/docs/snippets/CMakeLists.txt b/docs/snippets/CMakeLists.txt index dd128b85f10..1daed4817b6 100644 --- a/docs/snippets/CMakeLists.txt +++ b/docs/snippets/CMakeLists.txt @@ -12,6 +12,7 @@ if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG) endif() file(GLOB SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/vpu/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/gpu/*.cpp") find_package(OpenCL) diff --git a/docs/snippets/vpu/custom_op.cpp b/docs/snippets/vpu/custom_op.cpp new file mode 100644 index 00000000000..1b471fe7955 --- /dev/null +++ b/docs/snippets/vpu/custom_op.cpp @@ -0,0 +1,12 @@ +#include + +int main() { +//! [part0] +ov::Core core; +// Load Myriad Extensions +core.set_property("MYRIAD", {{CONFIG_KEY(CONFIG_FILE), ""}}); +//! [part0] + +return 0; +} + From 76e2f2697f8b422b878f4e6b32e9b28bd8f6db87 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 28 Mar 2022 12:29:27 +0300 Subject: [PATCH 3/9] [CONFORMANCE] Fix run of Conformance tests (#11225) --- .../op_impl_check/single_op_graph.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp index 4769c7947ae..8f4aa9b289b 100644 --- a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp +++ b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp @@ -849,7 +849,7 @@ std::shared_ptr generate(const std::shared_ptr(ov::element::f16, {}, {0.f}); const auto max_value = ngraph::builder::makeConstant(ov::element::f16, {}, {1.f}); - auto Node = std::make_shared(params.at(0), min_value, max_value, ov::element::f32, 10, 10); + auto Node = std::make_shared(params.at(0), min_value, max_value, ov::element::f16, 10, 10); ov::ResultVector results{std::make_shared(Node)}; return std::make_shared(results, params, 
"RandomUniformGraph"); } @@ -1603,7 +1603,7 @@ std::shared_ptr generateRNNCellBase(const std::shared_ptr RNNCellBaseNode = std::make_shared(params.at(0), params.at(1), W, R, B, 3); ov::ResultVector results{std::make_shared(RNNCellBaseNode)}; - return std::make_shared(results, params, "RNNCellBaseGraph"); + return std::make_shared(results, params, "GRUCell3BaseGraph"); } else if (ov::is_type(node)) { const auto params = ngraph::builder::makeDynamicParams(ov::element::f16, {{2, 3}, {2, 3}, {2, 3}}); const auto W = ngraph::builder::makeConstant(ov::element::f16, {12, 3}, {}, true); @@ -1614,7 +1614,7 @@ std::shared_ptr generateRNNCellBase(const std::shared_ptr W, R, B, P, 3); ov::ResultVector results{std::make_shared(RNNCellBaseNode->output(0)), std::make_shared(RNNCellBaseNode->output(1))}; - return std::make_shared(results, params, "RNNCellBaseGraph"); + return std::make_shared(results, params, "LSTMCell1BaseGraph"); } else if (ov::is_type(node)) { const auto params = ngraph::builder::makeDynamicParams(ov::element::f16, {{2, 3}, {2, 3}, {2, 3}}); const auto W = ngraph::builder::makeConstant(ov::element::f16, {12, 3}, {}, true); @@ -1624,7 +1624,7 @@ std::shared_ptr generateRNNCellBase(const std::shared_ptr W, R, B, 3); ov::ResultVector results{std::make_shared(RNNCellBaseNode->output(0)), std::make_shared(RNNCellBaseNode->output(1))};; - return std::make_shared(results, params, "RNNCellBaseGraph"); + return std::make_shared(results, params, "LSTMCell4BaseGraph"); } else if (ov::is_type(node)) { const auto params = ngraph::builder::makeDynamicParams({ov::element::f16, ov::element::f16, ov::element::f16, ov::element::i64}, {{5, 10, 10}, {5, 1, 10}, {5, 1, 10}, {5}}); @@ -1636,7 +1636,7 @@ std::shared_ptr generateRNNCellBase(const std::shared_ptr ov::ResultVector results{std::make_shared(RNNCellBaseNode->output(0)), std::make_shared(RNNCellBaseNode->output(1)), std::make_shared(RNNCellBaseNode->output(2))}; - return std::make_shared(results, params, 
"RNNCellBaseGraph"); + return std::make_shared(results, params, "LSTMSeqBaseGraph"); } else if (ov::is_type(node)) { const auto params = ngraph::builder::makeDynamicParams(ov::element::f16, {{2, 3}, {2, 3}}); const auto W = ngraph::builder::makeConstant(ov::element::f16, {3, 3}, {}, true); @@ -1656,7 +1656,7 @@ std::shared_ptr generateRNNCellBase(const std::shared_ptr W, R, B, 3, ov::op::RecurrentSequenceDirection::FORWARD); ov::ResultVector results{std::make_shared(RNNCellBaseNode->output(0)), std::make_shared(RNNCellBaseNode->output(1))}; - return std::make_shared(results, params, "RNNCellBaseGraph"); + return std::make_shared(results, params, "RNNSeqBaseGraph"); } else { return nullptr; } From 52937967bb6317383287d3d8866ec1e8b7d033c9 Mon Sep 17 00:00:00 2001 From: Valentin Dymchishin Date: Mon, 28 Mar 2022 12:51:53 +0300 Subject: [PATCH 4/9] Add dynamism in memory tests (API 2) (#10589) --- tests/memory_tests/CMakeLists.txt | 1 + .../memory_tests_helper/memory_counter.h | 2 +- .../src/memory_tests/CMakeLists.txt | 2 +- .../src/memory_tests/memtest_infer.cpp | 10 +++- .../src/memory_tests/memtest_infer_api_2.cpp | 57 +++++++++++++------ .../src/memory_tests_helper/CMakeLists.txt | 2 +- .../src/memory_tests_helper/cli.h | 32 ++++++++--- .../src/memory_tests_helper/main.cpp | 19 +++++-- 8 files changed, 89 insertions(+), 36 deletions(-) diff --git a/tests/memory_tests/CMakeLists.txt b/tests/memory_tests/CMakeLists.txt index 57be14b1131..d07250daa64 100644 --- a/tests/memory_tests/CMakeLists.txt +++ b/tests/memory_tests/CMakeLists.txt @@ -29,3 +29,4 @@ add_subdirectory(src) install(DIRECTORY test_runner/ DESTINATION tests/memory_tests/test_runner COMPONENT tests EXCLUDE_FROM_ALL) install(DIRECTORY .automation/ DESTINATION tests/memory_tests/test_runner/.automation COMPONENT tests EXCLUDE_FROM_ALL) install(DIRECTORY scripts/ DESTINATION tests/memory_tests/scripts COMPONENT tests EXCLUDE_FROM_ALL) +install(DIRECTORY ../utils/ DESTINATION tests/utils COMPONENT tests 
EXCLUDE_FROM_ALL) diff --git a/tests/memory_tests/include/memory_tests_helper/memory_counter.h b/tests/memory_tests/include/memory_tests_helper/memory_counter.h index 63c479adaad..17c86eafe5f 100644 --- a/tests/memory_tests/include/memory_tests_helper/memory_counter.h +++ b/tests/memory_tests/include/memory_tests_helper/memory_counter.h @@ -22,6 +22,6 @@ public: MemoryCounter(const std::string &mem_counter_name); }; -#define MEMORY_SNAPSHOT(mem_counter_name) MemoryTest::MemoryCounter (#mem_counter_name); +#define MEMORY_SNAPSHOT(mem_counter_name) MemoryTest::MemoryCounter mem_counter_name(#mem_counter_name); } // namespace MemoryTest diff --git a/tests/memory_tests/src/memory_tests/CMakeLists.txt b/tests/memory_tests/src/memory_tests/CMakeLists.txt index 0c6fdc5010b..cfe2093c030 100644 --- a/tests/memory_tests/src/memory_tests/CMakeLists.txt +++ b/tests/memory_tests/src/memory_tests/CMakeLists.txt @@ -15,7 +15,7 @@ foreach(test_source ${tests}) get_filename_component(test_name ${test_source} NAME_WE) add_executable(${test_name} ${test_source}) - target_link_libraries(${test_name} PRIVATE memory_tests_helper tests_shared_lib) + target_link_libraries(${test_name} PRIVATE tests_shared_lib memory_tests_helper) add_dependencies(memory_tests ${test_name}) diff --git a/tests/memory_tests/src/memory_tests/memtest_infer.cpp b/tests/memory_tests/src/memory_tests/memtest_infer.cpp index 17d601d761f..709d62cd82f 100644 --- a/tests/memory_tests/src/memory_tests/memtest_infer.cpp +++ b/tests/memory_tests/src/memory_tests/memtest_infer.cpp @@ -15,8 +15,12 @@ * main(). The function should not throw any exceptions and responsible for * handling it by itself. 
*/ -int runPipeline(const std::string &model, const std::string &device) { - auto pipeline = [](const std::string &model, const std::string &device) { +int runPipeline(const std::string &model, const std::string &device, + std::map reshapeShapes, + std::map> dataShapes) { + auto pipeline = [](const std::string &model, const std::string &device, + std::map reshapeShapes, + std::map> dataShapes) { InferenceEngine::Core ie; InferenceEngine::CNNNetwork cnnNetwork; InferenceEngine::ExecutableNetwork exeNetwork; @@ -53,7 +57,7 @@ int runPipeline(const std::string &model, const std::string &device) { }; try { - pipeline(model, device); + pipeline(model, device, reshapeShapes, dataShapes); } catch (const InferenceEngine::Exception &iex) { std::cerr << "Inference Engine pipeline failed with Inference Engine exception:\n" diff --git a/tests/memory_tests/src/memory_tests/memtest_infer_api_2.cpp b/tests/memory_tests/src/memory_tests/memtest_infer_api_2.cpp index 8eccc9c210f..2151af0468e 100644 --- a/tests/memory_tests/src/memory_tests/memtest_infer_api_2.cpp +++ b/tests/memory_tests/src/memory_tests/memtest_infer_api_2.cpp @@ -1,15 +1,15 @@ // Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include -#include -#include #include #include "common_utils.h" +#include "reshape_utils.h" #include "memory_tests_helper/memory_counter.h" #include "memory_tests_helper/utils.h" -#include "openvino/runtime/core.hpp" + /** @@ -17,43 +17,66 @@ * main(). The function should not throw any exceptions and responsible for * handling it by itself. 
*/ -int runPipeline(const std::string &model, const std::string &device) { - auto pipeline = [](const std::string &model, const std::string &device) { +int runPipeline(const std::string &model, const std::string &device, + std::map reshapeShapes, + std::map> dataShapes) { + auto pipeline = [](const std::string &model, const std::string &device, + std::map reshapeShapes, + std::map> dataShapes) { ov::Core ie; - std::shared_ptr network; - ov::CompiledModel compiled_model; - ov::InferRequest infer_request; + std::shared_ptr cnnNetwork; + ov::CompiledModel exeNetwork; + ov::InferRequest inferRequest; + + std::vector> defaultInputs; + + bool reshape = false; + if (!reshapeShapes.empty()) { + reshape = true; + } ie.get_versions(device); MEMORY_SNAPSHOT(load_plugin); if (MemoryTest::fileExt(model) == "blob") { std::ifstream streamModel{model}; - compiled_model = ie.import_model(streamModel, device); + exeNetwork = ie.import_model(streamModel, device); MEMORY_SNAPSHOT(import_network); } else { - network = ie.read_model(model); + cnnNetwork = ie.read_model(model); MEMORY_SNAPSHOT(read_network); - compiled_model = ie.compile_model(network, device); + if (reshape) { + defaultInputs = getCopyOfDefaultInputs(cnnNetwork->inputs()); + cnnNetwork->reshape(reshapeShapes); + MEMORY_SNAPSHOT(reshape); + } + + exeNetwork = ie.compile_model(cnnNetwork, device); MEMORY_SNAPSHOT(load_network); } MEMORY_SNAPSHOT(create_exenetwork); - infer_request = compiled_model.create_infer_request(); + inferRequest = exeNetwork.create_infer_request(); - auto inputs = network->inputs(); - fillTensors(infer_request, inputs); - MEMORY_SNAPSHOT(fill_inputs) + std::vector> inputs = exeNetwork.inputs(); + if (reshape && dataShapes.empty()) { + fillTensors(inferRequest, defaultInputs); + } else if (reshape && !dataShapes.empty()) { + fillTensorsWithSpecifiedShape(inferRequest, inputs, dataShapes); + } else { + fillTensors(inferRequest, inputs); + } + MEMORY_SNAPSHOT(fill_inputs); - infer_request.infer(); + 
inferRequest.infer(); MEMORY_SNAPSHOT(first_inference); MEMORY_SNAPSHOT(full_run); }; try { - pipeline(model, device); + pipeline(model, device, reshapeShapes, dataShapes); } catch (const InferenceEngine::Exception &iex) { std::cerr << "Inference Engine pipeline failed with Inference Engine exception:\n" diff --git a/tests/memory_tests/src/memory_tests_helper/CMakeLists.txt b/tests/memory_tests/src/memory_tests_helper/CMakeLists.txt index 37d7cd07f34..e9a0bc50028 100644 --- a/tests/memory_tests/src/memory_tests_helper/CMakeLists.txt +++ b/tests/memory_tests/src/memory_tests_helper/CMakeLists.txt @@ -12,4 +12,4 @@ add_subdirectory(${OpenVINO_SOURCE_DIR}/thirdparty/gflags ${CMAKE_CURRENT_BINARY_DIR}/gflags_build EXCLUDE_FROM_ALL) -target_link_libraries(${TARGET_NAME} PUBLIC gflags) +target_link_libraries(${TARGET_NAME} PUBLIC gflags tests_shared_lib) diff --git a/tests/memory_tests/src/memory_tests_helper/cli.h b/tests/memory_tests/src/memory_tests_helper/cli.h index c6ed93db117..dfceff6eab5 100644 --- a/tests/memory_tests/src/memory_tests_helper/cli.h +++ b/tests/memory_tests/src/memory_tests_helper/cli.h @@ -26,6 +26,16 @@ static const char target_device_message[] = "plugin. " "The application looks for a suitable plugin for the specified device."; +/// @brief message for shapes argument +static const char reshape_shapes_message[] = + "Not required. Use this key to run memory tests with reshape. \n" + "Example: 'input*1..2 3 100 100'. Use '&' delimiter for several inputs. Example: 'input1*1..2 100&input2*1..2 100' "; + +/// @brief message for shapes argument +static const char data_shapes_message[] = + "Not required. Use this key to run memory tests with reshape. Used with 'reshape_shapes' arg. \n" + "Only static shapes for data. Example: 'input*1 3 100 100'. Use '&' delimiter for several inputs. Example: 'input1*1 100&input2*1 100' "; + /// @brief message for statistics path argument static const char statistics_path_message[] = "Required. 
Path to a file to write statistics."; @@ -44,6 +54,14 @@ DEFINE_string(m, "", model_message); /// It is a required parameter DEFINE_string(d, "", target_device_message); +/// @brief Define parameter for set shapes to reshape function
+/// It is a non-required parameter
+DEFINE_string(reshape_shapes, "", reshape_shapes_message);
+
+/// @brief Define parameter for setting shapes of the network data
+/// It is a non-required parameter
+DEFINE_string(data_shapes, "", data_shapes_message);
+
+/// @brief Define parameter for setting the path to a file to write statistics
/// It is a required parameter DEFINE_string(s, "", statistics_path_message); @@ -53,13 +71,13 @@ DEFINE_string(s, "", statistics_path_message); */ static void showUsage() { std::cout << std::endl; - std::cout << "TimeTests [OPTION]" << std::endl; + std::cout << "MemoryInfer [OPTION]" << std::endl; std::cout << "Options:" << std::endl; std::cout << std::endl; - std::cout << " -h, --help " << help_message << std::endl; - std::cout << " -m \"\" " << model_message << std::endl; - std::cout << " -d \"\" " << target_device_message - << std::endl; - std::cout << " -s \"\" " << statistics_path_message - << std::endl; + std::cout << " -h, --help " << help_message << std::endl; + std::cout << " -m \"\" " << model_message << std::endl; + std::cout << " -d \"\" " << target_device_message << std::endl; + std::cout << " -s \"\" " << statistics_path_message << std::endl; + std::cout << " -reshape_shapes " << reshape_shapes_message << std::endl; + std::cout << " -data_shapes " << data_shapes_message << std::endl; } diff --git a/tests/memory_tests/src/memory_tests_helper/main.cpp b/tests/memory_tests/src/memory_tests_helper/main.cpp index 2d71137bfaa..357edbb1a4b 100644 --- a/tests/memory_tests/src/memory_tests_helper/main.cpp +++ b/tests/memory_tests/src/memory_tests_helper/main.cpp @@ -4,11 +4,14 @@ #include "cli.h" #include "statistics_writer.h" +#include "reshape_utils.h" #include "memory_tests_helper/memory_counter.h" #include -int runPipeline(const std::string &model, const std::string &device); +int runPipeline(const std::string &model, const std::string &device, + std::map reshapeShapes, + std::map> dataShapes); /** * @brief Parses command line and check required arguments @@ -38,10 +41,11 @@ bool parseAndCheckCommandLine(int argc, char **argv) { /** * @brief Function calls `runPipeline` with mandatory memory values tracking of full run */ -int _runPipeline() { - auto status = runPipeline(FLAGS_m, FLAGS_d); - MEMORY_SNAPSHOT(after_objects_release); - return status; +int 
_runPipeline(std::map dynamicShapes, + std::map> staticShapes) { + auto status = runPipeline(FLAGS_m, FLAGS_d, dynamicShapes, staticShapes); + MEMORY_SNAPSHOT(after_objects_release); + return status; } /** @@ -51,7 +55,10 @@ int main(int argc, char **argv) { if (!parseAndCheckCommandLine(argc, argv)) return -1; - auto status = _runPipeline(); + auto dynamicShapes = parseReshapeShapes(FLAGS_reshape_shapes); + auto staticShapes = parseDataShapes(FLAGS_data_shapes); + + auto status = _runPipeline(dynamicShapes, staticShapes); StatisticsWriter::Instance().setFile(FLAGS_s); StatisticsWriter::Instance().write(); return status; From 27741d316e74a9b4997e6ae08e26edbe1ac0dd97 Mon Sep 17 00:00:00 2001 From: Dmitry Belyakin Date: Mon, 28 Mar 2022 15:45:35 +0300 Subject: [PATCH 5/9] [OMZ]: update submodule (#11239) * [OMZ]: update submodule * bump omz ver --- thirdparty/open_model_zoo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/open_model_zoo b/thirdparty/open_model_zoo index 7c5acb97be0..8841555105e 160000 --- a/thirdparty/open_model_zoo +++ b/thirdparty/open_model_zoo @@ -1 +1 @@ -Subproject commit 7c5acb97be00bf0bc0a1973e47e5b09eba2667b4 +Subproject commit 8841555105e261e05e3b949e60b7260c74c6410e From 8b591c141ebe08e72389ecb52c6200dc8027ce6f Mon Sep 17 00:00:00 2001 From: Karol Blaszczak Date: Mon, 28 Mar 2022 17:15:32 +0200 Subject: [PATCH 6/9] Update installing-openvino-overview.md (#11271) --- .../installing-openvino-overview.md | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/docs/install_guides/installing-openvino-overview.md b/docs/install_guides/installing-openvino-overview.md index 6edfaf528df..045fbbc1a8f 100644 --- a/docs/install_guides/installing-openvino-overview.md +++ b/docs/install_guides/installing-openvino-overview.md @@ -1,20 +1,19 @@ # Overview {#openvino_docs_install_guides_overview} -Intel® Distribution of OpenVINO™ toolkit is a comprehensive toolkit for quickly developing 
applications and solutions that solve a variety of tasks including emulation of human vision, automatic speech recognition, natural language processing, recommendation systems, and many others. It accelerates applications with high-performance, AI and deep learning inference deployed from edge to cloud by: +Intel® Distribution of OpenVINO™ toolkit is a comprehensive toolkit for developing applications and solutions based on deep learning tasks, such as: emulation of human vision, automatic speech recognition, natural language processing, recommendation systems, etc. It provides high-performance and rich deployment options, from edge to cloud. Some of its advantages are: * Enabling CNN-based deep learning inference on the edge. -* Supporting heterogeneous execution across Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs. +* Supporting various execution modes across Intel® technologies: Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs. * Speeding time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels. ## Installation Options -From the 2022.1 release, the OpenVINO installation package has been separated into two parts: OpenVINO Runtime and OpenVINO Development Tools. See the following instructions to decide your installation process. - +Since the 2022.1 release, the OpenVINO installation package has been distributed in two parts: OpenVINO Runtime and OpenVINO Development Tools. See the following instructions to choose your installation process. 
### Decide What to Install -**If you have already finished your model development and want to deploy your applications on various devices, [install OpenVINO Runtime](installing-openvino-runtime.md)**, which contains a set of libraries for an easy inference integration into your applications and supports heterogeneous execution across Intel® CPU and Intel® GPU hardware. +**If you have already finished your model development and want to deploy your applications on various devices, [install OpenVINO Runtime](installing-openvino-runtime.md)**, which contains a set of libraries for easy inference integration with your products. -**If you want to download model from [Open Model Zoo](../model_zoo.md), convert to [OpenVINO IR](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md), [optimize](../optimization_guide/model_optimization_guide.md) and tune pre-trained deep learning models**, [install OpenVINO Development Tools](installing-model-dev-tools.md), which provides the following tools: +**If you want to download models from [Open Model Zoo](../model_zoo.md), [convert your own models to OpenVINO IR](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md), or [optimize and tune pre-trained deep learning models](../optimization_guide/model_optimization_guide.md)**, [install OpenVINO Development Tools](installing-model-dev-tools.md), which provides the following tools: * Model Optimizer * Post-Training Optimization Tool @@ -25,15 +24,14 @@ From the 2022.1 release, the OpenVINO installation package has been separated in ### Choose Your Installation Method -For Python developers, you can [install OpenVINO from PyPI](installing-openvino-pip.md), which contains both OpenVINO Runtime and Development Tools and less steps. +For Python developers, you can [install OpenVINO from PyPI](installing-openvino-pip.md), which contains both OpenVINO Runtime and Development Tools, while requiring fewer steps. 
-For C++ developers, you may choose one of the following installation options to install OpenVINO Runtime on your specific operating system: +For C++ developers, you may choose one of the following installation options for OpenVINO Runtime on your specific operating system: -* Linux: You can install OpenVINO Runtime using an [Installer](installing-openvino-linux.md), [APT](installing-openvino-apt.md), [YUM](installing-openvino-yum.md), [Anaconda Cloud](installing-openvino-conda.md) or [Docker](installing-openvino-docker-linux.md). -* Windows: You can install OpenVINO Runtime using an [Installer](installing-openvino-windows.md), [Anaconda Cloud](installing-openvino-conda.md) or [Docker](installing-openvino-docker-windows.md). +* Linux: You can install OpenVINO Runtime using an [Installer](installing-openvino-linux.md), [APT](installing-openvino-apt.md), [YUM](installing-openvino-yum.md), [Anaconda Cloud](installing-openvino-conda.md), or [Docker](installing-openvino-docker-linux.md). +* Windows: You can install OpenVINO Runtime using an [Installer](installing-openvino-windows.md), [Anaconda Cloud](installing-openvino-conda.md), or [Docker](installing-openvino-docker-windows.md). * macOS: You can install OpenVINO Runtime using an [Installer](installing-openvino-macos.md) or [Anaconda Cloud](installing-openvino-conda.md). * [Raspbian OS](installing-openvino-raspbian.md). -> **NOTE**: From the 2022.1 release, OpenVINO Development Tools can **only** be installed via PyPI. See [Install OpenVINO Development Tools](installing-model-dev-tools.md) for detailed steps. - -Besides, the open source version is also available in the [OpenVINO™ toolkit GitHub repository](https://github.com/openvinotoolkit/openvino/). You can build it for supported platforms using the [OpenVINO Build Instructions](https://github.com/openvinotoolkit/openvino/wiki/BuildingCode). +> **NOTE**: With the introduction of the 2022.1 release, the OpenVINO Development Tools can be installed **only** via PyPI. 
See [Install OpenVINO Development Tools](installing-model-dev-tools.md) for detailed steps. +Source files are also available in the [OpenVINO toolkit GitHub repository](https://github.com/openvinotoolkit/openvino/), so you can build your own package for the supported platforms, as described in [OpenVINO Build Instructions](https://github.com/openvinotoolkit/openvino/wiki/BuildingCode). \ No newline at end of file From 19d0e5ba52419206ea61aac5bff104e454f617a2 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 28 Mar 2022 19:32:21 +0300 Subject: [PATCH 7/9] CMAKE: IE_VERSION => OpenVINO_VERSION (#11242) * IE_VERSION => OpenVINO_VERSION * Reverted installation of python unconditionally --- .../IEDevScriptsConfig.cmake | 4 +- .../frontends/frontends.cmake | 11 ++-- cmake/developer_package/packaging.cmake | 4 +- cmake/developer_package/version.cmake | 45 ++++++------- .../vs_version/vs_version.cmake | 6 +- .../InferenceEngineConfig-version.cmake.in | 6 +- ...renceEngineDeveloperPackageConfig.cmake.in | 15 +++-- .../templates/OpenVINOConfig-version.cmake.in | 6 +- cmake/templates/OpenVINOConfig.cmake.in | 4 +- .../OpenVINODeveloperPackageConfig.cmake.in | 21 +++---- install_build_dependencies.sh | 1 + samples/cpp/benchmark_app/CMakeLists.txt | 2 +- src/bindings/python/wheel/CMakeLists.txt | 4 +- src/cmake/install_tbb.cmake | 3 +- src/cmake/openvino.cmake | 63 ++++++++++--------- .../CMakeLists.txt | 10 +-- src/common/snippets/CMakeLists.txt | 3 +- src/common/transformations/CMakeLists.txt | 10 +-- src/core/CMakeLists.txt | 2 +- src/frontends/common/CMakeLists.txt | 5 +- tools/CMakeLists.txt | 16 ++--- tools/openvino_dev/CMakeLists.txt | 4 +- 22 files changed, 133 insertions(+), 112 deletions(-) diff --git a/cmake/developer_package/IEDevScriptsConfig.cmake b/cmake/developer_package/IEDevScriptsConfig.cmake index 29ec0fbf7af..f0d4777233e 100644 --- a/cmake/developer_package/IEDevScriptsConfig.cmake +++ b/cmake/developer_package/IEDevScriptsConfig.cmake @@ -14,8 +14,8 
@@ set(CMAKE_MODULE_PATH "${IEDevScripts_DIR}") function(set_ci_build_number) set(repo_root "${CMAKE_SOURCE_DIR}") include(version) - foreach(var CI_BUILD_NUMBER IE_VERSION IE_VERSION_BUILD - IE_VERSION_MAJOR IE_VERSION_MINOR IE_VERSION_PATCH) + foreach(var CI_BUILD_NUMBER OpenVINO_VERSION OpenVINO_VERSION_BUILD + OpenVINO_VERSION_MAJOR OpenVINO_VERSION_MINOR OpenVINO_VERSION_PATCH) if(NOT DEFINED ${var}) message(FATAL_ERROR "${var} version component is not defined") endif() diff --git a/cmake/developer_package/frontends/frontends.cmake b/cmake/developer_package/frontends/frontends.cmake index 0deec4aacc4..0779ed286d6 100644 --- a/cmake/developer_package/frontends/frontends.cmake +++ b/cmake/developer_package/frontends/frontends.cmake @@ -82,10 +82,11 @@ unset(protobuf_installed CACHE) # # ov_add_frontend(NAME -# FILEDESCRIPTION -# [LINKABLE_FRONTEND] -# [SKIP_INSTALL] -# [PROTOBUF_LITE] +# FILEDESCRIPTION # used on Windows to describe DLL file +# [LINKABLE_FRONTEND] # whether we can use FE API directly or via FEM only +# [SKIP_INSTALL] # private frontend, not for end users +# [PROTOBUF_LITE] # requires only libprotobuf-lite +# [SKIP_NCC_STYLE] # use custom NCC rules # [LINK_LIBRARIES ]) # macro(ov_add_frontend) @@ -242,7 +243,7 @@ macro(ov_add_frontend) endif() if(OV_FRONTEND_LINKABLE_FRONTEND) - # install -dev part + # install library development files install(DIRECTORY ${${TARGET_NAME}_INCLUDE_DIR}/openvino DESTINATION ${FRONTEND_INSTALL_INCLUDE}/ COMPONENT core_dev diff --git a/cmake/developer_package/packaging.cmake b/cmake/developer_package/packaging.cmake index cc287ff1f3b..458ddade93a 100644 --- a/cmake/developer_package/packaging.cmake +++ b/cmake/developer_package/packaging.cmake @@ -69,8 +69,8 @@ macro(ie_cpack) endif() foreach(ver IN LISTS MAJOR MINOR PATCH) - if(DEFINED IE_VERSION_${ver}) - set(CPACK_PACKAGE_VERSION_${ver} ${IE_VERSION_${ver}}) + if(DEFINED OpenVINO_VERSION_${ver}) + set(CPACK_PACKAGE_VERSION_${ver} ${OpenVINO_VERSION_${ver}}) endif() 
endforeach() diff --git a/cmake/developer_package/version.cmake b/cmake/developer_package/version.cmake index c274de22b79..8567fde76ad 100644 --- a/cmake/developer_package/version.cmake +++ b/cmake/developer_package/version.cmake @@ -26,20 +26,22 @@ function (commitHash VAR) set (${VAR} ${GIT_COMMIT_HASH} PARENT_SCOPE) endfunction() -macro(ie_parse_ci_build_number) - set(IE_VERSION_BUILD 000) +macro(ov_parse_ci_build_number) + set(OpenVINO_VERSION_BUILD 000) + set(IE_VERSION_BUILD ${OpenVINO_VERSION_BUILD}) + if(CI_BUILD_NUMBER MATCHES "^([0-9]+)\.([0-9]+)\.([0-9]+)\-([0-9]+)\-.*") - set(IE_VERSION_MAJOR ${CMAKE_MATCH_1}) - set(IE_VERSION_MINOR ${CMAKE_MATCH_2}) - set(IE_VERSION_PATCH ${CMAKE_MATCH_3}) - set(IE_VERSION_BUILD ${CMAKE_MATCH_4}) + set(OpenVINO_VERSION_MAJOR ${CMAKE_MATCH_1}) + set(OpenVINO_VERSION_MINOR ${CMAKE_MATCH_2}) + set(OpenVINO_VERSION_PATCH ${CMAKE_MATCH_3}) + set(OpenVINO_VERSION_BUILD ${CMAKE_MATCH_4}) endif() if(NOT DEFINED repo_root) message(FATAL_ERROR "repo_root is not defined") endif() - macro(ie_get_hpp_version) + macro(ov_get_hpp_version) if(NOT DEFINED OpenVINO_SOURCE_DIR) return() endif() @@ -59,11 +61,12 @@ macro(ie_parse_ci_build_number) foreach(suffix MAJOR MINOR PATCH) set(ie_version_name "IE_VERSION_${suffix}") - set(ov_version_name "OPENVINO_VERSION_${suffix}") + set(ov_version_name "OpenVINO_VERSION_${suffix}") + set(ov_version_name_hpp "OPENVINO_VERSION_${suffix}") string(REGEX REPLACE ".+${ie_version_name}[ ]+([0-9]+).*" "\\1" ${ie_version_name}_HPP "${IE_VERSION_PARTS}") - string(REGEX REPLACE ".+${ov_version_name}[ ]+([0-9]+).*" "\\1" + string(REGEX REPLACE ".+${ov_version_name_hpp}[ ]+([0-9]+).*" "\\1" ${ov_version_name}_HPP "${OV_VERSION_PARTS}") if(NOT ${ie_version_name}_HPP EQUAL ${ov_version_name}_HPP) @@ -72,26 +75,26 @@ macro(ie_parse_ci_build_number) endif() endforeach() - set(ie_hpp_version_is_found ON) + set(ov_hpp_version_is_found ON) endmacro() - # detect OpenVINO version via ie_version.hpp - 
ie_get_hpp_version() + # detect OpenVINO version via openvino/core/version.hpp and ie_version.hpp + ov_get_hpp_version() - if(ie_hpp_version_is_found) - foreach(var IE_VERSION_MAJOR IE_VERSION_MINOR IE_VERSION_PATCH) + if(ov_hpp_version_is_found) + foreach(var OpenVINO_VERSION_MAJOR OpenVINO_VERSION_MINOR OpenVINO_VERSION_PATCH) if(DEFINED ${var} AND NOT ${var} EQUAL ${var}_HPP) message(FATAL_ERROR "${var} parsed from CI_BUILD_NUMBER (${${var}}) \ - and from ie_version.hpp (${${var}_HPP}) are different") + and from openvino/core/version.hpp (${${var}_HPP}) are different") else() - # CI_BUILD_NUMBER is not defined well, take info from ie_verison.hpp as a baseline + # CI_BUILD_NUMBER is not defined well, take info from openvino/core/version.hpp as a baseline set(${var} ${${var}_HPP}) endif() endforeach() endif() - set(IE_VERSION "${IE_VERSION_MAJOR}.${IE_VERSION_MINOR}.${IE_VERSION_PATCH}") - message(STATUS "OpenVINO version is ${IE_VERSION}") + set(OpenVINO_VERSION "${OpenVINO_VERSION_MAJOR}.${OpenVINO_VERSION_MINOR}.${OpenVINO_VERSION_PATCH}") + message(STATUS "OpenVINO version is ${OpenVINO_VERSION} (Build ${OpenVINO_VERSION_BUILD})") endmacro() if (DEFINED ENV{CI_BUILD_NUMBER}) @@ -104,10 +107,10 @@ else() set(CI_BUILD_NUMBER "${custom_build}") endif() -# provides Inference Engine version +# provides OpenVINO version # 1. If CI_BUILD_NUMBER is defined, parses this information -# 2. Otherwise, parses ie_version.hpp -ie_parse_ci_build_number() +# 2. 
Otherwise, parses openvino/core/version.hpp +ov_parse_ci_build_number() macro (addVersionDefines FILE) set(__version_file ${FILE}) diff --git a/cmake/developer_package/vs_version/vs_version.cmake b/cmake/developer_package/vs_version/vs_version.cmake index ca00da1a281..9d821fd512a 100644 --- a/cmake/developer_package/vs_version/vs_version.cmake +++ b/cmake/developer_package/vs_version/vs_version.cmake @@ -2,9 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 # -set(IE_VS_VER_FILEVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0") -set(IE_VS_VER_PRODUCTVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0") -set(IE_VS_VER_FILEVERSION_STR "${IE_VERSION_MAJOR}.${IE_VERSION_MINOR}.${IE_VERSION_PATCH}.0") +set(IE_VS_VER_FILEVERSION_QUAD "${OpenVINO_VERSION_MAJOR},${OpenVINO_VERSION_MINOR},${OpenVINO_VERSION_PATCH},${OpenVINO_VERSION_BUILD}") +set(IE_VS_VER_PRODUCTVERSION_QUAD "${OpenVINO_VERSION_MAJOR},${OpenVINO_VERSION_MINOR},${OpenVINO_VERSION_PATCH},${OpenVINO_VERSION_BUILD}") +set(IE_VS_VER_FILEVERSION_STR "${OpenVINO_VERSION_MAJOR}.${OpenVINO_VERSION_MINOR}.${OpenVINO_VERSION_PATCH}.${OpenVINO_VERSION_BUILD}") set(IE_VS_VER_COMPANY_NAME_STR "Intel Corporation") set(IE_VS_VER_PRODUCTVERSION_STR "${CI_BUILD_NUMBER}") diff --git a/cmake/templates/InferenceEngineConfig-version.cmake.in b/cmake/templates/InferenceEngineConfig-version.cmake.in index 8d7743687b8..631393d0ba1 100644 --- a/cmake/templates/InferenceEngineConfig-version.cmake.in +++ b/cmake/templates/InferenceEngineConfig-version.cmake.in @@ -2,9 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 # -set(PACKAGE_VERSION_MAJOR @IE_VERSION_MAJOR@) -set(PACKAGE_VERSION_MINOR @IE_VERSION_MINOR@) -set(PACKAGE_VERSION_PATCH @IE_VERSION_PATCH@) +set(PACKAGE_VERSION_MAJOR @OpenVINO_VERSION_MAJOR@) +set(PACKAGE_VERSION_MINOR @OpenVINO_VERSION_MINOR@) +set(PACKAGE_VERSION_PATCH @OpenVINO_VERSION_PATCH@) set(PACKAGE_VERSION 
"${PACKAGE_VERSION_MAJOR}.${PACKAGE_VERSION_MINOR}.${PACKAGE_VERSION_PATCH}") set(PACKAGE_VERSION_EXACT False) diff --git a/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in b/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in index bcec89b34a0..de3109b4b14 100644 --- a/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in +++ b/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in @@ -12,8 +12,9 @@ set_and_check(OpenVINO_MAIN_SOURCE_DIR "@OpenVINO_SOURCE_DIR@") # KMB # Variables to export in plugin's projects -set(ie_options "@IE_OPTIONS@;CMAKE_BUILD_TYPE;CMAKE_SKIP_RPATH") -list(APPEND ie_options CMAKE_CXX_COMPILER_LAUNCHER CMAKE_C_COMPILER_LAUNCHER) +set(ie_options "@IE_OPTIONS@") +list(APPEND ie_options CMAKE_CXX_COMPILER_LAUNCHER CMAKE_C_COMPILER_LAUNCHER + CMAKE_BUILD_TYPE CMAKE_SKIP_RPATH CMAKE_INSTALL_PREFIX) file(TO_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" cache_path) message(STATUS "The following CMake options are exported from Inference Engine Developer package") @@ -86,13 +87,11 @@ endif() # Extra Compile Flags # -if(NOT MSVC) +if(CMAKE_COMPILER_IS_GNUCXX) ie_add_compiler_flags(-Wno-error=unused-variable) - if(CMAKE_COMPILER_IS_GNUCXX) - ie_add_compiler_flags(-Wno-error=unused-but-set-variable) - if(SUGGEST_OVERRIDE_SUPPORTED) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-suggest-override") - endif() + ie_add_compiler_flags(-Wno-error=unused-but-set-variable) + if(SUGGEST_OVERRIDE_SUPPORTED) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-suggest-override") endif() endif() diff --git a/cmake/templates/OpenVINOConfig-version.cmake.in b/cmake/templates/OpenVINOConfig-version.cmake.in index 8e38540abfb..88aec38e9e9 100644 --- a/cmake/templates/OpenVINOConfig-version.cmake.in +++ b/cmake/templates/OpenVINOConfig-version.cmake.in @@ -2,9 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 # -set(PACKAGE_VERSION_MAJOR @IE_VERSION_MAJOR@) -set(PACKAGE_VERSION_MINOR @IE_VERSION_MINOR@) -set(PACKAGE_VERSION_PATCH 
@IE_VERSION_PATCH@) +set(PACKAGE_VERSION_MAJOR @OpenVINO_VERSION_MAJOR@) +set(PACKAGE_VERSION_MINOR @OpenVINO_VERSION_MINOR@) +set(PACKAGE_VERSION_PATCH @OpenVINO_VERSION_PATCH@) set(PACKAGE_VERSION "${PACKAGE_VERSION_MAJOR}.${PACKAGE_VERSION_MINOR}.${PACKAGE_VERSION_PATCH}") set(PACKAGE_VERSION_EXACT False) diff --git a/cmake/templates/OpenVINOConfig.cmake.in b/cmake/templates/OpenVINOConfig.cmake.in index 00e892f2b1d..2c755836a77 100644 --- a/cmake/templates/OpenVINOConfig.cmake.in +++ b/cmake/templates/OpenVINOConfig.cmake.in @@ -12,6 +12,7 @@ # * `Runtime`: OpenVINO C++ and C Core & Inference Runtime, frontend common # * `ONNX`: OpenVINO ONNX frontend # * `Paddle`: OpenVINO Paddle frontend +# * `TF`: OpenVINO TensorFlow frontend # # If no components are specified, `Runtime` component is provided: # @@ -184,7 +185,8 @@ if(NOT TARGET openvino) set(_ov_as_external_package ON) include("${CMAKE_CURRENT_LIST_DIR}/OpenVINOTargets.cmake") - # TODO: WA for cmake version < 3.16 + # WA for cmake version < 3.16 which does not export + # IMPORTED_LINK_DEPENDENT_LIBRARIES_** properties if no PUBLIC dependencies for the library if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBB_FOUND) foreach (type RELEASE DEBUG RELWITHDEBINFO MINSIZEREL) set_property(TARGET openvino::runtime APPEND PROPERTY IMPORTED_LINK_DEPENDENT_LIBRARIES_${type} "TBB::tbb;TBB::tbbmalloc") diff --git a/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in b/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in index 5f7603e2abf..1f8a7b77633 100644 --- a/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in +++ b/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in @@ -10,13 +10,14 @@ set_and_check(OpenVINO_SOURCE_DIR "@OpenVINO_SOURCE_DIR@") # Variables to export in plugin's projects -set(ie_options "@IE_OPTIONS@;CMAKE_BUILD_TYPE;CMAKE_SKIP_RPATH") -list(APPEND ie_options CMAKE_CXX_COMPILER_LAUNCHER CMAKE_C_COMPILER_LAUNCHER) +set(ov_options "@IE_OPTIONS@") +list(APPEND 
ov_options CMAKE_CXX_COMPILER_LAUNCHER CMAKE_C_COMPILER_LAUNCHER + CMAKE_BUILD_TYPE CMAKE_SKIP_RPATH CMAKE_INSTALL_PREFIX) file(TO_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" cache_path) message(STATUS "The following CMake options are exported from OpenVINO Developer package") message("") -foreach(option IN LISTS ie_options) +foreach(option IN LISTS ov_options) if(NOT DEFINED "${option}") load_cache("${cache_path}" READ_WITH_PREFIX "" ${option}) endif() @@ -51,10 +52,10 @@ endforeach() if(ENABLE_SYSTEM_PUGIXML) find_dependency(PugiXML) set_property(TARGET pugixml PROPERTY IMPORTED_GLOBAL TRUE) - add_library(IE::pugixml ALIAS pugixml) + add_library(openvino::pugixml ALIAS pugixml) endif() -# inherit OpenCV from main IE project if enabled +# inherit OpenCV from main OpenVINO project if enabled if ("@OpenCV_FOUND@") load_cache("${cache_path}" READ_WITH_PREFIX "" OpenCV_DIR) find_dependency(OpenCV) @@ -64,13 +65,11 @@ endif() # Extra Compile Flags # -if(NOT MSVC) +if(CMAKE_COMPILER_IS_GNUCXX) ie_add_compiler_flags(-Wno-error=unused-variable) - if(CMAKE_COMPILER_IS_GNUCXX) - ie_add_compiler_flags(-Wno-error=unused-but-set-variable) - if(SUGGEST_OVERRIDE_SUPPORTED) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-suggest-override") - endif() + ie_add_compiler_flags(-Wno-error=unused-but-set-variable) + if(SUGGEST_OVERRIDE_SUPPORTED) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-suggest-override") endif() endif() diff --git a/install_build_dependencies.sh b/install_build_dependencies.sh index 33f4145096d..527f56980aa 100755 --- a/install_build_dependencies.sh +++ b/install_build_dependencies.sh @@ -64,6 +64,7 @@ if [ -f /etc/lsb-release ]; then libgstreamer1.0-0 \ gstreamer1.0-plugins-base \ libusb-1.0-0-dev \ + libtinfo5 \ libopenblas-dev if apt-cache search --names-only '^libjson-c2'| grep -q libjson-c2; then sudo -E apt-get install -y libjson-c2 diff --git a/samples/cpp/benchmark_app/CMakeLists.txt b/samples/cpp/benchmark_app/CMakeLists.txt index d25561b4da2..39f7bf890e2 
100644 --- a/samples/cpp/benchmark_app/CMakeLists.txt +++ b/samples/cpp/benchmark_app/CMakeLists.txt @@ -49,7 +49,7 @@ if(OpenCL_FOUND AND OpenCL_HEADERS) target_compile_definitions(${TARGET_NAME} PRIVATE HAVE_GPU_DEVICE_MEM_SUPPORT) endif() -# Optional OpenCV dependnency +# Optional OpenCV dependency find_package(OpenCV COMPONENTS core QUIET) if(NOT OpenCV_FOUND) diff --git a/src/bindings/python/wheel/CMakeLists.txt b/src/bindings/python/wheel/CMakeLists.txt index 0daee5cf0b7..0402dc67af0 100644 --- a/src/bindings/python/wheel/CMakeLists.txt +++ b/src/bindings/python/wheel/CMakeLists.txt @@ -4,8 +4,8 @@ find_package(PythonInterp 3 REQUIRED) set(PYTHON_VERSION python${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}) -set(WHEEL_VERSION "${IE_VERSION}" CACHE STRING "Version of this release" FORCE) -set(WHEEL_BUILD "${IE_VERSION_BUILD}" CACHE STRING "Build number of this release" FORCE) +set(WHEEL_VERSION "${OpenVINO_VERSION}" CACHE STRING "Version of this release" FORCE) +set(WHEEL_BUILD "${OpenVINO_VERSION_BUILD}" CACHE STRING "Build number of this release" FORCE) set(PYTHON_BRIDGE_CPACK_PATH "python") set(PY_PACKAGES_DIR ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}) set(TBB_LIBS_DIR runtime/3rdparty/tbb/lib) diff --git a/src/cmake/install_tbb.cmake b/src/cmake/install_tbb.cmake index e2203501901..6e1ab3f3982 100644 --- a/src/cmake/install_tbb.cmake +++ b/src/cmake/install_tbb.cmake @@ -17,7 +17,7 @@ endif() # install TBB -# define variables for InferenceEngineConfig.cmake +# define variables for OpenVINOConfig.cmake if(THREADING MATCHES "^(TBB|TBB_AUTO)$") set(IE_TBB_DIR "${TBB_DIR}") list(APPEND PATH_VARS "IE_TBB_DIR") @@ -58,6 +58,7 @@ if(THREADING MATCHES "^(TBB|TBB_AUTO)$" AND TBBROOT MATCHES ${TEMP}) COMPONENT tbb_dev) endif() +# install tbbbind for static OpenVINO case if(install_tbbbind) install(DIRECTORY "${TBBBIND_2_5}/lib" DESTINATION runtime/3rdparty/tbb_bind_2_5 diff --git a/src/cmake/openvino.cmake b/src/cmake/openvino.cmake index 
5541b00af8c..ea9d966629e 100644 --- a/src/cmake/openvino.cmake +++ b/src/cmake/openvino.cmake @@ -4,23 +4,27 @@ set(TARGET_NAME openvino) -add_library(${TARGET_NAME} $ - $ - $ - $ - $ - ) +# +# Add openvino library +# + +add_library(${TARGET_NAME} + $ + $ + $ + $ + $) add_library(openvino::runtime ALIAS ${TARGET_NAME}) set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME runtime) ie_add_vs_version_file(NAME ${TARGET_NAME} FILEDESCRIPTION "OpenVINO runtime library") ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) -target_include_directories(${TARGET_NAME} PUBLIC $ - $ - $ - $ -) +target_include_directories(${TARGET_NAME} PUBLIC + $ + $ + $ + $) target_link_libraries(${TARGET_NAME} PRIVATE ngraph_reference ngraph_builders @@ -61,20 +65,25 @@ install(TARGETS ${TARGET_NAME} EXPORT OpenVINOTargets INCLUDES DESTINATION runtime/include runtime/include/ie) -# --------------- OpenVINO runtime library dev ------------------------------ +# +# Add openvin::dev target +# + add_library(${TARGET_NAME}_dev INTERFACE) -target_include_directories(${TARGET_NAME}_dev INTERFACE $ - $ - $ - $ - $ - ) +add_library(openvino::runtime::dev ALIAS ${TARGET_NAME}_dev) + +target_include_directories(${TARGET_NAME}_dev INTERFACE + $ + $ + $ + $ + $) target_compile_definitions(${TARGET_NAME}_dev INTERFACE $) target_link_libraries(${TARGET_NAME}_dev INTERFACE ${TARGET_NAME} pugixml::static openvino::itt openvino::util) -add_library(openvino::runtime::dev ALIAS ${TARGET_NAME}_dev) + set_ie_threading_interface_for(${TARGET_NAME}_dev) set_target_properties(${TARGET_NAME}_dev PROPERTIES EXPORT_NAME runtime::dev) @@ -82,13 +91,16 @@ openvino_developer_export_targets(COMPONENT core TARGETS ${TARGET_NAME}_dev) # Install static libraries for case BUILD_SHARED_LIBS=OFF ov_install_static_lib(${TARGET_NAME}_dev core) -# --------------- OpenVINO runtime library dev ------------------------------ - +# # Install OpenVINO runtime +# -list(APPEND PATH_VARS "IE_INCLUDE_DIR" "OV_CORE_DIR" - 
"IE_PARALLEL_CMAKE") +list(APPEND PATH_VARS "IE_INCLUDE_DIR") + +if(ENABLE_INTEL_GNA) + list(APPEND PATH_VARS "GNA_PATH") +endif() ie_cpack_add_component(core REQUIRED DEPENDS ${core_components}) ie_cpack_add_component(core_dev REQUIRED DEPENDS core ${core_dev_components}) @@ -117,10 +129,8 @@ install(EXPORT OpenVINOTargets DESTINATION runtime/cmake COMPONENT core_dev) -set(OV_CORE_DIR "${CMAKE_BINARY_DIR}/src/core") set(PUBLIC_HEADERS_DIR "${OpenVINO_SOURCE_DIR}/src/inference/include") set(IE_INCLUDE_DIR "${PUBLIC_HEADERS_DIR}/ie") -set(IE_PARALLEL_CMAKE "${OpenVINO_SOURCE_DIR}/src/cmake/ie_parallel.cmake") configure_package_config_file("${OpenVINO_SOURCE_DIR}/cmake/templates/InferenceEngineConfig.cmake.in" "${CMAKE_BINARY_DIR}/InferenceEngineConfig.cmake" @@ -133,10 +143,8 @@ configure_package_config_file("${OpenVINO_SOURCE_DIR}/cmake/templates/OpenVINOCo PATH_VARS ${PATH_VARS}) set(IE_INCLUDE_DIR "include/ie") -set(OV_CORE_DIR ".") set(IE_TBB_DIR "${IE_TBB_DIR_INSTALL}") set(IE_TBBBIND_DIR "${IE_TBBBIND_DIR_INSTALL}") -set(IE_PARALLEL_CMAKE "cmake/ie_parallel.cmake") set(GNA_PATH "../${IE_CPACK_RUNTIME_PATH}") if(WIN32) set(GNA_PATH "../${IE_CPACK_LIBRARY_PATH}/../Release") @@ -159,7 +167,6 @@ configure_file("${OpenVINO_SOURCE_DIR}/cmake/templates/OpenVINOConfig-version.cm install(FILES "${CMAKE_BINARY_DIR}/share/InferenceEngineConfig.cmake" "${CMAKE_BINARY_DIR}/InferenceEngineConfig-version.cmake" - "${OpenVINO_SOURCE_DIR}/src/cmake/ie_parallel.cmake" DESTINATION runtime/cmake COMPONENT core_dev) diff --git a/src/common/low_precision_transformations/CMakeLists.txt b/src/common/low_precision_transformations/CMakeLists.txt index 972d306700a..8e534fd9b15 100644 --- a/src/common/low_precision_transformations/CMakeLists.txt +++ b/src/common/low_precision_transformations/CMakeLists.txt @@ -33,14 +33,12 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE $> $>) +# LTO + +set_target_properties(${TARGET_NAME}_obj PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE 
${ENABLE_LTO}) + # developer package openvino_developer_export_targets(COMPONENT core_legacy TARGETS ${TARGET_NAME}) diff --git a/src/common/snippets/CMakeLists.txt b/src/common/snippets/CMakeLists.txt index 3850a74c52e..272cb0715ea 100644 --- a/src/common/snippets/CMakeLists.txt +++ b/src/common/snippets/CMakeLists.txt @@ -36,8 +36,9 @@ add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) ie_mark_target_as_cc(${TARGET_NAME}) # LTO + set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) - # install + ov_install_static_lib(${TARGET_NAME} core) diff --git a/src/common/transformations/CMakeLists.txt b/src/common/transformations/CMakeLists.txt index 5e2c2b339d1..c8c6d878775 100644 --- a/src/common/transformations/CMakeLists.txt +++ b/src/common/transformations/CMakeLists.txt @@ -34,14 +34,12 @@ add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}_obj) ie_mark_target_as_cc(${TARGET_NAME}_obj) -# LTO - -set_target_properties(${TARGET_NAME}_obj PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) - if(NOT BUILD_SHARED_LIBS) target_compile_definitions(${TARGET_NAME}_obj PUBLIC OPENVINO_STATIC_LIBRARY) endif() +# INTERFACE library for BW compatibility + add_library(${TARGET_NAME} INTERFACE) target_include_directories(${TARGET_NAME} INTERFACE @@ -50,6 +48,10 @@ target_include_directories(${TARGET_NAME} INTERFACE target_link_libraries(${TARGET_NAME} INTERFACE openvino::runtime) +# LTO + +set_target_properties(${TARGET_NAME}_obj PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) + # developer package openvino_developer_export_targets(COMPONENT core_legacy TARGETS ${TARGET_NAME}) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index cc64b94ca32..86dd6c1edb3 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -125,7 +125,7 @@ configure_package_config_file(${OpenVINO_SOURCE_DIR}/cmake/templates/ngraphConfi INSTALL_DESTINATION cmake) 
write_basic_package_version_file(${CMAKE_BINARY_DIR}/ngraphConfigVersion.cmake - VERSION ${IE_VERSION_MAJOR}.${IE_VERSION_MINOR}.${IE_VERSION_PATCH} + VERSION ${OpenVINO_VERSION_MAJOR}.${OpenVINO_VERSION_MINOR}.${OpenVINO_VERSION_PATCH} COMPATIBILITY SameMajorVersion) install(FILES ${CMAKE_BINARY_DIR}/ngraphConfig.cmake diff --git a/src/frontends/common/CMakeLists.txt b/src/frontends/common/CMakeLists.txt index f4633b0e7a1..31af221f6da 100644 --- a/src/frontends/common/CMakeLists.txt +++ b/src/frontends/common/CMakeLists.txt @@ -55,6 +55,8 @@ ov_ncc_naming_style(FOR_TARGET ${TARGET_NAME}_obj ADDITIONAL_INCLUDE_DIRECTORIES $) +# INTERFACE library for BW compatibility + add_library(${TARGET_NAME} INTERFACE) target_link_libraries(${TARGET_NAME} INTERFACE openvino::runtime) target_include_directories(${TARGET_NAME} INTERFACE $ @@ -66,7 +68,8 @@ add_library(${TARGET_NAME}::static ALIAS ${TARGET_NAME}) openvino_developer_export_targets(COMPONENT core_legacy TARGETS ${TARGET_NAME}) -# Installation rules for shared version only +# Installation rules header files + install(DIRECTORY ${FRONTEND_INCLUDE_DIR}/openvino DESTINATION ${FRONTEND_INSTALL_INCLUDE} COMPONENT core_dev) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index c8638f7fd39..2fe0fd97425 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -25,18 +25,18 @@ install(DIRECTORY deployment_manager USE_SOURCE_PERMISSIONS) # outbound requirements.txt files for openvino-dev package -if (DEFINED ENV{CI_BUILD_DEV_TAG} AND NOT $ENV{CI_BUILD_DEV_TAG} STREQUAL "") - set(WHEEL_VERSION "${IE_VERSION}.$ENV{CI_BUILD_DEV_TAG}") +if(DEFINED ENV{CI_BUILD_DEV_TAG} AND NOT $ENV{CI_BUILD_DEV_TAG} STREQUAL "") + set(WHEEL_VERSION "${OpenVINO_VERSION}.$ENV{CI_BUILD_DEV_TAG}") else() - set(WHEEL_VERSION ${IE_VERSION}) + set(WHEEL_VERSION ${OpenVINO_VERSION}) endif() set(REQUIREMENTS_IN "${CMAKE_CURRENT_SOURCE_DIR}/requirements_dev.in") set(EXTRAS_LIST _ caffe kaldi mxnet onnx pytorch tensorflow tensorflow2) 
ie_cpack_add_component(openvino_dev_req_files) -foreach(EXTRAS ${EXTRAS_LIST}) - if(${EXTRAS} STREQUAL "_") +foreach(EXTRAS IN LISTS EXTRAS_LIST) + if(EXTRAS STREQUAL "_") set(REQUIREMENTS_OUT "requirements.txt") set(EXTRAS "") else() @@ -46,9 +46,9 @@ foreach(EXTRAS ${EXTRAS_LIST}) configure_file(${REQUIREMENTS_IN} ${REQUIREMENTS_OUT}) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${REQUIREMENTS_OUT} - DESTINATION tools - COMPONENT openvino_dev_req_files - EXCLUDE_FROM_ALL) + DESTINATION tools + COMPONENT openvino_dev_req_files + EXCLUDE_FROM_ALL) endforeach() add_subdirectory(mo) diff --git a/tools/openvino_dev/CMakeLists.txt b/tools/openvino_dev/CMakeLists.txt index 99119d57139..f7c310ce7c1 100644 --- a/tools/openvino_dev/CMakeLists.txt +++ b/tools/openvino_dev/CMakeLists.txt @@ -2,8 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 # -set(WHEEL_VERSION "${IE_VERSION}" CACHE STRING "Version of this release" FORCE) -set(WHEEL_BUILD "${IE_VERSION_BUILD}" CACHE STRING "Build number of this release" FORCE) +set(WHEEL_VERSION "${OpenVINO_VERSION}" CACHE STRING "Version of this release" FORCE) +set(WHEEL_BUILD "${OpenVINO_VERSION_BUILD}" CACHE STRING "Build number of this release" FORCE) set(SETUP_PY "${CMAKE_CURRENT_SOURCE_DIR}/setup.py") set(openvino_wheel_name "openvino_dev-${WHEEL_VERSION}-${WHEEL_BUILD}-py3-none-any.whl") From a925ec6a29b1873aaaa8d3504963000836a0136f Mon Sep 17 00:00:00 2001 From: Dmitrii Khurtin Date: Mon, 28 Mar 2022 19:48:08 +0300 Subject: [PATCH 8/9] changed symlink order of libgna (#11267) --- cmake/dependencies.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index e2f941fcd99..f2636136e26 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -276,8 +276,8 @@ if(ENABLE_INTEL_GNA) GNA_LIB_DIR libGNA_INCLUDE_DIRS libGNA_LIBRARIES_BASE_PATH) - set(GNA_VERSION "03.00.00.1455.0") - set(GNA_HASH 
"99891696269d8fa10116c96e6b7bda4362736881f0df8df8b56c751ee18e5820") + set(GNA_VERSION "03.00.00.1455.2") + set(GNA_HASH "e52785d3f730fefb4e794bb7ab40c8676537ef2f7c69c5b4bb89a5d3cc0bbe60") set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include) if(WIN32) From 30ec7366bba28a4a2692128b9e646046274cc648 Mon Sep 17 00:00:00 2001 From: Dmitry Belyakin Date: Mon, 28 Mar 2022 19:48:53 +0300 Subject: [PATCH 9/9] [OMZ]: update submodule (#11279) --- thirdparty/open_model_zoo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/open_model_zoo b/thirdparty/open_model_zoo index 8841555105e..46db87e2c7b 160000 --- a/thirdparty/open_model_zoo +++ b/thirdparty/open_model_zoo @@ -1 +1 @@ -Subproject commit 8841555105e261e05e3b949e60b7260c74c6410e +Subproject commit 46db87e2c7b1324ffccc472fc405582df220e032