Generic ARM fixes (#16994)

Ilya Lavrenov
2023-04-17 20:37:10 +04:00
committed by GitHub
parent f9098cd67c
commit f4fe8400a7
16 changed files with 65 additions and 60 deletions


@@ -9,11 +9,13 @@
 #pragma once
-#include <openvino/function_name.hpp>
-#include <openvino/util/pp.hpp>
+#include <cstdint>
 #include <string>
 #include <utility>
+#include "openvino/function_name.hpp"
+#include "openvino/util/pp.hpp"
 /** @ingroup ov_dev_profiling
  * @brief openvino namespace
  */
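A note on the pattern repeated in several files below: adding `#include <cstdint>` makes each header self-sufficient for the fixed-width types it uses, instead of relying on transitive includes that vary between standard-library builds (a common source of breakage when cross-compiling, e.g. for ARM). A minimal illustration of the failure mode, not taken from the commit:

    // Without this include, the code below may compile on one toolchain
    // (where another header happens to pull in <cstdint>) and fail on an
    // ARM cross toolchain with a different libstdc++ header layout.
    #include <cstdint>

    inline std::uint64_t make_handle(std::uint32_t hi, std::uint32_t lo) {
        return (static_cast<std::uint64_t>(hi) << 32) | lo;
    }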


@@ -8,6 +8,7 @@
 # error non standalone GAPI
 # endif
+#include <cstdint>
 #include <tuple>
 #include <opencv2/gapi/opencv_includes.hpp>


@@ -4,6 +4,7 @@
 #pragma once
+#include <cstdint>
 #include <fstream>
 #include <functional>
 #include <string>


@@ -157,7 +157,7 @@ public:
     /// rank is dynamic and `r` is static, updates this shape to have a rank of `r`
     /// with dimensions all dynamic.
     /// \return `true` if this shape's rank is compatible with `r`, else `false`.
-    bool merge_rank(Rank r);
+    bool merge_rank(const Rank& r);
     /// \brief Convert a static PartialShape to a Shape.
     /// \return A new Shape `s` where `s[i] = size_t((*this)[i])`.
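`Rank` is an alias of `ov::Dimension`, so the signature change above only switches from pass-by-value to pass-by-const-reference; the documented semantics are unchanged. A usage sketch based on the doc comment (values are illustrative, not from the commit):

    #include "openvino/core/partial_shape.hpp"

    void sketch() {
        ov::PartialShape s = ov::PartialShape::dynamic();  // dynamic rank
        bool ok = s.merge_rank(ov::Rank(4));
        // ok == true; s now has rank 4 with all dimensions dynamic: {?,?,?,?}
    }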


@@ -97,6 +97,7 @@ ov::Shape ov::PartialShape::get_max_shape() const {
         return Shape();
     } else {
         Shape shape;
+        shape.reserve(rank().get_length());
         for (auto dimension : m_dimensions) {
             shape.push_back(dimension.get_interval().get_max_val());
         }
@@ -109,6 +110,7 @@ ov::Shape ov::PartialShape::get_min_shape() const {
         return Shape();
     } else {
         Shape shape;
+        shape.reserve(rank().get_length());
         for (auto dimension : m_dimensions) {
             shape.push_back(dimension.get_interval().get_min_val());
         }
@@ -119,6 +121,7 @@ ov::Shape ov::PartialShape::get_min_shape() const {
 ov::Shape ov::PartialShape::get_shape() const {
     NGRAPH_CHECK(rank().is_static(), "get_shape() must be called on a static shape");
     Shape shape;
+    shape.reserve(rank().get_length());
     for (auto dimension : m_dimensions) {
         auto min_val = dimension.get_interval().get_min_val();
         auto max_val = dimension.get_interval().get_max_val();
@@ -137,8 +140,9 @@ ov::PartialShape ov::operator+(const PartialShape& s1, const PartialShape& s2) {
         throw std::invalid_argument("rank mismatch");
     }
-    PartialShape result{};
+    PartialShape result;
     result.m_rank_is_static = true;
+    result.m_dimensions.reserve(s1.m_dimensions.size());
     for (size_t i = 0; i < s1.m_dimensions.size(); i++) {
         result.m_dimensions.push_back(s1.m_dimensions[i] + s2.m_dimensions[i]);
     }
@@ -206,13 +210,12 @@ bool ov::PartialShape::same_scheme(const PartialShape& s) const {
             return false;
         }
-        bool success = true;
         for (int64_t i = 0; i < rank().get_length(); i++) {
-            success &= (*this)[i].same_scheme(s[i]);
+            if (!m_dimensions[i].same_scheme(s.m_dimensions[i]))
+                return false;
         }
-        return success;
+        return true;
     } else {
         return false;
     }
@@ -222,13 +225,12 @@ bool ov::PartialShape::relaxes(const PartialShape& s) const {
     if (rank().is_dynamic()) {
         return true;
     } else if (s.rank().is_static() && rank().get_length() == s.rank().get_length()) {
-        bool all_relax = true;
         for (int64_t i = 0; i < rank().get_length(); i++) {
-            all_relax &= ((*this)[i].relaxes(s[i]));
+            if (!m_dimensions[i].relaxes(s.m_dimensions[i]))
+                return false;
         }
-        return all_relax;
+        return true;
     } else {
         return false;
     }
@@ -238,19 +240,18 @@ bool ov::PartialShape::refines(const PartialShape& s) const {
     if (s.rank().is_dynamic()) {
         return true;
     } else if (rank().is_static() && rank().get_length() == s.rank().get_length()) {
-        bool all_refine = true;
         for (int64_t i = 0; i < rank().get_length(); i++) {
-            all_refine &= ((*this)[i].refines(s[i]));
+            if (!m_dimensions[i].refines(s.m_dimensions[i]))
+                return false;
         }
-        return all_refine;
+        return true;
     } else {
         return false;
     }
 }
-bool ov::PartialShape::merge_rank(Rank r) {
+bool ov::PartialShape::merge_rank(const Rank& r) {
     if (r.is_dynamic()) {
         return true;
     } else if (!m_rank_is_static) {
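Two micro-optimizations run through this file: `reserve()` before the `push_back` loops caps each vector at a single allocation, and `same_scheme`/`relaxes`/`refines` now return `false` at the first mismatching dimension instead of accumulating a flag across the whole loop. A generic sketch of the reserve pattern (standalone, not OpenVINO API):

    #include <cstdint>
    #include <vector>

    std::vector<int64_t> copy_dims(const std::vector<int64_t>& dims) {
        std::vector<int64_t> out;
        out.reserve(dims.size());  // one allocation up front...
        for (auto d : dims)
            out.push_back(d);      // ...so no reallocation inside the loop
        return out;
    }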


@@ -11,11 +11,13 @@ set(TARGET_NAME "openvino_intel_cpu_plugin")
 if(CMAKE_COMPILER_IS_GNUCXX)
     ie_add_compiler_flags(-Wno-sign-compare)
 elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
-    # C4267, 4244 issues from mkl-dnn headers conversion from 'XXX' to 'YYY', possible loss of data
+    # C4267, 4244 issues from oneDNN headers conversion from 'XXX' to 'YYY', possible loss of data
     ie_add_compiler_flags(/wd4267)
     ie_add_compiler_flags(/wd4244)
     # mkldnn headers: '<<': result of 32-bit shift implicitly converted to 64 bits
     ie_add_compiler_flags(/wd4334)
+    # oneDNN arm64: unary minus operator applied to unsigned type, result still unsigned
+    ie_add_compiler_flags(/wd4146)
 elseif(OV_COMPILER_IS_CLANG)
     ie_add_compiler_flags(-Wno-delete-non-abstract-non-virtual-dtor)
 endif()
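For reference, a minimal repro of MSVC's C4146 (example is mine, not from the commit): unary minus on an unsigned operand wraps modulo 2^N rather than producing a negative value, which the oneDNN arm64 sources trigger intentionally, hence the suppression above:

    #include <cstdint>
    #include <iostream>

    int main() {
        uint32_t u = 1;
        uint32_t v = -u;         // MSVC C4146: result is still unsigned, 2^32 - 1
        std::cout << v << "\n";  // prints 4294967295
    }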
@@ -66,7 +68,7 @@ if (NOT X86_64)
         ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/snippets/x64/*)
 endif()
-if (NOT AARCH64)
+if (NOT (AARCH64 OR ARM))
     set(EXCLUDE_PATHS ${EXCLUDE_PATHS}
         ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/cpu_opset/arm/*)
 endif()
@@ -87,7 +89,7 @@ ie_add_plugin(NAME ${TARGET_NAME}
 if(ARM OR AARCH64)
     set_target_properties(${TARGET_NAME} PROPERTIES OUTPUT_NAME "openvino_arm_cpu_plugin")
 elseif(RISCV64)
-    set_target_properties(${TARGET_NAME} PROPERTIES OUTPUT_NAME "openvino_risv_cpu_plugin")
+    set_target_properties(${TARGET_NAME} PROPERTIES OUTPUT_NAME "openvino_riscv_cpu_plugin")
 endif()
 set_ie_threading_interface_for(${TARGET_NAME})


@@ -70,7 +70,7 @@ std::string DnnlMemoryDesc::serializeFormat() const {
 size_t DnnlMemoryDesc::getMaxMemSize() const {
     if (shape.isDynamic()) {
-        IE_THROW() << "Can't compute max mem size for DnnlMemoryDesc with dynaimc shape";
+        IE_THROW() << "Can't compute max mem size for DnnlMemoryDesc with dynamic shape";
     }
     return getCurrentMemSize();


@@ -69,7 +69,7 @@ bool StaticShape::same_scheme(const StaticShape& s) const {
     return true;
 }
-bool StaticShape::merge_rank(Rank r) {
+bool StaticShape::merge_rank(const Rank& r) {
     if (r.is_dynamic()) {
         return true;
     } else {


@@ -41,7 +41,7 @@ public:
     bool compatible(const StaticShape& s) const;
     bool same_scheme(const StaticShape& s) const;
     bool refines(const StaticShape& s) const;
-    bool merge_rank(Rank r);
+    bool merge_rank(const Rank& r);
     ov::Shape to_shape() const;
     PartialShape to_partial_shape() const;


@@ -40,9 +40,9 @@ const std::vector<QuantRange> ranges_i32 = {
     { INT32_MIN, INT32_MAX }
 };
-const std::vector<size_t> levels_8 = {256};
-const std::vector<size_t> levels_16 = {65536};
-const std::vector<size_t> levels_32 = {4294967296};
+const std::vector<uint64_t> levels_8 = {256};
+const std::vector<uint64_t> levels_16 = {65536};
+const std::vector<uint64_t> levels_32 = {4294967296};
 const std::vector<QuantizationGranularity> granularity = {Pertensor};
 const auto quantParams_i8 = ::testing::Combine(
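The `size_t` to `uint64_t` change is the likely 32-bit ARM fix here: on an ILP32 target `size_t` is 32 bits wide, so the initializer `4294967296` (2^32) does not fit in the element type of `levels_32`. A standalone sketch of the distinction, not from the commit:

    #include <climits>
    #include <cstdint>

    // size_t is 32 bits on 32-bit ARM, so `size_t x = 4294967296;` would not
    // fit there; uint64_t holds 2^32 on every target.
    constexpr uint64_t levels_32_ok = uint64_t{1} << 32;  // 4294967296
    static_assert(sizeof(uint64_t) * CHAR_BIT == 64, "uint64_t is exactly 64 bits");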


@@ -47,50 +47,45 @@ function(ie_add_onednn)
         set(OpenMP_cmake_included ON) ## to skip "omp simd" inside a code. Lead to some crashes inside NDK LLVM..
     endif()
     if(SUGGEST_OVERRIDE_SUPPORTED)
         # xbyak compilation fails
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-suggest-override")
     endif()
     if(X86_64)
         set(DNNL_TARGET_ARCH "X64" CACHE STRING "" FORCE)
     elseif(X86)
         set(DNNL_TARGET_ARCH "X86" CACHE STRING "" FORCE)
     elseif(RISCV64)
         set(DNNL_TARGET_ARCH "RV64" CACHE STRING "" FORCE)
-    elseif(AARCH64)
-        set(DNNL_TARGET_ARCH "AARCH64" CACHE STRING "" FORCE)
-        set(DNNL_AARCH64_USE_ACL ON CACHE BOOL "" FORCE)
+    elseif(AARCH64 OR ARM)
         # TODO: fix warning
         if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG)
             ie_add_compiler_flags(-Wno-macro-redefined)
         endif()
-        # move to separate ACL cmake
-        if(APPLE)
-            # Apple M1 / M2 is assumed
-            set(ARM_COMPUTE_TARGET_ARCH_DEFAULT armv8.2-a)
-            set(ARM_COMPUTE_SCONS_JOBS "8" CACHE STRING "Number of parallel threads to build ARM Compute Library")
+        if(ARM)
+            set(DNNL_TARGET_ARCH "ARM" CACHE STRING "" FORCE)
+            set(DNNL_AARCH64_USE_ACL OFF CACHE BOOL "" FORCE)
+            set(ARM_COMPUTE_TARGET_ARCH_DEFAULT armv7a)
+            set(ARM_COMPUTE_TARGET_ARCHS armv7a)
         else()
-            set(ARM_COMPUTE_TARGET_ARCH_DEFAULT arm64-v8a)
+            set(DNNL_TARGET_ARCH "AARCH64" CACHE STRING "" FORCE)
+            set(DNNL_AARCH64_USE_ACL ON CACHE BOOL "" FORCE)
+            # move to separate ACL cmake
+            if(APPLE)
+                # Apple M1 / M2 is assumed
+                set(ARM_COMPUTE_TARGET_ARCH_DEFAULT armv8.2-a)
+            else()
+                set(ARM_COMPUTE_TARGET_ARCH_DEFAULT arm64-v8a)
+            endif()
+            set(ARM_COMPUTE_TARGET_ARCHS arm64-v8a arm64-v8.2-a arm64-v8.2-a-sve arm64-v8.2-a-sve2
+                                         armv8a armv8.2-a armv8.2-a-sve armv8.6-a armv8.6-a-sve armv8.6-a-sve2
+                                         armv8r64)
         endif()
-        set(ARM_COMPUTE_TARGET_ARCHS arm64-v8a arm64-v8.2-a arm64-v8.2-a-sve arm64-v8.2-a-sve2
-                                     armv8a armv8.2-a armv8.2-a-sve armv8.6-a armv8.6-a-sve armv8.6-a-sve2
-                                     armv8r64)
         set(ARM_COMPUTE_TARGET_ARCH "${ARM_COMPUTE_TARGET_ARCH_DEFAULT}" CACHE STRING "Architecture for ARM ComputeLibrary")
         set_property(CACHE ARM_COMPUTE_TARGET_ARCH PROPERTY STRINGS ${ARM_COMPUTE_TARGET_ARCHS})
     else()
         message(FATAL_ERROR "Unsupported system processor ${CMAKE_SYSTEM_PROCESSOR}")
     endif()
     if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG)
         ie_add_compiler_flags(-Wno-undef)
         ie_add_compiler_flags(-Wno-missing-declarations)
         if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12 AND CMAKE_COMPILER_IS_GNUCXX)
             ie_add_compiler_flags(-Wno-error=array-bounds)
             ie_add_compiler_flags(-Wno-error=stringop-overflow=)
         endif()
     elseif(UNIX AND CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-disable=10121")
     endif()
     # WA for old TBBConfig.cmake like tbb2019_20180718oss
     # they don't check that imported target is already created


@@ -4,6 +4,7 @@
 #pragma once
+#include <cstdint>
 #include <memory>
 #include <string>
 #include <tuple>
@@ -16,7 +17,7 @@ namespace SubgraphTestsDefinitions {
 typedef std::pair<float, float> QuantRange;
 typedef std::tuple<
-    size_t,
+    uint64_t,
     QuantRange,
     QuantRange,
     ngraph::helpers::QuantizationGranularity,
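The tuple's first slot carries the level count fed from the `levels_*` vectors above, so its type moves to `uint64_t` together with them; a mismatched slot would silently narrow 2^32 on 32-bit targets. A reduced sketch (the alias name is mine, not from the commit):

    #include <cstdint>
    #include <tuple>
    #include <utility>

    using QuantRange = std::pair<float, float>;
    // First element: quantization level count; uint64_t so 4294967296 fits
    // even where size_t is 32-bit.
    using QuantParamsSketch = std::tuple<uint64_t, QuantRange, QuantRange>;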


@@ -4,6 +4,7 @@
 #pragma once
+#include <cstdint>
 #include <memory>
 #include <vector>
 #include <ngraph/ngraph.hpp>
@@ -17,7 +18,7 @@ public:
     FakeQuantizeOnData();
     FakeQuantizeOnData(
-        const size_t quantizationLevel,
+        const uint64_t quantizationLevel,
         const ngraph::Shape& constantShape,
         const std::vector<float>& inputLowValues,
         const std::vector<float>& inputHighValues,
@@ -31,7 +32,7 @@ public:
     bool isSigned() const;
     virtual bool empty() const;
-    size_t quantizationLevel;
+    uint64_t quantizationLevel;
     ngraph::Shape constantShape;
     std::vector<float> inputLowValues;
     std::vector<float> inputHighValues;
@@ -67,7 +68,7 @@ public:
     FakeQuantizeOnDataWithConstant();
     FakeQuantizeOnDataWithConstant(
-        const size_t quantizationLevel,
+        const uint64_t quantizationLevel,
         const std::vector<ngraph::Shape>& constantShapes,
         const std::vector<float>& inputLowValues,
         const std::vector<float>& inputHighValues,
@@ -80,7 +81,7 @@ public:
     virtual bool empty() const;
-    size_t quantizationLevel;
+    uint64_t quantizationLevel;
     std::vector<ngraph::Shape> constantShapes;
     std::vector<float> inputLowValues;
     std::vector<float> inputHighValues;
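Same rationale as the test-suite change above: the level count for n-bit quantization is 2^n, and n = 32 overflows a 32-bit `size_t`, so the member and the constructor parameter become `uint64_t`. A hedged sketch with a hypothetical helper (not part of the OpenVINO API):

    #include <cstdint>

    constexpr uint64_t levels(unsigned bits) {
        return uint64_t{1} << bits;  // 2^bits quantization levels
    }
    static_assert(levels(8) == 256, "");
    static_assert(levels(16) == 65536, "");
    static_assert(levels(32) == 4294967296ULL, "");  // needs more than 32 bits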


@@ -4,6 +4,7 @@
 #pragma once
+#include <cstdint>
 #include <memory>
 #include <vector>
 #include <ngraph/ngraph.hpp>
@@ -18,7 +19,7 @@ public:
     FakeQuantizeOnWeights();
     FakeQuantizeOnWeights(
-        const size_t quantizationLevel,
+        const uint64_t quantizationLevel,
         const ngraph::Shape& constantShape,
         const std::vector<float>& inputLowValues,
         const std::vector<float>& inputHighValues,


@@ -12,7 +12,7 @@ namespace subgraph {
 FakeQuantizeOnData::FakeQuantizeOnData() : quantizationLevel(0) {}
 FakeQuantizeOnData::FakeQuantizeOnData(
-    const size_t quantizationLevel,
+    const uint64_t quantizationLevel,
     const ngraph::Shape& constantShape,
     const std::vector<float>& inputLowValues,
     const std::vector<float>& inputHighValues,
@@ -51,7 +51,7 @@ FakeQuantizeOnDataWithConstant::FakeQuantizeOnDataWithConstant() :
     outputPrecision(ngraph::element::undefined) {}
 FakeQuantizeOnDataWithConstant::FakeQuantizeOnDataWithConstant(
-    const size_t quantizationLevel,
+    const uint64_t quantizationLevel,
     const std::vector<ngraph::Shape>& constantShapes,
     const std::vector<float>& inputLowValues,
     const std::vector<float>& inputHighValues,


@@ -12,7 +12,7 @@ namespace subgraph {
 FakeQuantizeOnWeights::FakeQuantizeOnWeights() {}
 FakeQuantizeOnWeights::FakeQuantizeOnWeights(
-    const size_t quantizationLevel,
+    const uint64_t quantizationLevel,
     const ngraph::Shape& constantShape,
     const std::vector<float>& inputLowValues,
     const std::vector<float>& inputHighValues,