32 bits support in Intel CPU plugin (#16900)

2023-04-19 22:10:20 +04:00 · 2023-04-19 22:10:20 +04:00 · 1ec22a3180
commit 1ec22a3180
parent fab8236af3
11 changed files with 80 additions and 27 deletions
--- a/cmake/features.cmake
+++ b/cmake/features.cmake
@ -6,7 +6,7 @@
 # Common cmake options
 #
-ie_dependent_option (ENABLE_INTEL_CPU "CPU plugin for OpenVINO Runtime" ON "RISCV64 OR X86 OR X86_64 OR AARCH64" OFF)
+ie_dependent_option (ENABLE_INTEL_CPU "CPU plugin for OpenVINO Runtime" ON "RISCV64 OR X86 OR X86_64 OR AARCH64 OR ARM" OFF)
 ie_dependent_option (ENABLE_ARM_COMPUTE_CMAKE "Enable ARM Compute build via cmake" OFF "ENABLE_INTEL_CPU" OFF)
--- a/src/plugins/intel_cpu/src/cpu_shape.h
+++ b/src/plugins/intel_cpu/src/cpu_shape.h
@ -19,24 +19,26 @@ public:
    Shape() = default;
    explicit Shape(const ov::PartialShape& shape) {
-        minDims = shape.get_min_shape();
+        if (!shape.rank().is_dynamic()) {
-        std::transform(minDims.begin(), minDims.end(), minDims.begin(), [](Dim x){ return ov::Interval::s_max == x ? UNDEFINED_DIM : x;});
+            const auto shape_rank = shape.rank().get_length();
-        maxDims = shape.get_max_shape();
+            minDims.reserve(shape_rank);
-        std::transform(maxDims.begin(), maxDims.end(), maxDims.begin(), [](Dim x){ return ov::Interval::s_max == x ? UNDEFINED_DIM : x;});
+            maxDims.reserve(shape_rank);
        type = shape.is_static() ? ShapeType::Static : ShapeType::Dynamic;
            for (const auto& d : shape) {
                minDims.push_back(d.get_min_length() == ov::Interval::s_max ? UNDEFINED_DIM : d.get_min_length());
                maxDims.push_back(d.get_max_length() == ov::Interval::s_max ? UNDEFINED_DIM : d.get_max_length());
            }
        }
        type = shape.is_static() ? ShapeType::Static : ShapeType::Dynamic;
        initDims();
        hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
    }
    explicit Shape(const VectorDims& shape) {
-        minDims = shape;
+        dims = minDims = maxDims = shape;
        maxDims = shape;
        type = ShapeType::Static;
        initDims();
        hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
    }
--- a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp
+++ b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp
@ -441,8 +441,8 @@ void ExtractImagePatches::ExtractImagePatchesRefExecutor::executeReference(
    const std::vector<size_t> ostrides_partial = { ostrides[0], jpp.KW * IC * ostrides[1], IC * ostrides[1], ostrides[1] };
    parallel_for4d(OB, jpp.KH, jpp.KW, IC, [&](const size_t ob, const size_t kh, const size_t kw, const size_t ic) {
-        const int64_t iw_start = kw * RW - PL;
+        const int64_t iw_start = static_cast<int64_t>(kw * RW) - PL;
-        const int64_t ih_start = kh * RH - PT;
+        const int64_t ih_start = static_cast<int64_t>(kh * RH) - PT;
        const size_t ih_lpad = ih_start >= 0 ? 0 : std::ceil(-1.f * ih_start / jpp.SH);
        const size_t iw_lpad = iw_start >= 0 ? 0 : std::ceil(-1.f * iw_start / jpp.SW);
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/decompose_integer_divide.cpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/decompose_integer_divide.cpp
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/decompose_integer_divide.hpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/decompose_integer_divide.hpp
--- a/src/plugins/intel_cpu/src/transformations/defs.hpp
+++ b/src/plugins/intel_cpu/src/transformations/defs.hpp
@ -33,7 +33,24 @@ namespace intel_cpu {
 #define CPU_ENABLE_PASS_X64(MANAGER, PASS)
 #define CPU_SET_CALLBACK_X64(MANAGER, CALLBACK, ...)
-#endif
+#endif // OPENVINO_ARCH_X86_64
 #if defined(OPENVINO_ARCH_X86)
 #define CPU_REGISTER_PASS_X86(MANAGER, PASS, ...) CPU_REGISTER_PASS_COMMON(MANAGER, PASS, __VA_ARGS__)
 #define CPU_DISABLE_PASS_X86(MANAGER, PASS) CPU_DISABLE_PASS_COMMON(MANAGER, PASS)
 #define CPU_ENABLE_PASS_X86(MANAGER, PASS) CPU_ENABLE_PASS_COMMON(MANAGER, PASS)
 #define CPU_SET_CALLBACK_X86(MANAGER, CALLBACK, ...) CPU_SET_CALLBACK_COMMON(MANAGER, CALLBACK, __VA_ARGS__)
 #else
 #define CPU_REGISTER_PASS_X86(MANAGER, PASS, ...)
 #define CPU_DISABLE_PASS_X86(MANAGER, PASS)
 #define CPU_ENABLE_PASS_X86(MANAGER, PASS)
 #define CPU_SET_CALLBACK_X86(MANAGER, CALLBACK, ...)
 #endif // OPENVINO_ARCH_X86
 #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
@ -47,9 +64,9 @@ namespace intel_cpu {
 #define CPU_REGISTER_PASS_ARM(MANAGER, PASS, ...)
 #define CPU_DISABLE_PASS_ARM(MANAGER, PASS)
 #define CPU_ENABLE_PASS_ARM(MANAGER, PASS)
-#define CPU_SET_CALLBACK_ARM(MANAGER, CALLBACK, ...)
+#define CPU_SET_CALLBACK_ARM(MANAGER, CALLBACK, ...)
-#endif
+#endif // OPENVINO_ARCH_ARM || OPENVINO_ARCH_ARM64
 }   // namespace intel_cpu
 }   // namespace ov
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@ -99,7 +99,7 @@
 #include "transformations/cpu_opset/arm/pass/convert_group_conv1d.hpp"
 #include "transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp"
 #include "transformations/cpu_opset/arm/pass/mish_decomposition.hpp"
-#include "transformations/cpu_opset/arm/pass/decompose_integer_divide.hpp"
+#include "transformations/cpu_opset/common/pass/decompose_integer_divide.hpp"
 #include "transformations/cpu_opset/common/pass/convert_fq_rnn_to_quantized_rnn.hpp"
 #include "transformations/cpu_opset/common/pass/move_eltwise_up_data_movement.hpp"
 #include "transformations/cpu_opset/common/pass/ref_convert_i64_i32.hpp"
@ -266,6 +266,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
    // The plugin computes Divide in floating point precision.
    // To preserve correct math for integer division we need to insert explicit Floor operation.
    CPU_REGISTER_PASS_ARM(manager, DecomposeIntegerDivide);
    CPU_REGISTER_PASS_X86(manager, DecomposeIntegerDivide);
    // SpaceToDepth/ DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
    CPU_SET_CALLBACK_COMMON(manager,
--- a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt
+++ b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt
@ -16,7 +16,7 @@ if (ENABLE_OV_ONNX_FRONTEND)
 else()
    set(EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/extension ${CMAKE_CURRENT_SOURCE_DIR}/onnx)
 endif()
-if(ARM OR AARCH64)
+if(NOT X86_64)
    list(APPEND EXCLUDED_SOURCE_PATHS
        ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests
        ${CMAKE_CURRENT_SOURCE_DIR}/subgraph_tests
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
@ -184,12 +184,37 @@ std::vector<std::string> disabledTestPatterns() {
        R"(.*UniqueLayerTestCPU.*axis.*True.*)",
    };
 #if defined(OPENVINO_ARCH_X86)
    retVector.emplace_back(R"(ONNXQuantizedModels/QuantizedModelsTests.*)");
    {
        // TODO: generate new 'expected' runtime graph for x86 CPU
        retVector.emplace_back(R"(smoke_serialization/ExecGraphSerializationTest.ExecutionGraph.*)");
        retVector.emplace_back(R"(smoke_ExecGraph/ExecGraphRuntimePrecision.CheckRuntimePrecision/Function=(EltwiseWithTwoDynamicInputs|FakeQuantizeRelu).*)");
    }
    retVector.emplace_back(R"(smoke_TestsDFT_(1|2|3|4)d/DFTLayerTest.CompareWithRefs.*)");
    retVector.emplace_back(R"(MultipleLSTMCellTest/MultipleLSTMCellTest.CompareWithRefs.*)");
    retVector.emplace_back(R"(.*convolution_backprop_quantize_type.*)");
    retVector.emplace_back(R"(.*DetectionOutputLayerTest.*)");
    // WIP: plugin cannot be loaded for some reason
    retVector.emplace_back(R"(.*HeteroSyntheticTest.*)");
    retVector.emplace_back(R"(.*IEClassBasicTestP.*)");
    // int8 / code-generation specific
    retVector.emplace_back(R"(smoke_LPT.*)");
    retVector.emplace_back(R"(smoke_Snippets.*)");
 #endif
 #if defined(OPENVINO_ARCH_ARM)
    retVector.emplace_back(R"(smoke_If/SimpleIfNotConstConditionAndDimsIncreaseTest.*)");
 #endif
 #if defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM)
    retVector.emplace_back(R"(OVClassBasicPropsTest.smoke_SetConfigAffinity.*)");
    retVector.emplace_back(R"(ONNXQuantizedModels/QuantizedModelsTests.*)");
-    // TODO: generate new 'expected' runtime graph for CPU ARM
+    {
-    retVector.emplace_back(R"(smoke_serialization/ExecGraphSerializationTest.ExecutionGraph.*)");
+        // TODO: generate new 'expected' runtime graph for CPU ARM
-    retVector.emplace_back(R"(smoke_ExecGraph/ExecGraphRuntimePrecision.CheckRuntimePrecision/Function=(EltwiseWithTwoDynamicInputs|FakeQuantizeRelu).*)");
+        retVector.emplace_back(R"(smoke_serialization/ExecGraphSerializationTest.ExecutionGraph.*)");
        retVector.emplace_back(R"(smoke_ExecGraph/ExecGraphRuntimePrecision.CheckRuntimePrecision/Function=(EltwiseWithTwoDynamicInputs|FakeQuantizeRelu).*)");
    }
    {
        // TODO: enable once streams / tput mode is supported
        retVector.emplace_back(R"(OVClassConfigTestCPU.smoke_Check(Model|Core)StreamsHasHigherPriorityThanLatencyHint.*)");
@ -200,17 +225,23 @@ std::vector<std::string> disabledTestPatterns() {
        retVector.emplace_back(R"(smoke_CPU_OVClassCompileModelAndCheckSecondaryPropertiesTest.*)");
        retVector.emplace_back(R"(smoke_CPU_OVClassCompileModelAndCheckWithSecondaryPropertiesDoubleTest.*)");
    }
    retVector.emplace_back(R"(smoke_LPT.*)");
    retVector.emplace_back(R"(smoke_Decomposition_(3|4)D/Mvn6LayerTest.CompareWithRefs.*)");
    retVector.emplace_back(R"(smoke_AvgPool_ExplicitPad_CeilRounding/PoolingLayerTest.CompareWithRefs.*)");
    retVector.emplace_back(R"(smoke_TestsDFT_(1|2|3|4)d/DFTLayerTest.CompareWithRefs.*)");
    retVector.emplace_back(R"(smoke_TestsSelect_numpy/SelectLayerTest.CompareWithRefImpl/COND=BOOL.*)");
    retVector.emplace_back(R"(smoke_Snippets.*)");
    retVector.emplace_back(R"(smoke_Quantized.*)");
    retVector.emplace_back(R"(smoke_NegativeQuantizedMatMulMultiplyFusion.*)");
    retVector.emplace_back(R"(MultipleLSTMCellTest/MultipleLSTMCellTest.CompareWithRefs.*)");
    retVector.emplace_back(R"(smoke_If/SimpleIfTest.CompareWithRefs.*)");
    retVector.emplace_back(R"(smoke_If/SimpleIfNotConstConditionTest.CompareWithRefs.*)");
    // invalid test: checks u8 precision for runtime graph, while it should be f32
    retVector.emplace_back(R"(smoke_NegativeQuantizedMatMulMultiplyFusion.*)");
    // int8 / code-generation specific
    retVector.emplace_back(R"(smoke_LPT.*)");
    retVector.emplace_back(R"(smoke_Snippets.*)");
    retVector.emplace_back(R"(smoke_Quantized.*)");
 #endif
 #if !defined(OPENVINO_ARCH_X86_64)
    // very time-consuming test
    retVector.emplace_back(R"(.*OVInferConsistencyTest.*)");
 #endif
 #if defined(_WIN32) || defined(_WIN64)
--- a/src/plugins/intel_cpu/thirdparty/onednn
+++ b/src/plugins/intel_cpu/thirdparty/onednn
@ -1 +1 @@
-Subproject commit 478d6ef239027651a0c3843ac9c3c1d444afb53f
+Subproject commit 11e62a6f5077b1e44e83096db4b47274eee65273
--- a/src/tests/functional/plugin/shared/src/behavior/ov_infer_request/infer_correctness.cpp
+++ b/src/tests/functional/plugin/shared/src/behavior/ov_infer_request/infer_correctness.cpp
@ -183,10 +183,12 @@ void OVInferConsistencyTest::FillInput(InferContext& inferContext, int index) {
 }
 TEST_P(OVInferConsistencyTest, Infer) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED();
    InferCheck(true);
 }
 TEST_P(OVInferConsistencyTest, AsyncInfer) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED();
    InferCheck(false);
 }
@ -1 +1 @@
-Subproject commit 478d6ef239027651a0c3843ac9c3c1d444afb53f
+Subproject commit 11e62a6f5077b1e44e83096db4b47274eee65273