32 bits support in Intel CPU plugin (#16900)

This commit is contained in:
Ilya Lavrenov 2023-04-19 22:10:20 +04:00 committed by GitHub
parent fab8236af3
commit 1ec22a3180
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 80 additions and 27 deletions

View File

@ -6,7 +6,7 @@
# Common cmake options
#
ie_dependent_option (ENABLE_INTEL_CPU "CPU plugin for OpenVINO Runtime" ON "RISCV64 OR X86 OR X86_64 OR AARCH64" OFF)
ie_dependent_option (ENABLE_INTEL_CPU "CPU plugin for OpenVINO Runtime" ON "RISCV64 OR X86 OR X86_64 OR AARCH64 OR ARM" OFF)
ie_dependent_option (ENABLE_ARM_COMPUTE_CMAKE "Enable ARM Compute build via cmake" OFF "ENABLE_INTEL_CPU" OFF)

View File

@ -19,24 +19,26 @@ public:
Shape() = default;
explicit Shape(const ov::PartialShape& shape) {
minDims = shape.get_min_shape();
std::transform(minDims.begin(), minDims.end(), minDims.begin(), [](Dim x){ return ov::Interval::s_max == x ? UNDEFINED_DIM : x;});
maxDims = shape.get_max_shape();
std::transform(maxDims.begin(), maxDims.end(), maxDims.begin(), [](Dim x){ return ov::Interval::s_max == x ? UNDEFINED_DIM : x;});
type = shape.is_static() ? ShapeType::Static : ShapeType::Dynamic;
if (!shape.rank().is_dynamic()) {
const auto shape_rank = shape.rank().get_length();
minDims.reserve(shape_rank);
maxDims.reserve(shape_rank);
for (const auto& d : shape) {
minDims.push_back(d.get_min_length() == ov::Interval::s_max ? UNDEFINED_DIM : d.get_min_length());
maxDims.push_back(d.get_max_length() == ov::Interval::s_max ? UNDEFINED_DIM : d.get_max_length());
}
}
type = shape.is_static() ? ShapeType::Static : ShapeType::Dynamic;
initDims();
hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
}
explicit Shape(const VectorDims& shape) {
minDims = shape;
maxDims = shape;
dims = minDims = maxDims = shape;
type = ShapeType::Static;
initDims();
hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
}

View File

@ -441,8 +441,8 @@ void ExtractImagePatches::ExtractImagePatchesRefExecutor::executeReference(
const std::vector<size_t> ostrides_partial = { ostrides[0], jpp.KW * IC * ostrides[1], IC * ostrides[1], ostrides[1] };
parallel_for4d(OB, jpp.KH, jpp.KW, IC, [&](const size_t ob, const size_t kh, const size_t kw, const size_t ic) {
const int64_t iw_start = kw * RW - PL;
const int64_t ih_start = kh * RH - PT;
const int64_t iw_start = static_cast<int64_t>(kw * RW) - PL;
const int64_t ih_start = static_cast<int64_t>(kh * RH) - PT;
const size_t ih_lpad = ih_start >= 0 ? 0 : std::ceil(-1.f * ih_start / jpp.SH);
const size_t iw_lpad = iw_start >= 0 ? 0 : std::ceil(-1.f * iw_start / jpp.SW);

View File

@ -33,7 +33,24 @@ namespace intel_cpu {
#define CPU_ENABLE_PASS_X64(MANAGER, PASS)
#define CPU_SET_CALLBACK_X64(MANAGER, CALLBACK, ...)
#endif
#endif // OPENVINO_ARCH_X86_64
// Pass-manager helpers for OPENVINO_ARCH_X86 builds.
// When OPENVINO_ARCH_X86 is defined, each *_X86 macro forwards its arguments to the
// matching *_COMMON macro; otherwise each *_X86 macro expands to nothing.
#if defined(OPENVINO_ARCH_X86)
#define CPU_REGISTER_PASS_X86(MANAGER, PASS, ...) CPU_REGISTER_PASS_COMMON(MANAGER, PASS, __VA_ARGS__)
#define CPU_DISABLE_PASS_X86(MANAGER, PASS) CPU_DISABLE_PASS_COMMON(MANAGER, PASS)
#define CPU_ENABLE_PASS_X86(MANAGER, PASS) CPU_ENABLE_PASS_COMMON(MANAGER, PASS)
#define CPU_SET_CALLBACK_X86(MANAGER, CALLBACK, ...) CPU_SET_CALLBACK_COMMON(MANAGER, CALLBACK, __VA_ARGS__)
#else
// OPENVINO_ARCH_X86 is not defined: the *_X86 helpers are empty macros.
#define CPU_REGISTER_PASS_X86(MANAGER, PASS, ...)
#define CPU_DISABLE_PASS_X86(MANAGER, PASS)
#define CPU_ENABLE_PASS_X86(MANAGER, PASS)
#define CPU_SET_CALLBACK_X86(MANAGER, CALLBACK, ...)
#endif // OPENVINO_ARCH_X86
#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
@ -47,9 +64,9 @@ namespace intel_cpu {
#define CPU_REGISTER_PASS_ARM(MANAGER, PASS, ...)
#define CPU_DISABLE_PASS_ARM(MANAGER, PASS)
#define CPU_ENABLE_PASS_ARM(MANAGER, PASS)
#define CPU_SET_CALLBACK_ARM(MANAGER, CALLBACK, ...)
// Empty definition: expands to nothing. Deliberately has no trailing '\' so the
// next physical line (the closing #endif) is never spliced into the macro body.
#define CPU_SET_CALLBACK_ARM(MANAGER, CALLBACK, ...)
#endif
#endif // OPENVINO_ARCH_ARM || OPENVINO_ARCH_ARM64
} // namespace intel_cpu
} // namespace ov

View File

@ -99,7 +99,7 @@
#include "transformations/cpu_opset/arm/pass/convert_group_conv1d.hpp"
#include "transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp"
#include "transformations/cpu_opset/arm/pass/mish_decomposition.hpp"
#include "transformations/cpu_opset/arm/pass/decompose_integer_divide.hpp"
#include "transformations/cpu_opset/common/pass/decompose_integer_divide.hpp"
#include "transformations/cpu_opset/common/pass/convert_fq_rnn_to_quantized_rnn.hpp"
#include "transformations/cpu_opset/common/pass/move_eltwise_up_data_movement.hpp"
#include "transformations/cpu_opset/common/pass/ref_convert_i64_i32.hpp"
@ -266,6 +266,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
// The plugin computes Divide in floating point precision.
// To preserve correct math for integer division we need to insert explicit Floor operation.
CPU_REGISTER_PASS_ARM(manager, DecomposeIntegerDivide);
CPU_REGISTER_PASS_X86(manager, DecomposeIntegerDivide);
// SpaceToDepth/ DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
CPU_SET_CALLBACK_COMMON(manager,

View File

@ -16,7 +16,7 @@ if (ENABLE_OV_ONNX_FRONTEND)
else()
set(EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/extension ${CMAKE_CURRENT_SOURCE_DIR}/onnx)
endif()
if(ARM OR AARCH64)
if(NOT X86_64)
list(APPEND EXCLUDED_SOURCE_PATHS
${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests
${CMAKE_CURRENT_SOURCE_DIR}/subgraph_tests

View File

@ -184,12 +184,37 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*UniqueLayerTestCPU.*axis.*True.*)",
};
#if defined(OPENVINO_ARCH_X86)
retVector.emplace_back(R"(ONNXQuantizedModels/QuantizedModelsTests.*)");
{
// TODO: generate new 'expected' runtime graph for x86 CPU
retVector.emplace_back(R"(smoke_serialization/ExecGraphSerializationTest.ExecutionGraph.*)");
retVector.emplace_back(R"(smoke_ExecGraph/ExecGraphRuntimePrecision.CheckRuntimePrecision/Function=(EltwiseWithTwoDynamicInputs|FakeQuantizeRelu).*)");
}
retVector.emplace_back(R"(smoke_TestsDFT_(1|2|3|4)d/DFTLayerTest.CompareWithRefs.*)");
retVector.emplace_back(R"(MultipleLSTMCellTest/MultipleLSTMCellTest.CompareWithRefs.*)");
retVector.emplace_back(R"(.*convolution_backprop_quantize_type.*)");
retVector.emplace_back(R"(.*DetectionOutputLayerTest.*)");
// WIP: plugin cannot be loaded for some reason
retVector.emplace_back(R"(.*HeteroSyntheticTest.*)");
retVector.emplace_back(R"(.*IEClassBasicTestP.*)");
// int8 / code-generation specific
retVector.emplace_back(R"(smoke_LPT.*)");
retVector.emplace_back(R"(smoke_Snippets.*)");
#endif
#if defined(OPENVINO_ARCH_ARM)
retVector.emplace_back(R"(smoke_If/SimpleIfNotConstConditionAndDimsIncreaseTest.*)");
#endif
#if defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM)
retVector.emplace_back(R"(OVClassBasicPropsTest.smoke_SetConfigAffinity.*)");
retVector.emplace_back(R"(ONNXQuantizedModels/QuantizedModelsTests.*)");
// TODO: generate new 'expected' runtime graph for CPU ARM
retVector.emplace_back(R"(smoke_serialization/ExecGraphSerializationTest.ExecutionGraph.*)");
retVector.emplace_back(R"(smoke_ExecGraph/ExecGraphRuntimePrecision.CheckRuntimePrecision/Function=(EltwiseWithTwoDynamicInputs|FakeQuantizeRelu).*)");
{
// TODO: generate new 'expected' runtime graph for CPU ARM
retVector.emplace_back(R"(smoke_serialization/ExecGraphSerializationTest.ExecutionGraph.*)");
retVector.emplace_back(R"(smoke_ExecGraph/ExecGraphRuntimePrecision.CheckRuntimePrecision/Function=(EltwiseWithTwoDynamicInputs|FakeQuantizeRelu).*)");
}
{
// TODO: enable once streams / tput mode is supported
retVector.emplace_back(R"(OVClassConfigTestCPU.smoke_Check(Model|Core)StreamsHasHigherPriorityThanLatencyHint.*)");
@ -200,17 +225,23 @@ std::vector<std::string> disabledTestPatterns() {
retVector.emplace_back(R"(smoke_CPU_OVClassCompileModelAndCheckSecondaryPropertiesTest.*)");
retVector.emplace_back(R"(smoke_CPU_OVClassCompileModelAndCheckWithSecondaryPropertiesDoubleTest.*)");
}
retVector.emplace_back(R"(smoke_LPT.*)");
retVector.emplace_back(R"(smoke_Decomposition_(3|4)D/Mvn6LayerTest.CompareWithRefs.*)");
retVector.emplace_back(R"(smoke_AvgPool_ExplicitPad_CeilRounding/PoolingLayerTest.CompareWithRefs.*)");
retVector.emplace_back(R"(smoke_TestsDFT_(1|2|3|4)d/DFTLayerTest.CompareWithRefs.*)");
retVector.emplace_back(R"(smoke_TestsSelect_numpy/SelectLayerTest.CompareWithRefImpl/COND=BOOL.*)");
retVector.emplace_back(R"(smoke_Snippets.*)");
retVector.emplace_back(R"(smoke_Quantized.*)");
retVector.emplace_back(R"(smoke_NegativeQuantizedMatMulMultiplyFusion.*)");
retVector.emplace_back(R"(MultipleLSTMCellTest/MultipleLSTMCellTest.CompareWithRefs.*)");
retVector.emplace_back(R"(smoke_If/SimpleIfTest.CompareWithRefs.*)");
retVector.emplace_back(R"(smoke_If/SimpleIfNotConstConditionTest.CompareWithRefs.*)");
// invalid test: checks u8 precision for runtime graph, while it should be f32
retVector.emplace_back(R"(smoke_NegativeQuantizedMatMulMultiplyFusion.*)");
// int8 / code-generation specific
retVector.emplace_back(R"(smoke_LPT.*)");
retVector.emplace_back(R"(smoke_Snippets.*)");
retVector.emplace_back(R"(smoke_Quantized.*)");
#endif
#if !defined(OPENVINO_ARCH_X86_64)
// very time-consuming test
retVector.emplace_back(R"(.*OVInferConsistencyTest.*)");
#endif
#if defined(_WIN32) || defined(_WIN64)

@ -1 +1 @@
Subproject commit 478d6ef239027651a0c3843ac9c3c1d444afb53f
Subproject commit 11e62a6f5077b1e44e83096db4b47274eee65273

View File

@ -183,10 +183,12 @@ void OVInferConsistencyTest::FillInput(InferContext& inferContext, int index) {
}
// Parameterized test case "Infer": first lets SKIP_IF_CURRENT_TEST_IS_DISABLED() bail out
// if this test is disabled, then runs InferCheck(true).
// NOTE(review): the boolean presumably selects the synchronous inference path — confirm against InferCheck.
TEST_P(OVInferConsistencyTest, Infer) {
SKIP_IF_CURRENT_TEST_IS_DISABLED();
InferCheck(true);
}
// Parameterized test case "AsyncInfer": first lets SKIP_IF_CURRENT_TEST_IS_DISABLED() bail out
// if this test is disabled, then runs InferCheck(false).
// NOTE(review): the boolean presumably selects the asynchronous inference path — confirm against InferCheck.
TEST_P(OVInferConsistencyTest, AsyncInfer) {
SKIP_IF_CURRENT_TEST_IS_DISABLED();
InferCheck(false);
}