32 bits support in Intel CPU plugin (#16900)
This commit is contained in:
parent
fab8236af3
commit
1ec22a3180
@ -6,7 +6,7 @@
|
|||||||
# Common cmake options
|
# Common cmake options
|
||||||
#
|
#
|
||||||
|
|
||||||
ie_dependent_option (ENABLE_INTEL_CPU "CPU plugin for OpenVINO Runtime" ON "RISCV64 OR X86 OR X86_64 OR AARCH64" OFF)
|
ie_dependent_option (ENABLE_INTEL_CPU "CPU plugin for OpenVINO Runtime" ON "RISCV64 OR X86 OR X86_64 OR AARCH64 OR ARM" OFF)
|
||||||
|
|
||||||
ie_dependent_option (ENABLE_ARM_COMPUTE_CMAKE "Enable ARM Compute build via cmake" OFF "ENABLE_INTEL_CPU" OFF)
|
ie_dependent_option (ENABLE_ARM_COMPUTE_CMAKE "Enable ARM Compute build via cmake" OFF "ENABLE_INTEL_CPU" OFF)
|
||||||
|
|
||||||
|
@ -19,24 +19,26 @@ public:
|
|||||||
Shape() = default;
|
Shape() = default;
|
||||||
|
|
||||||
explicit Shape(const ov::PartialShape& shape) {
|
explicit Shape(const ov::PartialShape& shape) {
|
||||||
minDims = shape.get_min_shape();
|
if (!shape.rank().is_dynamic()) {
|
||||||
std::transform(minDims.begin(), minDims.end(), minDims.begin(), [](Dim x){ return ov::Interval::s_max == x ? UNDEFINED_DIM : x;});
|
const auto shape_rank = shape.rank().get_length();
|
||||||
maxDims = shape.get_max_shape();
|
minDims.reserve(shape_rank);
|
||||||
std::transform(maxDims.begin(), maxDims.end(), maxDims.begin(), [](Dim x){ return ov::Interval::s_max == x ? UNDEFINED_DIM : x;});
|
maxDims.reserve(shape_rank);
|
||||||
type = shape.is_static() ? ShapeType::Static : ShapeType::Dynamic;
|
|
||||||
|
|
||||||
|
for (const auto& d : shape) {
|
||||||
|
minDims.push_back(d.get_min_length() == ov::Interval::s_max ? UNDEFINED_DIM : d.get_min_length());
|
||||||
|
maxDims.push_back(d.get_max_length() == ov::Interval::s_max ? UNDEFINED_DIM : d.get_max_length());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type = shape.is_static() ? ShapeType::Static : ShapeType::Dynamic;
|
||||||
initDims();
|
initDims();
|
||||||
|
|
||||||
hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
|
hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
|
||||||
}
|
}
|
||||||
|
|
||||||
explicit Shape(const VectorDims& shape) {
|
explicit Shape(const VectorDims& shape) {
|
||||||
minDims = shape;
|
dims = minDims = maxDims = shape;
|
||||||
maxDims = shape;
|
|
||||||
type = ShapeType::Static;
|
type = ShapeType::Static;
|
||||||
|
|
||||||
initDims();
|
|
||||||
|
|
||||||
hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
|
hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -441,8 +441,8 @@ void ExtractImagePatches::ExtractImagePatchesRefExecutor::executeReference(
|
|||||||
const std::vector<size_t> ostrides_partial = { ostrides[0], jpp.KW * IC * ostrides[1], IC * ostrides[1], ostrides[1] };
|
const std::vector<size_t> ostrides_partial = { ostrides[0], jpp.KW * IC * ostrides[1], IC * ostrides[1], ostrides[1] };
|
||||||
|
|
||||||
parallel_for4d(OB, jpp.KH, jpp.KW, IC, [&](const size_t ob, const size_t kh, const size_t kw, const size_t ic) {
|
parallel_for4d(OB, jpp.KH, jpp.KW, IC, [&](const size_t ob, const size_t kh, const size_t kw, const size_t ic) {
|
||||||
const int64_t iw_start = kw * RW - PL;
|
const int64_t iw_start = static_cast<int64_t>(kw * RW) - PL;
|
||||||
const int64_t ih_start = kh * RH - PT;
|
const int64_t ih_start = static_cast<int64_t>(kh * RH) - PT;
|
||||||
const size_t ih_lpad = ih_start >= 0 ? 0 : std::ceil(-1.f * ih_start / jpp.SH);
|
const size_t ih_lpad = ih_start >= 0 ? 0 : std::ceil(-1.f * ih_start / jpp.SH);
|
||||||
const size_t iw_lpad = iw_start >= 0 ? 0 : std::ceil(-1.f * iw_start / jpp.SW);
|
const size_t iw_lpad = iw_start >= 0 ? 0 : std::ceil(-1.f * iw_start / jpp.SW);
|
||||||
|
|
||||||
|
@ -33,7 +33,24 @@ namespace intel_cpu {
|
|||||||
#define CPU_ENABLE_PASS_X64(MANAGER, PASS)
|
#define CPU_ENABLE_PASS_X64(MANAGER, PASS)
|
||||||
#define CPU_SET_CALLBACK_X64(MANAGER, CALLBACK, ...)
|
#define CPU_SET_CALLBACK_X64(MANAGER, CALLBACK, ...)
|
||||||
|
|
||||||
#endif
|
#endif // OPENVINO_ARCH_X86_64
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(OPENVINO_ARCH_X86)
|
||||||
|
|
||||||
|
#define CPU_REGISTER_PASS_X86(MANAGER, PASS, ...) CPU_REGISTER_PASS_COMMON(MANAGER, PASS, __VA_ARGS__)
|
||||||
|
#define CPU_DISABLE_PASS_X86(MANAGER, PASS) CPU_DISABLE_PASS_COMMON(MANAGER, PASS)
|
||||||
|
#define CPU_ENABLE_PASS_X86(MANAGER, PASS) CPU_ENABLE_PASS_COMMON(MANAGER, PASS)
|
||||||
|
#define CPU_SET_CALLBACK_X86(MANAGER, CALLBACK, ...) CPU_SET_CALLBACK_COMMON(MANAGER, CALLBACK, __VA_ARGS__)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define CPU_REGISTER_PASS_X86(MANAGER, PASS, ...)
|
||||||
|
#define CPU_DISABLE_PASS_X86(MANAGER, PASS)
|
||||||
|
#define CPU_ENABLE_PASS_X86(MANAGER, PASS)
|
||||||
|
#define CPU_SET_CALLBACK_X86(MANAGER, CALLBACK, ...)
|
||||||
|
|
||||||
|
#endif // OPENVINO_ARCH_X86
|
||||||
|
|
||||||
#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
|
#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
|
||||||
|
|
||||||
@ -47,9 +64,9 @@ namespace intel_cpu {
|
|||||||
#define CPU_REGISTER_PASS_ARM(MANAGER, PASS, ...)
|
#define CPU_REGISTER_PASS_ARM(MANAGER, PASS, ...)
|
||||||
#define CPU_DISABLE_PASS_ARM(MANAGER, PASS)
|
#define CPU_DISABLE_PASS_ARM(MANAGER, PASS)
|
||||||
#define CPU_ENABLE_PASS_ARM(MANAGER, PASS)
|
#define CPU_ENABLE_PASS_ARM(MANAGER, PASS)
|
||||||
#define CPU_SET_CALLBACK_ARM(MANAGER, CALLBACK, ...)
|
// No-op variant: expands to nothing, like the sibling *_ARM stubs above (no line continuation on purpose).
#define CPU_SET_CALLBACK_ARM(MANAGER, CALLBACK, ...)
|
||||||
|
|
||||||
#endif
|
#endif // OPENVINO_ARCH_ARM || OPENVINO_ARCH_ARM64
|
||||||
|
|
||||||
} // namespace intel_cpu
|
} // namespace intel_cpu
|
||||||
} // namespace ov
|
} // namespace ov
|
||||||
|
@ -99,7 +99,7 @@
|
|||||||
#include "transformations/cpu_opset/arm/pass/convert_group_conv1d.hpp"
|
#include "transformations/cpu_opset/arm/pass/convert_group_conv1d.hpp"
|
||||||
#include "transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp"
|
#include "transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp"
|
||||||
#include "transformations/cpu_opset/arm/pass/mish_decomposition.hpp"
|
#include "transformations/cpu_opset/arm/pass/mish_decomposition.hpp"
|
||||||
#include "transformations/cpu_opset/arm/pass/decompose_integer_divide.hpp"
|
#include "transformations/cpu_opset/common/pass/decompose_integer_divide.hpp"
|
||||||
#include "transformations/cpu_opset/common/pass/convert_fq_rnn_to_quantized_rnn.hpp"
|
#include "transformations/cpu_opset/common/pass/convert_fq_rnn_to_quantized_rnn.hpp"
|
||||||
#include "transformations/cpu_opset/common/pass/move_eltwise_up_data_movement.hpp"
|
#include "transformations/cpu_opset/common/pass/move_eltwise_up_data_movement.hpp"
|
||||||
#include "transformations/cpu_opset/common/pass/ref_convert_i64_i32.hpp"
|
#include "transformations/cpu_opset/common/pass/ref_convert_i64_i32.hpp"
|
||||||
@ -266,6 +266,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
|
|||||||
// The plugin computes Divide in floating point precision.
|
// The plugin computes Divide in floating point precision.
|
||||||
// To preserve correct math for integer division we need to insert explicit Floor operation.
|
// To preserve correct math for integer division we need to insert explicit Floor operation.
|
||||||
CPU_REGISTER_PASS_ARM(manager, DecomposeIntegerDivide);
|
CPU_REGISTER_PASS_ARM(manager, DecomposeIntegerDivide);
|
||||||
|
CPU_REGISTER_PASS_X86(manager, DecomposeIntegerDivide);
|
||||||
|
|
||||||
// SpaceToDepth/ DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
|
// SpaceToDepth/ DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
|
||||||
CPU_SET_CALLBACK_COMMON(manager,
|
CPU_SET_CALLBACK_COMMON(manager,
|
||||||
|
@ -16,7 +16,7 @@ if (ENABLE_OV_ONNX_FRONTEND)
|
|||||||
else()
|
else()
|
||||||
set(EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/extension ${CMAKE_CURRENT_SOURCE_DIR}/onnx)
|
set(EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/extension ${CMAKE_CURRENT_SOURCE_DIR}/onnx)
|
||||||
endif()
|
endif()
|
||||||
if(ARM OR AARCH64)
|
if(NOT X86_64)
|
||||||
list(APPEND EXCLUDED_SOURCE_PATHS
|
list(APPEND EXCLUDED_SOURCE_PATHS
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests
|
${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/subgraph_tests
|
${CMAKE_CURRENT_SOURCE_DIR}/subgraph_tests
|
||||||
|
@ -184,12 +184,37 @@ std::vector<std::string> disabledTestPatterns() {
|
|||||||
R"(.*UniqueLayerTestCPU.*axis.*True.*)",
|
R"(.*UniqueLayerTestCPU.*axis.*True.*)",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(OPENVINO_ARCH_X86)
|
||||||
|
retVector.emplace_back(R"(ONNXQuantizedModels/QuantizedModelsTests.*)");
|
||||||
|
{
|
||||||
|
// TODO: generate new 'expected' runtime graph for x86 CPU
|
||||||
|
retVector.emplace_back(R"(smoke_serialization/ExecGraphSerializationTest.ExecutionGraph.*)");
|
||||||
|
retVector.emplace_back(R"(smoke_ExecGraph/ExecGraphRuntimePrecision.CheckRuntimePrecision/Function=(EltwiseWithTwoDynamicInputs|FakeQuantizeRelu).*)");
|
||||||
|
}
|
||||||
|
retVector.emplace_back(R"(smoke_TestsDFT_(1|2|3|4)d/DFTLayerTest.CompareWithRefs.*)");
|
||||||
|
retVector.emplace_back(R"(MultipleLSTMCellTest/MultipleLSTMCellTest.CompareWithRefs.*)");
|
||||||
|
retVector.emplace_back(R"(.*convolution_backprop_quantize_type.*)");
|
||||||
|
retVector.emplace_back(R"(.*DetectionOutputLayerTest.*)");
|
||||||
|
// WIP: plugin cannot be loaded for some reason
|
||||||
|
retVector.emplace_back(R"(.*HeteroSyntheticTest.*)");
|
||||||
|
retVector.emplace_back(R"(.*IEClassBasicTestP.*)");
|
||||||
|
// int8 / code-generation specific
|
||||||
|
retVector.emplace_back(R"(smoke_LPT.*)");
|
||||||
|
retVector.emplace_back(R"(smoke_Snippets.*)");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(OPENVINO_ARCH_ARM)
|
||||||
|
retVector.emplace_back(R"(smoke_If/SimpleIfNotConstConditionAndDimsIncreaseTest.*)");
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM)
|
#if defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM)
|
||||||
retVector.emplace_back(R"(OVClassBasicPropsTest.smoke_SetConfigAffinity.*)");
|
retVector.emplace_back(R"(OVClassBasicPropsTest.smoke_SetConfigAffinity.*)");
|
||||||
retVector.emplace_back(R"(ONNXQuantizedModels/QuantizedModelsTests.*)");
|
retVector.emplace_back(R"(ONNXQuantizedModels/QuantizedModelsTests.*)");
|
||||||
// TODO: generate new 'expected' runtime graph for CPU ARM
|
{
|
||||||
retVector.emplace_back(R"(smoke_serialization/ExecGraphSerializationTest.ExecutionGraph.*)");
|
// TODO: generate new 'expected' runtime graph for CPU ARM
|
||||||
retVector.emplace_back(R"(smoke_ExecGraph/ExecGraphRuntimePrecision.CheckRuntimePrecision/Function=(EltwiseWithTwoDynamicInputs|FakeQuantizeRelu).*)");
|
retVector.emplace_back(R"(smoke_serialization/ExecGraphSerializationTest.ExecutionGraph.*)");
|
||||||
|
retVector.emplace_back(R"(smoke_ExecGraph/ExecGraphRuntimePrecision.CheckRuntimePrecision/Function=(EltwiseWithTwoDynamicInputs|FakeQuantizeRelu).*)");
|
||||||
|
}
|
||||||
{
|
{
|
||||||
// TODO: enable once streams / tput mode is supported
|
// TODO: enable once streams / tput mode is supported
|
||||||
retVector.emplace_back(R"(OVClassConfigTestCPU.smoke_Check(Model|Core)StreamsHasHigherPriorityThanLatencyHint.*)");
|
retVector.emplace_back(R"(OVClassConfigTestCPU.smoke_Check(Model|Core)StreamsHasHigherPriorityThanLatencyHint.*)");
|
||||||
@ -200,17 +225,23 @@ std::vector<std::string> disabledTestPatterns() {
|
|||||||
retVector.emplace_back(R"(smoke_CPU_OVClassCompileModelAndCheckSecondaryPropertiesTest.*)");
|
retVector.emplace_back(R"(smoke_CPU_OVClassCompileModelAndCheckSecondaryPropertiesTest.*)");
|
||||||
retVector.emplace_back(R"(smoke_CPU_OVClassCompileModelAndCheckWithSecondaryPropertiesDoubleTest.*)");
|
retVector.emplace_back(R"(smoke_CPU_OVClassCompileModelAndCheckWithSecondaryPropertiesDoubleTest.*)");
|
||||||
}
|
}
|
||||||
retVector.emplace_back(R"(smoke_LPT.*)");
|
|
||||||
retVector.emplace_back(R"(smoke_Decomposition_(3|4)D/Mvn6LayerTest.CompareWithRefs.*)");
|
retVector.emplace_back(R"(smoke_Decomposition_(3|4)D/Mvn6LayerTest.CompareWithRefs.*)");
|
||||||
retVector.emplace_back(R"(smoke_AvgPool_ExplicitPad_CeilRounding/PoolingLayerTest.CompareWithRefs.*)");
|
retVector.emplace_back(R"(smoke_AvgPool_ExplicitPad_CeilRounding/PoolingLayerTest.CompareWithRefs.*)");
|
||||||
retVector.emplace_back(R"(smoke_TestsDFT_(1|2|3|4)d/DFTLayerTest.CompareWithRefs.*)");
|
retVector.emplace_back(R"(smoke_TestsDFT_(1|2|3|4)d/DFTLayerTest.CompareWithRefs.*)");
|
||||||
retVector.emplace_back(R"(smoke_TestsSelect_numpy/SelectLayerTest.CompareWithRefImpl/COND=BOOL.*)");
|
|
||||||
retVector.emplace_back(R"(smoke_Snippets.*)");
|
|
||||||
retVector.emplace_back(R"(smoke_Quantized.*)");
|
|
||||||
retVector.emplace_back(R"(smoke_NegativeQuantizedMatMulMultiplyFusion.*)");
|
|
||||||
retVector.emplace_back(R"(MultipleLSTMCellTest/MultipleLSTMCellTest.CompareWithRefs.*)");
|
retVector.emplace_back(R"(MultipleLSTMCellTest/MultipleLSTMCellTest.CompareWithRefs.*)");
|
||||||
retVector.emplace_back(R"(smoke_If/SimpleIfTest.CompareWithRefs.*)");
|
retVector.emplace_back(R"(smoke_If/SimpleIfTest.CompareWithRefs.*)");
|
||||||
retVector.emplace_back(R"(smoke_If/SimpleIfNotConstConditionTest.CompareWithRefs.*)");
|
retVector.emplace_back(R"(smoke_If/SimpleIfNotConstConditionTest.CompareWithRefs.*)");
|
||||||
|
// invalid test: checks u8 precision for runtime graph, while it should be f32
|
||||||
|
retVector.emplace_back(R"(smoke_NegativeQuantizedMatMulMultiplyFusion.*)");
|
||||||
|
// int8 / code-generation specific
|
||||||
|
retVector.emplace_back(R"(smoke_LPT.*)");
|
||||||
|
retVector.emplace_back(R"(smoke_Snippets.*)");
|
||||||
|
retVector.emplace_back(R"(smoke_Quantized.*)");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(OPENVINO_ARCH_X86_64)
|
||||||
|
// very time-consuming test
|
||||||
|
retVector.emplace_back(R"(.*OVInferConsistencyTest.*)");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_WIN32) || defined(_WIN64)
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
2
src/plugins/intel_cpu/thirdparty/onednn
vendored
2
src/plugins/intel_cpu/thirdparty/onednn
vendored
@ -1 +1 @@
|
|||||||
Subproject commit 478d6ef239027651a0c3843ac9c3c1d444afb53f
|
Subproject commit 11e62a6f5077b1e44e83096db4b47274eee65273
|
@ -183,10 +183,12 @@ void OVInferConsistencyTest::FillInput(InferContext& inferContext, int index) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(OVInferConsistencyTest, Infer) {
|
TEST_P(OVInferConsistencyTest, Infer) {
|
||||||
|
SKIP_IF_CURRENT_TEST_IS_DISABLED();
|
||||||
InferCheck(true);
|
InferCheck(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(OVInferConsistencyTest, AsyncInfer) {
|
TEST_P(OVInferConsistencyTest, AsyncInfer) {
|
||||||
|
SKIP_IF_CURRENT_TEST_IS_DISABLED();
|
||||||
InferCheck(false);
|
InferCheck(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user