diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt index 3361a80d237..607fd002ae5 100644 --- a/src/plugins/intel_cpu/CMakeLists.txt +++ b/src/plugins/intel_cpu/CMakeLists.txt @@ -43,7 +43,7 @@ endif() add_definitions(-DOV_CPU_WITH_DNNL) set(OV_CPU_WITH_DNNL ON) -if(DNNL_AARCH64_USE_ACL) +if(DNNL_USE_ACL) add_definitions(-DOV_CPU_WITH_ACL) set(OV_CPU_WITH_ACL ON) endif() diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp index d308cba78d8..cc94d1c9216 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp @@ -26,6 +26,110 @@ inline VectorDims reshape_sizes(VectorDims dims) { return result_dims; } + +bool AclEltwiseExecutorBuilder::isSupported(const EltwiseAttrs& eltwiseAttrs, + const std::vector& srcDescs, + const std::vector& dstDescs) const { + auto checkPrecision = [&srcDescs, &dstDescs](std::vector srcVecPrc, Precision dstPrc) -> bool { + for (int i = 0; i < srcDescs.size(); i++) { + if (srcDescs[i]->getPrecision() != srcVecPrc[i]) return false; + } + if (dstDescs[0]->getPrecision() != dstPrc) { return false; } + return true; + }; + + switch (eltwiseAttrs.algorithm) { + case Algorithm::EltwiseSqrt: + case Algorithm::EltwiseDivide: + case Algorithm::EltwiseRelu: +#ifdef OPENVINO_ARCH_ARM64 + case Algorithm::EltwiseGeluErf: +#endif + case Algorithm::EltwiseElu: + case Algorithm::EltwiseTanh: + case Algorithm::EltwiseSigmoid: +// case Algorithm::EltwisePowerDynamic: // TODO: ACL version doesn't work https://github.com/ARM-software/ComputeLibrary/issues/1047 + case Algorithm::EltwiseSoftRelu: + case Algorithm::EltwiseClamp: + //case Algorithm::EltwiseSwish: // TODO: efficientdet-d0 accuracy drops if ACL Swish is used + case Algorithm::EltwisePrelu: + case Algorithm::EltwiseHswish: + if (!(checkPrecision({Precision::FP16, Precision::FP16}, Precision::FP16) || + checkPrecision({Precision::FP32, Precision::FP32}, Precision::FP32))) { + return false; + } + break; + case Algorithm::EltwiseAbs: + case Algorithm::EltwiseExp: + case Algorithm::EltwiseLog: + if (!(checkPrecision({Precision::I32, Precision::I32}, Precision::I32) || + checkPrecision({Precision::FP16, Precision::FP16}, Precision::FP16) || + checkPrecision({Precision::FP32, Precision::FP32}, Precision::FP32))) { + return false; + } + break; + case Algorithm::EltwiseMaximum: + case Algorithm::EltwiseMinimum: + case Algorithm::EltwiseSquaredDifference: + if (!(checkPrecision({Precision::I16, Precision::I16}, Precision::I16) || + checkPrecision({Precision::I32, Precision::I32}, Precision::I32) || + checkPrecision({Precision::FP16, Precision::FP16}, Precision::FP16) || + checkPrecision({Precision::FP32, Precision::FP32}, Precision::FP32))) { + return false; + } + break; + case Algorithm::EltwiseAdd: + case Algorithm::EltwiseSubtract: + if (!(checkPrecision({Precision::U8, Precision::U8}, Precision::U8) || + checkPrecision({Precision::I16, Precision::I16}, Precision::I16) || + checkPrecision({Precision::I32, Precision::I32}, Precision::I32) || + checkPrecision({Precision::FP16, Precision::FP16}, Precision::FP16) || + checkPrecision({Precision::FP32, Precision::FP32}, Precision::FP32))) { + return false; + } + break; + case Algorithm::EltwiseMultiply: + if (!(checkPrecision({Precision::U8, Precision::U8}, Precision::U8) || + checkPrecision({Precision::U8, Precision::U8}, Precision::I16) || + checkPrecision({Precision::U8, Precision::I16}, Precision::I16) || + checkPrecision({Precision::I16, Precision::U8}, Precision::I16) || + checkPrecision({Precision::I16, Precision::I16}, Precision::I16) || + checkPrecision({Precision::FP16, Precision::FP16}, Precision::FP16) || + checkPrecision({Precision::FP32, Precision::FP32}, Precision::FP32))) { + return false; + } + break; + // ACL supports only U8 precision on output for comparison operations + case Algorithm::EltwiseEqual: + case Algorithm::EltwiseNotEqual: + case Algorithm::EltwiseGreater: + case Algorithm::EltwiseGreaterEqual: + case Algorithm::EltwiseLess: + case Algorithm::EltwiseLessEqual: + if (!(checkPrecision({Precision::U8, Precision::U8}, Precision::U8) || + checkPrecision({Precision::I16, Precision::I16}, Precision::U8) || + checkPrecision({Precision::I32, Precision::I32}, Precision::U8) || + checkPrecision({Precision::FP16, Precision::FP16}, Precision::U8) || + checkPrecision({Precision::FP32, Precision::FP32}, Precision::U8))) { + return false; + } + break; + default: + return false; + } + + for (const auto & srcDesc : srcDescs) { + if (getAclDataLayoutByMemoryDesc(srcDesc) == arm_compute::DataLayout::UNKNOWN) + return false; + } + for (const auto & dstDesc : dstDescs) { + if (getAclDataLayoutByMemoryDesc(dstDesc) == arm_compute::DataLayout::UNKNOWN) + return false; + } + + return true; +} + AclEltwiseExecutor::AclEltwiseExecutor(const ExecutorContext::CPtr context) : EltwiseExecutor(context) {} bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vector &srcDescs, diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.hpp index f6a0ed16f12..5848db0f9f6 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.hpp @@ -40,104 +40,7 @@ class AclEltwiseExecutorBuilder : public EltwiseExecutorBuilder { public: bool isSupported(const EltwiseAttrs& eltwiseAttrs, const std::vector& srcDescs, - const std::vector& dstDescs) const override { - auto checkPrecision = [&srcDescs, &dstDescs](std::vector srcVecPrc, Precision dstPrc) -> bool { - for (int i = 0; i < srcDescs.size(); i++) { - if (srcDescs[i]->getPrecision() != srcVecPrc[i]) return false; - } - if (dstDescs[0]->getPrecision() != dstPrc) { return false; } - return true; - }; - - switch (eltwiseAttrs.algorithm) { - case Algorithm::EltwiseSqrt: - case Algorithm::EltwiseDivide: - case Algorithm::EltwiseRelu: - case Algorithm::EltwiseGeluErf: - case Algorithm::EltwiseElu: - case Algorithm::EltwiseTanh: - case Algorithm::EltwiseSigmoid: -// case Algorithm::EltwisePowerDynamic: // TODO: ACL version doesn't work https://github.com/ARM-software/ComputeLibrary/issues/1047 - case Algorithm::EltwiseSoftRelu: - case Algorithm::EltwiseClamp: - //case Algorithm::EltwiseSwish: // TODO: efficientdet-d0 accuracy drops if ACL Swish is used - case Algorithm::EltwisePrelu: - case Algorithm::EltwiseHswish: - if (!(checkPrecision({Precision::FP16, Precision::FP16}, Precision::FP16) || - checkPrecision({Precision::FP32, Precision::FP32}, Precision::FP32))) { - return false; - } - break; - case Algorithm::EltwiseAbs: - case Algorithm::EltwiseExp: - case Algorithm::EltwiseLog: - if (!(checkPrecision({Precision::I32, Precision::I32}, Precision::I32) || - checkPrecision({Precision::FP16, Precision::FP16}, Precision::FP16) || - checkPrecision({Precision::FP32, Precision::FP32}, Precision::FP32))) { - return false; - } - break; - case Algorithm::EltwiseMaximum: - case Algorithm::EltwiseMinimum: - case Algorithm::EltwiseSquaredDifference: - if (!(checkPrecision({Precision::I16, Precision::I16}, Precision::I16) || - checkPrecision({Precision::I32, Precision::I32}, Precision::I32) || - checkPrecision({Precision::FP16, Precision::FP16}, Precision::FP16) || - checkPrecision({Precision::FP32, Precision::FP32}, Precision::FP32))) { - return false; - } - break; - case Algorithm::EltwiseAdd: - case Algorithm::EltwiseSubtract: - if (!(checkPrecision({Precision::U8, Precision::U8}, Precision::U8) || - checkPrecision({Precision::I16, Precision::I16}, Precision::I16) || - checkPrecision({Precision::I32, Precision::I32}, Precision::I32) || - checkPrecision({Precision::FP16, Precision::FP16}, Precision::FP16) || - checkPrecision({Precision::FP32, Precision::FP32}, Precision::FP32))) { - return false; - } - break; - case Algorithm::EltwiseMultiply: - if (!(checkPrecision({Precision::U8, Precision::U8}, Precision::U8) || - checkPrecision({Precision::U8, Precision::U8}, Precision::I16) || - checkPrecision({Precision::U8, Precision::I16}, Precision::I16) || - checkPrecision({Precision::I16, Precision::U8}, Precision::I16) || - checkPrecision({Precision::I16, Precision::I16}, Precision::I16) || - checkPrecision({Precision::FP16, Precision::FP16}, Precision::FP16) || - checkPrecision({Precision::FP32, Precision::FP32}, Precision::FP32))) { - return false; - } - break; - // ACL supports only U8 precision on output for comparison operations - case Algorithm::EltwiseEqual: - case Algorithm::EltwiseNotEqual: - case Algorithm::EltwiseGreater: - case Algorithm::EltwiseGreaterEqual: - case Algorithm::EltwiseLess: - case Algorithm::EltwiseLessEqual: - if (!(checkPrecision({Precision::U8, Precision::U8}, Precision::U8) || - checkPrecision({Precision::I16, Precision::I16}, Precision::U8) || - checkPrecision({Precision::I32, Precision::I32}, Precision::U8) || - checkPrecision({Precision::FP16, Precision::FP16}, Precision::U8) || - checkPrecision({Precision::FP32, Precision::FP32}, Precision::U8))) { - return false; - } - break; - default: - return false; - } - - for (const auto & srcDesc : srcDescs) { - if (getAclDataLayoutByMemoryDesc(srcDesc) == arm_compute::DataLayout::UNKNOWN) - return false; - } - for (const auto & dstDesc : dstDescs) { - if (getAclDataLayoutByMemoryDesc(dstDesc) == arm_compute::DataLayout::UNKNOWN) - return false; - } - - return true; - } + const std::vector& dstDescs) const override; EltwiseExecutorPtr makeExecutor(const ExecutorContext::CPtr context) const override { return std::make_shared(context); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 16281089abd..8d2cb841e3b 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -194,6 +194,11 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(smoke_Quantized.*)"); #endif +#if defined(OPENVINO_ARCH_ARM) + // TODO: rounding errors + retVector.emplace_back(R"(.*iv_secondaryInputType=PARAMETER_opType=VECTOR_NetType=i32.*)"); +#endif + #if !defined(OPENVINO_ARCH_X86_64) // very time-consuming test retVector.emplace_back(R"(.*OVInferConsistencyTest.*)"); diff --git a/src/plugins/intel_cpu/thirdparty/CMakeLists.txt b/src/plugins/intel_cpu/thirdparty/CMakeLists.txt index db8dd7e32be..458bbdffa4c 100644 --- a/src/plugins/intel_cpu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_cpu/thirdparty/CMakeLists.txt @@ -60,15 +60,14 @@ function(ie_add_onednn) endif() set(ARM_COMPUTE_SCONS_JOBS "8" CACHE STRING "Number of parallel threads to build ARM Compute Library") + set(DNNL_USE_ACL ON CACHE BOOL "" FORCE) if(ARM) set(DNNL_TARGET_ARCH "ARM" CACHE STRING "" FORCE) - set(DNNL_AARCH64_USE_ACL OFF CACHE BOOL "" FORCE) set(ARM_COMPUTE_TARGET_ARCH_DEFAULT armv7a) set(ARM_COMPUTE_TARGET_ARCHS armv7a armv7a-hf) else() set(DNNL_TARGET_ARCH "AARCH64" CACHE STRING "" FORCE) - set(DNNL_AARCH64_USE_ACL ON CACHE BOOL "" FORCE) # move to separate ACL cmake if(APPLE) # Apple M1 / M2 is assumed diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index 9b547e7cb60..e8793658212 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit 9b547e7cb6017a3603509d879dd11e1aee7dce7f +Subproject commit e8793658212f13a67e1f96f32f93f2b8d8809a52