From f2167a95454f8dbfeedb5e45d60245b90a79329a Mon Sep 17 00:00:00 2001 From: Nesterov Alexander Date: Wed, 30 Aug 2023 11:12:25 +0200 Subject: [PATCH] [ARM CPU] Remove configure from exec func in eltwise, reduce and pooling (#19071) --- .../src/nodes/executors/acl/acl_eltwise.cpp | 112 +++++++++--------- .../src/nodes/executors/acl/acl_eltwise.hpp | 2 +- .../src/nodes/executors/acl/acl_pooling.cpp | 16 +-- .../src/nodes/executors/acl/acl_pooling.hpp | 2 +- .../src/nodes/executors/acl/acl_reduce.cpp | 13 +- .../src/nodes/executors/acl/acl_reduce.hpp | 2 +- 6 files changed, 76 insertions(+), 71 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp index 3f8b6d91d57..cb5911e9083 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp @@ -226,123 +226,124 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto dstTensors[i].allocator()->init(dstTensorsInfo[i]); } + std::function(void)> exec_func; switch (aclEltwiseAttrs.algorithm) { case Algorithm::EltwiseAdd: if (!NEArithmeticAddition::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], ConvertPolicy::SATURATE)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ConvertPolicy::SATURATE); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseMultiply: if (!NEPixelWiseMultiplication::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseSubtract: if (!NEArithmeticSubtraction::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], ConvertPolicy::SATURATE)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ConvertPolicy::SATURATE); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseDivide: if (!NEElementwiseDivision::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0]); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseMaximum: if (!NEElementwiseMax::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0]); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseMinimum: if (!NEElementwiseMin::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0]); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseSquaredDifference: if (!NEElementwiseSquaredDiff::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0]); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseEqual: if (!NEElementwiseComparison::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], ComparisonOperation::Equal)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ComparisonOperation::Equal); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseNotEqual: if (!NEElementwiseComparison::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], ComparisonOperation::NotEqual)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ComparisonOperation::NotEqual); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseGreater: if (!NEElementwiseComparison::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], ComparisonOperation::Greater)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ComparisonOperation::Greater); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseGreaterEqual: if (!NEElementwiseComparison::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], ComparisonOperation::GreaterEqual)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ComparisonOperation::GreaterEqual); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseLess: if (!NEElementwiseComparison::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], ComparisonOperation::Less)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ComparisonOperation::Less); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseLessEqual: if (!NEElementwiseComparison::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0], ComparisonOperation::LessEqual)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0], ComparisonOperation::LessEqual); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseRelu: @@ -355,7 +356,7 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto {ActivationLayerInfo::ActivationFunction::LEAKY_RELU, aclEltwiseAttrs.alpha})) return false; } - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); if (aclEltwiseAttrs.alpha == 0) { acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::RELU); @@ -363,136 +364,137 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto acl_op->configure(&srcTensors[0], &dstTensors[0], {ActivationLayerInfo::ActivationFunction::LEAKY_RELU, aclEltwiseAttrs.alpha}); } - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseGeluErf: if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::GELU)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::GELU); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseElu: if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], {ActivationLayerInfo::ActivationFunction::ELU, aclEltwiseAttrs.alpha})) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0], {ActivationLayerInfo::ActivationFunction::ELU, aclEltwiseAttrs.alpha}); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseTanh: if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], {ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f})) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0], {ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f}); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseSigmoid: if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::LOGISTIC)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::LOGISTIC); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseAbs: if (!NEAbsLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0])) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0]); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseSqrt: if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::SQRT)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::SQRT); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseSoftRelu: if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::SOFT_RELU)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::SOFT_RELU); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseExp: if (!NEExpLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0])) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0]); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseClamp: if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], {ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, aclEltwiseAttrs.beta, aclEltwiseAttrs.alpha})) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0], {ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, aclEltwiseAttrs.beta, aclEltwiseAttrs.alpha}); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseSwish: if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], {ActivationLayerInfo::ActivationFunction::SWISH, aclEltwiseAttrs.alpha})) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0], {ActivationLayerInfo::ActivationFunction::SWISH, aclEltwiseAttrs.alpha}); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwisePrelu: if (!NEPReluLayer::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0])) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &srcTensors[1], &dstTensors[0]); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseHswish: if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::HARD_SWISH)) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::HARD_SWISH); - acl_op->run(); + return acl_op; }; break; case Algorithm::EltwiseLog: if (!NELogLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0])) return false; - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensors[0], &dstTensors[0]); - acl_op->run(); + return acl_op; }; break; default: IE_THROW() << "Unsupported operation type for ACL Eltwise executor: " << static_cast(aclEltwiseAttrs.algorithm); } + ifunc = exec_func(); return true; } @@ -505,7 +507,7 @@ void AclEltwiseExecutor::exec(const std::vector &src, const std::vec dstTensors[i].allocator()->import_memory(dst[i]->getData()); } - exec_func(); + ifunc->run(); for (size_t i = 0; i < src.size(); i++) { srcTensors[i].allocator()->free(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.hpp index 989fa315cbf..67e62d7cceb 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.hpp @@ -34,7 +34,7 @@ private: EltwiseAttrs aclEltwiseAttrs{}; impl_desc_type implType = impl_desc_type::acl; std::vector srcTensors, dstTensors; - std::function exec_func; + std::unique_ptr ifunc; }; class AclEltwiseExecutorBuilder : public EltwiseExecutorBuilder { diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp index c63fe501739..72021a5c6c5 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp @@ -110,6 +110,7 @@ bool AclPoolingExecutor::init(const PoolingAttrs& poolingAttrs, srcTensor.allocator()->init(srcTensorInfo); dstTensor.allocator()->init(dstTensorInfo); + std::function(void)> exec_func; if (srcDims.size() == 5u) { if (dstDescs.size() == 1u) { Pooling3dLayerInfo pool_info; @@ -123,10 +124,10 @@ bool AclPoolingExecutor::init(const PoolingAttrs& poolingAttrs, nullptr, &pool_info)) return false; - exec_func = [this, pool_info]{ + exec_func = [this, pool_info]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensor, &dstTensor, pool_info); - acl_op->run(); + return acl_op; }; } } else { @@ -146,10 +147,10 @@ bool AclPoolingExecutor::init(const PoolingAttrs& poolingAttrs, TensorInfo indTensorInfo = TensorInfo(shapeCast(indDims), 1, precisionToAclDataType(dstDescs[1]->getPrecision()), getAclDataLayoutByMemoryDesc(dstDescs[1])); indTensor.allocator()->init(indTensorInfo); - exec_func = [this, pool_info]{ + exec_func = [this, pool_info]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensor, &dstTensor, pool_info, &indTensor); - acl_op->run(); + return acl_op; }; } else { if (!isSupported(srcTensorInfo, @@ -162,13 +163,14 @@ bool AclPoolingExecutor::init(const PoolingAttrs& poolingAttrs, &pool_info, nullptr)) return false; - exec_func = [this, pool_info]{ + exec_func = [this, pool_info]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensor, &dstTensor, pool_info); - acl_op->run(); + return acl_op; }; } } + ifunc = exec_func(); return true; } @@ -177,7 +179,7 @@ void AclPoolingExecutor::exec(const std::vector& src, const std::vec dstTensor.allocator()->import_memory(dst[0]->getData()); if (dst.size() > 1u) indTensor.allocator()->import_memory(dst[1]->getData()); - exec_func(); + ifunc->run(); srcTensor.allocator()->free(); dstTensor.allocator()->free(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.hpp index 669a90baf9a..44a2e999057 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.hpp @@ -38,7 +38,7 @@ public: } private: - std::function exec_func; + std::unique_ptr ifunc; PoolingAttrs poolingAttrs; impl_desc_type implType = impl_desc_type::acl; diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp index 10d7641fcb1..ade42e53c08 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp @@ -47,6 +47,7 @@ bool AclReduceExecutor::init(const ReduceAttrs& reduceAttrs, srcTensor.allocator()->init(srcTensorInfo); dstTensor.allocator()->init(dstTensorInfo); + std::function(void)> exec_func; switch (reduceAttrs.operation) { case Algorithm::ReduceMean: { for (size_t i = 0; i < reduceAttrs.axes.size(); ++i) { @@ -59,10 +60,10 @@ bool AclReduceExecutor::init(const ReduceAttrs& reduceAttrs, DEBUG_LOG("NEReduceMean validation failed: ", reduceMeanStatus.error_description()); return false; } - exec_func = [this]{ + exec_func = [this]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensor, axesMean, this->reduceAttrs.keepDims, &dstTensor); - acl_op->run(); + return acl_op; }; break; } @@ -76,18 +77,18 @@ bool AclReduceExecutor::init(const ReduceAttrs& reduceAttrs, DEBUG_LOG("NEReductionOperation validation with indices failed: ", reductionOperationStatus.error_description()); return false; } - exec_func = [this, srcDims]{ + exec_func = [this, srcDims]() -> std::unique_ptr { auto acl_op = std::make_unique(); acl_op->configure(&srcTensor, &dstTensor, axisCast(this->reduceAttrs.axes[0], srcDims.size()), getAclReductionOperationByAlgorithm(this->reduceAttrs.operation), this->reduceAttrs.keepDims); - acl_op->run(); + return acl_op; }; break; } default: IE_THROW() << "Unsupported operation type for ACL Reduce executor: " << static_cast(reduceAttrs.operation); } - + ifunc = exec_func(); return true; } @@ -95,7 +96,7 @@ void AclReduceExecutor::exec(const std::vector& src, const std::vect srcTensor.allocator()->import_memory(src[0]->getData()); dstTensor.allocator()->import_memory(dst[0]->getData()); - exec_func(); + ifunc->run(); srcTensor.allocator()->free(); dstTensor.allocator()->free(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.hpp index 43a7911670f..bdc06362c0d 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.hpp @@ -30,7 +30,7 @@ public: } private: - std::function exec_func; + std::unique_ptr ifunc; ReduceAttrs reduceAttrs; impl_desc_type implType = impl_desc_type::acl;