From 05ab0f32d708842b5a58f6f85af33fd4d2a5a1bf Mon Sep 17 00:00:00 2001
From: Tingqian Li
Date: Wed, 29 Mar 2023 16:27:08 +0800
Subject: [PATCH] [CPU] Simple fix of redundant const-weight reordering for
 brgconv node in dynamic model (#16305)

---
 src/plugins/intel_cpu/src/graph.cpp           | 18 ++--
 src/plugins/intel_cpu/src/node.cpp            | 45 ++++++++++
 src/plugins/intel_cpu/src/node.h              | 10 +++
 src/plugins/intel_cpu/src/nodes/conv.cpp      | 82 +++++++++++++------
 src/plugins/intel_cpu/src/nodes/conv.h        |  4 +-
 .../intel_cpu/src/nodes/fullyconnected.cpp    | 45 ----------
 .../intel_cpu/src/nodes/fullyconnected.h      |  6 --
 .../src/utils/debug_capabilities.cpp          | 47 +++++++----
 ...ntwise_branch_selection_transformation.cpp |  4 -
 .../snippets/conv_eltwise.cpp                 |  4 +-
 .../fake_quantize_decomposition_test.cpp      |  4 +-
 .../src/concat_const_inplace.cpp              |  4 +-
 12 files changed, 163 insertions(+), 110 deletions(-)

diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp
index 0c275fc183b..3c1b32b8ca0 100644
--- a/src/plugins/intel_cpu/src/graph.cpp
+++ b/src/plugins/intel_cpu/src/graph.cpp
@@ -450,11 +450,19 @@ void Graph::InitDescriptors() {
         node->filterSupportedPrimitiveDescriptors();
 
 #ifdef CPU_DEBUG_CAPS
-        DEBUG_LOG("==================");
-        for (auto & pd : node->getSupportedPrimitiveDescriptors())
-            DEBUG_LOG("#", node->getExecIndex(),
-                      " ", node->getName(),
-                      " SupportedPrimitiveDescriptor:\n", pd);
+        const auto& SPDs = node->getSupportedPrimitiveDescriptors();
+        for (int i = 0; i < SPDs.size(); i++) {
+            DEBUG_LOG("#",
+                      node->getExecIndex(),
+                      " ",
+                      node->getName(),
+                      " SupportedPrimitiveDescriptors [",
+                      i,
+                      "/",
+                      SPDs.size(),
+                      "]: \n",
+                      SPDs[i]);
+        }
 #endif
     }
 
diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp
index f80ed309383..67e289aa1cb 100644
--- a/src/plugins/intel_cpu/src/node.cpp
+++ b/src/plugins/intel_cpu/src/node.cpp
@@ -821,6 +821,51 @@ void Node::prepareMemory(dnnl::primitive_desc_iterator& itpd) {
     Node::prepareMemory(intDescs);
 }
 
+MemoryPtr Node::prepareWeightMemory(DnnlMemoryDescPtr weightDesc) {
+    if (!getParentEdgeAt(1)->getParent()->isConstant())
+        IE_THROW() << "Weight input is not const for node " << getName() << ".";
+    auto edgeMem = getParentEdgeAt(1)->getMemoryPtr();
+    if (!edgeMem)
+        IE_THROW() << "Cannot get const weights edgeMem for node " << getName() << ".";
+
+    auto constDnnlMemOutDesc = edgeMem->GetDescWithType<DnnlMemoryDesc>();
+    auto weightSrcDesc = constDnnlMemOutDesc->getDnnlDesc();
+    weightSrcDesc = weightSrcDesc.reshape(weightDesc->getDnnlDesc().get_dims());
+    auto create = [&] () {
+        auto newSrcDesc = DnnlExtensionUtils::makeDescriptor(weightSrcDesc);
+
+        Memory srcMemory{ getEngine() };
+        srcMemory.Create(newSrcDesc, edgeMem->GetData());
+
+        MemoryPtr _ptr = std::make_shared<Memory>(getEngine());
+        _ptr->Create(weightDesc);
+        node::Reorder::reorderData(srcMemory, *_ptr, context->getParamsCache());
+
+        return _ptr;
+    };
+
+    MemoryPtr ptr;
+    const auto& format = weightDesc->serializeFormat();
+    auto itr = privateWeightCache.find(format);
+    if (privateWeightCache.end() != itr) {
+        ptr = itr->second;
+    } else {
+        auto weightCache = context->getWeightsCache();
+        if (weightCache != nullptr) {
+            const std::string string_hash = getName() + "_" + format
+                                            + "_" + std::to_string(edgeMem->GetSize())
+                                            + "_" + std::to_string(reinterpret_cast<uint64_t>(edgeMem->GetData()));
+
+            ptr = *weightCache->findOrCreate(string_hash, create);
+        } else {
+            ptr = create();
+        }
+        privateWeightCache[format] = ptr;
+    }
+
+    return ptr;
+}
+
 bool Node::isInPlace() {
     if (inplace == InPlaceType::Unknown) {
         auto selected_pd = getSelectedPrimitiveDescriptor();
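The new Node::prepareWeightMemory() above consults two cache levels: privateWeightCache, a per-node map that holds strong references keyed by the serialized target weight format, and the optional shared weights cache from the context, which deduplicates identical reorders across nodes but keeps only weak references. Below is a minimal standalone sketch of that two-level pattern; it is illustrative only, and Blob, SharedWeightsCache and prepareWeight are hypothetical names, not the plugin's real API:

#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

// Illustrative stand-in for a reordered weight blob.
struct Blob { std::string layout; };

// Shared cache holding only weak references (like the per-socket weights cache):
// an entry expires once all strong references outside the cache are gone.
class SharedWeightsCache {
public:
    std::shared_ptr<Blob> findOrCreate(const std::string& key,
                                       const std::function<std::shared_ptr<Blob>()>& create) {
        auto it = map_.find(key);
        if (it != map_.end()) {
            if (auto alive = it->second.lock())  // still alive -> reuse
                return alive;
        }
        auto fresh = create();
        map_[key] = fresh;  // store a weak reference only
        return fresh;
    }
private:
    std::unordered_map<std::string, std::weak_ptr<Blob>> map_;
};

// Per-node strong-reference cache keyed by serialized weight format,
// mirroring the role of Node::privateWeightCache in the patch.
std::unordered_map<std::string, std::shared_ptr<Blob>> privateWeightCache;

std::shared_ptr<Blob> prepareWeight(SharedWeightsCache* shared, const std::string& format) {
    auto itr = privateWeightCache.find(format);
    if (itr != privateWeightCache.end())
        return itr->second;  // already reordered into this layout
    auto create = [&] { return std::make_shared<Blob>(Blob{format}); };
    auto ptr = shared ? shared->findOrCreate("node1_" + format, create) : create();
    privateWeightCache[format] = ptr;  // keep a strong reference alive
    return ptr;
}

int main() {
    SharedWeightsCache shared;
    auto a = prepareWeight(&shared, "ABcd16b16a");
    auto b = prepareWeight(&shared, "ABcd16b16a");  // served from privateWeightCache
    std::cout << (a == b) << "\n";  // prints 1: no second reorder happened
}

Because the private map owns a strong reference, a later call for the same layout returns the cached copy instead of reordering again; this is exactly the redundant const-weight reordering the patch removes.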
diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h
index dd78bfd0159..d9f242b353d 100644
--- a/src/plugins/intel_cpu/src/node.h
+++ b/src/plugins/intel_cpu/src/node.h
@@ -619,6 +619,8 @@ protected:
     void prepareMemory(const std::vector<DnnlMemoryDescPtr>& intDescs);
     void prepareMemory(dnnl::primitive_desc_iterator& itpd);
 
+    MemoryPtr prepareWeightMemory(DnnlMemoryDescPtr weightDesc);
+
     bool isDynamic = false;
 
     bool isInputTensorAtPortEmpty(size_t port) const;
@@ -687,6 +689,14 @@ private:
     enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 };
     ConstantType checkConstant(LOOK look, std::vector<NodePtr>& checkNodes);
 
+    // We cannot rely on the per-NUMA-node weightCache for caching weights because:
+    // 1. it may not exist (e.g. in a single-stream configuration);
+    // 2. it only holds weak references, so the life cycle of a cached item is
+    //    still controlled by the strong references outside of the cache.
+    // privateWeightCache holds strong references to constant weight copies that
+    // share the same content but have different layouts.
+    std::unordered_map<std::string, MemoryPtr> privateWeightCache;
+
 #ifdef CPU_DEBUG_CAPS
     friend class Verbose;
 #endif
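The weak-reference caveat called out in the new comment above is easy to see in isolation: a cache holding only weak_ptr entries cannot keep a reordered weight alive by itself. A tiny illustrative example (not plugin code):

#include <iostream>
#include <memory>

int main() {
    std::weak_ptr<int> cacheEntry;
    {
        auto weight = std::make_shared<int>(42);  // strong reference outside the cache
        cacheEntry = weight;                      // cache stores only a weak reference
        std::cout << "alive: " << !cacheEntry.expired() << "\n";  // alive: 1
    }   // last strong reference dropped -> cached item is destroyed
    std::cout << "alive: " << !cacheEntry.expired() << "\n";      // alive: 0
}

This is why the node itself must hold the strong reference, which is what the added privateWeightCache member does.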
diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp
index a7b86667825..94b6481b8cf 100644
--- a/src/plugins/intel_cpu/src/nodes/conv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/conv.cpp
@@ -57,6 +57,8 @@ struct ConvKey {
     dnnl::primitive_attr attr;
     impl_desc_type implType;
 
+    bool constWeight;
+
     size_t hash() const;
     bool operator==(const ConvKey& rhs) const;
 };
@@ -80,6 +82,7 @@ size_t ConvKey::hash() const {
 
     seed = hash_combine(seed, get_attr_hash(*attr.get()));
     seed = hash_combine(seed, implType);
+    seed = hash_combine(seed, constWeight);
 
     return seed;
 }
@@ -103,7 +106,7 @@ bool ConvKey::operator==(const ConvKey &rhs) const {
     retVal = retVal && paddingL == rhs.paddingL;
     retVal = retVal && paddingR == rhs.paddingR;
 
-    retVal = retVal && *attr.get() == *rhs.attr.get() && implType == rhs.implType;
+    retVal = retVal && *attr.get() == *rhs.attr.get() && implType == rhs.implType && constWeight == rhs.constWeight;
     return retVal;
 }
 
@@ -851,6 +854,14 @@ createDescriptorInternal(const dnnl::engine& engine,
     }
 }
 } // namespace
 
+static memory::data_type deriveWeightDataType(memory::data_type src_dt) {
+    memory::data_type wdt = src_dt;
+    if (one_of(src_dt, memory::data_type::s8, memory::data_type::u8)) {
+        wdt = memory::data_type::s8;
+    }
+    return wdt;
+}
+
 void Convolution::createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
                                    const std::vector<MemoryDescPtr>& outputDesc) {
     MemoryDescPtr inpDesc;
@@ -874,12 +885,7 @@ void Convolution::createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
     const auto& inDnnlDesc = definedInpMemDesc->getDnnlDesc();
     const auto& outDnnlDesc = definedOutMemDesc->getDnnlDesc();
 
-    memory::data_type dt = inDnnlDesc.get_data_type();
-    memory::data_type wdt = dt;
-
-    if (one_of(dt, memory::data_type::s8, memory::data_type::u8)) {
-        wdt = memory::data_type::s8;
-    }
+    memory::data_type wdt = deriveWeightDataType(inDnnlDesc.get_data_type());
 
     dnnl::memory::desc weightDnnlDesc(DnnlExtensionUtils::convertToDnnlDims(weightDims), wdt, memory::format_tag::any);
     dnnl::memory::desc biasDnnlDesc;
@@ -1143,6 +1149,11 @@ bool Convolution::isPossibleToSkipInitConfig(const dnnl::primitive_desc &desc) const {
 }
 
 std::shared_ptr<MemoryDesc> Convolution::getSrcMemDesc(dnnl::primitive_desc_iterator &primitive_desc_it, size_t idx) {
+    if (idx == 1) {
+        // report the original plain layout for the weight, since it needs to be reordered dynamically at runtime
+        return std::make_shared<CpuBlockedMemoryDesc>(getOriginalInputPrecisionAtPort(idx),
+                                                      Shape(getInputShapeAtPort(idx).getStaticDims()));
+    }
     auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : primitive_desc_it.src_desc(idx);
     if (getInputShapeAtPort(idx).isDynamic()) {
         return DnnlExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx));
@@ -1352,10 +1363,17 @@ void Convolution::prepareParams() {
                 paddingL,
                 paddingR,
                 *pAttrLocal,
-                selected_pd->getImplementationType()};
+                selected_pd->getImplementationType(),
+                getParentEdgeAt(1)->getParent()->isConstant()};
 
     auto engine = getEngine();
     auto builder = [&engine](const ConvKey& key) -> executorPtr {
+        // remove the requirement on the weight memory layout to let the primitive
+        // report the best layout for the weight, to be reordered dynamically at runtime
+        auto wghDescAny =
+            dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(key.inp1->getShape().getStaticDims()),
+                               deriveWeightDataType(key.inp0->getDataType()),
+                               memory::format_tag::any);
         auto createDnnlConvDesc = [](const dnnl::engine engine,
                                      const dnnl::memory::desc& srcDesc,
                                      const dnnl::memory::desc& wghDesc,
@@ -1390,7 +1408,7 @@ void Convolution::prepareParams() {
                                                     : dnnl::algorithm::convolution_direct;
         dnnl::primitive_desc desc = createDnnlConvDesc(engine,
                                                        key.inp0->getDnnlDesc(),
-                                                       key.inp1->getDnnlDesc(),
+                                                       wghDescAny,
                                                        key.out->getDnnlDesc(),
                                                        key.bias,
                                                        key.stride,
@@ -1401,7 +1419,6 @@ void Convolution::prepareParams() {
                                                        key.attr);
 
         auto itpd = desc;
-
         executorPtr execPtr = nullptr;
 
         while (static_cast<bool>(itpd)) {
             impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());
@@ -1412,7 +1429,8 @@ void Convolution::prepareParams() {
                                                                  key.inp0->getDnnlDesc(),
                                                                  key.inp1->getDnnlDesc(),
                                                                  key.out->getDnnlDesc(),
-                                                                 engine);
+                                                                 engine,
+                                                                 key.constWeight);
                 break;
             }
 
@@ -1425,16 +1443,13 @@ void Convolution::prepareParams() {
             auto inDesc = dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(key.inp0->getShape().getStaticDims()),
                                              key.inp0->getDataType(),
                                              memory::format_tag::any);
-            auto wghDesc = dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(key.inp1->getShape().getStaticDims()),
-                                              key.inp1->getDataType(),
-                                              memory::format_tag::any);
             auto outDesc = dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(key.out->getShape().getStaticDims()),
                                               key.out->getDataType(),
                                               memory::format_tag::any);
 
             auto reorderConvDesc = createDnnlConvDesc(engine,
                                                       inDesc,
-                                                      wghDesc,
+                                                      wghDescAny,
                                                       outDesc,
                                                       key.bias,
                                                       key.stride,
@@ -1450,13 +1465,15 @@ void Convolution::prepareParams() {
                                                      key.inp0->getDnnlDesc(),
                                                      key.inp1->getDnnlDesc(),
                                                      key.out->getDnnlDesc(),
-                                                     engine);
+                                                     engine,
+                                                     key.constWeight);
             }
         }
 
         return execPtr;
     };
 
+    auto prevExecPtr = execPtr;
     execPtr = nullptr;
     auto cache = context->getParamsCache();
     auto result = cache->getOrCreate(key, builder);
@@ -1465,9 +1482,22 @@ void Convolution::prepareParams() {
 
     if (execPtr) {
         primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive();
-        primArgs[DNNL_ARG_WEIGHTS] = wghMemPtr->GetPrimitive();
         primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive();
 
+        if (key.constWeight) {
+            // const weight preparation/reordering needs to be done once, at the next execution,
+            // when the input weight data is guaranteed to be ready (considering possible const-folding
+            // subgraphs inserted between the constant weight node and the conv)
+            auto it = primArgs.find(DNNL_ARG_WEIGHTS);
+            if (it == primArgs.end() || !prevExecPtr ||
+                !execPtr->getWeightDesc()->isCompatible(*(prevExecPtr->getWeightDesc()))) {
+                pendingConstWeightReorder = true;
+            }
+        } else {
+            // a non-const weight will be reordered by the executor on every exec
+            primArgs[DNNL_ARG_WEIGHTS] = wghMemPtr->GetPrimitive();
+        }
+
         if (withBiases) {
             primArgs[DNNL_ARG_BIAS] = biasMemPtr->GetPrimitive();
         }
@@ -1497,12 +1527,14 @@ Convolution::ConvolutionExecutor::ConvolutionExecutor(const dnnl::convolution_forward::primitive_desc& pd,
                                                       const dnnl::memory::desc& inMemDesc,
                                                       const dnnl::memory::desc& weightMemDesc,
                                                       const dnnl::memory::desc& outMemDesc,
-                                                      const dnnl::engine& engine) : DnnlExecutor(pd) {
+                                                      const dnnl::engine& engine,
+                                                      bool constWeight) : DnnlExecutor(pd) {
     if (inMemDesc != getDnnlSrcDesc()) {
         inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, getDnnlSrcDesc(), engine)});
     }
 
-    if (weightMemDesc != getDnnlWeightDesc()) {
+    if (!constWeight && weightMemDesc != getDnnlWeightDesc()) {
+        // a const weight will be reordered at the first execution instead
         inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, getDnnlWeightDesc(), engine)});
     }
 
@@ -1516,6 +1548,11 @@ void Convolution::execute(dnnl::stream strm) {
         IE_THROW() << "Can't execute Convolution node with name: " << getName() << ", because executor is not compiled";
     }
 
+    if (pendingConstWeightReorder) {
+        primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc())->GetPrimitive();
+        pendingConstWeightReorder = false;
+    }
+
     execPtr->exec(primArgs, strm);
 }
 
@@ -1630,13 +1667,8 @@ void Convolution::appendZeroPointsArgs() {
     }
 }
 
-// brgconv will be enabled by default:
-// 1, static shape(dynamic shape may change weights layout if the input shape changes and cause performance issue: 86948)
-// 2, hw supports avx512+
+// brgconv will be enabled by default when HW supports avx512+
 void Convolution::initTryBrgconvFlag() {
-    if (isDynamicNode())
-        return;
-
     if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) {
         shouldTryBrgconv = true;
     }
diff --git a/src/plugins/intel_cpu/src/nodes/conv.h b/src/plugins/intel_cpu/src/nodes/conv.h
index fb9385601ca..d0e4c48c151 100644
--- a/src/plugins/intel_cpu/src/nodes/conv.h
+++ b/src/plugins/intel_cpu/src/nodes/conv.h
@@ -94,8 +94,10 @@ private:
                             const dnnl::memory::desc& inMemDesc,
                             const dnnl::memory::desc& weightMemDesc,
                             const dnnl::memory::desc& outMemDesc,
-                            const dnnl::engine& engine);
+                            const dnnl::engine& engine,
+                            bool constWeight);
     };
+    bool pendingConstWeightReorder = false;
 
     void prepareParams() override;
     void execute(dnnl::stream strm) override;
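The pendingConstWeightReorder flag declared above defers the constant-weight reorder out of prepareParams(): preparation only records that the selected primitive expects a different weight layout, and the first execute() performs the reorder once, when the weight data is guaranteed to be ready. A minimal sketch of this execute-once control flow, using hypothetical ConvNode/Executor types rather than the plugin's classes:

#include <functional>
#include <iostream>
#include <string>

// Illustrative executor: knows which weight layout the selected primitive wants.
struct Executor {
    std::string preferredWeightLayout;
};

class ConvNode {
public:
    void prepareParams(const Executor& e) {
        exec_ = e;
        // Weight data may not be ready yet (e.g. still to be produced by const
        // folding), so only mark the reorder as pending instead of doing it now.
        pendingConstWeightReorder_ = true;
    }

    void execute(const std::function<std::string()>& readWeightData) {
        if (pendingConstWeightReorder_) {
            // First execution: weight data is guaranteed ready; reorder once and cache.
            reorderedWeights_ = readWeightData() + " as " + exec_.preferredWeightLayout;
            pendingConstWeightReorder_ = false;
        }
        std::cout << "run conv with [" << reorderedWeights_ << "]\n";
    }

private:
    Executor exec_;
    bool pendingConstWeightReorder_ = false;
    std::string reorderedWeights_;
};

int main() {
    ConvNode conv;
    conv.prepareParams(Executor{"ABcd16b16a"});
    conv.execute([] { return std::string("const weights"); });  // reorders once
    conv.execute([] { return std::string("const weights"); });  // reuses the cached copy
}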
diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
index 608ed26ac45..7e2181c444c 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -913,51 +913,6 @@ bool FullyConnected::canBeExecutedInConv1x1() const {
     return retVal;
 }
 
-MemoryPtr FullyConnected::prepareWeightMemory(DnnlMemoryDescPtr weightDesc) {
-    if (!getParentEdgeAt(1)->getParent()->isConstant())
-        IE_THROW() << "Weight input is not const for node " << getName() << ".";
-    auto blob = getParentEdgeAt(1)->getMemoryPtr();
-    if (!blob)
-        IE_THROW() << "Cannot get const weights blob for node " << getName() << ".";
-
-    auto constDnnlMemOutDesc = blob->GetDescWithType<DnnlMemoryDesc>();
-    auto weightSrcDesc = constDnnlMemOutDesc->getDnnlDesc();
-    weightSrcDesc = weightSrcDesc.reshape(weightDesc->getDnnlDesc().get_dims());
-    auto create = [&] () {
-        auto newSrcDesc = DnnlExtensionUtils::makeDescriptor(weightSrcDesc);
-
-        Memory srcMemory{ getEngine() };
-        srcMemory.Create(newSrcDesc, blob->GetData());
-
-        MemoryPtr _ptr = std::make_shared<Memory>(getEngine());
-        _ptr->Create(weightDesc);
-        node::Reorder::reorderData(srcMemory, *_ptr, context->getParamsCache());
-
-        return _ptr;
-    };
-
-    MemoryPtr ptr;
-    const auto& format = weightDesc->serializeFormat();
-    auto itr = privateWeightCache.find(format);
-    if (privateWeightCache.end() != itr) {
-        ptr = itr->second;
-    } else {
-        auto weightCache = context->getWeightsCache();
-        if (weightCache != nullptr) {
-            const std::string string_hash = getName() + "_" + format
-                                            + "_" + std::to_string(blob->GetSize())
-                                            + "_" + std::to_string(reinterpret_cast<uint64_t>(blob->GetData()));
-
-            ptr = *weightCache->findOrCreate(string_hash, create);
-        } else {
-            ptr = create();
-        }
-        privateWeightCache[format] = ptr;
-    }
-
-    return ptr;
-}
-
 bool FullyConnected::useSparseWeightsDecompression() {
     // minSparseRate == 1 means that sparse feature is switched off
     if (minSparseRate == 1.f) {
diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.h b/src/plugins/intel_cpu/src/nodes/fullyconnected.h
index 3f0983f2fc2..8add77440fd 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.h
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.h
@@ -83,11 +83,6 @@ private:
     bool useConv1x1 = false;
     impl_desc_type implementationTypeIP;
     MemoryDescPtr weightDescIP;
-    // when weightCache is not enabled (such as stream=1), brgconv weights may change due to
-    // different shapes. Weights will be cached in privateWeightCache.
-    // When weightCache is enabled, it holds weight ptr reference since weightCache does not hold the
-    // reference
-    std::unordered_map<std::string, MemoryPtr> privateWeightCache;
     dnnl::primitive_attr attr;
 
     static dnnl::convolution_forward::primitive_desc
@@ -99,7 +94,6 @@ private:
                            const dnnl::engine& engine);
 
     bool canBeExecutedInConv1x1() const;
-    MemoryPtr prepareWeightMemory(const DnnlMemoryDescPtr weightDesc);
 
     // sparse weights
     bool useSparseWeights = false;
diff --git a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp
index fb13000708c..b791f18cec1 100644
--- a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp
+++ b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp
@@ -21,6 +21,17 @@
 namespace ov {
 namespace intel_cpu {
 
+namespace {
+    size_t replace_all(std::string & inout, std::string what, std::string with) {
+        std::size_t count{};
+        for (std::string::size_type pos{}; inout.npos != (pos = inout.find(what.data(), pos, what.length()));
+             pos += with.length(), ++count) {
+            inout.replace(pos, what.length(), with.data(), with.length());
+        }
+        return count;
+    }
+}
+
 DebugLogEnabled::DebugLogEnabled(const char* file, const char* func, int line, const char* name) {
     // check ENV
     const char* p_filters = std::getenv("OV_CPU_DEBUG_LOG");
@@ -96,19 +107,27 @@ std::ostream & operator<<(std::ostream & os, const MemoryDesc& desc) {
 }
 
 std::ostream & operator<<(std::ostream & os, const NodeDesc& desc) {
-    os << " ImplementationType: " << impl_type_to_string(desc.getImplementationType()) << std::endl;
+    std::stringstream ss;
+    ss << " " << impl_type_to_string(desc.getImplementationType()) << "(";
+    const char * sep = "";
     for (auto & conf : desc.getConfig().inConfs) {
-        os << " inConfs: " << *conf.getMemDesc();
-        if (conf.inPlace() >= 0) os << " inPlace:" << conf.inPlace();
-        if (conf.constant()) os << " constant";
-        os << std::endl;
+        ss << sep << *conf.getMemDesc();
+        if (conf.inPlace() >= 0) ss << " inPlace:" << conf.inPlace();
+        if (conf.constant()) ss << " constant";
+        sep = ",";
     }
+    ss << ") -> (";
+    sep = "";
     for (auto & conf : desc.getConfig().outConfs) {
-        os << " outConfs: " << *conf.getMemDesc();
-        if (conf.inPlace() >= 0) os << " inPlace:" << conf.inPlace();
-        if (conf.constant()) os << " constant";
-        os << std::endl;
+        ss << sep << *conf.getMemDesc();
+        if (conf.inPlace() >= 0) ss << " inPlace:" << conf.inPlace();
+        if (conf.constant()) ss << " constant";
+        sep = ",";
     }
+    ss << ")" << std::endl;
+    auto str = ss.str();
+    replace_all(str, "0 - ?", "?");
+    os << str;
     return os;
 }
 
@@ -137,15 +156,7 @@ std::ostream & operator<<(std::ostream & os, const Node &c_node) {
         }
         return true;
     };
-    auto replace_all = [](std::string& inout, std::string what, std::string with) {
-        std::size_t count{};
-        for (std::string::size_type pos{};
-             inout.npos != (pos = inout.find(what.data(), pos, what.length()));
-             pos += with.length(), ++count) {
-            inout.replace(pos, what.length(), with.data(), with.length());
-        }
-        return count;
-    };
+
     auto nodeDesc = node.getSelectedPrimitiveDescriptor();
     std::stringstream leftside;
 
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/elementwise_branch_selection_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/elementwise_branch_selection_transformation.cpp
index f95c319d706..ff0642926ce 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/elementwise_branch_selection_transformation.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/elementwise_branch_selection_transformation.cpp
@@ -42,8 +42,6 @@ const std::vector p
         },
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
         {
-            {"Constant", "convolution1"},
-            {"Constant", "convolution2"},
             {"fakeQuantizeBefore1", "convolution1"},
             {"fakeQuantizeBefore2", "convolution2"},
             {"maxPool", "result"}
@@ -75,8 +73,6 @@ const std::vector p
         },
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
         {
-            {"Constant", "convolution1"},
-            {"Constant", "convolution2"},
             {"fakeQuantizeBefore1", "convolution1"},
             {"fakeQuantizeBefore2", "convolution2"},
             {"maxPool", "result"}
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/conv_eltwise.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/conv_eltwise.cpp
index ffc6ef57add..ab0aaf27ef1 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/conv_eltwise.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/conv_eltwise.cpp
@@ -16,7 +16,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvAdd, ConvEltwise,
                      ::testing::Values(convInputShape),
                      ::testing::Values(convInputShape),
                      ::testing::Values(std::shared_ptr<ngraph::Node>(std::make_shared<ngraph::opset1::Add>())), // non-tokenizable
-                     ::testing::Values(6), // num nodes = 6: Convert + Convolution + 4 Reorders on Convs in&outs
+                     ::testing::Values(5), // num nodes = 5: Convert + Convolution + 3 Reorders on Convs in&outs
                      ::testing::Values(0), // num subgraphs = 0: No subgraph since all ops eltwises fused into Convolution
                      ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                  ConvEltwise::getTestCaseName);
@@ -26,7 +26,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvMul, ConvEltwise,
                      ::testing::Values(convInputShape),
                      ::testing::Values(convInputShape),
                      ::testing::Values(std::shared_ptr<ngraph::Node>(std::make_shared<ngraph::opset1::Multiply>())), // fully-tokenizable
-                     ::testing::Values(7), // num nodes = 7: Convert + Convolution + Subgraph + Reorders
+                     ::testing::Values(6), // num nodes = 6: Convert + Convolution + Subgraph + Reorders
                      ::testing::Values(1), // num subgraphs = 1: Mul (2 inputs) can't be fused into Conv => Subgraph is created
                      ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                  ConvEltwise::getTestCaseName);
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fake_quantize_decomposition_test.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fake_quantize_decomposition_test.cpp
index a231dd9a595..6333c339a3d 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fake_quantize_decomposition_test.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fake_quantize_decomposition_test.cpp
@@ -121,8 +121,8 @@ INSTANTIATE_TEST_SUITE_P(
     ::testing::Combine(
         ::testing::ValuesIn(testValuesLegacyFuse),
         ::testing::ValuesIn(operations),
-        // reorder (nChw[16|8]c) + MaxPool + reorder(nhwc) + reorder(ABcd16b16a) + Convolution + reorder(nchw)
-        ::testing::Values(std::pair<size_t, size_t>{6, 0}),
+        // reorder (nChw[16|8]c) + MaxPool + reorder(nhwc) + Convolution(with internal weight reordering) + reorder(nchw)
+        ::testing::Values(std::pair<size_t, size_t>{5, 0}),
         ::testing::Values(CommonTestUtils::DEVICE_CPU)),
     FakeQuantizeDecompositionTest::getTestCaseName);
 
diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_const_inplace.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_const_inplace.cpp
index ec9eff60542..9e4ee465ee5 100644
--- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_const_inplace.cpp
+++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_const_inplace.cpp
@@ -71,9 +71,9 @@ namespace {
 TEST_P(ConcatConstantInPlaceTest, smoke_ConcatConstantInPlaceTest_CPU) {
     Run();
     if (this->GetParam() == Precision::BF16)
-        CheckNumberOfNodesWithType(executableNetwork, "Reorder", 4);
-    else
         CheckNumberOfNodesWithType(executableNetwork, "Reorder", 3);
+    else
+        CheckNumberOfNodesWithType(executableNetwork, "Reorder", 2);
 }
 
 INSTANTIATE_TEST_SUITE_P(smoke_ConcatConstantInPlaceTest_CPU, ConcatConstantInPlaceTest,