diff --git a/src/plugins/intel_cpu/src/nodes/mkldnn_tensoriterator_node.cpp b/src/plugins/intel_cpu/src/nodes/mkldnn_tensoriterator_node.cpp
index 643a5002f1a..9dc830f3300 100644
--- a/src/plugins/intel_cpu/src/nodes/mkldnn_tensoriterator_node.cpp
+++ b/src/plugins/intel_cpu/src/nodes/mkldnn_tensoriterator_node.cpp
@@ -52,9 +52,9 @@ static NodeConfig make_plain_config(const std::shared_ptr<ngraph::Node>& op) {
     return config;
 }
 
-static void redefineToMemories(const std::vector<MKLDNNMemoryPtr>& to_mems, const std::shared_ptr<MemoryDesc> new_desc) {
+static void redefineToMemories(const std::vector<MKLDNNMemoryPtr>& to_mems, const MemoryDesc& new_desc) {
     const auto &currDesc = to_mems.front()->getDesc();
-    if (currDesc.getShape().isDynamic() || currDesc.getShape().getStaticDims() != new_desc->getShape().getStaticDims()) {
+    if (currDesc.getShape().isDynamic() || currDesc.getShape().getStaticDims() != new_desc.getShape().getStaticDims()) {
         // WA [DS] : need to rewrite it. Updated copypaste is from MKLDNNNode::redefineOutputMemory
         // this path is necessary if there are several edges per one port
         // in this case edge memory share same physical memory
@@ -77,6 +77,20 @@ static void redefineToMemories(const std::vector<MKLDNNMemoryPtr>& to_mems, cons
     }
 }
 
+// this method gets all memory ptrs of the children of one port to redefine their descs
+static std::vector<MKLDNNMemoryPtr> getToMemories(const MKLDNNNode* node, const size_t port) {
+    std::vector<MKLDNNMemoryPtr> memories;
+    for (auto& edge : node->getChildEdgesAtPort(port))
+        memories.push_back(edge->getMemoryPtr());
+    return memories;
+}
+
+static void nullifyUndefinedDims(VectorDims& dims) {
+    std::transform(dims.begin(), dims.end(), dims.begin(), [](const size_t& dim) {
+        return dim == Shape::UNDEFINED_DIM ? 0 : dim;
+    });
+}
+
 class PortIteratorHelper : public PortMapHelper {
 public:
     PortIteratorHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, bool sliced_src,
@@ -223,8 +237,8 @@ private:
     int value;
 };
 
-DynamicBuffer::DynamicBuffer(const MKLDNNMemoryPtr &from, const std::vector<MKLDNNMemoryPtr> &to,
-                             const PortMap &map_rule) : from(from), to(to), map_rule(map_rule) {
+DynamicBuffer::DynamicBuffer(const MKLDNNMemoryPtr &from_, const std::vector<MKLDNNMemoryPtr> &to_,
+                             const PortMap &map_rule_) : from(from_), to(to_), map_rule(map_rule_) {
    elem_size = MKLDNNExtensionUtils::sizeOfDataType(from->GetDataType());
 }
 
@@ -305,11 +319,21 @@ void DynamicBuffer::move_data() {
 }
 
 void DynamicBuffer::transfer(const MKLDNNNode* node) {
-    const auto desc = node->getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims(
-            MKLDNNExtensionUtils::convertToVectorDims(mem_holder_buffer->get_desc().dims()));
-    redefineToMemories(to, desc);
+    if (mem_holder_buffer) {
+        const auto desc = node->getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims(
+                MKLDNNExtensionUtils::convertToVectorDims(mem_holder_buffer->get_desc().dims()));
+        redefineToMemories(to, *desc);
 
-    copy(get_ptr(*mem_holder_buffer.get()), reinterpret_cast<uint8_t*>(to.front()->GetPtr()), 0, 0, 1, to.front()->GetSize());
+        copy(get_ptr(*mem_holder_buffer.get()), reinterpret_cast<uint8_t*>(to.front()->GetPtr()), 0, 0, 1, to.front()->GetSize());
+    } else {
+        VectorDims newDims = to.front()->GetShape().getDims();
+        nullifyUndefinedDims(newDims);
+
+        const auto desc = node->getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims(newDims);
+        redefineToMemories(to, *desc);
+    }
+
+    mem_holder_buffer.reset();
 }
 
 void DynamicBuffer::copy(const uint8_t* src, uint8_t* dst, const size_t src_stride, const size_t dst_stride, const size_t count, const size_t len) {
@@ -455,8 +479,10 @@ void MKLDNNTensorIteratorNode::initSupportedPrimitiveDescriptors() {
 void MKLDNNTensorIteratorNode::createPrimitive() {
     if (loopBodyConditionOutputIdx == -1)
         continue_cond_check.reset(new staticValueCheck(true)); // always true
-    if (loopExecutionConditionIdx == -1)
+    if (loopExecutionConditionIdx == -1) {
         initial_cond_check.reset(new staticValueCheck(true));
+        lastUsedCond = initial_cond_check->getStatus();
+    }
 
     if (isDynamicNode())
         prepareDynamicBuffers();
@@ -476,22 +502,24 @@ bool MKLDNNTensorIteratorNode::needPrepareParams() const {
 }
 
 void MKLDNNTensorIteratorNode::prepareParams() {
-    reshapeSubgraphInput();
+    prepareTripCount();
+    prepareInitialCond();
 
     first_mappers.clear();
     before_mappers.clear();
     back_mappers.clear();
 
-    prepareInputPorts();
-    prepareInitialCond();
-    prepareContinueCond();
-    prepareTripCount();
-    // special purpose ports
-    prepareLoopBodyCurrentIteration();
+    if ((lastUsedCond && lastUsedTripCount != 0) || !isDynamicNode()) {
+        reshapeSubgraphInput();
 
-    if (!isDynamicNode()) {
-        prepareOutputPorts();
-        prepareBackEdges();
+        prepareInputPorts();
+        prepareContinueCond();
+        prepareLoopBodyCurrentIteration();
+
+        if (!isDynamicNode()) {
+            prepareOutputPorts();
+            prepareBackEdges();
+        }
     }
 }
 
@@ -534,9 +562,6 @@ void MKLDNNTensorIteratorNode::executeDynamicImpl(mkldnn::stream strm) {
     for (auto &mapper : first_mappers)
         mapper->execute(strm);
 
-    if (!continue_cond || max_num_iter == 0)
-        THROW_ERROR << "has incorrect iteration count for dynamic execution";
-
     // use "i != max_num_iter" only to allow "-1" works like infinite loop
     for (int i = 0; i != max_num_iter && continue_cond; i++) {
         // copy data to subgraph iteration
@@ -566,7 +591,7 @@ void MKLDNNTensorIteratorNode::prepareInputPorts() {
     const auto &eng = getEngine();
     for (auto map_rule : inputPortMap) {
         auto &from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr();
-        auto &to_mem = input_mems[map_rule.to].front();  // first memory is enough to get common memory ptr
+        auto &to_mem = input_mems[map_rule.to].front();  // first memory is enough to access the shared underlying physical memory
 
         if (map_rule.axis == -1)
             first_mappers.emplace_back(std::make_shared<BackEdgePortHelper>(from_mem, to_mem, eng));
@@ -607,7 +632,7 @@ void MKLDNNTensorIteratorNode::prepareDynamicBackEdges() {
         auto to_mems = input_mems[map_rule.to];
 
         const auto& desc = from_mem->getDesc();
-        redefineToMemories(to_mems, desc.clone());
+        redefineToMemories(to_mems, desc);
 
         // first memory is enough to get common memory ptr
         back_mappers.emplace_back(std::make_shared<BackEdgePortHelper>(from_mem, to_mems.front(), eng));
@@ -668,7 +693,7 @@ void MKLDNNTensorIteratorNode::reshapeSubgraphInput() {
         new_dims[map_rule.axis] = abs(map_rule.stride);
 
         const auto desc = std::make_shared<CpuBlockedMemoryDesc>(to_mems.front()->getDesc().getPrecision(), Shape(new_dims));
-        redefineToMemories(to_mems, desc);
+        redefineToMemories(to_mems, *desc);
     }
 }
 
@@ -679,11 +704,18 @@ void MKLDNNTensorIteratorNode::reshapeAndFillOutput(mkldnn::stream strm) {
         auto to_mems = getToMemories(this, map_rule.from);
         auto &from_mem = output_mem[map_rule.to];
 
-        const auto desc = getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims(from_mem->getStaticDims());
-        redefineToMemories(to_mems, desc);
+        // if Loop or TI isn't executed we should fill dynamic dims with zeros
+        auto newShape = from_mem->GetShape();
+        auto newDims = newShape.getDims();
+        nullifyUndefinedDims(newDims);
 
-        BackEdgePortHelper mapper(from_mem, to_mems.front(), eng);
-        mapper.execute(strm);
+        const auto desc = getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims(newDims);
+        redefineToMemories(to_mems, *desc);
+
+        if (!newShape.isDynamic()) {
+            BackEdgePortHelper mapper(from_mem, to_mems.front(), eng);
+            mapper.execute(strm);
+        }
     }
 }
 
@@ -777,13 +809,6 @@ int MKLDNNTensorIteratorNode::getNumIteration(const std::vector<PortMap>& inputP
     return numIterations;
 }
 
-std::vector<MKLDNNMemoryPtr> MKLDNNTensorIteratorNode::getToMemories(const MKLDNNNode* node, const size_t port) const {
-    std::vector<MKLDNNMemoryPtr> memories;
-    for (auto edge : node->getChildEdgesAtPort(port))
-        memories.push_back(edge->getMemoryPtr());
-    return memories;
-}
-
 bool MKLDNNTensorIteratorNode::created() const {
     return getType() == TensorIterator;
 }
diff --git a/src/plugins/intel_cpu/src/nodes/mkldnn_tensoriterator_node.h b/src/plugins/intel_cpu/src/nodes/mkldnn_tensoriterator_node.h
index 911c0ff1635..32efdadba86 100644
--- a/src/plugins/intel_cpu/src/nodes/mkldnn_tensoriterator_node.h
+++ b/src/plugins/intel_cpu/src/nodes/mkldnn_tensoriterator_node.h
@@ -62,7 +62,7 @@ protected:
  */
 class DynamicBuffer {
 public:
-    DynamicBuffer(const MKLDNNMemoryPtr &from, const std::vector<MKLDNNMemoryPtr> &to, const PortMap &map_rule);
+    DynamicBuffer(const MKLDNNMemoryPtr &from_, const std::vector<MKLDNNMemoryPtr> &to_, const PortMap &map_rule_);
     ~DynamicBuffer() = default;
 
     void execute(const mkldnn::engine& eng, const int iter);
@@ -131,9 +131,6 @@ private:
     void reshapeAndFillOutput(mkldnn::stream strm);
     int getNumIteration(const std::vector<PortMap>& inputPortMap, const std::vector<PortMap>& outputPortMap) const;
 
-    // this method get all memory ptrs of childs of one port to redefine descs for them
-    std::vector<MKLDNNMemoryPtr> getToMemories(const MKLDNNNode* node, const size_t port) const;
-
     MKLDNNExtensionManager::Ptr ext_mng;
     MKLDNNGraph sub_graph;
     std::vector<std::vector<MKLDNNMemoryPtr>> input_mems;
diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
index b04b8ac7270..bc96c25ca51 100644
--- a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
+++ b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
@@ -173,6 +173,11 @@ std::vector<std::string> disabledTestPatterns() {
         // Issue: 75022
         R"(.*OVExecutableNetworkBaseTest.*LoadNetworkToDefaultDeviceNoThrow.*)",
         R"(.*IEClassBasicTest.*LoadNetworkToDefaultDeviceNoThrow.*)",
+        // Issue: 77390
+        R"(.*LoopLayerCPUTest.*exec_cond=0.*)",
+        R"(.*LoopLayerCPUTest.*trip_count=0.*)",
+        R"(.*LoopForDiffShapesLayerCPUTest.*exec_cond=0.*)",
+        R"(.*LoopForDiffShapesLayerCPUTest.*trip_count=0.*)",
     };
 
 #define FIX_62820 0
diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/loop.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/loop.cpp
index cb581b22577..355e0fcd75a 100644
--- a/src/tests/functional/plugin/cpu/single_layer_tests/loop.cpp
+++ b/src/tests/functional/plugin/cpu/single_layer_tests/loop.cpp
@@ -22,6 +22,7 @@ enum LOOP_IN_TYPE {
 using LoopParams = typename std::tuple<
         InputLayerType,                                     // TripCount is a constant?
         int64_t,                                            // TripCount, -1 means infinity
+        bool,                                               // Execution condition
         std::vector<InputShape>,                            // InputShapes
         std::vector<LOOP_IN_TYPE>,                          // Type
         ElementType>;                                       // Input element type
@@ -33,10 +34,11 @@ public:
     static std::string getTestCaseName(testing::TestParamInfo<LoopParams> obj) {
         InputLayerType trip_count_type;
         int64_t trip_count;
+        bool exec_cond;
         std::vector<InputShape> shapes;
         std::vector<LOOP_IN_TYPE> types;
         ElementType netType;
-        std::tie(trip_count_type, trip_count, shapes, types, netType) = obj.param;
+        std::tie(trip_count_type, trip_count, exec_cond, shapes, types, netType) = obj.param;
 
         std::ostringstream result;
         for (size_t i = 0; i < shapes.size(); i++) {
@@ -52,6 +54,7 @@ public:
             result << type << "_";
         result << "trip_count_type=" << trip_count_type << "_";
         result << "trip_count=" << trip_count << "_";
+        result << "exec_cond=" << exec_cond << "_";
         result << "netType=" << netType;
         return result.str();
     }
@@ -83,10 +86,11 @@ protected:
     void SetUp() override {
         InputLayerType trip_count_type;
         int64_t trip_count;
+        bool exec_cond;
         std::vector<InputShape> shapes;
         std::vector<LOOP_IN_TYPE> types;
         ElementType netType;
-        std::tie(trip_count_type, trip_count, shapes, types, netType) = this->GetParam();
+        std::tie(trip_count_type, trip_count, exec_cond, shapes, types, netType) = this->GetParam();
 
         targetDevice = CommonTestUtils::DEVICE_CPU;
         init_input_shapes(shapes);
@@ -102,7 +106,7 @@
         }
 
         auto body_condition_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, true);
-        auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, true);
+        auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, exec_cond);
         std::shared_ptr<ngraph::Node> trip_count_input;
         int shift = 0;
         if (trip_count_type == InputLayerType::PARAMETER) {
@@ -163,9 +167,10 @@ protected:
     void SetUp() override {
         InputLayerType trip_count_type;
         int64_t trip_count;
+        bool exec_cond;
         std::vector<InputShape> shapes;
         std::vector<LOOP_IN_TYPE> types;
-        std::tie(trip_count_type, trip_count, shapes, types, inType) = this->GetParam();
+        std::tie(trip_count_type, trip_count, exec_cond, shapes, types, inType) = this->GetParam();
 
         targetDevice = CommonTestUtils::DEVICE_CPU;
         init_input_shapes(shapes);
@@ -181,7 +186,7 @@
             body_params.emplace_back(std::make_shared<ngraph::opset5::Parameter>(inType, pshape));
         }
 
-        auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{}, true);
+        auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{}, exec_cond);
         auto trip_count_input = std::make_shared<ngraph::opset5::Parameter>(ngraph::element::i64, ngraph::Shape{});
         trip_count_input->set_friendly_name("trip_count");
         params.insert(params.begin(), trip_count_input);
@@ -233,9 +238,10 @@ protected:
     void SetUp() override {
         InputLayerType trip_count_type;
         int64_t trip_count;
+        bool exec_cond;
         std::vector<InputShape> shapes;
         std::vector<LOOP_IN_TYPE> types;
-        std::tie(trip_count_type, trip_count, shapes, types, inType) = this->GetParam();
+        std::tie(trip_count_type, trip_count, exec_cond, shapes, types, inType) = this->GetParam();
 
         targetDevice = CommonTestUtils::DEVICE_CPU;
         init_input_shapes(shapes);
@@ -251,7 +257,7 @@
         }
 
         auto body_condition_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, true);
-        auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, true);
+        auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, exec_cond);
         std::shared_ptr<ngraph::Node> trip_count_input;
         int shift = 0;
         if (trip_count_type == InputLayerType::PARAMETER) {
@@ -320,10 +326,11 @@ const std::vector<ElementType> inputPrecisions = {
 };
 std::vector<InputLayerType> trip_count_type { InputLayerType::CONSTANT,
                                               InputLayerType::PARAMETER };
-std::vector<int64_t> trip_count { 1, 5 }; // works only if trip_count_type is constant
+std::vector<int64_t> trip_count { 0, 1, 5 };
+std::vector<bool> exec_cond { true, false };
 
 // dim[axis] = 1 because loop supports concatenation only with stride = part_size = 1
-// first loop suit test is with output concatenation
+// the first loop suite test is with output concatenation
 std::vector<std::vector<InputShape>> inputs = {
     { //first test suit
         { //dynamic shape for first input
@@ -393,6 +400,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LoopForCommon, LoopLayerCPUTest,
                         ::testing::Combine(
                                 ::testing::ValuesIn(trip_count_type),
                                 ::testing::ValuesIn(trip_count),
+                                ::testing::ValuesIn(exec_cond),
                                 ::testing::ValuesIn(inputs),
                                 ::testing::Values(types),
                                 ::testing::ValuesIn(inputPrecisions)),
@@ -428,6 +436,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LoopWhileCommon, LoopWhileLayerCPUTest,
                        ::testing::Combine(
                                ::testing::Values(trip_count_type[0]),
                                ::testing::Values(-1),
+                               ::testing::Values(true),
                                ::testing::ValuesIn(inputs_2),
                                ::testing::Values(std::vector<LOOP_IN_TYPE>{}),
                                ::testing::ValuesIn(inputPrecisions)),
@@ -462,6 +471,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LoopForDiffShapesConcat, LoopForDiffShapesLayerCP
                        ::testing::Combine(
                                ::testing::ValuesIn(trip_count_type),
                                ::testing::ValuesIn(trip_count),
+                               ::testing::ValuesIn(exec_cond),
                                ::testing::ValuesIn(inputs_3),
                                ::testing::Values(std::vector<LOOP_IN_TYPE>{}),
                                ::testing::ValuesIn(inputPrecisions)),
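
Reviewer note, not part of the patch: the behavioural change above is that a dynamic Loop/TensorIterator whose execution condition is false or whose trip count is 0 now skips the body and reports any still-undefined output dimensions as 0 (see nullifyUndefinedDims and the new early-out in prepareParams). The standalone sketch below, assuming only the C++ standard library, illustrates the dimension-zeroing idea in isolation; UNDEFINED_DIM and nullify_undefined_dims are illustrative stand-ins, not the plugin's actual types.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <limits>
#include <vector>

// Stand-in for the plugin's Shape::UNDEFINED_DIM sentinel.
constexpr std::size_t UNDEFINED_DIM = std::numeric_limits<std::size_t>::max();

// Replace every still-undefined dimension with 0: an output of a loop body
// that never executed has zero elements along its dynamic axes.
void nullify_undefined_dims(std::vector<std::size_t>& dims) {
    std::transform(dims.begin(), dims.end(), dims.begin(),
                   [](std::size_t dim) { return dim == UNDEFINED_DIM ? 0 : dim; });
}

int main() {
    std::vector<std::size_t> dims{2, UNDEFINED_DIM, 16};
    nullify_undefined_dims(dims);
    for (std::size_t d : dims)
        std::cout << d << ' ';   // prints: 2 0 16
    std::cout << '\n';
}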