diff --git a/src/core/reference/include/ngraph/runtime/reference/unique.hpp b/src/core/reference/include/ngraph/runtime/reference/unique.hpp
index 2f4642c56e4..9c5d4a8f1b3 100644
--- a/src/core/reference/include/ngraph/runtime/reference/unique.hpp
+++ b/src/core/reference/include/ngraph/runtime/reference/unique.hpp
@@ -113,16 +113,24 @@ UniqueElements<Index_t, Count_t> find_unique_elements(const Data_t* data,
         return *(data + lhs.idx) < *(data + rhs.idx);
     };

+    int64_t axisVal = 0;
+    if (axis) {
+        axisVal = *axis;
+        if (axisVal < 0) {
+            axisVal += data_shape.size();
+        }
+    }
+
     const auto slices_ascending_order = [&](const TensorSlice& lhs, const TensorSlice& rhs) {
-        const auto shape_to_iterate = slice_shape_to_iterate(data_shape, *axis);
+        const auto shape_to_iterate = slice_shape_to_iterate(data_shape, axisVal);
         for (auto it = CoordinateIterator(shape_to_iterate); it != CoordinateIterator::end(); ++it) {
             auto elem_coord_lhs = *it;
-            elem_coord_lhs.insert(elem_coord_lhs.cbegin() + *axis, lhs.idx);
+            elem_coord_lhs.insert(elem_coord_lhs.cbegin() + axisVal, lhs.idx);
             auto elem_coord_rhs = *it;
-            elem_coord_rhs.insert(elem_coord_rhs.cbegin() + *axis, rhs.idx);
+            elem_coord_rhs.insert(elem_coord_rhs.cbegin() + axisVal, rhs.idx);
             const auto lhs_elem_idx = ngraph::coordinate_index(elem_coord_lhs, data_shape);
             const auto rhs_elem_idx = ngraph::coordinate_index(elem_coord_rhs, data_shape);
@@ -149,15 +157,15 @@ UniqueElements<Index_t, Count_t> find_unique_elements(const Data_t* data,
         // the individual elements in the two compared slices are always separated by the same offset
         // and this can be used to compare them elementwise
-        const auto slices_offset = calc_slices_offset(lhs, rhs, data_shape_strides, *axis);
-        const auto shape_to_iterate = slice_shape_to_iterate(data_shape, *axis);
+        const auto slices_offset = calc_slices_offset(lhs, rhs, data_shape_strides, axisVal);
+        const auto shape_to_iterate = slice_shape_to_iterate(data_shape, axisVal);
         for (auto it = CoordinateIterator(shape_to_iterate); it != CoordinateIterator::end(); ++it) {
             // All slice elements have a "slice index" constant value at the axis position, only the other dimensions
             // vary for each slice element. Those dimensions are provided by CoordinateIterator, the value at axis
             // needs to be injected manually.
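            // For example (illustrative values, not from the patch): with data_shape = {2, 3, 4}
            // and axisVal = 1, shape_to_iterate is {2, 4}; an iterator coordinate {i, k} combined
            // with a slice index s yields the full coordinate {i, s, k}, which coordinate_index()
            // then linearizes into the flat element index used below.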
            auto elem_coord = *it;
-            elem_coord.insert(elem_coord.cbegin() + *axis, slice_with_lower_idx.idx);
+            elem_coord.insert(elem_coord.cbegin() + axisVal, slice_with_lower_idx.idx);
             const auto lhs_elem_idx = ngraph::coordinate_index(elem_coord, data_shape);
             const auto rhs_elem_idx = lhs_elem_idx + slices_offset;
             if (*(data + lhs_elem_idx) != *(data + rhs_elem_idx)) {
@@ -219,8 +227,8 @@ UniqueElements<Index_t, Count_t> find_unique_elements(const Data_t* data,
             }
         }
     } else {
-        ret.axis = *axis;
-        ret.all_tensor_elements = generate_descriptors(data_shape[*axis], DescriptorType::SLICE);
+        ret.axis = axisVal;
+        ret.all_tensor_elements = generate_descriptors(data_shape[axisVal], DescriptorType::SLICE);
         if (sorted) {
             std::stable_sort(begin(ret.all_tensor_elements), end(ret.all_tensor_elements), slices_ascending_order);
@@ -228,7 +236,7 @@ UniqueElements<Index_t, Count_t> find_unique_elements(const Data_t* data,
         ret.all_tensor_elements[0].rev_idx = 0;
         ret.unique_tensor_elements.push_back(ret.all_tensor_elements[0]);
-        for (size_t i = 1; i < data_shape[*axis]; ++i) {
+        for (size_t i = 1; i < data_shape[axisVal]; ++i) {
             auto& tensor_element = ret.all_tensor_elements[i];
             auto existing_unique = end(ret.unique_tensor_elements);
@@ -264,10 +272,17 @@ std::tuple make_tensor_shapes(const UniqueElements
diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp
        type_t {
            { "PriorBoxClustered", Type::PriorBoxClustered},
            {"Interaction", Type::Interaction},
            { "MHA", Type::MHA},
+           { "Unique", Type::Unique}
};

Type TypeFromName(const std::string& type) {
@@ -402,6 +403,8 @@ std::string NameFromType(const Type type) {
         return "Subgraph";
     case Type::MHA:
         return "MHA";
+    case Type::Unique:
+        return "Unique";
     default:
         return "Unknown";
     }
diff --git a/src/plugins/intel_cpu/src/cpu_types.h b/src/plugins/intel_cpu/src/cpu_types.h
index 671efdd5bef..d508147298f 100644
--- a/src/plugins/intel_cpu/src/cpu_types.h
+++ b/src/plugins/intel_cpu/src/cpu_types.h
@@ -110,7 +110,8 @@ enum class Type {
     PriorBox,
     PriorBoxClustered,
     Interaction,
-    MHA
+    MHA,
+    Unique
 };

 enum class Algorithm {
diff --git a/src/plugins/intel_cpu/src/nodes/unique.cpp b/src/plugins/intel_cpu/src/nodes/unique.cpp
new file mode 100644
index 00000000000..62463cd6643
--- /dev/null
+++ b/src/plugins/intel_cpu/src/nodes/unique.cpp
@@ -0,0 +1,495 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+#include
+
+#include "unique.hpp"
+#include
+#include
+
+using namespace InferenceEngine;
+using namespace ov::intel_cpu;
+using namespace ov::intel_cpu::node;
+
+#define THROW_ERROR IE_THROW() << getTypeStr() << " node with name '" << getName() << "' "
+
+bool Unique::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
+    try {
+        if (!ov::is_type<ov::op::v10::Unique>(op)) {
+            errorMessage = "Unsupported Unique operation version. The CPU plug-in supports only the 10th version.";
+            return false;
+        }
+        if (op->get_input_size() > AXIS && !ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXIS))) {
+            errorMessage = "The CPU plug-in supports only a constant Axis input.";
+            return false;
+        }
+    } catch (...)
+    {
+        return false;
+    }
+
+    return true;
+}
+
+Unique::Unique(const std::shared_ptr<ov::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) :
+        Node(op, eng, cache, InternalDynShapeInferFactory()) {
+    std::string errorMessage;
+    if (!isSupportedOperation(op, errorMessage)) {
+        IE_THROW(NotImplemented) << errorMessage;
+    }
+
+    if (!one_of(op->get_input_size(), 1, 2) || op->get_output_size() != 4)
+        THROW_ERROR << "has incorrect number of input/output edges.";
+
+    for (int i = 0; i < 4; i++) {
+        definedOutputs[i] = !op->get_output_target_inputs(i).empty();
+    }
+
+    sorted = ov::as_type_ptr<ov::op::v10::Unique>(op)->get_sorted();
+    if (op->get_input_size() > AXIS) {
+        flattened = false;
+        axis = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXIS))->cast_vector<int>()[0];
+        if (axis < 0) {
+            axis += op->get_input_partial_shape(IN_DATA).rank().get_length();
+        }
+        if (axis < 0 || axis >= op->get_input_partial_shape(IN_DATA).rank().get_length()) {
+            THROW_ERROR << "has invalid axis value: " << ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXIS))->cast_vector<int>()[0];
+        }
+    } else {
+        flattened = true;
+    }
+}
+
+void Unique::initSupportedPrimitiveDescriptors() {
+    dataPrecision = getOriginalInputPrecisionAtPort(IN_DATA);
+    if (dataPrecision != Precision::I32 && dataPrecision != Precision::I8 && dataPrecision != Precision::U8) {
+        dataPrecision = Precision::FP32;
+    }
+    dataTypeSize = dataPrecision.size();
+    const InferenceEngine::Precision axisPrecision = Precision::I32;
+
+    impl_desc_type implType = ref;
+
+    std::vector<PortConfigurator> inPortConfigs = { {LayoutType::ncsp, dataPrecision} };
+    if (!flattened) {
+        inPortConfigs.push_back({LayoutType::ncsp, axisPrecision});
+    }
+    std::vector<PortConfigurator> outPortConfigs;
+    for (int i = 0; i < 4; i++) {
+        outPortConfigs.push_back({LayoutType::ncsp, i == 0 ? dataPrecision : axisPrecision});
+    }
+
+    addSupportedPrimDesc(inPortConfigs, outPortConfigs, implType, isDynamicNode());
+}
+
+void Unique::createPrimitive() {
+    Node::createPrimitive();
+}
+
+void Unique::prepareParams() {
+    auto& dataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr();
+    if (!dataMemPtr || !dataMemPtr->isAllocated()) {
+        THROW_ERROR << "has not allocated input data memory.";
+    }
+    for (int i = 0; i < 4; i++) {
+        if (definedOutputs[i]) {
+            auto& dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
+            if (!dstMemPtr || !dstMemPtr->isAllocated()) {
+                THROW_ERROR << "has not allocated output memory at port " << i;
+            }
+        }
+    }
+    if (getSelectedPrimitiveDescriptor() == nullptr) {
+        THROW_ERROR << "has unidentified preferable primitive descriptor.";
+    }
+
+    size_t srcLen = 1;
+    if (flattened) {
+        srcLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / dataTypeSize;
+    } else {
+        auto dstDataShape = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getStaticDims();
+        srcLen = dstDataShape[axis];
+    }
+    firstUniTmp.resize(srcLen, 0);
+    inToOutTmp.resize(srcLen);
+    occurTmp.resize(srcLen);
+}
+
+template <typename T>
+struct Unique::flattenExec {
+    void operator()(Unique *node) {
+        node->flattenTensorExec<T>();
+    }
+};
+
+template <typename T>
+struct Unique::slicedExec {
+    void operator()(Unique *node) {
+        node->slicedTensorExec<T>();
+    }
+};
+
+void Unique::execute(dnnl::stream strm) {
+    if (flattened) {
+        OV_SWITCH(intel_cpu, flattenExec, this, dataPrecision,
+                  OV_CASE(Precision::FP32, float),
+                  OV_CASE(Precision::I32, int32_t),
+                  OV_CASE(Precision::I8, int8_t),
+                  OV_CASE(Precision::U8, uint8_t))
+    } else {
+        OV_SWITCH(intel_cpu, slicedExec, this, dataPrecision,
+                  OV_CASE(Precision::FP32, float),
+                  OV_CASE(Precision::I32, int32_t),
+                  OV_CASE(Precision::I8, int8_t),
+                  OV_CASE(Precision::U8, uint8_t))
+    }
+}
+
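+// Note: OV_SWITCH above picks the template instantiation whose OV_CASE precision matches
+// dataPrecision, e.g. Precision::FP32 invokes flattenExec<float>{}(this) (or slicedExec<float>),
+// which in turn forwards to flattenTensorExec<float>() / slicedTensorExec<float>() below.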
+void Unique::executeDynamicImpl(dnnl::stream strm) {
+    const auto& srcDataDims = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getStaticDims();
+    VectorDims dstDataDims;
+    Dim uniqLen = 1;
+    if (flattened) {
+        uniqLen = std::accumulate(srcDataDims.begin(), srcDataDims.end(), 1, std::multiplies<Dim>());
+        dstDataDims = { uniqLen };
+    } else {
+        uniqLen = srcDataDims[axis];
+        dstDataDims = srcDataDims;
+    }
+    redefineOutputMemory({ dstDataDims, {uniqLen}, {uniqLen}, {uniqLen}});
+
+    execute(strm);
+}
+
+template <typename T>
+void Unique::flattenTensorExec() {
+    const T* srcDataPtr = reinterpret_cast<const T*>(getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetPtr());
+    const size_t inputLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / sizeof(T);
+    std::vector<T> uniDataTmp(inputLen);
+    auto uniDataTmpPtr = uniDataTmp.data();
+    int *firstTmpPtr = nullptr, *inToOutTmpPtr = nullptr, *occurTmpPtr = nullptr;
+    if (definedOutputs[FIRST_UNIQUE_IDX]) {
+        firstTmpPtr = firstUniTmp.data();
+    }
+    if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
+        inToOutTmpPtr = inToOutTmp.data();
+    }
+    if (definedOutputs[OCCURRENCES_NUM]) {
+        occurTmpPtr = occurTmp.data();
+    }
+    uniqueLen = inputLen;
+
+    if (sorted) {
+        std::memcpy(uniDataTmpPtr, srcDataPtr, inputLen * sizeof(T));
+        std::sort(uniDataTmpPtr, uniDataTmpPtr + inputLen);
+        auto last = std::unique(uniDataTmpPtr, uniDataTmpPtr + inputLen);
+        uniqueLen = last - uniDataTmpPtr;
+
+        if (definedOutputs[FIRST_UNIQUE_IDX]) {
+            T* first = uniDataTmpPtr;
+            for (T* it = first; it < last; it++) {
+                for (int i = 0; i < inputLen; i++) {
+                    if (srcDataPtr[i] == *it) {
+                        *firstTmpPtr++ = i;
+                        first++;
+                        break;
+                    }
+                }
+            }
+        }
+        if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
+            for (int i = 0; i < inputLen; i++) {
+                if (i > 0 && srcDataPtr[i] == srcDataPtr[i - 1]) {
+                    inToOutTmpPtr[i] = inToOutTmpPtr[i - 1];
+                    continue;
+                }
+                for (int j = 0; j < uniqueLen; j++) {
+                    if (srcDataPtr[i] == uniDataTmpPtr[j]) {
+                        inToOutTmpPtr[i] = j;
+                        break;
+                    }
+                }
+            }
+        }
+        if (definedOutputs[OCCURRENCES_NUM]) {
+            std::fill(occurTmpPtr, occurTmpPtr + uniqueLen, 0);
+            for (int j = 0; j < uniqueLen; j++) {
+                for (int i = 0; i < inputLen; i++) {
+                    if (srcDataPtr[i] == uniDataTmpPtr[j]) {
+                        occurTmpPtr[j]++;
+                    }
+                }
+            }
+        }
+    } else {
+        uniDataTmpPtr[0] = srcDataPtr[0];
+        if (definedOutputs[FIRST_UNIQUE_IDX]) {
+            firstTmpPtr[0] = 0;
+        }
+        if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
+            inToOutTmpPtr[0] = 0;
+        }
+        if (definedOutputs[OCCURRENCES_NUM]) {
+            std::fill(occurTmpPtr, occurTmpPtr + inputLen, 1);
+        }
+        uniqueLen = 1;
+
+        for (int i = 1; i < inputLen; i++) {
+            bool found = false;
+            int j = 0;
+            for (; j < uniqueLen; j++) {
+                if (uniDataTmpPtr[j] == srcDataPtr[i]) {
+                    found = true;
+                    break;
+                }
+            }
+            if (!found) {
+                uniDataTmpPtr[uniqueLen] = srcDataPtr[i];
+                if (definedOutputs[FIRST_UNIQUE_IDX]) {
+                    firstTmpPtr[uniqueLen] = i;
+                }
+                uniqueLen++;
+            } else {
+                if (definedOutputs[OCCURRENCES_NUM]) {
+                    occurTmpPtr[j]++;
+                }
+            }
+            if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
+                inToOutTmpPtr[i] = j;
+            }
+        }
+    }
+
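+    // executeDynamicImpl() pre-allocated the outputs with an upper-bound length (the whole input
+    // length); now that the actual uniqueLen is known, the output memory is redefined before the
+    // results are copied out.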
+    redefineOutputMemory({ {uniqueLen}, {uniqueLen}, {inputLen}, {uniqueLen}});
+
+    T* uniDataPtr = reinterpret_cast<T*>(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr());
+    memcpy(uniDataPtr, uniDataTmpPtr, uniqueLen * sizeof(T));
+    if (definedOutputs[FIRST_UNIQUE_IDX]) {
+        int *firstPtr = reinterpret_cast<int*>(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr());
+        memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int));
+    }
+    if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
+        auto inToOutPtr = reinterpret_cast<int*>(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr());
+        memcpy(inToOutPtr, inToOutTmp.data(), inputLen * sizeof(int));
+    }
+    if (definedOutputs[OCCURRENCES_NUM]) {
+        auto occurPtr = reinterpret_cast<int*>(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr());
+        memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int));
+    }
+}
+
+template <typename T>
+void Unique::slicedTensorExec() {
+    const T* srcDataPtr = reinterpret_cast<const T*>(getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetPtr());
+    const size_t inputLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / sizeof(T);
+    std::vector<T> uniDataTmp(inputLen);
+    auto uniDataTmpPtr = uniDataTmp.data();
+    int *firstTmpPtr = nullptr, *inToOutTmpPtr = nullptr, *occurTmpPtr = nullptr;
+    if (definedOutputs[FIRST_UNIQUE_IDX]) {
+        firstTmpPtr = firstUniTmp.data();
+    }
+    if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
+        inToOutTmpPtr = inToOutTmp.data();
+    }
+    if (definedOutputs[OCCURRENCES_NUM]) {
+        occurTmpPtr = occurTmp.data();
+    }
+
+    const auto& srcDataShape = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getStaticDims();
+
+    const auto cmpBlNum = srcDataShape[axis]; // Blocks to compare.
+    int64_t partsInBl = 1; // Parts in block
+    if (axis > 0) {
+        partsInBl = std::accumulate(srcDataShape.begin(), srcDataShape.begin() + axis, 1, std::multiplies<int64_t>());
+    }
+    int64_t elPerPart = 1; // Elements number in part.
+    if (axis < srcDataShape.size() - 1) {
+        elPerPart = std::accumulate(srcDataShape.begin() + axis + 1, srcDataShape.end(), 1, std::multiplies<int64_t>());
+    }
+    const auto partLenB = elPerPart * dataPrecision.size();
+    const auto partStep = elPerPart * cmpBlNum;
+
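+    // Illustrative example (not from the patch): for srcDataShape = {2, 3, 4} and axis = 1,
+    // cmpBlNum = 3 slices are compared; each slice consists of partsInBl = 2 runs of
+    // elPerPart = 4 contiguous elements, and consecutive runs of one slice lie
+    // partStep = 12 elements apart in the flat source buffer.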
+    if (definedOutputs[FIRST_UNIQUE_IDX]) {
+        firstTmpPtr[0] = 0;
+    }
+    if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
+        inToOutTmpPtr[0] = 0;
+    }
+    if (definedOutputs[OCCURRENCES_NUM]) {
+        occurTmpPtr[0] = 1;
+        std::fill(occurTmpPtr, occurTmpPtr + cmpBlNum, 1);
+    }
+
+    uniqueLen = 1;
+    std::vector<size_t> uniqIdx(cmpBlNum, 0);
+    for (int b1 = 1; b1 < cmpBlNum; b1++) {
+        auto first1 = srcDataPtr + b1 * elPerPart;
+        auto last1 = srcDataPtr + (b1 + 1) * elPerPart;
+        bool equal = true;
+        int b2 = 0;
+        // Compare with unique blocks.
+        for (; b2 < uniqueLen; b2++) {
+            auto first2 = srcDataPtr + uniqIdx[b2] * elPerPart;
+            equal = true;
+            for (int p = 0; p < partsInBl; p++) {
+                equal = std::equal(first1, last1, first2);
+                if (!equal) {
+                    break;
+                }
+                first1 += partStep;
+                last1 += partStep;
+                first2 += partStep;
+            }
+            if (equal) {
+                break;
+            }
+        }
+        if (!equal) {
+            if (definedOutputs[FIRST_UNIQUE_IDX]) {
+                firstTmpPtr[uniqueLen] = b1;
+            }
+
+            uniqIdx[uniqueLen++] = b1;
+        } else {
+            if (definedOutputs[OCCURRENCES_NUM]) {
+                occurTmpPtr[b2]++;
+            }
+        }
+        if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
+            inToOutTmpPtr[b1] = b2;
+        }
+    }
+
+    const auto dstPrtStep = elPerPart * uniqueLen;
+    for (int b1 = 0; b1 < uniqueLen; b1++) {
+        auto first1 = srcDataPtr + uniqIdx[b1] * elPerPart;
+        auto first2 = uniDataTmpPtr + b1 * elPerPart;
+        for (int p = 0; p < partsInBl; p++) {
+            memcpy(first2, first1, partLenB);
+            first1 += partStep;
+            first2 += dstPrtStep;
+        }
+    }
+
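+    // The sorted branch below reorders the unique slices into ascending lexicographic order by
+    // stable-sorting on each scalar position from the last to the first (LSD-style); moveTo
+    // tracks where every slice has to move, and the buff1/buff2 cycle walk applies that
+    // permutation in place to the slice data, first-occurrence indices and occurrence counts.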
+    if (sorted) {
+        const auto elInBl = elPerPart * partsInBl;
+        struct OrdEl {
+            T val;
+            int64_t idx;
+        };
+
+        std::vector<OrdEl> colToSort(uniqueLen);
+        std::vector<int64_t> moveTo(uniqueLen);
+        for (int k = 0; k < uniqueLen; k++) {
+            moveTo[k] = k;
+        }
+        std::vector<T> buff1(elPerPart);
+        std::vector<T> buff2(elPerPart);
+        for (int64_t p = partsInBl - 1; p >= 0; p--) {
+            for (int64_t e = elPerPart - 1; e >= 0 ; e--) {
+                int64_t pos1 = p * dstPrtStep + e;
+                for (int64_t i = 0; i < uniqueLen; i++) {
+                    int64_t pos2 = i * elInBl + pos1;
+                    colToSort[i] = {uniDataTmpPtr[pos2], i};
+                }
+                std::stable_sort(colToSort.begin(), colToSort.end(), [](const OrdEl &el1, const OrdEl &el2) { return el1.val < el2.val; });
+                for (int k = 0; k < uniqueLen; k++) {
+                    moveTo[colToSort[k].idx] = k;
+                }
+
+                // perm
+                for (int64_t pb = 0; pb < partsInBl; pb++) {
+                    auto currDst = uniDataTmpPtr + pb * dstPrtStep;
+                    memcpy(buff1.data(), currDst, partLenB);
+                    auto dstIdx = moveTo[0];
+                    for (int64_t b = 0; b < uniqueLen; b++) {
+                        if (dstIdx == moveTo[dstIdx]) {
+                            dstIdx = moveTo[++dstIdx];
+                            continue;
+                        }
+                        T* dst = currDst + dstIdx * elPerPart;
+
+                        auto& bSrc = b % 2 == 0 ? buff1 : buff2;
+                        auto& bDst = b % 2 == 0 ? buff2 : buff1;
+                        memcpy(bDst.data(), dst, partLenB);
+                        memcpy(dst, bSrc.data(), partLenB);
+
+                        dstIdx = moveTo[dstIdx];
+                    }
+                }
+
+                auto mPos = moveTo[0];
+                int32_t firstSrc = 0, firstDst = 0, ocSrc = 0, ocDst = 0;
+                if (definedOutputs[FIRST_UNIQUE_IDX]) {
+                    firstSrc = firstTmpPtr[0];
+                }
+                if (definedOutputs[OCCURRENCES_NUM]) {
+                    ocSrc = occurTmpPtr[0];
+                }
+                for (int k = 0; k < uniqueLen; k++) {
+                    if (mPos == moveTo[mPos]) {
+                        mPos = moveTo[++mPos];
+                        continue;
+                    }
+
+                    if (definedOutputs[FIRST_UNIQUE_IDX]) {
+                        auto& fSrc = k % 2 == 0 ? firstSrc : firstDst;
+                        auto& fDst = k % 2 == 0 ? firstDst : firstSrc;
+                        fDst = firstTmpPtr[mPos];
+                        firstTmpPtr[mPos] = fSrc;
+                    }
+                    if (definedOutputs[OCCURRENCES_NUM]) {
+                        auto& oSrc = k % 2 == 0 ? ocSrc : ocDst;
+                        auto& oDst = k % 2 == 0 ? ocDst : ocSrc;
+                        oDst = occurTmpPtr[mPos];
+                        occurTmpPtr[mPos] = oSrc;
+                    }
+
+                    mPos = moveTo[mPos];
+                }
+            }
+        }
+
+        if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
+            for (int b1 = 0; b1 < cmpBlNum; b1++) {
+                auto first1 = srcDataPtr + b1 * elPerPart;
+                auto last1 = srcDataPtr + (b1 + 1) * elPerPart;
+                bool equal = true;
+                for (int b2 = 0; b2 < uniqueLen; b2++) {
+                    auto first2 = uniDataTmpPtr + b2 * elPerPart;
+                    equal = true;
+                    for (int p = 0; p < partsInBl; p++) {
+                        equal = std::equal(first1, last1, first2);
+                        if (!equal) {
+                            break;
+                        }
+                        first2 += dstPrtStep;
+                    }
+                    if (equal) {
+                        inToOutTmpPtr[b1] = b2;
+                    }
+                }
+            }
+        }
+    }
+
+    auto dstDataShape = srcDataShape;
+    dstDataShape[axis] = uniqueLen;
+    redefineOutputMemory({ dstDataShape, {uniqueLen}, {cmpBlNum}, {uniqueLen}});
+
+    T* uniDataPtr = reinterpret_cast<T*>(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr());
+    memcpy(uniDataPtr, uniDataTmpPtr, getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetSize());
+    if (definedOutputs[FIRST_UNIQUE_IDX]) {
+        int *firstPtr = reinterpret_cast<int*>(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr());
+        memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int));
+    }
+    if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
+        auto inToOutPtr = reinterpret_cast<int*>(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr());
+        memcpy(inToOutPtr, inToOutTmp.data(), cmpBlNum * sizeof(int));
+    }
+    if (definedOutputs[OCCURRENCES_NUM]) {
+        auto occurPtr = reinterpret_cast<int*>(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr());
+        memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int));
+    }
+}
diff --git a/src/plugins/intel_cpu/src/nodes/unique.hpp b/src/plugins/intel_cpu/src/nodes/unique.hpp
new file mode 100644
index 00000000000..c96c8d9d8a6
--- /dev/null
+++ b/src/plugins/intel_cpu/src/nodes/unique.hpp
@@ -0,0 +1,68 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+
+#include
+#include
+#include
+
+namespace ov {
+namespace intel_cpu {
+namespace node {
+
+class Unique : public Node {
+public:
+    Unique(const std::shared_ptr<ov::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
+    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
+
+    void getSupportedDescriptors() override {};
+    void initSupportedPrimitiveDescriptors() override;
+    void createPrimitive() override;
+    void execute(dnnl::stream strm) override;
+    bool created() const override { return getType() == Type::Unique; }
+
+protected:
+    void executeDynamicImpl(dnnl::stream strm) override;
+    void prepareParams() override;
+    bool needShapeInfer() const override { return false; }
+
+private:
+    template <typename T>
+    void flattenTensorExec();
+    template <typename T>
+    void slicedTensorExec();
+
+    template <typename T>
+    struct flattenExec;
+    template <typename T>
+    struct slicedExec;
+
+    std::vector<int> firstUniTmp;
+    std::vector<int> inToOutTmp;
+    std::vector<int> occurTmp;
+
+    bool sorted = false;
+    bool flattened = true;
+    int axis = 0;
+    bool definedOutputs[4] = { false, false, false, false };
+    InferenceEngine::Precision dataPrecision;
+    int64_t dataTypeSize = 1;
+    size_t uniqueLen = 1;
+
+    int threadsNum = 1;
+
+    static constexpr size_t IN_DATA = 0;
+    static constexpr size_t AXIS = 1;
+    static constexpr size_t UNIQUE_DATA = 0;
+    static constexpr size_t FIRST_UNIQUE_IDX = 1;
+    static constexpr size_t INPUT_TO_UNIQ_IDX = 2;
+    static constexpr size_t OCCURRENCES_NUM = 3;
+};
+
+} // namespace node
+} // namespace intel_cpu
+} // namespace ov
diff --git
a/src/plugins/intel_cpu/src/nodes_factory.cpp b/src/plugins/intel_cpu/src/nodes_factory.cpp index 27bb20e43c7..76903bb31af 100644 --- a/src/plugins/intel_cpu/src/nodes_factory.cpp +++ b/src/plugins/intel_cpu/src/nodes_factory.cpp @@ -91,6 +91,7 @@ #include "nodes/eye.h" #include "nodes/interaction.h" #include "nodes/mha.h" +#include "nodes/unique.hpp" namespace ov { namespace intel_cpu { @@ -194,6 +195,7 @@ Node::NodesFactory::NodesFactory() INTEL_CPU_NODE(Eye, Type::Eye); INTEL_CPU_NODE(Interaction, Type::Interaction); INTEL_CPU_NODE(MHA, Type::MHA); + INTEL_CPU_NODE(Unique, Type::Unique); } #undef INTEL_CPU_NODE diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 5d349918cbe..4601c89b253 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -87,7 +87,7 @@ #include #include #include "transformations/op_conversions/eye_decomposition.hpp" -#include "transformations/smart_reshape/smart_reshape.hpp" +#include "transformations/op_conversions/unique_decomposition.hpp" #include "ngraph_transformations/convert_to_cpu_specific_opset.hpp" #include "ngraph_transformations/snippets_mark_skipped.hpp" @@ -287,9 +287,9 @@ static bool fuse_type_to_convert(const std::shared_ptr& node, ov:: static void TransformationUpToCPUSpecificOpSet(std::shared_ptr nGraphFunc, const bool _enableLPT, const bool _enableBF16, const bool _enableSnippets, const bool isLegacyApi) { - ngraph::pass::Manager manager; + ov::pass::Manager manager; manager.set_per_pass_validation(false); - manager.register_pass(); + manager.register_pass(); const bool useLpt = _enableLPT && @@ -331,32 +331,32 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}}; manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); if (useLpt) { CPU_LPT_SCOPE(LowPrecisionTransformations_Part2); manager.register_pass(defaultPrecisions); } - manager.register_pass(); - manager.register_pass(precisions, type_to_fuse); - manager.register_pass(); + manager.register_pass(); + manager.register_pass(precisions, type_to_fuse); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -366,15 +366,15 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr using const_node_ptr = const std::shared_ptr; // SpaceToDepth/ DepthToSpace node implementation supports only equal input/output tensors with rank <= 5 - pass_config->set_callback( + pass_config->set_callback( [](const_node_ptr &node) -> bool { 
return node->input_value(0).get_shape().size() <= 5lu && node->input_value(0).get_shape().size() == node->get_output_shape(0).size(); }); - pass_config->set_callback( + pass_config->set_callback( [](const_node_ptr &node) -> bool { const auto & rank = node->input(0).get_partial_shape().rank().get_length(); return rank == 4lu || rank == 5lu; @@ -443,33 +443,33 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr return false; }; - pass_config->set_callback( + pass_config->set_callback( [isSequencePrimitiveSupported](const_node_ptr &node) -> bool { return isSequencePrimitiveSupported(node); }); - pass_config->set_callback( + pass_config->set_callback( [isCellPrimitiveSupported](const_node_ptr &node) -> bool { return isCellPrimitiveSupported(node); }); - pass_config->set_callback( + pass_config->set_callback( [](const_node_ptr &node) -> bool { std::string errorMessage; return node::MVN::isSupportedOperation(node, errorMessage); }); - pass_config->set_callback( + pass_config->set_callback( [](const_node_ptr &node) -> bool { std::string errorMsg; return node::NormalizeL2::isSupportedOperation(node, errorMsg); }); - pass_config->enable(); - pass_config->set_callback( + pass_config->enable(); + pass_config->set_callback( [](const_node_ptr &node) -> bool { return node->input_value(0).get_partial_shape().rank().get_length() <= 5; }); @@ -487,9 +487,9 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr return true; }; - pass_config->set_callback(nmsCallback); - pass_config->set_callback(nmsCallback); - pass_config->set_callback(nmsCallback); + pass_config->set_callback(nmsCallback); + pass_config->set_callback(nmsCallback); + pass_config->set_callback(nmsCallback); } // List of enabled/disabled transformations @@ -499,46 +499,47 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr pass_config->disable(); pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); - pass_config->enable(); - pass_config->enable(); - pass_config->enable(); - pass_config->enable(); - pass_config->enable(); + pass_config->enable(); + pass_config->enable(); + pass_config->enable(); + pass_config->enable(); + pass_config->enable(); if (useLpt) { CPU_LPT_SCOPE(LowPrecisionTransformations_Part3); - pass_config->set_callback([](const_node_ptr &node) -> bool { + pass_config->set_callback([](const_node_ptr &node) -> bool { 
std::string errMsg; return !node::FakeQuantize::isSupportedOperation(node, errMsg); }); - pass_config->set_callback([&defaultPrecisions](const_node_ptr &node) -> bool { + pass_config->set_callback([&defaultPrecisions](const_node_ptr &node) -> bool { return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(node, defaultPrecisions); }); } @@ -597,7 +598,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr supportedPrecisions = std::vector({}); } - ngraph::pass::Manager lptManager; + ov::pass::Manager lptManager; lptManager.register_pass( supportedPrecisions, quantizationRestrictions, @@ -619,10 +620,10 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr lptManager.run_passes(nGraphFunc); } - ngraph::pass::Manager postLPTPassManager; - postLPTPassManager.register_pass(); + ov::pass::Manager postLPTPassManager; + postLPTPassManager.register_pass(); postLPTPassManager.register_pass(); - postLPTPassManager.get_pass_config()->set_callback([](const_node_ptr &node) -> bool { + postLPTPassManager.get_pass_config()->set_callback([](const_node_ptr &node) -> bool { // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation return node->get_rt_info().count("UNROLL_TI") == 0; }); @@ -634,7 +635,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr return false; }); - postLPTPassManager.register_pass(); + postLPTPassManager.register_pass(); // Snippets may brake MHA patterns so the fusion has to performed before postLPTPassManager.register_pass(); @@ -663,7 +664,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr postLPTPassManager.run_passes(nGraphFunc); if (_enableSnippets && dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) { - ngraph::pass::Manager snippetsManager; + ov::pass::Manager snippetsManager; snippetsManager.register_pass(); snippetsManager.register_pass(); snippetsManager.register_pass(); @@ -697,13 +698,13 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr snippetsManager.run_passes(nGraphFunc); } - ngraph::pass::Manager postSnippetsManager; - postSnippetsManager.register_pass(); - postSnippetsManager.get_pass_config()->set_callback([](const_node_ptr& node) -> bool { + ov::pass::Manager postSnippetsManager; + postSnippetsManager.register_pass(); + postSnippetsManager.get_pass_config()->set_callback([](const_node_ptr& node) -> bool { std::string errMsg; return node::FakeQuantize::isSupportedOperation(node, errMsg); }); - postSnippetsManager.register_pass(); + postSnippetsManager.register_pass(); postSnippetsManager.run_passes(nGraphFunc); } diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 6174371e256..649c553aef3 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -186,12 +186,15 @@ std::vector disabledTestPatterns() { // Reorder->GridSample->Reorder also does not work here. Potential fix is to use nearest conversion instead of truncation. R"(.*GridSampleLayerTestCPU.*(BILINEAR|BICUBIC).*(i32|i8).*)", // 94989. BF16 Reference produces different results. - R"(.*GridSampleLayerTestCPU.*(BILINEAR|BICUBIC).*gridPrc=bf16.*)", + // GridSample regression on bf16 data. 
+ R"(.*GridSampleLayerTestCPU.*(BILINEAR|BICUBIC).*bf16.*)", // // Issue: 95915 R"(smoke_dynamic/AUGRUCellCPUTest.CompareWithRefs/IS=\(\[\?\.1\]_\[\?\.1\]_\[\?\.1\]_\)_TS=\{\(1\.1\)_\(1\.1\)_\(1\.1\)\}_\{\(3\.1\)_\(3\.1\)_\(3\.1\)\}_\{\(5\.1\)_\(5\.1\)_\(5\.1\)\}_decompose=0_activations=\(sigmoid\.tanh\)_clip=0_linear=0_netPrec=f32__inFmts=nc\.nc_outFmts=nc_primitive=ref_any_PluginConf_ENFORCE_BF16=YES)", // NOLINT R"(smoke_dynamic/GRUCellCPUTest.CompareWithRefs/IS=\(\[\?.1\]_\[\?\.1\]_\)_TS=\{\(1\.1\)_\(1\.1\)\}_\{\(3\.1\)_\(3\.1\)\}_\{\(5\.1\)_\(5\.1\)\}_decompose=0_activations=\(sigmoid\.tanh\)_clip=0_linear=0_netPrec=f32__inFmts=nc\.nc_outFmts=nc_primitive=ref_any_PluginConf_ENFORCE_BF16=YES)", // NOLINT R"(nightly_dynamic_bf16/RNNSequenceCPUTest.*activations=\(relu\).*)", R"(smoke_dynamic_BatchSizeOne/RNNSequenceCPUTest.*IS=\(\[1\.\?\.10\]_\[1\.1\.10\]_\[\?\]_\)_TS=\{\(1\.2\.10\)_\(1\.1\.10\)_\(1\)\}_\{\(1\.4\.10\)_\(1\.1\.10\)_\(1\)\}_\{\(1\.8\.10\)_\(1\.1\.10\)_\(1\)\}_seqMode=PURE_SEQ_activations=\(relu\)_clip=0_direction=forward_netPrec=f32__inFmts=ncw\.ntc_outFmts=ncw\.ncw_primitive=ref_any)", // NOLINT + // 98151. Not valid sorting for slices in reference. + R"(.*UniqueLayerTestCPU.*axis.*True.*)" }; #define FIX_62820 0 diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/ctc_Loss.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/ctc_loss.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/single_layer_tests/ctc_Loss.cpp rename to src/plugins/intel_cpu/tests/functional/single_layer_tests/ctc_loss.cpp diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/unique.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/unique.cpp new file mode 100644 index 00000000000..0928f3ef3c7 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/unique.cpp @@ -0,0 +1,261 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ngraph_functions/builders.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include + +using namespace CPUTestUtils; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { + +typedef std::tuple< + std::vector, // Input shapes + std::tuple, // Is flattened and axis + bool, // Sorted + ElementType, // Data precision + CPUSpecificParams, // CPU specific params + std::map // Additional config +> UniqueLayerTestCPUParams; + +class UniqueLayerTestCPU : public testing::WithParamInterface, + virtual public SubgraphBaseTest, public CPUTestsBase { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::vector inputShapes; + std::tuple flatOrAxis; + bool sorted; + ElementType dataPrecision; + CPUSpecificParams cpuParams; + std::map additionalConfig; + + std::tie(inputShapes, flatOrAxis, sorted, dataPrecision, cpuParams, additionalConfig) = obj.param; + + std::ostringstream result; + result << "IS=("; + for (size_t i = 0lu; i < inputShapes.size(); i++) { + result << CommonTestUtils::partialShape2str({inputShapes[i].first}) << (i < inputShapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < inputShapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < inputShapes.size(); j++) { + result << CommonTestUtils::vec2str(inputShapes[j].second[i]) << (j < inputShapes.size() - 1lu ? 
"_" : ""); + } + result << "}_"; + } + + if (!std::get<0>(flatOrAxis)) { + result << "axis=" << std::get<1>(flatOrAxis) << "_"; + } else { + result << "flattened" << "_"; + } + result << "sorted=" << (sorted ? "True" : "False") << "_"; + result << "dataPrc=" << dataPrecision; + result << CPUTestsBase::getTestCaseName(cpuParams); + + if (!additionalConfig.empty()) { + result << "_PluginConf"; + for (auto &item : additionalConfig) { + if (item.second == InferenceEngine::PluginConfigParams::YES) + result << "_" << item.first << "=" << item.second; + } + } + + return result.str(); + } + +protected: + void SetUp() override { + std::vector inputShapes; + std::tuple flatOrAxis; + bool sorted, flattened; + int axis; + ElementType dataPrecision; + CPUSpecificParams cpuParams; + std::map additionalConfig; + + std::tie(inputShapes, flatOrAxis, sorted, dataPrecision, cpuParams, additionalConfig) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + targetDevice = CommonTestUtils::DEVICE_CPU; + init_input_shapes(inputShapes); + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + flattened = std::get<0>(flatOrAxis); + + if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) { + selectedType = makeSelectedTypeStr(selectedType, ElementType::bf16); + } else { + if (dataPrecision == ElementType::bf16) { + dataPrecision = ElementType::f32; + } + selectedType = makeSelectedTypeStr(selectedType, dataPrecision); + } + + auto params = ngraph::builder::makeDynamicParams(dataPrecision, inputDynamicShapes); + params[0]->set_friendly_name("data"); + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + std::shared_ptr uniqueNode; + if (flattened) { + uniqueNode = std::make_shared(paramOuts[0], sorted); + } else { + axis = std::get<1>(flatOrAxis); + uniqueNode = std::make_shared(paramOuts[0], + ov::op::v0::Constant::create(ov::element::i64, ov::Shape({1}), {axis}), + sorted); + } + + function = makeNgraphFunction(dataPrecision, params, uniqueNode, "UniqueCPU"); + } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + + for (int i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::runtime::Tensor tensor; + + if (funcInput.get_node()->get_friendly_name() == "data") { + int32_t range = std::accumulate(targetInputStaticShapes[0].begin(), targetInputStaticShapes[0].end(), 1, std::multiplies()); + tensor = utils::create_and_fill_tensor( + funcInput.get_element_type(), targetInputStaticShapes[0], range, -range / 2, 1); + } + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + } +}; + +TEST_P(UniqueLayerTestCPU, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + run(); + CheckPluginRelatedResults(compiledModel, "Unique"); +} + +namespace { + +const std::vector dataPrecisionSmoke = { + ElementType::f32, + ElementType::i32 +}; +const std::vector dataPrecisionNightly = { + ElementType::bf16, + ElementType::i8 +}; + +std::vector> flatOrAxis { {true, 0}, {false, 0}, {false, 1}, {false, -1} }; + +std::vector sorted { true, false}; + +std::vector> additionalConfig + = {{{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO}}, + {{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::YES}}}; + +std::vector getCPUInfo() { + std::vector resCPUParams; 
+    resCPUParams.push_back(CPUSpecificParams{{}, {}, {"ref"}, "ref"});
+    return resCPUParams;
+}
+
+std::vector<std::vector<InputShape>> getStaticShapes() {
+    std::vector<std::vector<InputShape>> result = {
+        { { {}, { {1, 1, 1} } } },    // Static shapes
+        { { {}, { {1, 2, 1} } } },    // Static shapes
+        { { {}, { {1, 1, 3} } } },    // Static shapes
+        { { {}, { {2, 2, 1} } } },    // Static shapes
+        { { {}, { {1, 4, 1} } } },    // Static shapes
+        { { {}, { {1, 5, 1} } } },    // Static shapes
+        { { {}, { {3, 2, 1} } } },    // Static shapes
+        { { {}, { {1, 1, 7} } } },    // Static shapes
+        { { {}, { {2, 2, 2} } } },    // Static shapes
+        { { {}, { {1, 8, 1} } } },    // Static shapes
+        { { {}, { {3, 3, 1, 1} } } }, // Static shapes
+        { { {}, { {1, 5, 2, 1} } } }, // Static shapes
+        { { {}, { {1, 1, 11} } } },   // Static shapes
+        { { {}, { {32, 35, 37} } } }, // Static shapes
+        { { {}, { {2, 3, 2} } } },    // Static shapes
+        { { {}, { {1, 1, 13} } } },   // Static shapes
+        { { {}, { {7, 1, 2} } } },    // Static shapes
+        { { {}, { {3, 5, 1} } } },    // Static shapes
+        { { {}, { {4, 2, 2} } } },    // Static shapes
+        { { {}, { {1, 17, 1} } } },   // Static shapes
+        { { {}, { {3, 2, 3, 1} } } }, // Static shapes
+        { { {}, { {8, 16, 32} } } },  // Static shapes
+        { { {}, { {37, 19, 11} } } }, // Static shapes
+        { { {}, { {1, 19, 1} } } },   // Static shapes
+        { { {}, { {2, 5, 2} } } },    // Static shapes
+        { { {}, { {1, 3, 7} } } },    // Static shapes
+        { { {}, { {11, 1, 2} } } },   // Static shapes
+        { { {}, { {1, 1, 23} } } },   // Static shapes
+        { { {}, { {4, 3, 2} } } },    // Static shapes
+        { { {}, { {5, 1, 5} } } },    // Static shapes
+        { { {}, { {100, 1, 1} } } },  // Static shapes
+        { { {}, { {5, 5, 5} } } }     // Static shapes
+    };
+
+    return result;
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_static, UniqueLayerTestCPU,
+                ::testing::Combine(
+                        ::testing::ValuesIn(getStaticShapes()),
+                        ::testing::ValuesIn(flatOrAxis),
+                        ::testing::ValuesIn(sorted),
+                        ::testing::ValuesIn(dataPrecisionSmoke),
+                        ::testing::ValuesIn(getCPUInfo()),
+                        ::testing::Values(additionalConfig[0])),
+                UniqueLayerTestCPU::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(nightly_static, UniqueLayerTestCPU,
+                ::testing::Combine(
+                        ::testing::ValuesIn(getStaticShapes()),
+                        ::testing::ValuesIn(flatOrAxis),
+                        ::testing::ValuesIn(sorted),
+                        ::testing::ValuesIn(dataPrecisionNightly),
+                        ::testing::ValuesIn(getCPUInfo()),
+                        ::testing::Values(additionalConfig[0])),
+                UniqueLayerTestCPU::getTestCaseName);
+
+const std::vector<std::vector<InputShape>> dynamicInShapes = {
+    { { { ov::Dimension(1, 15), -1, -1, -1 },                              // Dynamic shape
+        { {1, 1, 1, 1}, {6, 3, 1, 2}, {4, 5, 3, 1}, {2, 7, 2, 2} } } },    // Target shapes
+    { { { -1, -1, -1, -1 },                                                // Dynamic shape
+        { {1, 2, 1, 5}, {3, 4, 2, 3}, {5, 6, 7, 1}, {7, 8, 2, 4} } } },    // Target shapes
+    { { { ov::Dimension(2, 15), -1, -1, -1 },                              // Dynamic shape
+        { {8, 3, 3, 3}, {6, 5, 2, 5}, {4, 7, 1, 11}, {2, 9, 3, 4} } } },   // Target shapes
+    { { { 3, 4, 4, 5 },                                                    // Dynamic shape
+        { {3, 4, 4, 5}, {3, 4, 4, 5}, {3, 4, 4, 5}, {3, 4, 4, 5} } } },    // Target shapes
+    { { { -1, -1, -1, -1 },                                                // Dynamic shape
+        { {1, 2, 1, 13}, {3, 4, 7, 2}, {5, 6, 3, 5}, {7, 8, 4, 4} } } },   // Target shapes
+    { { { -1, -1, -1, -1 },                                                // Dynamic shape
+        { {2, 11, 1, 17}, {4, 9, 6, 3}, {6, 7, 7, 3}, {8, 3, 2, 11} } } }, // Target shapes
+    { { { 3, -1, -1, -1 },                                                 // Dynamic shape
+        { {3, 2, 1, 23}, {3, 4, 3, 8}, {3, 6, 5, 5}, {3, 8, 31, 1} } } },  // Target shapes
+    { { { -1, 3, -1, -1 },                                                 // Dynamic shape
+        { {8, 3, 8, 4}, {6, 3, 33, 1}, {4, 3, 8, 6}, {2, 3, 8, 8} } } }    // Target shapes
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_dynamic, UniqueLayerTestCPU,
+                ::testing::Combine(
+                        ::testing::ValuesIn(dynamicInShapes),
+                        ::testing::ValuesIn(flatOrAxis),
+                        ::testing::ValuesIn(sorted),
+                        ::testing::ValuesIn(dataPrecisionSmoke),
+                        ::testing::ValuesIn(getCPUInfo()),
+                        ::testing::Values(additionalConfig[0])),
+                UniqueLayerTestCPU::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(nightly_dynamic, UniqueLayerTestCPU,
+                ::testing::Combine(
+                        ::testing::ValuesIn(dynamicInShapes),
+                        ::testing::ValuesIn(flatOrAxis),
+                        ::testing::ValuesIn(sorted),
+                        ::testing::ValuesIn(dataPrecisionNightly),
+                        ::testing::ValuesIn(getCPUInfo()),
+                        ::testing::Values(additionalConfig[0])),
+                UniqueLayerTestCPU::getTestCaseName);
+}  // namespace
+}  // namespace CPULayerTestsDefinitions
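For orientation, here is a minimal, self-contained sketch of what the flattened sorted=true path of flattenTensorExec() computes, with the four outputs in port order (unique values, first-occurrence indices, input-to-unique mapping, occurrence counts). The helper name and the std::lower_bound-based index recovery are illustrative only and are not part of the patch; the node itself recovers the indices with plain linear scans.

#include <algorithm>
#include <vector>

// Illustrative helper: Unique outputs for a flattened input with sorted == true.
template <typename T>
void uniqueFlattenedSorted(const std::vector<T>& src,
                           std::vector<T>& uniqueVals,   // port 0: unique elements
                           std::vector<int>& firstIdx,   // port 1: first occurrence index
                           std::vector<int>& inToOut,    // port 2: input -> unique mapping
                           std::vector<int>& occurNum) { // port 3: occurrence counts
    // Sort a copy and drop duplicates, exactly like the sort + unique step in the node.
    uniqueVals = src;
    std::sort(uniqueVals.begin(), uniqueVals.end());
    uniqueVals.erase(std::unique(uniqueVals.begin(), uniqueVals.end()), uniqueVals.end());

    firstIdx.assign(uniqueVals.size(), -1);
    inToOut.resize(src.size());
    occurNum.assign(uniqueVals.size(), 0);

    for (size_t i = 0; i < src.size(); ++i) {
        // uniqueVals is sorted, so binary search replaces the O(n * m) linear
        // scans used by the reference-style loops in flattenTensorExec().
        const int j = static_cast<int>(
                std::lower_bound(uniqueVals.begin(), uniqueVals.end(), src[i]) - uniqueVals.begin());
        inToOut[i] = j;
        ++occurNum[j];
        if (firstIdx[j] < 0)
            firstIdx[j] = static_cast<int>(i);
    }
}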