[CPU] Unique operation implementation. (#14250)

Nikolay Shchegolev 2022-12-05 17:45:20 +04:00 committed by GitHub
parent 8c3425ff69
commit 3959890691
10 changed files with 944 additions and 95 deletions

View File

@@ -113,16 +113,24 @@ UniqueElements<Index_t, Count_t> find_unique_elements(const Data_t* data,
         return *(data + lhs.idx) < *(data + rhs.idx);
     };
+    int64_t axisVal = 0;
+    if (axis) {
+        axisVal = *axis;
+        if (axisVal < 0) {
+            axisVal += data_shape.size();
+        }
+    }
     const auto slices_ascending_order = [&](const TensorSlice<Index_t, Count_t>& lhs,
                                             const TensorSlice<Index_t, Count_t>& rhs) {
-        const auto shape_to_iterate = slice_shape_to_iterate(data_shape, *axis);
+        const auto shape_to_iterate = slice_shape_to_iterate(data_shape, axisVal);
         for (auto it = CoordinateIterator(shape_to_iterate); it != CoordinateIterator::end(); ++it) {
             auto elem_coord_lhs = *it;
-            elem_coord_lhs.insert(elem_coord_lhs.cbegin() + *axis, lhs.idx);
+            elem_coord_lhs.insert(elem_coord_lhs.cbegin() + axisVal, lhs.idx);
             auto elem_coord_rhs = *it;
-            elem_coord_rhs.insert(elem_coord_rhs.cbegin() + *axis, rhs.idx);
+            elem_coord_rhs.insert(elem_coord_rhs.cbegin() + axisVal, rhs.idx);
             const auto lhs_elem_idx = ngraph::coordinate_index(elem_coord_lhs, data_shape);
             const auto rhs_elem_idx = ngraph::coordinate_index(elem_coord_rhs, data_shape);
@@ -149,15 +157,15 @@ UniqueElements<Index_t, Count_t> find_unique_elements(const Data_t* data,
         // the individual elements in the two compared slices are always separated by the same offset
         // and this can be used to compare them elementwise
-        const auto slices_offset = calc_slices_offset(lhs, rhs, data_shape_strides, *axis);
-        const auto shape_to_iterate = slice_shape_to_iterate(data_shape, *axis);
+        const auto slices_offset = calc_slices_offset(lhs, rhs, data_shape_strides, axisVal);
+        const auto shape_to_iterate = slice_shape_to_iterate(data_shape, axisVal);
         for (auto it = CoordinateIterator(shape_to_iterate); it != CoordinateIterator::end(); ++it) {
             // All slice elements have a "slice index" constant value at the axis position, only the other dimensions
             // vary for each slice element. Those dimensions are provided by CoordinateIterator, the value at axis
             // needs to be injected manually.
             auto elem_coord = *it;
-            elem_coord.insert(elem_coord.cbegin() + *axis, slice_with_lower_idx.idx);
+            elem_coord.insert(elem_coord.cbegin() + axisVal, slice_with_lower_idx.idx);
             const auto lhs_elem_idx = ngraph::coordinate_index(elem_coord, data_shape);
             const auto rhs_elem_idx = lhs_elem_idx + slices_offset;
             if (*(data + lhs_elem_idx) != *(data + rhs_elem_idx)) {
@@ -219,8 +227,8 @@ UniqueElements<Index_t, Count_t> find_unique_elements(const Data_t* data,
             }
         }
     } else {
-        ret.axis = *axis;
-        ret.all_tensor_elements = generate_descriptors<Index_t, Count_t>(data_shape[*axis], DescriptorType::SLICE);
+        ret.axis = axisVal;
+        ret.all_tensor_elements = generate_descriptors<Index_t, Count_t>(data_shape[axisVal], DescriptorType::SLICE);
         if (sorted) {
             std::stable_sort(begin(ret.all_tensor_elements), end(ret.all_tensor_elements), slices_ascending_order);
@@ -228,7 +236,7 @@ UniqueElements<Index_t, Count_t> find_unique_elements(const Data_t* data,
         ret.all_tensor_elements[0].rev_idx = 0;
         ret.unique_tensor_elements.push_back(ret.all_tensor_elements[0]);
-        for (size_t i = 1; i < data_shape[*axis]; ++i) {
+        for (size_t i = 1; i < data_shape[axisVal]; ++i) {
             auto& tensor_element = ret.all_tensor_elements[i];
             auto existing_unique = end(ret.unique_tensor_elements);
@@ -264,10 +272,17 @@ std::tuple<Shape, Shape, Shape> make_tensor_shapes(const UniqueElements<Index_t,
         // if the axis was specified we need to return a data shape with a modified dimension-at-axis
         // this is where we need to insert the number of detected unique elements
         // all other dimensions stay the same as in the original data_shape
+        int64_t axisVal = 0;
+        if (axis) {
+            axisVal = *axis;
+            if (axisVal < 0) {
+                axisVal += data_shape.size();
+            }
+        }
         auto output0 = data_shape;
-        output0[*axis] = unique_elements.unique_tensor_elements.size();
+        output0[axisVal] = unique_elements.unique_tensor_elements.size();
         const auto output1_3 = Shape{unique_elements.unique_tensor_elements.size()};
-        const auto output2 = Shape{data_shape[*axis]};
+        const auto output2 = Shape{data_shape[axisVal]};
         return std::make_tuple(output0, output1_3, output2);
     } else {
         const auto output0 = Shape{unique_elements.unique_tensor_elements.size()};
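All of the hunks above apply the same fix: the optional axis input is dereferenced only after a null check, and a negative value is wrapped by the tensor rank before it is used to index data_shape. A minimal standalone sketch of that normalization (the helper name and the raw pointer standing in for the optional axis are illustrative, not taken from the OpenVINO sources):

#include <cstdint>
#include <iostream>
#include <vector>

// Normalize an optional, possibly negative axis against a tensor rank.
// nullptr plays the role of "no axis provided" and yields 0, mirroring
// the axisVal = 0 default in the hunks above.
int64_t normalize_axis(const int64_t* axis, const std::vector<size_t>& data_shape) {
    int64_t axisVal = 0;
    if (axis) {
        axisVal = *axis;
        if (axisVal < 0) {
            axisVal += static_cast<int64_t>(data_shape.size());  // e.g. -1 -> rank - 1
        }
    }
    return axisVal;
}

int main() {
    const std::vector<size_t> shape{2, 3, 4};
    const int64_t axis = -1;
    std::cout << normalize_axis(&axis, shape) << '\n';   // prints 2
    std::cout << normalize_axis(nullptr, shape) << '\n'; // prints 0
}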

View File

@@ -205,6 +205,7 @@ const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_t
         { "PriorBoxClustered", Type::PriorBoxClustered},
         { "Interaction", Type::Interaction},
         { "MHA", Type::MHA},
+        { "Unique", Type::Unique}
 };
 Type TypeFromName(const std::string& type) {
@@ -402,6 +403,8 @@ std::string NameFromType(const Type type) {
         return "Subgraph";
     case Type::MHA:
         return "MHA";
+    case Type::Unique:
+        return "Unique";
     default:
         return "Unknown";
     }

View File

@@ -110,7 +110,8 @@ enum class Type {
     PriorBox,
     PriorBoxClustered,
     Interaction,
-    MHA
+    MHA,
+    Unique
 };
 enum class Algorithm {

View File

@@ -0,0 +1,495 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <string>
#include <vector>

#include "unique.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <utils/shape_inference/shape_inference_internal_dyn.hpp>

using namespace InferenceEngine;
using namespace ov::intel_cpu;
using namespace ov::intel_cpu::node;

#define THROW_ERROR IE_THROW() << getTypeStr() << " node with name '" << getName() << "' "

bool Unique::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (!ov::is_type<op::v10::Unique>(op)) {
            errorMessage = "Not supported Unique operation version. CPU plug-in supports only 10th version.";
            return false;
        }
        if (op->get_input_size() > AXIS && !ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXIS))) {
            errorMessage = "CPU plug-in supports only constant Axis input.";
            return false;
        }
    } catch (...) {
        return false;
    }
    return true;
}

Unique::Unique(const std::shared_ptr<ov::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) :
        Node(op, eng, cache, InternalDynShapeInferFactory()) {
    std::string errorMessage;
    if (!isSupportedOperation(op, errorMessage)) {
        IE_THROW(NotImplemented) << errorMessage;
    }

    if (!one_of(op->get_input_size(), 1, 2) || op->get_output_size() != 4)
        THROW_ERROR << "has incorrect number of input/output edges.";

    for (int i = 0; i < 4; i++) {
        definedOutputs[i] = !op->get_output_target_inputs(i).empty();
    }

    sorted = ov::as_type_ptr<ov::op::v10::Unique>(op)->get_sorted();
    if (op->get_input_size() > AXIS) {
        flattened = false;
        axis = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXIS))->cast_vector<int>()[0];
        if (axis < 0) {
            axis += op->get_input_partial_shape(IN_DATA).rank().get_length();
        }
        if (axis < 0 || axis >= op->get_input_partial_shape(IN_DATA).rank().get_length()) {
            THROW_ERROR << "has invalid axis value: " << ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXIS))->cast_vector<int>()[0];
        }
    } else {
        flattened = true;
    }
}

void Unique::initSupportedPrimitiveDescriptors() {
    dataPrecision = getOriginalInputPrecisionAtPort(IN_DATA);
    if (dataPrecision != Precision::I32 && dataPrecision != Precision::I8 && dataPrecision != Precision::U8) {
        dataPrecision = Precision::FP32;
    }
    dataTypeSize = dataPrecision.size();
    const InferenceEngine::Precision axisPrecision = Precision::I32;

    impl_desc_type implType = ref;

    std::vector<PortConfigurator> inPortConfigs = { {LayoutType::ncsp, dataPrecision} };
    if (!flattened) {
        inPortConfigs.push_back({LayoutType::ncsp, axisPrecision});
    }
    std::vector<PortConfigurator> outPortConfigs;
    for (int i = 0; i < 4; i++) {
        outPortConfigs.push_back({LayoutType::ncsp, i == 0 ? dataPrecision : axisPrecision});
    }

    addSupportedPrimDesc(inPortConfigs, outPortConfigs, implType, isDynamicNode());
}

void Unique::createPrimitive() {
    Node::createPrimitive();
}

void Unique::prepareParams() {
    auto& dataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr();
    if (!dataMemPtr || !dataMemPtr->isAllocated()) {
        THROW_ERROR << " has not allocated input data memory.";
    }
    for (int i = 0; i < 4; i++) {
        if (definedOutputs[i]) {
            auto& dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
            if (!dstMemPtr || !dstMemPtr->isAllocated()) {
                THROW_ERROR << " has not allocated output memory at port " << i;
            }
        }
    }
    if (getSelectedPrimitiveDescriptor() == nullptr) {
        THROW_ERROR << " has unidentified preferable primitive descriptor.";
    }

    size_t srcLen = 1;
    if (flattened) {
        srcLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / dataTypeSize;
    } else {
        auto dstDataShape = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getStaticDims();
        srcLen = dstDataShape[axis];
    }
    firstUniTmp.resize(srcLen, 0);
    inToOutTmp.resize(srcLen);
    occurTmp.resize(srcLen);
}

template<typename T>
struct Unique::flattenExec {
    void operator()(Unique *node) {
        node->flattenTensorExec<T>();
    }
};

template<typename T>
struct Unique::slicedExec {
    void operator()(Unique *node) {
        node->slicedTensorExec<T>();
    }
};

void Unique::execute(dnnl::stream strm) {
    if (flattened) {
        OV_SWITCH(intel_cpu, flattenExec, this, dataPrecision,
                  OV_CASE(Precision::FP32, float),
                  OV_CASE(Precision::I32, int32_t),
                  OV_CASE(Precision::I8, int8_t),
                  OV_CASE(Precision::U8, uint8_t))
    } else {
        OV_SWITCH(intel_cpu, slicedExec, this, dataPrecision,
                  OV_CASE(Precision::FP32, float),
                  OV_CASE(Precision::I32, int32_t),
                  OV_CASE(Precision::I8, int8_t),
                  OV_CASE(Precision::U8, uint8_t))
    }
}

void Unique::executeDynamicImpl(dnnl::stream strm) {
    const auto& srcDataDims = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getStaticDims();
    VectorDims dstDataDims;
    Dim uniqLen = 1;
    if (flattened) {
        uniqLen = std::accumulate(srcDataDims.begin(), srcDataDims.end(), 1, std::multiplies<Dim>());
        dstDataDims = { uniqLen };
    } else {
        uniqLen = srcDataDims[axis];
        dstDataDims = srcDataDims;
    }
    redefineOutputMemory({ dstDataDims, {uniqLen}, {uniqLen}, {uniqLen}});

    execute(strm);
}

template <typename T>
void Unique::flattenTensorExec() {
    const T* srcDataPtr = reinterpret_cast<const T*>(getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetPtr());
    const size_t inputLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / sizeof(T);
    std::vector<T> uniDataTmp(inputLen);
    auto uniDataTmpPtr = uniDataTmp.data();
    int *firstTmpPtr = nullptr, *inToOutTmpPtr = nullptr, *occurTmpPtr = nullptr;
    if (definedOutputs[FIRST_UNIQUE_IDX]) {
        firstTmpPtr = firstUniTmp.data();
    }
    if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
        inToOutTmpPtr = inToOutTmp.data();
    }
    if (definedOutputs[OCCURRENCES_NUM]) {
        occurTmpPtr = occurTmp.data();
    }

    uniqueLen = inputLen;

    if (sorted) {
        std::memcpy(uniDataTmpPtr, srcDataPtr, inputLen * sizeof(T));
        std::sort(uniDataTmpPtr, uniDataTmpPtr + inputLen);
        auto last = std::unique(uniDataTmpPtr, uniDataTmpPtr + inputLen);
        uniqueLen = last - uniDataTmpPtr;

        if (definedOutputs[FIRST_UNIQUE_IDX]) {
            T* first = uniDataTmpPtr;
            for (T* it = first; it < last; it++) {
                for (int i = 0; i < inputLen; i++) {
                    if (srcDataPtr[i] == *it) {
                        *firstTmpPtr++ = i;
                        first++;
                        break;
                    }
                }
            }
        }

        if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
            for (int i = 0; i < inputLen; i++) {
                if (i > 0 && srcDataPtr[i] == srcDataPtr[i - 1]) {
                    inToOutTmpPtr[i] = inToOutTmpPtr[i - 1];
                    continue;
                }
                for (int j = 0; j < uniqueLen; j++) {
                    if (srcDataPtr[i] == uniDataTmpPtr[j]) {
                        inToOutTmpPtr[i] = j;
                        break;
                    }
                }
            }
        }

        if (definedOutputs[OCCURRENCES_NUM]) {
            std::fill(occurTmpPtr, occurTmpPtr + uniqueLen, 0);
            for (int j = 0; j < uniqueLen; j++) {
                for (int i = 0; i < inputLen; i++) {
                    if (srcDataPtr[i] == uniDataTmpPtr[j]) {
                        occurTmpPtr[j]++;
                    }
                }
            }
        }
    } else {
        uniDataTmpPtr[0] = srcDataPtr[0];
        if (definedOutputs[FIRST_UNIQUE_IDX]) {
            firstTmpPtr[0] = 0;
        }
        if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
            inToOutTmpPtr[0] = 0;
        }
        if (definedOutputs[OCCURRENCES_NUM]) {
            std::fill(occurTmpPtr, occurTmpPtr + inputLen, 1);
        }
        uniqueLen = 1;

        for (int i = 1; i < inputLen; i++) {
            bool found = false;
            int j = 0;
            for (; j < uniqueLen; j++) {
                if (uniDataTmpPtr[j] == srcDataPtr[i]) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                uniDataTmpPtr[uniqueLen] = srcDataPtr[i];
                if (definedOutputs[FIRST_UNIQUE_IDX]) {
                    firstTmpPtr[uniqueLen] = i;
                }
                uniqueLen++;
            } else {
                if (definedOutputs[OCCURRENCES_NUM]) {
                    occurTmpPtr[j]++;
                }
            }
            if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
                inToOutTmpPtr[i] = j;
            }
        }
    }

    redefineOutputMemory({ {uniqueLen}, {uniqueLen}, {inputLen}, {uniqueLen}});

    T* uniDataPtr = reinterpret_cast<T*>(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr());
    memcpy(uniDataPtr, uniDataTmpPtr, uniqueLen * sizeof(T));
    if (definedOutputs[FIRST_UNIQUE_IDX]) {
        int *firstPtr = reinterpret_cast<int*>(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr());
        memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int));
    }
    if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
        auto inToOutPtr = reinterpret_cast<int*>(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr());
        memcpy(inToOutPtr, inToOutTmp.data(), inputLen * sizeof(int));
    }
    if (definedOutputs[OCCURRENCES_NUM]) {
        auto occurPtr = reinterpret_cast<int*>(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr());
        memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int));
    }
}

template <typename T>
void Unique::slicedTensorExec() {
    const T* srcDataPtr = reinterpret_cast<const T*>(getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetPtr());
    const size_t inputLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / sizeof(T);
    std::vector<T> uniDataTmp(inputLen);
    auto uniDataTmpPtr = uniDataTmp.data();
    int *firstTmpPtr = nullptr, *inToOutTmpPtr = nullptr, *occurTmpPtr = nullptr;
    if (definedOutputs[FIRST_UNIQUE_IDX]) {
        firstTmpPtr = firstUniTmp.data();
    }
    if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
        inToOutTmpPtr = inToOutTmp.data();
    }
    if (definedOutputs[OCCURRENCES_NUM]) {
        occurTmpPtr = occurTmp.data();
    }

    const auto& srcDataShape = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getStaticDims();

    const auto cmpBlNum = srcDataShape[axis]; // Blocks to compare.
    int64_t partsInBl = 1; // Parts in block
    if (axis > 0) {
        partsInBl = std::accumulate(srcDataShape.begin(), srcDataShape.begin() + axis, 1, std::multiplies<Dim>());
    }
    int64_t elPerPart = 1; // Elements number in part.
    if (axis < srcDataShape.size() - 1) {
        elPerPart = std::accumulate(srcDataShape.begin() + axis + 1, srcDataShape.end(), 1, std::multiplies<Dim>());
    }
    const auto partLenB = elPerPart * dataPrecision.size();
    const auto partStep = elPerPart * cmpBlNum;

    if (definedOutputs[FIRST_UNIQUE_IDX]) {
        firstTmpPtr[0] = 0;
    }
    if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
        inToOutTmpPtr[0] = 0;
    }
    if (definedOutputs[OCCURRENCES_NUM]) {
        occurTmpPtr[0] = 1;
        std::fill(occurTmpPtr, occurTmpPtr + cmpBlNum, 1);
    }

    uniqueLen = 1;
    std::vector<int64_t> uniqIdx(cmpBlNum, 0);
    for (int b1 = 1; b1 < cmpBlNum; b1++) {
        auto first1 = srcDataPtr + b1 * elPerPart;
        auto last1 = srcDataPtr + (b1 + 1) * elPerPart;
        bool equal = true;
        int b2 = 0;
        // Compare with unique blocks.
        for (; b2 < uniqueLen; b2++) {
            auto first2 = srcDataPtr + uniqIdx[b2] * elPerPart;
            equal = true;
            for (int p = 0; p < partsInBl; p++) {
                equal = std::equal(first1, last1, first2);
                if (!equal) {
                    break;
                }
                first1 += partStep;
                last1  += partStep;
                first2 += partStep;
            }
            if (equal) {
                break;
            }
        }
        if (!equal) {
            if (definedOutputs[FIRST_UNIQUE_IDX]) {
                firstTmpPtr[uniqueLen] = b1;
            }
            uniqIdx[uniqueLen++] = b1;
        } else {
            if (definedOutputs[OCCURRENCES_NUM]) {
                occurTmpPtr[b2]++;
            }
        }
        if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
            inToOutTmpPtr[b1] = b2;
        }
    }

    const auto dstPrtStep = elPerPart * uniqueLen;
    for (int b1 = 0; b1 < uniqueLen; b1++) {
        auto first1 = srcDataPtr + uniqIdx[b1] * elPerPart;
        auto first2 = uniDataTmpPtr + b1 * elPerPart;
        for (int p = 0; p < partsInBl; p++) {
            memcpy(first2, first1, partLenB);
            first1 += partStep;
            first2 += dstPrtStep;
        }
    }

    if (sorted) {
        const auto elInBl = elPerPart * partsInBl;
        struct OrdEl {
            T val;
            int64_t idx;
        };

        std::vector<OrdEl> colToSort(uniqueLen);
        std::vector<int64_t> moveTo(uniqueLen);
        for (int k = 0; k < uniqueLen; k++) {
            moveTo[k] = k;
        }
        std::vector<T> buff1(elPerPart);
        std::vector<T> buff2(elPerPart);
        for (int64_t p = partsInBl - 1; p >= 0; p--) {
            for (int64_t e = elPerPart - 1; e >= 0; e--) {
                int64_t pos1 = p * dstPrtStep + e;
                for (int64_t i = 0; i < uniqueLen; i++) {
                    int64_t pos2 = i * elInBl + pos1;
                    colToSort[i] = {uniDataTmpPtr[pos2], i};
                }
                std::stable_sort(colToSort.begin(), colToSort.end(), [](const OrdEl &el1, const OrdEl &el2) { return el1.val < el2.val; });
                for (int k = 0; k < uniqueLen; k++) {
                    moveTo[colToSort[k].idx] = k;
                }

                // perm
                for (int64_t pb = 0; pb < partsInBl; pb++) {
                    auto currDst = uniDataTmpPtr + pb * dstPrtStep;
                    memcpy(buff1.data(), currDst, partLenB);
                    auto dstIdx = moveTo[0];
                    for (int64_t b = 0; b < uniqueLen; b++) {
                        if (dstIdx == moveTo[dstIdx]) {
                            dstIdx = moveTo[++dstIdx];
                            continue;
                        }
                        T* dst = currDst + dstIdx * elPerPart;
                        auto& bSrc = b % 2 == 0 ? buff1 : buff2;
                        auto& bDst = b % 2 == 0 ? buff2 : buff1;
                        memcpy(bDst.data(), dst, partLenB);
                        memcpy(dst, bSrc.data(), partLenB);
                        dstIdx = moveTo[dstIdx];
                    }
                }

                auto mPos = moveTo[0];
                int32_t firstSrc = 0, firstDst = 0, ocSrc = 0, ocDst = 0;
                if (definedOutputs[FIRST_UNIQUE_IDX]) {
                    firstSrc = firstTmpPtr[0];
                }
                if (definedOutputs[OCCURRENCES_NUM]) {
                    ocSrc = occurTmpPtr[0];
                }
                for (int k = 0; k < uniqueLen; k++) {
                    if (mPos == moveTo[mPos]) {
                        mPos = moveTo[++mPos];
                        continue;
                    }

                    if (definedOutputs[FIRST_UNIQUE_IDX]) {
                        auto& fSrc = k % 2 == 0 ? firstSrc : firstDst;
                        auto& fDst = k % 2 == 0 ? firstDst : firstSrc;
                        fDst = firstTmpPtr[mPos];
                        firstTmpPtr[mPos] = fSrc;
                    }
                    if (definedOutputs[OCCURRENCES_NUM]) {
                        auto& oSrc = k % 2 == 0 ? ocSrc : ocDst;
                        auto& oDst = k % 2 == 0 ? ocDst : ocSrc;
                        oDst = occurTmpPtr[mPos];
                        occurTmpPtr[mPos] = oSrc;
                    }
                    mPos = moveTo[mPos];
                }
            }
        }

        if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
            for (int b1 = 0; b1 < cmpBlNum; b1++) {
                auto first1 = srcDataPtr + b1 * elPerPart;
                auto last1 = srcDataPtr + (b1 + 1) * elPerPart;
                bool equal = true;
                for (int b2 = 0; b2 < uniqueLen; b2++) {
                    auto first2 = uniDataTmpPtr + b2 * elPerPart;
                    equal = true;
                    for (int p = 0; p < partsInBl; p++) {
                        equal = std::equal(first1, last1, first2);
                        if (!equal) {
                            break;
                        }
                        first2 += dstPrtStep;
                    }
                    if (equal) {
                        inToOutTmpPtr[b1] = b2;
                    }
                }
            }
        }
    }

    auto dstDataShape = srcDataShape;
    dstDataShape[axis] = uniqueLen;
    redefineOutputMemory({ dstDataShape, {uniqueLen}, {cmpBlNum}, {uniqueLen}});

    T* uniDataPtr = reinterpret_cast<T*>(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr());
    memcpy(uniDataPtr, uniDataTmpPtr, getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetSize());
    if (definedOutputs[FIRST_UNIQUE_IDX]) {
        int *firstPtr = reinterpret_cast<int*>(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr());
        memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int));
    }
    if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
        auto inToOutPtr = reinterpret_cast<int*>(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr());
        memcpy(inToOutPtr, inToOutTmp.data(), cmpBlNum * sizeof(int));
    }
    if (definedOutputs[OCCURRENCES_NUM]) {
        auto occurPtr = reinterpret_cast<int*>(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr());
        memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int));
    }
}
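For intuition on the four outputs the node produces, here is a worked example of the sorted, flattened case, built from the same std::sort / std::unique combination flattenTensorExec uses above (a standalone sketch, not the plugin's actual API):

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
    // Input tensor flattened to 1D, as in flattenTensorExec with sorted == true.
    const std::vector<int> src{5, 3, 5, 1, 3};

    std::vector<int> uni(src);
    std::sort(uni.begin(), uni.end());
    uni.erase(std::unique(uni.begin(), uni.end()), uni.end()); // {1, 3, 5}

    std::vector<int> firstIdx, occur(uni.size(), 0), inToOut(src.size());
    for (size_t j = 0; j < uni.size(); ++j) {
        // First occurrence of each unique value in the original order.
        firstIdx.push_back(static_cast<int>(std::find(src.begin(), src.end(), uni[j]) - src.begin()));
        for (size_t i = 0; i < src.size(); ++i) {
            if (src[i] == uni[j]) {
                ++occur[j];                          // occurrence count
                inToOut[i] = static_cast<int>(j);    // input-to-unique mapping
            }
        }
    }

    // Expected: unique = 1 3 5, first = 3 1 0, inToOut = 2 1 2 0 1, occur = 1 2 2
    for (int v : uni) std::cout << v << ' ';      std::cout << '\n';
    for (int v : firstIdx) std::cout << v << ' '; std::cout << '\n';
    for (int v : inToOut) std::cout << v << ' ';  std::cout << '\n';
    for (int v : occur) std::cout << v << ' ';    std::cout << '\n';
}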

View File

@@ -0,0 +1,68 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <node.h>

#include <memory>
#include <string>
#include <vector>

namespace ov {
namespace intel_cpu {
namespace node {

class Unique : public Node {
public:
    Unique(const std::shared_ptr<ov::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
    void getSupportedDescriptors() override {};
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override;
    void execute(dnnl::stream strm) override;
    bool created() const override { return getType() == Type::Unique; }

protected:
    void executeDynamicImpl(dnnl::stream strm) override;
    void prepareParams() override;
    bool needShapeInfer() const override { return false; }

private:
    template <typename T>
    void flattenTensorExec();
    template <typename T>
    void slicedTensorExec();

    template<typename T>
    struct flattenExec;
    template<typename T>
    struct slicedExec;

    std::vector<int32_t> firstUniTmp;
    std::vector<int32_t> inToOutTmp;
    std::vector<int32_t> occurTmp;

    bool sorted = false;
    bool flattened = true;
    int axis = 0;

    bool definedOutputs[4] = { false, false, false, false };
    InferenceEngine::Precision dataPrecision;
    int64_t dataTypeSize = 1;
    size_t uniqueLen = 1;
    int threadsNum = 1;

    static constexpr size_t IN_DATA = 0;
    static constexpr size_t AXIS = 1;
    static constexpr size_t UNIQUE_DATA = 0;
    static constexpr size_t FIRST_UNIQUE_IDX = 1;
    static constexpr size_t INPUT_TO_UNIQ_IDX = 2;
    static constexpr size_t OCCURRENCES_NUM = 3;
};

} // namespace node
} // namespace intel_cpu
} // namespace ov
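slicedTensorExec above compares cmpBlNum slices along the axis, where each slice consists of partsInBl parts (the product of the dimensions before the axis) of elPerPart contiguous elements (the product of the dimensions after the axis), spaced partStep = elPerPart * cmpBlNum apart in the row-major buffer. A self-contained sketch of that address arithmetic (helper name and shapes are illustrative, not from the sources):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

// Compare slices b1 and b2 of a row-major tensor along `axis`,
// using the same blocks/parts layout arithmetic as slicedTensorExec.
bool slices_equal(const std::vector<int>& data, const std::vector<size_t>& shape,
                  size_t axis, int64_t b1, int64_t b2) {
    const int64_t cmpBlNum = shape[axis];
    const int64_t partsInBl = std::accumulate(shape.begin(), shape.begin() + axis,
                                              int64_t{1}, std::multiplies<int64_t>());
    const int64_t elPerPart = std::accumulate(shape.begin() + axis + 1, shape.end(),
                                              int64_t{1}, std::multiplies<int64_t>());
    const int64_t partStep = elPerPart * cmpBlNum;
    auto p1 = data.data() + b1 * elPerPart;
    auto p2 = data.data() + b2 * elPerPart;
    for (int64_t p = 0; p < partsInBl; ++p, p1 += partStep, p2 += partStep) {
        if (!std::equal(p1, p1 + elPerPart, p2)) {
            return false;
        }
    }
    return true;
}

int main() {
    // Shape {2, 2, 2}; slices 0 and 1 along axis 1 are both {1, 2, 5, 6}.
    const std::vector<int> data{1, 2, 1, 2, 5, 6, 5, 6};
    std::cout << std::boolalpha << slices_equal(data, {2, 2, 2}, 1, 0, 1) << '\n'; // true
}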

View File

@@ -91,6 +91,7 @@
 #include "nodes/eye.h"
 #include "nodes/interaction.h"
 #include "nodes/mha.h"
+#include "nodes/unique.hpp"
 namespace ov {
 namespace intel_cpu {
@@ -194,6 +195,7 @@ Node::NodesFactory::NodesFactory()
     INTEL_CPU_NODE(Eye, Type::Eye);
     INTEL_CPU_NODE(Interaction, Type::Interaction);
     INTEL_CPU_NODE(MHA, Type::MHA);
+    INTEL_CPU_NODE(Unique, Type::Unique);
 }
 #undef INTEL_CPU_NODE
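The three registration diffs above follow one pattern: add an enum value, extend the name maps, and hook the node into the factory. A toy, self-contained illustration of that pattern (plain std::map standing in for the plugin's caseless map and INTEL_CPU_NODE macro; all names here are illustrative):

#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

enum class Type { Unknown, MHA, Unique };

// String -> Type map, as extended in cpu_types.cpp.
const std::map<std::string, Type> type_to_name_tbl{{"MHA", Type::MHA}, {"Unique", Type::Unique}};

struct Node { virtual ~Node() = default; };
struct UniqueNode : Node {};

// Factory keyed by Type, as extended in the nodes factory diff.
const std::map<Type, std::function<std::unique_ptr<Node>()>> factory{
    {Type::Unique, [] { return std::make_unique<UniqueNode>(); }}};

int main() {
    const auto it = type_to_name_tbl.find("Unique");
    const Type t = it == type_to_name_tbl.end() ? Type::Unknown : it->second;
    const auto node = factory.at(t)();           // creates a UniqueNode
    std::cout << (node != nullptr) << '\n';      // prints 1
}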

View File

@@ -87,7 +87,7 @@
 #include <transformations/op_conversions/convert_roi_align_v3_to_v9.hpp>
 #include <transformations/op_conversions/softsign_decomposition.hpp>
 #include "transformations/op_conversions/eye_decomposition.hpp"
-#include "transformations/smart_reshape/smart_reshape.hpp"
+#include "transformations/op_conversions/unique_decomposition.hpp"
 #include "ngraph_transformations/convert_to_cpu_specific_opset.hpp"
 #include "ngraph_transformations/snippets_mark_skipped.hpp"
@@ -287,9 +287,9 @@ static bool fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, ov::
 static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function> nGraphFunc, const bool _enableLPT, const bool _enableBF16,
                                                const bool _enableSnippets, const bool isLegacyApi) {
-    ngraph::pass::Manager manager;
+    ov::pass::Manager manager;
     manager.set_per_pass_validation(false);
-    manager.register_pass<ngraph::pass::InitNodeInfo>();
+    manager.register_pass<ov::pass::InitNodeInfo>();
     const bool useLpt =
         _enableLPT &&
@@ -331,32 +331,32 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
     type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};
     manager.register_pass<ov::pass::AUGRUCellFusion>();
-    manager.register_pass<ngraph::pass::CommonOptimizations>();
-    manager.register_pass<ngraph::pass::WrapInterpolateIntoTransposes>();
-    manager.register_pass<ngraph::pass::TransposeSinking>();
-    manager.register_pass<ngraph::pass::ConvertSequenceToTensorIterator>();
-    manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
-    manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
-    manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
-    manager.register_pass<ngraph::pass::GRUCellDecomposition>();
-    manager.register_pass<ngraph::pass::RNNCellDecomposition>();
-    manager.register_pass<ngraph::pass::ConvertNMS1ToNMS9>();
-    manager.register_pass<ngraph::pass::ConvertNMS3ToNMS9>();
-    manager.register_pass<ngraph::pass::ConvertNMS4ToNMS9>();
-    manager.register_pass<ngraph::pass::ConvertNMS5ToNMS9>();
-    manager.register_pass<ngraph::pass::ConvertNMS9ToNMSIEInternal>();
-    manager.register_pass<ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
-    manager.register_pass<ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>();
-    manager.register_pass<ngraph::pass::TransposeMatMul>();
-    manager.register_pass<ngraph::pass::ConstantFolding>();
+    manager.register_pass<ov::pass::CommonOptimizations>();
+    manager.register_pass<ov::pass::WrapInterpolateIntoTransposes>();
+    manager.register_pass<ov::pass::TransposeSinking>();
+    manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
+    manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
+    manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
+    manager.register_pass<ov::pass::LSTMCellDecomposition>();
+    manager.register_pass<ov::pass::GRUCellDecomposition>();
+    manager.register_pass<ov::pass::RNNCellDecomposition>();
+    manager.register_pass<ov::pass::ConvertNMS1ToNMS9>();
+    manager.register_pass<ov::pass::ConvertNMS3ToNMS9>();
+    manager.register_pass<ov::pass::ConvertNMS4ToNMS9>();
+    manager.register_pass<ov::pass::ConvertNMS5ToNMS9>();
+    manager.register_pass<ov::pass::ConvertNMS9ToNMSIEInternal>();
+    manager.register_pass<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
+    manager.register_pass<ov::pass::ConvertMatrixNmsToMatrixNmsIE>();
+    manager.register_pass<ov::pass::TransposeMatMul>();
+    manager.register_pass<ov::pass::ConstantFolding>();
     if (useLpt) {
         CPU_LPT_SCOPE(LowPrecisionTransformations_Part2);
         manager.register_pass<ngraph::pass::low_precision::ConvertSubtractConstant>(defaultPrecisions);
     }
-    manager.register_pass<ngraph::pass::Validate>();
-    manager.register_pass<ngraph::pass::ConvertPrecision>(precisions, type_to_fuse);
-    manager.register_pass<ngraph::pass::EliminateConvert>();
+    manager.register_pass<ov::pass::Validate>();
+    manager.register_pass<ov::pass::ConvertPrecision>(precisions, type_to_fuse);
+    manager.register_pass<ov::pass::EliminateConvert>();
     manager.register_pass<SwapConvertTranspose>();
     manager.register_pass<ConvertToInteraction>();
     manager.register_pass<ConvertInteractionInt8>();
@@ -366,15 +366,15 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
     using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
     // SpaceToDepth/ DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
-    pass_config->set_callback<ngraph::pass::ConvertSpaceToDepth,
-                              ngraph::pass::ConvertDepthToSpace>(
+    pass_config->set_callback<ov::pass::ConvertSpaceToDepth,
+                              ov::pass::ConvertDepthToSpace>(
         [](const_node_ptr &node) -> bool {
            return node->input_value(0).get_shape().size() <= 5lu &&
                   node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
        });
-    pass_config->set_callback<ngraph::pass::ConvertBatchToSpace,
-                              ngraph::pass::ConvertSpaceToBatch>(
+    pass_config->set_callback<ov::pass::ConvertBatchToSpace,
+                              ov::pass::ConvertSpaceToBatch>(
         [](const_node_ptr &node) -> bool {
            const auto & rank = node->input(0).get_partial_shape().rank().get_length();
            return rank == 4lu || rank == 5lu;
@@ -443,33 +443,33 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
        return false;
     };
-    pass_config->set_callback<ngraph::pass::ConvertRNNSequenceToTensorIterator,
-                              ngraph::pass::ConvertGRUSequenceToTensorIterator,
-                              ngraph::pass::ConvertLSTMSequenceToTensorIterator>(
+    pass_config->set_callback<ov::pass::ConvertRNNSequenceToTensorIterator,
+                              ov::pass::ConvertGRUSequenceToTensorIterator,
+                              ov::pass::ConvertLSTMSequenceToTensorIterator>(
         [isSequencePrimitiveSupported](const_node_ptr &node) -> bool {
            return isSequencePrimitiveSupported(node);
        });
-    pass_config->set_callback<ngraph::pass::RNNCellDecomposition, ngraph::pass::GRUCellDecomposition,
-                              ngraph::pass::LSTMCellDecomposition>(
+    pass_config->set_callback<ov::pass::RNNCellDecomposition, ov::pass::GRUCellDecomposition,
+                              ov::pass::LSTMCellDecomposition>(
         [isCellPrimitiveSupported](const_node_ptr &node) -> bool {
            return isCellPrimitiveSupported(node);
        });
-    pass_config->set_callback<ngraph::pass::MVN6Decomposition>(
+    pass_config->set_callback<ov::pass::MVN6Decomposition>(
         [](const_node_ptr &node) -> bool {
            std::string errorMessage;
            return node::MVN::isSupportedOperation(node, errorMessage);
        });
-    pass_config->set_callback<ngraph::pass::NormalizeL2Decomposition>(
+    pass_config->set_callback<ov::pass::NormalizeL2Decomposition>(
         [](const_node_ptr &node) -> bool {
            std::string errorMsg;
            return node::NormalizeL2::isSupportedOperation(node, errorMsg);
        });
-    pass_config->enable<ngraph::pass::SoftmaxDecomposition>();
-    pass_config->set_callback<ngraph::pass::SoftmaxDecomposition>(
+    pass_config->enable<ov::pass::SoftmaxDecomposition>();
+    pass_config->set_callback<ov::pass::SoftmaxDecomposition>(
         [](const_node_ptr &node) -> bool {
            return node->input_value(0).get_partial_shape().rank().get_length() <= 5;
        });
@@ -487,9 +487,9 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
            return true;
        };
-        pass_config->set_callback<ngraph::pass::ConvertNMS9ToNMSIEInternal>(nmsCallback);
-        pass_config->set_callback<ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(nmsCallback);
-        pass_config->set_callback<ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(nmsCallback);
+        pass_config->set_callback<ov::pass::ConvertNMS9ToNMSIEInternal>(nmsCallback);
+        pass_config->set_callback<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>(nmsCallback);
+        pass_config->set_callback<ov::pass::ConvertMatrixNmsToMatrixNmsIE>(nmsCallback);
     }
     // List of enabled/disabled transformations
@@ -499,46 +499,47 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
     pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
     pass_config->disable<ov::pass::EyeDecomposition>();
-    pass_config->disable<ngraph::pass::ConvertGELU>();
-    pass_config->disable<ngraph::pass::ConvertShuffleChannels3>();
-    pass_config->disable<ngraph::pass::Gelu7Downgrade>();
-    pass_config->disable<ngraph::pass::HSwishDecomposition>();
-    pass_config->disable<ngraph::pass::ReduceL1Decomposition>();
-    pass_config->disable<ngraph::pass::ReduceL2Decomposition>();
-    pass_config->disable<ngraph::pass::SoftPlusDecomposition>();
-    pass_config->disable<ngraph::pass::HSigmoidDecomposition>();
-    pass_config->disable<ngraph::pass::ConvertMod>();
-    pass_config->disable<ngraph::pass::ConvertShuffleChannels3>();
-    pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>();
-    pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
-    pass_config->disable<ngraph::pass::ConvertGather7ToGather1>();
-    pass_config->disable<ngraph::pass::ConvertGather8ToGather7>();
-    pass_config->disable<ngraph::pass::ConvertMinimum>();
-    pass_config->disable<ngraph::pass::ConvertBroadcastToTiles>();
-    pass_config->disable<ngraph::pass::ConvertReduceMeanToPooling>();
-    pass_config->disable<ngraph::pass::ConvertReduceMaxToPooling>();
-    pass_config->disable<ngraph::pass::ConvertReduceSumToPooling>();
-    pass_config->disable<ngraph::pass::SliceToStridedSlice>();
-    pass_config->disable<ngraph::pass::ConvertDetectionOutput8ToDetectionOutput1>();
-    pass_config->disable<ngraph::pass::ConvertROIAlign9To3>();
-    pass_config->disable<ngraph::pass::SoftSignDecomposition>();
+    pass_config->disable<ov::pass::ConvertGELU>();
+    pass_config->disable<ov::pass::ConvertShuffleChannels3>();
+    pass_config->disable<ov::pass::Gelu7Downgrade>();
+    pass_config->disable<ov::pass::HSwishDecomposition>();
+    pass_config->disable<ov::pass::ReduceL1Decomposition>();
+    pass_config->disable<ov::pass::ReduceL2Decomposition>();
+    pass_config->disable<ov::pass::SoftPlusDecomposition>();
+    pass_config->disable<ov::pass::HSigmoidDecomposition>();
+    pass_config->disable<ov::pass::ConvertMod>();
+    pass_config->disable<ov::pass::ConvertShuffleChannels3>();
+    pass_config->disable<ov::pass::WeightsDequantizeToFakeQuantize>();
+    pass_config->disable<ov::pass::SimplifyCTCGreedyDecoderSeqLen>();
+    pass_config->disable<ov::pass::ConvertGather7ToGather1>();
+    pass_config->disable<ov::pass::ConvertGather8ToGather7>();
+    pass_config->disable<ov::pass::ConvertMinimum>();
+    pass_config->disable<ov::pass::ConvertBroadcastToTiles>();
+    pass_config->disable<ov::pass::ConvertReduceMeanToPooling>();
+    pass_config->disable<ov::pass::ConvertReduceMaxToPooling>();
+    pass_config->disable<ov::pass::ConvertReduceSumToPooling>();
+    pass_config->disable<ov::pass::SliceToStridedSlice>();
+    pass_config->disable<ov::pass::ConvertDetectionOutput8ToDetectionOutput1>();
+    pass_config->disable<ov::pass::ConvertROIAlign9To3>();
+    pass_config->disable<ov::pass::SoftSignDecomposition>();
+    pass_config->disable<ov::pass::UniqueDecomposition>();
-    pass_config->enable<ngraph::pass::NormalizeL2Decomposition>();
-    pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>();
-    pass_config->enable<ngraph::pass::ConvertGather1ToGather7>();
-    pass_config->enable<ngraph::pass::ConvertDetectionOutput1ToDetectionOutput8>();
-    pass_config->enable<ngraph::pass::ConvertROIAlign3To9>();
+    pass_config->enable<ov::pass::NormalizeL2Decomposition>();
+    pass_config->enable<ov::pass::ConvertInterpolate1ToInterpolate4>();
+    pass_config->enable<ov::pass::ConvertGather1ToGather7>();
+    pass_config->enable<ov::pass::ConvertDetectionOutput1ToDetectionOutput8>();
+    pass_config->enable<ov::pass::ConvertROIAlign3To9>();
     if (useLpt) {
         CPU_LPT_SCOPE(LowPrecisionTransformations_Part3);
-        pass_config->set_callback<ngraph::pass::AddFakeQuantizeFusion,
-                                  ngraph::pass::MulFakeQuantizeFusion,
-                                  ngraph::pass::FakeQuantizeMulFusion>([](const_node_ptr &node) -> bool {
+        pass_config->set_callback<ov::pass::AddFakeQuantizeFusion,
+                                  ov::pass::MulFakeQuantizeFusion,
+                                  ov::pass::FakeQuantizeMulFusion>([](const_node_ptr &node) -> bool {
            std::string errMsg;
            return !node::FakeQuantize::isSupportedOperation(node, errMsg);
        });
-        pass_config->set_callback<ngraph::pass::ConvertQuantizeDequantize>([&defaultPrecisions](const_node_ptr &node) -> bool {
+        pass_config->set_callback<ov::pass::ConvertQuantizeDequantize>([&defaultPrecisions](const_node_ptr &node) -> bool {
            return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(node, defaultPrecisions);
        });
     }
@@ -597,7 +598,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
         supportedPrecisions = std::vector<PrecisionsRestriction>({});
     }
-    ngraph::pass::Manager lptManager;
+    ov::pass::Manager lptManager;
     lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(
         supportedPrecisions,
         quantizationRestrictions,
@@ -619,10 +620,10 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
         lptManager.run_passes(nGraphFunc);
     }
-    ngraph::pass::Manager postLPTPassManager;
-    postLPTPassManager.register_pass<ngraph::pass::UnrollTensorIterator>();
+    ov::pass::Manager postLPTPassManager;
+    postLPTPassManager.register_pass<ov::pass::UnrollTensorIterator>();
     postLPTPassManager.register_pass<ReshapePRelu>();
-    postLPTPassManager.get_pass_config()->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
+    postLPTPassManager.get_pass_config()->set_callback<ov::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
         // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation
         return node->get_rt_info().count("UNROLL_TI") == 0;
     });
@@ -634,7 +635,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
        return false;
     });
-    postLPTPassManager.register_pass<ngraph::pass::ConstantFolding>();
+    postLPTPassManager.register_pass<ov::pass::ConstantFolding>();
     // Snippets may brake MHA patterns so the fusion has to performed before
     postLPTPassManager.register_pass<MHAFusion>();
@@ -663,7 +664,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
     postLPTPassManager.run_passes(nGraphFunc);
     if (_enableSnippets && dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) {
-        ngraph::pass::Manager snippetsManager;
+        ov::pass::Manager snippetsManager;
         snippetsManager.register_pass<SnippetsMarkSkipped>();
         snippetsManager.register_pass<ngraph::snippets::pass::EnumerateNodes>();
         snippetsManager.register_pass<ngraph::snippets::pass::TokenizeSnippets>();
@@ -697,13 +698,13 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
         snippetsManager.run_passes(nGraphFunc);
     }
-    ngraph::pass::Manager postSnippetsManager;
-    postSnippetsManager.register_pass<ngraph::pass::FakeQuantizeDecomposition>();
-    postSnippetsManager.get_pass_config()->set_callback<ngraph::pass::FakeQuantizeDecomposition>([](const_node_ptr& node) -> bool {
+    ov::pass::Manager postSnippetsManager;
+    postSnippetsManager.register_pass<ov::pass::FakeQuantizeDecomposition>();
+    postSnippetsManager.get_pass_config()->set_callback<ov::pass::FakeQuantizeDecomposition>([](const_node_ptr& node) -> bool {
        std::string errMsg;
        return node::FakeQuantize::isSupportedOperation(node, errMsg);
    });
-    postSnippetsManager.register_pass<ngraph::pass::ConstantFolding>();
+    postSnippetsManager.register_pass<ov::pass::ConstantFolding>();
     postSnippetsManager.run_passes(nGraphFunc);
 }
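Most of this file is a mechanical ngraph::pass:: to ov::pass:: namespace migration; the functional change is swapping the smart_reshape include for unique_decomposition.hpp and disabling ov::pass::UniqueDecomposition, so the graph keeps Unique nodes for the new native CPU implementation. A minimal sketch of the Manager/PassConfig pattern the file relies on (assuming the standard OpenVINO headers of this era; run_pipeline is a placeholder name):

#include <memory>

#include <openvino/core/model.hpp>
#include <openvino/pass/constant_folding.hpp>
#include <openvino/pass/manager.hpp>
#include <transformations/op_conversions/unique_decomposition.hpp>

// Register passes, then disable the ones the plugin handles natively,
// mirroring pass_config->disable<ov::pass::UniqueDecomposition>() above.
void run_pipeline(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.set_per_pass_validation(false);
    manager.register_pass<ov::pass::UniqueDecomposition>();
    manager.register_pass<ov::pass::ConstantFolding>();
    // The CPU plugin executes Unique directly, so its decomposition is turned off.
    manager.get_pass_config()->disable<ov::pass::UniqueDecomposition>();
    manager.run_passes(model);
}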

View File

@@ -186,12 +186,15 @@ std::vector<std::string> disabledTestPatterns() {
         // Reorder->GridSample->Reorder also does not work here. Potential fix is to use nearest conversion instead of truncation.
         R"(.*GridSampleLayerTestCPU.*(BILINEAR|BICUBIC).*(i32|i8).*)",
         // 94989. BF16 Reference produces different results.
-        R"(.*GridSampleLayerTestCPU.*(BILINEAR|BICUBIC).*gridPrc=bf16.*)",
+        // GridSample regression on bf16 data.
+        R"(.*GridSampleLayerTestCPU.*(BILINEAR|BICUBIC).*bf16.*)",
         // // Issue: 95915
         R"(smoke_dynamic/AUGRUCellCPUTest.CompareWithRefs/IS=\(\[\?\.1\]_\[\?\.1\]_\[\?\.1\]_\)_TS=\{\(1\.1\)_\(1\.1\)_\(1\.1\)\}_\{\(3\.1\)_\(3\.1\)_\(3\.1\)\}_\{\(5\.1\)_\(5\.1\)_\(5\.1\)\}_decompose=0_activations=\(sigmoid\.tanh\)_clip=0_linear=0_netPrec=f32__inFmts=nc\.nc_outFmts=nc_primitive=ref_any_PluginConf_ENFORCE_BF16=YES)", // NOLINT
         R"(smoke_dynamic/GRUCellCPUTest.CompareWithRefs/IS=\(\[\?.1\]_\[\?\.1\]_\)_TS=\{\(1\.1\)_\(1\.1\)\}_\{\(3\.1\)_\(3\.1\)\}_\{\(5\.1\)_\(5\.1\)\}_decompose=0_activations=\(sigmoid\.tanh\)_clip=0_linear=0_netPrec=f32__inFmts=nc\.nc_outFmts=nc_primitive=ref_any_PluginConf_ENFORCE_BF16=YES)", // NOLINT
         R"(nightly_dynamic_bf16/RNNSequenceCPUTest.*activations=\(relu\).*)",
         R"(smoke_dynamic_BatchSizeOne/RNNSequenceCPUTest.*IS=\(\[1\.\?\.10\]_\[1\.1\.10\]_\[\?\]_\)_TS=\{\(1\.2\.10\)_\(1\.1\.10\)_\(1\)\}_\{\(1\.4\.10\)_\(1\.1\.10\)_\(1\)\}_\{\(1\.8\.10\)_\(1\.1\.10\)_\(1\)\}_seqMode=PURE_SEQ_activations=\(relu\)_clip=0_direction=forward_netPrec=f32__inFmts=ncw\.ntc_outFmts=ncw\.ncw_primitive=ref_any)", // NOLINT
+        // 98151. Not valid sorting for slices in reference.
+        R"(.*UniqueLayerTestCPU.*axis.*True.*)"
     };
 #define FIX_62820 0

View File

@@ -0,0 +1,261 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ngraph_functions/builders.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include <common_test_utils/ov_tensor_utils.hpp>

using namespace CPUTestUtils;
using namespace ov::test;

namespace CPULayerTestsDefinitions {

typedef std::tuple<
        std::vector<InputShape>,            // Input shapes
        std::tuple<bool, int>,              // Is flattened and axis
        bool,                               // Sorted
        ElementType,                        // Data precision
        CPUSpecificParams,                  // CPU specific params
        std::map<std::string, std::string>  // Additional config
> UniqueLayerTestCPUParams;

class UniqueLayerTestCPU : public testing::WithParamInterface<UniqueLayerTestCPUParams>,
                           virtual public SubgraphBaseTest, public CPUTestsBase {
public:
    static std::string getTestCaseName(testing::TestParamInfo<UniqueLayerTestCPUParams> obj) {
        std::vector<InputShape> inputShapes;
        std::tuple<bool, int> flatOrAxis;
        bool sorted;
        ElementType dataPrecision;
        CPUSpecificParams cpuParams;
        std::map<std::string, std::string> additionalConfig;
        std::tie(inputShapes, flatOrAxis, sorted, dataPrecision, cpuParams, additionalConfig) = obj.param;

        std::ostringstream result;
        result << "IS=(";
        for (size_t i = 0lu; i < inputShapes.size(); i++) {
            result << CommonTestUtils::partialShape2str({inputShapes[i].first}) << (i < inputShapes.size() - 1lu ? "_" : "");
        }
        result << ")_TS=";
        for (size_t i = 0lu; i < inputShapes.front().second.size(); i++) {
            result << "{";
            for (size_t j = 0lu; j < inputShapes.size(); j++) {
                result << CommonTestUtils::vec2str(inputShapes[j].second[i]) << (j < inputShapes.size() - 1lu ? "_" : "");
            }
            result << "}_";
        }

        if (!std::get<0>(flatOrAxis)) {
            result << "axis=" << std::get<1>(flatOrAxis) << "_";
        } else {
            result << "flattened" << "_";
        }
        result << "sorted=" << (sorted ? "True" : "False") << "_";
        result << "dataPrc=" << dataPrecision;
        result << CPUTestsBase::getTestCaseName(cpuParams);

        if (!additionalConfig.empty()) {
            result << "_PluginConf";
            for (auto &item : additionalConfig) {
                if (item.second == InferenceEngine::PluginConfigParams::YES)
                    result << "_" << item.first << "=" << item.second;
            }
        }

        return result.str();
    }

protected:
    void SetUp() override {
        std::vector<InputShape> inputShapes;
        std::tuple<bool, int> flatOrAxis;
        bool sorted, flattened;
        int axis;
        ElementType dataPrecision;
        CPUSpecificParams cpuParams;
        std::map<std::string, std::string> additionalConfig;
        std::tie(inputShapes, flatOrAxis, sorted, dataPrecision, cpuParams, additionalConfig) = this->GetParam();
        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;

        targetDevice = CommonTestUtils::DEVICE_CPU;
        init_input_shapes(inputShapes);
        configuration.insert(additionalConfig.begin(), additionalConfig.end());
        flattened = std::get<0>(flatOrAxis);

        if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) {
            selectedType = makeSelectedTypeStr(selectedType, ElementType::bf16);
        } else {
            if (dataPrecision == ElementType::bf16) {
                dataPrecision = ElementType::f32;
            }
            selectedType = makeSelectedTypeStr(selectedType, dataPrecision);
        }

        auto params = ngraph::builder::makeDynamicParams(dataPrecision, inputDynamicShapes);
        params[0]->set_friendly_name("data");
        auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));
        std::shared_ptr<ov::Node> uniqueNode;
        if (flattened) {
            uniqueNode = std::make_shared<ov::op::v10::Unique>(paramOuts[0], sorted);
        } else {
            axis = std::get<1>(flatOrAxis);
            uniqueNode = std::make_shared<ov::op::v10::Unique>(paramOuts[0],
                                                               ov::op::v0::Constant::create(ov::element::i64, ov::Shape({1}), {axis}),
                                                               sorted);
        }

        function = makeNgraphFunction(dataPrecision, params, uniqueNode, "UniqueCPU");
    }

    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
        inputs.clear();
        const auto& funcInputs = function->inputs();
        for (int i = 0; i < funcInputs.size(); ++i) {
            const auto& funcInput = funcInputs[i];
            ov::runtime::Tensor tensor;

            if (funcInput.get_node()->get_friendly_name() == "data") {
                int32_t range = std::accumulate(targetInputStaticShapes[0].begin(), targetInputStaticShapes[0].end(), 1, std::multiplies<size_t>());
                tensor = utils::create_and_fill_tensor(
                        funcInput.get_element_type(), targetInputStaticShapes[0], range, -range / 2, 1);
            }
            inputs.insert({funcInput.get_node_shared_ptr(), tensor});
        }
    }
};

TEST_P(UniqueLayerTestCPU, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    run();
    CheckPluginRelatedResults(compiledModel, "Unique");
}

namespace {

const std::vector<ElementType> dataPrecisionSmoke = {
        ElementType::f32,
        ElementType::i32
};
const std::vector<ElementType> dataPrecisionNightly = {
        ElementType::bf16,
        ElementType::i8
};

std::vector<std::tuple<bool, int>> flatOrAxis { {true, 0}, {false, 0}, {false, 1}, {false, -1} };

std::vector<bool> sorted { true, false};

std::vector<std::map<std::string, std::string>> additionalConfig
        = {{{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO}},
           {{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::YES}}};

std::vector<CPUSpecificParams> getCPUInfo() {
    std::vector<CPUSpecificParams> resCPUParams;
    resCPUParams.push_back(CPUSpecificParams{{}, {}, {"ref"}, "ref"});
    return resCPUParams;
}

std::vector<std::vector<InputShape>> getStaticShapes() {
    std::vector<std::vector<InputShape>> result = {
        { { {}, { {1, 1, 1} } } },    // Static shapes
        { { {}, { {1, 2, 1} } } },    // Static shapes
        { { {}, { {1, 1, 3} } } },    // Static shapes
        { { {}, { {2, 2, 1} } } },    // Static shapes
        { { {}, { {1, 4, 1} } } },    // Static shapes
        { { {}, { {1, 5, 1} } } },    // Static shapes
        { { {}, { {3, 2, 1} } } },    // Static shapes
        { { {}, { {1, 1, 7} } } },    // Static shapes
        { { {}, { {2, 2, 2} } } },    // Static shapes
        { { {}, { {1, 8, 1} } } },    // Static shapes
        { { {}, { {3, 3, 1, 1} } } }, // Static shapes
        { { {}, { {1, 5, 2, 1} } } }, // Static shapes
        { { {}, { {1, 1, 11} } } },   // Static shapes
        { { {}, { {32, 35, 37} } } }, // Static shapes
        { { {}, { {2, 3, 2} } } },    // Static shapes
        { { {}, { {1, 1, 13} } } },   // Static shapes
        { { {}, { {7, 1, 2} } } },    // Static shapes
        { { {}, { {3, 5, 1} } } },    // Static shapes
        { { {}, { {4, 2, 2} } } },    // Static shapes
        { { {}, { {1, 17, 1} } } },   // Static shapes
        { { {}, { {3, 2, 3, 1} } } }, // Static shapes
        { { {}, { {8, 16, 32} } } },  // Static shapes
        { { {}, { {37, 19, 11} } } }, // Static shapes
        { { {}, { {1, 19, 1} } } },   // Static shapes
        { { {}, { {2, 5, 2} } } },    // Static shapes
        { { {}, { {1, 3, 7} } } },    // Static shapes
        { { {}, { {11, 1, 2} } } },   // Static shapes
        { { {}, { {1, 1, 23} } } },   // Static shapes
        { { {}, { {4, 3, 2} } } },    // Static shapes
        { { {}, { {5, 1, 5} } } },    // Static shapes
        { { {}, { {100, 1, 1} } } },  // Static shapes
        { { {}, { {5, 5, 5} } } }     // Static shapes
    };
    return result;
}

INSTANTIATE_TEST_SUITE_P(smoke_static, UniqueLayerTestCPU,
                ::testing::Combine(
                        ::testing::ValuesIn(getStaticShapes()),
                        ::testing::ValuesIn(flatOrAxis),
                        ::testing::ValuesIn(sorted),
                        ::testing::ValuesIn(dataPrecisionSmoke),
                        ::testing::ValuesIn(getCPUInfo()),
                        ::testing::Values(additionalConfig[0])),
                UniqueLayerTestCPU::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(nightly_static, UniqueLayerTestCPU,
                ::testing::Combine(
                        ::testing::ValuesIn(getStaticShapes()),
                        ::testing::ValuesIn(flatOrAxis),
                        ::testing::ValuesIn(sorted),
                        ::testing::ValuesIn(dataPrecisionNightly),
                        ::testing::ValuesIn(getCPUInfo()),
                        ::testing::Values(additionalConfig[0])),
                UniqueLayerTestCPU::getTestCaseName);

const std::vector<std::vector<InputShape>> dynamicInSapes = {
    { { { ov::Dimension(1, 15), -1, -1, -1 },                          // Dynamic shape
        { {1, 1, 1, 1}, {6, 3, 1, 2}, {4, 5, 3, 1}, {2, 7, 2, 2} } } },  // Target shapes
    { { { -1, -1, -1, -1 },                                            // Dynamic shape
        { {1, 2, 1, 5}, {3, 4, 2, 3}, {5, 6, 7, 1}, {7, 8, 2, 4} } } },  // Target shapes
    { { { ov::Dimension(2, 15), -1, -1, -1 },                          // Dynamic shape
        { {8, 3, 3, 3}, {6, 5, 2, 5}, {4, 7, 1, 11}, {2, 9, 3, 4} } } }, // Target shapes
    { { { 3, 4, 4, 5 },                                                // Dynamic shape
        { {3, 4, 4, 5}, {3, 4, 4, 5}, {3, 4, 4, 5}, {3, 4, 4, 5} } } },  // Target shapes
    { { { -1, -1, -1, -1 },                                            // Dynamic shape
        { {1, 2, 1, 13}, {3, 4, 7, 2}, {5, 6, 3, 5}, {7, 8, 4, 4} } } }, // Target shapes
    { { { -1, -1, -1, -1 },                                            // Dynamic shape
        { {2, 11, 1, 17}, {4, 9, 6, 3}, {6, 7, 7, 3}, {8, 3, 2, 11} } } }, // Target shapes
    { { { 3, -1, -1, -1 },                                             // Dynamic shape
        { {3, 2, 1, 23}, {3, 4, 3, 8}, {3, 6, 5, 5}, {3, 8, 31, 1} } } }, // Target shapes
    { { { -1, 3, -1, -1 },                                             // Dynamic shape
        { {8, 3, 8, 4}, {6, 3, 33, 1}, {4, 3, 8, 6}, {2, 3, 8, 8} } } }  // Target shapes
};

INSTANTIATE_TEST_SUITE_P(smoke_dynamic, UniqueLayerTestCPU,
                ::testing::Combine(
                        ::testing::ValuesIn(dynamicInSapes),
                        ::testing::ValuesIn(flatOrAxis),
                        ::testing::ValuesIn(sorted),
                        ::testing::ValuesIn(dataPrecisionSmoke),
                        ::testing::ValuesIn(getCPUInfo()),
                        ::testing::Values(additionalConfig[0])),
                UniqueLayerTestCPU::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(nightly_dynamic, UniqueLayerTestCPU,
                ::testing::Combine(
                        ::testing::ValuesIn(dynamicInSapes),
                        ::testing::ValuesIn(flatOrAxis),
                        ::testing::ValuesIn(sorted),
                        ::testing::ValuesIn(dataPrecisionNightly),
                        ::testing::ValuesIn(getCPUInfo()),
                        ::testing::Values(additionalConfig[0])),
                UniqueLayerTestCPU::getTestCaseName);

} // namespace
} // namespace CPULayerTestsDefinitions
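Finally, for readers unfamiliar with the operation under test, a minimal sketch of building a model around op::v10::Unique outside the test harness (constructor usage mirrors SetUp above; the function name and shapes are illustrative):

#include <memory>

#include <openvino/core/model.hpp>
#include <openvino/op/constant.hpp>
#include <openvino/op/parameter.hpp>
#include <openvino/op/unique.hpp>

// Build a tiny model around op::v10::Unique, the operation the tests exercise.
std::shared_ptr<ov::Model> make_unique_model(bool sorted, int64_t axis) {
    auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{-1, -1, -1});
    auto axisConst = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {axis});
    auto unique = std::make_shared<ov::op::v10::Unique>(data, axisConst, sorted);
    // Four outputs: unique values, first-occurrence indices,
    // input-to-unique mapping, and occurrence counts.
    return std::make_shared<ov::Model>(unique->outputs(), ov::ParameterVector{data});
}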