[CPU] Unique operation implementation. (#14250)
This commit is contained in:
parent
8c3425ff69
commit
3959890691
@ -113,16 +113,24 @@ UniqueElements<Index_t, Count_t> find_unique_elements(const Data_t* data,
|
||||
return *(data + lhs.idx) < *(data + rhs.idx);
|
||||
};
|
||||
|
||||
int64_t axisVal = 0;
|
||||
if (axis) {
|
||||
axisVal = *axis;
|
||||
if (axisVal < 0) {
|
||||
axisVal += data_shape.size();
|
||||
}
|
||||
}
|
||||
|
||||
const auto slices_ascending_order = [&](const TensorSlice<Index_t, Count_t>& lhs,
|
||||
const TensorSlice<Index_t, Count_t>& rhs) {
|
||||
const auto shape_to_iterate = slice_shape_to_iterate(data_shape, *axis);
|
||||
const auto shape_to_iterate = slice_shape_to_iterate(data_shape, axisVal);
|
||||
|
||||
for (auto it = CoordinateIterator(shape_to_iterate); it != CoordinateIterator::end(); ++it) {
|
||||
auto elem_coord_lhs = *it;
|
||||
elem_coord_lhs.insert(elem_coord_lhs.cbegin() + *axis, lhs.idx);
|
||||
elem_coord_lhs.insert(elem_coord_lhs.cbegin() + axisVal, lhs.idx);
|
||||
|
||||
auto elem_coord_rhs = *it;
|
||||
elem_coord_rhs.insert(elem_coord_rhs.cbegin() + *axis, rhs.idx);
|
||||
elem_coord_rhs.insert(elem_coord_rhs.cbegin() + axisVal, rhs.idx);
|
||||
|
||||
const auto lhs_elem_idx = ngraph::coordinate_index(elem_coord_lhs, data_shape);
|
||||
const auto rhs_elem_idx = ngraph::coordinate_index(elem_coord_rhs, data_shape);
|
||||
@ -149,15 +157,15 @@ UniqueElements<Index_t, Count_t> find_unique_elements(const Data_t* data,
|
||||
|
||||
// the individual elements in the two compared slices are always separated by the same offset
|
||||
// and this can be used to compare them elementwise
|
||||
const auto slices_offset = calc_slices_offset(lhs, rhs, data_shape_strides, *axis);
|
||||
const auto shape_to_iterate = slice_shape_to_iterate(data_shape, *axis);
|
||||
const auto slices_offset = calc_slices_offset(lhs, rhs, data_shape_strides, axisVal);
|
||||
const auto shape_to_iterate = slice_shape_to_iterate(data_shape, axisVal);
|
||||
|
||||
for (auto it = CoordinateIterator(shape_to_iterate); it != CoordinateIterator::end(); ++it) {
|
||||
// All slice elements have a "slice index" constant value at the axis position, only the other dimensions
|
||||
// vary for each slice element. Those dimensions are provided by CoordinateIterator, the value at axis
|
||||
// needs to be injected manually.
|
||||
auto elem_coord = *it;
|
||||
elem_coord.insert(elem_coord.cbegin() + *axis, slice_with_lower_idx.idx);
|
||||
elem_coord.insert(elem_coord.cbegin() + axisVal, slice_with_lower_idx.idx);
|
||||
const auto lhs_elem_idx = ngraph::coordinate_index(elem_coord, data_shape);
|
||||
const auto rhs_elem_idx = lhs_elem_idx + slices_offset;
|
||||
if (*(data + lhs_elem_idx) != *(data + rhs_elem_idx)) {
|
||||
@ -219,8 +227,8 @@ UniqueElements<Index_t, Count_t> find_unique_elements(const Data_t* data,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ret.axis = *axis;
|
||||
ret.all_tensor_elements = generate_descriptors<Index_t, Count_t>(data_shape[*axis], DescriptorType::SLICE);
|
||||
ret.axis = axisVal;
|
||||
ret.all_tensor_elements = generate_descriptors<Index_t, Count_t>(data_shape[axisVal], DescriptorType::SLICE);
|
||||
|
||||
if (sorted) {
|
||||
std::stable_sort(begin(ret.all_tensor_elements), end(ret.all_tensor_elements), slices_ascending_order);
|
||||
@ -228,7 +236,7 @@ UniqueElements<Index_t, Count_t> find_unique_elements(const Data_t* data,
|
||||
ret.all_tensor_elements[0].rev_idx = 0;
|
||||
ret.unique_tensor_elements.push_back(ret.all_tensor_elements[0]);
|
||||
|
||||
for (size_t i = 1; i < data_shape[*axis]; ++i) {
|
||||
for (size_t i = 1; i < data_shape[axisVal]; ++i) {
|
||||
auto& tensor_element = ret.all_tensor_elements[i];
|
||||
auto existing_unique = end(ret.unique_tensor_elements);
|
||||
|
||||
@ -264,10 +272,17 @@ std::tuple<Shape, Shape, Shape> make_tensor_shapes(const UniqueElements<Index_t,
|
||||
// if the axis was specified we need to return a data shape with a modified dimension-at-axis
|
||||
// this is where we need to insert the number of detected unique elements
|
||||
// all other dimensions stay the same as in the original data_shape
|
||||
int64_t axisVal = 0;
|
||||
if (axis) {
|
||||
axisVal = *axis;
|
||||
if (axisVal < 0) {
|
||||
axisVal += data_shape.size();
|
||||
}
|
||||
}
|
||||
auto output0 = data_shape;
|
||||
output0[*axis] = unique_elements.unique_tensor_elements.size();
|
||||
output0[axisVal] = unique_elements.unique_tensor_elements.size();
|
||||
const auto output1_3 = Shape{unique_elements.unique_tensor_elements.size()};
|
||||
const auto output2 = Shape{data_shape[*axis]};
|
||||
const auto output2 = Shape{data_shape[axisVal]};
|
||||
return std::make_tuple(output0, output1_3, output2);
|
||||
} else {
|
||||
const auto output0 = Shape{unique_elements.unique_tensor_elements.size()};
|
||||
|
@ -205,6 +205,7 @@ const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_t
|
||||
{ "PriorBoxClustered", Type::PriorBoxClustered},
|
||||
{"Interaction", Type::Interaction},
|
||||
{ "MHA", Type::MHA},
|
||||
{ "Unique", Type::Unique}
|
||||
};
|
||||
|
||||
Type TypeFromName(const std::string& type) {
|
||||
@ -402,6 +403,8 @@ std::string NameFromType(const Type type) {
|
||||
return "Subgraph";
|
||||
case Type::MHA:
|
||||
return "MHA";
|
||||
case Type::Unique:
|
||||
return "Unique";
|
||||
default:
|
||||
return "Unknown";
|
||||
}
|
||||
|
@ -110,7 +110,8 @@ enum class Type {
|
||||
PriorBox,
|
||||
PriorBoxClustered,
|
||||
Interaction,
|
||||
MHA
|
||||
MHA,
|
||||
Unique
|
||||
};
|
||||
|
||||
enum class Algorithm {
|
||||
|
495
src/plugins/intel_cpu/src/nodes/unique.cpp
Normal file
495
src/plugins/intel_cpu/src/nodes/unique.cpp
Normal file
@ -0,0 +1,495 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <algorithm>
#include <cstring>
#include <functional>
#include <numeric>
#include <string>
#include <vector>

#include "unique.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <utils/shape_inference/shape_inference_internal_dyn.hpp>
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace ov::intel_cpu;
|
||||
using namespace ov::intel_cpu::node;
|
||||
|
||||
#define THROW_ERROR IE_THROW() << getTypeStr() << " node with name '" << getName() << "' "
|
||||
|
||||
// Tells whether the given operation can be executed by this node.
// Accepted are only ov::op::v10::Unique operations whose optional axis input (port AXIS)
// is a Constant. On rejection the reason is written to errorMessage. Any exception thrown
// while inspecting the operation results in 'false' with errorMessage left untouched.
bool Unique::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
    try {
        const bool isUniqueV10 = ov::is_type<op::v10::Unique>(op);
        if (!isUniqueV10) {
            errorMessage = "Not supported Unique operation version. CPU plug-in supports only 10th version.";
            return false;
        }

        // The axis input is optional and is inspected only when it is actually connected.
        const bool hasAxisInput = op->get_input_size() > AXIS;
        if (hasAxisInput) {
            const bool axisIsConstant = ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXIS));
            if (!axisIsConstant) {
                errorMessage = "CPU plug-in supports only constant Axis input.";
                return false;
            }
        }

        return true;
    } catch (...) {
        return false;
    }
}
|
||||
|
||||
// Creates the node from an ov::op::v10::Unique operation.
// Throws NotImplemented when isSupportedOperation() rejects the operation and a general
// error when the number of inputs/outputs or the axis value is invalid.
Unique::Unique(const std::shared_ptr<ov::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) :
        Node(op, eng, cache, InternalDynShapeInferFactory()) {
    std::string errorMessage;
    if (!isSupportedOperation(op, errorMessage)) {
        IE_THROW(NotImplemented) << errorMessage;
    }

    if (!(one_of(op->get_input_size(), 1, 2) && op->get_output_size() == 4)) {
        THROW_ERROR << "has incorrect number of input/output edges.";
    }

    // An output counts as defined only when at least one consumer is attached to it.
    for (size_t port = 0; port < 4; ++port) {
        definedOutputs[port] = !op->get_output_target_inputs(port).empty();
    }

    sorted = ov::as_type_ptr<ov::op::v10::Unique>(op)->get_sorted();

    // Without the axis input the data tensor is processed as a flat array.
    flattened = op->get_input_size() <= AXIS;
    if (flattened) {
        return;
    }

    const int axisFromInput = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXIS))->cast_vector<int>()[0];
    const auto dataRank = op->get_input_partial_shape(IN_DATA).rank().get_length();

    // Negative values count from the last dimension.
    axis = axisFromInput;
    if (axis < 0) {
        axis += dataRank;
    }
    if (axis < 0 || axis >= dataRank) {
        THROW_ERROR << "has invalid axis value: " << axisFromInput;
    }
}
|
||||
|
||||
void Unique::initSupportedPrimitiveDescriptors() {
|
||||
dataPrecision = getOriginalInputPrecisionAtPort(IN_DATA);
|
||||
if (dataPrecision != Precision::I32 && dataPrecision != Precision::I8 && dataPrecision != Precision::U8) {
|
||||
dataPrecision = Precision::FP32;
|
||||
}
|
||||
dataTypeSize = dataPrecision.size();
|
||||
const InferenceEngine::Precision axisPrecision = Precision::I32;
|
||||
|
||||
impl_desc_type implType = ref;
|
||||
|
||||
std::vector<PortConfigurator> inPortConfigs = { {LayoutType::ncsp, dataPrecision} };
|
||||
if (!flattened) {
|
||||
inPortConfigs.push_back({LayoutType::ncsp, axisPrecision});
|
||||
}
|
||||
std::vector<PortConfigurator> outPortConfigs;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
outPortConfigs.push_back({LayoutType::ncsp, i == 0 ? dataPrecision : axisPrecision});
|
||||
}
|
||||
|
||||
addSupportedPrimDesc(inPortConfigs, outPortConfigs, implType, isDynamicNode());
|
||||
}
|
||||
|
||||
// The node needs no primitive of its own, so the call is simply forwarded to the base class.
void Unique::createPrimitive() {
    Node::createPrimitive();
}
|
||||
|
||||
void Unique::prepareParams() {
|
||||
auto& dataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr();
|
||||
if (!dataMemPtr || !dataMemPtr->isAllocated()) {
|
||||
THROW_ERROR << " has not allocated input data memory.";
|
||||
}
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (definedOutputs[i]) {
|
||||
auto& dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
|
||||
if (!dstMemPtr || !dstMemPtr->isAllocated()) {
|
||||
THROW_ERROR << " has not allocated output memory at port " << i;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (getSelectedPrimitiveDescriptor() == nullptr) {
|
||||
THROW_ERROR << " has unidentified preferable primitive descriptor.";
|
||||
}
|
||||
|
||||
size_t srcLen = 1;
|
||||
if (flattened) {
|
||||
srcLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / dataTypeSize;
|
||||
} else {
|
||||
auto dstDataShape = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getStaticDims();
|
||||
srcLen = dstDataShape[axis];
|
||||
}
|
||||
firstUniTmp.resize(srcLen, 0);
|
||||
inToOutTmp.resize(srcLen);
|
||||
occurTmp.resize(srcLen);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
struct Unique::flattenExec {
|
||||
void operator()(Unique *node) {
|
||||
node->flattenTensorExec<T>();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct Unique::slicedExec {
|
||||
void operator()(Unique *node) {
|
||||
node->slicedTensorExec<T>();
|
||||
}
|
||||
};
|
||||
|
||||
// Dispatches to the kernel matching the processing mode (per-axis or flattened) and the
// data precision chosen in initSupportedPrimitiveDescriptors().
void Unique::execute(dnnl::stream strm) {
    if (!flattened) {
        OV_SWITCH(intel_cpu, slicedExec, this, dataPrecision,
                  OV_CASE(Precision::FP32, float),
                  OV_CASE(Precision::I32, int32_t),
                  OV_CASE(Precision::I8, int8_t),
                  OV_CASE(Precision::U8, uint8_t))
    } else {
        OV_SWITCH(intel_cpu, flattenExec, this, dataPrecision,
                  OV_CASE(Precision::FP32, float),
                  OV_CASE(Precision::I32, int32_t),
                  OV_CASE(Precision::I8, int8_t),
                  OV_CASE(Precision::U8, uint8_t))
    }
}
|
||||
|
||||
// Dynamic-shape entry point.
// Before running the kernel the outputs are redefined to the largest shapes the current
// input can produce (every element / every slice along the axis being unique); the kernels
// redefine them again once the real number of unique elements is known.
void Unique::executeDynamicImpl(dnnl::stream strm) {
    const auto& srcDataDims = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getStaticDims();
    VectorDims dstDataDims;
    Dim uniqLen = 1;
    if (flattened) {
        // The accumulator type of std::accumulate is the type of the initial value, so it
        // has to be Dim: an 'int' seed would truncate the product for large tensors.
        uniqLen = std::accumulate(srcDataDims.begin(), srcDataDims.end(), static_cast<Dim>(1), std::multiplies<Dim>());
        dstDataDims = { uniqLen };
    } else {
        uniqLen = srcDataDims[axis];
        dstDataDims = srcDataDims;
    }
    redefineOutputMemory({ dstDataDims, {uniqLen}, {uniqLen}, {uniqLen}});

    execute(strm);
}
|
||||
|
||||
template <typename T>
|
||||
void Unique::flattenTensorExec() {
|
||||
const T* srcDataPtr = reinterpret_cast<const T*>(getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetPtr());
|
||||
const size_t inputLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / sizeof(T);
|
||||
std::vector<T> uniDataTmp(inputLen);
|
||||
auto uniDataTmpPtr = uniDataTmp.data();
|
||||
int *firstTmpPtr = nullptr, *inToOutTmpPtr = nullptr, *occurTmpPtr = nullptr;
|
||||
if (definedOutputs[FIRST_UNIQUE_IDX]) {
|
||||
firstTmpPtr = firstUniTmp.data();
|
||||
}
|
||||
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
|
||||
inToOutTmpPtr = inToOutTmp.data();
|
||||
}
|
||||
if (definedOutputs[OCCURRENCES_NUM]) {
|
||||
occurTmpPtr = occurTmp.data();
|
||||
}
|
||||
uniqueLen = inputLen;
|
||||
|
||||
if (sorted) {
|
||||
std::memcpy(uniDataTmpPtr, srcDataPtr, inputLen * sizeof(T));
|
||||
std::sort(uniDataTmpPtr, uniDataTmpPtr + inputLen);
|
||||
auto last = std::unique(uniDataTmpPtr, uniDataTmpPtr + inputLen);
|
||||
uniqueLen = last - uniDataTmpPtr;
|
||||
|
||||
if (definedOutputs[FIRST_UNIQUE_IDX]) {
|
||||
T* first = uniDataTmpPtr;
|
||||
for (T* it = first; it < last; it++) {
|
||||
for (int i = 0; i < inputLen; i++) {
|
||||
if (srcDataPtr[i] == *it) {
|
||||
*firstTmpPtr++ = i;
|
||||
first++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
|
||||
for (int i = 0; i < inputLen; i++) {
|
||||
if (i > 0 && srcDataPtr[i] == srcDataPtr[i - 1]) {
|
||||
inToOutTmpPtr[i] = inToOutTmpPtr[i - 1];
|
||||
continue;
|
||||
}
|
||||
for (int j = 0; j < uniqueLen; j++) {
|
||||
if (srcDataPtr[i] == uniDataTmpPtr[j]) {
|
||||
inToOutTmpPtr[i] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (definedOutputs[OCCURRENCES_NUM]) {
|
||||
std::fill(occurTmpPtr, occurTmpPtr + uniqueLen, 0);
|
||||
for (int j = 0; j < uniqueLen; j++) {
|
||||
for (int i = 0; i < inputLen; i++) {
|
||||
if (srcDataPtr[i] == uniDataTmpPtr[j]) {
|
||||
occurTmpPtr[j]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
uniDataTmpPtr[0] = srcDataPtr[0];
|
||||
if (definedOutputs[FIRST_UNIQUE_IDX]) {
|
||||
firstTmpPtr[0] = 0;
|
||||
}
|
||||
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
|
||||
inToOutTmpPtr[0] = 0;
|
||||
}
|
||||
if (definedOutputs[OCCURRENCES_NUM]) {
|
||||
std::fill(occurTmpPtr, occurTmpPtr + inputLen, 1);
|
||||
}
|
||||
uniqueLen = 1;
|
||||
|
||||
for (int i = 1; i < inputLen; i++) {
|
||||
bool found = false;
|
||||
int j = 0;
|
||||
for (; j < uniqueLen; j++) {
|
||||
if (uniDataTmpPtr[j] == srcDataPtr[i]) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
uniDataTmpPtr[uniqueLen] = srcDataPtr[i];
|
||||
if (definedOutputs[FIRST_UNIQUE_IDX]) {
|
||||
firstTmpPtr[uniqueLen] = i;
|
||||
}
|
||||
uniqueLen++;
|
||||
} else {
|
||||
if (definedOutputs[OCCURRENCES_NUM]) {
|
||||
occurTmpPtr[j]++;
|
||||
}
|
||||
}
|
||||
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
|
||||
inToOutTmpPtr[i] = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
redefineOutputMemory({ {uniqueLen}, {uniqueLen}, {inputLen}, {uniqueLen}});
|
||||
|
||||
T* uniDataPtr = reinterpret_cast<T*>(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr());
|
||||
memcpy(uniDataPtr, uniDataTmpPtr, uniqueLen * sizeof(T));
|
||||
if (definedOutputs[FIRST_UNIQUE_IDX]) {
|
||||
int *firstPtr = reinterpret_cast<int*>(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr());
|
||||
memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int));
|
||||
}
|
||||
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
|
||||
auto inToOutPtr = reinterpret_cast<int*>(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr());
|
||||
memcpy(inToOutPtr, inToOutTmp.data(), inputLen * sizeof(int));
|
||||
}
|
||||
if (definedOutputs[OCCURRENCES_NUM]) {
|
||||
auto occurPtr = reinterpret_cast<int*>(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr());
|
||||
memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Unique::slicedTensorExec() {
|
||||
const T* srcDataPtr = reinterpret_cast<const T*>(getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetPtr());
|
||||
const size_t inputLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / sizeof(T);
|
||||
std::vector<T> uniDataTmp(inputLen);
|
||||
auto uniDataTmpPtr = uniDataTmp.data();
|
||||
int *firstTmpPtr = nullptr, *inToOutTmpPtr = nullptr, *occurTmpPtr = nullptr;
|
||||
if (definedOutputs[FIRST_UNIQUE_IDX]) {
|
||||
firstTmpPtr = firstUniTmp.data();
|
||||
}
|
||||
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
|
||||
inToOutTmpPtr = inToOutTmp.data();
|
||||
}
|
||||
if (definedOutputs[OCCURRENCES_NUM]) {
|
||||
occurTmpPtr = occurTmp.data();
|
||||
}
|
||||
|
||||
const auto& srcDataShape = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getStaticDims();
|
||||
|
||||
const auto cmpBlNum = srcDataShape[axis]; // Blocks to compare.
|
||||
int64_t partsInBl = 1; // Parts in block
|
||||
if (axis > 0) {
|
||||
partsInBl = std::accumulate(srcDataShape.begin(), srcDataShape.begin() + axis, 1, std::multiplies<Dim>());
|
||||
}
|
||||
int64_t elPerPart = 1; // Elements number in part.
|
||||
if (axis < srcDataShape.size() - 1) {
|
||||
elPerPart = std::accumulate(srcDataShape.begin() + axis + 1, srcDataShape.end(), 1, std::multiplies<Dim>());
|
||||
}
|
||||
const auto partLenB = elPerPart * dataPrecision.size();
|
||||
const auto partStep = elPerPart * cmpBlNum;
|
||||
|
||||
if (definedOutputs[FIRST_UNIQUE_IDX]) {
|
||||
firstTmpPtr[0] = 0;
|
||||
}
|
||||
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
|
||||
inToOutTmpPtr[0] = 0;
|
||||
}
|
||||
if (definedOutputs[OCCURRENCES_NUM]) {
|
||||
occurTmpPtr[0] = 1;
|
||||
std::fill(occurTmpPtr, occurTmpPtr + cmpBlNum, 1);
|
||||
}
|
||||
|
||||
uniqueLen = 1;
|
||||
std::vector<int64_t> uniqIdx(cmpBlNum, 0);
|
||||
for (int b1 = 1; b1 < cmpBlNum; b1++) {
|
||||
auto first1 = srcDataPtr + b1 * elPerPart;
|
||||
auto last1 = srcDataPtr + (b1 + 1) * elPerPart;
|
||||
bool equal = true;
|
||||
int b2 = 0;
|
||||
// Compare with unique blocks.
|
||||
for (; b2 < uniqueLen; b2++) {
|
||||
auto first2 = srcDataPtr + uniqIdx[b2] * elPerPart;
|
||||
equal = true;
|
||||
for (int p = 0; p < partsInBl; p++) {
|
||||
equal = std::equal(first1, last1, first2);
|
||||
if (!equal) {
|
||||
break;
|
||||
}
|
||||
first1 += partStep;
|
||||
last1 += partStep;
|
||||
first2 += partStep;
|
||||
}
|
||||
if (equal) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!equal) {
|
||||
if (definedOutputs[FIRST_UNIQUE_IDX]) {
|
||||
firstTmpPtr[uniqueLen] = b1;
|
||||
}
|
||||
|
||||
uniqIdx[uniqueLen++] = b1;
|
||||
} else {
|
||||
if (definedOutputs[OCCURRENCES_NUM]) {
|
||||
occurTmpPtr[b2]++;
|
||||
}
|
||||
}
|
||||
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
|
||||
inToOutTmpPtr[b1] = b2;
|
||||
}
|
||||
}
|
||||
|
||||
const auto dstPrtStep = elPerPart * uniqueLen;
|
||||
for (int b1 = 0; b1 < uniqueLen; b1++) {
|
||||
auto first1 = srcDataPtr + uniqIdx[b1] * elPerPart;
|
||||
auto first2 = uniDataTmpPtr + b1 * elPerPart;
|
||||
for (int p = 0; p < partsInBl; p++) {
|
||||
memcpy(first2, first1, partLenB);
|
||||
first1 += partStep;
|
||||
first2 += dstPrtStep;
|
||||
}
|
||||
}
|
||||
|
||||
if (sorted) {
|
||||
const auto elInBl = elPerPart * partsInBl;
|
||||
struct OrdEl {
|
||||
T val;
|
||||
int64_t idx;
|
||||
};
|
||||
|
||||
std::vector<OrdEl> colToSort(uniqueLen);
|
||||
std::vector<int64_t> moveTo(uniqueLen);
|
||||
for (int k = 0; k < uniqueLen; k++) {
|
||||
moveTo[k] = k;
|
||||
}
|
||||
std::vector<T> buff1(elPerPart);
|
||||
std::vector<T> buff2(elPerPart);
|
||||
for (int64_t p = partsInBl - 1; p >= 0; p--) {
|
||||
for (int64_t e = elPerPart - 1; e >= 0 ; e--) {
|
||||
int64_t pos1 = p * dstPrtStep + e;
|
||||
for (int64_t i = 0; i < uniqueLen; i++) {
|
||||
int64_t pos2 = i * elInBl + pos1;
|
||||
colToSort[i] = {uniDataTmpPtr[pos2], i};
|
||||
}
|
||||
std::stable_sort(colToSort.begin(), colToSort.end(), [](const OrdEl &el1, const OrdEl &el2) { return el1.val < el2.val; });
|
||||
for (int k = 0; k < uniqueLen; k++) {
|
||||
moveTo[colToSort[k].idx] = k;
|
||||
}
|
||||
|
||||
// perm
|
||||
for (int64_t pb = 0; pb < partsInBl; pb++) {
|
||||
auto currDst = uniDataTmpPtr + pb * dstPrtStep;
|
||||
memcpy(buff1.data(), currDst, partLenB);
|
||||
auto dstIdx = moveTo[0];
|
||||
for (int64_t b = 0; b < uniqueLen; b++) {
|
||||
if (dstIdx == moveTo[dstIdx]) {
|
||||
dstIdx = moveTo[++dstIdx];
|
||||
continue;
|
||||
}
|
||||
T* dst = currDst + dstIdx * elPerPart;
|
||||
|
||||
auto& bSrc = b % 2 == 0 ? buff1 : buff2;
|
||||
auto& bDst = b % 2 == 0 ? buff2 : buff1;
|
||||
memcpy(bDst.data(), dst, partLenB);
|
||||
memcpy(dst, bSrc.data(), partLenB);
|
||||
|
||||
dstIdx = moveTo[dstIdx];
|
||||
}
|
||||
}
|
||||
|
||||
auto mPos = moveTo[0];
|
||||
int32_t firstSrc = 0, firstDst = 0, ocSrc = 0, ocDst = 0;
|
||||
if (definedOutputs[FIRST_UNIQUE_IDX]) {
|
||||
firstSrc = firstTmpPtr[0];
|
||||
}
|
||||
if (definedOutputs[OCCURRENCES_NUM]) {
|
||||
ocSrc = occurTmpPtr[0];
|
||||
}
|
||||
for (int k = 0; k < uniqueLen; k++) {
|
||||
if (mPos == moveTo[mPos]) {
|
||||
mPos = moveTo[++mPos];
|
||||
continue;
|
||||
}
|
||||
|
||||
if (definedOutputs[FIRST_UNIQUE_IDX]) {
|
||||
auto& fSrc = k % 2 == 0 ? firstSrc : firstDst;
|
||||
auto& fDst = k % 2 == 0 ? firstDst : firstSrc;
|
||||
fDst = firstTmpPtr[mPos];
|
||||
firstTmpPtr[mPos] = fSrc;
|
||||
}
|
||||
if (definedOutputs[OCCURRENCES_NUM]) {
|
||||
auto& oSrc = k % 2 == 0 ? ocSrc : ocDst;
|
||||
auto& oDst = k % 2 == 0 ? ocDst : ocSrc;
|
||||
oDst = occurTmpPtr[mPos];
|
||||
occurTmpPtr[mPos] = oSrc;
|
||||
}
|
||||
|
||||
mPos = moveTo[mPos];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
|
||||
for (int b1 = 0; b1 < cmpBlNum; b1++) {
|
||||
auto first1 = srcDataPtr + b1 * elPerPart;
|
||||
auto last1 = srcDataPtr + (b1 + 1) * elPerPart;
|
||||
bool equal = true;
|
||||
for (int b2 = 0; b2 < uniqueLen; b2++) {
|
||||
auto first2 = uniDataTmpPtr + b2 * elPerPart;
|
||||
equal = true;
|
||||
for (int p = 0; p < partsInBl; p++) {
|
||||
equal = std::equal(first1, last1, first2);
|
||||
if (!equal) {
|
||||
break;
|
||||
}
|
||||
first2 += dstPrtStep;
|
||||
}
|
||||
if (equal) {
|
||||
inToOutTmpPtr[b1] = b2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto dstDataShape = srcDataShape;
|
||||
dstDataShape[axis] = uniqueLen;
|
||||
redefineOutputMemory({ dstDataShape, {uniqueLen}, {cmpBlNum}, {uniqueLen}});
|
||||
|
||||
T* uniDataPtr = reinterpret_cast<T*>(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr());
|
||||
memcpy(uniDataPtr, uniDataTmpPtr, getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetSize());
|
||||
if (definedOutputs[FIRST_UNIQUE_IDX]) {
|
||||
int *firstPtr = reinterpret_cast<int*>(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr());
|
||||
memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int));
|
||||
}
|
||||
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
|
||||
auto inToOutPtr = reinterpret_cast<int*>(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr());
|
||||
memcpy(inToOutPtr, inToOutTmp.data(), cmpBlNum * sizeof(int));
|
||||
}
|
||||
if (definedOutputs[OCCURRENCES_NUM]) {
|
||||
auto occurPtr = reinterpret_cast<int*>(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr());
|
||||
memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int));
|
||||
}
|
||||
}
|
68
src/plugins/intel_cpu/src/nodes/unique.hpp
Normal file
68
src/plugins/intel_cpu/src/nodes/unique.hpp
Normal file
@ -0,0 +1,68 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <node.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
namespace node {
|
||||
|
||||
class Unique : public Node {
|
||||
public:
|
||||
Unique(const std::shared_ptr<ov::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache);
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
||||
void getSupportedDescriptors() override {};
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(dnnl::stream strm) override;
|
||||
bool created() const override { return getType() == Type::Unique; }
|
||||
|
||||
protected:
|
||||
void executeDynamicImpl(dnnl::stream strm) override;
|
||||
void prepareParams() override;
|
||||
bool needShapeInfer() const override { return false; }
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
void flattenTensorExec();
|
||||
template <typename T>
|
||||
void slicedTensorExec();
|
||||
|
||||
template<typename T>
|
||||
struct flattenExec;
|
||||
template<typename T>
|
||||
struct slicedExec;
|
||||
|
||||
std::vector<int32_t> firstUniTmp;
|
||||
std::vector<int32_t> inToOutTmp;
|
||||
std::vector<int32_t> occurTmp;
|
||||
|
||||
bool sorted = false;
|
||||
bool flattened = true;
|
||||
int axis = 0;
|
||||
bool definedOutputs[4] = { false, false, false, false };
|
||||
InferenceEngine::Precision dataPrecision;
|
||||
int64_t dataTypeSize = 1;
|
||||
size_t uniqueLen = 1;
|
||||
|
||||
int threadsNum = 1;
|
||||
|
||||
static constexpr size_t IN_DATA = 0;
|
||||
static constexpr size_t AXIS = 1;
|
||||
static constexpr size_t UNIQUE_DATA = 0;
|
||||
static constexpr size_t FIRST_UNIQUE_IDX = 1;
|
||||
static constexpr size_t INPUT_TO_UNIQ_IDX = 2;
|
||||
static constexpr size_t OCCURRENCES_NUM = 3;
|
||||
};
|
||||
|
||||
} // namespace node
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
@ -91,6 +91,7 @@
|
||||
#include "nodes/eye.h"
|
||||
#include "nodes/interaction.h"
|
||||
#include "nodes/mha.h"
|
||||
#include "nodes/unique.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
@ -194,6 +195,7 @@ Node::NodesFactory::NodesFactory()
|
||||
INTEL_CPU_NODE(Eye, Type::Eye);
|
||||
INTEL_CPU_NODE(Interaction, Type::Interaction);
|
||||
INTEL_CPU_NODE(MHA, Type::MHA);
|
||||
INTEL_CPU_NODE(Unique, Type::Unique);
|
||||
}
|
||||
|
||||
#undef INTEL_CPU_NODE
|
||||
|
@ -87,7 +87,7 @@
|
||||
#include <transformations/op_conversions/convert_roi_align_v3_to_v9.hpp>
|
||||
#include <transformations/op_conversions/softsign_decomposition.hpp>
|
||||
#include "transformations/op_conversions/eye_decomposition.hpp"
|
||||
#include "transformations/smart_reshape/smart_reshape.hpp"
|
||||
#include "transformations/op_conversions/unique_decomposition.hpp"
|
||||
|
||||
#include "ngraph_transformations/convert_to_cpu_specific_opset.hpp"
|
||||
#include "ngraph_transformations/snippets_mark_skipped.hpp"
|
||||
@ -287,9 +287,9 @@ static bool fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, ov::
|
||||
|
||||
static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function> nGraphFunc, const bool _enableLPT, const bool _enableBF16,
|
||||
const bool _enableSnippets, const bool isLegacyApi) {
|
||||
ngraph::pass::Manager manager;
|
||||
ov::pass::Manager manager;
|
||||
manager.set_per_pass_validation(false);
|
||||
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||
manager.register_pass<ov::pass::InitNodeInfo>();
|
||||
|
||||
const bool useLpt =
|
||||
_enableLPT &&
|
||||
@ -331,32 +331,32 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
|
||||
type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};
|
||||
|
||||
manager.register_pass<ov::pass::AUGRUCellFusion>();
|
||||
manager.register_pass<ngraph::pass::CommonOptimizations>();
|
||||
manager.register_pass<ngraph::pass::WrapInterpolateIntoTransposes>();
|
||||
manager.register_pass<ngraph::pass::TransposeSinking>();
|
||||
manager.register_pass<ngraph::pass::ConvertSequenceToTensorIterator>();
|
||||
manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
|
||||
manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
|
||||
manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
|
||||
manager.register_pass<ngraph::pass::GRUCellDecomposition>();
|
||||
manager.register_pass<ngraph::pass::RNNCellDecomposition>();
|
||||
manager.register_pass<ngraph::pass::ConvertNMS1ToNMS9>();
|
||||
manager.register_pass<ngraph::pass::ConvertNMS3ToNMS9>();
|
||||
manager.register_pass<ngraph::pass::ConvertNMS4ToNMS9>();
|
||||
manager.register_pass<ngraph::pass::ConvertNMS5ToNMS9>();
|
||||
manager.register_pass<ngraph::pass::ConvertNMS9ToNMSIEInternal>();
|
||||
manager.register_pass<ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
|
||||
manager.register_pass<ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>();
|
||||
manager.register_pass<ngraph::pass::TransposeMatMul>();
|
||||
manager.register_pass<ngraph::pass::ConstantFolding>();
|
||||
manager.register_pass<ov::pass::CommonOptimizations>();
|
||||
manager.register_pass<ov::pass::WrapInterpolateIntoTransposes>();
|
||||
manager.register_pass<ov::pass::TransposeSinking>();
|
||||
manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
|
||||
manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
|
||||
manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
|
||||
manager.register_pass<ov::pass::LSTMCellDecomposition>();
|
||||
manager.register_pass<ov::pass::GRUCellDecomposition>();
|
||||
manager.register_pass<ov::pass::RNNCellDecomposition>();
|
||||
manager.register_pass<ov::pass::ConvertNMS1ToNMS9>();
|
||||
manager.register_pass<ov::pass::ConvertNMS3ToNMS9>();
|
||||
manager.register_pass<ov::pass::ConvertNMS4ToNMS9>();
|
||||
manager.register_pass<ov::pass::ConvertNMS5ToNMS9>();
|
||||
manager.register_pass<ov::pass::ConvertNMS9ToNMSIEInternal>();
|
||||
manager.register_pass<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
|
||||
manager.register_pass<ov::pass::ConvertMatrixNmsToMatrixNmsIE>();
|
||||
manager.register_pass<ov::pass::TransposeMatMul>();
|
||||
manager.register_pass<ov::pass::ConstantFolding>();
|
||||
|
||||
if (useLpt) {
|
||||
CPU_LPT_SCOPE(LowPrecisionTransformations_Part2);
|
||||
manager.register_pass<ngraph::pass::low_precision::ConvertSubtractConstant>(defaultPrecisions);
|
||||
}
|
||||
manager.register_pass<ngraph::pass::Validate>();
|
||||
manager.register_pass<ngraph::pass::ConvertPrecision>(precisions, type_to_fuse);
|
||||
manager.register_pass<ngraph::pass::EliminateConvert>();
|
||||
manager.register_pass<ov::pass::Validate>();
|
||||
manager.register_pass<ov::pass::ConvertPrecision>(precisions, type_to_fuse);
|
||||
manager.register_pass<ov::pass::EliminateConvert>();
|
||||
manager.register_pass<SwapConvertTranspose>();
|
||||
manager.register_pass<ConvertToInteraction>();
|
||||
manager.register_pass<ConvertInteractionInt8>();
|
||||
@ -366,15 +366,15 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
|
||||
using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
|
||||
|
||||
// SpaceToDepth/ DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
|
||||
pass_config->set_callback<ngraph::pass::ConvertSpaceToDepth,
|
||||
ngraph::pass::ConvertDepthToSpace>(
|
||||
pass_config->set_callback<ov::pass::ConvertSpaceToDepth,
|
||||
ov::pass::ConvertDepthToSpace>(
|
||||
[](const_node_ptr &node) -> bool {
|
||||
return node->input_value(0).get_shape().size() <= 5lu &&
|
||||
node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
|
||||
});
|
||||
|
||||
pass_config->set_callback<ngraph::pass::ConvertBatchToSpace,
|
||||
ngraph::pass::ConvertSpaceToBatch>(
|
||||
pass_config->set_callback<ov::pass::ConvertBatchToSpace,
|
||||
ov::pass::ConvertSpaceToBatch>(
|
||||
[](const_node_ptr &node) -> bool {
|
||||
const auto & rank = node->input(0).get_partial_shape().rank().get_length();
|
||||
return rank == 4lu || rank == 5lu;
|
||||
@ -443,33 +443,33 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
|
||||
return false;
|
||||
};
|
||||
|
||||
pass_config->set_callback<ngraph::pass::ConvertRNNSequenceToTensorIterator,
|
||||
ngraph::pass::ConvertGRUSequenceToTensorIterator,
|
||||
ngraph::pass::ConvertLSTMSequenceToTensorIterator>(
|
||||
pass_config->set_callback<ov::pass::ConvertRNNSequenceToTensorIterator,
|
||||
ov::pass::ConvertGRUSequenceToTensorIterator,
|
||||
ov::pass::ConvertLSTMSequenceToTensorIterator>(
|
||||
[isSequencePrimitiveSupported](const_node_ptr &node) -> bool {
|
||||
return isSequencePrimitiveSupported(node);
|
||||
});
|
||||
|
||||
pass_config->set_callback<ngraph::pass::RNNCellDecomposition, ngraph::pass::GRUCellDecomposition,
|
||||
ngraph::pass::LSTMCellDecomposition>(
|
||||
pass_config->set_callback<ov::pass::RNNCellDecomposition, ov::pass::GRUCellDecomposition,
|
||||
ov::pass::LSTMCellDecomposition>(
|
||||
[isCellPrimitiveSupported](const_node_ptr &node) -> bool {
|
||||
return isCellPrimitiveSupported(node);
|
||||
});
|
||||
|
||||
pass_config->set_callback<ngraph::pass::MVN6Decomposition>(
|
||||
pass_config->set_callback<ov::pass::MVN6Decomposition>(
|
||||
[](const_node_ptr &node) -> bool {
|
||||
std::string errorMessage;
|
||||
return node::MVN::isSupportedOperation(node, errorMessage);
|
||||
});
|
||||
|
||||
pass_config->set_callback<ngraph::pass::NormalizeL2Decomposition>(
|
||||
pass_config->set_callback<ov::pass::NormalizeL2Decomposition>(
|
||||
[](const_node_ptr &node) -> bool {
|
||||
std::string errorMsg;
|
||||
return node::NormalizeL2::isSupportedOperation(node, errorMsg);
|
||||
});
|
||||
|
||||
pass_config->enable<ngraph::pass::SoftmaxDecomposition>();
|
||||
pass_config->set_callback<ngraph::pass::SoftmaxDecomposition>(
|
||||
pass_config->enable<ov::pass::SoftmaxDecomposition>();
|
||||
pass_config->set_callback<ov::pass::SoftmaxDecomposition>(
|
||||
[](const_node_ptr &node) -> bool {
|
||||
return node->input_value(0).get_partial_shape().rank().get_length() <= 5;
|
||||
});
|
||||
@ -487,9 +487,9 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
|
||||
return true;
|
||||
};
|
||||
|
||||
pass_config->set_callback<ngraph::pass::ConvertNMS9ToNMSIEInternal>(nmsCallback);
|
||||
pass_config->set_callback<ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(nmsCallback);
|
||||
pass_config->set_callback<ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(nmsCallback);
|
||||
pass_config->set_callback<ov::pass::ConvertNMS9ToNMSIEInternal>(nmsCallback);
|
||||
pass_config->set_callback<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>(nmsCallback);
|
||||
pass_config->set_callback<ov::pass::ConvertMatrixNmsToMatrixNmsIE>(nmsCallback);
|
||||
}
|
||||
|
||||
// List of enabled/disabled transformations
|
||||
@ -499,46 +499,47 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
|
||||
pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
|
||||
pass_config->disable<ov::pass::EyeDecomposition>();
|
||||
|
||||
pass_config->disable<ngraph::pass::ConvertGELU>();
|
||||
pass_config->disable<ngraph::pass::ConvertShuffleChannels3>();
|
||||
pass_config->disable<ngraph::pass::Gelu7Downgrade>();
|
||||
pass_config->disable<ngraph::pass::HSwishDecomposition>();
|
||||
pass_config->disable<ngraph::pass::ReduceL1Decomposition>();
|
||||
pass_config->disable<ngraph::pass::ReduceL2Decomposition>();
|
||||
pass_config->disable<ngraph::pass::SoftPlusDecomposition>();
|
||||
pass_config->disable<ngraph::pass::HSigmoidDecomposition>();
|
||||
pass_config->disable<ngraph::pass::ConvertMod>();
|
||||
pass_config->disable<ngraph::pass::ConvertShuffleChannels3>();
|
||||
pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>();
|
||||
pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
|
||||
pass_config->disable<ngraph::pass::ConvertGather7ToGather1>();
|
||||
pass_config->disable<ngraph::pass::ConvertGather8ToGather7>();
|
||||
pass_config->disable<ngraph::pass::ConvertMinimum>();
|
||||
pass_config->disable<ngraph::pass::ConvertBroadcastToTiles>();
|
||||
pass_config->disable<ngraph::pass::ConvertReduceMeanToPooling>();
|
||||
pass_config->disable<ngraph::pass::ConvertReduceMaxToPooling>();
|
||||
pass_config->disable<ngraph::pass::ConvertReduceSumToPooling>();
|
||||
pass_config->disable<ngraph::pass::SliceToStridedSlice>();
|
||||
pass_config->disable<ngraph::pass::ConvertDetectionOutput8ToDetectionOutput1>();
|
||||
pass_config->disable<ngraph::pass::ConvertROIAlign9To3>();
|
||||
pass_config->disable<ngraph::pass::SoftSignDecomposition>();
|
||||
pass_config->disable<ov::pass::ConvertGELU>();
|
||||
pass_config->disable<ov::pass::ConvertShuffleChannels3>();
|
||||
pass_config->disable<ov::pass::Gelu7Downgrade>();
|
||||
pass_config->disable<ov::pass::HSwishDecomposition>();
|
||||
pass_config->disable<ov::pass::ReduceL1Decomposition>();
|
||||
pass_config->disable<ov::pass::ReduceL2Decomposition>();
|
||||
pass_config->disable<ov::pass::SoftPlusDecomposition>();
|
||||
pass_config->disable<ov::pass::HSigmoidDecomposition>();
|
||||
pass_config->disable<ov::pass::ConvertMod>();
|
||||
pass_config->disable<ov::pass::ConvertShuffleChannels3>();
|
||||
pass_config->disable<ov::pass::WeightsDequantizeToFakeQuantize>();
|
||||
pass_config->disable<ov::pass::SimplifyCTCGreedyDecoderSeqLen>();
|
||||
pass_config->disable<ov::pass::ConvertGather7ToGather1>();
|
||||
pass_config->disable<ov::pass::ConvertGather8ToGather7>();
|
||||
pass_config->disable<ov::pass::ConvertMinimum>();
|
||||
pass_config->disable<ov::pass::ConvertBroadcastToTiles>();
|
||||
pass_config->disable<ov::pass::ConvertReduceMeanToPooling>();
|
||||
pass_config->disable<ov::pass::ConvertReduceMaxToPooling>();
|
||||
pass_config->disable<ov::pass::ConvertReduceSumToPooling>();
|
||||
pass_config->disable<ov::pass::SliceToStridedSlice>();
|
||||
pass_config->disable<ov::pass::ConvertDetectionOutput8ToDetectionOutput1>();
|
||||
pass_config->disable<ov::pass::ConvertROIAlign9To3>();
|
||||
pass_config->disable<ov::pass::SoftSignDecomposition>();
|
||||
pass_config->disable<ov::pass::UniqueDecomposition>();
|
||||
|
||||
pass_config->enable<ngraph::pass::NormalizeL2Decomposition>();
|
||||
pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>();
|
||||
pass_config->enable<ngraph::pass::ConvertGather1ToGather7>();
|
||||
pass_config->enable<ngraph::pass::ConvertDetectionOutput1ToDetectionOutput8>();
|
||||
pass_config->enable<ngraph::pass::ConvertROIAlign3To9>();
|
||||
pass_config->enable<ov::pass::NormalizeL2Decomposition>();
|
||||
pass_config->enable<ov::pass::ConvertInterpolate1ToInterpolate4>();
|
||||
pass_config->enable<ov::pass::ConvertGather1ToGather7>();
|
||||
pass_config->enable<ov::pass::ConvertDetectionOutput1ToDetectionOutput8>();
|
||||
pass_config->enable<ov::pass::ConvertROIAlign3To9>();
|
||||
|
||||
if (useLpt) {
|
||||
CPU_LPT_SCOPE(LowPrecisionTransformations_Part3);
|
||||
pass_config->set_callback<ngraph::pass::AddFakeQuantizeFusion,
|
||||
ngraph::pass::MulFakeQuantizeFusion,
|
||||
ngraph::pass::FakeQuantizeMulFusion>([](const_node_ptr &node) -> bool {
|
||||
pass_config->set_callback<ov::pass::AddFakeQuantizeFusion,
|
||||
ov::pass::MulFakeQuantizeFusion,
|
||||
ov::pass::FakeQuantizeMulFusion>([](const_node_ptr &node) -> bool {
|
||||
std::string errMsg;
|
||||
return !node::FakeQuantize::isSupportedOperation(node, errMsg);
|
||||
});
|
||||
|
||||
pass_config->set_callback<ngraph::pass::ConvertQuantizeDequantize>([&defaultPrecisions](const_node_ptr &node) -> bool {
|
||||
pass_config->set_callback<ov::pass::ConvertQuantizeDequantize>([&defaultPrecisions](const_node_ptr &node) -> bool {
|
||||
return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(node, defaultPrecisions);
|
||||
});
|
||||
}
|
||||
@ -597,7 +598,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
|
||||
supportedPrecisions = std::vector<PrecisionsRestriction>({});
|
||||
}
|
||||
|
||||
ngraph::pass::Manager lptManager;
|
||||
ov::pass::Manager lptManager;
|
||||
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(
|
||||
supportedPrecisions,
|
||||
quantizationRestrictions,
|
||||
@ -619,10 +620,10 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
|
||||
lptManager.run_passes(nGraphFunc);
|
||||
}
|
||||
|
||||
ngraph::pass::Manager postLPTPassManager;
|
||||
postLPTPassManager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||
ov::pass::Manager postLPTPassManager;
|
||||
postLPTPassManager.register_pass<ov::pass::UnrollTensorIterator>();
|
||||
postLPTPassManager.register_pass<ReshapePRelu>();
|
||||
postLPTPassManager.get_pass_config()->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
|
||||
postLPTPassManager.get_pass_config()->set_callback<ov::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
|
||||
// UnrollTI transformation is disabled by default, is turned on by LowLatency transformation
|
||||
return node->get_rt_info().count("UNROLL_TI") == 0;
|
||||
});
|
||||
@ -634,7 +635,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
|
||||
return false;
|
||||
});
|
||||
|
||||
postLPTPassManager.register_pass<ngraph::pass::ConstantFolding>();
|
||||
postLPTPassManager.register_pass<ov::pass::ConstantFolding>();
|
||||
|
||||
// Snippets may break MHA patterns, so the fusion has to be performed before them
|
||||
postLPTPassManager.register_pass<MHAFusion>();
|
||||
@ -663,7 +664,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
|
||||
postLPTPassManager.run_passes(nGraphFunc);
|
||||
|
||||
if (_enableSnippets && dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) {
|
||||
ngraph::pass::Manager snippetsManager;
|
||||
ov::pass::Manager snippetsManager;
|
||||
snippetsManager.register_pass<SnippetsMarkSkipped>();
|
||||
snippetsManager.register_pass<ngraph::snippets::pass::EnumerateNodes>();
|
||||
snippetsManager.register_pass<ngraph::snippets::pass::TokenizeSnippets>();
|
||||
@ -697,13 +698,13 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
|
||||
snippetsManager.run_passes(nGraphFunc);
|
||||
}
|
||||
|
||||
ngraph::pass::Manager postSnippetsManager;
|
||||
postSnippetsManager.register_pass<ngraph::pass::FakeQuantizeDecomposition>();
|
||||
postSnippetsManager.get_pass_config()->set_callback<ngraph::pass::FakeQuantizeDecomposition>([](const_node_ptr& node) -> bool {
|
||||
ov::pass::Manager postSnippetsManager;
|
||||
postSnippetsManager.register_pass<ov::pass::FakeQuantizeDecomposition>();
|
||||
postSnippetsManager.get_pass_config()->set_callback<ov::pass::FakeQuantizeDecomposition>([](const_node_ptr& node) -> bool {
|
||||
std::string errMsg;
|
||||
return node::FakeQuantize::isSupportedOperation(node, errMsg);
|
||||
});
|
||||
postSnippetsManager.register_pass<ngraph::pass::ConstantFolding>();
|
||||
postSnippetsManager.register_pass<ov::pass::ConstantFolding>();
|
||||
postSnippetsManager.run_passes(nGraphFunc);
|
||||
}
|
||||
|
||||
|
@ -186,12 +186,15 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
// Reorder->GridSample->Reorder also does not work here. Potential fix is to use nearest conversion instead of truncation.
|
||||
R"(.*GridSampleLayerTestCPU.*(BILINEAR|BICUBIC).*(i32|i8).*)",
|
||||
// 94989. BF16 Reference produces different results.
|
||||
R"(.*GridSampleLayerTestCPU.*(BILINEAR|BICUBIC).*gridPrc=bf16.*)",
|
||||
// GridSample regression on bf16 data.
|
||||
R"(.*GridSampleLayerTestCPU.*(BILINEAR|BICUBIC).*bf16.*)",
|
||||
// Issue: 95915
|
||||
R"(smoke_dynamic/AUGRUCellCPUTest.CompareWithRefs/IS=\(\[\?\.1\]_\[\?\.1\]_\[\?\.1\]_\)_TS=\{\(1\.1\)_\(1\.1\)_\(1\.1\)\}_\{\(3\.1\)_\(3\.1\)_\(3\.1\)\}_\{\(5\.1\)_\(5\.1\)_\(5\.1\)\}_decompose=0_activations=\(sigmoid\.tanh\)_clip=0_linear=0_netPrec=f32__inFmts=nc\.nc_outFmts=nc_primitive=ref_any_PluginConf_ENFORCE_BF16=YES)", // NOLINT
|
||||
R"(smoke_dynamic/GRUCellCPUTest.CompareWithRefs/IS=\(\[\?.1\]_\[\?\.1\]_\)_TS=\{\(1\.1\)_\(1\.1\)\}_\{\(3\.1\)_\(3\.1\)\}_\{\(5\.1\)_\(5\.1\)\}_decompose=0_activations=\(sigmoid\.tanh\)_clip=0_linear=0_netPrec=f32__inFmts=nc\.nc_outFmts=nc_primitive=ref_any_PluginConf_ENFORCE_BF16=YES)", // NOLINT
|
||||
R"(nightly_dynamic_bf16/RNNSequenceCPUTest.*activations=\(relu\).*)",
|
||||
R"(smoke_dynamic_BatchSizeOne/RNNSequenceCPUTest.*IS=\(\[1\.\?\.10\]_\[1\.1\.10\]_\[\?\]_\)_TS=\{\(1\.2\.10\)_\(1\.1\.10\)_\(1\)\}_\{\(1\.4\.10\)_\(1\.1\.10\)_\(1\)\}_\{\(1\.8\.10\)_\(1\.1\.10\)_\(1\)\}_seqMode=PURE_SEQ_activations=\(relu\)_clip=0_direction=forward_netPrec=f32__inFmts=ncw\.ntc_outFmts=ncw\.ncw_primitive=ref_any)", // NOLINT
|
||||
// 98151. Not valid sorting for slices in reference.
|
||||
R"(.*UniqueLayerTestCPU.*axis.*True.*)"
|
||||
};
|
||||
|
||||
#define FIX_62820 0
|
||||
|
@ -0,0 +1,261 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "test_utils/cpu_test_utils.hpp"
|
||||
#include <common_test_utils/ov_tensor_utils.hpp>
|
||||
|
||||
using namespace CPUTestUtils;
|
||||
using namespace ov::test;
|
||||
|
||||
namespace CPULayerTestsDefinitions {
|
||||
|
||||
typedef std::tuple<
|
||||
std::vector<InputShape>, // Input shapes
|
||||
std::tuple<bool, int>, // Is flattened and axis
|
||||
bool, // Sorted
|
||||
ElementType, // Data precision
|
||||
CPUSpecificParams, // CPU specific params
|
||||
std::map<std::string, std::string> // Additional config
|
||||
> UniqueLayerTestCPUParams;
|
||||
|
||||
// Value-parameterized CPU test fixture for ov::op::v10::Unique.
// SetUp() builds a model with a single parameter named "data" that feeds a Unique node,
// either without an axis (flattened mode) or with a constant axis input.
class UniqueLayerTestCPU : public testing::WithParamInterface<UniqueLayerTestCPUParams>,
                           virtual public SubgraphBaseTest, public CPUTestsBase {
public:
    // Composes the test-case name from the parameter tuple.
    // The produced text must stay stable: skip-list regular expressions match against it.
    static std::string getTestCaseName(testing::TestParamInfo<UniqueLayerTestCPUParams> obj) {
        std::vector<InputShape> inputShapes;
        std::tuple<bool, int> flatOrAxis;
        bool sorted = false;
        ElementType dataPrecision;
        CPUSpecificParams cpuParams;
        std::map<std::string, std::string> additionalConfig;

        std::tie(inputShapes, flatOrAxis, sorted, dataPrecision, cpuParams, additionalConfig) = obj.param;

        std::ostringstream result;
        // Partial (possibly dynamic) shape of every input.
        result << "IS=(";
        for (size_t i = 0lu; i < inputShapes.size(); i++) {
            result << CommonTestUtils::partialShape2str({inputShapes[i].first}) << (i < inputShapes.size() - 1lu ? "_" : "");
        }
        // One "{...}_" group per target-shape iteration, listing that iteration's shape for every input.
        result << ")_TS=";
        for (size_t i = 0lu; i < inputShapes.front().second.size(); i++) {
            result << "{";
            for (size_t j = 0lu; j < inputShapes.size(); j++) {
                result << CommonTestUtils::vec2str(inputShapes[j].second[i]) << (j < inputShapes.size() - 1lu ? "_" : "");
            }
            result << "}_";
        }

        if (!std::get<0>(flatOrAxis)) {
            result << "axis=" << std::get<1>(flatOrAxis) << "_";
        } else {
            result << "flattened" << "_";
        }
        result << "sorted=" << (sorted ? "True" : "False") << "_";
        result << "dataPrc=" << dataPrecision;
        result << CPUTestsBase::getTestCaseName(cpuParams);

        if (!additionalConfig.empty()) {
            result << "_PluginConf";
            // Only options switched to YES are appended to the name.
            for (const auto& item : additionalConfig) {
                if (item.second == InferenceEngine::PluginConfigParams::YES)
                    result << "_" << item.first << "=" << item.second;
            }
        }

        return result.str();
    }

protected:
    // Unpacks the test parameters, configures the plugin and builds the tested model.
    void SetUp() override {
        std::vector<InputShape> inputShapes;
        std::tuple<bool, int> flatOrAxis;
        bool sorted = false;
        ElementType dataPrecision;
        CPUSpecificParams cpuParams;
        std::map<std::string, std::string> additionalConfig;

        std::tie(inputShapes, flatOrAxis, sorted, dataPrecision, cpuParams, additionalConfig) = this->GetParam();
        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
        targetDevice = CommonTestUtils::DEVICE_CPU;
        init_input_shapes(inputShapes);
        configuration.insert(additionalConfig.begin(), additionalConfig.end());

        // Look the option up with find(): operator[] would silently insert an empty entry.
        const auto bf16Option = additionalConfig.find(InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16);
        const bool enforceBf16 = bf16Option != additionalConfig.end() &&
                                 bf16Option->second == InferenceEngine::PluginConfigParams::YES;
        if (enforceBf16) {
            selectedType = makeSelectedTypeStr(selectedType, ElementType::bf16);
        } else {
            // Without enforced BF16 a bf16 request is replaced by f32 for both the model and the expected primitive type.
            if (dataPrecision == ElementType::bf16) {
                dataPrecision = ElementType::f32;
            }
            selectedType = makeSelectedTypeStr(selectedType, dataPrecision);
        }

        auto params = ngraph::builder::makeDynamicParams(dataPrecision, inputDynamicShapes);
        // generate_inputs() recognises the data input by this friendly name.
        params[0]->set_friendly_name("data");
        auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));

        const bool flattened = std::get<0>(flatOrAxis);
        std::shared_ptr<ov::Node> uniqueNode;
        if (flattened) {
            uniqueNode = std::make_shared<ov::op::v10::Unique>(paramOuts[0], sorted);
        } else {
            const int axis = std::get<1>(flatOrAxis);
            uniqueNode = std::make_shared<ov::op::v10::Unique>(paramOuts[0],
                                                               ov::op::v0::Constant::create(ov::element::i64, ov::Shape({1}), {axis}),
                                                               sorted);
        }

        function = makeNgraphFunction(dataPrecision, params, uniqueNode, "UniqueCPU");
    }

    // Creates one tensor per model input. The input named "data" is filled by
    // utils::create_and_fill_tensor with range = element count of the first target shape,
    // start = -range / 2 and resolution = 1 (argument meanings as defined by that helper).
    // Any other input receives a default-constructed tensor.
    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
        inputs.clear();

        for (const auto& funcInput : function->inputs()) {
            ov::runtime::Tensor tensor;

            if (funcInput.get_node()->get_friendly_name() == "data") {
                const auto& dataShape = targetInputStaticShapes[0];
                // Accumulate in size_t; an int seed would narrow the product after every multiplication.
                const size_t elementsCount =
                    std::accumulate(dataShape.begin(), dataShape.end(), size_t{1}, std::multiplies<size_t>());
                const auto range = static_cast<int32_t>(elementsCount);
                tensor = utils::create_and_fill_tensor(
                        funcInput.get_element_type(), dataShape, range, -range / 2, 1);
            }
            inputs.insert({funcInput.get_node_shared_ptr(), tensor});
        }
    }
};
|
||||
|
||||
// Executes the subgraph test flow via run() and then checks the plugin-related
// results of the compiled model for the "Unique" node type.
TEST_P(UniqueLayerTestCPU, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    run();
    CheckPluginRelatedResults(compiledModel, "Unique");
}
|
||||
|
||||
namespace {
|
||||
|
||||
const std::vector<ElementType> dataPrecisionSmoke = {
|
||||
ElementType::f32,
|
||||
ElementType::i32
|
||||
};
|
||||
const std::vector<ElementType> dataPrecisionNightly = {
|
||||
ElementType::bf16,
|
||||
ElementType::i8
|
||||
};
|
||||
|
||||
// {is flattened, axis} pairs; the axis value is only read when the first field is false.
std::vector<std::tuple<bool, int>> flatOrAxis = {
    std::make_tuple(true, 0),
    std::make_tuple(false, 0),
    std::make_tuple(false, 1),
    std::make_tuple(false, -1)
};

// Values of the "sorted" attribute of the Unique operation.
std::vector<bool> sorted = {true, false};
|
||||
|
||||
std::vector<std::map<std::string, std::string>> additionalConfig
|
||||
= {{{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO}},
|
||||
{{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::YES}}};
|
||||
|
||||
std::vector<CPUSpecificParams> getCPUInfo() {
|
||||
std::vector<CPUSpecificParams> resCPUParams;
|
||||
resCPUParams.push_back(CPUSpecificParams{{}, {}, {"ref"}, "ref"});
|
||||
return resCPUParams;
|
||||
}
|
||||
|
||||
std::vector<std::vector<InputShape>> getStaticShapes() {
|
||||
std::vector<std::vector<InputShape>> result = {
|
||||
{ { {}, { {1, 1, 1} } } }, // Static shapes
|
||||
{ { {}, { {1, 2, 1} } } }, // Static shapes
|
||||
{ { {}, { {1, 1, 3} } } }, // Static shapes
|
||||
{ { {}, { {2, 2, 1} } } }, // Static shapes
|
||||
{ { {}, { {1, 4, 1} } } }, // Static shapes
|
||||
{ { {}, { {1, 5, 1} } } }, // Static shapes
|
||||
{ { {}, { {3, 2, 1} } } }, // Static shapes
|
||||
{ { {}, { {1, 1, 7} } } }, // Static shapes
|
||||
{ { {}, { {2, 2, 2} } } }, // Static shapes
|
||||
{ { {}, { {1, 8, 1} } } }, // Static shapes
|
||||
{ { {}, { {3, 3, 1, 1} } } }, // Static shapes
|
||||
{ { {}, { {1, 5, 2, 1} } } }, // Static shapes
|
||||
{ { {}, { {1, 1, 11} } } }, // Static shapes
|
||||
{ { {}, { {32, 35, 37} } } }, // Static shapes
|
||||
{ { {}, { {2, 3, 2} } } }, // Static shapes
|
||||
{ { {}, { {1, 1, 13} } } }, // Static shapes
|
||||
{ { {}, { {7, 1, 2} } } }, // Static shapes
|
||||
{ { {}, { {3, 5, 1} } } }, // Static shapes
|
||||
{ { {}, { {4, 2, 2} } } }, // Static shapes
|
||||
{ { {}, { {1, 17, 1} } } }, // Static shapes
|
||||
{ { {}, { {3, 2, 3, 1} } } }, // Static shapes
|
||||
{ { {}, { {8, 16, 32} } } }, // Static shapes
|
||||
{ { {}, { {37, 19, 11} } } }, // Static shapes
|
||||
{ { {}, { {1, 19, 1} } } }, // Static shapes
|
||||
{ { {}, { {2, 5, 2} } } }, // Static shapes
|
||||
{ { {}, { {1, 3, 7} } } }, // Static shapes
|
||||
{ { {}, { {11, 1, 2} } } }, // Static shapes
|
||||
{ { {}, { {1, 1, 23} } } }, // Static shapes
|
||||
{ { {}, { {4, 3, 2} } } }, // Static shapes
|
||||
{ { {}, { {5, 1, 5} } } }, // Static shapes
|
||||
{ { {}, { {100, 1, 1} } } }, // Static shapes
|
||||
{ { {}, { {5, 5, 5} } } } // Static shapes
|
||||
};
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Smoke suite over static shapes: smoke precisions, every flatten/axis and sorted
// variant, the CPU params from getCPUInfo() and the first additional config.
INSTANTIATE_TEST_SUITE_P(
    smoke_static,
    UniqueLayerTestCPU,
    ::testing::Combine(::testing::ValuesIn(getStaticShapes()),
                       ::testing::ValuesIn(flatOrAxis),
                       ::testing::ValuesIn(sorted),
                       ::testing::ValuesIn(dataPrecisionSmoke),
                       ::testing::ValuesIn(getCPUInfo()),
                       ::testing::Values(additionalConfig[0])),
    UniqueLayerTestCPU::getTestCaseName);
|
||||
|
||||
// Nightly suite over static shapes: same combination as the smoke suite, but with
// the nightly precision list.
INSTANTIATE_TEST_SUITE_P(
    nightly_static,
    UniqueLayerTestCPU,
    ::testing::Combine(::testing::ValuesIn(getStaticShapes()),
                       ::testing::ValuesIn(flatOrAxis),
                       ::testing::ValuesIn(sorted),
                       ::testing::ValuesIn(dataPrecisionNightly),
                       ::testing::ValuesIn(getCPUInfo()),
                       ::testing::Values(additionalConfig[0])),
    UniqueLayerTestCPU::getTestCaseName);
|
||||
|
||||
const std::vector<std::vector<InputShape>> dynamicInSapes = {
|
||||
{ { { ov::Dimension(1, 15), -1, -1, -1 }, // Dynamic shape
|
||||
{ {1, 1, 1, 1}, {6, 3, 1, 2}, {4, 5, 3, 1}, {2, 7, 2, 2} } } }, // Target shapes
|
||||
{ { { -1, -1, -1, -1 }, // Dynamic shape
|
||||
{ {1, 2, 1, 5}, {3, 4, 2, 3}, {5, 6, 7, 1}, {7, 8, 2, 4} } } }, // Target shapes
|
||||
{ { { ov::Dimension(2, 15), -1, -1, -1 }, // Dynamic shape
|
||||
{ {8, 3, 3, 3}, {6, 5, 2, 5}, {4, 7, 1, 11}, {2, 9, 3, 4} } } }, // Target shapes
|
||||
{ { { 3, 4, 4, 5 }, // Dynamic shape
|
||||
{ {3, 4, 4, 5}, {3, 4, 4, 5}, {3, 4, 4, 5}, {3, 4, 4, 5} } } }, // Target shapes
|
||||
{ { { -1, -1, -1, -1 }, // Dynamic shape
|
||||
{ {1, 2, 1, 13}, {3, 4, 7, 2}, {5, 6, 3, 5}, {7, 8, 4, 4} } } }, // Target shapes
|
||||
{ { { -1, -1, -1, -1 }, // Dynamic shape
|
||||
{ {2, 11, 1, 17}, {4, 9, 6, 3}, {6, 7, 7, 3}, {8, 3, 2, 11} } } }, // Target shapes
|
||||
{ { { 3, -1, -1, -1 }, // Dynamic shape
|
||||
{ {3, 2, 1, 23}, {3, 4, 3, 8}, {3, 6, 5, 5}, {3, 8, 31, 1} } } }, // Target shapes
|
||||
{ { { -1, 3, -1, -1 }, // Dynamic shape
|
||||
{ {8, 3, 8, 4}, {6, 3, 33, 1}, {4, 3, 8, 6}, {2, 3, 8, 8} } } } // Target shapes
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_dynamic, UniqueLayerTestCPU,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(dynamicInSapes),
|
||||
::testing::ValuesIn(flatOrAxis),
|
||||
::testing::ValuesIn(sorted),
|
||||
::testing::ValuesIn(dataPrecisionSmoke),
|
||||
::testing::ValuesIn(getCPUInfo()),
|
||||
::testing::Values(additionalConfig[0])),
|
||||
UniqueLayerTestCPU::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(nightly_dynamic, UniqueLayerTestCPU,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(dynamicInSapes),
|
||||
::testing::ValuesIn(flatOrAxis),
|
||||
::testing::ValuesIn(sorted),
|
||||
::testing::ValuesIn(dataPrecisionNightly),
|
||||
::testing::ValuesIn(getCPUInfo()),
|
||||
::testing::Values(additionalConfig[0])),
|
||||
UniqueLayerTestCPU::getTestCaseName);
|
||||
} // namespace
|
||||
} // namespace CPULayerTestsDefinitions
|
Loading…
Reference in New Issue
Block a user