[CPU] ONNX Unique tests failing on CPU (#18100)
* [CPU] ONNX Unique tests failing on CPU
* Fixes as per comments.
parent fb676a9e76
commit 886fd0debb
@@ -36,8 +36,6 @@ xfail_issue_33488 = xfail_test(reason="RuntimeError: OV does not support the fol
"MaxUnpool")
skip_issue_38084 = pytest.mark.skip(reason="Aborted (core dumped) Assertion "
"`(layer->get_output_partial_shape(i).is_static())' failed.")
xfail_issue_33595 = xfail_test(reason="RuntimeError: OV does not support the following ONNX operations: "
"Unique")
xfail_issue_33596 = xfail_test(reason="RuntimeError: OV does not support different sequence operations: "
"ConcatFromSequence, SequenceConstruct, SequenceAt, SplitToSequence, "
"SequenceEmpty, SequenceInsert, SequenceErase, SequenceLength ")
@@ -10,7 +10,6 @@ from tests import (
skip_rng_tests,
xfail_issue_33488,
xfail_issue_33581,
xfail_issue_33595,
xfail_issue_33596,
xfail_issue_33606,
xfail_issue_33651,
@@ -195,11 +194,6 @@ tests_expected_to_fail = [
"OnnxBackendNodeModelTest.test_castlike_STRING_to_FLOAT_cpu",
"OnnxBackendNodeModelTest.test_castlike_STRING_to_FLOAT_expanded_cpu",
),
(
xfail_issue_33595,
"OnnxBackendNodeModelTest.test_unique_sorted_with_negative_axis_cpu",
"OnnxBackendNodeModelTest.test_unique_sorted_with_axis_3d_cpu",
),
(
xfail_issue_33651,
"OnnxBackendNodeModelTest.test_tfidfvectorizer_tf_batch_onlybigrams_skip5_cpu",
@@ -6,6 +6,8 @@

#include <cstring>
#include "ie_api.h"
#include <ie_parallel.hpp>
#include <onednn/dnnl.h>

namespace ov {
namespace intel_cpu {
@@ -51,5 +53,20 @@ inline int cpu_memcpy_s(void* dst, size_t dst_size, const void* src, size_t coun
return 0;
}

inline void cpu_parallel_memcpy(void* dst, const void* src, size_t count) {
const size_t l2_cache_size = dnnl::utils::get_cache_size(2, true);
if (count >= l2_cache_size) {
auto src_int8 = static_cast<const uint8_t *>(src);
auto dst_int8 = static_cast<uint8_t *>(dst);
parallel_nt(0, [&](const size_t ithr, const size_t nthr) {
size_t start = 0, end = 0;
splitter(count, nthr, ithr, start, end);
cpu_memcpy(dst_int8 + start, src_int8 + start, end - start);
});
} else {
cpu_memcpy(dst, src, count);
}
}

} // namespace intel_cpu
} // namespace ov
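
The new cpu_parallel_memcpy helper above parallelizes a copy only when its size reaches the L2 cache size reported by oneDNN; smaller copies stay on a single thread to avoid threading overhead. Below is a rough, self-contained sketch of the same idea using plain std::thread and a hypothetical fixed threshold in place of the plugin's parallel_nt/splitter helpers and dnnl::utils::get_cache_size (illustrative only, not part of this commit):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <thread>
#include <vector>

// Illustrative sketch, not the plugin code. Splits the byte range into one
// contiguous chunk per thread when the copy is large, as splitter() does.
void parallel_memcpy_sketch(void* dst, const void* src, size_t count) {
    constexpr size_t kThreshold = 1u << 20;  // hypothetical stand-in for dnnl::utils::get_cache_size(2, true)
    if (count < kThreshold) {
        std::memcpy(dst, src, count);  // small copy: single-threaded
        return;
    }
    const size_t nthr = std::max(1u, std::thread::hardware_concurrency());
    auto* s = static_cast<const uint8_t*>(src);
    auto* d = static_cast<uint8_t*>(dst);
    std::vector<std::thread> workers;
    for (size_t t = 0; t < nthr; ++t) {
        // Each thread copies its own contiguous slice of the buffer.
        const size_t begin = count * t / nthr;
        const size_t end = count * (t + 1) / nthr;
        workers.emplace_back([=]() { std::memcpy(d + begin, s + begin, end - begin); });
    }
    for (auto& w : workers) {
        w.join();
    }
}
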
@@ -2,11 +2,11 @@
// SPDX-License-Identifier: Apache-2.0
//

#include <string>
#include <vector>

#include "unique.hpp"
#include <ngraph/opsets/opset1.hpp>

#include "ie_parallel.hpp"
#include <openvino/op/unique.hpp>
#include "common/cpu_memcpy.h"
#include <utils/shape_inference/shape_inference_internal_dyn.hpp>

using namespace InferenceEngine;
@@ -21,7 +21,7 @@ bool Unique::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std
errorMessage = "Not supported Unique operation version. CPU plug-in supports only 10th version.";
return false;
}
if (op->get_input_size() > AXIS && !ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXIS))) {
if (op->get_input_size() > AXIS && !ov::is_type<op::v0::Constant>(op->get_input_node_ptr(AXIS))) {
errorMessage = "CPU plug-in supports only constant Axis input.";
return false;
}
@@ -46,15 +46,15 @@ Unique::Unique(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr con
definedOutputs[i] = !op->get_output_target_inputs(i).empty();
}

sorted = ov::as_type_ptr<ov::op::v10::Unique>(op)->get_sorted();
sorted = ov::as_type_ptr<op::v10::Unique>(op)->get_sorted();
if (op->get_input_size() > AXIS) {
flattened = false;
axis = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXIS))->cast_vector<int>()[0];
axis = ov::as_type<op::v0::Constant>(op->get_input_node_ptr(AXIS))->cast_vector<int>()[0];
if (axis < 0) {
axis += op->get_input_partial_shape(IN_DATA).rank().get_length();
}
if (axis < 0 || axis >= op->get_input_partial_shape(IN_DATA).rank().get_length()) {
THROW_ERROR << "has invalid axis value: " << ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXIS))->cast_vector<int>()[0];
THROW_ERROR << "has invalid axis value: " << ov::as_type<op::v0::Constant>(op->get_input_node_ptr(AXIS))->cast_vector<int>()[0];
}
} else {
flattened = true;
@@ -181,7 +181,7 @@ void Unique::flattenTensorExec() {
uniqueLen = inputLen;

if (sorted) {
std::memcpy(uniDataTmpPtr, srcDataPtr, inputLen * sizeof(T));
cpu_parallel_memcpy(uniDataTmpPtr, srcDataPtr, inputLen * sizeof(T));
std::sort(uniDataTmpPtr, uniDataTmpPtr + inputLen);
auto last = std::unique(uniDataTmpPtr, uniDataTmpPtr + inputLen);
uniqueLen = last - uniDataTmpPtr;
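
The sorted flattened path above is essentially the standard copy / sort / std::unique idiom. A minimal standalone sketch of that pattern with hypothetical names (illustrative only, not the node's actual buffers or outputs):

#include <algorithm>
#include <cstddef>
#include <vector>

// Returns the number of unique values and leaves them, sorted, in uniqueOut.
template <typename T>
size_t sorted_unique_sketch(const std::vector<T>& src, std::vector<T>& uniqueOut) {
    uniqueOut = src;                                     // copy of the flattened input
    std::sort(uniqueOut.begin(), uniqueOut.end());       // sort so duplicates become adjacent
    auto last = std::unique(uniqueOut.begin(), uniqueOut.end());
    const size_t uniqueLen = static_cast<size_t>(last - uniqueOut.begin());
    uniqueOut.resize(uniqueLen);                         // keep only the unique prefix
    return uniqueLen;
}
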
@@ -264,27 +264,25 @@ void Unique::flattenTensorExec() {
redefineOutputMemory({ {uniqueLen}, {uniqueLen}, {inputLen}, {uniqueLen}});

T* uniDataPtr = reinterpret_cast<T*>(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr());
memcpy(uniDataPtr, uniDataTmpPtr, uniqueLen * sizeof(T));
cpu_parallel_memcpy(uniDataPtr, uniDataTmpPtr, uniqueLen * sizeof(T));
if (definedOutputs[FIRST_UNIQUE_IDX]) {
int *firstPtr = reinterpret_cast<int*>(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr());
memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int));
cpu_parallel_memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int));
}
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
auto inToOutPtr = reinterpret_cast<int*>(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr());
memcpy(inToOutPtr, inToOutTmp.data(), inputLen * sizeof(int));
cpu_parallel_memcpy(inToOutPtr, inToOutTmp.data(), inputLen * sizeof(int));
}
if (definedOutputs[OCCURRENCES_NUM]) {
auto occurPtr = reinterpret_cast<int*>(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr());
memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int));
cpu_parallel_memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int));
}
}

template <typename T>
void Unique::slicedTensorExec() {
const T* srcDataPtr = reinterpret_cast<const T*>(getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetPtr());
const size_t inputLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / sizeof(T);
std::vector<T> uniDataTmp(inputLen);
auto uniDataTmpPtr = uniDataTmp.data();
auto inDataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr();
auto srcDataPtr = reinterpret_cast<const T*>(inDataMemPtr->GetPtr());
int *firstTmpPtr = nullptr, *inToOutTmpPtr = nullptr, *occurTmpPtr = nullptr;
if (definedOutputs[FIRST_UNIQUE_IDX]) {
firstTmpPtr = firstUniTmp.data();
@@ -296,19 +294,19 @@ void Unique::slicedTensorExec() {
occurTmpPtr = occurTmp.data();
}

const auto& srcDataShape = getParentEdgeAt(IN_DATA)->getMemoryPtr()->getStaticDims();
const auto& srcDataShape = inDataMemPtr->getStaticDims();

const auto cmpBlNum = srcDataShape[axis]; // Blocks to compare.
int64_t partsInBl = 1; // Parts in block
const auto axisDim = srcDataShape[axis];
int64_t outerLen = 1lu;
if (axis > 0) {
partsInBl = std::accumulate(srcDataShape.begin(), srcDataShape.begin() + axis, 1, std::multiplies<Dim>());
outerLen = std::accumulate(srcDataShape.begin(), srcDataShape.begin() + axis, 1, std::multiplies<Dim>());
}
int64_t elPerPart = 1; // Elements number in part.
int64_t innerLen = 1;
if (static_cast<size_t>(axis) < srcDataShape.size() - 1) {
elPerPart = std::accumulate(srcDataShape.begin() + axis + 1, srcDataShape.end(), 1, std::multiplies<Dim>());
innerLen = std::accumulate(srcDataShape.begin() + axis + 1, srcDataShape.end(), 1, std::multiplies<Dim>());
}
const auto partLenB = elPerPart * dataPrecision.size();
const auto partStep = elPerPart * cmpBlNum;
const auto innerSizeB = innerLen * sizeof(T);
const auto srcOuterStep = innerLen * axisDim;

if (definedOutputs[FIRST_UNIQUE_IDX]) {
firstTmpPtr[0] = 0;
@@ -318,28 +316,29 @@ void Unique::slicedTensorExec() {
}
if (definedOutputs[OCCURRENCES_NUM]) {
occurTmpPtr[0] = 1;
std::fill(occurTmpPtr, occurTmpPtr + cmpBlNum, 1);
std::fill(occurTmpPtr, occurTmpPtr + axisDim, 1);
}

uniqueLen = 1;
std::vector<int64_t> uniqIdx(cmpBlNum, 0);
for (size_t b1 = 1; b1 < cmpBlNum; b1++) {
auto first1 = srcDataPtr + b1 * elPerPart;
auto last1 = srcDataPtr + (b1 + 1) * elPerPart;
uniqueLen = 1lu;
std::vector<size_t> uniqIdx(axisDim, 0lu);
// Search for unique slices.
for (size_t a = 1lu; a < axisDim; a++) {
auto first1 = srcDataPtr + a * innerLen;
auto last1 = srcDataPtr + (a + 1lu) * innerLen;
bool equal = true;
size_t b2 = 0;
size_t uIdx = 0lu;
// Compare with unique blocks.
for (; b2 < uniqueLen; b2++) {
auto first2 = srcDataPtr + uniqIdx[b2] * elPerPart;
for (; uIdx < uniqueLen; uIdx++) {
auto first2 = srcDataPtr + uniqIdx[uIdx] * innerLen;
equal = true;
for (int p = 0; p < partsInBl; p++) {
for (int64_t o = 0lu; o < outerLen; o++) {
equal = std::equal(first1, last1, first2);
if (!equal) {
break;
}
first1 += partStep;
last1 += partStep;
first2 += partStep;
first1 += srcOuterStep;
last1 += srcOuterStep;
first2 += srcOuterStep;
}
if (equal) {
break;
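
The renamed variables above decompose the input shape around the Unique axis: outerLen is the product of the dimensions before the axis, axisDim is the axis dimension itself, and innerLen is the product of the dimensions after it, so one slice consists of outerLen contiguous runs of innerLen elements taken with stride srcOuterStep = innerLen * axisDim. A small sketch of that arithmetic with a hypothetical helper name (illustrative only, not part of this commit):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

// e.g. shape {2, 3, 4, 5}, axis = 2  ->  outerLen = 6, axisDim = 4, innerLen = 5.
void split_shape_sketch(const std::vector<int64_t>& shape, size_t axis,
                        int64_t& outerLen, int64_t& axisDim, int64_t& innerLen) {
    outerLen = std::accumulate(shape.begin(), shape.begin() + axis, int64_t{1}, std::multiplies<int64_t>());
    axisDim  = shape[axis];
    innerLen = std::accumulate(shape.begin() + axis + 1, shape.end(), int64_t{1}, std::multiplies<int64_t>());
}
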
@@ -347,149 +346,141 @@ void Unique::slicedTensorExec() {
}
if (!equal) {
if (definedOutputs[FIRST_UNIQUE_IDX]) {
firstTmpPtr[uniqueLen] = b1;
firstTmpPtr[uniqueLen] = a;
}

uniqIdx[uniqueLen++] = b1;
uniqIdx[uniqueLen++] = a;
} else {
if (definedOutputs[OCCURRENCES_NUM]) {
occurTmpPtr[b2]++;
occurTmpPtr[uIdx]++;
}
}
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
inToOutTmpPtr[b1] = b2;
inToOutTmpPtr[a] = uIdx;
}
}

const auto dstPrtStep = elPerPart * uniqueLen;
for (size_t b1 = 0; b1 < uniqueLen; b1++) {
auto first1 = srcDataPtr + uniqIdx[b1] * elPerPart;
auto first2 = uniDataTmpPtr + b1 * elPerPart;
for (int p = 0; p < partsInBl; p++) {
memcpy(first2, first1, partLenB);
first1 += partStep;
first2 += dstPrtStep;
}
// Redefinition of output shapes.
auto dstDataShape = srcDataShape;
dstDataShape[axis] = uniqueLen;
redefineOutputMemory({ dstDataShape, {uniqueLen}, {axisDim}, {uniqueLen}});

int *firstPtr = nullptr, *inToOutPtr = nullptr, *occurNPtr = nullptr;
if (definedOutputs[FIRST_UNIQUE_IDX]) {
firstPtr = reinterpret_cast<int*>(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr());
}
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
inToOutPtr = reinterpret_cast<int*>(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr());
}
if (definedOutputs[OCCURRENCES_NUM]) {
occurNPtr = reinterpret_cast<int*>(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr());
}

T* dstDataPtr = reinterpret_cast<T*>(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr());
const auto dstOuterStep = innerLen * uniqueLen;
// Filling of the first output if needed.
if (sorted || definedOutputs[UNIQUE_DATA]) {
parallel_for(uniqueLen, [&](size_t u) {
auto first1 = srcDataPtr + uniqIdx[u] * innerLen;
auto first2 = dstDataPtr + u * innerLen;
for (int64_t p = 0lu; p < outerLen; p++) {
cpu_memcpy(first2, first1, innerSizeB);
first1 += srcOuterStep;
first2 += dstOuterStep;
}
});
}

const auto uniqueLenIB = uniqueLen * sizeof(T);

if (sorted) {
const auto elInBl = elPerPart * partsInBl;
const auto dstUniDataLen = dstOuterStep * outerLen;
std::vector<T> vDstBuff(dstUniDataLen);
auto dstBuff = vDstBuff.data();

struct OrdEl {
T val;
int64_t idx;
};

std::vector<OrdEl> colToSort(uniqueLen);
std::vector<int64_t> moveTo(uniqueLen);
for (size_t k = 0; k < uniqueLen; k++) {
moveTo[k] = k;
}
std::vector<T> buff1(elPerPart);
std::vector<T> buff2(elPerPart);
for (int64_t p = partsInBl - 1; p >= 0; p--) {
for (int64_t e = elPerPart - 1; e >= 0 ; e--) {
int64_t pos1 = p * dstPrtStep + e;
for (int64_t i = 0; i < static_cast<int64_t>(uniqueLen); i++) {
int64_t pos2 = i * elInBl + pos1;
colToSort[i] = {uniDataTmpPtr[pos2], i};
T *dst1 = dstDataPtr, *dst2 = dstBuff;
int *first1 = firstPtr, *first2 = firstTmpPtr;
int *occurN1 = occurNPtr, *occurN2 = occurTmpPtr;
int *inToOut1 = inToOutPtr, *inToOut2 = inToOutTmpPtr;

const bool defined3outputs = definedOutputs[FIRST_UNIQUE_IDX] || definedOutputs[OCCURRENCES_NUM] || definedOutputs[INPUT_TO_UNIQ_IDX];

for (int64_t o = outerLen - 1; o >= 0; o--) { // Backward loop through the outer block.
const int64_t pos1Lim = o * dstOuterStep;
int64_t pos1 = pos1Lim + innerLen - 1;
for (; pos1 >= pos1Lim ; pos1--) { // Backward loop through the inner block.
int64_t pos2 = pos1;
for (int64_t k = 0; k < static_cast<int64_t>(uniqueLen); k++, pos2 += innerLen) {
colToSort[k] = { dst1[pos2], k };
}
std::stable_sort(colToSort.begin(), colToSort.end(), [](const OrdEl &el1, const OrdEl &el2) { return el1.val < el2.val; });
for (size_t k = 0; k < uniqueLen; k++) {
moveTo[colToSort[k].idx] = k;
}

// perm
for (int64_t pb = 0; pb < partsInBl; pb++) {
auto currDst = uniDataTmpPtr + pb * dstPrtStep;
memcpy(buff1.data(), currDst, partLenB);
auto dstIdx = moveTo[0];
for (size_t b = 0; b < uniqueLen; b++) {
if (dstIdx == moveTo[dstIdx]) {
dstIdx = moveTo[dstIdx + 1];
continue;
// Permutation
parallel_for2d(outerLen, uniqueLen, [&](int64_t ot, size_t u) {
auto src = dst1 + ot * dstOuterStep + colToSort[u].idx * innerLen;
auto dst = dst2 + ot * dstOuterStep + u * innerLen;

cpu_memcpy(dst, src, innerSizeB);
});

if (defined3outputs) {
parallel_for(uniqueLen, [&](size_t u) {
if (definedOutputs[FIRST_UNIQUE_IDX]) {
first1[u] = first2[colToSort[u].idx];
}
T* dst = currDst + dstIdx * elPerPart;

auto& bSrc = b % 2 == 0 ? buff1 : buff2;
auto& bDst = b % 2 == 0 ? buff2 : buff1;
memcpy(bDst.data(), dst, partLenB);
memcpy(dst, bSrc.data(), partLenB);

dstIdx = moveTo[dstIdx];
}
if (definedOutputs[OCCURRENCES_NUM]) {
occurN1[u] = occurN2[colToSort[u].idx];
}
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
for (size_t ax = 0; ax < axisDim; ax++) {
if (inToOut2[ax] == colToSort[u].idx) {
inToOut1[ax] = u;
}
}
}
});
}

auto mPos = moveTo[0];
int32_t firstSrc = 0, firstDst = 0, ocSrc = 0, ocDst = 0;
std::swap(dst1, dst2);
if (definedOutputs[FIRST_UNIQUE_IDX]) {
firstSrc = firstTmpPtr[0];
std::swap(first1, first2);
}
if (definedOutputs[OCCURRENCES_NUM]) {
ocSrc = occurTmpPtr[0];
std::swap(occurN1, occurN2);
}
for (size_t k = 0; k < uniqueLen; k++) {
if (mPos == moveTo[mPos]) {
mPos = moveTo[mPos + 1];
continue;
}

if (definedOutputs[FIRST_UNIQUE_IDX]) {
auto& fSrc = k % 2 == 0 ? firstSrc : firstDst;
auto& fDst = k % 2 == 0 ? firstDst : firstSrc;
fDst = firstTmpPtr[mPos];
firstTmpPtr[mPos] = fSrc;
}
if (definedOutputs[OCCURRENCES_NUM]) {
auto& oSrc = k % 2 == 0 ? ocSrc : ocDst;
auto& oDst = k % 2 == 0 ? ocDst : ocSrc;
oDst = occurTmpPtr[mPos];
occurTmpPtr[mPos] = oSrc;
}

mPos = moveTo[mPos];
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
std::swap(inToOut1, inToOut2);
}
}
}

if (definedOutputs[UNIQUE_DATA] && dst1 != dstDataPtr) {
cpu_parallel_memcpy(dstDataPtr, dst1, dstUniDataLen * sizeof(T));
}
if (definedOutputs[FIRST_UNIQUE_IDX] && first2 != firstPtr) {
cpu_parallel_memcpy(firstPtr, first2, uniqueLenIB);
}
if (definedOutputs[INPUT_TO_UNIQ_IDX] && inToOut2 != inToOutPtr) {
cpu_parallel_memcpy(inToOutPtr, inToOut2, axisDim * sizeof(int));
}
if (definedOutputs[OCCURRENCES_NUM] && occurN2 != occurNPtr) {
cpu_parallel_memcpy(occurNPtr, occurN2, uniqueLenIB);
}
} else {
if (definedOutputs[FIRST_UNIQUE_IDX]) {
cpu_parallel_memcpy(firstPtr, firstUniTmp.data(), uniqueLenIB);
}
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
for (size_t b1 = 0; b1 < cmpBlNum; b1++) {
auto first1 = srcDataPtr + b1 * elPerPart;
auto last1 = srcDataPtr + (b1 + 1) * elPerPart;
bool equal = true;
for (size_t b2 = 0; b2 < uniqueLen; b2++) {
auto first2 = uniDataTmpPtr + b2 * elPerPart;
equal = true;
for (int p = 0; p < partsInBl; p++) {
equal = std::equal(first1, last1, first2);
if (!equal) {
break;
}
first2 += dstPrtStep;
}
if (equal) {
inToOutTmpPtr[b1] = b2;
}
}
}
cpu_parallel_memcpy(inToOutPtr, inToOutTmp.data(), axisDim * sizeof(int));
}
if (definedOutputs[OCCURRENCES_NUM]) {
cpu_parallel_memcpy(occurNPtr, occurTmp.data(), uniqueLenIB);
}
}

auto dstDataShape = srcDataShape;
dstDataShape[axis] = uniqueLen;
redefineOutputMemory({ dstDataShape, {uniqueLen}, {cmpBlNum}, {uniqueLen}});

T* uniDataPtr = reinterpret_cast<T*>(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr());
memcpy(uniDataPtr, uniDataTmpPtr, getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetSize());
if (definedOutputs[FIRST_UNIQUE_IDX]) {
int *firstPtr = reinterpret_cast<int*>(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr());
memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int));
}
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
auto inToOutPtr = reinterpret_cast<int*>(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr());
memcpy(inToOutPtr, inToOutTmp.data(), cmpBlNum * sizeof(int));
}
if (definedOutputs[OCCURRENCES_NUM]) {
auto occurPtr = reinterpret_cast<int*>(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr());
memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int));
}
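
To make the slice-comparison logic in slicedTensorExec easier to follow, here is a toy 2D analogue that keeps the first occurrence of every distinct column of a row-major matrix; the helper name and shapes are hypothetical and it is illustrative only, not part of this commit:

#include <cstddef>
#include <vector>

// Treat each column of a row-major [rows x cols] matrix as a "slice" along
// axis 1 and return the column indices of unique slices, in order of first
// appearance. For a 3-column matrix whose first and third columns are
// identical, this returns {0, 1}.
std::vector<size_t> unique_columns_sketch(const std::vector<int>& data, size_t rows, size_t cols) {
    std::vector<size_t> uniqIdx;  // column indices of the unique slices found so far
    for (size_t c = 0; c < cols; ++c) {
        bool found = false;
        for (size_t u : uniqIdx) {
            bool equal = true;
            // Compare element-wise across the outer dimension (rows).
            for (size_t r = 0; r < rows && equal; ++r) {
                equal = data[r * cols + c] == data[r * cols + u];
            }
            if (equal) {
                found = true;
                break;
            }
        }
        if (!found) {
            uniqIdx.push_back(c);
        }
    }
    return uniqIdx;
}
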
@@ -6,10 +6,6 @@

#include <node.h>

#include <memory>
#include <string>
#include <vector>

namespace ov {
namespace intel_cpu {
namespace node {
@@ -50,8 +46,8 @@ private:
int axis = 0;
bool definedOutputs[4] = { false, false, false, false };
InferenceEngine::Precision dataPrecision;
int64_t dataTypeSize = 1;
size_t uniqueLen = 1;
int64_t dataTypeSize = 1l;
size_t uniqueLen = 1lu;

static constexpr size_t IN_DATA = 0;
static constexpr size_t AXIS = 1;
@@ -167,8 +167,6 @@ std::vector<std::string> disabledTestPatterns() {
// The kernel does not have such garbage. The diff 0.000000745 is taken into account in calculations and affects further type conversion.
// Reorder->GridSample->Reorder also does not work here. Potential fix is to use nearest conversion instead of truncation.
R"(.*GridSampleLayerTestCPU.*(BILINEAR|BICUBIC).*(i32|i8).*)",
// 98151. Not valid sorting for slices in reference.
R"(.*UniqueLayerTestCPU.*axis.*True.*)",
// AUTO does not support import / export
R"(.*smoke_Auto_BehaviorTests/OVCompiledGraphImportExportTest.*(mportExport|readFromV10IR).*/targetDevice=(AUTO).*)",
// AdaptiveAvgPool is converted into Reduce op for suitable parameters. CPU Reduce impl doesn't support non planar layout for 3D case
@@ -160,6 +160,26 @@ std::vector<CPUSpecificParams> getCPUInfo() {
return resCPUParams;
}

std::vector<std::vector<InputShape>> statShapes1D = {
{{{}, {{1}}}}, // Static shapes
{{{}, {{5}}}}, // Static shapes
{{{}, {{8}}}}, // Static shapes
{{{}, {{16}}}}, // Static shapes
{{{}, {{32}}}}, // Static shapes
{{{}, {{64}}}}, // Static shapes
{{{}, {{99}}}}, // Static shapes
};

INSTANTIATE_TEST_SUITE_P(smoke_static_1D, UniqueLayerTestCPU,
::testing::Combine(
::testing::ValuesIn(statShapes1D),
::testing::ValuesIn(std::vector<std::tuple<bool, int>>{{true, 0}, {false, 0}}),
::testing::ValuesIn(sorted),
::testing::ValuesIn(dataPrecisionSmoke),
::testing::ValuesIn(getCPUInfo()),
::testing::Values(additionalConfig[0])),
UniqueLayerTestCPU::getTestCaseName);

std::vector<std::vector<InputShape>> getStaticShapes() {
std::vector<std::vector<InputShape>> result = {
{ { {}, { {1, 1, 1} } } }, // Static shapes