Luwei/extend reorder test (#10003)
* Extend the reorder unit test. * Update CMake * fix some issues. * Update * Update * Update * Update * Update and fix caused by input portconfig only support NCSP. * Update Copyright * Add more tests * Apply review comments. * Update * Update * Fix building error. * Applied review comments. * Update * Update * Update * Fix CI * Update * Update Cmake
This commit is contained in:
parent
8e9eaaee91
commit
c73f6576e0
@ -14,6 +14,9 @@ addIeTargetTest(
|
|||||||
INCLUDES
|
INCLUDES
|
||||||
PUBLIC
|
PUBLIC
|
||||||
$<TARGET_PROPERTY:openvino_intel_cpu_plugin,SOURCE_DIR>/src
|
$<TARGET_PROPERTY:openvino_intel_cpu_plugin,SOURCE_DIR>/src
|
||||||
|
$<TARGET_PROPERTY:openvino_intel_cpu_plugin,SOURCE_DIR>/src/nodes
|
||||||
|
$<TARGET_PROPERTY:openvino_intel_cpu_plugin,SOURCE_DIR>/thirdparty/onednn
|
||||||
|
$<TARGET_PROPERTY:openvino_intel_cpu_plugin,SOURCE_DIR>/thirdparty/onednn/src
|
||||||
$<TARGET_PROPERTY:openvino::conditional_compilation,INTERFACE_INCLUDE_DIRECTORIES>
|
$<TARGET_PROPERTY:openvino::conditional_compilation,INTERFACE_INCLUDE_DIRECTORIES>
|
||||||
PRIVATE
|
PRIVATE
|
||||||
$<TARGET_PROPERTY:inference_engine_snippets,SOURCE_DIR>/include
|
$<TARGET_PROPERTY:inference_engine_snippets,SOURCE_DIR>/include
|
||||||
|
@ -1,266 +1,422 @@
|
|||||||
// Copyright (C) 2018-2022 Intel Corporation
|
// Copyright (C) 2018-2022 Intel Corporation
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
#include <common/blocked_desc_creator.h>
|
||||||
|
#include <cpu_types.h>
|
||||||
|
#include <edge.h>
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include <ie_common.h>
|
#include <ie_common.h>
|
||||||
|
#include <memory_desc/cpu_memory_desc_utils.h>
|
||||||
#include <nodes/reorder.h>
|
#include <memory_desc/dnnl_memory_desc.h>
|
||||||
#include "nodes/input.h"
|
|
||||||
#include <edge.h>
|
|
||||||
#include <node.h>
|
#include <node.h>
|
||||||
|
#include <nodes/reorder.h>
|
||||||
|
|
||||||
|
#include <common/memory_desc_wrapper.hpp>
|
||||||
|
#include <dnnl.hpp>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "../../../ie_test_utils/common_test_utils/common_utils.hpp"
|
||||||
#include "cache/multi_cache.h"
|
#include "cache/multi_cache.h"
|
||||||
|
#include "nodes/input.h"
|
||||||
|
|
||||||
|
using namespace InferenceEngine;
|
||||||
|
using namespace ov::intel_cpu;
|
||||||
|
namespace ReorderCPUTest {
|
||||||
|
/**
 * @brief Checks that the reorder output holds the same logical tensor as its input.
 *
 * Every logical element index reported by the input descriptor is translated to a physical
 * offset, separately for the source and for the destination descriptor, and the two values found
 * at those offsets are compared. The comparison uses ASSERT_EQ, so the first mismatch records a
 * fatal failure and ends the check.
 *
 * @param inputMemory  memory the reorder read from
 * @param outputMemory memory the reorder wrote to
 * @param prescision   element type of both memories; FP32 and I8 are handled, any other value
 *                     fails the test
 */
void checkReorder(const ov::intel_cpu::Memory& inputMemory,
                  const ov::intel_cpu::Memory& outputMemory,
                  const InferenceEngine::Precision& prescision) {
    auto srcData = inputMemory.GetData();
    auto dstData = outputMemory.GetData();
    // The descriptors are held in locals because the wrappers below are built from their `data` member.
    auto mdInput = inputMemory.GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
    auto mdOutput = outputMemory.GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();

    const dnnl::impl::memory_desc_wrapper mdwInput(mdInput.data);
    const dnnl::impl::memory_desc_wrapper mdwOutput(mdOutput.data);
    auto nelems = mdwInput.nelems();

    // The index uses the type returned by nelems() so the loop condition does not mix integer types.
    for (decltype(nelems) i = 0; i < nelems; ++i) {
        // Same logical index, resolved against each layout independently.
        const auto srcOffset = mdwInput.off_l(i, false);
        const auto dstOffset = mdwOutput.off_l(i, false);
        switch (prescision) {
        case InferenceEngine::Precision::FP32: {
            // Exact comparison is intended: a reorder between equal precisions only moves values.
            const auto s = *(static_cast<const float*>(srcData) + srcOffset);
            const auto d = *(static_cast<const float*>(dstData) + dstOffset);
            ASSERT_EQ(s, d) << "mismatch at position " << i;
            break;
        }
        case InferenceEngine::Precision::I8: {
            const auto s = *(static_cast<const int8_t*>(srcData) + srcOffset);
            const auto d = *(static_cast<const int8_t*>(dstData) + dstOffset);
            ASSERT_EQ(s, d) << "mismatch at position " << i;
            break;
        }
        default:
            FAIL() << "Unsupported data precision in the test: " << prescision.name();
        }
    }
}
|
||||||
|
|
||||||
|
std::string layoutName(const LayoutType& layout) {
|
||||||
|
if (layout == LayoutType::nspc)
|
||||||
|
return "nspc";
|
||||||
|
if (layout == LayoutType::ncsp)
|
||||||
|
return "ncsp";
|
||||||
|
if (layout == LayoutType::nCsp8c)
|
||||||
|
return "nCsp8c";
|
||||||
|
if (layout == LayoutType::nCsp16c)
|
||||||
|
return "nCsp16c";
|
||||||
|
return "Unsupported layout type";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Fills a memory object with the sequence 0, 1, 2, ... in logical element order.
 *
 * Logical element i is written at the physical offset the memory's own descriptor reports for it,
 * so the logical content is the same sequence whatever the layout. Although the memory is taken
 * by const reference, its buffer is written through the pointer returned by GetData().
 *
 * @param inputMemory memory whose buffer is filled
 * @param prec        element type of the buffer; FP32 and I8 are handled, any other value fails
 *                    the test
 */
void fillData(const ov::intel_cpu::Memory& inputMemory, const InferenceEngine::Precision& prec) {
    // Kept in a local for the whole function: the wrapper below is built from a member of this descriptor.
    ov::intel_cpu::DnnlMemoryDescPtr dnnlMdInput = inputMemory.GetDescWithType<DnnlMemoryDesc>();
    const dnnl::impl::memory_desc_wrapper mdInput{dnnlMdInput->getDnnlDesc().data};
    auto elemNum = mdInput.nelems();
    auto inputReorderData = inputMemory.GetData();
    switch (prec) {
    case InferenceEngine::Precision::FP32: {
        auto* typedData = static_cast<float*>(inputReorderData);
        // The index uses the type returned by nelems() so the loop condition does not mix integer types.
        for (decltype(elemNum) i = 0; i < elemNum; ++i)
            typedData[mdInput.off_l(i, false)] = static_cast<float>(i);
        break;
    }
    case InferenceEngine::Precision::I8: {
        auto* typedData = static_cast<int8_t*>(inputReorderData);
        // The index is narrowed to int8_t, so stored values repeat once it exceeds the int8_t range.
        for (decltype(elemNum) i = 0; i < elemNum; ++i)
            typedData[mdInput.off_l(i, false)] = static_cast<int8_t>(i);
        break;
    }
    default:
        FAIL() << "Unsupported data precision in the test: " << prec.name();
    }
}
|
||||||
|
struct ReorderCustomImplTestParamSet {
|
||||||
|
// logical dimension of input
|
||||||
|
std::vector<size_t> srcDims;
|
||||||
|
bool isNspc2Ncsp;
|
||||||
|
uint32_t strideFactor;
|
||||||
|
InferenceEngine::Precision prec;
|
||||||
|
size_t stridedAxis;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ReorderCPUTestParamSet {
|
||||||
|
ngraph::PartialShape inputPartialShape;
|
||||||
|
// logical dimension vector of input
|
||||||
|
std::vector<std::vector<size_t>> inputShapes;
|
||||||
|
LayoutType srcLayout;
|
||||||
|
LayoutType dstLayout;
|
||||||
|
InferenceEngine::Precision prec;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ReorderCPUTestGraph {
|
||||||
|
public:
|
||||||
|
void buildReorderGraph(const ov::intel_cpu::CpuBlockedMemoryDesc& inputDesc,
|
||||||
|
const ov::intel_cpu::CpuBlockedMemoryDesc& outputDesc) {
|
||||||
|
const dnnl::engine cpuEngine = {dnnl::engine::kind::cpu, 0};
|
||||||
|
ov::intel_cpu::WeightsSharing::Ptr weightsCache;
|
||||||
|
|
||||||
|
inputNode = std::make_shared<ov::intel_cpu::node::Input>(inputDesc.clone(),
|
||||||
|
"Reorder_Input",
|
||||||
|
"Parameter",
|
||||||
|
cpuEngine,
|
||||||
|
weightsCache);
|
||||||
|
reorderNode = std::make_shared<ov::intel_cpu::node::Reorder>("Reorder", cpuEngine, weightsCache);
|
||||||
|
outputNode = std::make_shared<ov::intel_cpu::node::Input>(outputDesc.clone(),
|
||||||
|
"Reorder_Output",
|
||||||
|
"Result",
|
||||||
|
cpuEngine,
|
||||||
|
weightsCache);
|
||||||
|
|
||||||
|
parentEdge = std::make_shared<ov::intel_cpu::Edge>(inputNode, reorderNode, 0, 0);
|
||||||
|
childEdge = std::make_shared<ov::intel_cpu::Edge>(reorderNode, outputNode, 0, 0);
|
||||||
|
parentEdge->changeStatus(ov::intel_cpu::Edge::Status::NeedAllocation);
|
||||||
|
childEdge->changeStatus(ov::intel_cpu::Edge::Status::NeedAllocation);
|
||||||
|
reorderNode->addEdge(parentEdge);
|
||||||
|
reorderNode->addEdge(childEdge);
|
||||||
|
|
||||||
|
auto rtParamsCache = std::make_shared<ov::intel_cpu::MultiCache>(100);
|
||||||
|
|
||||||
|
auto parentMemory = std::make_shared<ov::intel_cpu::Memory>(cpuEngine);
|
||||||
|
auto childMemory = std::make_shared<ov::intel_cpu::Memory>(cpuEngine);
|
||||||
|
parentMemory->Create(inputDesc, nullptr);
|
||||||
|
childMemory->Create(outputDesc, nullptr);
|
||||||
|
|
||||||
|
parentEdge->reuse(parentMemory);
|
||||||
|
childEdge->reuse(childMemory);
|
||||||
|
|
||||||
|
reorderNode->setDescs(inputDesc, outputDesc);
|
||||||
|
reorderNode->setRuntimeCache(rtParamsCache);
|
||||||
|
std::array<std::shared_ptr<ov::intel_cpu::Node>, 3> nodes{inputNode, reorderNode, outputNode};
|
||||||
|
for (auto& n : nodes) {
|
||||||
|
n->init();
|
||||||
|
n->getSupportedDescriptors();
|
||||||
|
n->initSupportedPrimitiveDescriptors();
|
||||||
|
n->selectPrimitiveDescriptorByIndex(0);
|
||||||
|
}
|
||||||
|
stream = dnnl::stream{cpuEngine};
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
dnnl::stream stream;
|
||||||
|
std::shared_ptr<ov::intel_cpu::node::Input> inputNode;
|
||||||
|
std::shared_ptr<ov::intel_cpu::node::Reorder> reorderNode;
|
||||||
|
std::shared_ptr<ov::intel_cpu::node::Input> outputNode;
|
||||||
|
std::shared_ptr<ov::intel_cpu::Edge> parentEdge;
|
||||||
|
std::shared_ptr<ov::intel_cpu::Edge> childEdge;
|
||||||
|
InferenceEngine::Precision prec;
|
||||||
|
};
|
||||||
|
|
||||||
|
}// namespace ReorderCPUTest
|
||||||
|
|
||||||
|
using namespace ReorderCPUTest;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Test Reorder::optimizedNcsp2Nspc() and Reorder::optimizedNspc2Ncsp() for
|
* Test Reorder::optimizedNcsp2Nspc() and Reorder::optimizedNspc2Ncsp() for
|
||||||
* inPlace and non-inPlace cases. Specifically, the test checks that dst batch strides are
|
* inPlace and non-inPlace cases. Specifically, the test checks that dst batch strides are
|
||||||
* correctly taken into account by the custom impls (the case when the reorder is followed by an inplace concat).
|
* correctly taken into account by the custom impls (the case when the reorder is followed by an inplace concat).
|
||||||
*/
|
*/
|
||||||
typedef std::tuple<
|
class ReorderCustomizedStrideTest : public ::testing::Test,
|
||||||
std::vector<size_t>, // srcDims
|
public ::testing::WithParamInterface<ReorderCustomImplTestParamSet>,
|
||||||
bool> // forceInplace;
|
public ::ReorderCPUTest::ReorderCPUTestGraph {
|
||||||
ReorderCustomImplTestParamSet;
|
|
||||||
|
|
||||||
class ReorderCustomImplTestBase: public ::testing::Test {
|
|
||||||
public:
|
public:
|
||||||
static std::string getTestCaseName(const testing::TestParamInfo<ReorderCustomImplTestParamSet> &obj) {
|
static std::string getTestCaseName(const testing::TestParamInfo<ReorderCustomImplTestParamSet>& obj) {
|
||||||
std::vector<size_t> srcDims;
|
ReorderCustomImplTestParamSet p = obj.param;
|
||||||
bool inPlace;
|
|
||||||
std::tie(srcDims, inPlace) = obj.param;
|
|
||||||
std::ostringstream result;
|
std::ostringstream result;
|
||||||
result << "IS=(";
|
result << "IS:(";
|
||||||
for (const auto s : srcDims)
|
result << CommonTestUtils::vec2str(p.srcDims);
|
||||||
result << s << ".";
|
result << (p.isNspc2Ncsp ? "_NSPC2NCSP" : "_NCSP2NSPC");
|
||||||
result.seekp(-1, result.cur);
|
result << "_InputDataType:" << p.prec.name();
|
||||||
|
result << "_OutputDataType:" << p.prec.name();
|
||||||
|
result << "_StrideFactor:" << p.strideFactor;
|
||||||
|
result << "_StridedLogicChannelIndice:" << p.stridedAxis;
|
||||||
result << ")";
|
result << ")";
|
||||||
result << "_InPlace=" << inPlace;
|
|
||||||
return result.str();
|
return result.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Run() {
|
||||||
|
buildCustomizedReorderGraph();
|
||||||
|
infer();
|
||||||
|
validate();
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void executeReorderNode(const void* srcData, void* dstData) {
|
void SetUp() override {
|
||||||
auto getBlockedDims = [](const std::vector<size_t>& dims, const std::vector<size_t>& order){
|
ReorderCustomImplTestParamSet p = ::testing::TestWithParam<ReorderCustomImplTestParamSet>::GetParam();
|
||||||
|
srcDims = p.srcDims;
|
||||||
|
|
||||||
|
if (p.isNspc2Ncsp) {
|
||||||
|
// The custom NSPC2NCSP impl is used only if an input shape complies with:
|
||||||
|
ASSERT_TRUE(srcDims[1] <= 64 && srcDims[1] >= 16 && (getNumElems(srcDims) / srcDims[1]) >= 128);
|
||||||
|
// The custom NSPC2NCSP impl is used only for FP32
|
||||||
|
prec = InferenceEngine::Precision::FP32;
|
||||||
|
srcOrder = std::vector<size_t>{0, 2, 3, 1};
|
||||||
|
dstOrder = std::vector<size_t>{0, 1, 2, 3};
|
||||||
|
} else {
|
||||||
|
ASSERT_LE(getNumElems(srcDims), 256);
|
||||||
|
srcOrder = std::vector<size_t>{0, 1, 2, 3};
|
||||||
|
dstOrder = std::vector<size_t>{0, 2, 3, 1};
|
||||||
|
// The custom NCSP2NSPC impl is exercised with I8 data in this test
|
||||||
|
prec = InferenceEngine::Precision::I8;
|
||||||
|
}
|
||||||
|
dstDims = srcDims;
|
||||||
|
// Create strided dst layout for the inPlace case,
|
||||||
|
// For example: If need channel axis stride changes, need to set the height axis dimension.
|
||||||
|
dstDims[p.stridedAxis + 1] *= p.strideFactor;
|
||||||
|
}
|
||||||
|
|
||||||
|
void buildCustomizedReorderGraph() {
|
||||||
|
auto getBlockedDims = [](const std::vector<size_t>& dims, const std::vector<size_t>& order) {
|
||||||
std::vector<size_t> result;
|
std::vector<size_t> result;
|
||||||
result.reserve(order.size());
|
result.reserve(order.size());
|
||||||
for (auto i : order)
|
for (auto i : order)
|
||||||
result.push_back(dims[i]);
|
result.push_back(dims[i]);
|
||||||
return result;
|
return result;
|
||||||
};
|
};
|
||||||
auto getStrides = [](const std::vector<size_t>& dims){
|
auto getStrides = [](const std::vector<size_t>& dims) {
|
||||||
std::vector<size_t> result(dims.size());
|
std::vector<size_t> result(dims.size());
|
||||||
result[dims.size() - 1] = 1;
|
result[dims.size() - 1] = 1;
|
||||||
for (int i = dims.size() - 2; i >= 0; --i) {
|
for (int i = dims.size() - 2; i >= 0; --i) {
|
||||||
result[i] = result[i+1] * dims[i+1];
|
result[i] = result[i + 1] * dims[i + 1];
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
};
|
};
|
||||||
const dnnl::engine cpuEngine(dnnl::engine::kind::cpu, 0);
|
|
||||||
ov::intel_cpu::WeightsSharing::Ptr weightsCache;
|
|
||||||
|
|
||||||
auto inputNode = std::make_shared<ov::intel_cpu::node::Input>(ov::intel_cpu::Shape(srcDims),
|
|
||||||
prec,
|
|
||||||
"Reorder_Input", "Input",
|
|
||||||
cpuEngine, weightsCache);
|
|
||||||
auto reorderNode = std::make_shared<ov::intel_cpu::node::Reorder>("Reorder", cpuEngine, weightsCache);
|
|
||||||
auto outputNode = std::make_shared<ov::intel_cpu::node::Input>(ov::intel_cpu::Shape(dstDims),
|
|
||||||
prec,
|
|
||||||
"Reorder_Output", "Output",
|
|
||||||
cpuEngine, weightsCache);
|
|
||||||
|
|
||||||
auto parentEdge = std::make_shared<ov::intel_cpu::Edge>(inputNode, reorderNode, 0, 0);
|
|
||||||
auto childEdge = std::make_shared<ov::intel_cpu::Edge>(reorderNode, outputNode, 0, 0);
|
|
||||||
parentEdge->changeStatus(ov::intel_cpu::Edge::Status::NeedAllocation);
|
|
||||||
childEdge->changeStatus(ov::intel_cpu::Edge::Status::NeedAllocation);
|
|
||||||
reorderNode->addEdge(parentEdge);
|
|
||||||
reorderNode->addEdge(childEdge);
|
|
||||||
auto rtParamsCache = std::make_shared<ov::intel_cpu::MultiCache>(100);
|
|
||||||
|
|
||||||
const std::vector<size_t> srcBlockedDims = getBlockedDims(srcDims, srcOrder);
|
const std::vector<size_t> srcBlockedDims = getBlockedDims(srcDims, srcOrder);
|
||||||
const std::vector<size_t> srcStrides = getStrides(srcBlockedDims);
|
const std::vector<size_t> srcStrides = getStrides(srcBlockedDims);
|
||||||
const std::vector<size_t> offsetPaddingToData(srcDims.size(), 0);
|
const std::vector<size_t> offsetPaddingToData(srcDims.size(), 0);
|
||||||
|
|
||||||
const std::vector<size_t> dstBlockedDims = getBlockedDims(dstDims, dstOrder);
|
const std::vector<size_t> dstBlockedDims = getBlockedDims(dstDims, dstOrder);
|
||||||
const std::vector<size_t> dstStrides = getStrides(dstBlockedDims);
|
const std::vector<size_t> dstStrides = getStrides(dstBlockedDims);
|
||||||
|
|
||||||
const ov::intel_cpu::CpuBlockedMemoryDesc inputDesc(prec, ov::intel_cpu::Shape(srcDims),
|
const ov::intel_cpu::CpuBlockedMemoryDesc inputDesc(prec,
|
||||||
srcBlockedDims, srcOrder,
|
ov::intel_cpu::Shape(srcDims),
|
||||||
0, offsetPaddingToData, srcStrides);
|
srcBlockedDims,
|
||||||
|
srcOrder,
|
||||||
|
0,
|
||||||
|
offsetPaddingToData,
|
||||||
|
srcStrides);
|
||||||
|
|
||||||
const ov::intel_cpu::CpuBlockedMemoryDesc outputDesc(prec, ov::intel_cpu::Shape(srcDims),
|
const ov::intel_cpu::CpuBlockedMemoryDesc outputDesc(prec,
|
||||||
getBlockedDims(srcDims, dstOrder), dstOrder,
|
ov::intel_cpu::Shape(srcDims),
|
||||||
0, offsetPaddingToData, dstStrides);
|
getBlockedDims(srcDims, dstOrder),
|
||||||
|
dstOrder,
|
||||||
|
0,
|
||||||
|
offsetPaddingToData,
|
||||||
|
dstStrides);
|
||||||
|
buildReorderGraph(inputDesc, outputDesc);
|
||||||
|
}
|
||||||
|
|
||||||
auto parentMemory = std::make_shared<ov::intel_cpu::Memory>(cpuEngine);
|
void infer() {
|
||||||
auto childMemory = std::make_shared<ov::intel_cpu::Memory>(cpuEngine);
|
generateInput();
|
||||||
parentMemory->Create(inputDesc, srcData);
|
|
||||||
childMemory->Create(outputDesc, dstData);
|
|
||||||
parentEdge->reuse(parentMemory);
|
|
||||||
childEdge->reuse(childMemory);
|
|
||||||
|
|
||||||
reorderNode->setDescs(inputDesc, outputDesc);
|
|
||||||
reorderNode->setRuntimeCache(rtParamsCache);
|
|
||||||
std::vector<std::shared_ptr<ov::intel_cpu::Node>> nodes {inputNode, reorderNode, outputNode};
|
|
||||||
for (auto &n : nodes) {
|
|
||||||
n->init();
|
|
||||||
n->getSupportedDescriptors();
|
|
||||||
n->initSupportedPrimitiveDescriptors();
|
|
||||||
n->selectPrimitiveDescriptorByIndex(0);
|
|
||||||
}
|
|
||||||
auto config = outputNode->getSelectedPrimitiveDescriptor()->getConfig();
|
|
||||||
config.inConfs.resize(1);
|
|
||||||
config.inConfs[0].inPlace(forceInplace ? 0 : -1);
|
|
||||||
outputNode->getSelectedPrimitiveDescriptor()->setConfig(config);
|
|
||||||
reorderNode->createPrimitive();
|
reorderNode->createPrimitive();
|
||||||
|
reorderNode->execute(stream);
|
||||||
dnnl::stream strm(cpuEngine);
|
|
||||||
reorderNode->execute(strm);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
void validate(void) {
|
||||||
void Run(const std::vector<T>& srcData, std::vector<T>& dstData) {
|
checkReorder(parentEdge->getMemory(), childEdge->getMemory(), prec);
|
||||||
fillData();
|
|
||||||
executeReorderNode(srcData.data(), dstData.data());
|
|
||||||
EXPECT_TRUE(resultIsCorrect(dstData));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fill srcData so that the results of NSPC2NCSP and NCSP2NSPC reorders are incremental numbers 0,1,2,...
|
// Fill srcData so that the results of NSPC2NCSP and NCSP2NSPC reorders are incremental numbers 0,1,2,...
|
||||||
// Fill dstData with zeros
|
// Fill dstData with zeros
|
||||||
virtual void fillData() = 0;
|
void generateInput() {
|
||||||
template<typename T>
|
fillData(parentEdge->getMemory(), prec);
|
||||||
bool resultIsCorrect(const std::vector<T>& dstData) {
|
memset(childEdge->getMemory().GetData(), 0, childEdge->getMemory().GetSize());
|
||||||
const size_t numElems = getNumElems(dstDims);
|
|
||||||
auto b = dstData.begin();
|
|
||||||
std::vector<T> expectedData(blockSize);
|
|
||||||
for (int i = 0; i < numElems / blockSize; i++, b += blockSize) {
|
|
||||||
if (i % 2 == 0) {
|
|
||||||
std::iota(expectedData.begin(), expectedData.end(), i / 2 * blockSize);
|
|
||||||
if (!std::equal(b, b + blockSize, expectedData.begin()))
|
|
||||||
return false;
|
|
||||||
} else if (!std::all_of(b, b + blockSize, [](T x){return x == 0;})) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t getNumElems(const std::vector<size_t>& dims) {
|
size_t getNumElems(const std::vector<size_t>& dims) {
|
||||||
size_t result = 1;
|
size_t result = 1;
|
||||||
for (auto d : dims)
|
for (auto d : dims)
|
||||||
result *= d;
|
result *= d;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
std::vector<size_t> srcDims;
|
std::vector<size_t> srcDims;
|
||||||
std::vector<size_t> srcOrder;
|
std::vector<size_t> srcOrder;
|
||||||
std::vector<size_t> dstDims;
|
std::vector<size_t> dstDims;
|
||||||
std::vector<size_t> dstOrder;
|
std::vector<size_t> dstOrder;
|
||||||
InferenceEngine::Precision prec;
|
|
||||||
bool forceInplace;
|
|
||||||
size_t blockSize;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class ReorderNSPC2NCSPTest: public testing::WithParamInterface<ReorderCustomImplTestParamSet>,
|
TEST_P(ReorderCustomizedStrideTest, OutputIsStrided) {
|
||||||
public ReorderCustomImplTestBase{
|
|
||||||
protected:
|
|
||||||
void SetUp() override {
|
|
||||||
std::tie(srcDims, forceInplace) = this->GetParam();
|
|
||||||
// The custom NSPC2NCSP impl is used only if an input shape complies with:
|
|
||||||
assert(srcDims[1] <= 64 && srcDims[1] >= 16 && (getNumElems(srcDims) / srcDims[1]) >= 128);
|
|
||||||
// The custom NSPC2NCSP impl is used only for FP32
|
|
||||||
prec = InferenceEngine::Precision::FP32;
|
|
||||||
srcOrder = std::vector<size_t> {0, 2, 3, 1};
|
|
||||||
dstOrder = std::vector<size_t> {0, 1, 2, 3};
|
|
||||||
dstDims = srcDims;
|
|
||||||
blockSize = getNumElems(srcDims);
|
|
||||||
// Create channel-strided dst layout for the inPlace case
|
|
||||||
// Other dstDims could also be supported, but fillData() and resultIsCorrect() should be updated accordingly.
|
|
||||||
if (forceInplace) {
|
|
||||||
dstDims[1] *= 2;
|
|
||||||
blockSize /= srcDims[0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void Run() {
|
|
||||||
ReorderCustomImplTestBase::Run(srcData, dstData);
|
|
||||||
}
|
|
||||||
void fillData() override {
|
|
||||||
dstData.resize(getNumElems(dstDims));
|
|
||||||
std::fill(dstData.begin(), dstData.end(), 0);
|
|
||||||
srcData.resize(getNumElems(srcDims));
|
|
||||||
const int numChannels = srcDims[1];
|
|
||||||
const int spBlockSize = srcDims[2] * srcDims[3];
|
|
||||||
const int batchSize = spBlockSize * numChannels;
|
|
||||||
int i = 0;
|
|
||||||
for (int n = 0; n < getNumElems(srcDims); n += batchSize) {
|
|
||||||
for (int sp = n; sp < n + spBlockSize; sp++) {
|
|
||||||
for (int c = sp; c < sp + batchSize; c += spBlockSize) {
|
|
||||||
srcData[i++] = static_cast<float>(c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::vector<float> dstData;
|
|
||||||
std::vector<float> srcData;
|
|
||||||
};
|
|
||||||
|
|
||||||
class ReorderNCSP2NSPCTest: public testing::WithParamInterface<ReorderCustomImplTestParamSet>,
|
|
||||||
public ReorderCustomImplTestBase{
|
|
||||||
protected:
|
|
||||||
void SetUp() override {
|
|
||||||
std::tie(srcDims, forceInplace) = this->GetParam();
|
|
||||||
// Avoid uint8_t overflow or modify fillNCSP2NSPC() and resultIsCorrect()
|
|
||||||
assert(getNumElems(srcDims) <= 256);
|
|
||||||
srcOrder = std::vector<size_t> {0, 1, 2, 3};
|
|
||||||
dstOrder = std::vector<size_t> {0, 2, 3, 1};
|
|
||||||
// The custom NSPC2NCSP impl is used only for U8
|
|
||||||
prec = InferenceEngine::Precision::U8;
|
|
||||||
dstDims = srcDims;
|
|
||||||
blockSize = getNumElems(srcDims);
|
|
||||||
// Create channel-strided dst layout for the inPlace case
|
|
||||||
// Other dstDims could also be supported, but fillData() and resultIsCorrect() should be updated accordingly.
|
|
||||||
if (forceInplace) {
|
|
||||||
dstDims[1] *= 2;
|
|
||||||
blockSize = srcDims[1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void Run() {
|
|
||||||
ReorderCustomImplTestBase::Run(srcData, dstData);
|
|
||||||
}
|
|
||||||
void fillData() override {
|
|
||||||
dstData.resize(getNumElems(dstDims));
|
|
||||||
std::fill(dstData.begin(), dstData.end(), 0);
|
|
||||||
srcData.resize(getNumElems(srcDims));
|
|
||||||
const int numChannels = srcDims[1];
|
|
||||||
const int batchSize = srcDims[2] * srcDims[3] * numChannels;
|
|
||||||
int i = 0;
|
|
||||||
for (int n = 0; n < getNumElems(srcDims); n += batchSize) {
|
|
||||||
for (int c = n; c < n + numChannels; c ++) {
|
|
||||||
for (int sp = c; sp < c + batchSize; sp += numChannels) {
|
|
||||||
srcData[i++] = static_cast<uint8_t>(sp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::vector<uint8_t> dstData;
|
|
||||||
std::vector<uint8_t> srcData;
|
|
||||||
};
|
|
||||||
|
|
||||||
TEST_P(ReorderNSPC2NCSPTest, NSPC2NCSP) {
|
|
||||||
Run();
|
Run();
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(ReorderNCSP2NSPCTest, NCSP2NSPC) {
|
const auto stridedParameter =
|
||||||
|
::testing::Values(ReorderCustomImplTestParamSet{{2, 16, 8, 8}, true, 2, InferenceEngine::Precision::FP32, 0},
|
||||||
|
ReorderCustomImplTestParamSet{{2, 16, 8, 8}, true, 4, InferenceEngine::Precision::FP32, 1},
|
||||||
|
ReorderCustomImplTestParamSet{{2, 16, 8, 8}, true, 3, InferenceEngine::Precision::FP32, 1},
|
||||||
|
ReorderCustomImplTestParamSet{{2, 16, 8, 8}, true, 1, InferenceEngine::Precision::FP32, 2},
|
||||||
|
ReorderCustomImplTestParamSet{{2, 8, 4, 4}, false, 2, InferenceEngine::Precision::I8, 0},
|
||||||
|
ReorderCustomImplTestParamSet{{2, 8, 4, 4}, false, 5, InferenceEngine::Precision::I8, 1},
|
||||||
|
ReorderCustomImplTestParamSet{{2, 8, 4, 4}, false, 1, InferenceEngine::Precision::I8, 2});
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_SUITE_P(smoke_ReorderTestCustomStrideWithFactor,
|
||||||
|
ReorderCustomizedStrideTest,
|
||||||
|
stridedParameter,
|
||||||
|
ReorderCustomizedStrideTest::getTestCaseName);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ReorderCPUTest to test the CPU plugin-in dynamism and RT cache
|
||||||
|
*/
|
||||||
|
class ReorderDynamismCPUTest : public ::testing::Test,
|
||||||
|
public ::testing::WithParamInterface<ReorderCPUTestParamSet>,
|
||||||
|
public ::ReorderCPUTest::ReorderCPUTestGraph {
|
||||||
|
public:
|
||||||
|
static std::string getTestCaseName(const testing::TestParamInfo<ReorderCPUTestParamSet>& obj) {
|
||||||
|
ReorderCPUTestParamSet p = obj.param;
|
||||||
|
std::ostringstream result;
|
||||||
|
result << "IS:(";
|
||||||
|
result << "InputPartialShape:" << CommonTestUtils::partialShape2str({p.inputPartialShape});
|
||||||
|
for (const auto inputShape : p.inputShapes) {
|
||||||
|
result << CommonTestUtils::vec2str(inputShape);
|
||||||
|
}
|
||||||
|
result << "_InputLayoutType:" << layoutName(p.srcLayout) << ".";
|
||||||
|
result << "_OutputLayoutType:" << layoutName(p.dstLayout) << ".";
|
||||||
|
result << "_InputDataType:" << p.prec.name();
|
||||||
|
result << "_OutputDataType:" << p.prec.name();
|
||||||
|
result << ")";
|
||||||
|
return result.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Run() {
|
||||||
|
for (auto inputshape : inputShapes) {
|
||||||
|
generate_inputs(inputshape);
|
||||||
|
infer();
|
||||||
|
validate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void generate_inputs(const std::vector<size_t>& inputShape) {
|
||||||
|
parentEdge->getParent()->redefineOutputMemory({inputShape});
|
||||||
|
fillData(parentEdge->getMemory(), prec);
|
||||||
|
}
|
||||||
|
void infer() {
|
||||||
|
reorderNode->executeDynamic(stream);
|
||||||
|
}
|
||||||
|
void validate(void) {
|
||||||
|
checkReorder(parentEdge->getMemory(), childEdge->getMemory(), prec);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct BuildReorderParams {
|
||||||
|
ov::intel_cpu::Shape srcShape;
|
||||||
|
ov::intel_cpu::Shape dstShape;
|
||||||
|
LayoutType srcLayout;
|
||||||
|
LayoutType dstLayout;
|
||||||
|
};
|
||||||
|
|
||||||
|
void SetUp() override {
|
||||||
|
ReorderCPUTestParamSet reorderTestParam = this->GetParam();
|
||||||
|
BuildReorderParams reorderParams;
|
||||||
|
reorderParams.srcLayout = reorderTestParam.srcLayout;
|
||||||
|
reorderParams.dstLayout = reorderTestParam.dstLayout;
|
||||||
|
reorderParams.srcShape = ov::intel_cpu::Shape(reorderTestParam.inputPartialShape);
|
||||||
|
reorderParams.dstShape = reorderParams.srcShape;
|
||||||
|
inputShapes = reorderTestParam.inputShapes;
|
||||||
|
prec = reorderTestParam.prec;
|
||||||
|
|
||||||
|
buildReorderDynamismGraph(reorderParams);
|
||||||
|
}
|
||||||
|
|
||||||
|
void buildReorderDynamismGraph(const BuildReorderParams& reorderParams) {
|
||||||
|
BlockedDescCreator::CreatorsMap blockCreatorMap = BlockedDescCreator::getCommonCreators();
|
||||||
|
auto srcBlockedDescCreator = blockCreatorMap[reorderParams.srcLayout];
|
||||||
|
auto dstBlockedDescCreator = blockCreatorMap[reorderParams.dstLayout];
|
||||||
|
|
||||||
|
const ov::intel_cpu::CpuBlockedMemoryDesc inputDesc =
|
||||||
|
srcBlockedDescCreator->createDesc(prec, reorderParams.srcShape);
|
||||||
|
|
||||||
|
const ov::intel_cpu::CpuBlockedMemoryDesc outputDesc =
|
||||||
|
dstBlockedDescCreator->createDesc(prec, reorderParams.dstShape);
|
||||||
|
|
||||||
|
buildReorderGraph(inputDesc, outputDesc);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<std::vector<size_t>> inputShapes;
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(ReorderDynamismCPUTest, CompareResult) {
|
||||||
Run();
|
Run();
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<bool> forceInplace {false, true};
|
const auto reorderCpuTestDynamismParams =
|
||||||
const auto NSPC2NCSPparams =::testing::Combine(
|
::testing::Values(ReorderCPUTestParamSet{{2, 16, 8, -1},
|
||||||
::testing::Values(std::vector<size_t> {2, 16, 8, 8}),
|
{{2, 16, 8, 8}, {2, 16, 8, 16}, {2, 16, 8, 8}},
|
||||||
::testing::ValuesIn(forceInplace));
|
LayoutType::nspc,
|
||||||
|
LayoutType::ncsp,
|
||||||
|
InferenceEngine::Precision::FP32},
|
||||||
|
ReorderCPUTestParamSet{{-1, -1, -1, -1},
|
||||||
|
{{2, 8, 4, 4}, {2, 8, 8, 4}, {2, 8, 4, 4}},
|
||||||
|
LayoutType::ncsp,
|
||||||
|
LayoutType::nspc,
|
||||||
|
InferenceEngine::Precision::FP32},
|
||||||
|
ReorderCPUTestParamSet{{2, 32, -1, 4},
|
||||||
|
{{2, 32, 3, 4}, {2, 32, 6, 4}, {2, 32, 3, 4}},
|
||||||
|
LayoutType::ncsp,
|
||||||
|
LayoutType::nCsp8c,
|
||||||
|
InferenceEngine::Precision::FP32},
|
||||||
|
ReorderCPUTestParamSet{{-1, 32, -1, -1},
|
||||||
|
{{2, 32, 3, 4}, {2, 32, 6, 4}, {2, 32, 3, 4}},
|
||||||
|
LayoutType::nCsp16c,
|
||||||
|
LayoutType::nspc,
|
||||||
|
InferenceEngine::Precision::I8});
|
||||||
|
|
||||||
INSTANTIATE_TEST_SUITE_P(smoke_ReorderTestCustomNSPC, ReorderNSPC2NCSPTest, NSPC2NCSPparams,
|
INSTANTIATE_TEST_SUITE_P(smoke_ReorderTestDynamism,
|
||||||
ReorderCustomImplTestBase::getTestCaseName);
|
ReorderDynamismCPUTest,
|
||||||
|
reorderCpuTestDynamismParams,
|
||||||
const auto NCSP2NSPCparams =::testing::Combine(
|
ReorderDynamismCPUTest::getTestCaseName);
|
||||||
::testing::Values(std::vector<size_t> {2, 8, 4, 4}),
|
|
||||||
::testing::ValuesIn(forceInplace));
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_SUITE_P(smoke_ReorderTestCustomNCSP, ReorderNCSP2NSPCTest, NCSP2NSPCparams,
|
|
||||||
ReorderCustomImplTestBase::getTestCaseName);
|
|
||||||
|
Loading…
Reference in New Issue
Block a user