[CPU] Added Gather-7 support (#5552)

This commit is contained in:
Alexandra Sidorova 2021-05-20 09:09:28 +03:00 committed by GitHub
parent 0a629716de
commit dcc288680b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 240 additions and 119 deletions

View File

@ -30,6 +30,8 @@
#include <transformations/op_conversions/convert_shuffle_channels3.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp>
#include <transformations/op_conversions/convert_gelu.hpp>
#include <transformations/op_conversions/convert_gather_v7_to_gather_v1.hpp>
#include <transformations/op_conversions/convert_gather_v1_to_gather_v7.hpp>
#include <transformations/op_conversions/gelu7_downgrade.hpp>
#include <transformations/op_conversions/hswish_decomposition.hpp>
#include <transformations/op_conversions/hsigmoid_decomposition.hpp>
@ -291,8 +293,10 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
pass_config->disable<ngraph::pass::ConvertShuffleChannels3>();
pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>();
pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
pass_config->disable<ngraph::pass::ConvertGather7ToGather1>();
pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>();
pass_config->enable<ngraph::pass::ConvertGather1ToGather7>();
if (useLpt) {
pass_config->set_callback<ngraph::pass::ConvertQuantizeDequantize>([](const_node_ptr &node) -> bool {

View File

@ -2,14 +2,12 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <cmath>
#include <vector>
#include <string>
#include <mkldnn_types.h>
#include "ie_parallel.hpp"
#include "mkldnn_gather_node.h"
#include <ngraph/opsets/opset1.hpp>
#include <precision_utils.h>
#include "common/cpu_memcpy.h"
using namespace MKLDNNPlugin;
@ -17,9 +15,9 @@ using namespace InferenceEngine;
bool MKLDNNGatherNode::isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
auto gatherOp = ngraph::as_type_ptr<const ngraph::op::v1::Gather>(op);
auto gatherOp = ngraph::as_type_ptr<const ngraph::op::v7::Gather>(op);
if (!gatherOp) {
errorMessage = "Only opset1 Gather operation is supported";
errorMessage = "Only opset7 Gather operation is supported";
return false;
}
@ -44,90 +42,96 @@ MKLDNNGatherNode::MKLDNNGatherNode(const std::shared_ptr<ngraph::Node>& op, cons
IE_THROW(NotImplemented) << errorMessage;
}
auto gatherOp = ngraph::as_type_ptr<ngraph::op::v1::Gather>(op);
auto gatherOp = ngraph::as_type_ptr<ngraph::op::v7::Gather>(op);
if (gatherOp->get_input_size() != 3 || gatherOp->get_output_size() != 1)
IE_THROW() << errorPrefix_ << "has incorrect number of input/output edges!";
const SizeVector& dictionary_dims = gatherOp->get_input_shape(GATHER_DICTIONARY);
if (dictionary_dims.size() == 0)
const SizeVector& srcDims = gatherOp->get_input_shape(GATHER_DATA);
const SizeVector& idxDims = gatherOp->get_input_shape(GATHER_INDEXES);
if (srcDims.size() == 0)
IE_THROW() << errorPrefix_ << "has incorrect input parameters dimension!";
axis = static_cast<int>(gatherOp->get_axis());
if (axis < 0)
axis += dictionary_dims.size();
// Dictionary must be at least rank axis + 1
if (!(-static_cast<int>(dictionary_dims.size()) <= axis && axis < static_cast<int>(dictionary_dims.size())))
axis += srcDims.size();
if (!(0 <= axis && axis < static_cast<int>(srcDims.size())))
IE_THROW() << errorPrefix_ << "has incorrect input parameters dimensions and axis number!";
// Find number of dictionaries, index range and data length
for (int i = 0; i < axis; i++)
numDictionaries *= dictionary_dims[i];
indexRange = dictionary_dims[axis];
for (size_t i = axis + 1; i < dictionary_dims.size(); i++)
dataLength *= dictionary_dims[i];
batchDims = static_cast<int>(gatherOp->get_batch_dims());
if (batchDims < 0)
batchDims += idxDims.size();
if (!(0 <= batchDims && batchDims <= std::min(static_cast<int>(srcDims.size()), static_cast<int>(idxDims.size()))) ||
batchDims > axis)
IE_THROW() << errorPrefix_ << "has incorrect batch_dims " << batchDims << "!";
if (dataLength == 0)
IE_THROW() << errorPrefix_ << "had incorrect input parameters dimension!";
for (int i = 0; i < batchDims; i++) {
if (srcDims[i] != idxDims[i])
IE_THROW() << errorPrefix_ << "has incorrect first " << batchDims << " data and indices dimensions!";
}
}
void MKLDNNGatherNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
Precision inIdxPrecision = getOriginalInputPrecisionAtPort(GATHER_INDEXES);
if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32 && inIdxPrecision != Precision::FP16)
inIdxPrecision = Precision::I32;
Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DICTIONARY);
Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DATA);
addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision},
{TensorDescCreatorTypes::ncsp, inIdxPrecision},
{TensorDescCreatorTypes::ncsp, Precision::I32},
{TensorDescCreatorTypes::ncsp, Precision::I32}},
{{TensorDescCreatorTypes::ncsp, dataPrecision}},
impl_desc_type::ref_any);
}
template <typename index_t, class Conversion>
void MKLDNNGatherNode::gather() {
size_t src_indexSize = getParentEdgeAt(GATHER_INDEXES)->getBlob()->size();
size_t outputSize = getChildEdgeAt(0)->getBlob()->byteSize();
const auto *src_index = reinterpret_cast<const index_t *>(getParentEdgeAt(GATHER_INDEXES)->getMemoryPtr()->GetPtr());
const auto *src_dataDict = reinterpret_cast<const uint8_t *>(getParentEdgeAt(GATHER_DICTIONARY)->getMemoryPtr()->GetPtr());
auto *dst_data = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
void MKLDNNGatherNode::createPrimitive() {
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
IE_THROW() << errorPrefix_ << " has not allocated destination memory.";
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
IE_THROW() << errorPrefix_ << " has not allocated input memory.";
if (getSelectedPrimitiveDescriptor() == nullptr)
IE_THROW() << errorPrefix_ << " has unidentified preferable primitive descriptor.";
size_t len = dataLength * getParentEdgeAt(GATHER_DICTIONARY)->getDesc().getPrecision().size();
const SizeVector srcDims = getParentEdgeAt(GATHER_DATA)->getDims().ToSizeVector();
const SizeVector idxDims = getParentEdgeAt(GATHER_INDEXES)->getDims().ToSizeVector();
const SizeVector dstDims = getChildEdgeAt(0)->getDims().ToSizeVector();
dataSize = getParentEdgeAt(GATHER_DATA)->getDesc().getPrecision().size();
parallel_for(src_indexSize, [&](size_t i) {
unsigned int idx = Conversion()(src_index[i]);
indexRange = srcDims[axis];
batchSize = std::accumulate(srcDims.begin(), srcDims.begin() + batchDims, 1, std::multiplies<size_t>());
outerSize = std::accumulate(srcDims.begin() + batchDims, srcDims.begin() + axis, 1, std::multiplies<size_t>());
dataLength = std::accumulate(srcDims.begin() + axis + 1, srcDims.end(), 1, std::multiplies<size_t>());
srcBatchStride = std::accumulate(srcDims.begin() + batchDims, srcDims.end(), 1, std::multiplies<size_t>());
idxBatchStride = std::accumulate(idxDims.begin() + batchDims, idxDims.end(), 1, std::multiplies<size_t>());
dstBatchStride = std::accumulate(dstDims.begin() + batchDims, dstDims.end(), 1, std::multiplies<size_t>());
len = dataLength * dataSize;
// Index clipping
if (idx < indexRange) {
// Copying data to destination from Dictionary
for (size_t j = 0; j < numDictionaries; j++) {
cpu_memcpy_s(&dst_data[len * (i + j * src_indexSize)],
outputSize - (len * (i + j * src_indexSize)),
&src_dataDict[len * (idx + j * indexRange)],
len);
}
} else {
for (size_t j = 0; j < numDictionaries; j++) {
memset(&dst_data[len * (i + j * src_indexSize)], 0, len);
}
}
});
if (dataLength == 0)
IE_THROW() << errorPrefix_ << "had incorrect input parameters dimension!";
}
void MKLDNNGatherNode::execute(mkldnn::stream strm) {
switch (getParentEdgeAt(GATHER_INDEXES)->getDesc().getPrecision()) {
case Precision::FP32:
gather<float, f32toUi32>();
break;
case Precision::I32:
gather<int32_t, i32toUi32>();
break;
default:
return IE_THROW() << "Unsupported indices input precision";
}
const int32_t* srcIndexes = reinterpret_cast<const int32_t*>(getParentEdgeAt(GATHER_INDEXES)->getMemoryPtr()->GetPtr());
const uint8_t* srcData = reinterpret_cast<const uint8_t*>(getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr());
uint8_t* dstData = reinterpret_cast<uint8_t*>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
parallel_for2d(batchSize, idxBatchStride, [&](const size_t i, const size_t j) {
const unsigned int idx = static_cast<uint32_t>(srcIndexes[i * idxBatchStride + j]);
// Negative indices are not supported; out-of-range indices produce zero-filled output
if (idx < indexRange) {
for (size_t k = 0; k < outerSize; ++k) {
const size_t srcStride = (i * srcBatchStride + k * dataLength * indexRange) * dataSize;
const size_t dstStride = (i * dstBatchStride + k * dataLength * idxBatchStride) * dataSize;
cpu_memcpy(&dstData[dstStride + j * len], &srcData[srcStride + idx * len], len);
}
} else {
for (size_t k = 0; k < outerSize; ++k) {
memset(&dstData[(i * dstBatchStride + k * dataLength * idxBatchStride) * dataSize + j * len], 0, len);
}
}
});
}
bool MKLDNNGatherNode::created() const {

View File

@ -18,37 +18,31 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override {};
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
private:
struct f32toUi32 {
inline unsigned int operator()(const float value) {
return static_cast<unsigned int>(value);
}
};
struct i32toUi32 {
inline unsigned int operator()(const int32_t value) {
return static_cast<unsigned int>(value);
}
};
int axis = 0;
size_t numDictionaries = 1;
int batchDims = 0;
size_t indexRange = 0;
size_t batchSize = 1;
size_t outerSize = 1;
size_t dataLength = 1;
static const size_t GATHER_DICTIONARY = 0;
size_t srcBatchStride = 1;
size_t idxBatchStride = 1;
size_t dstBatchStride = 1;
size_t dataSize = 1;
size_t len = 1;
static const size_t GATHER_DATA = 0;
static const size_t GATHER_INDEXES = 1;
static const size_t GATHER_AXIS = 2;
std::string errorPrefix_;
template <typename index_t, class Conversion>
void gather();
};
} // namespace MKLDNNPlugin

View File

@ -12,29 +12,30 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::I64,
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16,
InferenceEngine::Precision::BF16,
InferenceEngine::Precision::I8
};
const std::vector<std::vector<size_t>> inputShapes = {
std::vector<size_t>{10, 20, 30, 40},
};
const std::vector<std::vector<int>> indices = {
std::vector<int>{0, 3, 2, 1},
};
const std::vector<std::vector<size_t>> indicesShapes = {
const std::vector<std::vector<size_t>> inputShapes_1D = {
std::vector<size_t>{4},
std::vector<size_t>{2, 2}
};
const std::vector<int> axes = {0, 1, 2, 3, -1};
const std::vector<std::vector<size_t>> indicesShapes_1D = {
std::vector<size_t>{1},
std::vector<size_t>{3},
};
const std::vector<std::tuple<int, int>> axes_batchdims_1D = {
std::tuple<int, int>{0, 0}
};
const auto params = testing::Combine(
testing::ValuesIn(indices),
testing::ValuesIn(indicesShapes),
testing::ValuesIn(axes),
testing::ValuesIn(inputShapes),
const auto gather7Params_1D = testing::Combine(
testing::ValuesIn(inputShapes_1D),
testing::ValuesIn(indicesShapes_1D),
testing::ValuesIn(axes_batchdims_1D),
testing::ValuesIn(netPrecisions),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
@ -43,11 +44,148 @@ const auto params = testing::Combine(
testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_CASE_P(
smoke_Gather,
GatherLayerTest,
params,
GatherLayerTest::getTestCaseName
INSTANTIATE_TEST_CASE_P(smoke_Gather7_1D, Gather7LayerTest, gather7Params_1D, Gather7LayerTest::getTestCaseName);
const std::vector<std::vector<size_t>> inputShapes_2D = {
std::vector<size_t>{4, 19},
};
const std::vector<std::vector<size_t>> indicesShapes_2D = {
std::vector<size_t>{4},
std::vector<size_t>{4, 2},
};
const std::vector<std::tuple<int, int>> axes_batchdims_2D = {
std::tuple<int, int>{0, 0},
std::tuple<int, int>{1, 0},
std::tuple<int, int>{1, 1},
std::tuple<int, int>{-1, -1},
};
const auto gather7Params_2D = testing::Combine(
testing::ValuesIn(inputShapes_2D),
testing::ValuesIn(indicesShapes_2D),
testing::ValuesIn(axes_batchdims_2D),
testing::ValuesIn(netPrecisions),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_CASE_P(smoke_Gather7_2D, Gather7LayerTest, gather7Params_2D, Gather7LayerTest::getTestCaseName);
const std::vector<std::vector<size_t>> inputShapes4D = {
std::vector<size_t>{4, 5, 6, 7},
};
const std::vector<std::vector<size_t>> indicesShapes_BD0 = {
std::vector<size_t>{4},
std::vector<size_t>{2, 2},
std::vector<size_t>{3, 2, 4},
};
const std::vector<std::tuple<int, int>> axes_BD0 = {
std::tuple<int, int>{0, 0},
std::tuple<int, int>{1, 0},
std::tuple<int, int>{2, 0},
std::tuple<int, int>{-1, 0},
};
const auto gather7ParamsSubset_BD0 = testing::Combine(
testing::ValuesIn(inputShapes4D),
testing::ValuesIn(indicesShapes_BD0),
testing::ValuesIn(axes_BD0),
testing::ValuesIn(netPrecisions),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_CASE_P(smoke_Gather7_BD0, Gather7LayerTest, gather7ParamsSubset_BD0, Gather7LayerTest::getTestCaseName);
const std::vector<std::vector<size_t>> indicesShapes_BD1 = {
std::vector<size_t>{4, 2},
std::vector<size_t>{4, 5, 3},
std::vector<size_t>{4, 1, 2, 3},
};
const std::vector<std::tuple<int, int>> axes_BD1 = {
std::tuple<int, int>{1, 1},
std::tuple<int, int>{2, 1},
std::tuple<int, int>{-1, 1},
std::tuple<int, int>{-2, 1},
};
const auto gather7ParamsSubset_BD1 = testing::Combine(
testing::ValuesIn(inputShapes4D),
testing::ValuesIn(indicesShapes_BD1),
testing::ValuesIn(axes_BD1),
testing::ValuesIn(netPrecisions),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_CASE_P(smoke_Gather7_BD1, Gather7LayerTest, gather7ParamsSubset_BD1, Gather7LayerTest::getTestCaseName);
const std::vector<std::vector<size_t>> indicesShapes_BD2 = {
std::vector<size_t>{4, 5, 4, 3},
std::vector<size_t>{4, 5, 3, 2}
};
const std::vector<std::tuple<int, int>> axes_BD2 = {
std::tuple<int, int>{2, 2},
std::tuple<int, int>{3, -2},
std::tuple<int, int>{-1, 2},
std::tuple<int, int>{-1, -2},
};
const auto gather7ParamsSubset_BD2 = testing::Combine(
testing::ValuesIn(inputShapes4D),
testing::ValuesIn(indicesShapes_BD2),
testing::ValuesIn(axes_BD2),
testing::ValuesIn(netPrecisions),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_CASE_P(smoke_Gather7_BD2, Gather7LayerTest, gather7ParamsSubset_BD2, Gather7LayerTest::getTestCaseName);
const std::vector<std::vector<size_t>> indicesShapes_NegativeBD = {
std::vector<size_t>{4, 5, 4},
std::vector<size_t>{4, 5, 3}
};
const std::vector<std::tuple<int, int>> axes_NegativeBD = {
std::tuple<int, int>{0, -3},
std::tuple<int, int>{1, -2},
std::tuple<int, int>{2, -2},
std::tuple<int, int>{-2, -2},
std::tuple<int, int>{-1, -1},
std::tuple<int, int>{-2, -1},
};
const auto gather7ParamsSubset_NegativeBD = testing::Combine(
testing::ValuesIn(inputShapes4D),
testing::ValuesIn(indicesShapes_NegativeBD),
testing::ValuesIn(axes_NegativeBD),
testing::ValuesIn(netPrecisions),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_CASE_P(smoke_Gather7_NegativeBD, Gather7LayerTest, gather7ParamsSubset_NegativeBD, Gather7LayerTest::getTestCaseName);
} // namespace

View File

@ -79,6 +79,8 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*smoke_PSROIPoolingAverageLayoutTest.*BF16.*)",
R"(.*smoke_PSROIPoolingBilinearLayoutTest.*BF16.*)",
R"(.*smoke_ROIAlignLayoutTest.*BF16.*)",
// The reference implementation doesn't cover the I8 and U8 cases. Issue: 55842
R"(.*Gather7LayerTest.*netPRC=I8.*)",
};
// TODO: 54718 Accuracy mismatch
#ifdef _WIN32

View File

@ -51,27 +51,6 @@ private:
};
namespace {
/* Test insertion of the Convert layer if there is no suitable reorder.
Parameter[FP32] Constant[BF16]
\ /
\ /
\ Convert[I32] (Is inserted by the MKLDNNGraph)
\ /
Gather[FP32]
|
|
Output[FP32]
*/
TEST_F(AddConvertToReorderTest, smoke_TestAddConvert_CPU) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
BuildGraph(ngraph::element::bf16);
Run();
CheckNodeOfTypeCount(executableNetwork, "Convert", with_cpu_x86_avx512_core() ? 1 : 0);
CheckNodeOfTypeCount(executableNetwork, "Reorder", 0);
}
/* Test insertion of the Reorder layer if there is one.