[IE][VPU]: Enables native gather support (#3502)

* [IE]: Allows plugins to disable Gather -> GatherIE conversion

The Gather layer takes the axis as its 3rd input rather than as an
attribute, and may take indices as a 0D scalar input

Signed-off-by: Gladilov, Gleb <gleb.gladilov@intel.com>

* [IE][VPU]: Disables Gather -> GatherIE conversion

The Gather -> GatherIE conversion may decompose the Gather
operation into Unsqueeze + Gather + Squeeze when the
indices input is a 0D scalar.

For a dynamic Gather, such a decomposition would
break the dynamic path, so the Myriad plugin has to support
the Gather operation natively, without the legacy conversion.

Signed-off-by: Gladilov, Gleb <gleb.gladilov@intel.com>

* [IE][VPU]: Enables native Gather support

Gather layer in contrast with GatherIE takes
axis as 3rd input, not attribute and may take
indices input as 0D scalar input.

0D -> 1D conversion happens automatically at
the beginning of frontend.

Axis as 3rd input is supported for single value
integral scalar only.

Signed-off-by: Gladilov, Gleb <gleb.gladilov@intel.com>

* [IE][VPU][Tests]: Enable new infra single layer Gather tests

* Removes corresponding tests from old infrastructure
* Enables test cases with 0D indices input
* Extracts a base test fixture from the shared tests fixture.
  Unfortunately, Google Test supports the Combine generator
  for tuples of up to 10 elements only, and the shared
  tests fixture already uses a 10-element tuple for its
  test parameters. At the same time, the Myriad plugin needs
  to specify a configuration option. Since the configuration
  option cannot be passed as a test parameter, a separate
  fixture class is required; a common base class is used
  to avoid code duplication.

Signed-off-by: Gladilov, Gleb <gleb.gladilov@intel.com>

* [IE][VPU]: Updates firmware

Enables native Gather support on device side
This commit is contained in:
Gladilov, Gleb 2020-12-10 13:23:36 +03:00 committed by GitHub
parent cf3213a9c5
commit 8213505e24
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 194 additions and 393 deletions

View File

@ -15,14 +15,14 @@ include(dependency_solver)
set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x)
set(VPU_SUPPORTED_FIRMWARES_HASH
"e687ed209ff72b215d3d648b980747faa8287215935bef4a87faa79d1d141df7"
"32a3f529385d9ceec6f6a842dd1927b69c83f9e04f40819c168f8149316402e6")
"abf12ace5e20f77b29743322c7e9f812446936bdcefa0ea640aa914169024e3d"
"8630649b26fc9a38f889225e552b41f1eb5ba1a9a56419c5fd8ed176f0cc2ccf")
#
# Default packages
#
set(FIRMWARE_PACKAGE_VERSION 1534)
set(FIRMWARE_PACKAGE_VERSION 1536)
set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2")
#

View File

@ -485,7 +485,7 @@ InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr
details::convertPrecision(node->get_output_element_type(0))};
auto res = std::make_shared<InferenceEngine::CNNLayer>(attrs);
res->params["type"] = "not";
return res;
return res;
});
addSpecificCreator({"LSTMCellIE"},
@ -973,7 +973,7 @@ InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr
REQUIRED_IE_CONVERSION_CREATOR("GroupConvolution", "ConvolutionIE");
REQUIRED_IE_CONVERSION_CREATOR("GroupConvolutionBackpropData", "DeconvolutionIE");
addSpecificCreator({ "Convolution", "Gather", "GatherTree", "GRUCell", "GRUSequence", "HardSigmoid",
addSpecificCreator({ "Convolution", "GatherTree", "GRUCell", "GRUSequence", "HardSigmoid",
"LRN", "LSTMCell", "LSTMSequence", "NonMaxSuppression", "RNNCell", "RNNSequence", "OneHot",
"Pad", "PriorBoxClustered", "PriorBox", "Proposal", "Selu", "Swish", "Tile"},
[](const std::shared_ptr<::ngraph::Node>& node, const std::map<std::string, std::string>& params)

View File

@ -635,12 +635,11 @@ void PadValidator::parseParams(CNNLayer* layer) {
GatherValidator::GatherValidator(const std::string& _type): LayerValidator(_type) {}
void GatherValidator::parseParams(CNNLayer* layer) {
auto casted = dynamic_cast<GatherLayer*>(layer);
if (!casted) {
THROW_IE_EXCEPTION << layer->name << " Layer is not instance of GatherLayer class";
if (auto casted = dynamic_cast<GatherLayer*>(layer)) {
casted->axis = casted->GetParamAsInt("axis", 0);
} else if (layer->insData.size() != 3) {
THROW_IE_EXCEPTION << layer->name << " Gather layer is expected to have 3 inputs";
}
casted->axis = casted->GetParamAsInt("axis", 0);
}
//

View File

@ -38,6 +38,7 @@
#include "vpu/ngraph/transformations/eliminate_shapeof_after_dsr.hpp"
#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
#include <legacy/ie_util_internal.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_gather_to_gather_ie.hpp>
namespace vpu {
@ -187,6 +188,7 @@ ie::ICNNNetwork::Ptr FrontEnd::convertNetwork(ie::ICNNNetwork& network) {
manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
manager.get_pass_config()->disable<ngraph::pass::ConvertGatherToGatherIEMatcher>();
manager.set_callback(transformationsPredicate);
manager.run_passes(nGraphFunc);

View File

@ -13,18 +13,23 @@
namespace vpu {
void FrontEnd::parseGather(const Model& model, const ie::CNNLayerPtr& _layer, const DataVector& inputs, const DataVector& outputs) const {
IE_ASSERT(inputs.size() == 2);
IE_ASSERT(outputs.size() == 1);
auto layer = std::dynamic_pointer_cast<ie::GatherLayer>(_layer);
IE_ASSERT(layer != nullptr);
void FrontEnd::parseGather(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const {
VPU_THROW_UNLESS(layer != nullptr, "Encountered nullptr CNN layer");
VPU_THROW_UNLESS(inputs.size() == 3, "Expected {} inputs (data, indices, axis), got {}", 3, inputs.size());
VPU_THROW_UNLESS(outputs.size() == 1, "Expected {} outputs, got {}", 1, outputs.size());
VPU_THROW_UNLESS(inputs[2]->usage() == DataUsage::Const, "Only constant axis is supported, but got {} data object", inputs[2]->usage());
VPU_THROW_UNLESS(inputs[2]->desc().type() == DataType::S32, "Only {} is supported as axis data type, got {}", DataType::S32, inputs[2]->desc().type());
VPU_THROW_UNLESS(inputs[2]->desc().numDims() == 1, "Only single value axis is supported, got {}D data object", inputs[2]->desc().numDims());
VPU_THROW_UNLESS(inputs[2]->desc().totalDimSize() == 1, "Only single value axis is supported, got {} elements", inputs[2]->desc().totalDimSize());
auto input = inputs[0];
IE_ASSERT(layer->axis < input->desc().numDims());
const auto axis = inputs[2]->content()->get<std::int32_t>()[0];
const auto ieNormalizedAxis = axis < 0 ? input->desc().numDims() + axis : axis;
VPU_THROW_UNLESS(ieNormalizedAxis >= 0 && ieNormalizedAxis < input->desc().numDims(),
"Axis value must fit into input tensor, got axis = {}, input rank = {}", axis, input->desc().numDims());
const auto perm = DimsOrder::fromNumDims(input->desc().numDims()).toPermutation();
const auto ieNormalizedAxis = layer->axis < 0 ? input->desc().numDims() + layer->axis : layer->axis;
const auto axisDim = perm[input->desc().numDims() - 1 - ieNormalizedAxis];
_stageBuilder->addGatherStage(model, layer->name, layer, inputs[0], inputs[1], outputs[0], axisDim);

View File

@ -82,7 +82,6 @@ TEST(ConvertFunctionToCNNNetworkTests, OpsShouldBeConvertedToIERepresentation) {
ngraph::NodeVector should_converted_to_ie = {
std::make_shared<ngraph::opset4::Broadcast>(),
std::make_shared<ngraph::opset4::Convolution>(),
std::make_shared<ngraph::opset4::Gather>(),
std::make_shared<ngraph::opset4::GatherTree>(),
std::make_shared<ngraph::opset4::GroupConvolution>(),
std::make_shared<ngraph::opset4::GroupConvolutionBackpropData>(),
@ -391,4 +390,4 @@ TEST(ConvertFunctionToCNNNetworkTests, NonUniqueNamesParametersNegative) {
} catch(InferenceEngine::details::InferenceEngineException & e) {
EXPECT_THAT(e.what(), testing::HasSubstr(std::string("Detected two output operations with the same name:")));
}
}
}

View File

@ -0,0 +1,120 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "single_layer_tests/gather.hpp"
#include <vpu/private_plugin_config.hpp>
using namespace LayerTestsDefinitions;
namespace {
// Parameter tuple of the Myriad-specific Gather test. It is narrower than the
// shared gatherParamsTuple; MyriadGatherLayerTest::generateParams expands it.
using GatherParams = std::tuple<
std::vector<size_t>, // Indices shape
std::pair<std::vector<size_t>, int>, // Input shapes and axis
InferenceEngine::Precision // Network precision
>;
// Shapes of the indices input; the empty entry is the 0D (scalar) indices case.
const std::vector<std::vector<std::size_t>> indicesShapes = {
{},
{5},
{10, 5},
{1, 128, 1},
{15, 4, 20, 5},
};
// Pairs of (data input shape, axis). Negative axes are listed explicitly and
// every axis in this table lies within [-rank, rank - 1] of its shape.
// NOTE(review): the removed legacy Myriad test used {30522, 768} for its
// {1, 128, 1} indices cases; confirm that {3052, 768} here is intentional.
const std::vector<std::pair<std::vector<std::size_t>, int>> inputShapes = {
{{6, 12, 10, 24}, -4},
{{6, 12, 10, 24}, -3},
{{3052, 768}, -2},
{{6, 12, 10, 24}, -2},
{{10}, -1},
{{3052, 768}, -1},
{{6, 12, 10, 24}, -1},
{{10}, 0},
{{3052, 768}, 0},
{{6, 12, 10, 24}, 0},
{{3052, 768}, 1},
{{6, 12, 10, 24}, 1},
{{6, 12, 10, 24}, 2},
{{6, 12, 10, 24}, 3},
};
// Network precisions the test is instantiated with.
const std::vector<InferenceEngine::Precision> networkPrecisions = {
InferenceEngine::Precision::I32,
InferenceEngine::Precision::FP32,
};
// Myriad flavour of the Gather single layer test. It is parameterized with the
// compact GatherParams tuple and expands it to the shared gatherParamsTuple
// before delegating to GatherLayerTestBase::SetUp.
class MyriadGatherLayerTest : public testing::WithParamInterface<GatherParams>, public GatherLayerTestBase {
public:
    // Builds the test name out of input shape, axis, indices shape and network precision.
    static std::string getTestCaseName(const testing::TestParamInfo<GatherParams>& obj) {
        const auto& indicesDims = std::get<0>(obj.param);
        const auto& dataDimsAndAxis = std::get<1>(obj.param);
        const auto& precision = std::get<2>(obj.param);

        std::ostringstream name;
        name << "IS=" << CommonTestUtils::vec2str(dataDimsAndAxis.first) << "_"
             << "axis=" << dataDimsAndAxis.second << "_"
             << "indicesShape=" << CommonTestUtils::vec2str(indicesDims) << "_"
             << "IP=" << precision;
        return name.str();
    }

protected:
    void SetUp() override {
        // The plugin option is set before the common set-up builds the function.
        configuration[InferenceEngine::MYRIAD_DETECT_NETWORK_BATCH] = CONFIG_VALUE(NO);
        const auto expandedParams = generateParams(GetParam());
        GatherLayerTestBase::SetUp(expandedParams);
    }

private:
    // Expands the compact parameters to the shared tuple: indices values are
    // generated, input/output precisions are UNSPECIFIED, layouts are ANY and
    // the target device is Myriad.
    static gatherParamsTuple generateParams(const GatherParams& params) {
        const auto& indicesDims = std::get<0>(params);
        const auto& dataDimsAndAxis = std::get<1>(params);

        return std::make_tuple(
            generateIndices(indicesDims, dataDimsAndAxis.first, dataDimsAndAxis.second),
            indicesDims,
            dataDimsAndAxis.second,
            dataDimsAndAxis.first,
            std::get<2>(params),
            InferenceEngine::Precision::UNSPECIFIED,
            InferenceEngine::Precision::UNSPECIFIED,
            InferenceEngine::Layout::ANY,
            InferenceEngine::Layout::ANY,
            CommonTestUtils::DEVICE_MYRIAD);
    }

    // Produces indices values for the given indices shape; the size of the
    // gathered dimension of the input is passed as the range of the generator.
    static std::vector<int> generateIndices(const std::vector<size_t>& indicesShape, const std::vector<size_t>& inputShape, int axis) {
        if (axis < 0) {
            axis += static_cast<int>(inputShape.size());
        }

        // getTotal yields 0 for an empty shape, while 0D indices hold one element.
        const auto elementsCount = indicesShape.empty() ? 1 : CommonTestUtils::getTotal(indicesShape);

        std::vector<int> indices(elementsCount);
        CommonTestUtils::fill_data_random(indices.data(), indices.size(), inputShape[axis]);
        return indices;
    }
};
// Single test body: everything is delegated to Run() of the test fixture.
TEST_P(MyriadGatherLayerTest, accuracy) {
Run();
}
// Instantiates the test for every combination of indices shape,
// (input shape, axis) pair and network precision from the tables above;
// test names are produced by MyriadGatherLayerTest::getTestCaseName.
INSTANTIATE_TEST_CASE_P(
smoke_Gather,
MyriadGatherLayerTest,
testing::Combine(
testing::ValuesIn(indicesShapes),
testing::ValuesIn(inputShapes),
testing::ValuesIn(networkPrecisions)),
MyriadGatherLayerTest::getTestCaseName);
} // namespace

View File

@ -27,8 +27,13 @@ typedef std::tuple<
InferenceEngine::Layout, // Output layout
std::string // Device name
> gatherParamsTuple;
class GatherLayerTest : public testing::WithParamInterface<gatherParamsTuple>,
virtual public LayerTestsUtils::LayerTestsCommon {
// Base fixture for Gather single layer tests. It is not parameterized itself:
// derived fixtures pass an already formed gatherParamsTuple to SetUp.
class GatherLayerTestBase : virtual public LayerTestsUtils::LayerTestsCommon {
protected:
// Common set-up driven by an explicit parameter tuple.
void SetUp(const gatherParamsTuple& params);
};
class GatherLayerTest : public testing::WithParamInterface<gatherParamsTuple>, public GatherLayerTestBase {
public:
static std::string getTestCaseName(const testing::TestParamInfo<gatherParamsTuple> &obj);
@ -36,4 +41,4 @@ protected:
void SetUp() override;
};
} // namespace LayerTestsDefinitions
} // namespace LayerTestsDefinitions

View File

@ -20,6 +20,24 @@
namespace LayerTestsDefinitions {
void GatherLayerTestBase::SetUp(const gatherParamsTuple& params) {
int axis;
std::vector<int> indices;
std::vector<size_t> indicesShape;
std::vector<size_t> inputShape;
InferenceEngine::Precision netPrecision;
std::tie(indices, indicesShape, axis, inputShape, netPrecision, inPrc, outPrc, inLayout, outLayout, targetDevice) = params;
ASSERT_EQ(ngraph::shape_size(indicesShape), indices.size()) << "Indices vector size and provided indices shape doesn't fit each other";
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto functionParams = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(functionParams));
auto indicesNode = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape(indicesShape), indices);
auto axisNode = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis});
auto gather = std::make_shared<ngraph::opset3::Gather>(paramOuts[0], indicesNode, axisNode);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(gather)};
function = std::make_shared<ngraph::Function>(results, functionParams, "gather");
}
std::string GatherLayerTest::getTestCaseName(const testing::TestParamInfo<gatherParamsTuple> &obj) {
int axis;
std::vector<int> indices;
@ -44,27 +62,12 @@ std::string GatherLayerTest::getTestCaseName(const testing::TestParamInfo<gather
}
void GatherLayerTest::SetUp() {
int axis;
std::vector<int> indices;
std::vector<size_t> indicesShape;
std::vector<size_t> inputShape;
InferenceEngine::Precision netPrecision;
std::tie(indices, indicesShape, axis, inputShape, netPrecision, inPrc, outPrc, inLayout, outLayout, targetDevice) = this->GetParam();
ASSERT_EQ(ngraph::shape_size(indicesShape), indices.size())
<< "Indices vector size and provided indices shape doesn't fit each other";
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto indicesNode = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape(indicesShape), indices);
auto axisNode = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis});
auto gather = std::make_shared<ngraph::opset3::Gather>(paramOuts[0], indicesNode, axisNode);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(gather)};
function = std::make_shared<ngraph::Function>(results, params, "gather");
GatherLayerTestBase::SetUp(GetParam());
}
TEST_P(GatherLayerTest, CompareWithRefs) {
Run();
};
} // namespace LayerTestsDefinitions
} // namespace LayerTestsDefinitions

View File

@ -120,4 +120,9 @@ inline auto tuple2Vector(Tuple&& tuple) -> decltype(tuple2Vector(std::declval<Tu
return tuple2Vector(std::forward<Tuple>(tuple), makeIndices<Tuple>());
}
// Returns the product of all dimensions of the shape.
// An empty shape yields 0 (not 1): callers that treat an empty shape as
// a scalar with a single element have to handle that case themselves.
template<class T>
inline T getTotal(const std::vector<T>& shape) {
    if (shape.empty()) {
        return 0;
    }

    T product = static_cast<T>(1);
    for (const auto& dim : shape) {
        product = product * dim;
    }
    return product;
}
} // namespace CommonTestUtils

View File

@ -152,6 +152,20 @@ static void fill_data_roi(float *data, size_t size, const uint32_t range, const
}
}
template<class T>
void inline fill_data_random(T* pointer, std::size_t size, const uint32_t range = 10, int32_t start_from = 0, const int32_t k = 1, const int seed = 1) {
testing::internal::Random random(seed);
random.Generate(range);
if (start_from < 0 && !std::is_signed<T>::value) {
start_from = 0;
}
for (std::size_t i = 0; i < size; i++) {
pointer[i] = static_cast<T>(start_from + static_cast<int64_t>(random.Generate(range)));
}
}
/** @brief Fill blob with random data.
*
* @param blob Target blob
@ -165,15 +179,8 @@ static void fill_data_roi(float *data, size_t size, const uint32_t range, const
template<InferenceEngine::Precision::ePrecision PRC>
void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, const uint32_t range = 10, int32_t start_from = 0, const int32_t k = 1, const int seed = 1) {
using dataType = typename InferenceEngine::PrecisionTrait<PRC>::value_type;
testing::internal::Random random(1);
random.Generate(range);
auto *rawBlobDataPtr = blob->buffer().as<dataType *>();
if (start_from < 0 && !std::is_signed<dataType>::value) {
start_from = 0;
}
for (size_t i = 0; i < blob->size(); i++) {
rawBlobDataPtr[i] = static_cast<dataType>(start_from + static_cast<int64_t>(random.Generate(range)));
}
fill_data_random(rawBlobDataPtr, blob->size(), range, start_from, k, seed);
}
template<InferenceEngine::Precision::ePrecision PRC>

View File

@ -1,34 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "myriad_layers_gather_test.hpp"
using namespace testing;
// Instantiates myriadLayerGather_smoke with an explicit list of cases.
// Each GatherTestParams entry is { input shape, indices shape, axis, precision },
// where precision is either "FP16" or "I32".
INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerGather_smoke,
// Synthetic tests
// input shape, indices shape, axis, precision
Values(GatherTestParams { {36549, 1024}, {16}, 0, "FP16" },
GatherTestParams { {10}, {10}, 0, "FP16" },
GatherTestParams { {36549, 1024}, {10}, 0, "FP16" },
GatherTestParams { {365490}, {10}, 0, "FP16" },
GatherTestParams { {10, 1024}, {10}, 0, "FP16" },
GatherTestParams { {30522, 768}, {1, 128, 1}, 0, "FP16" },
GatherTestParams { {30522, 768}, {1, 128, 1}, 1, "FP16" },
GatherTestParams { {6, 12, 10, 24}, {15, 4, 20, 5}, 0, "FP16" },
GatherTestParams { {6, 12, 10, 24}, {15, 4, 20, 5}, 1, "FP16" },
GatherTestParams { {6, 12, 10, 24}, {15, 4, 20, 5}, 2, "FP16" },
GatherTestParams { {6, 12, 10, 24}, {15, 4, 20, 5}, 3, "FP16" },
GatherTestParams { {10}, {10}, 0, "I32" },
GatherTestParams { {365490}, {10}, 0, "I32" },
GatherTestParams { {36549, 768}, {10}, 0, "I32" },
GatherTestParams { {30522, 768}, {1, 128, 1}, 0, "I32" },
GatherTestParams { {30522, 768}, {1, 128, 1}, 1, "I32" },
GatherTestParams { {6, 12, 10, 24}, {15, 4, 20, 5}, 0, "I32" },
GatherTestParams { {6, 12, 10, 24}, {15, 4, 20, 5}, 3, "I32" },
// Customer use-cases
// From: Mask R-CNN
// input shape, indices shape, axis, precision
GatherTestParams { {1000, 3}, {1}, 1, "FP16" },
GatherTestParams { {1000, 3}, {1}, 1, "I32" }));

View File

@ -1,310 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "myriad_layers_tests.hpp"
#include "myriad_layers_reference_functions.hpp"
#include "vpu_tests_config.hpp"
#include "vpu_case_common.hpp"
#include <algorithm>
#include <random>
#include <vector>
#include <string>
using namespace InferenceEngine;
using InputShape = std::vector<int>;
using IndicesShape = std::vector<int>;
using Axis = int;
using Type = std::string; // "FP16", "I32"
using GatherTestParams = std::tuple<InputShape,
IndicesShape,
Axis,
Type>;
// Legacy Myriad test fixture for the Gather layer.
// testGather() builds a one-layer IR (v7) with `input` and `indices` network
// inputs, runs it on the device and compares the result exactly against
// ref_gather().
class myriadLayerGather_smoke :
public myriadLayerTestBaseWithParam<GatherTestParams> {
protected:
// Runs a complete Gather check for the current test parameters.
void testGather() {
SKIP_IF_CURRENT_TEST_IS_DISABLED();
_config[InferenceEngine::MYRIAD_DETECT_NETWORK_BATCH] = CONFIG_VALUE(NO);
//
// Parse and check test parameters
//
const GatherTestParams& gatherTestParams = GetParam();
const std::vector<int>& inputShape = std::get<0>(gatherTestParams);
const std::vector<int>& indicesShape = std::get<1>(gatherTestParams);
const int axisParam = std::get<2>(gatherTestParams);
const std::string & type = std::get<3>(gatherTestParams);
IE_ASSERT(type == "I32" ||
type == "FP16");
const int indicesNDims = indicesShape.size();
const int inputNDims = inputShape.size();
const int outputNDims = indicesNDims + inputNDims - 1;
IE_ASSERT(outputNDims > 0);
// NB: axis param must be in [-len(in.shape), len(in.shape)-1]
const int axis = axisParam + (axisParam < 0 ? inputNDims : 0);
IE_ASSERT(0 <= axis && axis < inputNDims);
// Deduce shape of `output` tensor
//
// E.g.:
// {N, C, H, W} could be shape of `input`
// {I, J} could be shape of `indices`
// {I, J, C, H, W} could be shape of `output`
std::vector<int> outputShape;
for (int i = 0; i < axis; i++) {
outputShape.push_back(inputShape[i]);
}
for (int i = 0; i < indicesNDims; i++) {
outputShape.push_back(indicesShape[i]);
}
for (int i = axis + 1; i < inputNDims; i++) {
outputShape.push_back(inputShape[i]);
}
// NOTE(review): compares size_t with int (signed/unsigned mix).
IE_ASSERT(outputShape.size() == outputNDims);
//
// Skip test if data is too large for device
//
const int inputTotal = getTotal(inputShape);
const int outputTotal = getTotal(outputShape);
const int indicesTotal = getTotal(indicesShape);
const Precision precision = type == "I32" ?
Precision::I32 :
Precision::FP16;
const int bpp = precision == Precision::I32 ?
sizeof(int32_t) :
sizeof(ie_fp16);
const int threshold = 50 * (1 << 20); // empirical
// NOTE(review): the byte sizes are computed in int; very large shapes could overflow.
const bool tooLarge = inputTotal * bpp > threshold ||
outputTotal * bpp > threshold;
DISABLE_IF(tooLarge && !CheckMA2085());
//
// Initialize 1-layer network
//
std::string model = createModel(inputShape,
outputShape,
indicesShape,
axis,
type);
ASSERT_NO_THROW(readNetwork(model));
const auto& network = _cnnNetwork;
_inputsInfo = network.getInputsInfo();
_inputsInfo["input"]->setPrecision(precision);
_inputsInfo["indices"]->setPrecision(Precision::I32);
_outputsInfo = network.getOutputsInfo();
_outputsInfo["gather"]->setPrecision(precision);
//
// Create infer request and get its blobs pointers
//
StatusCode st = OK;
ASSERT_NO_THROW(st = _vpuPluginPtr->LoadNetwork(_exeNetwork, network, _config, &_resp));
ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
ASSERT_NE(_exeNetwork, nullptr) << _resp.msg;
ASSERT_NO_THROW(st = _exeNetwork->CreateInferRequest(_inferRequest, &_resp));
ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
Blob::Ptr inputBlob;
ASSERT_NO_THROW(st = _inferRequest->GetBlob("input", inputBlob, &_resp));
ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
Blob::Ptr indicesBlob;
ASSERT_NO_THROW(st = _inferRequest->GetBlob("indices", indicesBlob, &_resp));
ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
Blob::Ptr outputBlob;
ASSERT_NO_THROW(st = _inferRequest->GetBlob("gather", outputBlob, &_resp));
ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
// The reference blob reuses the tensor descriptor of the output blob.
Blob::Ptr referenceBlob;
if (type == "I32") {
referenceBlob = make_shared_blob<int32_t>(outputBlob->getTensorDesc());
} else {
referenceBlob = make_shared_blob<ie_fp16>(outputBlob->getTensorDesc());
}
referenceBlob->allocate();
//
// Initialize `input` and `indices` blobs
//
void* inputBlobData = inputBlob->buffer();
ASSERT_NE(inputBlobData, nullptr);
void* indicesBlobData = indicesBlob->buffer();
ASSERT_NE(indicesBlobData, nullptr);
// Largest valid index along the gathered axis.
const int indicesLimit = inputShape[axis] - 1;
// Default-constructed generator: the same sequence on every run.
std::mt19937 gen;
fillUniformly(inputBlobData, inputTotal, precision, 0, 255, gen);
fillUniformly(indicesBlobData, indicesTotal, Precision::I32, 0, indicesLimit, gen);
//
// Infer
//
const auto inputLayout = inputBlob->getTensorDesc().getLayout();
const auto outputLayout = outputBlob->getTensorDesc().getLayout();
const auto indicesLayout = indicesBlob->getTensorDesc().getLayout();
const auto layoutPreference = vpu::LayoutPreference::ChannelMajor;
inputBlob->getTensorDesc().setLayout(vpu::deviceLayout(inputLayout, layoutPreference));
indicesBlob->getTensorDesc().setLayout(vpu::deviceLayout(indicesLayout, layoutPreference));
outputBlob->getTensorDesc().setLayout(vpu::deviceLayout(outputLayout, layoutPreference));
referenceBlob->getTensorDesc().setLayout(vpu::deviceLayout(outputLayout, layoutPreference));
ASSERT_NO_THROW(st = _inferRequest->Infer(&_resp));
ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
//
// Check result
//
ref_gather(indicesBlob, inputBlob, referenceBlob, axis);
CompareCommonExact(outputBlob, referenceBlob);
}
private:
// Count total number of elements in ND tensor
// (the product starts from 1, so an empty shape gives 1).
// NOTE(review): std::accumulate / std::multiplies are used, but <numeric> and
// <functional> are not among the standard headers included directly by this
// file; presumably they arrive transitively, confirm.
static
int getTotal(const std::vector<int>& shape) {
return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
}
// Fill data[] array with random numbers
// distributed uniformly in the interval [a,b]
// FP16 data is generated as float and converted with f32tof16; I32 data uses
// the bounds rounded to int. Any other precision fails the IE_ASSERT below.
static
void fillUniformly(void* data,
const int num,
const Precision& precision,
const double a,
const double b,
std::mt19937& gen) {
if (Precision::FP16 == precision) {
std::uniform_real_distribution<float> uniform(a, b);
for (int i = 0; i < num; i++) {
const float v = uniform(gen);
reinterpret_cast<ie_fp16*>(data)[i] = PrecisionUtils::f32tof16(v);
}
} else if (Precision::I32 == precision) {
const int ia = static_cast<int>(std::round(a));
const int ib = static_cast<int>(std::round(b));
std::uniform_int_distribution<int> uniform(ia, ib);
for (int i = 0; i < num; i++) {
const int v = uniform(gen);
reinterpret_cast<int32_t*>(data)[i] = v;
}
} else {
IE_ASSERT(precision == Precision::I32 ||
precision == Precision::FP16);
}
}
// Builds the IR text of the test network: the placeholders of the template
// below are replaced with the given dimensions, axis and precision name.
// Note that:
// - IR version is v7 (should be v10): as readNetwork() method
// cannot parse / denies IR v10 if there's no weights tensor
static
std::string createModel(const std::vector<int>& inputShape,
const std::vector<int>& outputShape,
const std::vector<int>& indicesShape,
const int axis,
const std::string & type) {
std::string model = R"V0G0N(
<?xml version="1.0" ?>
<net name="testGather" version="7">
<layers>
<layer id="0" name="input" type="Input">
<output>
<port id="0" precision="__TYPE__">
__INPUT_DIMS__
</port>
</output>
</layer>
<layer id="1" name="indices" type="Input">
<output>
<port id="0" precision="I32">
__INDICES_DIMS__
</port>
</output>
</layer>
<layer id="2" name="gather" type="Gather">
<data axis="__AXIS__"/>
<input>
<port id="0" precision="__TYPE__">
__INPUT_DIMS__
</port>
<port id="1" precision="I32">
__INDICES_DIMS__
</port>
</input>
<output>
<port id="4" precision="__TYPE__">
__OUTPUT_DIMS__
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
</edges>
</net>
)V0G0N";
const std::string inputDimsStr = shapeToDimsString(inputShape);
const std::string outputDimsStr = shapeToDimsString(outputShape);
const std::string indicesDimsStr = shapeToDimsString(indicesShape);
const std::string axisStr = std::to_string(axis);
REPLACE_WITH_STR(model, "__INPUT_DIMS__", inputDimsStr);
REPLACE_WITH_STR(model, "__OUTPUT_DIMS__", outputDimsStr);
REPLACE_WITH_STR(model, "__INDICES_DIMS__", indicesDimsStr);
REPLACE_WITH_STR(model, "__AXIS__", axisStr);
REPLACE_WITH_STR(model, "__TYPE__", type);
return model;
}
// Converts a shape to a sequence of <dim>N</dim> elements separated by single
// spaces; an empty shape gives an empty string.
static
std::string shapeToDimsString(const std::vector<int>& shape)
{
std::string str;
// NOTE(review): `i < shape.size()` compares int with size_t (signed/unsigned mix).
for (int i = 0; i < shape.size(); i++) {
str += (i? " ": "");
str += "<dim>" + std::to_string(shape[i]) + "</dim>";
}
return str;
}
};
// Parameterized test entry point: the whole scenario lives in testGather().
TEST_P(myriadLayerGather_smoke, Gather) {
testGather();
}