[GPU] Add new operation GatherND-8 (#8586)
Signed-off-by: Kelvin Choi <kelvin.choi@intel.com>
This commit is contained in:
@@ -211,6 +211,6 @@ REGISTER_FACTORY(v7, Gather);
|
||||
|
||||
// ------------------------------ Supported v8 ops ------------------------------ //
|
||||
REGISTER_FACTORY(v8, Gather);
|
||||
|
||||
REGISTER_FACTORY(v8, GatherND);
|
||||
// --------------------------- Supported internal ops --------------------------- //
|
||||
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
|
||||
|
||||
@@ -26,6 +26,7 @@ static void CreateGatherNDOp(Program& p, const std::shared_ptr<ngraph::op::v5::G
|
||||
inputPrimitives[1],
|
||||
indices_rank,
|
||||
batch_dims,
|
||||
true,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(primitive);
|
||||
@@ -34,4 +35,27 @@ static void CreateGatherNDOp(Program& p, const std::shared_ptr<ngraph::op::v5::G
|
||||
|
||||
REGISTER_FACTORY_IMPL(v5, GatherND);
|
||||
|
||||
// Lowers an ngraph v8 GatherND node to a cldnn::gather_nd primitive and
// registers it (and its profiling entry) with the program.
static void CreateGatherNDOp(Program& p, const std::shared_ptr<ngraph::op::v8::GatherND>& op) {
    // GatherND takes exactly two inputs: data and indices.
    p.ValidateInputs(op, { 2 });

    const auto inputIds = p.GetInputPrimitiveIDs(op);
    const std::string primitiveName = layer_type_name_ID(op);

    // Rank of the indices tensor is taken from the static shape of input #1.
    const auto indicesRank = static_cast<int32_t>(op->get_input_shape(1).size());
    const auto batchDims = op->get_batch_dims();

    // The sixth argument is batch_merged_output; it is passed as false for the v8 op.
    const bool batchMergedOutput = false;

    auto gatherNdPrim = cldnn::gather_nd(primitiveName,
                                         inputIds[0],
                                         inputIds[1],
                                         indicesRank,
                                         batchDims,
                                         batchMergedOutput,
                                         op->get_friendly_name());

    p.AddPrimitive(gatherNdPrim);
    p.AddPrimitiveToProfiler(op);
}
|
||||
|
||||
REGISTER_FACTORY_IMPL(v8, GatherND);
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
#include <ngraph/opsets/opset5.hpp>
|
||||
|
||||
#include "single_layer_tests/gather_nd.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
@@ -33,15 +32,6 @@ const auto gatherNDArgsSubset1 = ::testing::Combine(
|
||||
::testing::ValuesIn(std::vector<int>({ 0, 1 })) // Batch dims
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_GatherND_set1, GatherNDLayerTest,
|
||||
::testing::Combine(
|
||||
gatherNDArgsSubset1,
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
::testing::ValuesIn(idxPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||
::testing::Values<Config>({})),
|
||||
GatherNDLayerTest::getTestCaseName);
|
||||
|
||||
// set2
|
||||
const auto gatherNDArgsSubset2 = ::testing::Combine(
|
||||
::testing::ValuesIn(std::vector<std::vector<size_t>>(
|
||||
@@ -51,15 +41,6 @@ const auto gatherNDArgsSubset2 = ::testing::Combine(
|
||||
::testing::ValuesIn(std::vector<int>({ 1, 2 })) // Batch dims
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_GatherND_set2, GatherNDLayerTest,
|
||||
::testing::Combine(
|
||||
gatherNDArgsSubset2,
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
::testing::ValuesIn(idxPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||
::testing::Values<Config>({})),
|
||||
GatherNDLayerTest::getTestCaseName);
|
||||
|
||||
// set3
|
||||
const auto gatherNDArgsSubset3 = ::testing::Combine(
|
||||
::testing::ValuesIn(std::vector<std::vector<size_t>>(
|
||||
@@ -69,7 +50,27 @@ const auto gatherNDArgsSubset3 = ::testing::Combine(
|
||||
::testing::ValuesIn(std::vector<int>({ 3, 4 })) // Batch dims
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_GatherND_set3, GatherNDLayerTest,
|
||||
|
||||
// -------------------------------- V5 --------------------------------
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_GatherND5_set1, GatherNDLayerTest,
|
||||
::testing::Combine(
|
||||
gatherNDArgsSubset1,
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
::testing::ValuesIn(idxPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||
::testing::Values<Config>({})),
|
||||
GatherNDLayerTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_GatherND5_set2, GatherNDLayerTest,
|
||||
::testing::Combine(
|
||||
gatherNDArgsSubset2,
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
::testing::ValuesIn(idxPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||
::testing::Values<Config>({})),
|
||||
GatherNDLayerTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_GatherND5_set3, GatherNDLayerTest,
|
||||
::testing::Combine(
|
||||
gatherNDArgsSubset3,
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
@@ -78,4 +79,32 @@ INSTANTIATE_TEST_SUITE_P(smoke_GatherND_set3, GatherNDLayerTest,
|
||||
::testing::Values<Config>({})),
|
||||
GatherNDLayerTest::getTestCaseName);
|
||||
|
||||
// -------------------------------- V8 --------------------------------
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_GatherND8_set1, GatherND8LayerTest,
|
||||
::testing::Combine(
|
||||
gatherNDArgsSubset1,
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
::testing::ValuesIn(idxPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||
::testing::Values<Config>({})),
|
||||
GatherND8LayerTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_GatherND8_set2, GatherND8LayerTest,
|
||||
::testing::Combine(
|
||||
gatherNDArgsSubset2,
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
::testing::ValuesIn(idxPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||
::testing::Values<Config>({})),
|
||||
GatherND8LayerTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_GatherND8_set3, GatherND8LayerTest,
|
||||
::testing::Combine(
|
||||
gatherNDArgsSubset3,
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
::testing::ValuesIn(idxPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||
::testing::Values<Config>({})),
|
||||
GatherND8LayerTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
#include <vector>
|
||||
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
using Config = std::map<std::string, std::string>;
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "shared_test_classes/single_layer/gather_nd.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
@@ -55,6 +54,7 @@ void GatherNDLayerTest::SetUp() {
|
||||
function = std::make_shared<ngraph::Function>(results, params, "gatherND");
|
||||
}
|
||||
|
||||
|
||||
// Produces the test-case name for the v8 suite by delegating to the
// GatherNDLayerTest name generator, which accepts the same GatherNDParams.
std::string GatherND8LayerTest::getTestCaseName(const testing::TestParamInfo<GatherNDParams>& obj) {
    std::string caseName = GatherNDLayerTest::getTestCaseName(obj);
    return caseName;
}
|
||||
@@ -70,13 +70,13 @@ void GatherND8LayerTest::SetUp() {
|
||||
auto ngDPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(dPrecision);
|
||||
auto ngIPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(iPrecision);
|
||||
|
||||
auto params = ngraph::builder::makeParams(ngDPrc, {dataShape});
|
||||
auto params = ngraph::builder::makeParams(ngDPrc, { dataShape });
|
||||
auto paramOuts = ngraph::helpers::convert2OutputVector(
|
||||
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
|
||||
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
|
||||
auto dataNode = paramOuts[0];
|
||||
auto gather = std::dynamic_pointer_cast<ngraph::opset8::GatherND>(
|
||||
ngraph::builder::makeGatherND(dataNode, indicesShape, ngIPrc, batchDims));
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(gather)};
|
||||
ngraph::builder::makeGatherND8(dataNode, indicesShape, ngIPrc, batchDims));
|
||||
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(gather) };
|
||||
function = std::make_shared<ngraph::Function>(results, params, "gatherND");
|
||||
}
|
||||
|
||||
|
||||
@@ -531,6 +531,12 @@ std::shared_ptr<ngraph::Node> makeGatherND(
|
||||
const element::Type& indicesType,
|
||||
const std::size_t batchDims);
|
||||
|
||||
std::shared_ptr<ngraph::Node> makeGatherND8(
|
||||
const ngraph::Output<Node>& dataNode,
|
||||
const ngraph::Shape& indicesShape,
|
||||
const element::Type& indicesType,
|
||||
const std::size_t batchDims);
|
||||
|
||||
std::shared_ptr<ngraph::Node> makeTile(const ngraph::Output<Node>& in,
|
||||
const std::vector<int64_t>& repeats);
|
||||
|
||||
|
||||
@@ -41,5 +41,34 @@ std::shared_ptr<Node> makeGatherND(
|
||||
return gatherNdNode;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> makeGatherND8(
|
||||
const ngraph::Output<Node>& dataNode,
|
||||
const ngraph::Shape& indicesShape,
|
||||
const element::Type& indicesType,
|
||||
const std::size_t batchDims) {
|
||||
const auto indices = [&] {
|
||||
const auto& dataShape = dataNode.get_shape();
|
||||
const auto indicesCount = std::accumulate(begin(indicesShape), prev(end(indicesShape)),
|
||||
1ull, std::multiplies<std::size_t>{});
|
||||
const auto sliceRank = indicesShape.back();
|
||||
|
||||
const auto maxDim = *std::max_element(begin(dataShape), end(dataShape));
|
||||
|
||||
auto indicesValues = NGraphFunctions::Utils::generateVector<element::Type_t::i32>(indicesCount * sliceRank, maxDim, 0);
|
||||
auto indicesData = indicesValues.data();
|
||||
for (int i = 0; i < indicesCount; i++) {
|
||||
for (int dim = 0; dim < sliceRank; dim++) {
|
||||
indicesData[0] = indicesData[0] % dataShape[dim + batchDims];
|
||||
indicesData++;
|
||||
}
|
||||
}
|
||||
return opset8::Constant::create(indicesType, indicesShape, indicesValues);
|
||||
}();
|
||||
|
||||
auto gatherNdNode = std::make_shared<opset8::GatherND>(dataNode, indices, batchDims);
|
||||
gatherNdNode->set_friendly_name("GatherND");
|
||||
|
||||
return gatherNdNode;
|
||||
}
|
||||
} // namespace builder
|
||||
} // namespace ngraph
|
||||
|
||||
@@ -19,25 +19,37 @@ struct gather_nd : public primitive_base<gather_nd> {
|
||||
CLDNN_DECLARE_PRIMITIVE(gather_nd)
|
||||
|
||||
/// @brief Constructs gather_nd primitive.
|
||||
/// @param id This primitive id.
|
||||
/// @param data Input data primitive id.
|
||||
/// @param indices Input indexes primitive id.
|
||||
/// @param indices_rank Rank of indices.
|
||||
/// @param batch_dims batch_dims as an attribute of GatherND. Optional.
|
||||
///
|
||||
/// @param id This primitive id.
|
||||
/// @param data Input data primitive id.
|
||||
/// @param indices Input indexes primitive id.
|
||||
/// @param indices_rank Rank of indices.
|
||||
/// @param batch_dims batch_dims as an attribute of GatherND. Optional.
|
||||
/// @param batch_merged_output If true, the leading batch_dims dimensions of the output shape are merged into one dimension (v5 behavior).
|
||||
/// In case of output{3, 2, 4, 5} at batch_dims = 2, real output shape should be {6, 4, 5}.
|
||||
/// This should be false for v8.
|
||||
/// For batch_dims < 2 this flag has no effect.
|
||||
gather_nd(const primitive_id& id,
|
||||
const primitive_id& data,
|
||||
const primitive_id& indices,
|
||||
const uint8_t indices_rank,
|
||||
const uint8_t batch_dims = 0,
|
||||
const bool batch_merged_output = true,
|
||||
const primitive_id& ext_prim_id = "",
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {data, indices}, ext_prim_id, output_padding), indices_rank(indices_rank), batch_dims(batch_dims) {}
|
||||
: primitive_base(id, {data, indices}, ext_prim_id, output_padding),
|
||||
indices_rank(indices_rank),
|
||||
batch_dims(batch_dims),
|
||||
batch_merged_output(batch_merged_output) {}
|
||||
|
||||
/// @brief GatherND indices_rank
|
||||
uint8_t indices_rank;
|
||||
|
||||
/// @brief GatherND batch_dims
|
||||
uint8_t batch_dims;
|
||||
|
||||
/// @brief GatherND batch_merged_output
|
||||
bool batch_merged_output;
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
|
||||
@@ -117,6 +117,7 @@ JitConstants GatherNDKernelRef::GetJitConstants(const gather_nd_params& params)
|
||||
|
||||
jit.AddConstant(MakeJitConstant("INDICES_RANK", params.indices_rank));
|
||||
jit.AddConstant(MakeJitConstant("BATCH_DIMS", params.batch_dims));
|
||||
jit.AddConstant(MakeJitConstant("BATCH_MERGED_OUTPUT", params.batch_merged_output));
|
||||
jit.AddConstant(MakeJitConstant("WI_SLICE_SIZE", GetSliceSize(params)));
|
||||
jit.AddConstant(MakeJitConstant("INDICES_LAST_DIM", GetIndicesLastDim(params)));
|
||||
|
||||
|
||||
@@ -11,11 +11,13 @@ namespace kernel_selector {
|
||||
// gather_nd_params
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct gather_nd_params : public base_params {
|
||||
gather_nd_params() : base_params(KernelType::GATHER_ND), indices_rank(0), batch_dims(0) {}
|
||||
gather_nd_params() : base_params(KernelType::GATHER_ND), indices_rank(0), batch_dims(0), batch_merged_output(true) {}
|
||||
|
||||
uint8_t indices_rank;
|
||||
|
||||
uint8_t batch_dims;
|
||||
|
||||
bool batch_merged_output;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -125,28 +125,47 @@ KERNEL(gather_nd_ref)(const __global INPUT0_TYPE* data,
|
||||
const uint out_f = idx_f;
|
||||
const uint out_b = idx_b;
|
||||
#else
|
||||
uint pitch_acc = 1;
|
||||
uint output_batch_size = 0;
|
||||
for (int i = BATCH_DIMS - 1; i >= 0; i--) {
|
||||
output_batch_size += (idx_arr[i] * pitch_acc);
|
||||
pitch_acc *= idx_dim[i];
|
||||
}
|
||||
#if BATCH_MERGED_OUTPUT
|
||||
uint pitch_acc = 1;
|
||||
uint output_batch_size = 0;
|
||||
for (int i = BATCH_DIMS - 1; i >= 0; i--) {
|
||||
output_batch_size += (idx_arr[i] * pitch_acc);
|
||||
pitch_acc *= idx_dim[i];
|
||||
}
|
||||
|
||||
#if OUTPUT_DIMS == 4
|
||||
const uint out_x = idx_arr[BATCH_DIMS+2];
|
||||
const uint out_y = idx_arr[BATCH_DIMS+1];
|
||||
#elif OUTPUT_DIMS == 5
|
||||
const uint out_x = idx_arr[BATCH_DIMS+3];
|
||||
const uint out_y = idx_arr[BATCH_DIMS+2];
|
||||
const uint out_z = idx_arr[BATCH_DIMS+1];
|
||||
#if OUTPUT_DIMS == 4
|
||||
const uint out_x = idx_arr[BATCH_DIMS+2];
|
||||
const uint out_y = idx_arr[BATCH_DIMS+1];
|
||||
#elif OUTPUT_DIMS == 5
|
||||
const uint out_x = idx_arr[BATCH_DIMS+3];
|
||||
const uint out_y = idx_arr[BATCH_DIMS+2];
|
||||
const uint out_z = idx_arr[BATCH_DIMS+1];
|
||||
#else
|
||||
const uint out_x = idx_arr[BATCH_DIMS+4];
|
||||
const uint out_y = idx_arr[BATCH_DIMS+3];
|
||||
const uint out_z = idx_arr[BATCH_DIMS+2];
|
||||
const uint out_w = idx_arr[BATCH_DIMS+1];
|
||||
#endif
|
||||
const uint out_f = idx_arr[BATCH_DIMS+0];
|
||||
const uint out_b = output_batch_size;
|
||||
#else
|
||||
const uint out_x = idx_arr[BATCH_DIMS+4];
|
||||
const uint out_y = idx_arr[BATCH_DIMS+3];
|
||||
const uint out_z = idx_arr[BATCH_DIMS+2];
|
||||
const uint out_w = idx_arr[BATCH_DIMS+1];
|
||||
#if OUTPUT_DIMS == 4
|
||||
const uint out_x = idx_arr[3];
|
||||
const uint out_y = idx_arr[2];
|
||||
#elif OUTPUT_DIMS == 5
|
||||
const uint out_x = idx_arr[4];
|
||||
const uint out_y = idx_arr[3];
|
||||
const uint out_z = idx_arr[2];
|
||||
#else
|
||||
const uint out_x = idx_arr[5];
|
||||
const uint out_y = idx_arr[4];
|
||||
const uint out_z = idx_arr[3];
|
||||
const uint out_w = idx_arr[2];
|
||||
#endif
|
||||
const uint out_f = idx_arr[1];
|
||||
const uint out_b = idx_arr[0];
|
||||
|
||||
#endif
|
||||
const uint out_f = idx_arr[BATCH_DIMS+0];
|
||||
const uint out_b = output_batch_size;
|
||||
#endif
|
||||
|
||||
const uint output_idx = GET_OUTPUT_INDEX(OUT_ORDER);
|
||||
|
||||
@@ -41,34 +41,41 @@ layout gather_nd_inst::calc_output_layout(gather_nd_node const& node) {
|
||||
output_sizes.push_back(input_layout[x]);
|
||||
}
|
||||
|
||||
// calculate batch_size by batch_dims
|
||||
int batch_size = 1;
|
||||
for (uint8_t x = 0; x < batch_dims; x++) {
|
||||
batch_size *= output_sizes[x];
|
||||
}
|
||||
|
||||
// create final output shape by batch_dims
|
||||
std::vector<tensor::value_type> final_output_sizes;
|
||||
|
||||
if (batch_dims > 0) {
|
||||
final_output_sizes.push_back(batch_size);
|
||||
if (op->batch_merged_output) {
|
||||
// calculate batch_size by batch_dims
|
||||
int batch_size = 1;
|
||||
for (uint8_t x = 0; x < batch_dims; x++) {
|
||||
batch_size *= output_sizes[x];
|
||||
}
|
||||
|
||||
if (batch_dims > 0) {
|
||||
final_output_sizes.push_back(batch_size);
|
||||
}
|
||||
|
||||
for (size_t x = static_cast<size_t>(batch_dims); x < output_sizes.size(); x++) {
|
||||
final_output_sizes.push_back(output_sizes[x]);
|
||||
}
|
||||
} else {
|
||||
for (size_t x = 0; x < output_sizes.size(); x++) {
|
||||
final_output_sizes.push_back(output_sizes[x]);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t x = static_cast<size_t>(batch_dims); x < output_sizes.size(); x++) {
|
||||
final_output_sizes.push_back(output_sizes[x]);
|
||||
}
|
||||
|
||||
auto output_format = cldnn::format::bfyx;
|
||||
if (final_output_sizes.size() >= 6) {
|
||||
output_format = cldnn::format::bfwzyx;
|
||||
auto output_format = cldnn::format::any;
|
||||
if (final_output_sizes.size() <= 4) {
|
||||
output_format = cldnn::format::bfyx;
|
||||
} else if (final_output_sizes.size() == 5) {
|
||||
output_format = cldnn::format::bfzyx;
|
||||
} else {
|
||||
output_format = cldnn::format::bfwzyx;
|
||||
}
|
||||
|
||||
auto output_sizes_tensor = tensor(tensor(final_output_sizes).sizes(output_format));
|
||||
auto padding = op->output_padding;
|
||||
|
||||
|
||||
if (node.has_fused_primitives()) {
|
||||
input_layout_origin.data_type = node.get_fused_output_layout().data_type;
|
||||
}
|
||||
|
||||
@@ -29,6 +29,7 @@ struct gather_nd_impl : typed_primitive_impl_ocl<gather_nd> {
|
||||
|
||||
gather_nd_params.indices_rank = arg.get_primitive()->indices_rank;
|
||||
gather_nd_params.batch_dims = arg.get_primitive()->batch_dims;
|
||||
gather_nd_params.batch_merged_output = arg.get_primitive()->batch_merged_output;
|
||||
|
||||
gather_nd_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));
|
||||
|
||||
|
||||
@@ -11,18 +11,20 @@
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
inline void DoTest(engine& engine,
|
||||
inline void DoTestBase(engine& engine,
|
||||
const cldnn::memory::ptr input0,
|
||||
const cldnn::memory::ptr input1,
|
||||
const std::vector<float>& expected_results,
|
||||
const int indices_rank,
|
||||
const int batch_dims) {
|
||||
const int batch_dims,
|
||||
const cldnn::format fmt,
|
||||
const tensor ts,
|
||||
const bool batch_merged_output) {
|
||||
topology topology;
|
||||
auto gather_nd_inst = gather_nd("gather_nd", "InputData", "InputIndices", indices_rank, batch_dims, batch_merged_output);
|
||||
topology.add(input_layout("InputData", input0->get_layout()));
|
||||
topology.add(input_layout("InputIndices", input1->get_layout()));
|
||||
topology.add(
|
||||
gather_nd("gather_nd", "InputData", "InputIndices", indices_rank, batch_dims)
|
||||
);
|
||||
topology.add(gather_nd_inst);
|
||||
|
||||
network network(engine, topology);
|
||||
|
||||
@@ -30,13 +32,54 @@ inline void DoTest(engine& engine,
|
||||
network.set_input_data("InputIndices", input1);
|
||||
auto outputs = network.execute();
|
||||
auto output = outputs.at("gather_nd").get_memory();
|
||||
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
|
||||
|
||||
// Compare output shape
|
||||
auto output_format = output->get_layout().format;
|
||||
auto output_shape = output->get_layout().size;
|
||||
|
||||
EXPECT_EQ(fmt, output_format);
|
||||
|
||||
int32_t dim_size = 6;
|
||||
if (fmt == format::bfyx) {
|
||||
dim_size = 4;
|
||||
} else if (fmt == format::bfzyx) {
|
||||
dim_size = 5;
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < dim_size; i++)
|
||||
{
|
||||
EXPECT_EQ(ts.sizes()[i], output_shape.sizes()[i]);
|
||||
}
|
||||
|
||||
// Compare output value
|
||||
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
|
||||
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the shared GatherND check with batch_merged_output enabled (v5 variant).
inline void DoTestV5(engine& engine,
                     const cldnn::memory::ptr input0,
                     const cldnn::memory::ptr input1,
                     const std::vector<float>& expected_results,
                     const int indices_rank,
                     const int batch_dims,
                     const cldnn::format fmt,
                     const tensor size) {
    const bool merge_batch_dims = true;
    DoTestBase(engine,
               input0,
               input1,
               expected_results,
               indices_rank,
               batch_dims,
               fmt,
               size,
               merge_batch_dims);
}
|
||||
|
||||
// Runs the shared GatherND check with batch_merged_output disabled (v8 variant).
inline void DoTestV8(engine& engine,
                     const cldnn::memory::ptr input0,
                     const cldnn::memory::ptr input1,
                     const std::vector<float>& expected_results,
                     const int indices_rank,
                     const int batch_dims,
                     const cldnn::format fmt,
                     const tensor size) {
    const bool merge_batch_dims = false;
    DoTestBase(engine,
               input0,
               input1,
               expected_results,
               indices_rank,
               batch_dims,
               fmt,
               size,
               merge_batch_dims);
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d23322_i231312_ir6_batch2) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
@@ -44,7 +87,7 @@ TEST(gather_nd_gpu_fp16, d23322_i231312_ir6_batch2) {
|
||||
const int batch_dims = 2;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 3, 2, 2, 3 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 3, 2, 1, 3, 1 } }); // indices
|
||||
// expected output dim: {6,1,3,1,2}
|
||||
// expected output dim: v5{6,1,3,1,2}, v8{2,3,1,3,1,2}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(11), FLOAT16(12), FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16), FLOAT16(11), FLOAT16(12), FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16),
|
||||
@@ -76,7 +119,8 @@ TEST(gather_nd_gpu_fp16, d23322_i231312_ir6_batch2) {
|
||||
FLOAT16(31), FLOAT16(32), FLOAT16(35), FLOAT16(36), FLOAT16(33), FLOAT16(34),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfzyx, {6, 1, 2, 1, 3});
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfwzyx, { 2, 3, 2, 1, 3, 1 });
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d231322_i231321_ir6_batch5) {
|
||||
@@ -86,7 +130,7 @@ TEST(gather_nd_gpu_fp16, d231322_i231321_ir6_batch5) {
|
||||
const int batch_dims = 5;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 3, 2, 2, 3, 1 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 3, 1, 2, 3, 1 } }); // indices
|
||||
// expected output dim: {36}
|
||||
// expected output dim: v5{36}, v8{2, 3, 2, 3, 1}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(11), FLOAT16(12), FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16), FLOAT16(17), FLOAT16(18), FLOAT16(19), FLOAT16(10), FLOAT16(21), FLOAT16(18),
|
||||
@@ -118,7 +162,8 @@ TEST(gather_nd_gpu_fp16, d231322_i231321_ir6_batch5) {
|
||||
FLOAT16(32), FLOAT16(33), FLOAT16(35), FLOAT16(38), FLOAT16(30), FLOAT16(29),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, {36, 1, 1, 1});
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfzyx, {2, 3, 2, 3, 1});
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d23322_i23321_ir5_batch4) {
|
||||
@@ -128,7 +173,7 @@ TEST(gather_nd_gpu_fp16, d23322_i23321_ir5_batch4) {
|
||||
const int batch_dims = 4;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 3, 2, 2, 3 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 3, 1, 2, 3 } }); // indices
|
||||
// expected output dim: {36}
|
||||
// expected output dim: v5{36}, v8{2,3,2,3}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(11), FLOAT16(12), FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16), FLOAT16(17), FLOAT16(18), FLOAT16(19), FLOAT16(10), FLOAT16(21), FLOAT16(18),
|
||||
@@ -160,9 +205,11 @@ TEST(gather_nd_gpu_fp16, d23322_i23321_ir5_batch4) {
|
||||
FLOAT16(32), FLOAT16(33), FLOAT16(35), FLOAT16(38), FLOAT16(30), FLOAT16(29),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 36, 1, 1, 1 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 2, 3, 2, 3 });
|
||||
}
|
||||
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d23223_i2321_ir4_batch3) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
@@ -170,7 +217,7 @@ TEST(gather_nd_gpu_fp16, d23223_i2321_ir4_batch3) {
|
||||
const int batch_dims = 3;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 3, 3, 2, 2 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 1, 2 } }); // indices
|
||||
// expected output dim: {2*3*2,3}
|
||||
// expected output dim: v5{12,3} v8{2,3,3,2}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(11), FLOAT16(12), FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16), FLOAT16(17), FLOAT16(18),FLOAT16(15), FLOAT16(16), FLOAT16(17), FLOAT16(18),
|
||||
@@ -202,7 +249,8 @@ TEST(gather_nd_gpu_fp16, d23223_i2321_ir4_batch3) {
|
||||
FLOAT16(29), FLOAT16(30), FLOAT16(31), FLOAT16(35), FLOAT16(36), FLOAT16(33),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 12, 3, 1, 1 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 2, 3, 3, 2 });
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d2342_i2312_ir4_batch2) {
|
||||
@@ -212,7 +260,7 @@ TEST(gather_nd_gpu_fp16, d2342_i2312_ir4_batch2) {
|
||||
const int batch_dims = 2;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 2, 4 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 2, 1 } }); // indices
|
||||
// expected output dim: {6,1}
|
||||
// expected output dim: v5{6,1}, v8{2,3,1}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(11), FLOAT16(12), FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16), FLOAT16(17), FLOAT16(18),
|
||||
@@ -244,7 +292,8 @@ TEST(gather_nd_gpu_fp16, d2342_i2312_ir4_batch2) {
|
||||
FLOAT16(33),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 6, 1, 1, 1 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 2, 3, 1, 1 });
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d234_i2311_ir4_batch2) {
|
||||
@@ -254,7 +303,7 @@ TEST(gather_nd_gpu_fp16, d234_i2311_ir4_batch2) {
|
||||
const int batch_dims = 2;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 1, 4 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 1, 1 } }); // indices
|
||||
// expected output dim: {6,1,1}
|
||||
// expected output dim: v5{6,1,1}, v8{2,3,1,1}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(1), FLOAT16(2), FLOAT16(3), FLOAT16(4),
|
||||
@@ -287,7 +336,8 @@ TEST(gather_nd_gpu_fp16, d234_i2311_ir4_batch2) {
|
||||
FLOAT16(23),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 6, 1, 1, 1 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 2, 3, 1, 1 });
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d234_i21_ir2_batch1) {
|
||||
@@ -297,7 +347,7 @@ TEST(gather_nd_gpu_fp16, d234_i21_ir2_batch1) {
|
||||
const int batch_dims = 1;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 1, 4 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices
|
||||
// expected output dim: {2,4}
|
||||
// expected output dim: v5{2,4,1,1}, v8{2,4,1,1}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(1), FLOAT16(2), FLOAT16(3), FLOAT16(4),
|
||||
@@ -320,7 +370,8 @@ TEST(gather_nd_gpu_fp16, d234_i21_ir2_batch1) {
|
||||
FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 2, 4, 1, 1 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 2, 4, 1, 1 });
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d22_i21_ir2_batch1) {
|
||||
@@ -330,7 +381,7 @@ TEST(gather_nd_gpu_fp16, d22_i21_ir2_batch1) {
|
||||
const int batch_dims = 1;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices
|
||||
// expected output dim: 2
|
||||
// expected output dim: v5{2,1,1}, v8{2,1,1}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(1), FLOAT16(2),
|
||||
@@ -347,7 +398,8 @@ TEST(gather_nd_gpu_fp16, d22_i21_ir2_batch1) {
|
||||
FLOAT16(3),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 2, 1, 1, 1 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 2, 1, 1, 1, 1 });
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d3223_i321113_ir6_batch0) {
|
||||
@@ -357,7 +409,7 @@ TEST(gather_nd_gpu_fp16, d3223_i321113_ir6_batch0) {
|
||||
const int batch_dims = 0;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 3, 2 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 3, 2, 3, 1, 1, 1 } }); // indices
|
||||
// expected output dim: 321113
|
||||
// expected output dim: 323111
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(11), FLOAT16(12), FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16),
|
||||
@@ -392,7 +444,8 @@ TEST(gather_nd_gpu_fp16, d3223_i321113_ir6_batch0) {
|
||||
FLOAT16(11), FLOAT16(12), FLOAT16(13),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfwzyx, { 3, 2, 3, 1, 1, 1 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfwzyx, { 3, 2, 3, 1, 1, 1 });
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d3221_i32312_ir3_batch0) {
|
||||
@@ -402,7 +455,7 @@ TEST(gather_nd_gpu_fp16, d3221_i32312_ir3_batch0) {
|
||||
const int batch_dims = 0;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 3, 2, 2, 1, 3 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 2 } }); // indices
|
||||
// expected output dim: 32312
|
||||
// expected output dim: 32213
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(11), FLOAT16(12), FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16),
|
||||
@@ -437,7 +490,8 @@ TEST(gather_nd_gpu_fp16, d3221_i32312_ir3_batch0) {
|
||||
FLOAT16(11), FLOAT16(12), FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfzyx, { 3, 2, 2, 1, 3 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfzyx, { 3, 2, 2, 1, 3 });
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d3231_i32312_ir3_batch0) {
|
||||
@@ -447,7 +501,7 @@ TEST(gather_nd_gpu_fp16, d3231_i32312_ir3_batch0) {
|
||||
const int batch_dims = 0;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 3, 2, 2, 1, 3 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 3 } }); // indices
|
||||
// expected output dim: {3,2,1,2}
|
||||
// expected output dim: {3,2,2,1}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(11), FLOAT16(12), FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16),
|
||||
@@ -482,7 +536,8 @@ TEST(gather_nd_gpu_fp16, d3231_i32312_ir3_batch0) {
|
||||
FLOAT16(11), FLOAT16(12),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 3, 2, 2, 1 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 3, 2, 2, 1 });
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d3112_i3221_ir4_batch0) {
|
||||
@@ -522,47 +577,8 @@ TEST(gather_nd_gpu_fp16, d3112_i3221_ir4_batch0) {
|
||||
FLOAT16(1), FLOAT16(2), FLOAT16(7), FLOAT16(8),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d311211_i322111_ir4_batch0) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
const int indices_rank = 4;
|
||||
const int batch_dims = 0;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 3, 1, 1, 1, 2, 1 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 3, 2, 1, 1, 1, 2 } }); // indices
|
||||
// expected output dim: {3,2,2,1,1,2,1,1}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(1), FLOAT16(2),
|
||||
FLOAT16(7), FLOAT16(8),
|
||||
FLOAT16(13), FLOAT16(14),
|
||||
});
|
||||
|
||||
set_values(input1, {
|
||||
FLOAT16(2), FLOAT16(1),
|
||||
FLOAT16(0), FLOAT16(1),
|
||||
|
||||
FLOAT16(2), FLOAT16(1),
|
||||
FLOAT16(0), FLOAT16(1),
|
||||
|
||||
FLOAT16(2), FLOAT16(1),
|
||||
FLOAT16(0), FLOAT16(1),
|
||||
});
|
||||
|
||||
std::vector<float> expected_results = {
|
||||
FLOAT16(13), FLOAT16(14), FLOAT16(7), FLOAT16(8),
|
||||
FLOAT16(1), FLOAT16(2), FLOAT16(7), FLOAT16(8),
|
||||
|
||||
FLOAT16(13), FLOAT16(14), FLOAT16(7), FLOAT16(8),
|
||||
FLOAT16(1), FLOAT16(2), FLOAT16(7), FLOAT16(8),
|
||||
|
||||
FLOAT16(13), FLOAT16(14), FLOAT16(7), FLOAT16(8),
|
||||
FLOAT16(1), FLOAT16(2), FLOAT16(7), FLOAT16(8),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfwzyx, { 3, 2, 2, 1, 1, 2 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfwzyx, { 3, 2, 2, 1, 1, 2 });
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d3332_i3223_ir4_batch0) {
|
||||
@@ -572,6 +588,7 @@ TEST(gather_nd_gpu_fp16, d3332_i3223_ir4_batch0) {
|
||||
const int batch_dims = 0;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 3, 3, 2 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 3, 2 } }); // indices
|
||||
// expected output dim: {3,2,3,2}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(1), FLOAT16(2), FLOAT16(3), FLOAT16(4), FLOAT16(5), FLOAT16(6),
|
||||
@@ -609,7 +626,8 @@ TEST(gather_nd_gpu_fp16, d3332_i3223_ir4_batch0) {
|
||||
FLOAT16(34), FLOAT16(35), FLOAT16(36), FLOAT16(16), FLOAT16(17), FLOAT16(18),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 3, 2, 3, 2 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 3, 2, 3, 2 });
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d3323_i322_ir3_batch0) {
|
||||
@@ -619,6 +637,7 @@ TEST(gather_nd_gpu_fp16, d3323_i322_ir3_batch0) {
|
||||
const int batch_dims = 0;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 3, 3, 2 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 2 } }); // indices
|
||||
// expected output dim: {3,2,3,2}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(1), FLOAT16(2), FLOAT16(3), FLOAT16(4), FLOAT16(5), FLOAT16(6),
|
||||
@@ -656,7 +675,8 @@ TEST(gather_nd_gpu_fp16, d3323_i322_ir3_batch0) {
|
||||
FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16), FLOAT16(17), FLOAT16(18),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 3, 2, 3, 2 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 3, 2, 3, 2 });
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d22_i21_ir2_batch0) {
|
||||
@@ -666,6 +686,7 @@ TEST(gather_nd_gpu_fp16, d22_i21_ir2_batch0) {
|
||||
const int batch_dims = 0;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices
|
||||
// expected output dim: {2,2,1,1}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(1), FLOAT16(2),
|
||||
@@ -681,7 +702,8 @@ TEST(gather_nd_gpu_fp16, d22_i21_ir2_batch0) {
|
||||
FLOAT16(1), FLOAT16(2),
|
||||
};
|
||||
|
||||
DoTest(engine, input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 2, 2, 1, 1 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 2, 2, 1, 1 });
|
||||
}
|
||||
|
||||
TEST(gather_nd_gpu_fp16, d22_i32_ir2_batch0) {
|
||||
@@ -691,6 +713,7 @@ TEST(gather_nd_gpu_fp16, d22_i32_ir2_batch0) {
|
||||
const int batch_dims = 0;
|
||||
auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // data
|
||||
auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // indices
|
||||
// expected output dim: {3,1,1,1}
|
||||
|
||||
set_values(input0, {
|
||||
FLOAT16(1), FLOAT16(2),
|
||||
@@ -709,5 +732,6 @@ TEST(gather_nd_gpu_fp16, d22_i32_ir2_batch0) {
|
||||
FLOAT16(4),
|
||||
};
|
||||
|
||||
DoTest(engine,input0, input1, expected_results, indices_rank, batch_dims);
|
||||
DoTestV5(engine,input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 3, 1, 1, 1 });
|
||||
DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 3, 1, 1, 1 });
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user