From 8e64eb21cb49a756d516a2acb3af861d9b5904ae Mon Sep 17 00:00:00 2001 From: Patryk Elszkowski Date: Tue, 1 Dec 2020 10:48:58 +0100 Subject: [PATCH] faster implementation of gather_nd operator for reference implementations (#2897) * faster implementation of gather_nd operator for reference implementations * remove old impl and time measurements * exclude test for gather_nd for IE_CPU (output mismatch) * apply review comments and rename variables for clarity * rename variables according to PR comments * try to apply all PR suggestions * fix indices calculations Co-authored-by: Patryk Elszkowski --- .../ngraph/runtime/reference/gather.hpp | 70 ++--- .../ngraph/runtime/reference/gather_nd.hpp | 284 +++++++++--------- ngraph/test/backend/gather_nd.in.cpp | 26 ++ ngraph/test/runtime/ie/unit_test.manifest | 3 + 4 files changed, 204 insertions(+), 179 deletions(-) diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/gather.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/gather.hpp index 3dbfecf836b..577f317b62b 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/gather.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/gather.hpp @@ -51,13 +51,10 @@ namespace ngraph const Shape& out_shape, size_t axis) { - using namespace std; // prepare shape of params_prime (remove first "axis" dimensions) - Shape params_prime_shape(params_shape); - params_prime_shape.erase(params_prime_shape.begin(), - params_prime_shape.begin() + axis); + const Shape params_prime_shape(params_shape.begin() + axis, params_shape.end()); // prepare shape of indices_prime - size_t indices_ndim = static_cast(indices_shape.size()); + const size_t indices_ndim = indices_shape.size(); Shape indices_prime_shape; // prepare shape of out_prime (same as params_prime except for first dim) Shape out_prime_shape(params_prime_shape); @@ -73,8 +70,8 @@ namespace ngraph indices_prime_shape.emplace_back(1); // Create a CoordinateTransform for "out" that visits 
the outer "axis" dimensions - size_t out_ndim = static_cast(out_shape.size()); - Coordinate out_outer_start_corner(out_ndim, 0); + const size_t out_ndim = out_shape.size(); + const Coordinate out_outer_start_corner(out_ndim, 0); Coordinate out_outer_end_corner(out_shape); for (size_t i = axis; i < out_ndim; i++) { @@ -90,44 +87,43 @@ namespace ngraph out_outer_axis_order); // Create a CoordinateTransform for "params" that visits the outer "axis" dimensions - size_t params_ndim = static_cast(params_shape.size()); - Coordinate params_outer_start_corner(params_ndim, 0); + const size_t params_ndim = params_shape.size(); + const Coordinate params_outer_start_corner(params_ndim, 0); Coordinate params_outer_end_corner(params_shape); for (size_t i = axis; i < params_ndim; i++) { params_outer_end_corner[i] = 1; } - Strides params_outer_strides(params_ndim, 1); + const Strides params_outer_strides(params_ndim, 1); AxisVector params_outer_axis_order(params_ndim); std::iota(params_outer_axis_order.begin(), params_outer_axis_order.end(), 0); - CoordinateTransform params_outer_transform(params_shape, - params_outer_start_corner, - params_outer_end_corner, - params_outer_strides, - params_outer_axis_order); + const CoordinateTransform params_outer_transform(params_shape, + params_outer_start_corner, + params_outer_end_corner, + params_outer_strides, + params_outer_axis_order); // Create a CoordinateTransform for "indices" that visits only the first element // along inner most axis - Coordinate indices_outer_start_corner(indices_ndim, 0); + const Coordinate indices_outer_start_corner(indices_ndim, 0); Coordinate indices_outer_end_corner(indices_shape); if (indices_ndim > 0) { indices_outer_end_corner[indices_ndim - 1] = 1; } - Strides indices_outer_strides(indices_ndim, 1); + const Strides indices_outer_strides(indices_ndim, 1); AxisVector indices_outer_axis_order(indices_ndim); std::iota(indices_outer_axis_order.begin(), indices_outer_axis_order.end(), 0); - CoordinateTransform 
indices_outer_transform(indices_shape, - indices_outer_start_corner, - indices_outer_end_corner, - indices_outer_strides, - indices_outer_axis_order); + const CoordinateTransform indices_outer_transform(indices_shape, + indices_outer_start_corner, + indices_outer_end_corner, + indices_outer_strides, + indices_outer_axis_order); // Create an inner CoordinateTransfrom for "out" - size_t out_inner_ndim = out_ndim - axis; - Shape out_inner_shape(out_shape); - out_inner_shape.erase(out_inner_shape.begin(), out_inner_shape.begin() + axis); - Coordinate out_inner_start_corner(out_inner_ndim, 0); + const size_t out_inner_ndim = out_ndim - axis; + const Shape out_inner_shape(out_shape.begin() + axis, out_shape.end()); + const Coordinate out_inner_start_corner(out_inner_ndim, 0); Coordinate out_inner_end_corner(out_inner_shape); if (indices_ndim > 0) { @@ -137,14 +133,14 @@ namespace ngraph { out_inner_end_corner[i] = 1; } - Strides out_inner_strides(out_inner_ndim, 1); + const Strides out_inner_strides(out_inner_ndim, 1); AxisVector out_inner_axis_order(out_inner_ndim); std::iota(out_inner_axis_order.begin(), out_inner_axis_order.end(), 0); - CoordinateTransform out_inner_transform(out_inner_shape, - out_inner_start_corner, - out_inner_end_corner, - out_inner_strides, - out_inner_axis_order); + const CoordinateTransform out_inner_transform(out_inner_shape, + out_inner_start_corner, + out_inner_end_corner, + out_inner_strides, + out_inner_axis_order); auto out_outer_coord_iter = out_outer_transform.begin(); for (const Coordinate& params_outer_coord : params_outer_transform) @@ -169,11 +165,11 @@ namespace ngraph params_prime_shape, indices_prime_shape, out_prime_shape); - out_inner_coord_iter++; + ++out_inner_coord_iter; } - out_outer_coord_iter++; + ++out_outer_coord_iter; } } - } - } -} + } // namespace reference + } // namespace runtime +} // namespace ngraph diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/gather_nd.hpp 
b/ngraph/core/reference/include/ngraph/runtime/reference/gather_nd.hpp index 7857aa06e72..805c035c5a6 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/gather_nd.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/gather_nd.hpp @@ -16,6 +16,8 @@ #pragma once +#include +#include #include #include "ngraph/coordinate_transform.hpp" @@ -26,171 +28,169 @@ namespace ngraph { namespace reference { - // foreach leaf_vector_index in indices.shape[:-1] - // vector = indices[leaf_vector_index] - // out[leaf_vector_index:] = params[vector] - template - void gather_nd_batch(const T* params, - const U* indices, - T* out, - const Shape& params_shape, - const Shape& indices_shape, - const Shape& out_shape) + namespace { - using namespace std; - // Create a CoordinateTransform for "indices" that visits only the first element - // along inner most axis - size_t indices_ndim = static_cast(indices_shape.size()); - Coordinate indices_outer_start_corner(indices_ndim, 0); - Coordinate indices_outer_end_corner(indices_shape); - size_t slice_rank = indices_shape[indices_ndim - 1]; - indices_outer_end_corner[indices_ndim - 1] = 1; - Strides indices_strides(indices_ndim, 1); - AxisVector indices_axis_order(indices_ndim); - std::iota(indices_axis_order.begin(), indices_axis_order.end(), 0); - CoordinateTransform indices_outer_transform(indices_shape, - indices_outer_start_corner, - indices_outer_end_corner, - indices_strides, - indices_axis_order); + template + using Required = typename std::enable_if::type; - // Create a matching CoordinateTransform for "out" that visits the same outer - // coordinates - size_t out_ndim = static_cast(out_shape.size()); - Coordinate out_start_corner(out_ndim, 0); - Coordinate out_end_corner(out_shape); - for (size_t i = indices_ndim - 1; i < out_ndim; i++) + template + struct IsRandomAccessIt { - out_end_corner[i] = 1; - } - Strides out_strides(out_ndim, 1); - AxisVector out_axis_order(out_ndim); - 
std::iota(out_axis_order.begin(), out_axis_order.end(), 0); - CoordinateTransform out_transform( - out_shape, out_start_corner, out_end_corner, out_strides, out_axis_order); - size_t params_ndim = static_cast(params_shape.size()); - Strides params_strides(params_ndim, 1); - AxisVector params_axis_order(params_ndim); - std::iota(params_axis_order.begin(), params_axis_order.end(), 0); + static constexpr bool value = + std::is_same::value; + }; - // Gather slices from "params" and copy to "out" - auto out_coord_iter = out_transform.begin(); - for (const Coordinate& indices_coord : indices_outer_transform) + template ::value> = true> + class Span { - Coordinate params_start_corner(params_ndim, 0); - Coordinate params_end_corner(params_shape); - auto indices_index = indices_outer_transform.index(indices_coord); - for (size_t i = 0; i < slice_rank; i++) + public: + Span(Iterator begin, Iterator end) + : m_begin{begin} + , m_end{end} { - U index = indices[indices_index]; - // take care of negative indices - index = index >= 0 ? 
index : index + params_shape[i]; - params_start_corner[i] = index; - params_end_corner[i] = index + 1; - indices_index++; } - CoordinateTransform params_transform(params_shape, - params_start_corner, - params_end_corner, - params_strides, - params_axis_order); - if (out_coord_iter == out_transform.end()) - break; - auto out_index = out_transform.index(*out_coord_iter); - for (const Coordinate& params_coord : params_transform) - { - out[out_index] = params[params_transform.index(params_coord)]; - out_index++; - } - out_coord_iter++; - } - } + Iterator begin() const { return m_begin; } + Iterator end() const { return m_end; }; + typename Iterator::value_type operator[](size_t idx) const + { + return *next(m_begin, idx); + } + + typename Iterator::difference_type size() const + { + return std::distance(m_begin, m_end); + } + + private: + Iterator m_begin; + Iterator m_end; + }; + + template + Span span(Iterator begin, Iterator end) + { + return Span{begin, end}; + }; + + template + std::vector get_indices_offsets(const Iterator beg, + const Iterator end, + size_t last_slice_size) + { + auto next_e = beg; + auto i = std::distance(beg, end); + std::vector offsets(i + 1, last_slice_size); + while (i-- > 0) + { + offsets[i] = *next_e * offsets[i + 1]; + ++next_e; + } + + return offsets; + } + } // namespace + + /// + /// Implementation find maximum length of *slice* of input *params* which might be + /// copied to *out* index by index. 
+ /// +-------+--------------+-------+ + /// | batch | indices[:-1] | slice | + /// | shape | shape | shape | + /// +-------+--------------+-------+ + /// template - void gather_nd(const T* params, - const U* indices, - T* out, + void gather_nd(const T* const params, + const U* const indices, + T* const out, const Shape& params_shape, const Shape& indices_shape, const Shape& out_shape, - int batch_dims = 0) + const int batch_dims = 0) { - using namespace std; - if (batch_dims == 0) + using std::begin; + using std::end; + using std::next; + using std::prev; + const auto rbegin = [](const Shape& s) { // generic since C++14 + return s.rbegin(); + }; + + const Shape batch_shape(begin(params_shape), next(begin(params_shape), batch_dims)); + const auto batch_size = shape_size(batch_shape); + + if (batch_dims && batch_size != out_shape.front()) { - gather_nd_batch(params, indices, out, params_shape, indices_shape, out_shape); - return; + throw std::domain_error{ + "out_shape should have on first dim multiplication of batch number of first" + "dimensions of shape "}; } - size_t indices_ndim = static_cast(indices_shape.size()); - Coordinate indices_outer_start_corner(indices_ndim, 0); - Coordinate indices_outer_end_corner(indices_shape); - for (size_t i = batch_dims; i < indices_ndim; i++) + if (!std::equal(begin(params_shape), + next(begin(params_shape), batch_dims), + begin(indices_shape))) { - indices_outer_end_corner[i] = 1; + throw std::domain_error{ + "dimensions in params and indices have to be equal on batch dimensions"}; } - Strides indices_strides(indices_ndim, 1); - AxisVector indices_axis_order(indices_ndim); - std::iota(indices_axis_order.begin(), indices_axis_order.end(), 0); - CoordinateTransform indices_outer_transform(indices_shape, - indices_outer_start_corner, - indices_outer_end_corner, - indices_strides, - indices_axis_order); - size_t params_ndim = static_cast(params_shape.size()); - Coordinate params_outer_start_corner(params_ndim, 0); - Coordinate 
params_outer_end_corner(params_shape); - for (size_t i = batch_dims; i < params_ndim; i++) + const auto first_slice_index_in_params = batch_dims + indices_shape.back(); + + if (!(first_slice_index_in_params <= params_shape.size())) { - params_outer_end_corner[i] = 1; + throw std::domain_error{ + "params_shape should have enough rank to be index by indices"}; } - Strides params_strides(params_ndim, 1); - AxisVector params_axis_order(params_ndim); - std::iota(params_axis_order.begin(), params_axis_order.end(), 0); - CoordinateTransform params_outer_transform(params_shape, - params_outer_start_corner, - params_outer_end_corner, - params_strides, - params_axis_order); - size_t out_ndim = static_cast(out_shape.size()); - Coordinate out_start_corner(out_ndim, 0); - Coordinate out_end_corner(out_shape); - for (size_t i = 1; i < out_ndim; i++) + const auto slice_shape = + span(next(begin(params_shape), first_slice_index_in_params), end(params_shape)); + const auto slice_size = shape_size(slice_shape); + + const auto dims_begin = next(rbegin(params_shape), slice_shape.size()); + const auto dims_end = next(dims_begin, indices_shape.back() - 1); + + const auto indices_offsets = get_indices_offsets(dims_begin, dims_end, slice_size); + + const auto batch_offset = indices_offsets.front() * params_shape[batch_dims]; + + const auto k_1_indices = + span(next(begin(indices_shape), batch_dims), prev(end(indices_shape))); + + const auto k_1_params = + span(next(begin(params_shape), batch_dims), prev(end(params_shape))); + + const auto number_of_slices_to_copy_in_one_batch = shape_size(k_1_indices); + + const auto coordinates_size = indices_shape.back(); + + for (size_t batch = 0; batch != batch_size; ++batch) { - out_end_corner[i] = 1; - } - Strides out_strides(out_ndim, 1); - AxisVector out_axis_order(out_ndim); - std::iota(out_axis_order.begin(), out_axis_order.end(), 0); - CoordinateTransform out_transform( - out_shape, out_start_corner, out_end_corner, out_strides, 
out_axis_order); + const auto input_batch_offset = batch * batch_offset; + const auto output_batch_offset = + batch * number_of_slices_to_copy_in_one_batch * slice_size; + const auto coordinates_batch_offset = + batch * number_of_slices_to_copy_in_one_batch * coordinates_size; + for (size_t slice = 0; slice != number_of_slices_to_copy_in_one_batch; ++slice) + { + const auto slice_coordinates = + next(indices, coordinates_batch_offset + slice * coordinates_size); - Shape indices_shape_batch(indices_shape.begin() + batch_dims, indices_shape.end()); - Shape params_shape_batch(params_shape.begin() + batch_dims, params_shape.end()); - Shape output_shape_batch(out_shape.begin() + 1, out_shape.end()); - auto out_coord_iter = out_transform.begin(); - auto params_coord_iter = params_outer_transform.begin(); - for (const Coordinate& indices_coord : indices_outer_transform) - { - if (params_coord_iter == params_outer_transform.end() || - out_coord_iter == out_transform.end()) - break; - auto indices_index = indices_outer_transform.index(indices_coord); - auto params_index = params_outer_transform.index(*params_coord_iter); - auto output_index = out_transform.index(*out_coord_iter); - gather_nd_batch(params + params_index, - indices + indices_index, - out + output_index, - params_shape_batch, - indices_shape_batch, - output_shape_batch); - - out_coord_iter++; - params_coord_iter++; + size_t input_slice_offset = input_batch_offset; + for (size_t c = 0; c != coordinates_size; ++c) + { + const auto i_c = slice_coordinates[c]; + const auto index = i_c < 0 ? 
k_1_params[c] + i_c : i_c; + input_slice_offset += index * indices_offsets[c]; + } + const auto output_slice_offset = output_batch_offset + slice * slice_size; + std::copy(next(params, input_slice_offset), + next(params, input_slice_offset + slice_size), + next(out, output_slice_offset)); + } } } - } - } -} + + } // namespace reference + } // namespace runtime +} // namespace ngraph diff --git a/ngraph/test/backend/gather_nd.in.cpp b/ngraph/test/backend/gather_nd.in.cpp index 7bb292efd53..5fe3578a4d6 100644 --- a/ngraph/test/backend/gather_nd.in.cpp +++ b/ngraph/test/backend/gather_nd.in.cpp @@ -372,6 +372,32 @@ NGRAPH_TEST(${BACKEND_NAME}, gather_nd_batch_1d_from_3d) MIN_FLOAT_TOLERANCE_BITS)); } +NGRAPH_TEST(${BACKEND_NAME}, gather_nd_batch_1d_from_3d_negative) +{ + Shape params_shape{2, 2, 2}; + Shape indices_shape{2, 2, 2}; + Shape out_shape{2, 2, 2}; + auto P = make_shared(element::f32, params_shape); + auto I = make_shared(element::i32, indices_shape); + auto G = make_shared(P, I); + auto f = make_shared(G, ParameterVector{P, I}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto p = backend->create_tensor(element::f32, params_shape); + copy_data(p, vector{1.0f, 1.1f, 1.2f, 1.3f, 2.0f, 2.1f, 2.2f, 2.3f}); + auto i = backend->create_tensor(element::i32, indices_shape); + copy_data(i, vector{0, -1, -1, 0, 0, 0, 1, 1}); + auto result = backend->create_tensor(element::f32, out_shape); + + auto c = backend->compile(f); + c->call_with_validate({result}, {p, i}); + EXPECT_TRUE(test::all_close_f((vector{1.2f, 1.3f, 2.0f, 2.1f, 1.0f, 1.1f, 2.2f, 2.3f}), + read_vector(result), + MIN_FLOAT_TOLERANCE_BITS)); +} + NGRAPH_TEST(${BACKEND_NAME}, gather_nd_batch_2d_from_3d) { Shape params_shape{2, 2, 2}; diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index 719da7f9ab9..af9e7f51121 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ 
b/ngraph/test/runtime/ie/unit_test.manifest @@ -1144,6 +1144,9 @@ IE_CPU.nonmaxsuppression_two_classes # Bug in CPU plugin for ROIPooling when pooled size is 1x1 and method is bilinear IE_CPU.roi_pooling_1x1_bilinear +# output mismatch +IE_CPU.gather_nd_batch_1d_from_3d_negative + #------------------------------------------------------------------------------- # # Inference Engine GPU plugin excludes