MatMul reference implementation refactoring (#4671)

* MatMul backend tests

* Single layer tests

* Style apply

* Update IE_CPU manifest

* New dot implementation

* Use new dot in MatMul reference

* Fix output batch offset

* Style apply

* Relax test tolerance

* Remove legacy dot reference file

* Remove usage of broadcast builder

* Add one more broadcast test

* Remove NGRAPH_SUPPRESS_DEPRECATED

* Style apply

* Few more MatMul single layer tests

* Update IE tests manifest

* Move variable declarations to inner loops

* Add const to variables

* Apply review suggestions

* Reuse vector for transposed and broadcasted data
Katarzyna Mitrus 2021-03-22 13:27:31 +01:00 committed by GitHub
parent e64d84b47b
commit b8f36ec354
7 changed files with 268 additions and 318 deletions
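For orientation, a minimal sketch of calling the refactored reference kernel. The include path and the exact parameter order (data pointers, then shapes, then transpose flags) are assumptions read off the diff below, not a verified API:

#include <vector>
#include "ngraph/runtime/reference/matmul.hpp" // assumed header location

using namespace ngraph;

int main()
{
    const Shape shape_a{2, 3};
    const Shape shape_b{3, 4};
    const Shape shape_out{2, 4};
    std::vector<float> a(shape_size(shape_a), 1.0f);
    std::vector<float> b(shape_size(shape_b), 2.0f);
    std::vector<float> out(shape_size(shape_out), 0.0f);
    // Plain 2D case: no transposes, no batch broadcasting involved.
    runtime::reference::matmul(
        a.data(), b.data(), out.data(), shape_a, shape_b, shape_out, false, false);
    // Each output element accumulates 1.0f * 2.0f over K = 3, i.e. equals 6.0f.
    return 0;
}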

View File

@@ -18,6 +18,19 @@ const std::vector<ShapeRelatedParams> shapeRelatedParams = {
{ { {1, 4, 5, 6}, false }, { {1, 4, 6, 4}, false } },
{ { {4, 5, 6}, false }, { {6, 3}, false } },
{ { {9, 9, 9}, false }, { {9, 9}, false } },
{ { {1, 2, 3}, false }, { {1, 1, 3, 2}, false } },
{ { {1, 3, 2, 4}, false }, { {2, 1, 4, 2}, false } },
{ { {2, 1, 2, 4}, false }, { {1, 3, 4, 2}, false } },
{ { {3, 2, 4}, false }, { {2, 1, 4, 2}, false } },
{ { {2, 1, 4, 2}, false }, { {3, 2, 4}, false } },
{ { {2, 1, 2, 3}, true }, { {3, 2, 4}, false } },
{ { {2, 1, 3, 2}, false }, { {3, 4, 2}, true } },
{ { {2, 1, 2, 3}, true }, { {3, 4, 2}, true } },
{ { {3}, false }, { {2, 2, 3, 1}, false } },
{ { {2, 2, 1, 3}, false }, { {3}, false } },
{ { {1, 5}, false }, { {5, 1}, false } },
{ { {5, 1}, true }, { {5, 1}, false } },
{ { {1, 5}, false }, { {1, 5}, true } },
{ { {1, 5}, false }, { {5}, false } },
{ { {5}, false }, { {5, 1}, false } },
{ { {5}, false }, { {5}, false } },
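// The added cases exercise batch broadcasting (e.g. {2, 1, 2, 4} x {1, 3, 4, 2})
// and 1D-input handling (e.g. {3} x {2, 2, 3, 1} and {5} x {5}).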

View File

@@ -1,170 +0,0 @@
//*****************************************************************************
// Copyright 2017-2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cmath>
#include <utility>
#include <cfenv>
#include <functional>
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/runtime/reference/helpers.hpp"
#include "ngraph/shape_util.hpp"
namespace ngraph
{
namespace runtime
{
namespace reference
{
template <typename INPUT0,
typename INPUT1,
typename OUTPUT,
typename ACCUMULATION = typename widen<OUTPUT>::type>
void dot(const INPUT0* arg0,
const INPUT1* arg1,
OUTPUT* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const Shape& out_shape,
size_t reduction_axes_count,
const float* input0_scale = nullptr,
const INPUT0* input0_zero_point = nullptr,
const float* input1_scale = nullptr,
const INPUT1* input1_zero_point = nullptr,
const float* output_scale = nullptr,
const OUTPUT* output_zero_point = nullptr)
{
bool is_quantized = false;
if (input0_scale && input0_zero_point && input1_scale && input1_zero_point &&
output_scale && output_zero_point)
{
is_quantized = true;
}
auto old_mode = std::fegetround();
std::fesetround(FE_TONEAREST);
// Get the sizes of the dot axes. It's easiest to pull them from arg1 because
// they're right up front.
Shape dot_axis_sizes(reduction_axes_count);
std::copy(arg1_shape.begin(),
arg1_shape.begin() + reduction_axes_count,
dot_axis_sizes.begin());
CoordinateTransform arg0_transform(arg0_shape);
CoordinateTransform arg1_transform(arg1_shape);
CoordinateTransform output_transform(out_shape);
// Create coordinate transforms for arg0 and arg1 that throw away the dotted axes.
size_t arg0_projected_rank = arg0_shape.size() - reduction_axes_count;
size_t arg1_projected_rank = arg1_shape.size() - reduction_axes_count;
Shape arg0_projected_shape(arg0_projected_rank);
std::copy(arg0_shape.begin(),
arg0_shape.begin() + arg0_projected_rank,
arg0_projected_shape.begin());
Shape arg1_projected_shape(arg1_projected_rank);
std::copy(arg1_shape.begin() + reduction_axes_count,
arg1_shape.end(),
arg1_projected_shape.begin());
CoordinateTransform arg0_projected_transform(arg0_projected_shape);
CoordinateTransform arg1_projected_transform(arg1_projected_shape);
// Create a coordinate transform that allows us to iterate over all possible values
// for the dotted axes.
CoordinateTransform dot_axes_transform(dot_axis_sizes);
for (const Coordinate& arg0_projected_coord : arg0_projected_transform)
{
for (const Coordinate& arg1_projected_coord : arg1_projected_transform)
{
// The output coordinate is just the concatenation of the projected
// coordinates.
Coordinate out_coord(arg0_projected_coord.size() +
arg1_projected_coord.size());
auto out_coord_it = std::copy(arg0_projected_coord.begin(),
arg0_projected_coord.end(),
out_coord.begin());
std::copy(
arg1_projected_coord.begin(), arg1_projected_coord.end(), out_coord_it);
// Zero out to start the sum.
ACCUMULATION sum = 0;
size_t out_index = output_transform.index(out_coord);
// Walk along the dotted axes.
Coordinate arg0_coord(arg0_shape.size());
Coordinate arg1_coord(arg1_shape.size());
auto arg0_it = std::copy(arg0_projected_coord.begin(),
arg0_projected_coord.end(),
arg0_coord.begin());
for (const Coordinate& dot_axis_positions : dot_axes_transform)
{
// In order to find the points to multiply together, we need to inject
// our current positions along the dotted axes back into the projected
// arg0 and arg1 coordinates.
std::copy(
dot_axis_positions.begin(), dot_axis_positions.end(), arg0_it);
auto arg1_it = std::copy(dot_axis_positions.begin(),
dot_axis_positions.end(),
arg1_coord.begin());
std::copy(
arg1_projected_coord.begin(), arg1_projected_coord.end(), arg1_it);
// Multiply and add to the sum.
if (is_quantized)
{
sum = sum + ((static_cast<ACCUMULATION>(
arg0[arg0_transform.index(arg0_coord)]) -
static_cast<ACCUMULATION>(*input0_zero_point)) *
(static_cast<ACCUMULATION>(
arg1[arg1_transform.index(arg1_coord)]) -
static_cast<ACCUMULATION>(*input1_zero_point)));
}
else
{
sum = sum + (static_cast<ACCUMULATION>(
arg0[arg0_transform.index(arg0_coord)]) *
static_cast<ACCUMULATION>(
arg1[arg1_transform.index(arg1_coord)]));
}
}
if (is_quantized)
{
float scale = *input0_scale * *input1_scale / *output_scale;
// Write the sum back.
out[out_index] =
static_cast<OUTPUT>(std::round(static_cast<float>(sum) * scale)) +
*output_zero_point;
}
else
{
out[out_index] = sum;
}
}
std::fesetround(old_mode);
}
}
}
}
}

View File

@@ -225,7 +225,7 @@ namespace ngraph
clip_activation(r_t, activation_f);
// calculate h_t
vector<T> h_t(gate_shape_size);
std::vector<T> h_t(gate_shape_size);
if (linear_before_reset)
{
// ht = g(Xt*(Wh^T) + (rt (.) (Ht-1*(Rh^T) + Rbh)) + Wbh)
@@ -287,8 +287,8 @@ namespace ngraph
}
clip_activation(h_t, activation_g);
// Ht = (1 - zt) (.) ht + zt (.) Ht-1
vector<T> mul1(gate_shape_size);
vector<T> mul2(gate_shape_size);
std::vector<T> mul1(gate_shape_size);
std::vector<T> mul2(gate_shape_size);
T one[] = {1};
reference::subtract(
one, z_t.data(), mul1.data(), {1}, gate_shape, op::AutoBroadcastSpec::NUMPY);

View File

@@ -178,9 +178,9 @@ namespace ngraph
// ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Wbo + Rbo)
clip_activation(X_W_fico[3], activation_f);
vector<T> mul1(gate_shape_size);
vector<T> mul2(gate_shape_size);
vector<T> Ct(gate_shape_size);
std::vector<T> mul1(gate_shape_size);
std::vector<T> mul2(gate_shape_size);
std::vector<T> Ct(gate_shape_size);
// ft (.) Ct-1
reference::multiply(X_W_fico[0].data(),
C,

View File

@@ -21,23 +21,63 @@
#include <utility>
#include <vector>
#include "ngraph/axis_vector.hpp"
#include "ngraph/builder/autobroadcast.hpp"
#include "ngraph/runtime/opt_kernel/reshape.hpp"
#include "ngraph/runtime/reference/broadcast.hpp"
#include "ngraph/runtime/reference/dot.hpp"
#include "ngraph/shape_util.hpp"
NGRAPH_SUPPRESS_DEPRECATED_START
using namespace std;
namespace ngraph
{
namespace runtime
{
namespace reference
{
namespace details
{
template <typename T>
void dot(const T* arg0,
const T* arg1,
T* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const Shape& out_shape)
{
std::fill(out, out + shape_size(out_shape), T{0});
const size_t arg0_rank = arg0_shape.size();
const size_t arg1_rank = arg1_shape.size();
// 2D input shapes are interpreted as {I, K} x {K, J}
// If first input is 1D tensor of shape {K}, it is interpreted as {1, K}
// If second input is 1D tensor of shape {K}, it is interpreted as {K, 1}
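// e.g. a {5} x {5} pair is handled as {1, 5} x {5, 1} and reduces to a single element
// The i-k-j loop order below keeps the inner arg1 and out accesses contiguous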
const size_t I_dim = arg0_rank == 1 ? 1 : arg0_shape[arg0_rank - 2];
const size_t J_dim = arg1_rank == 1 ? 1 : arg1_shape[arg1_rank - 1];
const size_t K_dim =
arg1_rank == 1 ? arg1_shape[arg1_rank - 1] : arg1_shape[arg1_rank - 2];
for (size_t i = 0; i < I_dim; ++i)
{
for (size_t k = 0; k < K_dim; ++k)
{
const size_t a_idx = i * K_dim + k;
for (size_t j = 0; j < J_dim; ++j)
{
const size_t b_idx = k * J_dim + j;
const size_t out_idx = i * J_dim + j;
out[out_idx] += arg0[a_idx] * arg1[b_idx];
}
}
}
}
std::vector<size_t> get_transpose_order(const Shape& input_shape)
{
size_t rank = input_shape.size();
NGRAPH_CHECK(rank > 1, "Invalid input for transpose");
std::vector<size_t> axes_order(rank);
std::iota(axes_order.begin(), axes_order.end(), 0);
std::swap(axes_order[rank - 1], axes_order[rank - 2]);
return axes_order;
}
}
/// \brief Reference kernel for matmul computation.
///
/// \tparam T Type of input and output tensors.
@@ -70,89 +110,59 @@ namespace ngraph
// and perform broadcast if applicable
// 4) Perform dot on the args or updated args and return result
size_t arg0_rank = arg0_shape.size();
size_t arg1_rank = arg1_shape.size();
size_t out_rank = out_shape.size();
// vector vars to hold potential intermediate transpose,
// broadcast result
vector<T> arg0_transpose_vec;
vector<T> arg1_transpose_vec;
vector<T> arg0_broadcast_vec;
vector<T> arg1_broadcast_vec;
// pointers to updated inputs
const T* arg0_update = arg0;
const T* arg1_update = arg1;
const T* arg0_data = arg0;
const T* arg1_data = arg1;
// vectors to hold potential intermediate transpose,
// broadcast result
std::vector<T> arg0_new_data;
std::vector<T> arg1_new_data;
// vars for updated input shapes
Shape wip_arg0_shape = arg0_shape;
Shape wip_arg1_shape = arg1_shape;
Shape arg0_shape_tmp = arg0_shape;
Shape arg1_shape_tmp = arg1_shape;
auto get_transpose_order = [](const Shape& input_shape) {
size_t rank = input_shape.size();
NGRAPH_CHECK(rank > 1, "Invalid input for transpose");
vector<size_t> axes_order(rank);
iota(axes_order.begin(), axes_order.end(), 0);
swap(axes_order[rank - 1], axes_order[rank - 2]);
return AxisVector{begin(axes_order), end(axes_order)};
};
auto get_broadcast_axes = [](const Shape& marker_shape, const Shape& target_shape) {
NGRAPH_CHECK(marker_shape.size() == target_shape.size(),
"Incompatible input shapes");
AxisSet broadcast_axes;
for (size_t i = 0; i < marker_shape.size(); i++)
{
if (marker_shape[i] == 1 && target_shape[i] != 1)
{
broadcast_axes.insert(i);
}
}
return broadcast_axes;
};
size_t arg0_rank = arg0_shape.size();
size_t arg1_rank = arg1_shape.size();
const size_t out_rank = out_shape.size();
// Perform transpose if requested
if (transpose_arg0 && arg0_rank > 1)
{
arg0_transpose_vec.reserve(shape_size(arg0_shape));
auto axis_vector = get_transpose_order(arg0_shape);
swap(wip_arg0_shape[arg0_rank - 1], wip_arg0_shape[arg0_rank - 2]);
opt_kernel::reshape(reinterpret_cast<const char*>(arg0),
reinterpret_cast<char*>(arg0_transpose_vec.data()),
std::vector<T> tmp(shape_size(arg0_shape));
auto axis_vector = details::get_transpose_order(arg0_shape);
std::swap(arg0_shape_tmp[arg0_rank - 1], arg0_shape_tmp[arg0_rank - 2]);
opt_kernel::reshape(reinterpret_cast<const char*>(arg0_data),
reinterpret_cast<char*>(tmp.data()),
arg0_shape,
axis_vector,
wip_arg0_shape,
arg0_shape_tmp,
sizeof(T));
arg0_update = arg0_transpose_vec.data();
arg0_new_data.swap(tmp);
arg0_data = arg0_new_data.data();
}
if (transpose_arg1 && arg1_rank > 1)
{
arg1_transpose_vec.reserve(shape_size(arg1_shape));
auto axis_vector = get_transpose_order(arg1_shape);
swap(wip_arg1_shape[arg1_rank - 1], wip_arg1_shape[arg1_rank - 2]);
opt_kernel::reshape(reinterpret_cast<const char*>(arg1),
reinterpret_cast<char*>(arg1_transpose_vec.data()),
std::vector<T> tmp(shape_size(arg1_shape));
auto axis_vector = details::get_transpose_order(arg1_shape);
std::swap(arg1_shape_tmp[arg1_rank - 1], arg1_shape_tmp[arg1_rank - 2]);
opt_kernel::reshape(reinterpret_cast<const char*>(arg1_data),
reinterpret_cast<char*>(tmp.data()),
arg1_shape,
axis_vector,
wip_arg1_shape,
arg1_shape_tmp,
sizeof(T));
arg1_update = arg1_transpose_vec.data();
arg1_new_data.swap(tmp);
arg1_data = arg1_new_data.data();
}
// Inputs are 2D and below, perform dot directly
if (arg0_rank <= 2 && arg1_rank <= 2)
{
dot(arg0_update,
arg1_update,
out,
wip_arg0_shape,
wip_arg1_shape,
out_shape,
1);
details::dot(
arg0_data, arg1_data, out, arg0_shape_tmp, arg1_shape_tmp, out_shape);
return;
}
@@ -163,80 +173,73 @@ namespace ngraph
if (arg0_rank > 2 && arg1_rank > 2)
{
const auto& broadcast_shapes = builder::get_numpy_broadcast_shapes(
{Shape{begin(wip_arg0_shape), next(end(wip_arg0_shape), -2)},
Shape{begin(wip_arg1_shape), next(end(wip_arg1_shape), -2)}});
Shape arg0_br_target_shape = broadcast_shapes.first;
Shape arg1_br_target_shape = broadcast_shapes.first;
Shape arg0_br_marker_shape = broadcast_shapes.second.at(0);
Shape arg1_br_marker_shape = broadcast_shapes.second.at(1);
// Align input batches to the output shape
Shape arg0_br_target_shape(out_shape.begin(), out_shape.end() - 2);
Shape arg1_br_target_shape(out_shape.begin(), out_shape.end() - 2);
arg0_br_target_shape.insert(
end(arg0_br_target_shape),
next(begin(wip_arg0_shape), wip_arg0_shape.size() - 2),
end(wip_arg0_shape));
end(arg0_br_target_shape), end(arg0_shape_tmp) - 2, end(arg0_shape_tmp));
arg1_br_target_shape.insert(
end(arg1_br_target_shape),
next(begin(wip_arg1_shape), wip_arg1_shape.size() - 2),
end(wip_arg1_shape));
end(arg1_br_target_shape), end(arg1_shape_tmp) - 2, end(arg1_shape_tmp));
arg0_br_marker_shape.insert(
end(arg0_br_marker_shape),
next(begin(wip_arg0_shape), wip_arg0_shape.size() - 2),
end(wip_arg0_shape));
arg1_br_marker_shape.insert(
end(arg1_br_marker_shape),
next(begin(wip_arg1_shape), wip_arg1_shape.size() - 2),
end(wip_arg1_shape));
if (arg0_br_target_shape != wip_arg0_shape)
std::vector<size_t> broadcast_axes(out_shape.size() - 2);
std::iota(broadcast_axes.begin(), broadcast_axes.end(), 0);
if (!broadcast_axes.empty())
{
auto broadcast_axes =
get_broadcast_axes(arg0_br_marker_shape, arg0_br_target_shape);
if (!broadcast_axes.empty())
// The usual broadcasting rules are applied to the batch dimensions.
// If the ranks of the input arguments differ,
// the smaller tensor is unsqueezed from the left side of its shape
// by as many axes as needed to make both shapes the same rank.
// Broadcast all batches (the last two dimensions represent the matrix),
// expanding a dim of value 1 to the bigger dim where dimensions differ.
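// e.g. batch dims {2, 1} and {1, 3} broadcast to {2, 3}, so
// arg0 {2, 1, 2, 4} x arg1 {1, 3, 4, 2} yields an output of {2, 3, 2, 2}.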
if (arg0_br_target_shape != arg0_shape_tmp)
{
arg0_broadcast_vec.reserve(shape_size(arg0_br_target_shape));
broadcast(reinterpret_cast<const char*>(arg0_update),
reinterpret_cast<char*>(arg0_broadcast_vec.data()),
wip_arg0_shape,
std::vector<T> tmp(shape_size(arg0_br_target_shape));
broadcast(reinterpret_cast<const char*>(arg0_data),
reinterpret_cast<char*>(tmp.data()),
arg0_shape_tmp,
arg0_br_target_shape,
broadcast_axes,
sizeof(T));
arg0_update = arg0_broadcast_vec.data();
wip_arg0_shape = arg0_br_target_shape;
arg0_rank = wip_arg0_shape.size();
arg0_shape_tmp = arg0_br_target_shape;
arg0_rank = arg0_shape_tmp.size();
arg0_new_data.swap(tmp);
arg0_data = arg0_new_data.data();
}
}
if (arg1_br_target_shape != wip_arg1_shape)
{
auto broadcast_axes =
get_broadcast_axes(arg1_br_marker_shape, arg1_br_target_shape);
if (!broadcast_axes.empty())
if (arg1_br_target_shape != arg1_shape_tmp)
{
arg1_broadcast_vec.reserve(shape_size(arg1_br_target_shape));
broadcast(reinterpret_cast<const char*>(arg1_update),
reinterpret_cast<char*>(arg1_broadcast_vec.data()),
wip_arg1_shape,
std::vector<T> tmp(shape_size(arg1_br_target_shape));
broadcast(reinterpret_cast<const char*>(arg1_data),
reinterpret_cast<char*>(tmp.data()),
arg1_shape_tmp,
arg1_br_target_shape,
broadcast_axes,
sizeof(T));
arg1_update = arg1_broadcast_vec.data();
wip_arg1_shape = arg1_br_target_shape;
arg1_rank = wip_arg1_shape.size();
arg1_shape_tmp = arg1_br_target_shape;
arg1_rank = arg1_shape_tmp.size();
arg1_new_data.swap(tmp);
arg1_data = arg1_new_data.data();
}
}
}
// Perform batched dot
size_t output_batch_size = 1;
const Shape dot_arg0_shape = (arg0_rank > 2) ? Shape{arg0_shape_tmp[arg0_rank - 2],
arg0_shape_tmp[arg0_rank - 1]}
: arg0_shape_tmp;
const Shape dot_arg1_shape = (arg1_rank > 2) ? Shape{arg1_shape_tmp[arg1_rank - 2],
arg1_shape_tmp[arg1_rank - 1]}
: arg1_shape_tmp;
const Shape dot_output_shape =
(out_rank > 2 && arg0_rank > 1 && arg1_rank > 1)
? Shape{out_shape[out_rank - 2], out_shape[out_rank - 1]}
: Shape{out_shape[out_rank - 1]};
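// When either input is 1D, the per-batch dot result is 1D as well,
// so every leading out_shape dim counts as a batch dim below.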
// Calculate number of batches
if (out_rank < 3)
size_t output_batch_size = 1;
if (out_rank <= 2)
{
// Output is {batch_size, dot_result}, i.e.,
// arg0 shape {2}, arg1 shape {3, 2, 1}, output shape {3, 1}
@@ -244,38 +247,24 @@ namespace ngraph
}
else
{
for (size_t i = 0; i < (out_rank - 2); i++)
for (size_t i = 0; i < (out_rank - dot_output_shape.size()); i++)
{
output_batch_size *= out_shape[i];
}
}
Shape dot_arg0_shape = (arg0_rank > 2) ? Shape{wip_arg0_shape[arg0_rank - 2],
wip_arg0_shape[arg0_rank - 1]}
: wip_arg0_shape;
Shape dot_arg1_shape = (arg1_rank > 2) ? Shape{wip_arg1_shape[arg1_rank - 2],
wip_arg1_shape[arg1_rank - 1]}
: wip_arg1_shape;
Shape dot_output_shape =
(out_rank > 2) ? Shape{out_shape[out_rank - 2], out_shape[out_rank - 1]}
: Shape{out_shape[out_rank - 1]};
const size_t arg0_offset = (arg0_rank > 2) ? shape_size(dot_arg0_shape) : 0;
const size_t arg1_offset = (arg1_rank > 2) ? shape_size(dot_arg1_shape) : 0;
const size_t output_offset = shape_size(dot_output_shape);
for (size_t i = 0; i < output_batch_size; i++)
{
dot(arg0_update + i * arg0_offset,
arg1_update + i * arg1_offset,
out + i * output_offset,
dot_arg0_shape,
dot_arg1_shape,
dot_output_shape,
1);
details::dot(arg0_data + i * arg0_offset,
arg1_data + i * arg1_offset,
out + i * output_offset,
dot_arg0_shape,
dot_arg1_shape,
dot_output_shape);
}
}
}
}
}
NGRAPH_SUPPRESS_DEPRECATED_END

View File

@@ -1008,3 +1008,121 @@ NGRAPH_TEST(${BACKEND_NAME}, matmul_3_x_1_1_3_false_true_const)
test_case.add_expected_output<float>(shape_out, expected_result);
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, matmul_2_2_1_3_x_3_false_false_param)
{
Shape shape_a{2, 2, 1, 3};
Shape shape_b{3};
Shape shape_out{2, 2, 1};
bool transpose_a = false;
bool transpose_b = false;
std::vector<float> inputs_a(shape_size(shape_a));
std::iota(inputs_a.begin(), inputs_a.end(), 0);
std::vector<float> inputs_b(shape_size(shape_b));
std::iota(inputs_b.begin(), inputs_b.end(), 0);
std::vector<float> expected_result{5, 14, 23, 32};
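// a is an iota over {2, 2, 1, 3}, i.e. rows {0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11};
// each row dotted with b = {0, 1, 2} gives 5, 14, 23 and 32.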
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto B = make_shared<op::Parameter>(element::f32, shape_b);
auto matmul = make_shared<op::MatMul>(A, B, transpose_a, transpose_b);
auto f = make_shared<Function>(matmul, ParameterVector{A, B});
auto test_case = test::TestCase<TestEngine>(f);
test_case.add_input<float>(inputs_a);
test_case.add_input<float>(inputs_b);
test_case.add_expected_output<float>(shape_out, expected_result);
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, matmul_2_2_1_3_x_3_false_false_const)
{
Shape shape_a{2, 2, 1, 3};
Shape shape_b{3};
Shape shape_out{2, 2, 1};
bool transpose_a = false;
bool transpose_b = false;
std::vector<float> inputs_a(shape_size(shape_a));
std::iota(inputs_a.begin(), inputs_a.end(), 0);
std::vector<float> inputs_b(shape_size(shape_b));
std::iota(inputs_b.begin(), inputs_b.end(), 0);
std::vector<float> expected_result{5, 14, 23, 32};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto B = make_shared<op::Constant>(element::f32, shape_b, inputs_b);
auto matmul = make_shared<op::MatMul>(A, B, transpose_a, transpose_b);
auto f = make_shared<Function>(matmul, ParameterVector{A});
auto test_case = test::TestCase<TestEngine>(f);
test_case.add_input<float>(inputs_a);
test_case.add_expected_output<float>(shape_out, expected_result);
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, matmul_3_x_2_2_3_1_false_false_param)
{
Shape shape_a{3};
Shape shape_b{2, 2, 3, 1};
Shape shape_out{2, 2, 1};
bool transpose_a = false;
bool transpose_b = false;
std::vector<float> inputs_a(shape_size(shape_a));
std::iota(inputs_a.begin(), inputs_a.end(), 0);
std::vector<float> inputs_b(shape_size(shape_b));
std::iota(inputs_b.begin(), inputs_b.end(), 0);
std::vector<float> expected_result{5, 14, 23, 32};
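// Here a = {0, 1, 2} is dotted with each {3, 1} batch matrix of b, i.e. columns
// {0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}, again giving 5, 14, 23 and 32.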
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto B = make_shared<op::Parameter>(element::f32, shape_b);
auto matmul = make_shared<op::MatMul>(A, B, transpose_a, transpose_b);
auto f = make_shared<Function>(matmul, ParameterVector{A, B});
auto test_case = test::TestCase<TestEngine>(f);
test_case.add_input<float>(inputs_a);
test_case.add_input<float>(inputs_b);
test_case.add_expected_output<float>(shape_out, expected_result);
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, matmul_3_x_2_2_3_1_false_false_const)
{
Shape shape_a{3};
Shape shape_b{2, 2, 3, 1};
Shape shape_out{2, 2, 1};
bool transpose_a = false;
bool transpose_b = false;
std::vector<float> inputs_a(shape_size(shape_a));
std::iota(inputs_a.begin(), inputs_a.end(), 0);
std::vector<float> inputs_b(shape_size(shape_b));
std::iota(inputs_b.begin(), inputs_b.end(), 0);
std::vector<float> expected_result{5, 14, 23, 32};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto B = make_shared<op::Constant>(element::f32, shape_b, inputs_b);
auto matmul = make_shared<op::MatMul>(A, B, transpose_a, transpose_b);
auto f = make_shared<Function>(matmul, ParameterVector{A});
auto test_case = test::TestCase<TestEngine>(f);
test_case.add_input<float>(inputs_a);
test_case.add_expected_output<float>(shape_out, expected_result);
test_case.run();
}

View File

@@ -1839,7 +1839,7 @@ NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_fwd_activations_con
0.f,
0.f,
});
test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3);
test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 5);
}
NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_fwd_activations)
@@ -1887,7 +1887,7 @@ NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_fwd_activations)
0.f,
0.f,
});
test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3);
test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 5);
}
NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_fwd_mixed_seq_len_const)
@@ -1983,7 +1983,7 @@ NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_fwd_mixed_seq_len)
-0.18203181f,
0.9996245f,
});
test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3);
test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 4);
}
NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_reverse_mixed_seq_len_const)