MatMul reference implementation refactoring (#4671)
* MatMul backend tests
* Single layer tests
* Style apply
* Update IE_CPU manifest
* New dot implementation
* Use new dot in MatMul reference
* Fix output batch offset
* Style apply
* Relax tests tolerance
* Remove legacy dot reference file
* Remove usage of broadcast builder
* Add one more broadcast test
* Remove NGRAPH_SUPPRESS_DEPRECATED
* Style apply
* Few more MatMul single layer tests
* Update IE tests manifest
* Move variable declarations to inner loops
* Add const to variables
* Apply review suggestions
* Reuse vector for transposed and broadcasted data
This commit is contained in:
parent
e64d84b47b
commit
b8f36ec354
@@ -18,6 +18,19 @@ const std::vector<ShapeRelatedParams> shapeRelatedParams = {
        { { {1, 4, 5, 6}, false }, { {1, 4, 6, 4}, false } },
        { { {4, 5, 6}, false }, { {6, 3}, false } },
        { { {9, 9, 9}, false }, { {9, 9}, false } },
        { { {1, 2, 3}, false }, { {1, 1, 3, 2}, false } },
        { { {1, 3, 2, 4}, false }, { {2, 1, 4, 2}, false } },
        { { {2, 1, 2, 4}, false }, { {1, 3, 4, 2}, false } },
        { { {3, 2, 4}, false }, { {2, 1, 4, 2}, false } },
        { { {2, 1, 4, 2}, false }, { {3, 2, 4}, false } },
        { { {2, 1, 2, 3}, true }, { {3, 2, 4}, false } },
        { { {2, 1, 3, 2}, false }, { {3, 4, 2}, true } },
        { { {2, 1, 2, 3}, true }, { {3, 4, 2}, true } },
        { { {3}, false }, { {2, 2, 3, 1}, false } },
        { { {2, 2, 1, 3}, false }, { {3}, false } },
        { { {1, 5}, false }, { {5, 1}, false } },
        { { {5, 1}, true }, { {5, 1}, false } },
        { { {1, 5}, false }, { {1, 5}, true } },
        { { {1, 5}, false }, { {5}, false } },
        { { {5}, false }, { {5, 1}, false } },
        { { {5}, false }, { {5}, false } },
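Each ShapeRelatedParams entry above pairs an input shape with its transpose flag, i.e. { {shape}, transpose }. As a worked example, { { {2, 1, 2, 3}, true }, { {3, 2, 4}, false } } first transposes the last two axes of the left input to {2, 1, 3, 2}; the matrix dims {3, 2} x {2, 4} give {3, 4}, and the batch dims {2, 1} and {3} broadcast NumPy-style to {2, 3}, so the expected output shape is {2, 3, 3, 4}.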
@@ -1,170 +0,0 @@
//*****************************************************************************
// Copyright 2017-2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#pragma once

#include <cmath>
#include <utility>

#include <cfenv>
#include <functional>
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/runtime/reference/helpers.hpp"
#include "ngraph/shape_util.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace reference
        {
            template <typename INPUT0,
                      typename INPUT1,
                      typename OUTPUT,
                      typename ACCUMULATION = typename widen<OUTPUT>::type>
            void dot(const INPUT0* arg0,
                     const INPUT1* arg1,
                     OUTPUT* out,
                     const Shape& arg0_shape,
                     const Shape& arg1_shape,
                     const Shape& out_shape,
                     size_t reduction_axes_count,
                     const float* input0_scale = nullptr,
                     const INPUT0* input0_zero_point = nullptr,
                     const float* input1_scale = nullptr,
                     const INPUT1* input1_zero_point = nullptr,
                     const float* output_scale = nullptr,
                     const OUTPUT* output_zero_point = nullptr)
            {
                bool is_quantized = false;
                if (input0_scale && input0_zero_point && input1_scale && input1_zero_point &&
                    output_scale && output_zero_point)
                {
                    is_quantized = true;
                }

                auto old_mode = std::fegetround();
                std::fesetround(FE_TONEAREST);
                // Get the sizes of the dot axes. It's easiest to pull them from arg1 because
                // they're right up front.
                Shape dot_axis_sizes(reduction_axes_count);
                std::copy(arg1_shape.begin(),
                          arg1_shape.begin() + reduction_axes_count,
                          dot_axis_sizes.begin());

                CoordinateTransform arg0_transform(arg0_shape);
                CoordinateTransform arg1_transform(arg1_shape);
                CoordinateTransform output_transform(out_shape);

                // Create coordinate transforms for arg0 and arg1 that throw away the dotted axes.
                size_t arg0_projected_rank = arg0_shape.size() - reduction_axes_count;
                size_t arg1_projected_rank = arg1_shape.size() - reduction_axes_count;

                Shape arg0_projected_shape(arg0_projected_rank);
                std::copy(arg0_shape.begin(),
                          arg0_shape.begin() + arg0_projected_rank,
                          arg0_projected_shape.begin());

                Shape arg1_projected_shape(arg1_projected_rank);
                std::copy(arg1_shape.begin() + reduction_axes_count,
                          arg1_shape.end(),
                          arg1_projected_shape.begin());

                CoordinateTransform arg0_projected_transform(arg0_projected_shape);
                CoordinateTransform arg1_projected_transform(arg1_projected_shape);

                // Create a coordinate transform that allows us to iterate over all possible values
                // for the dotted axes.
                CoordinateTransform dot_axes_transform(dot_axis_sizes);

                for (const Coordinate& arg0_projected_coord : arg0_projected_transform)
                {
                    for (const Coordinate& arg1_projected_coord : arg1_projected_transform)
                    {
                        // The output coordinate is just the concatenation of the projected
                        // coordinates.
                        Coordinate out_coord(arg0_projected_coord.size() +
                                             arg1_projected_coord.size());

                        auto out_coord_it = std::copy(arg0_projected_coord.begin(),
                                                      arg0_projected_coord.end(),
                                                      out_coord.begin());
                        std::copy(
                            arg1_projected_coord.begin(), arg1_projected_coord.end(), out_coord_it);

                        // Zero out to start the sum.
                        ACCUMULATION sum = 0;

                        size_t out_index = output_transform.index(out_coord);

                        // Walk along the dotted axes.
                        Coordinate arg0_coord(arg0_shape.size());
                        Coordinate arg1_coord(arg1_shape.size());
                        auto arg0_it = std::copy(arg0_projected_coord.begin(),
                                                 arg0_projected_coord.end(),
                                                 arg0_coord.begin());
                        for (const Coordinate& dot_axis_positions : dot_axes_transform)
                        {
                            // In order to find the points to multiply together, we need to inject
                            // our current positions along the dotted axes back into the projected
                            // arg0 and arg1 coordinates.
                            std::copy(
                                dot_axis_positions.begin(), dot_axis_positions.end(), arg0_it);

                            auto arg1_it = std::copy(dot_axis_positions.begin(),
                                                     dot_axis_positions.end(),
                                                     arg1_coord.begin());
                            std::copy(
                                arg1_projected_coord.begin(), arg1_projected_coord.end(), arg1_it);

                            // Multiply and add to the sum.
                            if (is_quantized)
                            {
                                sum = sum + ((static_cast<ACCUMULATION>(
                                                  arg0[arg0_transform.index(arg0_coord)]) -
                                              static_cast<ACCUMULATION>(*input0_zero_point)) *
                                             (static_cast<ACCUMULATION>(
                                                  arg1[arg1_transform.index(arg1_coord)]) -
                                              static_cast<ACCUMULATION>(*input1_zero_point)));
                            }
                            else
                            {
                                sum = sum + (static_cast<ACCUMULATION>(
                                                 arg0[arg0_transform.index(arg0_coord)]) *
                                             static_cast<ACCUMULATION>(
                                                 arg1[arg1_transform.index(arg1_coord)]));
                            }
                        }

                        if (is_quantized)
                        {
                            float scale = *input0_scale * *input1_scale / *output_scale;
                            // Write the sum back.
                            out[out_index] =
                                static_cast<OUTPUT>(std::round(static_cast<float>(sum) * scale)) +
                                *output_zero_point;
                        }
                        else
                        {
                            out[out_index] = sum;
                        }
                    }
                }
                std::fesetround(old_mode);
            }
        }
    }
}
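For orientation, the removed kernel computed a generalized contraction over reduction_axes_count axes, iterating coordinates with CoordinateTransform and optionally applying quantization scales and zero points; the MatMul reference only ever invoked it with a reduction count of 1. A minimal standalone sketch of that common 2D case (hypothetical names, not code from the deleted file):

#include <cstddef>
#include <iostream>
#include <vector>

// A sketch of the 2D case the removed kernel handled, with an explicit
// accumulator type standing in for "typename widen<OUTPUT>::type".
template <typename T, typename Acc = T>
void dot_2d(const T* a, const T* b, T* out, std::size_t I, std::size_t K, std::size_t J)
{
    for (std::size_t i = 0; i < I; ++i)
    {
        for (std::size_t j = 0; j < J; ++j)
        {
            Acc sum = 0; // "Zero out to start the sum", as in the removed code
            for (std::size_t k = 0; k < K; ++k)
            {
                sum += static_cast<Acc>(a[i * K + k]) * static_cast<Acc>(b[k * J + j]);
            }
            out[i * J + j] = static_cast<T>(sum);
        }
    }
}

int main()
{
    const std::vector<float> a{1, 2, 3, 4, 5, 6}; // shape {2, 3}
    const std::vector<float> b{1, 0, 0, 1, 1, 1}; // shape {3, 2}
    std::vector<float> out(4);                    // shape {2, 2}
    dot_2d<float, double>(a.data(), b.data(), out.data(), 2, 3, 2);
    for (float v : out)
    {
        std::cout << v << ' '; // prints: 4 5 10 11
    }
    std::cout << '\n';
}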
@@ -225,7 +225,7 @@ namespace ngraph
                 clip_activation(r_t, activation_f);

                 // calculate h_t
-                vector<T> h_t(gate_shape_size);
+                std::vector<T> h_t(gate_shape_size);
                 if (linear_before_reset)
                 {
                     // ht = g(Xt*(Wh^T) + (rt (.) (Ht-1*(Rh^T) + Rbh)) + Wbh)
@@ -287,8 +287,8 @@ namespace ngraph
                 }
                 clip_activation(h_t, activation_g);
                 // Ht = (1 - zt) (.) ht + zt (.) Ht-1
-                vector<T> mul1(gate_shape_size);
-                vector<T> mul2(gate_shape_size);
+                std::vector<T> mul1(gate_shape_size);
+                std::vector<T> mul2(gate_shape_size);
                 T one[] = {1};
                 reference::subtract(
                     one, z_t.data(), mul1.data(), {1}, gate_shape, op::AutoBroadcastSpec::NUMPY);
@@ -178,9 +178,9 @@ namespace ngraph
                 // ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Wbo + Rbo)
                 clip_activation(X_W_fico[3], activation_f);

-                vector<T> mul1(gate_shape_size);
-                vector<T> mul2(gate_shape_size);
-                vector<T> Ct(gate_shape_size);
+                std::vector<T> mul1(gate_shape_size);
+                std::vector<T> mul2(gate_shape_size);
+                std::vector<T> Ct(gate_shape_size);
                 // ft (.) Ct-1
                 reference::multiply(X_W_fico[0].data(),
                                     C,
@@ -21,23 +21,63 @@
 #include <utility>
+#include <vector>

 #include "ngraph/axis_vector.hpp"
-#include "ngraph/builder/autobroadcast.hpp"
 #include "ngraph/runtime/opt_kernel/reshape.hpp"
 #include "ngraph/runtime/reference/broadcast.hpp"
-#include "ngraph/runtime/reference/dot.hpp"
 #include "ngraph/shape_util.hpp"

-NGRAPH_SUPPRESS_DEPRECATED_START
-
-using namespace std;
-
 namespace ngraph
 {
     namespace runtime
     {
         namespace reference
         {
+            namespace details
+            {
+                template <typename T>
+                void dot(const T* arg0,
+                         const T* arg1,
+                         T* out,
+                         const Shape& arg0_shape,
+                         const Shape& arg1_shape,
+                         const Shape& out_shape)
+                {
+                    std::fill(out, out + shape_size(out_shape), T{0});
+                    const size_t arg0_rank = arg0_shape.size();
+                    const size_t arg1_rank = arg1_shape.size();
+
+                    // 2D input shapes are interpreted as {I, K} x {K, J}.
+                    // If the first input is a 1D tensor of shape {K}, it is interpreted as {1, K}.
+                    // If the second input is a 1D tensor of shape {K}, it is interpreted as {K, 1}.
+                    const size_t I_dim = arg0_rank == 1 ? 1 : arg0_shape[arg0_rank - 2];
+                    const size_t J_dim = arg1_rank == 1 ? 1 : arg1_shape[arg1_rank - 1];
+                    const size_t K_dim =
+                        arg1_rank == 1 ? arg1_shape[arg1_rank - 1] : arg1_shape[arg1_rank - 2];
+
+                    for (size_t i = 0; i < I_dim; ++i)
+                    {
+                        for (size_t k = 0; k < K_dim; ++k)
+                        {
+                            const size_t a_idx = i * K_dim + k;
+                            for (size_t j = 0; j < J_dim; ++j)
+                            {
+                                const size_t b_idx = k * J_dim + j;
+                                const size_t out_idx = i * J_dim + j;
+                                out[out_idx] += arg0[a_idx] * arg1[b_idx];
+                            }
+                        }
+                    }
+                }
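The new kernel zero-fills the output once and then accumulates in place, with the loops ordered i-k-j so that the innermost loop walks both arg1 and out contiguously. A standalone check of the 1D handling described in the comments (hypothetical driver, outside the ngraph namespaces):

#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    const std::vector<float> a{0, 1, 2};          // shape {3}, read as {1, 3}
    const std::vector<float> b{0, 1, 2, 3, 4, 5}; // shape {3, 2}
    std::vector<float> out(1 * 2, 0.0f);          // shape {1, 2}, zero-filled first

    // Same loop nest and indexing as details::dot above.
    const std::size_t I_dim = 1, K_dim = 3, J_dim = 2;
    for (std::size_t i = 0; i < I_dim; ++i)
    {
        for (std::size_t k = 0; k < K_dim; ++k)
        {
            const std::size_t a_idx = i * K_dim + k;
            for (std::size_t j = 0; j < J_dim; ++j)
            {
                out[i * J_dim + j] += a[a_idx] * b[k * J_dim + j];
            }
        }
    }

    assert(out[0] == 10.0f && out[1] == 13.0f); // {0,1,2} . [[0,1],[2,3],[4,5]]
}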
+
+                std::vector<size_t> get_transpose_order(const Shape& input_shape)
+                {
+                    size_t rank = input_shape.size();
+                    NGRAPH_CHECK(rank > 1, "Invalid input for transpose");
+                    std::vector<size_t> axes_order(rank);
+                    std::iota(axes_order.begin(), axes_order.end(), 0);
+                    std::swap(axes_order[rank - 1], axes_order[rank - 2]);
+                    return axes_order;
+                }
+            }
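Note that get_transpose_order always swaps just the two innermost axes, which is exactly the transpose that MatMul's transpose_a/transpose_b attributes call for: for an input shape of {2, 1, 3, 2} it returns the axis order {0, 1, 3, 2}, which is then handed to opt_kernel::reshape to materialize the transposed copy.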

             /// \brief Reference kernel for matmul computation.
             ///
             /// \tparam T Type of input and output tensors.
@@ -70,89 +110,59 @@ namespace ngraph
                 // and perform broadcast if applicable
                 // 4) Perform dot on the args or updated args and return result

-                size_t arg0_rank = arg0_shape.size();
-                size_t arg1_rank = arg1_shape.size();
-                size_t out_rank = out_shape.size();
-
-                // vector vars to hold potential intermediate transpose,
-                // broadcast result
-                vector<T> arg0_transpose_vec;
-                vector<T> arg1_transpose_vec;
-                vector<T> arg0_broadcast_vec;
-                vector<T> arg1_broadcast_vec;
-
                 // pointers to updated inputs
-                const T* arg0_update = arg0;
-                const T* arg1_update = arg1;
+                const T* arg0_data = arg0;
+                const T* arg1_data = arg1;

+                // vectors to hold potential intermediate transpose,
+                // broadcast result
+                std::vector<T> arg0_new_data;
+                std::vector<T> arg1_new_data;
+
                 // vars for updated inputs shapes
-                Shape wip_arg0_shape = arg0_shape;
-                Shape wip_arg1_shape = arg1_shape;
+                Shape arg0_shape_tmp = arg0_shape;
+                Shape arg1_shape_tmp = arg1_shape;

-                auto get_transpose_order = [](const Shape& input_shape) {
-                    size_t rank = input_shape.size();
-                    NGRAPH_CHECK(rank > 1, "Invalid input for transpose");
-                    vector<size_t> axes_order(rank);
-                    iota(axes_order.begin(), axes_order.end(), 0);
-                    swap(axes_order[rank - 1], axes_order[rank - 2]);
-                    return AxisVector{begin(axes_order), end(axes_order)};
-                };
-
-                auto get_broadcast_axes = [](const Shape& marker_shape, const Shape& target_shape) {
-                    NGRAPH_CHECK(marker_shape.size() == target_shape.size(),
-                                 "Incompatible input shapes");
-                    AxisSet broadcast_axes;
-                    for (size_t i = 0; i < marker_shape.size(); i++)
-                    {
-                        if (marker_shape[i] == 1 && target_shape[i] != 1)
-                        {
-                            broadcast_axes.insert(i);
-                        }
-                    }
-                    return broadcast_axes;
-                };
+                size_t arg0_rank = arg0_shape.size();
+                size_t arg1_rank = arg1_shape.size();
+                const size_t out_rank = out_shape.size();

                 // Perform transpose if requested
                 if (transpose_arg0 && arg0_rank > 1)
                 {
-                    arg0_transpose_vec.reserve(shape_size(arg0_shape));
-                    auto axis_vector = get_transpose_order(arg0_shape);
-                    swap(wip_arg0_shape[arg0_rank - 1], wip_arg0_shape[arg0_rank - 2]);
-                    opt_kernel::reshape(reinterpret_cast<const char*>(arg0),
-                                        reinterpret_cast<char*>(arg0_transpose_vec.data()),
+                    std::vector<T> tmp(shape_size(arg0_shape));
+                    auto axis_vector = details::get_transpose_order(arg0_shape);
+                    std::swap(arg0_shape_tmp[arg0_rank - 1], arg0_shape_tmp[arg0_rank - 2]);
+                    opt_kernel::reshape(reinterpret_cast<const char*>(arg0_data),
+                                        reinterpret_cast<char*>(tmp.data()),
                                         arg0_shape,
                                         axis_vector,
-                                        wip_arg0_shape,
+                                        arg0_shape_tmp,
                                         sizeof(T));

-                    arg0_update = arg0_transpose_vec.data();
+                    arg0_new_data.swap(tmp);
+                    arg0_data = arg0_new_data.data();
                 }

                 if (transpose_arg1 && arg1_rank > 1)
                 {
-                    arg1_transpose_vec.reserve(shape_size(arg1_shape));
-                    auto axis_vector = get_transpose_order(arg1_shape);
-                    swap(wip_arg1_shape[arg1_rank - 1], wip_arg1_shape[arg1_rank - 2]);
-                    opt_kernel::reshape(reinterpret_cast<const char*>(arg1),
-                                        reinterpret_cast<char*>(arg1_transpose_vec.data()),
+                    std::vector<T> tmp(shape_size(arg1_shape));
+                    auto axis_vector = details::get_transpose_order(arg1_shape);
+                    std::swap(arg1_shape_tmp[arg1_rank - 1], arg1_shape_tmp[arg1_rank - 2]);
+                    opt_kernel::reshape(reinterpret_cast<const char*>(arg1_data),
+                                        reinterpret_cast<char*>(tmp.data()),
                                         arg1_shape,
                                         axis_vector,
-                                        wip_arg1_shape,
+                                        arg1_shape_tmp,
                                         sizeof(T));

-                    arg1_update = arg1_transpose_vec.data();
+                    arg1_new_data.swap(tmp);
+                    arg1_data = arg1_new_data.data();
                 }

                 // Inputs are 2D and below, perform dot directly
                 if (arg0_rank <= 2 && arg1_rank <= 2)
                 {
-                    dot(arg0_update,
-                        arg1_update,
-                        out,
-                        wip_arg0_shape,
-                        wip_arg1_shape,
-                        out_shape,
-                        1);
+                    details::dot(
+                        arg0_data, arg1_data, out, arg0_shape_tmp, arg1_shape_tmp, out_shape);
                     return;
                 }

@@ -163,80 +173,73 @@ namespace ngraph

                 if (arg0_rank > 2 && arg1_rank > 2)
                 {
-                    const auto& broadcast_shapes = builder::get_numpy_broadcast_shapes(
-                        {Shape{begin(wip_arg0_shape), next(end(wip_arg0_shape), -2)},
-                         Shape{begin(wip_arg1_shape), next(end(wip_arg1_shape), -2)}});
-
-                    Shape arg0_br_target_shape = broadcast_shapes.first;
-                    Shape arg1_br_target_shape = broadcast_shapes.first;
-                    Shape arg0_br_marker_shape = broadcast_shapes.second.at(0);
-                    Shape arg1_br_marker_shape = broadcast_shapes.second.at(1);
+                    // Align input batches to the output shape
+                    Shape arg0_br_target_shape(out_shape.begin(), out_shape.end() - 2);
+                    Shape arg1_br_target_shape(out_shape.begin(), out_shape.end() - 2);

                     arg0_br_target_shape.insert(
-                        end(arg0_br_target_shape),
-                        next(begin(wip_arg0_shape), wip_arg0_shape.size() - 2),
-                        end(wip_arg0_shape));
+                        end(arg0_br_target_shape), end(arg0_shape_tmp) - 2, end(arg0_shape_tmp));
                     arg1_br_target_shape.insert(
-                        end(arg1_br_target_shape),
-                        next(begin(wip_arg1_shape), wip_arg1_shape.size() - 2),
-                        end(wip_arg1_shape));
-
-                    arg0_br_marker_shape.insert(
-                        end(arg0_br_marker_shape),
-                        next(begin(wip_arg0_shape), wip_arg0_shape.size() - 2),
-                        end(wip_arg0_shape));
-                    arg1_br_marker_shape.insert(
-                        end(arg1_br_marker_shape),
-                        next(begin(wip_arg1_shape), wip_arg1_shape.size() - 2),
-                        end(wip_arg1_shape));
+                        end(arg1_br_target_shape), end(arg1_shape_tmp) - 2, end(arg1_shape_tmp));

-                    if (arg0_br_target_shape != wip_arg0_shape)
+                    std::vector<size_t> broadcast_axes(out_shape.size() - 2);
+                    std::iota(broadcast_axes.begin(), broadcast_axes.end(), 0);
+                    if (!broadcast_axes.empty())
                     {
-                        auto broadcast_axes =
-                            get_broadcast_axes(arg0_br_marker_shape, arg0_br_target_shape);
-                        if (!broadcast_axes.empty())
+                        // Usual rules of the broadcasting are applied for batch dimensions.
+                        // If ranks of input arguments are different,
+                        // the smaller tensor is unsqueezed from the left side of the shape
+                        // by the necessary number of axes to make both shapes the same rank.
+                        // Broadcast all batches (the last two dimensions represent the matrix),
+                        // expanding a dim of value 1 to the bigger dim if dimensions are not equal.
+                        if (arg0_br_target_shape != arg0_shape_tmp)
                         {
-                            arg0_broadcast_vec.reserve(shape_size(arg0_br_target_shape));
-                            broadcast(reinterpret_cast<const char*>(arg0_update),
-                                      reinterpret_cast<char*>(arg0_broadcast_vec.data()),
-                                      wip_arg0_shape,
+                            std::vector<T> tmp(shape_size(arg0_br_target_shape));
+                            broadcast(reinterpret_cast<const char*>(arg0_data),
+                                      reinterpret_cast<char*>(tmp.data()),
+                                      arg0_shape_tmp,
                                       arg0_br_target_shape,
                                       broadcast_axes,
                                       sizeof(T));

-                            arg0_update = arg0_broadcast_vec.data();
-                            wip_arg0_shape = arg0_br_target_shape;
-                            arg0_rank = wip_arg0_shape.size();
+                            arg0_shape_tmp = arg0_br_target_shape;
+                            arg0_rank = arg0_shape_tmp.size();
+                            arg0_new_data.swap(tmp);
+                            arg0_data = arg0_new_data.data();
                         }
                     }

-                    if (arg1_br_target_shape != wip_arg1_shape)
-                    {
-                        auto broadcast_axes =
-                            get_broadcast_axes(arg1_br_marker_shape, arg1_br_target_shape);
-                        if (!broadcast_axes.empty())
+                        if (arg1_br_target_shape != arg1_shape_tmp)
                         {
-                            arg1_broadcast_vec.reserve(shape_size(arg1_br_target_shape));
-                            broadcast(reinterpret_cast<const char*>(arg1_update),
-                                      reinterpret_cast<char*>(arg1_broadcast_vec.data()),
-                                      wip_arg1_shape,
+                            std::vector<T> tmp(shape_size(arg1_br_target_shape));
+                            broadcast(reinterpret_cast<const char*>(arg1_data),
+                                      reinterpret_cast<char*>(tmp.data()),
+                                      arg1_shape_tmp,
                                       arg1_br_target_shape,
                                       broadcast_axes,
                                       sizeof(T));

-                            arg1_update = arg1_broadcast_vec.data();
-                            wip_arg1_shape = arg1_br_target_shape;
-                            arg1_rank = wip_arg1_shape.size();
+                            arg1_shape_tmp = arg1_br_target_shape;
+                            arg1_rank = arg1_shape_tmp.size();
+                            arg1_new_data.swap(tmp);
+                            arg1_data = arg1_new_data.data();
                         }
                     }
                 }
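A quick sketch of the batch broadcasting described in the comment above: right-align the two batch shapes, pad the shorter with 1s on the left, then take the larger extent wherever one side is 1. The function name below is illustrative, not part of the ngraph API:

#include <algorithm>
#include <cassert>
#include <vector>

std::vector<size_t> broadcast_batches(std::vector<size_t> a, std::vector<size_t> b)
{
    const size_t rank = std::max(a.size(), b.size());
    a.insert(a.begin(), rank - a.size(), 1); // unsqueeze from the left side
    b.insert(b.begin(), rank - b.size(), 1);
    std::vector<size_t> out(rank);
    for (size_t i = 0; i < rank; ++i)
    {
        assert(a[i] == b[i] || a[i] == 1 || b[i] == 1);
        out[i] = std::max(a[i], b[i]);
    }
    return out;
}

int main()
{
    // The batch parts of {2, 1, 2, 4} x {1, 3, 4, 2} (a pair from the test list
    // above) are {2, 1} and {1, 3}, which broadcast to {2, 3}; the matrices are
    // 2x4 and 4x2, so the full output shape is {2, 3, 2, 2}.
    const auto batches = broadcast_batches({2, 1}, {1, 3});
    assert((batches == std::vector<size_t>{2, 3}));
}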

                 // Perform batched dot

-                size_t output_batch_size = 1;
+                const Shape dot_arg0_shape = (arg0_rank > 2) ? Shape{arg0_shape_tmp[arg0_rank - 2],
+                                                                     arg0_shape_tmp[arg0_rank - 1]}
+                                                             : arg0_shape_tmp;
+                const Shape dot_arg1_shape = (arg1_rank > 2) ? Shape{arg1_shape_tmp[arg1_rank - 2],
+                                                                     arg1_shape_tmp[arg1_rank - 1]}
+                                                             : arg1_shape_tmp;
+                const Shape dot_output_shape =
+                    (out_rank > 2 && arg0_rank > 1 && arg1_rank > 1)
+                        ? Shape{out_shape[out_rank - 2], out_shape[out_rank - 1]}
+                        : Shape{out_shape[out_rank - 1]};

                 // Calculate number of batches
-                if (out_rank < 3)
+                size_t output_batch_size = 1;
+                if (out_rank <= 2)
                 {
                     // Output is {batch_size, dot_result}, i.e.,
                     // arg 0 shape {2}, arg1 shape {3, 2, 1}, output shape {3, 1}
@@ -244,38 +247,24 @@ namespace ngraph
                 }
                 else
                 {
-                    for (size_t i = 0; i < (out_rank - 2); i++)
+                    for (size_t i = 0; i < (out_rank - dot_output_shape.size()); i++)
                     {
                         output_batch_size *= out_shape[i];
                     }
                 }

-                Shape dot_arg0_shape = (arg0_rank > 2) ? Shape{wip_arg0_shape[arg0_rank - 2],
-                                                               wip_arg0_shape[arg0_rank - 1]}
-                                                       : wip_arg0_shape;
-                Shape dot_arg1_shape = (arg1_rank > 2) ? Shape{wip_arg1_shape[arg1_rank - 2],
-                                                               wip_arg1_shape[arg1_rank - 1]}
-                                                       : wip_arg1_shape;
-                Shape dot_output_shape =
-                    (out_rank > 2) ? Shape{out_shape[out_rank - 2], out_shape[out_rank - 1]}
-                                   : Shape{out_shape[out_rank - 1]};
-
                 const size_t arg0_offset = (arg0_rank > 2) ? shape_size(dot_arg0_shape) : 0;
                 const size_t arg1_offset = (arg1_rank > 2) ? shape_size(dot_arg1_shape) : 0;
                 const size_t output_offset = shape_size(dot_output_shape);
                 for (size_t i = 0; i < output_batch_size; i++)
                 {
-                    dot(arg0_update + i * arg0_offset,
-                        arg1_update + i * arg1_offset,
-                        out + i * output_offset,
-                        dot_arg0_shape,
-                        dot_arg1_shape,
-                        dot_output_shape,
-                        1);
+                    details::dot(arg0_data + i * arg0_offset,
+                                 arg1_data + i * arg1_offset,
+                                 out + i * output_offset,
+                                 dot_arg0_shape,
+                                 dot_arg1_shape,
+                                 dot_output_shape);
                 }
             }
         }
     }
 }

-NGRAPH_SUPPRESS_DEPRECATED_END
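When an input is left 2D or below, its per-batch offset is 0, so the same matrix block is reused for every batch while out still advances by shape_size(dot_output_shape). For the {2, 2, 1, 3} x {3} tests added below, dot_arg0_shape = {1, 3} and dot_arg1_shape = {3}, so arg0_offset = 3, arg1_offset = 0, output_offset = 1, and the loop runs output_batch_size = 2 * 2 = 4 times; this interplay between the offsets and dot_output_shape.size() appears to be what the "Fix output batch offset" item in the commit message addresses.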

@@ -1008,3 +1008,121 @@ NGRAPH_TEST(${BACKEND_NAME}, matmul_3_x_1_1_3_false_true_const)
     test_case.add_expected_output<float>(shape_out, expected_result);
     test_case.run();
 }
+
+NGRAPH_TEST(${BACKEND_NAME}, matmul_2_2_1_3_x_3_false_false_param)
+{
+    Shape shape_a{2, 2, 1, 3};
+    Shape shape_b{3};
+    Shape shape_out{2, 2, 1};
+
+    bool transpose_a = false;
+    bool transpose_b = false;
+
+    std::vector<float> inputs_a(shape_size(shape_a));
+    std::iota(inputs_a.begin(), inputs_a.end(), 0);
+
+    std::vector<float> inputs_b(shape_size(shape_b));
+    std::iota(inputs_b.begin(), inputs_b.end(), 0);
+
+    std::vector<float> expected_result{5, 14, 23, 32};
+
+    auto A = make_shared<op::Parameter>(element::f32, shape_a);
+    auto B = make_shared<op::Parameter>(element::f32, shape_b);
+    auto matmul = make_shared<op::MatMul>(A, B, transpose_a, transpose_b);
+    auto f = make_shared<Function>(matmul, ParameterVector{A, B});
+
+    auto test_case = test::TestCase<TestEngine>(f);
+    test_case.add_input<float>(inputs_a);
+    test_case.add_input<float>(inputs_b);
+
+    test_case.add_expected_output<float>(shape_out, expected_result);
+    test_case.run();
+}
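With iota-filled inputs the expected values can be checked by hand: each of the four batches of A holds three consecutive values, and B is {0, 1, 2}, so the dot products are 0*0 + 1*1 + 2*2 = 5, 3*0 + 4*1 + 5*2 = 14, 6*0 + 7*1 + 8*2 = 23 and 9*0 + 10*1 + 11*2 = 32; the same expected_result{5, 14, 23, 32} works for all four tests in this block, including the {3} x {2, 2, 3, 1} pair.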
+
+NGRAPH_TEST(${BACKEND_NAME}, matmul_2_2_1_3_x_3_false_false_const)
+{
+    Shape shape_a{2, 2, 1, 3};
+    Shape shape_b{3};
+    Shape shape_out{2, 2, 1};
+
+    bool transpose_a = false;
+    bool transpose_b = false;
+
+    std::vector<float> inputs_a(shape_size(shape_a));
+    std::iota(inputs_a.begin(), inputs_a.end(), 0);
+
+    std::vector<float> inputs_b(shape_size(shape_b));
+    std::iota(inputs_b.begin(), inputs_b.end(), 0);
+
+    std::vector<float> expected_result{5, 14, 23, 32};
+
+    auto A = make_shared<op::Parameter>(element::f32, shape_a);
+    auto B = make_shared<op::Constant>(element::f32, shape_b, inputs_b);
+    auto matmul = make_shared<op::MatMul>(A, B, transpose_a, transpose_b);
+    auto f = make_shared<Function>(matmul, ParameterVector{A});
+
+    auto test_case = test::TestCase<TestEngine>(f);
+    test_case.add_input<float>(inputs_a);
+
+    test_case.add_expected_output<float>(shape_out, expected_result);
+    test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, matmul_3_x_2_2_3_1_false_false_param)
+{
+    Shape shape_a{3};
+    Shape shape_b{2, 2, 3, 1};
+    Shape shape_out{2, 2, 1};
+
+    bool transpose_a = false;
+    bool transpose_b = false;
+
+    std::vector<float> inputs_a(shape_size(shape_a));
+    std::iota(inputs_a.begin(), inputs_a.end(), 0);
+
+    std::vector<float> inputs_b(shape_size(shape_b));
+    std::iota(inputs_b.begin(), inputs_b.end(), 0);
+
+    std::vector<float> expected_result{5, 14, 23, 32};
+
+    auto A = make_shared<op::Parameter>(element::f32, shape_a);
+    auto B = make_shared<op::Parameter>(element::f32, shape_b);
+    auto matmul = make_shared<op::MatMul>(A, B, transpose_a, transpose_b);
+    auto f = make_shared<Function>(matmul, ParameterVector{A, B});
+
+    auto test_case = test::TestCase<TestEngine>(f);
+    test_case.add_input<float>(inputs_a);
+    test_case.add_input<float>(inputs_b);
+
+    test_case.add_expected_output<float>(shape_out, expected_result);
+    test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, matmul_3_x_2_2_3_1_false_false_const)
+{
+    Shape shape_a{3};
+    Shape shape_b{2, 2, 3, 1};
+    Shape shape_out{2, 2, 1};
+
+    bool transpose_a = false;
+    bool transpose_b = false;
+
+    std::vector<float> inputs_a(shape_size(shape_a));
+    std::iota(inputs_a.begin(), inputs_a.end(), 0);
+
+    std::vector<float> inputs_b(shape_size(shape_b));
+    std::iota(inputs_b.begin(), inputs_b.end(), 0);
+
+    std::vector<float> expected_result{5, 14, 23, 32};
+
+    auto A = make_shared<op::Parameter>(element::f32, shape_a);
+    auto B = make_shared<op::Constant>(element::f32, shape_b, inputs_b);
+    auto matmul = make_shared<op::MatMul>(A, B, transpose_a, transpose_b);
+    auto f = make_shared<Function>(matmul, ParameterVector{A});
+
+    auto test_case = test::TestCase<TestEngine>(f);
+    test_case.add_input<float>(inputs_a);
+
+    test_case.add_expected_output<float>(shape_out, expected_result);
+    test_case.run();
+}
@@ -1839,7 +1839,7 @@ NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_fwd_activations_con
                       0.f,
                       0.f,
                   });
-    test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3);
+    test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 5);
 }

 NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_fwd_activations)
@@ -1887,7 +1887,7 @@ NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_fwd_activations)
                       0.f,
                       0.f,
                   });
-    test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3);
+    test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 5);
 }

 NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_fwd_mixed_seq_len_const)
@@ -1983,7 +1983,7 @@ NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_fwd_mixed_seq_len)
                       -0.18203181f,
                       0.9996245f,
                   });
-    test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3);
+    test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 4);
 }

 NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_reverse_mixed_seq_len_const)
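The tolerance bumps above line up with the dot refactoring: the removed reference::dot accumulated in a widened type (typename widen<OUTPUT>::type, under FE_TONEAREST rounding), while the new details::dot accumulates directly in T, so float results can legitimately differ in the last few ULPs; presumably this is the "Relax tests tolerance" item from the commit message.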