[Core] Conv reference optimization. (#17303)

This commit is contained in:
Nikolay Shchegolev
2023-05-17 15:54:33 +04:00
committed by GitHub
parent db58355fad
commit a6b043b1ca
8 changed files with 331 additions and 120 deletions

View File

@@ -78,7 +78,7 @@ namespace v4 {
/// \ingroup ov_ops_cpp_api
class OPENVINO_API Proposal : public op::v0::Proposal {
public:
OPENVINO_OP("Proposal", "opset4", op::Op);
OPENVINO_OP("Proposal", "opset4", op::v0::Proposal);
Proposal() = default;
/// \brief Constructs a Proposal operation
///

View File

@@ -4,31 +4,21 @@
#pragma once
#include <cassert>
#include <cfenv>
#include <cmath>
#include <functional>
#include <numeric>
#include <future>
#include "ngraph/axis_vector.hpp"
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/runtime/reference/concat.hpp"
#include "ngraph/runtime/reference/helpers.hpp"
#include "ngraph/runtime/reference/reverse.hpp"
#include "ngraph/runtime/reference/split.hpp"
#include "ngraph/util.hpp"
namespace ngraph {
namespace runtime {
namespace reference {
namespace {
constexpr size_t in_batch_axis = 0;
constexpr size_t in_channel_axis = 1;
constexpr size_t filter_out_ch_axis = 0;
constexpr size_t filter_in_ch_axis = 1;
constexpr size_t out_batch_axis = 0;
constexpr size_t out_channel_axis = 1;
constexpr size_t spatial_axis = 2;
struct ConvolutionParams {
std::vector<int64_t> strides;
@@ -54,65 +44,164 @@ constexpr inline bool in_range(Int val, std::pair<Int, Int> range) noexcept {
return val >= range.first && val < range.second;
}
/// \brief Computes one 2D output plane by convolving every channel of one batch item
///        with the matching channel of one filter and summing the results.
///
/// \param p            Convolution attributes; index 0 of each attribute is the vertical
///                     axis and index 1 the horizontal axis.
/// \param batch        Input data of one batch item, read plane by plane.
/// \param batch_shape  Shape of `batch`; elements [1] and [2] are used as the plane height
///                     and width (element [0] is not read here).
/// \param filter       Filter weights, consumed sequentially.
/// \param filter_shape Shape of `filter`; element [0] is the number of channels to
///                     accumulate, elements [1] and [2] the kernel height and width.
/// \param out          Destination; one value is written per output position, in row-major
///                     order of the visited positions.
///
/// NOTE(review): `batch` is assumed to hold at least `filter_shape[0]` planes - verify
/// against the caller.
template <typename T>
void convolve_2D_channels(const ConvolutionParams& p,
                          const T* batch,
                          const Shape& batch_shape,
                          const T* filter,
                          const Shape& filter_shape,
                          T* out) {
    const int dil_h = static_cast<int>(p.dilation[0]);
    const int dil_w = static_cast<int>(p.dilation[1]);
    const int step_h = static_cast<int>(p.strides[0]);
    const int step_w = static_cast<int>(p.strides[1]);
    // Window origins start in the (virtual) leading padding, hence the negative start.
    const int first_h = -static_cast<int>(p.pads_begin[0]);
    const int first_w = -static_cast<int>(p.pads_begin[1]);

    const int in_h = static_cast<int>(batch_shape[1]);
    const int in_w = static_cast<int>(batch_shape[2]);
    const int plane = in_h * in_w;

    const size_t channels = filter_shape[0];
    const int k_h = static_cast<int>(filter_shape[1]);
    const int k_w = static_cast<int>(filter_shape[2]);

    // Extent covered by the kernel once the dilation gaps are included.
    const int span_h = static_cast<int>(k_h + (k_h - 1) * (dil_h - 1));
    const int span_w = static_cast<int>(k_w + (k_w - 1) * (dil_w - 1));

    // Last admissible window origin along each axis.
    const int last_h = static_cast<int>(p.pads_end[0] + in_h - span_h + p.output_padding[0]);
    const int last_w = static_cast<int>(p.pads_end[1] + in_w - span_w + p.output_padding[1]);

    // All positions below are flat offsets into a single input plane.
    const int row_step = dil_h * in_w;
    const int col_step = dil_w;
    const int row_extent = k_h * row_step;
    const int col_extent = k_w * col_step;

    for (int oy = first_h; oy <= last_h; oy += step_h) {
        const int row_origin = oy * in_w;
        const int row_stop = row_extent + row_origin;

        for (int ox = first_w; ox <= last_w; ox += step_w) {
            const int col_extent_shifted = col_extent + ox;

            const T* src = batch;
            const T* weight = filter;
            T acc = 0;

            for (size_t c = 0; c < channels; ++c, src += plane) {
                for (int row = row_origin; row < row_stop; row += row_step) {
                    const bool row_inside = row >= 0 && row < plane;
                    if (!row_inside) {
                        // Whole kernel row lies in the padding: skip its weights.
                        weight += k_w;
                        continue;
                    }

                    const int row_end = in_w + row;
                    const int col_stop = col_extent_shifted + row;
                    for (int idx = row + ox; idx < col_stop; idx += col_step, ++weight) {
                        // Taps left or right of the current input row are padding.
                        if (idx >= row && idx < row_end) {
                            acc += src[idx] * weight[0];
                        }
                    }
                }
            }

            *out++ = acc;
        }
    }
}
template <typename T>
void convolve_3D_channels(const ConvolutionParams& p,
const T* batch,
const Shape& batch_shape,
const T* filter,
const Shape& filter_shape,
T*& out) {
T* out) {
const int dilation_z = static_cast<int>(p.dilation[0]);
const int dilation_y = static_cast<int>(p.dilation[1]);
const int dilation_x = static_cast<int>(p.dilation[2]);
const int pad_begin_z = static_cast<int>(p.pads_begin[0]);
const int pad_begin_y = static_cast<int>(p.pads_begin[1]);
const int pad_begin_x = static_cast<int>(p.pads_begin[2]);
const int stride_z = static_cast<int>(p.strides[0]);
const int stride_y = static_cast<int>(p.strides[1]);
const int stride_x = static_cast<int>(p.strides[2]);
const int input_size_z = static_cast<int>(batch_shape[1]);
const int input_size_y = static_cast<int>(batch_shape[2]);
const int input_size_x = static_cast<int>(batch_shape[3]);
const int input_size_yx = input_size_y * input_size_x;
const int input_size_zyx = input_size_z * input_size_yx;
const size_t f_channels = filter_shape[0];
const int filter_size_z = static_cast<int>(filter_shape[1]);
const int filter_size_y = static_cast<int>(filter_shape[2]);
const int filter_size_x = static_cast<int>(filter_shape[3]);
const int dilated_filter_size_z = static_cast<int>(filter_size_z + (filter_size_z - 1) * (p.dilation[0] - 1));
const int dilated_filter_size_y = static_cast<int>(filter_size_y + (filter_size_y - 1) * (p.dilation[1] - 1));
const int dilated_filter_size_x = static_cast<int>(filter_size_x + (filter_size_x - 1) * (p.dilation[2] - 1));
const int filter_size_yx = filter_size_y * filter_size_x;
const Shape input_channel_shape(++batch_shape.begin(), batch_shape.end());
const size_t input_channel_size = shape_size(input_channel_shape);
const Shape filter_channel_shape(++filter_shape.begin(), filter_shape.end());
const size_t filter_channel_size = shape_size(filter_channel_shape);
const int dilated_filter_size_z = static_cast<int>(filter_size_z + (filter_size_z - 1) * (dilation_z - 1));
const int dilated_filter_size_y = static_cast<int>(filter_size_y + (filter_size_y - 1) * (dilation_y - 1));
const int dilated_filter_size_x = static_cast<int>(filter_size_x + (filter_size_x - 1) * (dilation_x - 1));
for (int i_z = static_cast<int>(-p.pads_begin[0]);
i_z <= static_cast<int>(p.pads_end[0] + input_size_z - dilated_filter_size_z + p.output_padding[0]);
i_z += static_cast<int>(p.strides[0])) {
for (int i_y = static_cast<int>(-p.pads_begin[1]);
i_y <= static_cast<int>(p.pads_end[1] + input_size_y - dilated_filter_size_y + p.output_padding[1]);
i_y += static_cast<int>(p.strides[1])) {
for (int i_x = static_cast<int>(-p.pads_begin[2]);
i_x <= static_cast<int>(p.pads_end[2] + input_size_x - dilated_filter_size_x + p.output_padding[2]);
i_x += static_cast<int>(p.strides[2])) {
const int i_z_lim = static_cast<int>(p.pads_end[0] + input_size_z - dilated_filter_size_z + p.output_padding[0]);
const int i_y_lim = static_cast<int>(p.pads_end[1] + input_size_y - dilated_filter_size_y + p.output_padding[1]);
const int i_x_lim = static_cast<int>(p.pads_end[2] + input_size_x - dilated_filter_size_x + p.output_padding[2]);
const int f_z_increment = dilation_z * input_size_yx;
const int f_y_increment = dilation_y * input_size_x;
const int f_x_increment = dilation_x;
const int f_z_block = filter_size_z * f_z_increment;
const int f_y_block = filter_size_y * f_y_increment;
const int f_x_block = filter_size_x * f_x_increment;
for (int i_z = -pad_begin_z; i_z <= i_z_lim; i_z += stride_z) {
const int s_z_shift = i_z * input_size_yx;
const int f_z_up_bound = f_z_block + s_z_shift;
for (int i_y = -pad_begin_y; i_y <= i_y_lim; i_y += stride_y) {
const int i_y_m = i_y * input_size_x;
for (int i_x = -pad_begin_x; i_x <= i_x_lim; i_x += stride_x) {
auto input_channel = batch;
auto filter_channel = filter;
T sum = 0;
size_t filter_channels_count = filter_shape[0];
size_t filter_channels_count = f_channels;
while (filter_channels_count--) {
for (int f_z = 0; f_z < filter_size_z; ++f_z) {
for (int f_y = 0; f_y < filter_size_y; ++f_y) {
for (int f_x = 0; f_x < filter_size_x; ++f_x) {
int rel_i_z = i_z + (f_z * static_cast<int>(p.dilation[0]));
int rel_i_y = i_y + (f_y * static_cast<int>(p.dilation[1]));
int rel_i_x = i_x + (f_x * static_cast<int>(p.dilation[2]));
bool padding =
!(in_range(rel_i_x, {0, input_size_x}) && in_range(rel_i_y, {0, input_size_y}) &&
in_range(rel_i_z, {0, input_size_z}));
if (padding)
for (int f_z_i = s_z_shift; f_z_i < f_z_up_bound; f_z_i += f_z_increment) {
if (f_z_i < 0 || f_z_i >= input_size_zyx) {
filter_channel += filter_size_yx;
continue;
}
const int y_up_bound = f_z_i + input_size_yx;
const int y_shift = f_z_i + i_y_m;
for (int f_y_i = y_shift; f_y_i < f_y_block + y_shift; f_y_i += f_y_increment) {
if (f_y_i < f_z_i || f_y_i >= y_up_bound) {
filter_channel += filter_size_x;
continue;
}
const int x_up_bound = input_size_x + f_y_i;
for (int f_x_i = f_y_i + i_x; f_x_i < f_x_block + f_y_i + i_x;
f_x_i += f_x_increment, filter_channel++) {
if (f_x_i < f_y_i || f_x_i >= x_up_bound) {
continue;
}
int f_buf_idx = (f_z * filter_size_y * filter_size_x) + (f_y * filter_size_x) + f_x;
int i_buf_idx =
(rel_i_z * input_size_y * input_size_x) + (rel_i_y * input_size_x) + rel_i_x;
sum += static_cast<T>(input_channel[i_buf_idx]) *
static_cast<T>(filter_channel[f_buf_idx]);
sum += input_channel[f_x_i] * filter_channel[0];
}
}
}
input_channel += input_channel_size;
filter_channel += filter_channel_size;
input_channel += input_size_zyx;
}
*out = sum;
++out;
@@ -121,6 +210,20 @@ void convolve_3D_channels(const ConvolutionParams& p,
}
}
/// \brief Pads convolution attributes and shapes so that they describe two spatial axes.
///
/// When `in_shape` has fewer than two dimensions after its first two, the missing leading
/// spatial axes are inserted in front of the existing ones: dilation and strides get 1,
/// the padding attributes get 0, and both shapes get a dimension of size 1.
///
/// \param p            Attributes to extend in place.
/// \param in_shape     Input shape to extend in place; its rank decides how many axes are added.
/// \param filter_shape Filter shape to extend in place.
inline void extend_to_2D(ConvolutionParams& p, Shape& in_shape, Shape& filter_shape) {
    const int spatial_rank = static_cast<int>(in_shape.size() - 2);
    if (spatial_rank >= 2) {
        return;  // already has two (or more) spatial axes
    }

    const int missing_dims = 2 - spatial_rank;

    // Shapes: new axes have extent 1.
    in_shape.insert(in_shape.end() - spatial_rank, missing_dims, 1);
    filter_shape.insert(filter_shape.end() - spatial_rank, missing_dims, 1);

    // Neutral step attributes for the new axes.
    p.strides.insert(p.strides.end() - spatial_rank, missing_dims, 1);
    p.dilation.insert(p.dilation.end() - spatial_rank, missing_dims, 1);

    // No padding on the new axes.
    p.pads_begin.insert(p.pads_begin.end() - spatial_rank, missing_dims, 0);
    p.pads_end.insert(p.pads_end.end() - spatial_rank, missing_dims, 0);
    p.output_padding.insert(p.output_padding.end() - spatial_rank, missing_dims, 0);
}
inline void extend_to_3D(ConvolutionParams& p, Shape& in_shape, Shape& filter_shape) {
int spatial_rank = static_cast<int>(in_shape.size() - 2);
if (spatial_rank < 3) {
@@ -209,9 +312,7 @@ void convolution(const T* in,
const Strides& strides,
const Strides& dilations,
const CoordinateDiff& pads_begin,
const CoordinateDiff& pads_end)
{
const CoordinateDiff& pads_end) {
validate_convolution_parameters(in_shape, f_shape, out_shape, strides, dilations, pads_begin, pads_end);
// here we convert all parameter types to ints to avoid arithmetic issues
@@ -222,26 +323,83 @@ void convolution(const T* in,
// convolution implementation to convolve also in 1D & 2D case
Shape input_shape{in_shape};
Shape filters_shape{f_shape};
if (in_shape.size() < 5) {
extend_to_3D(params, input_shape, filters_shape);
if (in_shape.size() < 4) {
extend_to_2D(params, input_shape, filters_shape);
}
const size_t batches_count = input_shape[in_batch_axis];
const Shape batch_shape(++input_shape.begin(), input_shape.end());
const size_t batch_size = shape_size(batch_shape);
const size_t out_spatial_size =
std::accumulate(out_shape.begin() + 2, out_shape.end(), size_t(1), std::multiplies<size_t>());
const size_t filters_count = filters_shape[filter_out_ch_axis];
const Shape filter_shape(++filters_shape.begin(), filters_shape.end());
const size_t filter_size = shape_size(filter_shape);
auto batch = in;
for (size_t batch_idx = 0; batch_idx < batches_count; ++batch_idx) {
auto filter = f;
for (size_t f_idx = 0; f_idx < filters_count; ++f_idx) {
convolve_3D_channels(params, batch, batch_shape, filter, filter_shape, out);
filter += filter_size;
const size_t work_amount = batches_count * filters_count;
void (*conv_channels)(const ConvolutionParams&, const T*, const Shape&, const T*, const Shape&, T*);
if (in_shape.size() == 5) {
conv_channels = &convolve_3D_channels;
} else {
conv_channels = &convolve_2D_channels;
}
auto ncores = std::thread::hardware_concurrency() / 2;
if (ncores == 0) {
ncores = 1;
}
std::vector<std::future<void>> futures(ncores);
auto split_work = [](const size_t& n, const size_t& nthr, const size_t ithr, size_t& n_start, size_t& n_end) {
if (nthr <= 1 || n == 0) {
n_start = 0;
n_end = n;
} else {
auto n1 = (n + nthr - 1) / nthr;
auto n2 = n1 - 1;
auto T1 = n - n2 * nthr;
n_end = ithr < T1 ? n1 : n2;
n_start = ithr <= T1 ? ithr * n1 : T1 * n1 + (ithr - T1) * n2;
}
batch += batch_size;
n_end += n_start;
};
auto ker_callback = [&](int nthr, int ithr) {
size_t start = 0, end = 0;
split_work(work_amount, nthr, ithr, start, end);
if (start >= end) {
return;
}
size_t batch_idx = start / filters_count;
size_t c_idx = start % filters_count;
auto in_data = in + batch_size * batch_idx;
auto filter = f + filter_size * c_idx;
auto out_data = out + out_spatial_size * filters_count * batch_idx + out_spatial_size * c_idx;
for (; batch_idx < batches_count; ++batch_idx) {
for (; c_idx < filters_count && start < end; c_idx++, start++) {
conv_channels(params, in_data, batch_shape, filter, filter_shape, out_data);
filter += filter_size;
out_data += out_spatial_size;
}
if (start >= end) {
break;
}
filter = f;
c_idx = 0;
in_data += batch_size;
}
};
for (size_t ithr = 0; ithr < ncores; ithr++) {
futures[ithr] = std::async(ker_callback, ncores, ithr);
}
for (size_t ithr = 0; ithr < ncores; ithr++) {
futures[ithr].get();
}
}
} // namespace reference

View File

@@ -11,6 +11,7 @@
#include "ngraph/axis_vector.hpp"
#include "ngraph/runtime/reference/convolution.hpp"
#include "ngraph/runtime/reference/reverse.hpp"
#include "ngraph/util.hpp"
namespace ngraph {
@@ -171,8 +172,8 @@ void convolution_backprop_impl(const T* in,
// convolution implementation to convolve also in 1D & 2D case
Shape input_shape{in_shape};
Shape filters_shape{f_shape};
if (in_shape.size() < 5) {
extend_to_3D(params, input_shape, filters_shape);
if (in_shape.size() < 4) {
extend_to_2D(params, input_shape, filters_shape);
}
for (size_t i = 0; i < input_shape.size() - 2; ++i) {
@@ -186,24 +187,26 @@ void convolution_backprop_impl(const T* in,
}
// convert output shape to 2D; it contains only the spatial dimensions
Shape out_shape_3d{out_shape.begin() + 2, out_shape.end()};
Shape out_shape_2d{out_shape.begin() + 2, out_shape.end()};
const size_t out_spatial_size =
std::accumulate(out_shape.begin() + 2, out_shape.end(), size_t(1), std::multiplies<size_t>());
int out_shape_rank = static_cast<int>(out_shape.size()) - 2;
if (out_shape_rank < 3) {
int missing_dims = 3 - out_shape_rank;
out_shape_3d.insert(std::prev(out_shape_3d.end(), out_shape_rank), missing_dims, 1);
if (out_shape_rank < 2) {
int missing_dims = 2 - out_shape_rank;
out_shape_2d.insert(std::prev(out_shape_2d.end(), out_shape_rank), missing_dims, 1);
}
// modify params.pads_end when output_shape was provided in ctor in order to
// calculate expected number of output elements
for (size_t i = 0; i < out_shape_3d.size(); i++) {
if (out_shape_3d[i] > 1) {
for (size_t i = 0; i < out_shape_2d.size(); i++) {
if (out_shape_2d[i] > 1) {
// expected_dim = (in - 1)* strides + filter - 2*padding + out_padding
// strides is already applied (through 0's extension in input)
// padding = pads_begin + pads_end, formula below is using
// params.pad_begin/params.pads_end:
const size_t expected_dim =
out_shape_3d[i] - ((input_shape[i + 2] - 1) - filters_shape[i + 2] + params.pads_begin[i] +
out_shape_2d[i] - ((input_shape[i + 2] - 1) - filters_shape[i + 2] + params.pads_begin[i] +
params.pads_end[i] + 2 + params.output_padding[i]);
params.pads_end[i] += expected_dim;
}
@@ -217,15 +220,69 @@ void convolution_backprop_impl(const T* in,
Shape batch_shape(++input_shape.begin(), input_shape.end());
const size_t batch_size = shape_size(batch_shape);
auto batch = in;
const size_t work_amount = batches_count * filters_count;
for (size_t batch_idx = 0; batch_idx < batches_count; ++batch_idx) {
auto filter = f;
for (size_t f_idx = 0; f_idx < filters_count; ++f_idx) {
convolve_3D_channels(params, batch, batch_shape, filter, filter_shape, out);
filter += filter_size;
void (*conv_channels)(const ConvolutionParams&, const T*, const Shape&, const T*, const Shape&, T*);
if (in_shape.size() == 5) {
conv_channels = &convolve_3D_channels;
} else {
conv_channels = &convolve_2D_channels;
}
auto ncores = std::thread::hardware_concurrency() / 2;
if (ncores == 0) {
ncores = 1;
}
std::vector<std::future<void>> futures(ncores);
auto split_work = [](const size_t& n, const size_t& nthr, const size_t ithr, size_t& n_start, size_t& n_end) {
if (nthr <= 1 || n == 0) {
n_start = 0;
n_end = n;
} else {
auto n1 = (n + nthr - 1) / nthr;
auto n2 = n1 - 1;
auto T1 = n - n2 * nthr;
n_end = ithr < T1 ? n1 : n2;
n_start = ithr <= T1 ? ithr * n1 : T1 * n1 + (ithr - T1) * n2;
}
batch += batch_size;
n_end += n_start;
};
auto ker_callback = [&](int nthr, int ithr) {
size_t start = 0, end = 0;
split_work(work_amount, nthr, ithr, start, end);
if (start >= end) {
return;
}
size_t batch_idx = start / filters_count;
size_t c_idx = start % filters_count;
auto in_data = in + batch_size * batch_idx;
auto filter = f + filter_size * c_idx;
auto out_data = out + out_spatial_size * filters_count * batch_idx + out_spatial_size * c_idx;
for (; batch_idx < batches_count; ++batch_idx) {
for (; c_idx < filters_count && start < end; c_idx++, start++) {
conv_channels(params, in_data, batch_shape, filter, filter_shape, out_data);
filter += filter_size;
out_data += out_spatial_size;
}
if (start >= end) {
break;
}
filter = f;
c_idx = 0;
in_data += batch_size;
}
};
for (size_t ithr = 0; ithr < ncores; ithr++) {
futures[ithr] = std::async(ker_callback, ncores, ithr);
}
for (size_t ithr = 0; ithr < ncores; ithr++) {
futures[ithr].get();
}
}
@@ -305,7 +362,7 @@ void convolution_backprop_in(const T* delta_in,
if (stride_dim >= 2) {
extend_with_zeros(stride, in_shape, delta_in, conv_input_shape, extended_input);
std::fill(conv_stride.begin(), conv_stride.end(), 1);
conv_input_data = &extended_input[0];
conv_input_data = extended_input.data();
}
const size_t dilation_dim =
@@ -317,7 +374,7 @@ void convolution_backprop_in(const T* delta_in,
conv_filter_shape,
extended_filter);
std::fill(conv_filter_dilation.begin(), conv_filter_dilation.end(), 1);
conv_filter_data = &extended_filter[0];
conv_filter_data = extended_filter.data();
}
convolution_backprop_impl(conv_input_data,

View File

@@ -114,47 +114,56 @@ void convolve_2D_channels(const ConvolutionParams& p,
const T* mask,
const Shape& mask_shape,
T* out,
size_t group_idx,
int64_t groups,
int64_t deformable_groups,
bool bilinear_interpolation_pad) {
const size_t group_idx,
const int64_t groups,
const int64_t deformable_groups,
const bool bilinear_interpolation_pad) {
const int input_size_y = static_cast<int>(batch_shape[1]);
const int input_size_x = static_cast<int>(batch_shape[2]);
const int filter_size_y = static_cast<int>(filter_shape[1]);
const int filter_size_x = static_cast<int>(filter_shape[2]);
const int dilated_filter_size_y = filter_size_y + (filter_size_y - 1) * (static_cast<int>(p.dilation[0]) - 1);
const int dilated_filter_size_x = filter_size_x + (filter_size_x - 1) * (static_cast<int>(p.dilation[1]) - 1);
const int dilation_y = static_cast<int>(p.dilation[0]);
const int dilation_x = static_cast<int>(p.dilation[1]);
const int dilated_filter_size_y = filter_size_y + (filter_size_y - 1) * (dilation_y - 1);
const int dilated_filter_size_x = filter_size_x + (filter_size_x - 1) * (dilation_x - 1);
const int i_y_lim = static_cast<int>(p.pads_end[0] + input_size_y - dilated_filter_size_y);
const int i_x_lim = static_cast<int>(p.pads_end[1] + input_size_x - dilated_filter_size_x);
const int input_channel_size = static_cast<int>(shape_size(shape_reduce(batch_shape)));
const int filter_channel_size = static_cast<int>(shape_size(shape_reduce(filter_shape)));
const int offsets_size = static_cast<int>(shape_size(offset_shape));
const int offsets_spatial_size = static_cast<int>(shape_size(shape_reduce(offset_shape)));
const int filter_channels_count = static_cast<int>(filter_shape[0]);
const int mask_size = static_cast<int>(shape_size(mask_shape));
const int mask_spatial_size = static_cast<int>(shape_size(shape_reduce(mask_shape)));
const int group_idx_m = filter_channels_count * static_cast<int>(group_idx);
const int group_idx_d = filter_channels_count * groups / deformable_groups;
const int f_shift_inc = 2 * offsets_spatial_size;
int out_idx = 0;
for (int i_y = static_cast<int>(-p.pads_begin[0]);
i_y <= static_cast<int>(p.pads_end[0] + input_size_y - dilated_filter_size_y);
i_y += static_cast<int>(p.strides[0])) {
for (int i_x = static_cast<int>(-p.pads_begin[1]);
i_x <= static_cast<int>(p.pads_end[1] + input_size_x - dilated_filter_size_x);
i_x += static_cast<int>(p.strides[1])) {
for (int i_y = static_cast<int>(-p.pads_begin[0]); i_y <= i_y_lim; i_y += static_cast<int>(p.strides[0])) {
for (int i_x = static_cast<int>(-p.pads_begin[1]); i_x <= i_x_lim; i_x += static_cast<int>(p.strides[1])) {
auto input_channel = batch;
auto filter_channel = filter;
T sum = 0;
for (int fc = 0; fc < filter_channels_count; fc++) {
auto deformable_group_idx =
(filter_channels_count * group_idx + fc) / (filter_channels_count * groups / deformable_groups);
for (int f_y = 0; f_y < filter_size_y; ++f_y) {
for (int f_x = 0; f_x < filter_size_x; ++f_x) {
int f_buf_idx = (f_y * filter_size_x) + f_x;
T y_offset = offsets[deformable_group_idx * offsets_size +
f_buf_idx * 2 * offsets_spatial_size + out_idx];
T x_offset = offsets[deformable_group_idx * offsets_size +
(f_buf_idx * 2 + 1) * offsets_spatial_size + out_idx];
T rel_i_y = static_cast<T>(i_y + (f_y * p.dilation[0]) + y_offset);
T rel_i_x = static_cast<T>(i_x + (f_x * p.dilation[1]) + x_offset);
const int deformable_group_idx = (group_idx_m + fc) / group_idx_d;
int f_y_shift = deformable_group_idx * offsets_size + out_idx;
int f_x_shift = f_y_shift + offsets_spatial_size;
int f_mask_shift = deformable_group_idx * mask_size + out_idx;
int i_y_dil = i_y;
for (int f_y = 0; f_y < filter_size_y; ++f_y, i_y_dil += dilation_y) {
int i_x_dil = i_x;
for (int f_x = 0; f_x < filter_size_x; ++f_x,
f_y_shift += f_shift_inc,
f_x_shift += f_shift_inc,
f_mask_shift += mask_spatial_size,
i_x_dil += dilation_x,
filter_channel++) {
T rel_i_y = static_cast<T>(i_y_dil + offsets[f_y_shift]);
T rel_i_x = static_cast<T>(i_x_dil + offsets[f_x_shift]);
bool padding;
if (bilinear_interpolation_pad) {
@@ -168,19 +177,17 @@ void convolve_2D_channels(const ConvolutionParams& p,
if (padding)
continue;
T mask_scalar =
mask[deformable_group_idx * mask_size + f_buf_idx * mask_spatial_size + out_idx];
T mask_scalar = mask[f_mask_shift];
sum += static_cast<T>(bilinear_interpolation(input_channel,
static_cast<float>(rel_i_x),
static_cast<float>(rel_i_y),
input_size_x,
input_size_y,
bilinear_interpolation_pad)) *
filter_channel[f_buf_idx] * mask_scalar;
filter_channel[0] * mask_scalar;
}
}
input_channel += input_channel_size;
filter_channel += filter_channel_size;
}
out[out_idx++] = sum;
}
@@ -206,9 +213,7 @@ void deformable_convolution(const T* in,
const CoordinateDiff& pads_end,
const int64_t groups,
const int64_t deformable_groups,
const bool bilinear_interpolation_pad)
{
const bool bilinear_interpolation_pad) {
using namespace def_conv_impl;
validate_deformable_convolution_params(in_shape,

View File

@@ -5,7 +5,7 @@
#pragma once
#include "ngraph/runtime/reference/convolution.hpp"
#include "ngraph/util.hpp"
#include "ngraph/runtime/reference/helpers.hpp"
namespace {
constexpr size_t filter_group_axis = 0;

View File

@@ -4,7 +4,6 @@
#pragma once
#include "ngraph/runtime/reference/convolution_backprop_data.hpp"
#include "ngraph/runtime/reference/group_convolution.hpp"
#include "ngraph/util.hpp"

View File

@@ -99,12 +99,6 @@ protected:
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), WRB, hiddenSize/*, activations, {}, {}, clip, linearBeforeReset*/);
function = makeNgraphFunction(netPrecision, params, augruCellOp, "AUGRUCell");
auto filePrefix = CommonTestUtils::generateTestFilePrefix();
std::string xmlFileName = filePrefix + "_AUGRUCell.xml";
std::string binFileName = filePrefix + "_AUGRUCell.bin";
ov::pass::Serialize serializer(xmlFileName, binFileName);
serializer.run_on_model(function);
std::cout << "Saved subgraph IR." << std::endl;
}
};

View File

@@ -16,8 +16,6 @@
#include "ie_core.hpp"
#include "ie_precision.hpp"
#include "ngraph/opsets/opset1.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "common_test_utils/common_utils.hpp"