[GPU] Added shape agnostic Pad kernel implementation (#16160)
Signed-off-by: Andrew Park <andrew.park@intel.com>
This commit is contained in:
committed by
GitHub
parent
3d52fc843a
commit
b7ff3a1d64
@@ -24,9 +24,9 @@ struct border_impl : typed_primitive_impl_ocl<border> {
|
||||
return make_unique<border_impl>(*this);
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<border>();
|
||||
auto params = get_default_params<kernel_selector::border_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::border_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::border_optional_params>(impl_param.get_program());
|
||||
|
||||
size_t rank = impl_param.get_input_layout(0).get_rank();
|
||||
@@ -36,7 +36,7 @@ struct border_impl : typed_primitive_impl_ocl<border> {
|
||||
std::vector<int32_t> end(primitive->pads_end.begin(), primitive->pads_end.end());
|
||||
|
||||
size_t input_offset = 1;
|
||||
if (!(primitive->non_constant_input_mask & border::PAD_NON_CONST_INPUT::BEGIN) && !params.has_dynamic_tensors()) {
|
||||
if (!(primitive->non_constant_input_mask & border::PAD_NON_CONST_INPUT::BEGIN)) {
|
||||
params.begin_type = kernel_selector::base_params::ArgType::Constant;
|
||||
|
||||
std::vector<int64_t> begin_vec;
|
||||
@@ -55,7 +55,7 @@ struct border_impl : typed_primitive_impl_ocl<border> {
|
||||
input_offset += 1;
|
||||
}
|
||||
|
||||
if (!(primitive->non_constant_input_mask & border::PAD_NON_CONST_INPUT::END) && !params.has_dynamic_tensors()) {
|
||||
if (!(primitive->non_constant_input_mask & border::PAD_NON_CONST_INPUT::END)) {
|
||||
params.end_type = kernel_selector::base_params::ArgType::Constant;
|
||||
|
||||
std::vector<int64_t> end_vec;
|
||||
@@ -102,108 +102,55 @@ struct border_impl : typed_primitive_impl_ocl<border> {
|
||||
|
||||
return {params, optional_params};
|
||||
}
|
||||
|
||||
// Refreshes the dispatch data stored in _kernel_data for the given impl_param.
// Builds kernel params via get_kernel_params() with its second argument
// (is_shape_agnostic) set to true, hands the params part of the returned pair
// to the update_dispatch_data_func callback held by _kernel_data, and then
// calls update_kernels_list_to_skip().
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
// `true` == is_shape_agnostic; it is forwarded to get_default_params() inside get_kernel_params().
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
// kernel_params.first is the params object; the callback updates _kernel_data in place.
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_border_impl::attach_border_impl() {
|
||||
implementation_map<border>::add(impl_types::ocl, typed_primitive_impl_ocl<border>::create<border_impl>, {
|
||||
std::make_tuple(data_types::f32, format::yxfb),
|
||||
std::make_tuple(data_types::f16, format::yxfb),
|
||||
std::make_tuple(data_types::i32, format::yxfb),
|
||||
std::make_tuple(data_types::i8, format::yxfb),
|
||||
std::make_tuple(data_types::u8, format::yxfb),
|
||||
auto types = {data_types::f32, data_types::f16, data_types::i32, data_types::i8, data_types::u8};
|
||||
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::i32, format::bfyx),
|
||||
std::make_tuple(data_types::i8, format::bfyx),
|
||||
std::make_tuple(data_types::u8, format::bfyx),
|
||||
auto formats = {
|
||||
format::yxfb,
|
||||
format::bfyx,
|
||||
format::byxf,
|
||||
format::bfzyx,
|
||||
format::bfwzyx,
|
||||
format::b_fs_yx_fsv16,
|
||||
format::b_fs_yx_fsv32,
|
||||
format::b_fs_zyx_fsv16,
|
||||
format::bs_fs_yx_bsv4_fsv2,
|
||||
format::bs_fs_yx_bsv4_fsv4,
|
||||
format::bs_fs_yx_bsv8_fsv2,
|
||||
format::bs_fs_yx_bsv8_fsv4,
|
||||
format::bs_fs_yx_bsv16_fsv16,
|
||||
format::bs_fs_yx_bsv32_fsv16,
|
||||
format::bs_fs_yx_bsv32_fsv32,
|
||||
format::bs_fs_zyx_bsv16_fsv16
|
||||
};
|
||||
|
||||
std::make_tuple(data_types::f32, format::byxf),
|
||||
std::make_tuple(data_types::f16, format::byxf),
|
||||
std::make_tuple(data_types::i32, format::byxf),
|
||||
std::make_tuple(data_types::i8, format::byxf),
|
||||
std::make_tuple(data_types::u8, format::byxf),
|
||||
implementation_map<border>::add(impl_types::ocl,
|
||||
shape_types::static_shape,
|
||||
typed_primitive_impl_ocl<border>::create<border_impl>,
|
||||
types,
|
||||
formats);
|
||||
|
||||
std::make_tuple(data_types::f32, format::bfzyx),
|
||||
std::make_tuple(data_types::f16, format::bfzyx),
|
||||
std::make_tuple(data_types::i32, format::bfzyx),
|
||||
std::make_tuple(data_types::i8, format::bfzyx),
|
||||
std::make_tuple(data_types::u8, format::bfzyx),
|
||||
auto dyn_formats = {
|
||||
format::bfyx,
|
||||
format::bfzyx,
|
||||
format::bfwzyx
|
||||
};
|
||||
|
||||
std::make_tuple(data_types::f32, format::bfwzyx),
|
||||
std::make_tuple(data_types::f16, format::bfwzyx),
|
||||
std::make_tuple(data_types::i32, format::bfwzyx),
|
||||
std::make_tuple(data_types::i8, format::bfwzyx),
|
||||
std::make_tuple(data_types::u8, format::bfwzyx),
|
||||
|
||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::i32, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
|
||||
|
||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::i32, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
|
||||
|
||||
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
|
||||
|
||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2),
|
||||
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv4_fsv2),
|
||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv2),
|
||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2),
|
||||
|
||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv4),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv4),
|
||||
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv4_fsv4),
|
||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4),
|
||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4),
|
||||
|
||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv2),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv2),
|
||||
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv8_fsv2),
|
||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv2),
|
||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv2),
|
||||
|
||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4),
|
||||
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv8_fsv4),
|
||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4),
|
||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4),
|
||||
|
||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
|
||||
|
||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv16),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv16),
|
||||
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv32_fsv16),
|
||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv16),
|
||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv16),
|
||||
|
||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv32),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv32),
|
||||
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv32_fsv32),
|
||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv32),
|
||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv32),
|
||||
|
||||
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
|
||||
});
|
||||
implementation_map<border>::add(impl_types::ocl,
|
||||
shape_types::dynamic_shape,
|
||||
typed_primitive_impl_ocl<border>::create<border_impl>,
|
||||
types,
|
||||
dyn_formats);
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include "include/fetch_utils.cl"
|
||||
|
||||
KERNEL(border_gpu_ref)(
|
||||
OPTIONAL_SHAPE_INFO_ARG
|
||||
const __global INPUT0_TYPE* input,
|
||||
#ifdef BEGIN_TYPE
|
||||
const __global BEGIN_TYPE* begin,
|
||||
@@ -154,7 +155,7 @@ KERNEL(border_gpu_ref)(
|
||||
const uint in_f = out_f - blt_sf;
|
||||
const uint in_b = out_b - blt_sb;
|
||||
|
||||
const uint in_pos = FUNC_CALL(get_input_index)(in_b, in_f, in_w, in_z, in_y, in_x);
|
||||
const uint in_pos = FUNC_CALL(get_input_index)(OPTIONAL_SHAPE_INFO_TENSOR in_b, in_f, in_w, in_z, in_y, in_x);
|
||||
in_val = input[in_pos];
|
||||
}
|
||||
#elif defined BORDER_TYPE_EDGE
|
||||
@@ -165,7 +166,7 @@ KERNEL(border_gpu_ref)(
|
||||
const uint in_f = (out_f >= blt_sf & out_f < in_lf) ? out_f - blt_sf : (out_f < blt_sf ? 0 : in_sf - 1);
|
||||
const uint in_b = (out_b >= blt_sb & out_b < in_lb) ? out_b - blt_sb : (out_b < blt_sb ? 0 : in_sb - 1);
|
||||
|
||||
const uint in_pos = FUNC_CALL(get_input_index)(in_b, in_f, in_w, in_z, in_y, in_x);
|
||||
const uint in_pos = FUNC_CALL(get_input_index)(OPTIONAL_SHAPE_INFO_TENSOR in_b, in_f, in_w, in_z, in_y, in_x);
|
||||
INPUT0_TYPE in_val = input[in_pos];
|
||||
#elif defined BORDER_TYPE_MIRROR
|
||||
const uint in_x = (out_x >= blt_sx & out_x < in_lx) ? out_x - blt_sx : (out_x < blt_sx ? blt_sx - 1 - out_x : in_sx + in_lx - 1 - out_x);
|
||||
@@ -175,7 +176,7 @@ KERNEL(border_gpu_ref)(
|
||||
const uint in_f = (out_f >= blt_sf & out_f < in_lf) ? out_f - blt_sf : (out_f < blt_sf ? blt_sf - 1 - out_f : in_sf + in_lf - 1 - out_f);
|
||||
const uint in_b = (out_b >= blt_sb & out_b < in_lb) ? out_b - blt_sb : (out_b < blt_sb ? blt_sb - 1 - out_b : in_sb + in_lb - 1 - out_b);
|
||||
|
||||
const uint in_pos = FUNC_CALL(get_input_index)(in_b, in_f, in_w, in_z, in_y, in_x);
|
||||
const uint in_pos = FUNC_CALL(get_input_index)(OPTIONAL_SHAPE_INFO_TENSOR in_b, in_f, in_w, in_z, in_y, in_x);
|
||||
INPUT0_TYPE in_val = input[in_pos];
|
||||
#elif defined BORDER_TYPE_MIRROR_101
|
||||
const uint in_x = (out_x >= blt_sx & out_x < in_lx) ? out_x - blt_sx : (out_x < blt_sx ? blt_sx - out_x : in_sx + in_lx - 2 - out_x);
|
||||
@@ -185,12 +186,12 @@ KERNEL(border_gpu_ref)(
|
||||
const uint in_f = (out_f >= blt_sf & out_f < in_lf) ? out_f - blt_sf : (out_f < blt_sf ? blt_sf - out_f : in_sf + in_lf - 2 - out_f);
|
||||
const uint in_b = (out_b >= blt_sb & out_b < in_lb) ? out_b - blt_sb : (out_b < blt_sb ? blt_sb - out_b : in_sb + in_lb - 2 - out_b);
|
||||
|
||||
const uint in_pos = FUNC_CALL(get_input_index)(in_b, in_f, in_w, in_z, in_y, in_x);
|
||||
const uint in_pos = FUNC_CALL(get_input_index)(OPTIONAL_SHAPE_INFO_TENSOR in_b, in_f, in_w, in_z, in_y, in_x);
|
||||
INPUT0_TYPE in_val = input[in_pos];
|
||||
#else
|
||||
#error Unsupported border type.
|
||||
#endif
|
||||
|
||||
const uint out_pos = FUNC_CALL(get_output_index)(out_b, out_f, out_w, out_z, out_y, out_x);
|
||||
const uint out_pos = FUNC_CALL(get_output_index)(OPTIONAL_SHAPE_INFO_TENSOR out_b, out_f, out_w, out_z, out_y, out_x);
|
||||
output[out_pos] = in_val;
|
||||
}
|
||||
|
||||
@@ -46,14 +46,16 @@ BorderKernelBase::DispatchData BorderKernelBase::SetDefault(const border_params&
|
||||
const auto& output = params.outputs[0];
|
||||
|
||||
DispatchData dispatchData;
|
||||
auto in_layout = params.inputs[0].GetLayout();
|
||||
auto out_layout = params.outputs[0].GetLayout();
|
||||
std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws = {{ Tensor::DataChannelName::X, Tensor::DataChannelName::Z },
|
||||
{ Tensor::DataChannelName::Y, Tensor::DataChannelName::W },
|
||||
{ Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH }};
|
||||
if (!params.has_dynamic_tensors()) {
|
||||
auto in_layout = params.inputs[0].GetLayout();
|
||||
auto out_layout = params.outputs[0].GetLayout();
|
||||
std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws = {{ Tensor::DataChannelName::X, Tensor::DataChannelName::Z },
|
||||
{ Tensor::DataChannelName::Y, Tensor::DataChannelName::W },
|
||||
{ Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH }};
|
||||
|
||||
dispatchData.gws = { output.X().v * output.Z().v, output.Y().v * output.W().v, output.Batch().v * output.Feature().v };
|
||||
dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo, in_layout, out_layout, dims_by_gws);
|
||||
dispatchData.gws = { output.X().v * output.Z().v, output.Y().v * output.W().v, output.Batch().v * output.Feature().v };
|
||||
dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo, in_layout, out_layout, dims_by_gws);
|
||||
}
|
||||
|
||||
return dispatchData;
|
||||
}
|
||||
@@ -67,16 +69,32 @@ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
|
||||
|
||||
auto dispatchData = SetDefault(prim_params);
|
||||
KernelData k_data = KernelData::Default<border_params>(params);
|
||||
border_params& newParams = *static_cast<border_params*>(k_data.params.get());
|
||||
k_data.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
|
||||
const auto& prim_params = static_cast<const border_params&>(params);
|
||||
auto dispatchData = SetDefault(prim_params);
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
};
|
||||
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = k_data.kernels[0];
|
||||
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
|
||||
"", false, false, static_cast<int>(newParams.inputs.size()),
|
||||
0, 1, newParams.has_dynamic_tensors());
|
||||
FillCLKernelData(kernel,
|
||||
dispatchData,
|
||||
params.engineInfo,
|
||||
kernelName,
|
||||
jit,
|
||||
entry_point,
|
||||
EXE_MODE_DEFAULT,
|
||||
false,
|
||||
false,
|
||||
(uint32_t)prim_params.inputs.size(),
|
||||
GetFusedPrimitiveInputsCount(params),
|
||||
1,
|
||||
prim_params.outputs[0].is_dynamic());
|
||||
|
||||
return {k_data};
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ ParamsKey BorderKernelRef::GetSupportedKey() const {
|
||||
k.EnableTensorPitches();
|
||||
k.EnableBatching();
|
||||
k.EnableDifferentTypes();
|
||||
k.EnableDynamicShapesSupport();
|
||||
return k;
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
#include <intel_gpu/primitives/input_layout.hpp>
|
||||
#include <intel_gpu/primitives/border.hpp>
|
||||
|
||||
#include <border_inst.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <array>
|
||||
|
||||
@@ -1551,3 +1553,81 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_edge) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks the border primitive with a dynamic input layout and CONSTANT pad mode.
// The topology declares the input as a fully dynamic rank-4 f32 bfyx layout, while the
// memory actually fed in is a static 2x3x5x4 (b, f, y, x) tensor of random values.
// The test asserts that the selected implementation reports is_dynamic(), executes
// the network, and verifies that every padded element equals 0.0f and every interior
// element equals the corresponding input element.
// The "2x1x2x3_1x2x3x4" in the test name matches the leading (blt_*) and trailing (brb_*)
// pad sizes below.
TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_constant_dynamic) {
|
||||
// Actual input extents per dimension.
constexpr auto in_size_b = 2;
|
||||
constexpr auto in_size_f = 3;
|
||||
constexpr auto in_size_y = 5;
|
||||
constexpr auto in_size_x = 4;
|
||||
|
||||
// Leading pad per dimension (passed as the first CoordinateDiff to border below).
constexpr auto blt_size_b = 2;
|
||||
constexpr auto blt_size_f = 1;
|
||||
constexpr auto blt_size_y = 2;
|
||||
constexpr auto blt_size_x = 3;
|
||||
|
||||
// Trailing pad per dimension (passed as the second CoordinateDiff to border below).
constexpr auto brb_size_b = 1;
|
||||
constexpr auto brb_size_f = 2;
|
||||
constexpr auto brb_size_y = 3;
|
||||
constexpr auto brb_size_x = 4;
|
||||
|
||||
// Expected output extents: input plus leading and trailing pad.
constexpr auto out_size_b = in_size_b + blt_size_b + brb_size_b;
|
||||
constexpr auto out_size_f = in_size_f + blt_size_f + brb_size_f;
|
||||
constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
|
||||
constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
|
||||
|
||||
auto& engine = get_test_engine();
|
||||
// The dynamic layout is what the topology sees; the static layout is only used to allocate the input memory.
auto input_layout_dynamic = layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx};
|
||||
auto input_layout_static = layout{ov::PartialShape{in_size_b, in_size_f, in_size_y, in_size_x}, data_types::f32, format::bfyx};
|
||||
auto input = engine.allocate_memory(input_layout_static);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input_layout_dynamic));
|
||||
// Border with constant padding and a pad value of 0.0f.
// NOTE(review): the literal 0 after the input list is presumably the non-constant-input mask
// (i.e. both pad vectors are compile-time constants) - confirm against the border primitive's constructor.
topology.add(border("border",
|
||||
{input_info("input")}, 0,
|
||||
ov::CoordinateDiff{blt_size_b, blt_size_f, blt_size_y, blt_size_x},
|
||||
ov::CoordinateDiff{brb_size_b, brb_size_f, brb_size_y, brb_size_x},
|
||||
ov::op::PadMode::CONSTANT,
|
||||
0.0f));
|
||||
|
||||
// Fill the input memory with random floats generated in the range passed below (-8.0f, 8.0f).
const std::vector<size_t> sizes{ static_cast<std::size_t>(in_size_b), static_cast<std::size_t>(in_size_f),
|
||||
static_cast<std::size_t>(in_size_y), static_cast<std::size_t>(in_size_x)};
|
||||
std::vector<float> input_data = generate_rnd_real_input<float>(sizes, -8.0f, 8.0f);
|
||||
set_values(input, input_data);
|
||||
|
||||
// Build the network with the allow_new_shape_infer property enabled.
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
|
||||
network network(engine, topology, config);
|
||||
network.set_input_data("input", input);
|
||||
|
||||
// The border instance must have an implementation, and that implementation must report itself as dynamic.
auto inst = network.get_primitive("border");
|
||||
auto impl = inst->get_impl();
|
||||
ASSERT_TRUE(impl != nullptr);
|
||||
ASSERT_TRUE(impl->is_dynamic());
|
||||
|
||||
auto outputs = network.execute();
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "border");
|
||||
|
||||
auto output = outputs.at("border").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
|
||||
// Walk every output element using a linear b-f-y-x offset (x varies fastest).
for (auto b = 0; b < out_size_b; ++b) { // B
|
||||
for (auto f = 0; f < out_size_f; ++f) { // F
|
||||
for (auto y = 0; y < out_size_y; ++y) { // Y
|
||||
for (auto x = 0; x < out_size_x; ++x) { // X
|
||||
auto output_off = ((b * out_size_f + f) * out_size_y + y) * out_size_x + x; // BFYX
|
||||
|
||||
// An element is padding if any coordinate falls inside the leading or trailing pad region.
if (b < blt_size_b || b >= out_size_b - brb_size_b ||
|
||||
f < blt_size_f || f >= out_size_f - brb_size_f ||
|
||||
y < blt_size_y || y >= out_size_y - brb_size_y ||
|
||||
x < blt_size_x || x >= out_size_x - brb_size_x) {
|
||||
ASSERT_EQ(output_ptr[output_off], 0.0f);
|
||||
} else {
|
||||
// Interior element: shift each coordinate back by the leading pad to find the source element.
auto input_off = (((b - blt_size_b) * in_size_f + f - blt_size_f) * in_size_y + y - blt_size_y) * in_size_x + x - blt_size_x; // BFYX
|
||||
ASSERT_EQ(output_ptr[output_off], input_data[input_off]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user