[IE CLDNN] Fix input feature padding handling in dw conv fsv16 kernel (#1217)

This commit is contained in:
Jedrzej Hajduczenia 2020-07-05 17:57:15 +02:00 committed by GitHub
parent fee4a01b26
commit fd9ae15fdd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 100 additions and 1 deletions

View File

@ -57,7 +57,6 @@ KERNEL(convolution_depthwise)(
const uint input_fs_pad_before = INPUT0_PAD_BEFORE_FEATURE_NUM / FEATURE_SLICE_SIZE;
const uint input_offset = b * input_b_pitch +
input_fs_pad_before * input_fs_pitch +
INPUT0_PAD_BEFORE_SIZE_Y * input_y_pitch +
INPUT0_PAD_BEFORE_SIZE_X * input_x_pitch +
(f_block + input_fs_pad_before) * input_fs_pitch;

View File

@ -6620,6 +6620,106 @@ TEST_P(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16)
}
}
TEST(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16_in_feature_padding) {
// Input: 1x32x2x1
// Input padding above: 0x16x0x0
// Input padding below: 0x64x0x0
// Groups: 32
// Filter: 32x1x1x1x1
// Output: 1x32x2x1
auto num_groups = 32;
auto input_size = tensor{ 1, num_groups, 1, 2 };
auto weights_size = tensor(group(num_groups), batch(1), feature(1), spatial(1, 1));
auto bias_size = tensor{ 1, num_groups, 1, 1 };
auto stride = tensor{ 1, 1, 1, 1 };
auto input_offset = tensor{ 0, 0, 0, 0 };
auto dilation = tensor{ 1, 1, 1, 1 };
auto output_size = tensor{ 1, num_groups, 1, 2};
auto input_lower_sizes = { 0, 16, 0, 0 };
auto input_upper_sizes = { 0, 64, 0, 0 };
const auto& engine = get_test_engine();
auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_size });
auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, weights_size});
auto bias = memory::allocate(engine, { data_types::f32, format::bfyx, bias_size});
set_values<float>(input, {
3, -1, -1, -1, 2, -2, 2, 2, 0, 1, -5, 4, -1, 4, 1, 0,
-1, 4, -4, 3, -4, 4, 0, 1, -4, 3, 1, 0, -3, -1, 3, 0,
4, -2, 0, -2, -1, -4, 1, -4, 3, 2, 2, 2, 3, 4, 0, -5,
3, -5, 2, -3, 0, 3, -3, -4, -1, -4, 2, 4, 3, 4, -5, -5
});
set_values<float>(weights, {
-4, -1, -4, -2, 1, 2, 3, -4, -5, -2, -2, 1, 2, 2, -5, 2,
-5, 4, -3, 2, -1, -3, 0, -1, -5, -3, -5, -4, 1, -4, 4, 2
});
set_values<float>(bias, {
-2, 3, -4, -4, -4, 4, -3, -1, -3, -5, 3, 3, 1, 4, -4, 1,
-1, -2, -2, -1, 2, -1, -4, 2, -4, -1, 3, 0, 0, 4, -3, -4
});
std::vector<float> output_vec = {
-14, 2, 4, 4, -12, 4, -8, -8, -4, -3, -6, 12, -6, 9, -5, -1,
2, -23, 3, -11, 11, -5, 3, 4, -7, 7, 6, 4, 11, 1, 7, 1,
-21, 9, -2, -10, 1, 10, 1, -9, -1, 0, -7, -7, -4, -4, 2, 7,
-19, 21, -7, 8, 3, -12, 12, 16, -1, -4, -4, -12, 9, 13, -14, -14
};
// reorder input to fsv16 format and introduce feature padding
padding input_padding = padding(input_lower_sizes, input_upper_sizes);
layout reordered_input_layout = layout(data_types::f32, format::b_fs_yx_fsv16, input_size, input_padding);
topology topology(
input_layout("input", input.get_layout()),
reorder("input_reordered", "input", reordered_input_layout),
data("weights", weights),
data("bias", bias),
convolution("conv", "input_reordered", { "weights" }, { "bias" }, num_groups, stride, input_offset, dilation, output_size, data_types::f32),
reorder("out", "conv", format::bfyx, data_types::f32));
build_options options;
options.set_option(build_option::optimize_data(true));
implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" };
options.set_option(build_option::force_implementations({ {"conv", conv_impl} }));
network network(engine, topology, options);
network.set_input_data("input", input);
auto outputs = network.execute();
EXPECT_EQ(outputs.size(), size_t(1));
EXPECT_EQ(outputs.begin()->first, "out");
auto output_memory = outputs.at("out").get_memory();
auto output_layout = output_memory.get_layout();
auto output_ptr = output_memory.pointer<float>();
int y_size = output_layout.size.spatial[1];
int x_size = output_layout.size.spatial[0];
int f_size = output_layout.size.feature[0];
int b_size = output_layout.size.batch[0];
EXPECT_EQ(output_layout.format, format::bfyx);
EXPECT_EQ(y_size, output_size.spatial[1]);
EXPECT_EQ(x_size, output_size.spatial[0]);
EXPECT_EQ(f_size, output_size.feature[0]);
EXPECT_EQ(b_size, output_size.batch[0]);
for (int b = 0; b < b_size; ++b) {
for (int f = 0; f < f_size; ++f) {
for (int y = 0; y < y_size; ++y) {
for (int x = 0; x < x_size; ++x) {
EXPECT_EQ(
output_vec[b * f_size * y_size * x_size + f * y_size * x_size + y * x_size + x],
output_ptr[b * f_size * y_size * x_size + f * y_size * x_size + y * x_size + x]
);
}
}
}
}
}
struct convolution_depthwise_gpu_bfyx : public convolution_depthwise_gpu {};
TEST_P(convolution_depthwise_gpu_bfyx, depthwise_conv_bfyx)