[GPU] Add reorder from i32 to f32 for max-pooling/conv/fc which doesn't support i32 (#12144)

This commit is contained in:
Kelvin Choi
2022-07-20 22:14:22 +09:00
committed by GitHub
parent fdae95a769
commit 3a72200f92
3 changed files with 74 additions and 7 deletions

View File

@@ -13,6 +13,7 @@
#include "binary_convolution_inst.h"
#include "mvn_inst.h"
#include "to_string_utils.h"
#include "pooling_inst.h"
#include "reshape_inst.h"
#include <vector>
@@ -581,6 +582,34 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
}
}
{
// Change input data type of conv node from i32 to f32
auto& input = conv_node.input();
auto input_layout = input.get_output_layout();
if (input_layout.data_type == data_types::i32) {
auto new_layout = input_layout;
new_layout.data_type = data_types::f32;
auto new_input = rf.get_reorder(input.id(), input_layout, new_layout);
if (new_input.first) {
p.add_intermediate(new_input.first, conv_node, 0, !new_input.second);
p.get_or_create(new_input.first).recalc_output_layout(true);
}
}
// Change weights type i32 to f32
auto& weights = conv_node.weights();
auto weights_layout = weights.get_output_layout();
if (weights_layout.data_type == data_types::i32) {
auto new_layout = weights_layout;
new_layout.data_type = data_types::f32;
auto new_input = rf.get_reorder(weights.id(), weights_layout, new_layout);
if (new_input.first) {
p.add_intermediate(new_input.first, conv_node, 1, !new_input.second);
p.get_or_create(new_input.first).recalc_output_layout(false);
}
}
}
// For supporting optimized onednn first conv, the input format from prev reorder to this conv is changed to a recommended format by onednn.
auto& input = conv_node.input();
auto input_layout = input.get_output_layout();
@@ -713,6 +742,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
p.add_intermediate(new_input.first, fc_node, 0);
}
}
// Change input data of fully-connected node from bx to bf
if (format::is_simple_data_format(input_layout.format) && weights.is_constant() && input_layout.format.dimension() == 4 &&
input_layout.size.feature[0] == 1 && input_layout.size.spatial[0] != 1 && input_layout.size.spatial[1] == 1) {
@@ -723,16 +753,42 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
auto& new_reorder_node = p.get_or_create(new_reshape);
p.add_intermediate(new_reorder_node, fc_node, 0);
}
// Change weights type i32 to f32
auto weights_layout = weights.get_output_layout();
if (weights_layout.data_type == data_types::i32) {
auto new_layout = weights_layout;
new_layout.data_type = data_types::f32;
auto new_input = rf.get_reorder(weights.id(), weights_layout, new_layout);
if (new_input.first) {
p.add_intermediate(new_input.first, fc_node, 1);
}
}
};
const auto reorder_input_pooling = [&p, &rf](typed_program_node<pooling>& pooling_node) {
// Change input data type of pooling node from i32 to f32
auto& input = pooling_node.input();
auto input_layout = input.get_output_layout();
if (pooling_node.get_primitive()->mode == pooling_mode::max && input_layout.data_type == data_types::i32) {
auto new_layout = input_layout;
new_layout.data_type = data_types::f32;
auto new_input = rf.get_reorder(input.id(), input_layout, new_layout);
if (new_input.first) {
p.add_intermediate(new_input.first, pooling_node, 0);
}
}
};
for (auto& prim : p.get_processing_order()) {
program_helpers::do_for_types<detection_output, binary_convolution, deconvolution, convolution, fully_connected>(
program_helpers::do_for_types<detection_output, binary_convolution, deconvolution, convolution, fully_connected, pooling>(
*prim,
reorder_input_detection_output,
reorder_input_binary_convolution,
reorder_input_and_weights_deconvolution,
reorder_convolution,
reorder_input_fully_connected);
reorder_input_fully_connected,
reorder_input_pooling);
}
for (auto n : p.get_processing_order()) {

View File

@@ -36,9 +36,14 @@ layout pooling_inst::calc_output_layout(parent::typed_node const& node) {
}
}
if (node.has_fused_primitives()) {
output_type = node.get_fused_output_layout().data_type;
// pooling doesn't support i32 data type
// FIXME: Someday delete this, when pooling supports i32 output.
if (desc->mode == pooling_mode::max && output_type == data_types::i32) {
output_type = data_types::f32;
}
}
if (!desc->argmax.empty())

View File

@@ -17,6 +17,12 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP16
};
const std::vector<InferenceEngine::Precision> netPrecisions_fp_i32 = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16,
InferenceEngine::Precision::I32
};
const std::vector<std::vector<size_t >> kernels = {{3, 3},
{3, 5}};
const std::vector<std::vector<size_t >> strides = {{1, 1},
@@ -50,7 +56,7 @@ const auto maxPool_ExplicitPad_FloorRounding_Params = ::testing::Combine(
INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_ExplicitPad_FloorRounding, PoolingLayerTest,
::testing::Combine(
maxPool_ExplicitPad_FloorRounding_Params,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(netPrecisions_fp_i32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
@@ -75,7 +81,7 @@ const auto maxPool_ExplicitPad_CeilRounding_Params = ::testing::Combine(
INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_ExplicitPad_CeilRounding, PoolingLayerTest,
::testing::Combine(
maxPool_ExplicitPad_CeilRounding_Params,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(netPrecisions_fp_i32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
@@ -181,7 +187,7 @@ const auto maxPool8_ExplicitPad_FloorRounding_Params = ::testing::Combine(
INSTANTIATE_TEST_SUITE_P(smoke_MaxPool8_ExplicitPad_FloorRounding, MaxPoolingV8LayerTest,
::testing::Combine(
maxPool8_ExplicitPad_FloorRounding_Params,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(netPrecisions_fp_i32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
@@ -206,7 +212,7 @@ const auto maxPool8_ExplicitPad_CeilRounding_Params = ::testing::Combine(
INSTANTIATE_TEST_SUITE_P(smoke_MaxPool8_ExplicitPad_CeilRounding, MaxPoolingV8LayerTest,
::testing::Combine(
maxPool8_ExplicitPad_CeilRounding_Params,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(netPrecisions_fp_i32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),