[GPU] Update Reduce Op to use dynamic input (#13998)

This commit is contained in:
Kelvin Choi 2022-11-28 10:13:58 +09:00 committed by GitHub
parent e0c026cdfc
commit 003e25b6fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 390 additions and 6 deletions

View File

@ -83,7 +83,7 @@ struct reduce_impl : typed_primitive_impl_ocl<reduce> {
auto params = get_default_params<kernel_selector::reduce_params>(impl_param); auto params = get_default_params<kernel_selector::reduce_params>(impl_param);
auto optional_params = get_default_optional_params<kernel_selector::reduce_optional_params>(impl_param.get_program()); auto optional_params = get_default_optional_params<kernel_selector::reduce_optional_params>(impl_param.get_program());
params.reduceAxes = convert_axes(primitive->axes, impl_param.get_output_layout().get_rank()); params.reduceAxes = convert_axes(primitive->axes, impl_param.input_layouts[0].get_rank());
params.keepDims = primitive->keep_dims; params.keepDims = primitive->keep_dims;
params.reduceMode = cldnn_2_reduce_mode(primitive->mode); params.reduceMode = cldnn_2_reduce_mode(primitive->mode);

View File

@ -97,7 +97,9 @@ static bool is_reduce_blocked_axes(reduce_node const& node) {
auto num_spatial = format::spatial_num(node.get_output_layout().format); auto num_spatial = format::spatial_num(node.get_output_layout().format);
auto dims = node.get_output_layout().format.dimension(); auto dims = node.get_output_layout().format.dimension();
if ((count(reduce_axes.begin(), reduce_axes.end(), 1) > 0 ||
if (input_layout.is_static() &&
(count(reduce_axes.begin(), reduce_axes.end(), 1) > 0 ||
(count(reduce_axes.begin(), reduce_axes.end(), 0) > 0 && input_layout.batch() > 1))) { (count(reduce_axes.begin(), reduce_axes.end(), 0) > 0 && input_layout.batch() > 1))) {
for (size_t idx_spatial = dims - num_spatial ; idx_spatial < dims ; idx_spatial++) { for (size_t idx_spatial = dims - num_spatial ; idx_spatial < dims ; idx_spatial++) {
if (count(reduce_axes.begin(), reduce_axes.end(), idx_spatial) == 0) if (count(reduce_axes.begin(), reduce_axes.end(), idx_spatial) == 0)
@ -1777,6 +1779,18 @@ format layout_optimizer::get_preferred_format(program_node& node) {
expected = format::b_fs_yx_fsv32; expected = format::b_fs_yx_fsv32;
} }
} }
} else if (node.is_type<reduce>()) {
auto& reduce_node = node.as<reduce>();
auto input_layout = reduce_node.input().get_output_layout();
// TODO: Under the current implementation, dynamic shapes don't support blocked formats. This will be supported in the future.
if (!use_onednn_impls && input_layout.is_dynamic()) {
if (input_layout.format.dimension() == 6)
expected = format::bfwzyx;
else if (input_layout.format.dimension() == 5)
expected = format::bfzyx;
else if (input_layout.format.dimension() == 4)
expected = format::bfyx;
}
} }
return expected; return expected;

View File

@ -36,14 +36,19 @@ KERNEL(reduce_ref)(
#if INPUT0_DIMS == 4 #if INPUT0_DIMS == 4
const uint w = 0; const uint w = 0;
const uint z = 0; const uint z = 0;
const uint out_idx = OUTPUT_GET_INDEX(b, f, y, x);
#elif INPUT0_DIMS == 5 #elif INPUT0_DIMS == 5
const uint z = wz % OUTPUT_SIZE_Z; const uint z = wz % OUTPUT_SIZE_Z;
const uint w = 0; const uint w = 0;
const uint out_idx = OUTPUT_GET_INDEX(b, f, z, y, x);
#elif INPUT0_DIMS == 6 #elif INPUT0_DIMS == 6
const uint z = wz % OUTPUT_SIZE_Z; const uint z = wz % OUTPUT_SIZE_Z;
const uint w = wz / OUTPUT_SIZE_Z; const uint w = wz / OUTPUT_SIZE_Z;
#endif
#if OUTPUT_DIMS == 4
const uint out_idx = OUTPUT_GET_INDEX(b, f, y, x);
#elif OUTPUT_DIMS == 5
const uint out_idx = OUTPUT_GET_INDEX(b, f, z, y, x);
#elif OUTPUT_DIMS == 6
const uint out_idx = OUTPUT_GET_INDEX(b, f, w, z, y, x); const uint out_idx = OUTPUT_GET_INDEX(b, f, w, z, y, x);
#endif #endif

View File

@ -27,8 +27,8 @@ static void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op,
validate_inputs_count(op, {2}); validate_inputs_count(op, {2});
auto inputPrimitives = p.GetInputPrimitiveIDs(op); auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op); std::string layerName = layer_type_name_ID(op);
auto input_pshape = op->get_input_partial_shape(0);
int64_t rank = op->get_input_partial_shape(0).size(); int64_t rank = input_pshape.size();
auto axes_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1)); auto axes_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
if (!axes_constant) { if (!axes_constant) {
@ -52,6 +52,10 @@ static void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op,
p.add_primitive(*op, reducePrim); p.add_primitive(*op, reducePrim);
if (input_pshape.is_dynamic() || p.use_new_shape_infer()) {
return;
}
auto resultLayerName = layerName; auto resultLayerName = layerName;
auto out_dims = op->get_output_shape(0).size(); auto out_dims = op->get_output_shape(0).size();
if (out_dims == 3 && !keep_dims && rank >= 4) { if (out_dims == 3 && !keep_dims && rank >= 4) {

View File

@ -0,0 +1,361 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/single_layer/reduce_ops.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ie_precision.hpp"
#include "ngraph_functions/builders.hpp"
#include <string>
using namespace ngraph;
using namespace InferenceEngine;
using namespace ov::test;
namespace GPULayerTestsDefinitions {
// One dynamic-shape Reduce test input: the input data shapes (a partial shape
// plus the concrete target shapes to infer with) and the reduction axes.
typedef struct {
    std::vector<InputShape> data_shape; // dynamic partial shape + static target shapes
    std::vector<int> axes;              // axes to reduce over (negative indices allowed)
} ReduceInput;

// Full parameter set for one ReduceLayerGPUTest case.
typedef std::tuple<
        ReduceInput,            // input data (data shapes and reduction axes)
        ElementType,            // precision of inputs
        helpers::ReductionType, // reduction type
        bool,                   // keepDims
        TargetDevice            // device name
> ReduceLayerTestParamSet;
// Parameterized GPU test for the Reduce family of ops with dynamic input shapes.
// Builds a Parameter -> Reduce(axes constant) -> Result graph and lets
// SubgraphBaseTest::run() compare GPU results with the reference implementation
// for every concrete target shape.
class ReduceLayerGPUTest : public testing::WithParamInterface<ReduceLayerTestParamSet>,
                           virtual public SubgraphBaseTest {
public:
    // Encodes shapes, axes, precision, reduction mode, keepDims and device
    // into a human-readable test name.
    static std::string getTestCaseName(const testing::TestParamInfo<ReduceLayerTestParamSet>& obj) {
        ReduceInput input_data;
        ElementType netType;
        helpers::ReductionType reductionType;
        bool keepDims;
        TargetDevice targetDevice;
        std::tie(input_data, netType, reductionType, keepDims, targetDevice) = obj.param;

        std::vector<InputShape> inshapes = input_data.data_shape;
        std::vector<int> axes = input_data.axes;

        std::ostringstream result;
        result << "IS=";
        for (const auto& shape : inshapes) {
            result << CommonTestUtils::partialShape2str({shape.first}) << "_";
        }
        result << "TS=";
        for (const auto& shape : inshapes) {
            for (const auto& item : shape.second) {
                result << CommonTestUtils::vec2str(item) << "_";
            }
        }
        result << "axes=";
        result << CommonTestUtils::vec2str(axes) << "_";
        result << "Precision=" << netType << "_";
        result << "reductionType=" << reductionType << "_";
        result << "keepDims=" << keepDims << "_";
        result << "trgDev=" << targetDevice;
        return result.str();
    }

protected:
    // Creates the test function from the tuple parameters.
    void SetUp() override {
        ReduceInput input_data;
        ElementType netPrecision;
        helpers::ReductionType reductionType;
        bool keepDims;
        std::tie(input_data, netPrecision, reductionType, keepDims, targetDevice) = this->GetParam();

        std::vector<InputShape> inputShapes = input_data.data_shape;
        std::vector<int> axes = input_data.axes;

        init_input_shapes(inputShapes);

        auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes);
        auto paramOuts = ngraph::helpers::convert2OutputVector(
                ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));

        // Axes are provided as a 1D i64 constant — the second input of Reduce.
        std::vector<size_t> shapeAxes;
        shapeAxes.push_back(axes.size());
        auto reductionAxesNode = std::dynamic_pointer_cast<ngraph::Node>(
                std::make_shared<ngraph::opset3::Constant>(ngraph::element::Type_t::i64, ngraph::Shape(shapeAxes), axes));

        const auto reduce = ngraph::builder::makeReduce(paramOuts[0], reductionAxesNode, keepDims, reductionType);

        // Wraps every output of the last node in a Result and builds the Function.
        auto makeFunction = [](ParameterVector &params, const std::shared_ptr<Node> &lastNode) {
            ResultVector results;
            // get_output_size() returns size_t — use an unsigned loop index to
            // avoid a signed/unsigned comparison (fix over the original `int i`).
            for (size_t i = 0; i < lastNode->get_output_size(); i++)
                results.push_back(std::make_shared<opset1::Result>(lastNode->output(i)));
            return std::make_shared<Function>(results, params, "ReduceLayerGPUTest");
        };
        function = makeFunction(params, reduce);
    }
};
// Runs inference over all concrete target shapes of the dynamic input and
// compares GPU results against the reference implementation.
TEST_P(ReduceLayerGPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    run();
}
namespace {

// Both values of the keep_dims attribute are exercised.
const std::vector<bool> keepDims = {
        true,
        false,
};

// Precisions for reduction modes that are only meaningful on floats
// (Mean, Prod, L2).
const std::vector<ElementType> floatPrecisions = {
        ElementType::f32,
        ElementType::f16,
};

// Precisions for reduction modes defined for both float and integer inputs
// (Sum, Min, Max, L1).
const std::vector<ElementType> floatIntPrecisions = {
        ElementType::f32,
        ElementType::f16,
        ElementType::i32,
};

namespace Reduce {

// Fully dynamic inputs of ranks 1..6; each carries two concrete target shapes
// so two different shapes are inferred through the same compiled model.
const ReduceInput dyn1d = {
        {
            { {-1}, {{4}, {5}} }
        },
        {0}
};

const ReduceInput dyn2d = {
        {
            { {-1, -1}, {{4, 5}, {5, 6}} }
        },
        {1}
};

const ReduceInput dyn3d = {
        {
            { {-1, -1, -1}, {{4, 5, 6}, {5, 1, 6}} }
        },
        {0, -1}
};

const ReduceInput dyn4d = {
        {
            { {-1, -1, -1, -1}, {{2, 3, 4, 5}, {5, 4, 3, 1}} }
        },
        {1, -2}
};

const ReduceInput dyn5d = {
        {
            { {-1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6}, {5, 6, 3, 1, 2}} }
        },
        {-3, 3}
};

const ReduceInput dyn6d = {
        {
            { {-1, -1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6, 7}, {5, 4, 3, 1, 2, 6}} }
        },
        {1}
};

// ================== Reduction int32/float types (Sum, Min, Max, L1) ==================
const auto reduceSum = ::testing::Combine(
        ::testing::ValuesIn({dyn1d, dyn5d}),
        ::testing::ValuesIn(floatIntPrecisions),
        ::testing::Values(helpers::ReductionType::Sum),
        ::testing::ValuesIn(keepDims),
        ::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_reduce_sum_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceSum, ReduceLayerGPUTest::getTestCaseName);

const auto reduceMin = ::testing::Combine(
        ::testing::ValuesIn({dyn2d, dyn6d}),
        ::testing::ValuesIn(floatIntPrecisions),
        ::testing::Values(helpers::ReductionType::Min),
        ::testing::ValuesIn(keepDims),
        ::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_reduce_min_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceMin, ReduceLayerGPUTest::getTestCaseName);

const auto reduceMax = ::testing::Combine(
        ::testing::ValuesIn({dyn3d, dyn5d}),
        ::testing::ValuesIn(floatIntPrecisions),
        ::testing::Values(helpers::ReductionType::Max),
        ::testing::ValuesIn(keepDims),
        ::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_reduce_max_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceMax, ReduceLayerGPUTest::getTestCaseName);

const auto reduceL1 = ::testing::Combine(
        ::testing::ValuesIn({dyn4d, dyn6d}),
        ::testing::ValuesIn(floatIntPrecisions),
        ::testing::Values(helpers::ReductionType::L1),
        ::testing::ValuesIn(keepDims),
        ::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_reduce_l1_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceL1, ReduceLayerGPUTest::getTestCaseName);

// ================== Reduction float types (Mean, Prod, L2) ==================
const auto reduceMean = ::testing::Combine(
        ::testing::ValuesIn({dyn1d, dyn6d}),
        ::testing::ValuesIn(floatPrecisions),
        ::testing::Values(helpers::ReductionType::Mean),
        ::testing::ValuesIn(keepDims),
        ::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_reduce_mean_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceMean, ReduceLayerGPUTest::getTestCaseName);

// Prod is restricted to f32 — presumably to avoid f16 overflow on products;
// NOTE(review): confirm against the reference implementation's tolerance.
const auto reduceProd = ::testing::Combine(
        ::testing::ValuesIn({dyn2d, dyn4d}),
        ::testing::ValuesIn({ElementType::f32}),
        ::testing::Values(helpers::ReductionType::Prod),
        ::testing::ValuesIn(keepDims),
        ::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_reduce_prod_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceProd, ReduceLayerGPUTest::getTestCaseName);

const auto reduceL2 = ::testing::Combine(
        ::testing::ValuesIn({dyn4d, dyn5d}),
        ::testing::ValuesIn(floatPrecisions),
        ::testing::Values(helpers::ReductionType::L2),
        ::testing::ValuesIn(keepDims),
        ::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_reduce_l2_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceL2, ReduceLayerGPUTest::getTestCaseName);

// ================== Reduction logical types (LogicalOr, LogicalAnd) ==================
const auto reduceLogicalOr = ::testing::Combine(
        ::testing::ValuesIn({dyn1d, dyn6d}),
        ::testing::Values(ElementType::boolean),
        ::testing::Values(helpers::ReductionType::LogicalOr),
        ::testing::ValuesIn(keepDims),
        ::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_reduce_logicalor_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceLogicalOr, ReduceLayerGPUTest::getTestCaseName);

const auto reduceLogicalAnd = ::testing::Combine(
        ::testing::ValuesIn({dyn3d, dyn5d}),
        ::testing::Values(ElementType::boolean),
        ::testing::Values(helpers::ReductionType::LogicalAnd),
        ::testing::ValuesIn(keepDims),
        ::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_reduce_logicaland_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceLogicalAnd, ReduceLayerGPUTest::getTestCaseName);

// ================== various reduce-axis ==================
// Covers reductions over batch, feature and spatial axes (positive and
// negative indices, single and multiple axes) for ranks 4..6.
const std::vector<ReduceInput> dynVariousAxisInputs = {
        // 4D
        {
            {
                { {-1, -1, -1, -1}, {{2, 3, 4, 5}, {5, 4, 3, 1}} }
            },
            {0}
        },
        {
            {
                { {-1, -1, -1, -1}, {{2, 3, 4, 5}, {5, 4, 3, 1}} }
            },
            {1, -1}
        },
        {
            {
                { {-1, -1, -1, -1}, {{2, 3, 4, 5}, {5, 3, 7, 1}} }
            },
            {2, 3}
        },
        {
            {
                { {-1, -1, -1, -1}, {{2, 3, 4, 5}, {1, 2, 3, 1}} }
            },
            {0, 2, -1}
        },
        // 5D
        {
            {
                { {-1, -1, -1, -1, -1}, {{2, 4, 3, 4, 5}, {5, 3, 2, 1, 2}} }
            },
            {1}
        },
        {
            {
                { {-1, -1, -1, -1, -1}, {{4, 3, 2, 5, 6}, {5, 3, 2, 1, 4}} }
            },
            {0, -3}
        },
        {
            {
                { {-1, -1, -1, -1, -1}, {{3, 4, 2, 6, 5}, {3, 5, 7, 1, 5}} }
            },
            {2, -2, 4}
        },
        {
            {
                { {-1, -1, -1, -1, -1}, {{4, 2, 5, 1, 9}, {5, 3, 7, 1, 2}} }
            },
            {0, 1, -2, 4}
        },
        // 6D
        {
            {
                { {-1, -1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6, 7}, {5, 3, 4, 1, 7, 5}} }
            },
            {0}
        },
        {
            {
                { {-1, -1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6, 7}, {5, 3, 5, 1, 2, 5}} }
            },
            {0, -3}
        },
        {
            {
                { {-1, -1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6, 7}, {2, 5, 4, 1, 5, 3}} }
            },
            {2, 3, -2, 5}
        },
        {
            {
                { {-1, -1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6, 7}, {3, 5, 4, 1, 8, 5}} }
            },
            {0, 2, -3, 4, 5}
        },
        {
            {
                { {-1, -1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6, 7}, {7, 5, 3, 1, 6, 9}} }
            },
            {4}
        },
};

const auto reduceMaxWithVariousAxis = ::testing::Combine(
        ::testing::ValuesIn(dynVariousAxisInputs),
        ::testing::Values(ElementType::f32),
        ::testing::Values(helpers::ReductionType::Max),
        ::testing::ValuesIn(keepDims),
        ::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_reduce_max_withVariousAxis_compareWithRefs_dynamic,
        ReduceLayerGPUTest, reduceMaxWithVariousAxis, ReduceLayerGPUTest::getTestCaseName);

} // namespace Reduce
} // namespace
} // namespace GPULayerTestsDefinitions