diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp
index cf4aa3892a3..78d611a9b1e 100644
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reduce.cpp
@@ -83,7 +83,7 @@ struct reduce_impl : typed_primitive_impl_ocl<reduce> {
         auto params = get_default_params<kernel_selector::reduce_params>(impl_param);
         auto optional_params = get_default_optional_params<kernel_selector::reduce_optional_params>(impl_param.get_program());
 
-        params.reduceAxes = convert_axes(primitive->axes, impl_param.get_output_layout().get_rank());
+        params.reduceAxes = convert_axes(primitive->axes, impl_param.input_layouts[0].get_rank());
         params.keepDims = primitive->keep_dims;
         params.reduceMode = cldnn_2_reduce_mode(primitive->mode);
 
diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
index 1ddd4ffc93c..715ca845cf6 100644
--- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
+++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
@@ -97,7 +97,9 @@ static bool is_reduce_blocked_axes(reduce_node const& node) {
     auto num_spatial = format::spatial_num(node.get_output_layout().format);
     auto dims = node.get_output_layout().format.dimension();
 
-    if ((count(reduce_axes.begin(), reduce_axes.end(), 1) > 0 ||
+
+    if (input_layout.is_static() &&
+        (count(reduce_axes.begin(), reduce_axes.end(), 1) > 0 ||
         (count(reduce_axes.begin(), reduce_axes.end(), 0) > 0 && input_layout.batch() > 1))) {
         for (size_t idx_spatial = dims - num_spatial ; idx_spatial < dims ; idx_spatial++) {
             if (count(reduce_axes.begin(), reduce_axes.end(), idx_spatial) == 0)
@@ -1777,6 +1779,18 @@ format layout_optimizer::get_preferred_format(program_node& node) {
                 expected = format::b_fs_yx_fsv32;
             }
         }
+    } else if (node.is_type<reduce>()) {
+        auto& reduce_node = node.as<reduce>();
+        auto input_layout = reduce_node.input().get_output_layout();
+        // TODO: Under the current implementation, dynamic shapes do not support blocked formats. This will be supported in the future.
+        if (!use_onednn_impls && input_layout.is_dynamic()) {
+            if (input_layout.format.dimension() == 6)
+                expected = format::bfwzyx;
+            else if (input_layout.format.dimension() == 5)
+                expected = format::bfzyx;
+            else if (input_layout.format.dimension() == 4)
+                expected = format::bfyx;
+        }
     }
 
     return expected;
diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reduce_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reduce_ref.cl
index 61bd6226e37..eb3c7df4896 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reduce_ref.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reduce_ref.cl
@@ -36,14 +36,19 @@ KERNEL(reduce_ref)(
 #if INPUT0_DIMS == 4
     const uint w = 0;
     const uint z = 0;
-    const uint out_idx = OUTPUT_GET_INDEX(b, f, y, x);
 #elif INPUT0_DIMS == 5
     const uint z = wz % OUTPUT_SIZE_Z;
     const uint w = 0;
-    const uint out_idx = OUTPUT_GET_INDEX(b, f, z, y, x);
 #elif INPUT0_DIMS == 6
     const uint z = wz % OUTPUT_SIZE_Z;
     const uint w = wz / OUTPUT_SIZE_Z;
+#endif
+
+#if OUTPUT_DIMS == 4
+    const uint out_idx = OUTPUT_GET_INDEX(b, f, y, x);
+#elif OUTPUT_DIMS == 5
+    const uint out_idx = OUTPUT_GET_INDEX(b, f, z, y, x);
+#elif OUTPUT_DIMS == 6
     const uint out_idx = OUTPUT_GET_INDEX(b, f, w, z, y, x);
 #endif
 
diff --git a/src/plugins/intel_gpu/src/plugin/ops/reduce.cpp b/src/plugins/intel_gpu/src/plugin/ops/reduce.cpp
index 0f841f7e1c3..634e8add090 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/reduce.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/reduce.cpp
@@ -27,8 +27,8 @@ static void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op,
     validate_inputs_count(op, {2});
     auto inputPrimitives = p.GetInputPrimitiveIDs(op);
     std::string layerName = layer_type_name_ID(op);
-
-    int64_t rank = op->get_input_partial_shape(0).size();
+    auto input_pshape = op->get_input_partial_shape(0);
+    int64_t rank = input_pshape.size();
 
     auto axes_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
     if (!axes_constant) {
@@ -52,6 +52,10 @@ static void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op,
 
     p.add_primitive(*op, reducePrim);
 
+    if (input_pshape.is_dynamic() || p.use_new_shape_infer()) {
+        return;
+    }
+
     auto resultLayerName = layerName;
     auto out_dims = op->get_output_shape(0).size();
     if (out_dims == 3 && !keep_dims && rank >= 4) {
diff --git a/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/reduce.cpp b/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/reduce.cpp
new file mode 100644
index 00000000000..796a58995ba
--- /dev/null
+++ b/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/reduce.cpp
@@ -0,0 +1,361 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "shared_test_classes/single_layer/reduce_ops.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+#include "ie_precision.hpp"
+#include "ngraph_functions/builders.hpp"
+#include <vector>
+
+using namespace ngraph;
+using namespace InferenceEngine;
+using namespace ov::test;
+
+namespace GPULayerTestsDefinitions {
+
+typedef struct {
+    std::vector<InputShape> data_shape;
+    std::vector<int> axes;
+} ReduceInput;
+
+typedef std::tuple<
+        ReduceInput,            // input data (data shape, axes values)
+        ElementType,            // precision of inputs
+        helpers::ReductionType, // reduction type
+        bool,                   // keepDims
+        TargetDevice            // device name
+> ReduceLayerTestParamSet;
+
+class ReduceLayerGPUTest : public testing::WithParamInterface<ReduceLayerTestParamSet>,
+                           virtual public SubgraphBaseTest {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<ReduceLayerTestParamSet>& obj) {
+        ReduceInput input_data;
+        ElementType netType;
+        helpers::ReductionType reductionType;
+        bool keepDims;
+        TargetDevice targetDevice;
+        std::tie(input_data, netType, reductionType, keepDims, targetDevice) = obj.param;
+
+        std::vector<InputShape> inshapes = input_data.data_shape;
+        std::vector<int> axes = input_data.axes;
+
+        std::ostringstream result;
+
+        result << "IS=";
+        for (const auto& shape : inshapes) {
+            result << CommonTestUtils::partialShape2str({shape.first}) << "_";
+        }
+        result << "TS=";
+        for (const auto& shape : inshapes) {
+            for (const auto& item : shape.second) {
+                result << CommonTestUtils::vec2str(item) << "_";
+            }
+        }
+        result << "axes=";
+        result << CommonTestUtils::vec2str(axes) << "_";
+
+        result << "Precision=" << netType << "_";
+        result << "reductionType=" << reductionType << "_";
+        result << "keepDims=" << keepDims << "_";
+        result << "trgDev=" << targetDevice;
+
+        return result.str();
+    }
+
+protected:
+    void SetUp() override {
+        ReduceInput input_data;
+        ElementType netPrecision;
+        helpers::ReductionType reductionType;
+        bool keepDims;
+        std::tie(input_data, netPrecision, reductionType, keepDims, targetDevice) = this->GetParam();
+
+        std::vector<InputShape> inputShapes = input_data.data_shape;
+        std::vector<int> axes = input_data.axes;
+
+        init_input_shapes(inputShapes);
+
+        auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes);
+        auto paramOuts = ngraph::helpers::convert2OutputVector(
+                ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+
+        std::vector<size_t> shapeAxes;
+        shapeAxes.push_back(axes.size());
+
+        auto reductionAxesNode = std::dynamic_pointer_cast<ngraph::Node>(
+                std::make_shared<ngraph::opset3::Constant>(ngraph::element::Type_t::i64, ngraph::Shape(shapeAxes), axes));
+
+        const auto reduce = ngraph::builder::makeReduce(paramOuts[0], reductionAxesNode, keepDims, reductionType);
+
+        auto makeFunction = [](ParameterVector &params, const std::shared_ptr<Node> &lastNode) {
+            ResultVector results;
+
+            for (size_t i = 0; i < lastNode->get_output_size(); i++)
+                results.push_back(std::make_shared<opset1::Result>(lastNode->output(i)));
+
+            return std::make_shared<Function>(results, params, "ReduceLayerGPUTest");
+        };
+
+        function = makeFunction(params, reduce);
+    }
+};
+
+TEST_P(ReduceLayerGPUTest, CompareWithRefs) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    run();
+}
+
+namespace {
+
+const std::vector<bool> keepDims = {
+    true,
+    false,
+};
+
+const std::vector<ElementType> floatPrecisions = {
+    ElementType::f32,
+    ElementType::f16,
+};
+
+const std::vector<ElementType> floatIntPrecisions = {
+    ElementType::f32,
+    ElementType::f16,
+    ElementType::i32,
+};
+
+
+namespace Reduce {
+
+const ReduceInput dyn1d = {
+    {
+        { {-1}, {{4}, {5}} }
+    },
+    {0}
+};
+
+const ReduceInput dyn2d = {
+    {
+        { {-1, -1}, {{4, 5}, {5, 6}} }
+    },
+    {1}
+};
+
+const ReduceInput dyn3d = {
+    {
+        { {-1, -1, -1}, {{4, 5, 6}, {5, 1, 6}} }
+    },
+    {0, -1}
+};
+
+const ReduceInput dyn4d = {
+    {
+        { {-1, -1, -1, -1}, {{2, 3, 4, 5}, {5, 4, 3, 1}} }
+    },
+    {1, -2}
+};
+
+const ReduceInput dyn5d = {
+    {
+        { {-1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6}, {5, 6, 3, 1, 2}} }
+    },
+    {-3, 3}
+};
+
+const ReduceInput dyn6d = {
+    {
+        { {-1, -1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6, 7}, {5, 4, 3, 1, 2, 6}} }
+    },
+    {1}
+};
+
+
+// ================== Reduction int32/float types (Sum, Min, Max, L1) ==================
+const auto reduceSum = ::testing::Combine(
+    ::testing::ValuesIn({dyn1d, dyn5d}),
+    ::testing::ValuesIn(floatIntPrecisions),
+    ::testing::Values(helpers::ReductionType::Sum),
+    ::testing::ValuesIn(keepDims),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+INSTANTIATE_TEST_SUITE_P(smoke_reduce_sum_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceSum, ReduceLayerGPUTest::getTestCaseName);
+
+const auto reduceMin = ::testing::Combine(
+    ::testing::ValuesIn({dyn2d, dyn6d}),
+    ::testing::ValuesIn(floatIntPrecisions),
+    ::testing::Values(helpers::ReductionType::Min),
+    ::testing::ValuesIn(keepDims),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+INSTANTIATE_TEST_SUITE_P(smoke_reduce_min_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceMin, ReduceLayerGPUTest::getTestCaseName);
+
+const auto reduceMax = ::testing::Combine(
+    ::testing::ValuesIn({dyn3d, dyn5d}),
+    ::testing::ValuesIn(floatIntPrecisions),
+    ::testing::Values(helpers::ReductionType::Max),
+    ::testing::ValuesIn(keepDims),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+INSTANTIATE_TEST_SUITE_P(smoke_reduce_max_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceMax, ReduceLayerGPUTest::getTestCaseName);
+
+const auto reduceL1 = ::testing::Combine(
+    ::testing::ValuesIn({dyn4d, dyn6d}),
+    ::testing::ValuesIn(floatIntPrecisions),
+    ::testing::Values(helpers::ReductionType::L1),
+    ::testing::ValuesIn(keepDims),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+INSTANTIATE_TEST_SUITE_P(smoke_reduce_l1_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceL1, ReduceLayerGPUTest::getTestCaseName);
+
+
+// ================== Reduction float types (Mean, Prod, L2) ==================
+const auto reduceMean = ::testing::Combine(
+    ::testing::ValuesIn({dyn1d, dyn6d}),
+    ::testing::ValuesIn(floatPrecisions),
+    ::testing::Values(helpers::ReductionType::Mean),
+    ::testing::ValuesIn(keepDims),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+INSTANTIATE_TEST_SUITE_P(smoke_reduce_mean_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceMean, ReduceLayerGPUTest::getTestCaseName);
+
+const auto reduceProd = ::testing::Combine(
+    ::testing::ValuesIn({dyn2d, dyn4d}),
+    ::testing::ValuesIn({ElementType::f32}),
+    ::testing::Values(helpers::ReductionType::Prod),
+    ::testing::ValuesIn(keepDims),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+INSTANTIATE_TEST_SUITE_P(smoke_reduce_prod_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceProd, ReduceLayerGPUTest::getTestCaseName);
+
+const auto reduceL2 = ::testing::Combine(
+    ::testing::ValuesIn({dyn4d, dyn5d}),
+    ::testing::ValuesIn(floatPrecisions),
+    ::testing::Values(helpers::ReductionType::L2),
+    ::testing::ValuesIn(keepDims),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+INSTANTIATE_TEST_SUITE_P(smoke_reduce_l2_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceL2, ReduceLayerGPUTest::getTestCaseName);
+
+
+// ================== Reduction logical types (LogicalOr, LogicalAnd) ==================
+const auto reduceLogicalOr = ::testing::Combine(
+    ::testing::ValuesIn({dyn1d, dyn6d}),
+    ::testing::Values(ElementType::boolean),
+    ::testing::Values(helpers::ReductionType::LogicalOr),
+    ::testing::ValuesIn(keepDims),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+INSTANTIATE_TEST_SUITE_P(smoke_reduce_logicalor_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceLogicalOr, ReduceLayerGPUTest::getTestCaseName);
+
+const auto reduceLogicalAnd = ::testing::Combine(
+    ::testing::ValuesIn({dyn3d, dyn5d}),
+    ::testing::Values(ElementType::boolean),
+    ::testing::Values(helpers::ReductionType::LogicalAnd),
+    ::testing::ValuesIn(keepDims),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+INSTANTIATE_TEST_SUITE_P(smoke_reduce_logicaland_compareWithRefs_dynamic, ReduceLayerGPUTest, reduceLogicalAnd, ReduceLayerGPUTest::getTestCaseName);
+
+
+// ================== Various reduce axes ==================
+const std::vector<ReduceInput> dynVariousAxisInputs = {
+    // 4D
+    {
+        {
+            { {-1, -1, -1, -1}, {{2, 3, 4, 5}, {5, 4, 3, 1}} }
+        },
+        {0}
+    },
+    {
+        {
+            { {-1, -1, -1, -1}, {{2, 3, 4, 5}, {5, 4, 3, 1}} }
+        },
+        {1, -1}
+    },
+    {
+        {
+            { {-1, -1, -1, -1}, {{2, 3, 4, 5}, {5, 3, 7, 1}} }
+        },
+        {2, 3}
+    },
+    {
+        {
+            { {-1, -1, -1, -1}, {{2, 3, 4, 5}, {1, 2, 3, 1}} }
+        },
+        {0, 2, -1}
+    },
+    // 5D
+    {
+        {
+            { {-1, -1, -1, -1, -1}, {{2, 4, 3, 4, 5}, {5, 3, 2, 1, 2}} }
+        },
+        {1}
+    },
+    {
+        {
+            { {-1, -1, -1, -1, -1}, {{4, 3, 2, 5, 6}, {5, 3, 2, 1, 4}} }
+        },
+        {0, -3}
+    },
+    {
+        {
+            { {-1, -1, -1, -1, -1}, {{3, 4, 2, 6, 5}, {3, 5, 7, 1, 5}} }
+        },
+        {2, -2, 4}
+    },
+    {
+        {
+            { {-1, -1, -1, -1, -1}, {{4, 2, 5, 1, 9}, {5, 3, 7, 1, 2}} }
+        },
+        {0, 1, -2, 4}
+    },
+    // 6D
+    {
+        {
+            { {-1, -1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6, 7}, {5, 3, 4, 1, 7, 5}} }
+        },
+        {0}
+    },
+    {
+        {
+            { {-1, -1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6, 7}, {5, 3, 5, 1, 2, 5}} }
+        },
+        {0, -3}
+    },
+    {
+        {
+            { {-1, -1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6, 7}, {2, 5, 4, 1, 5, 3}} }
+        },
+        {2, 3, -2, 5}
+    },
+    {
+        {
+            { {-1, -1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6, 7}, {3, 5, 4, 1, 8, 5}} }
+        },
+        {0, 2, -3, 4, 5}
+    },
+    {
+        {
+            { {-1, -1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6, 7}, {7, 5, 3, 1, 6, 9}} }
+        },
+        {4}
+    },
+};
+
+const auto reduceMaxWithVariousAxis = ::testing::Combine(
+    ::testing::ValuesIn(dynVariousAxisInputs),
+    ::testing::Values(ElementType::f32),
+    ::testing::Values(helpers::ReductionType::Max),
+    ::testing::ValuesIn(keepDims),
+    ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+INSTANTIATE_TEST_SUITE_P(smoke_reduce_max_withVariousAxis_compareWithRefs_dynamic,
+                         ReduceLayerGPUTest, reduceMaxWithVariousAxis, ReduceLayerGPUTest::getTestCaseName);
+
+
+} // namespace Reduce
+} // namespace
+} // namespace GPULayerTestsDefinitions
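
Note on the first hunk: the axis conversion in `reduce_impl` now resolves the reduce axes against the rank of the input layout rather than the output layout. With `keep_dims = false` the output loses the reduced dimensions, so normalizing axes (especially negative ones) against the output rank can point at the wrong dimension or fall out of range. The sketch below is not the plugin's `convert_axes` helper; it is a minimal standalone illustration of the normalization rule, with `normalize_axes` being a hypothetical name introduced here.

```cpp
#include <cstdint>
#include <stdexcept>
#include <vector>

// Normalize possibly-negative reduction axes against the rank of the *input* tensor.
// Using the output rank instead (the behaviour fixed in reduce.cpp above) would remap
// axes incorrectly whenever keep_dims = false shrinks the output rank.
std::vector<uint16_t> normalize_axes(const std::vector<int64_t>& axes, size_t input_rank) {
    std::vector<uint16_t> result;
    result.reserve(axes.size());
    for (int64_t axis : axes) {
        if (axis < 0)
            axis += static_cast<int64_t>(input_rank);  // e.g. axis -1 with rank 4 -> 3
        if (axis < 0 || axis >= static_cast<int64_t>(input_rank))
            throw std::runtime_error("Reduce axis is out of range");
        result.push_back(static_cast<uint16_t>(axis));
    }
    return result;
}

// Example: a 4D input reduced over {1, -2} with keep_dims = false yields a 2D output.
// Normalizing {1, -2} against the input rank gives {1, 2}; normalizing against the
// output rank (2) would give {1, 0}, i.e. the wrong axes.
```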