Add decomposing Reduce for Bugfix of byx reduction (#14449)

+ Add transformation DecomposeReduceForFalseKeepDims to fix an accuracy issue of oneDNN reduction
+ Add Reshape to modify output of Reduce and update keep_dims to true: reduce-reshape
+ Add exception logic for reduce modes not supported by the byx conversion

Signed-off-by: Min, Byungil <byungil.min@intel.com>
This commit is contained in:
Min, Byungil
2023-01-05 15:44:12 +09:00
committed by GitHub
parent 9427623046
commit 0d261dbf83
5 changed files with 374 additions and 5 deletions

View File

@@ -99,14 +99,20 @@ static bool is_reduce_blocked_axes(reduce_node const& node) {
auto num_spatial = format::spatial_num(node.get_output_layout().format);
auto dims = node.get_output_layout().format.dimension();
// Check if it reduces all spatial axes
bool feature_axis_is_only_remaining = true;
for (size_t idx_spatial = (dims - num_spatial); idx_spatial < dims; idx_spatial++) {
if (count(reduce_axes.begin(), reduce_axes.end(), idx_spatial) == 0) {
feature_axis_is_only_remaining = false;
break;
}
}
if (input_layout.is_static() &&
(count(reduce_axes.begin(), reduce_axes.end(), 1) > 0 ||
(count(reduce_axes.begin(), reduce_axes.end(), 0) > 0 && input_layout.batch() > 1))) {
for (size_t idx_spatial = dims - num_spatial ; idx_spatial < dims ; idx_spatial++) {
if (count(reduce_axes.begin(), reduce_axes.end(), idx_spatial) == 0)
return true;
}
(count(reduce_axes.begin(), reduce_axes.end(), 0) > 0))) {
if (!feature_axis_is_only_remaining)
return true;
}
return false;

View File

@@ -0,0 +1,119 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "decompose_reduce_for_false_keepdims.hpp"
#include <algorithm>
#include <cassert>
#include <memory>
#include <ngraph/opsets/opset10.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include <vector>
namespace ov {
namespace intel_gpu {
// Rewrites ReduceSum/Mean/Prod/Min/Max with keep_dims == false into the same
// Reduce with keep_dims == true followed by a Reshape to the original output
// shape. Per the accompanying header, clDNN reorders un-reduced axes of a
// false-keepdims Reduce output, which oneDNN reduction does not allow.
DecomposeReduceForFalseKeepDims::DecomposeReduceForFalseKeepDims() {
    // Get one MatcherPass for all modes
    // Note: logical reduces (ReduceLogicalOr/And) are intentionally not matched.
    auto reduce_pattern = ngraph::pattern::wrap_type<ngraph::opset10::ReduceSum,
                                                     ngraph::opset10::ReduceMean,
                                                     ngraph::opset10::ReduceProd,
                                                     ngraph::opset10::ReduceMin,
                                                     ngraph::opset10::ReduceMax>(
        // Data input and Reduce output must be statically shaped; the axes must
        // come from a Constant so they can be read at transformation time.
        {ngraph::pattern::any_input(ngraph::pattern::has_static_shape()),
         ngraph::pattern::wrap_type<ngraph::opset10::Constant>()},
        ngraph::pattern::has_static_shape());

    // register callback
    ov::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
        const auto& pattern_map = m.get_pattern_value_map();
        auto reduce =
            as_type_ptr<op::util::ArithmeticReductionKeepDims>(pattern_map.at(reduce_pattern).get_node_shared_ptr());
        if (!reduce)
            return false;

        auto input = reduce->input_value(0);
        const auto input_shape = input.get_shape();
        const auto reduce_shape = reduce->output(0).get_shape();
        const auto input_rank = input.get_partial_shape().rank().get_length();

        // Axes are sorted so they can be re-emitted as a canonical Constant below.
        auto axes_vector = reduce->get_reduction_axes().to_vector();
        std::sort(axes_vector.begin(), axes_vector.end());

        // Transform only when keep_dims is false and the reduce drops the batch
        // axis and every spatial axis while keeping the feature axis (see
        // need_transformation_for_reordered_axes). Ranks of 6+ are left alone.
        if (!reduce->get_keep_dims() &&
            need_transformation_for_reordered_axes(axes_vector, input_rank, (input_rank - 2)) &&
            input_shape.size() < 6) {
            ngraph::NodeVector new_ops;

            // Reduce
            auto reduce_const =
                ngraph::opset10::Constant::create(ngraph::element::i64, ngraph::Shape{axes_vector.size()}, axes_vector);

            // Add each reduce mode supported by oneDNN
            // Re-create the Reduce with keep_dims == true; any unhandled mode
            // aborts the rewrite.
            if (ngraph::is_type<ngraph::opset10::ReduceSum>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceSum>(input, reduce_const, true);
            else if (ngraph::is_type<ngraph::opset10::ReduceMean>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceMean>(input, reduce_const, true);
            else if (ngraph::is_type<ngraph::opset10::ReduceMin>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceMin>(input, reduce_const, true);
            else if (ngraph::is_type<ngraph::opset10::ReduceMax>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceMax>(input, reduce_const, true);
            else if (ngraph::is_type<ngraph::opset10::ReduceProd>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceProd>(input, reduce_const, true);
            else
                return false;

            // The new Reduce inherits the original friendly name so downstream
            // tooling keeps referring to the same node.
            input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name());
            new_ops.push_back(input.get_node_shared_ptr());

            // Reshape
            // Squeeze the keep-dims output back to the original false-keepdims
            // output shape: a single dimension holding the feature size.
            auto reshape_shape = ngraph::Shape((input_rank - axes_vector.size()), 1);
            // Expected that a feature axis is only un-reduced unless a new case for this decomposition is added.
            assert(reshape_shape.size() == 1);
            reshape_shape[0] = reduce_shape[0];
            input = std::make_shared<ngraph::opset10::Reshape>(
                input,
                ngraph::opset10::Constant::create(ngraph::element::i64,
                                                  ngraph::Shape{reshape_shape.size()},
                                                  reshape_shape),
                false);
            input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name() + "_reshape_false_keepdims");
            new_ops.push_back(input.get_node_shared_ptr());

            // Preserve runtime info and splice the Reduce+Reshape pair in place
            // of the original Reduce.
            ngraph::copy_runtime_info(reduce, new_ops);
            reduce->output(0).replace(input);
            return true;
        }

        return false;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(reduce_pattern, "DecomposeReduceForFalseKeepDims");
    register_matcher(m, callback);
}
// Returns true when only the feature axis survives the reduction: more than one
// axis is reduced, the batch axis (0) is reduced, the feature axis (1) is not,
// and every spatial axis [num_dim - num_spatial, num_dim) is reduced.
bool DecomposeReduceForFalseKeepDims::need_transformation_for_reordered_axes(std::vector<int64_t> reduce_axes,
                                                                             size_t num_dim,
                                                                             size_t num_spatial) {
    const auto is_reduced = [&reduce_axes](int64_t axis) {
        return std::find(reduce_axes.begin(), reduce_axes.end(), axis) != reduce_axes.end();
    };

    // Must reduce the batch axis (plus at least one more) while keeping feature.
    if (reduce_axes.size() <= 1 || !is_reduced(0) || is_reduced(1))
        return false;

    // Every spatial axis has to be reduced as well.
    for (size_t axis = num_dim - num_spatial; axis < num_dim; ++axis) {
        if (!is_reduced(static_cast<int64_t>(axis)))
            return false;
    }
    return true;
}
} // namespace intel_gpu
} // namespace ov

View File

@@ -0,0 +1,28 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
#include <transformations_visibility.hpp>
namespace ov {
namespace intel_gpu {
/**
* @brief Add Reshape to modify output of Reduce and modify keep_dims to true : reduce-reshape
* A clDNN Reduce reorders un-reduced axes of its output tensor to b-f and spatial order when keep_dims is false.
* oneDNN reduction does not allow this. And clDNN execution shows a huge perf drop for blocked formats.
*/
class DecomposeReduceForFalseKeepDims : public ngraph::pass::MatcherPass {
public:
    // Decompose reduce if keep_dims is false and it reduces batch and spatial axes.
    // Registers a matcher that rewrites Reduce{Sum,Mean,Prod,Min,Max} with
    // keep_dims == false into the same Reduce with keep_dims == true followed
    // by a Reshape to the original output shape.
    DecomposeReduceForFalseKeepDims();

    // Returns true if the reduction axes include the batch axis (0) and all
    // spatial axes while leaving the feature axis (1) un-reduced.
    //   reduce_axes - reduction axes of the Reduce node
    //   num_dim     - rank of the input tensor
    //   num_spatial - number of trailing spatial dimensions (caller passes rank - 2)
    bool need_transformation_for_reordered_axes(std::vector<int64_t> reduce_axes, size_t num_dim, size_t num_spatial);
};
} // namespace intel_gpu
} // namespace ov

View File

@@ -28,6 +28,7 @@
#include "transformations/einsum_decomposition.hpp"
#include "transformations/convert_pooling_to_reduce.hpp"
#include "transformations/decompose_reduce_for_false_keepdims.hpp"
#include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
@@ -266,6 +267,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
pass_config->disable<ngraph::pass::ConvertReduceMeanToPooling>();
pass_config->disable<ngraph::pass::ConvertReduceMaxToPooling>();
manager.register_pass<ConvertAvgPoolingToReduce>();
manager.register_pass<DecomposeReduceForFalseKeepDims>();
} else {
pass_config->set_callback<ngraph::pass::ConvertReduceSumToPooling>(
[](const_node_ptr &node) -> bool {

View File

@@ -0,0 +1,214 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <memory>
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset10.hpp>
#include <ngraph/pass/manager.hpp>
#include <plugin/transformations/decompose_reduce_for_false_keepdims.hpp>
#include <string>
#include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp>
#include <tuple>
#include "intel_gpu/primitives/reduce.hpp"
#include "ngraph/type/element_type.hpp"
#include "openvino/core/descriptor/tensor.hpp"
#include "test_utils.h"
using namespace testing;
using namespace ::tests;
using InputShape = ngraph::PartialShape;
using KeepDims = bool;
using ReduceAxes = std::vector<int64_t>;
using ReduceType = cldnn::reduce_mode;
using ReshapeShape = std::vector<size_t>;
using NeedDecompose = bool;
// Parametrized fixture: builds a one-Reduce ngraph function from the test
// parameters so DecomposeReduceForFalseKeepDims can be run against it.
class ReduceDecomposeTests
    : public ::testing::Test,
      public testing::WithParamInterface<
          std::tuple<ReduceType, InputShape, ReduceAxes, KeepDims, NeedDecompose, ReshapeShape>> {
public:
    std::shared_ptr<ngraph::Function> fc;  // function under test, built in SetUp()
    bool need_decompose;                   // expected: the pass inserted a Reshape
    ReshapeShape result_shape;             // expected shape at the Result node

    void SetUp() override {
        const auto& param = GetParam();
        need_decompose = std::get<4>(param);
        result_shape = std::get<5>(param);
        fc = get_transformed_function(std::get<1>(param),   // input shape
                                      std::get<2>(param),   // reduce axes
                                      std::get<0>(param),   // reduce mode
                                      std::get<3>(param));  // keep_dims
    }

    // Builds Parameter -> Reduce(reduce_type, axes, keep_dim) -> Result.
    // Logical reduce modes operate on boolean elements, all others on f32.
    static std::shared_ptr<ngraph::Function> get_transformed_function(const ngraph::PartialShape& input_shape,
                                                                      const std::vector<int64_t>& axes,
                                                                      const ReduceType& reduce_type,
                                                                      const bool keep_dim) {
        const bool is_logical =
            reduce_type == reduce_mode::logical_or || reduce_type == reduce_mode::logical_and;
        const auto element_type = is_logical ? ngraph::element::boolean : ngraph::element::f32;
        auto param = std::make_shared<ngraph::opset10::Parameter>(element_type, input_shape);
        auto axes_const = ngraph::opset10::Constant::create(ngraph::element::i64, ngraph::Shape{axes.size()}, axes);

        ngraph::Output<ngraph::Node> reduced;
        switch (reduce_type) {
        case reduce_mode::sum:
            reduced = std::make_shared<ngraph::opset10::ReduceSum>(param, axes_const, keep_dim);
            break;
        case reduce_mode::mean:
            reduced = std::make_shared<ngraph::opset10::ReduceMean>(param, axes_const, keep_dim);
            break;
        case reduce_mode::min:
            reduced = std::make_shared<ngraph::opset10::ReduceMin>(param, axes_const, keep_dim);
            break;
        case reduce_mode::max:
            reduced = std::make_shared<ngraph::opset10::ReduceMax>(param, axes_const, keep_dim);
            break;
        case reduce_mode::prod:
            reduced = std::make_shared<ngraph::opset10::ReduceProd>(param, axes_const, keep_dim);
            break;
        case reduce_mode::logical_or:
            reduced = std::make_shared<ngraph::opset10::ReduceLogicalOr>(param, axes_const, keep_dim);
            break;
        case reduce_mode::logical_and:
            reduced = std::make_shared<ngraph::opset10::ReduceLogicalAnd>(param, axes_const, keep_dim);
            break;
        default:
            throw std::runtime_error("Invalid reduce type for this test-case.");
        }

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reduced.get_node_shared_ptr()},
                                                  ngraph::ParameterVector{param});
    }
};
// Runs the pass on the fixture's function, then checks (a) whether a Reshape
// was inserted and (b) the final Result shape, against the expectations.
TEST_P(ReduceDecomposeTests, CompareFunctions) {
    ngraph::pass::Manager manager;
    manager.set_per_pass_validation(false);
    manager.register_pass<ngraph::pass::InitNodeInfo>();
    manager.register_pass<ov::intel_gpu::DecomposeReduceForFalseKeepDims>();
    manager.run_passes(fc);

    bool reshape_inserted = false;
    ov::Shape observed_shape;
    for (const auto& node : fc->get_ops()) {
        const std::string kind = node->get_type_name();
        if (kind.find("Reshape") != std::string::npos)
            reshape_inserted = true;
        else if (kind.find("Result") != std::string::npos)
            observed_shape = node->get_shape();
    }

    ASSERT_TRUE(reshape_inserted == need_decompose);
    ASSERT_TRUE(observed_shape == result_shape);
}
// Positive cases: keep_dims == false and axes {0, 2, 3} (batch + all spatial),
// so the pass must rewrite the Reduce and insert a Reshape; the Result then
// holds only the feature dimension.
INSTANTIATE_TEST_SUITE_P(ReduceDecomposeForFalseKeepdims,
                         ReduceDecomposeTests,
                         testing::Values(std::make_tuple(reduce_mode::prod,
                                                         InputShape{32, 32, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         true,
                                                         ReshapeShape{32}),
                                         std::make_tuple(reduce_mode::sum,
                                                         InputShape{16, 3, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         true,
                                                         ReshapeShape{3}),
                                         std::make_tuple(reduce_mode::mean,
                                                         InputShape{16, 3, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         true,
                                                         ReshapeShape{3}),
                                         std::make_tuple(reduce_mode::min,
                                                         InputShape{16, 3, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         true,
                                                         ReshapeShape{3}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{16, 3, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         true,
                                                         ReshapeShape{3}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{8, 3, 64, 64},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         true,
                                                         ReshapeShape{3})));

// Negative cases: either the axes do not cover batch + all spatial axes, or
// the mode is a logical reduce (not matched by the pass) — no Reshape expected.
INSTANTIATE_TEST_SUITE_P(ReduceDecomposeForFalseKeepdimsNotCase,
                         ReduceDecomposeTests,
                         testing::Values(std::make_tuple(reduce_mode::max,
                                                         InputShape{32, 32, 32, 32},
                                                         ReduceAxes{0, 2},
                                                         KeepDims{false},
                                                         false,
                                                         ReshapeShape{32, 32}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{1, 3, 64, 64},
                                                         ReduceAxes{0, 3},
                                                         KeepDims{false},
                                                         false,
                                                         ReshapeShape{3, 64}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{32, 32, 32, 32},
                                                         ReduceAxes{0},
                                                         KeepDims{false},
                                                         false,
                                                         ReshapeShape{32, 32, 32}),
                                         std::make_tuple(reduce_mode::logical_and,
                                                         InputShape{16, 3, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         false,
                                                         ReshapeShape{3}),
                                         std::make_tuple(reduce_mode::logical_or,
                                                         InputShape{16, 3, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         false,
                                                         ReshapeShape{3}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{1, 3, 64, 64},
                                                         ReduceAxes{0},
                                                         KeepDims{false},
                                                         false,
                                                         ReshapeShape{3, 64, 64})));

// keep_dims == true needs no decomposition: the output already keeps rank, so
// the pass must leave the graph untouched (no Reshape).
INSTANTIATE_TEST_SUITE_P(ReduceDecomposeForTrueKeepdims,
                         ReduceDecomposeTests,
                         testing::Values(std::make_tuple(reduce_mode::max,
                                                         InputShape{32, 32, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{true},
                                                         false,
                                                         ReshapeShape{1, 32, 1, 1}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{1, 3, 64, 64},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{true},
                                                         false,
                                                         ReshapeShape{1, 3, 1, 1}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{32, 32, 32, 32},
                                                         ReduceAxes{0, 2},
                                                         KeepDims{true},
                                                         false,
                                                         ReshapeShape{1, 32, 1, 32})));
// The pass must tolerate a fully dynamic input shape without throwing: the
// matcher requires static shapes, so it should simply not fire.
TEST(DecomposeReduceForFalseKeepDims, Negative) {
    auto model =
        ReduceDecomposeTests::get_transformed_function(ngraph::PartialShape::dynamic(), {3}, reduce_mode::max, true);

    ngraph::pass::Manager pass_manager;
    pass_manager.register_pass<ov::intel_gpu::DecomposeReduceForFalseKeepDims>();
    ASSERT_NO_THROW(pass_manager.run_passes(model));
}