Add decomposing Reduce for Bugfix of byx reduction (#14449)
Add a transformation, DecomposeReduceForFalseKeepDims, to fix an accuracy issue of oneDNN reduction. It appends a Reshape to the Reduce output and updates keep_dims to true (reduce-reshape). Also add exception logic for reduce modes unsupported by byx conversion. Signed-off-by: Min, Byungil <byungil.min@intel.com>
This commit is contained in:
@@ -99,14 +99,20 @@ static bool is_reduce_blocked_axes(reduce_node const& node) {
|
||||
auto num_spatial = format::spatial_num(node.get_output_layout().format);
|
||||
auto dims = node.get_output_layout().format.dimension();
|
||||
|
||||
// Check if it reduces all spatial axes
|
||||
bool feature_axis_is_only_remaining = true;
|
||||
for (size_t idx_spatial = (dims - num_spatial); idx_spatial < dims; idx_spatial++) {
|
||||
if (count(reduce_axes.begin(), reduce_axes.end(), idx_spatial) == 0) {
|
||||
feature_axis_is_only_remaining = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (input_layout.is_static() &&
|
||||
(count(reduce_axes.begin(), reduce_axes.end(), 1) > 0 ||
|
||||
(count(reduce_axes.begin(), reduce_axes.end(), 0) > 0 && input_layout.batch() > 1))) {
|
||||
for (size_t idx_spatial = dims - num_spatial ; idx_spatial < dims ; idx_spatial++) {
|
||||
if (count(reduce_axes.begin(), reduce_axes.end(), idx_spatial) == 0)
|
||||
return true;
|
||||
}
|
||||
(count(reduce_axes.begin(), reduce_axes.end(), 0) > 0))) {
|
||||
if (!feature_axis_is_only_remaining)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "decompose_reduce_for_false_keepdims.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <memory>
|
||||
#include <ngraph/opsets/opset10.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <vector>
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gpu {
|
||||
|
||||
// Rewrites Reduce(keep_dims=false) that removes the batch and all spatial axes into
// Reduce(keep_dims=true) + Reshape, so oneDNN reduction produces correct results and
// blocked formats avoid the clDNN axis-reorder perf penalty.
DecomposeReduceForFalseKeepDims::DecomposeReduceForFalseKeepDims() {
    // Get one MatcherPass for all modes
    // Matches only the arithmetic reduce modes handled below; both the data input and
    // the reduce output must have static shapes, and the axes input must be a Constant.
    auto reduce_pattern = ngraph::pattern::wrap_type<ngraph::opset10::ReduceSum,
                                                     ngraph::opset10::ReduceMean,
                                                     ngraph::opset10::ReduceProd,
                                                     ngraph::opset10::ReduceMin,
                                                     ngraph::opset10::ReduceMax>(
        {ngraph::pattern::any_input(ngraph::pattern::has_static_shape()),
         ngraph::pattern::wrap_type<ngraph::opset10::Constant>()},
        ngraph::pattern::has_static_shape());

    // register callback
    ov::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
        const auto& pattern_map = m.get_pattern_value_map();
        auto reduce =
            as_type_ptr<op::util::ArithmeticReductionKeepDims>(pattern_map.at(reduce_pattern).get_node_shared_ptr());
        if (!reduce)
            return false;

        auto input = reduce->input_value(0);
        const auto input_shape = input.get_shape();
        const auto reduce_shape = reduce->output(0).get_shape();
        const auto input_rank = input.get_partial_shape().rank().get_length();

        // Sort the reduction axes so the helper below can inspect them deterministically.
        auto axes_vector = reduce->get_reduction_axes().to_vector();
        std::sort(axes_vector.begin(), axes_vector.end());

        // Transform only when keep_dims is false and the reduction removes the batch axis
        // and every spatial axis, leaving the feature axis as the single remaining dim.
        // (input_rank - 2) is the spatial-axis count assuming a b,f,spatial... layout;
        // ranks >= 6 are excluded.
        if (!reduce->get_keep_dims() &&
            need_transformation_for_reordered_axes(axes_vector, input_rank, (input_rank - 2)) &&
            input_shape.size() < 6) {
            ngraph::NodeVector new_ops;

            // Reduce
            // Re-create the same reduction with keep_dims == true; the original
            // keep_dims == false output shape is restored by the Reshape added below.
            auto reduce_const =
                ngraph::opset10::Constant::create(ngraph::element::i64, ngraph::Shape{axes_vector.size()}, axes_vector);

            // Add each reduce mode supported by oneDNN
            if (ngraph::is_type<ngraph::opset10::ReduceSum>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceSum>(input, reduce_const, true);
            else if (ngraph::is_type<ngraph::opset10::ReduceMean>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceMean>(input, reduce_const, true);
            else if (ngraph::is_type<ngraph::opset10::ReduceMin>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceMin>(input, reduce_const, true);
            else if (ngraph::is_type<ngraph::opset10::ReduceMax>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceMax>(input, reduce_const, true);
            else if (ngraph::is_type<ngraph::opset10::ReduceProd>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceProd>(input, reduce_const, true);
            else
                return false;  // mode not supported by this decomposition: leave graph untouched

            input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name());
            new_ops.push_back(input.get_node_shared_ptr());

            // Reshape
            // One entry per un-reduced axis; by the guard above only the feature axis remains.
            auto reshape_shape = ngraph::Shape((input_rank - axes_vector.size()), 1);
            // Expected that a feature axis is only un-reduced unless a new case for this decomposition is added.
            assert(reshape_shape.size() == 1);
            reshape_shape[0] = reduce_shape[0];
            input = std::make_shared<ngraph::opset10::Reshape>(
                input,
                ngraph::opset10::Constant::create(ngraph::element::i64,
                                                  ngraph::Shape{reshape_shape.size()},
                                                  reshape_shape),
                false);

            input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name() + "_reshape_false_keepdims");
            new_ops.push_back(input.get_node_shared_ptr());

            // Preserve runtime info and splice the new Reduce+Reshape pair in place of
            // the matched Reduce's output.
            ngraph::copy_runtime_info(reduce, new_ops);
            reduce->output(0).replace(input);
            return true;
        }

        return false;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(reduce_pattern, "DecomposeReduceForFalseKeepDims");
    register_matcher(m, callback);
}
|
||||
|
||||
// Returns true when the reduction removes the batch axis (0) and every spatial axis
// while keeping the feature axis (1) — i.e. the feature axis is the only dim remaining,
// which is the case this decomposition handles.
bool DecomposeReduceForFalseKeepDims::need_transformation_for_reordered_axes(std::vector<int64_t> reduce_axes,
                                                                             size_t num_dim,
                                                                             size_t num_spatial) {
    // Membership test over the (small) axis list.
    const auto reduces = [&reduce_axes](int64_t axis) {
        return std::find(reduce_axes.begin(), reduce_axes.end(), axis) != reduce_axes.end();
    };

    // Must reduce more than one axis, include the batch axis, and keep the feature axis.
    if (reduce_axes.size() <= 1 || !reduces(0) || reduces(1))
        return false;

    // Every spatial axis must be reduced as well.
    for (size_t axis = num_dim - num_spatial; axis < num_dim; ++axis) {
        if (!reduces(static_cast<int64_t>(axis)))
            return false;
    }

    return true;
}
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace ov
|
||||
@@ -0,0 +1,28 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include <transformations_visibility.hpp>
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gpu {
|
||||
|
||||
/**
|
||||
* @brief Add Reshape to modify output of Reduce and modify keep_dims to true : reduce-reshape
|
||||
* A clDNN Reduce reorders un-reduced axes of its output tensor to b-f and spatial order when keep_dims is false.
|
||||
* oneDNN reduction does not allow this. And clDNN execution shows a huge perf drop for blocked formats.
|
||||
*/
|
||||
class DecomposeReduceForFalseKeepDims : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
// Decompose reduce if keep_dims is false and it reduces batch and spatial axes
|
||||
DecomposeReduceForFalseKeepDims();
|
||||
|
||||
// Returns true if reduction axes includes one of blocked axis and all spatial axes
|
||||
bool need_transformation_for_reordered_axes(std::vector<int64_t> reduce_axes, size_t num_dim, size_t num_spatial);
|
||||
};
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace ov
|
||||
@@ -28,6 +28,7 @@
|
||||
|
||||
#include "transformations/einsum_decomposition.hpp"
|
||||
#include "transformations/convert_pooling_to_reduce.hpp"
|
||||
#include "transformations/decompose_reduce_for_false_keepdims.hpp"
|
||||
|
||||
#include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
|
||||
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
|
||||
@@ -266,6 +267,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
|
||||
pass_config->disable<ngraph::pass::ConvertReduceMeanToPooling>();
|
||||
pass_config->disable<ngraph::pass::ConvertReduceMaxToPooling>();
|
||||
manager.register_pass<ConvertAvgPoolingToReduce>();
|
||||
manager.register_pass<DecomposeReduceForFalseKeepDims>();
|
||||
} else {
|
||||
pass_config->set_callback<ngraph::pass::ConvertReduceSumToPooling>(
|
||||
[](const_node_ptr &node) -> bool {
|
||||
|
||||
@@ -0,0 +1,214 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <memory>
|
||||
#include <ngraph/function.hpp>
|
||||
#include <ngraph/opsets/opset10.hpp>
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
#include <plugin/transformations/decompose_reduce_for_false_keepdims.hpp>
|
||||
#include <string>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <tuple>
|
||||
|
||||
#include "intel_gpu/primitives/reduce.hpp"
|
||||
#include "ngraph/type/element_type.hpp"
|
||||
#include "openvino/core/descriptor/tensor.hpp"
|
||||
#include "test_utils.h"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ::tests;
|
||||
|
||||
using InputShape = ngraph::PartialShape;
|
||||
using KeepDims = bool;
|
||||
using ReduceAxes = std::vector<int64_t>;
|
||||
using ReduceType = cldnn::reduce_mode;
|
||||
using ReshapeShape = std::vector<size_t>;
|
||||
using NeedDecompose = bool;
|
||||
|
||||
class ReduceDecomposeTests
|
||||
: public ::testing::Test,
|
||||
public testing::WithParamInterface<
|
||||
std::tuple<ReduceType, InputShape, ReduceAxes, KeepDims, NeedDecompose, ReshapeShape>> {
|
||||
public:
|
||||
std::shared_ptr<ngraph::Function> fc;
|
||||
bool need_decompose;
|
||||
ReshapeShape result_shape;
|
||||
|
||||
void SetUp() override {
|
||||
const auto& reduce_type = std::get<0>(GetParam());
|
||||
const auto& input_shape = std::get<1>(GetParam());
|
||||
const auto& axes = std::get<2>(GetParam());
|
||||
const auto& keep_dims = std::get<3>(GetParam());
|
||||
need_decompose = std::get<4>(GetParam());
|
||||
result_shape = std::get<5>(GetParam());
|
||||
|
||||
fc = get_transformed_function(input_shape, axes, reduce_type, keep_dims);
|
||||
}
|
||||
|
||||
static std::shared_ptr<ngraph::Function> get_transformed_function(const ngraph::PartialShape& input_shape,
|
||||
const std::vector<int64_t>& axes,
|
||||
const ReduceType& reduce_type,
|
||||
const bool keep_dim) {
|
||||
auto param = std::make_shared<ngraph::opset10::Parameter>(ngraph::element::f32, input_shape);
|
||||
if (reduce_type == reduce_mode::logical_or || reduce_type == reduce_mode::logical_and)
|
||||
param = std::make_shared<ngraph::opset10::Parameter>(ngraph::element::boolean, input_shape);
|
||||
|
||||
ngraph::Output<ngraph::Node> input = param->output(0);
|
||||
|
||||
auto axes_const = ngraph::opset10::Constant::create(ngraph::element::i64, ngraph::Shape{axes.size()}, axes);
|
||||
|
||||
if (reduce_type == reduce_mode::sum)
|
||||
input = std::make_shared<ngraph::opset10::ReduceSum>(input, axes_const, keep_dim);
|
||||
else if (reduce_type == reduce_mode::mean)
|
||||
input = std::make_shared<ngraph::opset10::ReduceMean>(input, axes_const, keep_dim);
|
||||
else if (reduce_type == reduce_mode::min)
|
||||
input = std::make_shared<ngraph::opset10::ReduceMin>(input, axes_const, keep_dim);
|
||||
else if (reduce_type == reduce_mode::max)
|
||||
input = std::make_shared<ngraph::opset10::ReduceMax>(input, axes_const, keep_dim);
|
||||
else if (reduce_type == reduce_mode::prod)
|
||||
input = std::make_shared<ngraph::opset10::ReduceProd>(input, axes_const, keep_dim);
|
||||
else if (reduce_type == reduce_mode::logical_or)
|
||||
input = std::make_shared<ngraph::opset10::ReduceLogicalOr>(input, axes_const, keep_dim);
|
||||
else if (reduce_type == reduce_mode::logical_and)
|
||||
input = std::make_shared<ngraph::opset10::ReduceLogicalAnd>(input, axes_const, keep_dim);
|
||||
else
|
||||
throw std::runtime_error("Invalid reduce type for this test-case.");
|
||||
|
||||
return std::make_shared<ngraph::Function>(ngraph::NodeVector{input.get_node_shared_ptr()},
|
||||
ngraph::ParameterVector{param});
|
||||
}
|
||||
};
|
||||
|
||||
// Runs only InitNodeInfo plus the pass under test, then verifies that (a) a Reshape node
// appears iff the decomposition was expected, and (b) the Result shape is unchanged.
TEST_P(ReduceDecomposeTests, CompareFunctions) {
    ngraph::pass::Manager m;
    m.set_per_pass_validation(false);
    m.register_pass<ngraph::pass::InitNodeInfo>();
    m.register_pass<ov::intel_gpu::DecomposeReduceForFalseKeepDims>();
    m.run_passes(fc);

    bool has_reshape = false;
    ov::Shape output_shape;
    for (const auto& op : fc->get_ops()) {
        const std::string type_name(op->get_type_name());

        // The pass decomposed the Reduce iff a Reshape node is now present.
        if (type_name.find("Reshape") != std::string::npos) {
            has_reshape = true;
        } else if (type_name.find("Result") != std::string::npos) {
            output_shape = op->get_shape();
        }
    }
    // ASSERT_EQ prints both operands on failure, unlike ASSERT_TRUE(a == b).
    ASSERT_EQ(has_reshape, need_decompose);
    ASSERT_EQ(output_shape, result_shape);
}
|
||||
|
||||
// Positive cases: keep_dims == false and the reduction removes batch plus all spatial
// axes, so only the feature axis remains -> the pass must insert a Reshape.
INSTANTIATE_TEST_SUITE_P(ReduceDecomposeForFalseKeepdims,
                         ReduceDecomposeTests,
                         testing::Values(std::make_tuple(reduce_mode::prod,
                                                         InputShape{32, 32, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         true,
                                                         ReshapeShape{32}),
                                         std::make_tuple(reduce_mode::sum,
                                                         InputShape{16, 3, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         true,
                                                         ReshapeShape{3}),
                                         std::make_tuple(reduce_mode::mean,
                                                         InputShape{16, 3, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         true,
                                                         ReshapeShape{3}),
                                         std::make_tuple(reduce_mode::min,
                                                         InputShape{16, 3, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         true,
                                                         ReshapeShape{3}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{16, 3, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         true,
                                                         ReshapeShape{3}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{8, 3, 64, 64},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         true,
                                                         ReshapeShape{3})));

// Negative cases: the axis set does not match (not all spatial axes reduced, batch kept,
// or batch size 1) or the mode is a logical reduction the pass does not match -> no Reshape.
INSTANTIATE_TEST_SUITE_P(ReduceDecomposeForFalseKeepdimsNotCase,
                         ReduceDecomposeTests,
                         testing::Values(std::make_tuple(reduce_mode::max,
                                                         InputShape{32, 32, 32, 32},
                                                         ReduceAxes{0, 2},
                                                         KeepDims{false},
                                                         false,
                                                         ReshapeShape{32, 32}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{1, 3, 64, 64},
                                                         ReduceAxes{0, 3},
                                                         KeepDims{false},
                                                         false,
                                                         ReshapeShape{3, 64}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{32, 32, 32, 32},
                                                         ReduceAxes{0},
                                                         KeepDims{false},
                                                         false,
                                                         ReshapeShape{32, 32, 32}),
                                         std::make_tuple(reduce_mode::logical_and,
                                                         InputShape{16, 3, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         false,
                                                         ReshapeShape{3}),
                                         std::make_tuple(reduce_mode::logical_or,
                                                         InputShape{16, 3, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{false},
                                                         false,
                                                         ReshapeShape{3}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{1, 3, 64, 64},
                                                         ReduceAxes{0},
                                                         KeepDims{false},
                                                         false,
                                                         ReshapeShape{3, 64, 64})));

// keep_dims == true cases: the pass only targets keep_dims == false, so the graph must
// stay untouched and the Result keeps the rank-preserving reduced shape.
INSTANTIATE_TEST_SUITE_P(ReduceDecomposeForTrueKeepdims,
                         ReduceDecomposeTests,
                         testing::Values(std::make_tuple(reduce_mode::max,
                                                         InputShape{32, 32, 32, 32},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{true},
                                                         false,
                                                         ReshapeShape{1, 32, 1, 1}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{1, 3, 64, 64},
                                                         ReduceAxes{0, 2, 3},
                                                         KeepDims{true},
                                                         false,
                                                         ReshapeShape{1, 3, 1, 1}),
                                         std::make_tuple(reduce_mode::max,
                                                         InputShape{32, 32, 32, 32},
                                                         ReduceAxes{0, 2},
                                                         KeepDims{true},
                                                         false,
                                                         ReshapeShape{1, 32, 1, 32})));
||||
|
||||
// A dynamic input shape must be rejected by the matcher without throwing.
TEST(DecomposeReduceForFalseKeepDims, Negative) {
    auto model =
        ReduceDecomposeTests::get_transformed_function(ngraph::PartialShape::dynamic(), {3}, reduce_mode::max, true);

    ngraph::pass::Manager pass_manager;
    pass_manager.register_pass<ov::intel_gpu::DecomposeReduceForFalseKeepDims>();
    ASSERT_NO_THROW(pass_manager.run_passes(model));
}
|
||||
Reference in New Issue
Block a user