bug fix update (#19568)

* [GPU] Fix gpu functional test failures
* set m_max_batch to 1
* add debug log for condition operation

* Add debug logs for condition and constant

* To fix the zero-byte allocation issue, convert zero-dimension constants to one dimension in constant creation

* Add a check for dynamic output shapes in split offset calculation, and check allow_new_shape_infer in program_builder

* Add unit test for fix checking output shape

* Add test case for zero-dimension allocation and debug message

* Fix build failure for condition unit test

* Follow up code review
This commit is contained in:
Paul Youngsoo Ahn
2023-09-19 06:13:38 +09:00
committed by GitHub
parent e34c5a09c6
commit 03918c2cac
10 changed files with 240 additions and 17 deletions

View File

@@ -26,19 +26,17 @@ struct condition : public primitive_base<condition> {
std::string str() {
std::stringstream ss;
ss << "branch: { " << std::endl;
ss<< "* input_map : [(outer_id,inner_id),";
ss << "branch: {input_map : [(outer_id,inner_id),";
for (auto& in_iter : input_map) {
ss << "(" << in_iter.first << "," << in_iter.second << "),";
}
ss << "]," << std::endl;
ss << "],";
ss << "* output_map : [(outer_idx,inner_id),";
ss << " output_map : [(outer_idx,inner_id),";
for (auto& out_iter : output_map) {
ss << "(" << out_iter.first << ","<< out_iter.second << "),";
}
ss << "]" << std::endl;
ss << "}" << std::endl;
ss << "]}";
return ss.str();
}
};

View File

@@ -39,6 +39,7 @@ struct condition_impl : typed_primitive_impl<condition> {
auto pred = condition_inst::get_pred_from_memory(instance.pred_memory_ptr(), instance.get_network().get_stream());
network::ptr executed_net = pred? instance.get_net_true() : instance.get_net_false();
auto branch = pred? instance.get_branch_true() : instance.get_branch_false();
GPU_DEBUG_LOG << "predicate: " << (pred ? "True" : "False") << std::endl;
// Set input memory of inner network before its execution
for (size_t mem_idx = 0; mem_idx < instance.inputs_memory_count(); mem_idx++) {
@@ -48,6 +49,7 @@ struct condition_impl : typed_primitive_impl<condition> {
const primitive_id& input_internal_id = iter->second;
auto mem_ptr = instance.input_memory_ptr(mem_idx);
executed_net->set_input_data(input_internal_id, mem_ptr);
GPU_DEBUG_LOG << "Inner net - Inputs[" << mem_idx << "]" << mem_ptr->get_layout().to_short_string() << std::endl;
}
}
@@ -62,6 +64,7 @@ struct condition_impl : typed_primitive_impl<condition> {
auto inner_out_id = out_mem_map.second;
auto mem_ptr = executed_net->get_output(inner_out_id).get_memory();
instance.set_output_memory(mem_ptr, false, out_mem_idx);
GPU_DEBUG_LOG << "Inner net - Outputs[" << out_mem_idx << "]" << mem_ptr->get_layout().to_short_string() << std::endl;
}
ev->set();

View File

@@ -1234,9 +1234,18 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
GPU_DEBUG_COUT << inst->id() << std::endl;
if (inst->get_node().is_type<loop>()) {
auto& loop_node = inst->get_node().as<loop>();
auto loop_body_primitives = loop_node.get_body_topology().get_primitives_ids();
for (auto& primitive_id : loop_body_primitives) {
GPU_DEBUG_COUT << "\t" << primitive_id << std::endl;
for (auto& prim : loop_node.get_body_program()->get_processing_order()) {
GPU_DEBUG_COUT << "\t" << prim->id() << std::endl;
}
} else if (inst->get_node().is_type<condition>()) {
auto& cond_node = inst->get_node().as<condition>();
GPU_DEBUG_COUT << "* Branch_True" << std::endl;
for (auto& prim : cond_node.get_branch_true().inner_program->get_processing_order()) {
GPU_DEBUG_COUT << "\t" << prim->id() << std::endl;
}
GPU_DEBUG_COUT << "* Branch_False" << std::endl;
for (auto& prim : cond_node.get_branch_false().inner_program->get_processing_order()) {
GPU_DEBUG_COUT << "\t" << prim->id() << std::endl;
}
}
}

View File

@@ -14,6 +14,12 @@ const size_t idx_false = 1;
static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ptr<ov::op::v8::If>& op, size_t idx) {
cldnn::condition::branch branch;
const auto& internal_body = (idx == idx_true)? op->get_then_body() : op->get_else_body();
GPU_DEBUG_LOG << "Generate inner program for " << "op::v"
<< op->get_type_info().version_id << "::"
<< op->get_type_name() << " operation "
<< "(friendly_name=" << op->get_friendly_name() << ") : "
<< internal_body->get_friendly_name()
<< ", num inputs: " << op->get_input_size() << std::endl;
auto config = p.get_config();
config.set_property(ov::intel_gpu::max_dynamic_batch(1));
@@ -40,6 +46,7 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_
output_map.insert({out_desc->m_output_index, internal_id});
}
GPU_DEBUG_LOG << op->get_friendly_name() << " branch_info[" << internal_body->get_friendly_name() << "] : " << branch << std::endl;
return branch;
}

View File

@@ -97,8 +97,14 @@ static void create_data(ProgramBuilder& p, const ov::Shape& constDims, const std
p.primitive_ids[initialconstPrimID] = constPrimID;
p.profiling_ids.push_back(initialconstPrimID);
} else {
GPU_DEBUG_LOG << "[" << initialconstPrimID << ": constant]" << std::endl;
if (constLayout.count() == 0) {
// Convert zero dimension constant layout to 1 dimension to fix the issue
// that memory allocation is failed on windows when constant layout is zero dimension.
constLayout = cldnn::layout(ov::PartialShape({1}), constLayout.data_type, constLayout.format);
}
cldnn::memory::ptr mem = p.get_engine().allocate_memory(constLayout, false);
GPU_DEBUG_LOG << "[" << initialconstPrimID << ": constant] layout: "
<< constLayout.to_short_string() << ", mem_ptr(" << mem << ", " << mem->size() << " bytes)"<< std::endl;
auto& stream = p.get_engine().get_service_stream();
cldnn::mem_lock<char> lock{mem, stream};
auto buf = lock.data();

View File

@@ -13,6 +13,21 @@
namespace ov {
namespace intel_gpu {
// Reports whether `op` must be treated as dynamic. ov::Node::is_dynamic()
// only reflects the input shapes, so each output partial shape is scanned
// explicitly as well — an op with static inputs can still have dynamic outputs.
static bool IsDynamic(const std::shared_ptr<ov::Node>& op) {
    if (op->is_dynamic())
        return true;
    const size_t output_count = op->get_output_size();
    for (size_t out_idx = 0; out_idx < output_count; ++out_idx) {
        if (op->get_output_partial_shape(out_idx).is_dynamic())
            return true;
    }
    return false;
}
static void CreateCommonSplitOp(ProgramBuilder& p, const std::shared_ptr<ov::Node>& op) {
auto get_layer_name = [&](size_t idx)->std::string {
return layer_type_name_ID(op) + ((op->get_output_size() == 1)? "" : ".out" + std::to_string(idx));
@@ -22,11 +37,15 @@ static void CreateCommonSplitOp(ProgramBuilder& p, const std::shared_ptr<ov::Nod
if (p.use_new_shape_infer() || op->is_dynamic()) {
std::vector<cldnn::tensor> offsets;
if (!op->is_dynamic()) {
// op->is_dynamic() does not check if output shape is dynamic. it only check dynamism for input shapes
// Even if op->is_dynamic() is false, output shape can be dynamic.
// Thus, it is necessary to check if output shape is dynamic.
if (!IsDynamic(op)) {
auto input_pshape = op->get_input_partial_shape(0);
ov::Shape start_offset(input_pshape.size());
for (size_t i = 0; i < op->get_output_size(); i++) {
const auto outPartialShape = op->get_output_partial_shape(i);
auto offsetTensor = tensor_from_dims(start_offset, 0);
offsets.push_back(offsetTensor);
@@ -49,7 +68,7 @@ static void CreateCommonSplitOp(ProgramBuilder& p, const std::shared_ptr<ov::Nod
auto cropPrim = cldnn::crop(get_layer_name(i),
inputs,
cldnn::tensor(1),
(op->is_dynamic() ? cldnn::tensor(0) : offsets[i]),
(offsets.empty() ? cldnn::tensor(0) : offsets[i]),
op_mode,
static_cast<int>(i),
num_splits);

View File

@@ -314,6 +314,14 @@ bool ProgramBuilder::requires_new_shape_infer(const ov::Node& op) const {
return true;
}
// When input node has dynamic shape with 4 dimension, this function return false
// because op.is_dynamic() which only checks input shapes return false.
// So, in the case of input data, we need to check output shape.
for (size_t i = 0; i < op.get_output_size(); i++) {
if (op.get_output_partial_shape(i).is_dynamic())
return true;
}
if (ov::is_type<op::FullyConnectedCompressed>(&op))
return true;

View File

@@ -215,4 +215,89 @@ INSTANTIATE_TEST_SUITE_P(smoke_ShapeOf_5d_compareWithRefs_static,
} // namespace
using ShapeOfParams = typename std::tuple<
InputShape, // Shape
InferenceEngine::Precision, // Precision
LayerTestsUtils::TargetDevice // Device name
>;
// Regression test fixture: a ShapeOf chained onto another ShapeOf's result,
// fed by a dynamically-shaped input. Exercises the path where an op with
// static inputs still produces a dynamic output shape.
class ShapeOfDynamicInputGPUTest : public testing::WithParamInterface<ShapeOfParams>,
virtual public SubgraphBaseTest {
public:
// Builds a human-readable gtest case name from (input shape, precision,
// device); '-' is replaced with '_' so the name stays a valid identifier.
static std::string getTestCaseName(const testing::TestParamInfo<ShapeOfParams>& obj) {
InputShape inputShapes;
InferenceEngine::Precision dataPrc;
std::string targetDevice;
std::tie(inputShapes, dataPrc, targetDevice) = obj.param;
std::ostringstream result;
result << "IS=(";
result << ov::test::utils::partialShape2str({inputShapes.first}) << "_";
// Append every concrete (static) shape instantiated for the dynamic input.
for (size_t i = 0lu; i < inputShapes.second.size(); i++) {
result << "{";
result << ov::test::utils::vec2str(inputShapes.second[i]) << "_";
result << "}_";
}
result << ")_";
result << "netPRC=" << dataPrc << "_";
result << "targetDevice=" << targetDevice << "_";
auto res_str = result.str();
std::replace(res_str.begin(), res_str.end(), '-', '_');
return res_str;
}
protected:
// Builds the function: input -> ShapeOf -> ShapeOf -> Result.
// NOTE(review): targetDevice is set to DEVICE_GPU and then immediately
// overwritten from GetParam() — presumably intentional since the suite only
// instantiates with DEVICE_GPU; confirm if other devices are ever added.
void SetUp() override {
InputShape inputShapes;
InferenceEngine::Precision dataPrc;
targetDevice = ov::test::utils::DEVICE_GPU;
std::tie(inputShapes, dataPrc, targetDevice) = GetParam();
init_input_shapes({inputShapes});
// Attach legacy IE mean-value preprocessing info to the input tensor's
// runtime info so the legacy preprocessing path is covered.
InferenceEngine::PreProcessInfo pre_process_info;
pre_process_info.setVariant(InferenceEngine::MeanVariant::MEAN_VALUE);
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(dataPrc);
auto input = std::make_shared<ngraph::opset9::Parameter>(prc, inputShapes.first);
input->get_output_tensor(0).get_rt_info()["ie_legacy_preproc"] = pre_process_info;
input->set_friendly_name("input_data");
// First ShapeOf: static input -> 1D shape tensor.
auto shape_of_01 = std::make_shared<ngraph::opset9::ShapeOf>(input);
shape_of_01->set_friendly_name("shape_of_01");
// Second ShapeOf applied to the first one's output.
auto shape_of_02 = std::make_shared<ngraph::opset9::ShapeOf>(shape_of_01);
shape_of_02->set_friendly_name("shape_of_02");
auto result = std::make_shared<ngraph::opset1::Result>(shape_of_02);
result->set_friendly_name("outer_result");
function = std::make_shared<ngraph::Function>(ngraph::OutputVector{result}, ngraph::ParameterVector{input});
function->set_friendly_name("shape_of_test");
}
};
// Runs the subgraph on GPU and compares against reference results for every
// concrete shape listed in the dynamic input's target-shape vector.
TEST_P(ShapeOfDynamicInputGPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
run();
}
// Fully dynamic 5D input; each test iteration infers with one of the three
// concrete shapes below.
const std::vector<ov::test::InputShape> dynamicInputShapes = {
ov::test::InputShape(ov::PartialShape({-1, -1, -1, -1, -1}), {{4, 1, 1, 64, 32}, {6, 1, 1, 8, 4}, {8, 1, 1, 24, 16}}),
};
const std::vector<InferenceEngine::Precision> dynamicInputPrec = {
InferenceEngine::Precision::FP16,
};
INSTANTIATE_TEST_SUITE_P(smoke_Check, ShapeOfDynamicInputGPUTest,
testing::Combine(
testing::ValuesIn(dynamicInputShapes), // input shapes
testing::ValuesIn(dynamicInputPrec), // network precision
testing::Values<std::string>(ov::test::utils::DEVICE_GPU)), // device type
ShapeOfDynamicInputGPUTest::getTestCaseName);
} // namespace GPULayerTestsDefinitions

View File

@@ -264,4 +264,20 @@ INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplitsCheck6D, VariadicSplitLayerGPUDynam
::testing::ValuesIn(restInputTypes)), // input type of splitLength
VariadicSplitLayerGPUDynamicTest::getTestCaseName);
// Static 4D input shape. The suite name indicates the outputs are still
// dynamic (the splitLength vector below contains -1), covering the
// static-input / dynamic-output offset-calculation path in split creation.
const std::vector<InputShape> inputShapes4d_static = {
{
{5, 16, 10, 8}, {{5, 16, 10, 8}, }
}
};
INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplitsCheck4D_static_input_dyn_output, VariadicSplitLayerGPUDynamicTest,
::testing::Combine(
::testing::Values(1), // axes
::testing::Values(std::vector<int32_t>{2, 1, -1}), // splitLength
::testing::Values(ElementType::f16), // netPrec
::testing::ValuesIn(inputShapes4d_static), // inShapes
::testing::ValuesIn(restInputTypes)), // input type of splitLength
VariadicSplitLayerGPUDynamicTest::getTestCaseName);
} // namespace GPULayerTestsDefinitions

View File

@@ -13,6 +13,8 @@
#include "common_test_utils/test_constants.hpp"
#include "shared_test_classes/base/utils/ranges.hpp"
#include <common_test_utils/ov_tensor_utils.hpp>
#include "shared_test_classes/base/utils/compare_results.hpp"
#include "openvino/pass/constant_folding.hpp"
using namespace InferenceEngine;
@@ -45,7 +47,11 @@ enum InnerBodyType {
/**
* Inner body with nested condition case
*/
Type05 = 5
Type05 = 5,
/**
* Inner body with single constant with zero dimensions
*/
Type06 = 6
};
public:
@@ -251,6 +257,24 @@ protected:
}
};
// Inner body consisting of a single constant whose every dimension is zero
// (a zero-element tensor). Exercises the GPU constant-creation path that must
// handle a zero-byte layout at memory-allocation time.
class InnerBodyType06 : public InnerBodyGenerator {
protected:
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
// Rank matches the input's rank, but each dimension is 0.
auto constant = ngraph::opset9::Constant::create(prc, ov::Shape(input_shape.rank().get_length(), 0), {2.0f});
constant->set_friendly_name("body1_constant");
// constant->get_rt_info().emplace(ov::pass::DisableConstantFolding::get_type_info_static(), ov::pass::DisableConstantFolding{});
// constant->get_rt_info().emplace("can_be_folded", false);
auto result = std::make_shared<ngraph::opset1::Result>(constant);
auto o_layout = result->get_layout();
result->set_friendly_name("body1_result");
// Parameter-less function: this body is a constant-only subgraph.
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {result},
ngraph::ParameterVector{},
"constant_only");
return body;
}
};
static std::shared_ptr<InnerBodyGenerator> get_inner_body_generator(InnerBodyGenerator::InnerBodyType type) {
std::shared_ptr<InnerBodyGenerator> generator_ptr;
switch (type) {
@@ -274,6 +298,10 @@ static std::shared_ptr<InnerBodyGenerator> get_inner_body_generator(InnerBodyGen
{
return std::make_shared<InnerBodyType05>();
}
case InnerBodyGenerator::InnerBodyType::Type06:
{
return std::make_shared<InnerBodyType06>();
}
default:
{
OPENVINO_ASSERT(false, "Not supported type");
@@ -314,9 +342,22 @@ public:
cond->set_then_body(body_then_generator->get_function());
cond->set_input(data, body_then_generator->get_input(), body_else_generator->get_input());
cond->set_output(body_then_generator->get_result(), body_else_generator->get_result());
auto result = std::make_shared<ngraph::opset1::Result>(cond);
result->set_friendly_name("outer_result");
function = std::make_shared<ngraph::Function>(ngraph::OutputVector {result}, params);
if (then_body_type == InnerBodyGenerator::InnerBodyType::Type06 || else_body_type == InnerBodyGenerator::InnerBodyType::Type06) {
auto constant = create_condition_input(params, prc, ngraph::Shape{1}, 0, true);
auto addition = std::make_shared<ngraph::opset9::Add>(cond, constant);
auto shapeof1 = std::make_shared<ngraph::opset9::ShapeOf>(addition);
auto convert = std::make_shared<ngraph::opset9::Convert>(shapeof1, prc);
auto mul = std::make_shared<ngraph::opset9::Multiply>(convert, constant);
auto shapePatternsNode = create_condition_input(params, ov::element::Type_t::i64, ngraph::Shape{1}, 0, true);
auto reshapeOp = std::make_shared<ngraph::opset1::Reshape>(mul, shapePatternsNode, true);
auto result = std::make_shared<ngraph::opset1::Result>(reshapeOp);
result->set_friendly_name("outer_result");
function = std::make_shared<ngraph::Function>(ngraph::OutputVector {result}, params);
} else {
auto result = std::make_shared<ngraph::opset1::Result>(cond);
result->set_friendly_name("outer_result");
function = std::make_shared<ngraph::Function>(ngraph::OutputVector {result}, params);
}
}
std::shared_ptr<ngraph::Function> get_function() { return function; }
@@ -394,6 +435,11 @@ static std::ostream& operator<<(std::ostream& os, const InnerBodyGenerator::Inne
os << "Type05";
break;
}
case InnerBodyGenerator::InnerBodyType::Type06:
{
os << "Type06";
break;
}
default:
{
os << "NONE";
@@ -596,7 +642,8 @@ protected:
void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
ov::Shape input_shape;
for (auto& shape : targetInputStaticShapes) {
if (shape.size() > 1) {
// Change condition to cover 1 dim input shape
if (shape.size() > 0) {
input_shape = shape;
break;
}
@@ -622,6 +669,7 @@ protected:
}
}
}
size_t niter = 0;
};
@@ -648,6 +696,10 @@ const std::vector<ov::test::InputShape> dynamicInputShapes_f16 = {
ov::test::InputShape(ov::PartialShape({-1, -1, -1}), {{2, 24, 16}, {2, 64, 32}, {2, 8, 4}})
};
const std::vector<ov::test::InputShape> dynamicInputShapes_zero_dims = {
ov::test::InputShape(ov::PartialShape({-1}), {{24}, {64}, {8}})
};
const std::vector<InnerBodyTypeParams> innerBodyTypes_f32 = {
{
InnerBodyGenerator::InnerBodyType::Type01,
@@ -670,11 +722,22 @@ const std::vector<InnerBodyTypeParams> innerBodyTypes_f16 = {
}
};
const std::vector<InnerBodyTypeParams> innerBodyTypes_zero_dims = {
{
InnerBodyGenerator::InnerBodyType::Type02,
InnerBodyGenerator::InnerBodyType::Type06
},
};
const std::vector<TestModelGenerator::PredicateTypes> condTypes = {
TestModelGenerator::PredicateTypes::PARAM,
TestModelGenerator::PredicateTypes::NODE
};
const std::vector<TestModelGenerator::PredicateTypes> condTypes_zero_dims = {
TestModelGenerator::PredicateTypes::PARAM
};
INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_dynamic_f32, DynamicConditionLayerGPUTest,
testing::Combine(
testing::ValuesIn(dynamicInputShapes_f32), // input shapes
@@ -693,4 +756,13 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_dynamic_f16, DynamicConditionLay
testing::ValuesIn(condTypes), // cond type
testing::Values<std::string>(ov::test::utils::DEVICE_GPU)), // device type
DynamicConditionLayerGPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_zero_dims, DynamicConditionLayerGPUTest,
testing::Combine(
testing::ValuesIn(dynamicInputShapes_zero_dims), // input shapes
testing::ValuesIn(innerBodyTypes_zero_dims), // inner body type
testing::ValuesIn(netPrecisions_f32), // network precision
testing::ValuesIn(condTypes_zero_dims), // cond type
testing::Values<std::string>(ov::test::utils::DEVICE_GPU)), // device type
DynamicConditionLayerGPUTest::getTestCaseName);
} // namespace GPULayerTestsDefinitions