bug fix update (#19568)

* [GPU] Fix gpu functional test failures
* set m_max_batch to 1
* add debug log for condition operation

* Add debug logs for condition and constant

* To fix the zero-byte allocation issue, convert zero-dimension constants to one dimension in constant creation

* Add a check for dynamic output shapes in split offset calculation, and check allow_new_shape_infer in program_builder

* Add unit test for fix checking output shape

* Add test case for zero-dimension allocation and debug message

* Fix build failure for condition unit test

* Follow up code review
This commit is contained in:
Paul Youngsoo Ahn
2023-09-19 06:13:38 +09:00
committed by GitHub
parent e34c5a09c6
commit 03918c2cac
10 changed files with 240 additions and 17 deletions

View File

@@ -26,19 +26,17 @@ struct condition : public primitive_base<condition> {
std::string str() {
std::stringstream ss;
ss << "branch: { " << std::endl;
ss<< "* input_map : [(outer_id,inner_id),";
ss << "branch: {input_map : [(outer_id,inner_id),";
for (auto& in_iter : input_map) {
ss << "(" << in_iter.first << "," << in_iter.second << "),";
}
ss << "]," << std::endl;
ss << "],";
ss << "* output_map : [(outer_idx,inner_id),";
ss << " output_map : [(outer_idx,inner_id),";
for (auto& out_iter : output_map) {
ss << "(" << out_iter.first << ","<< out_iter.second << "),";
}
ss << "]" << std::endl;
ss << "}" << std::endl;
ss << "]}";
return ss.str();
}
};

View File

@@ -39,6 +39,7 @@ struct condition_impl : typed_primitive_impl<condition> {
auto pred = condition_inst::get_pred_from_memory(instance.pred_memory_ptr(), instance.get_network().get_stream());
network::ptr executed_net = pred? instance.get_net_true() : instance.get_net_false();
auto branch = pred? instance.get_branch_true() : instance.get_branch_false();
GPU_DEBUG_LOG << "predicate: " << (pred ? "True" : "False") << std::endl;
// Set input memory of inner network before its execution
for (size_t mem_idx = 0; mem_idx < instance.inputs_memory_count(); mem_idx++) {
@@ -48,6 +49,7 @@ struct condition_impl : typed_primitive_impl<condition> {
const primitive_id& input_internal_id = iter->second;
auto mem_ptr = instance.input_memory_ptr(mem_idx);
executed_net->set_input_data(input_internal_id, mem_ptr);
GPU_DEBUG_LOG << "Inner net - Inputs[" << mem_idx << "]" << mem_ptr->get_layout().to_short_string() << std::endl;
}
}
@@ -62,6 +64,7 @@ struct condition_impl : typed_primitive_impl<condition> {
auto inner_out_id = out_mem_map.second;
auto mem_ptr = executed_net->get_output(inner_out_id).get_memory();
instance.set_output_memory(mem_ptr, false, out_mem_idx);
GPU_DEBUG_LOG << "Inner net - Outputs[" << out_mem_idx << "]" << mem_ptr->get_layout().to_short_string() << std::endl;
}
ev->set();

View File

@@ -1234,9 +1234,18 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
GPU_DEBUG_COUT << inst->id() << std::endl;
if (inst->get_node().is_type<loop>()) {
auto& loop_node = inst->get_node().as<loop>();
auto loop_body_primitives = loop_node.get_body_topology().get_primitives_ids();
for (auto& primitive_id : loop_body_primitives) {
GPU_DEBUG_COUT << "\t" << primitive_id << std::endl;
for (auto& prim : loop_node.get_body_program()->get_processing_order()) {
GPU_DEBUG_COUT << "\t" << prim->id() << std::endl;
}
} else if (inst->get_node().is_type<condition>()) {
auto& cond_node = inst->get_node().as<condition>();
GPU_DEBUG_COUT << "* Branch_True" << std::endl;
for (auto& prim : cond_node.get_branch_true().inner_program->get_processing_order()) {
GPU_DEBUG_COUT << "\t" << prim->id() << std::endl;
}
GPU_DEBUG_COUT << "* Branch_False" << std::endl;
for (auto& prim : cond_node.get_branch_false().inner_program->get_processing_order()) {
GPU_DEBUG_COUT << "\t" << prim->id() << std::endl;
}
}
}

View File

@@ -14,6 +14,12 @@ const size_t idx_false = 1;
static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ptr<ov::op::v8::If>& op, size_t idx) {
cldnn::condition::branch branch;
const auto& internal_body = (idx == idx_true)? op->get_then_body() : op->get_else_body();
GPU_DEBUG_LOG << "Generate inner program for " << "op::v"
<< op->get_type_info().version_id << "::"
<< op->get_type_name() << " operation "
<< "(friendly_name=" << op->get_friendly_name() << ") : "
<< internal_body->get_friendly_name()
<< ", num inputs: " << op->get_input_size() << std::endl;
auto config = p.get_config();
config.set_property(ov::intel_gpu::max_dynamic_batch(1));
@@ -40,6 +46,7 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_
output_map.insert({out_desc->m_output_index, internal_id});
}
GPU_DEBUG_LOG << op->get_friendly_name() << " branch_info[" << internal_body->get_friendly_name() << "] : " << branch << std::endl;
return branch;
}

View File

@@ -97,8 +97,14 @@ static void create_data(ProgramBuilder& p, const ov::Shape& constDims, const std
p.primitive_ids[initialconstPrimID] = constPrimID;
p.profiling_ids.push_back(initialconstPrimID);
} else {
GPU_DEBUG_LOG << "[" << initialconstPrimID << ": constant]" << std::endl;
if (constLayout.count() == 0) {
// Convert zero dimension constant layout to 1 dimension to fix the issue
// that memory allocation is failed on windows when constant layout is zero dimension.
constLayout = cldnn::layout(ov::PartialShape({1}), constLayout.data_type, constLayout.format);
}
cldnn::memory::ptr mem = p.get_engine().allocate_memory(constLayout, false);
GPU_DEBUG_LOG << "[" << initialconstPrimID << ": constant] layout: "
<< constLayout.to_short_string() << ", mem_ptr(" << mem << ", " << mem->size() << " bytes)"<< std::endl;
auto& stream = p.get_engine().get_service_stream();
cldnn::mem_lock<char> lock{mem, stream};
auto buf = lock.data();

View File

@@ -13,6 +13,21 @@
namespace ov {
namespace intel_gpu {
// Reports whether `op` must be treated as dynamic. ov::Node::is_dynamic()
// only reflects the input shapes, so each output partial shape is scanned
// explicitly as well — an op with static inputs can still have dynamic outputs.
static bool IsDynamic(const std::shared_ptr<ov::Node>& op) {
    if (op->is_dynamic())
        return true;
    const size_t output_count = op->get_output_size();
    for (size_t out_idx = 0; out_idx < output_count; ++out_idx) {
        if (op->get_output_partial_shape(out_idx).is_dynamic())
            return true;
    }
    return false;
}
static void CreateCommonSplitOp(ProgramBuilder& p, const std::shared_ptr<ov::Node>& op) {
auto get_layer_name = [&](size_t idx)->std::string {
return layer_type_name_ID(op) + ((op->get_output_size() == 1)? "" : ".out" + std::to_string(idx));
@@ -22,11 +37,15 @@ static void CreateCommonSplitOp(ProgramBuilder& p, const std::shared_ptr<ov::Nod
if (p.use_new_shape_infer() || op->is_dynamic()) {
std::vector<cldnn::tensor> offsets;
if (!op->is_dynamic()) {
// op->is_dynamic() does not check if output shape is dynamic. it only check dynamism for input shapes
// Even if op->is_dynamic() is false, output shape can be dynamic.
// Thus, it is necessary to check if output shape is dynamic.
if (!IsDynamic(op)) {
auto input_pshape = op->get_input_partial_shape(0);
ov::Shape start_offset(input_pshape.size());
for (size_t i = 0; i < op->get_output_size(); i++) {
const auto outPartialShape = op->get_output_partial_shape(i);
auto offsetTensor = tensor_from_dims(start_offset, 0);
offsets.push_back(offsetTensor);
@@ -49,7 +68,7 @@ static void CreateCommonSplitOp(ProgramBuilder& p, const std::shared_ptr<ov::Nod
auto cropPrim = cldnn::crop(get_layer_name(i),
inputs,
cldnn::tensor(1),
(op->is_dynamic() ? cldnn::tensor(0) : offsets[i]),
(offsets.empty() ? cldnn::tensor(0) : offsets[i]),
op_mode,
static_cast<int>(i),
num_splits);

View File

@@ -314,6 +314,14 @@ bool ProgramBuilder::requires_new_shape_infer(const ov::Node& op) const {
return true;
}
// When input node has dynamic shape with 4 dimension, this function return false
// because op.is_dynamic() which only checks input shapes return false.
// So, in the case of input data, we need to check output shape.
for (size_t i = 0; i < op.get_output_size(); i++) {
if (op.get_output_partial_shape(i).is_dynamic())
return true;
}
if (ov::is_type<op::FullyConnectedCompressed>(&op))
return true;

View File

@@ -215,4 +215,89 @@ INSTANTIATE_TEST_SUITE_P(smoke_ShapeOf_5d_compareWithRefs_static,
} // namespace
using ShapeOfParams = typename std::tuple<
InputShape, // Shape
InferenceEngine::Precision, // Precision
LayerTestsUtils::TargetDevice // Device name
>;
// Regression test fixture: a ShapeOf chained onto another ShapeOf's result,
// fed by a dynamically-shaped input. Exercises the path where an op with
// static inputs still produces a dynamic output shape.
class ShapeOfDynamicInputGPUTest : public testing::WithParamInterface<ShapeOfParams>,
virtual public SubgraphBaseTest {
public:
// Builds a human-readable gtest case name from (input shape, precision,
// device); '-' is replaced with '_' so the name stays a valid identifier.
static std::string getTestCaseName(const testing::TestParamInfo<ShapeOfParams>& obj) {
InputShape inputShapes;
InferenceEngine::Precision dataPrc;
std::string targetDevice;
std::tie(inputShapes, dataPrc, targetDevice) = obj.param;
std::ostringstream result;
result << "IS=(";
result << ov::test::utils::partialShape2str({inputShapes.first}) << "_";
// Append every concrete (static) shape instantiated for the dynamic input.
for (size_t i = 0lu; i < inputShapes.second.size(); i++) {
result << "{";
result << ov::test::utils::vec2str(inputShapes.second[i]) << "_";
result << "}_";
}
result << ")_";
result << "netPRC=" << dataPrc << "_";
result << "targetDevice=" << targetDevice << "_";
auto res_str = result.str();
std::replace(res_str.begin(), res_str.end(), '-', '_');
return res_str;
}
protected:
// Builds the function: input -> ShapeOf -> ShapeOf -> Result.
// NOTE(review): targetDevice is set to DEVICE_GPU and then immediately
// overwritten from GetParam() — presumably intentional since the suite only
// instantiates with DEVICE_GPU; confirm if other devices are ever added.
void SetUp() override {
InputShape inputShapes;
InferenceEngine::Precision dataPrc;
targetDevice = ov::test::utils::DEVICE_GPU;
std::tie(inputShapes, dataPrc, targetDevice) = GetParam();
init_input_shapes({inputShapes});
// Attach legacy IE mean-value preprocessing info to the input tensor's
// runtime info so the legacy preprocessing path is covered.
InferenceEngine::PreProcessInfo pre_process_info;
pre_process_info.setVariant(InferenceEngine::MeanVariant::MEAN_VALUE);
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(dataPrc);
auto input = std::make_shared<ngraph::opset9::Parameter>(prc, inputShapes.first);
input->get_output_tensor(0).get_rt_info()["ie_legacy_preproc"] = pre_process_info;
input->set_friendly_name("input_data");
// First ShapeOf: static input -> 1D shape tensor.
auto shape_of_01 = std::make_shared<ngraph::opset9::ShapeOf>(input);
shape_of_01->set_friendly_name("shape_of_01");
// Second ShapeOf applied to the first one's output.
auto shape_of_02 = std::make_shared<ngraph::opset9::ShapeOf>(shape_of_01);
shape_of_02->set_friendly_name("shape_of_02");
auto result = std::make_shared<ngraph::opset1::Result>(shape_of_02);
result->set_friendly_name("outer_result");
function = std::make_shared<ngraph::Function>(ngraph::OutputVector{result}, ngraph::ParameterVector{input});
function->set_friendly_name("shape_of_test");
}
};
// Runs the subgraph on GPU and compares against reference results for every
// concrete shape listed in the dynamic input's target-shape vector.
TEST_P(ShapeOfDynamicInputGPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
run();
}
// Fully dynamic 5D input; each test iteration infers with one of the three
// concrete shapes below.
const std::vector<ov::test::InputShape> dynamicInputShapes = {
ov::test::InputShape(ov::PartialShape({-1, -1, -1, -1, -1}), {{4, 1, 1, 64, 32}, {6, 1, 1, 8, 4}, {8, 1, 1, 24, 16}}),
};
const std::vector<InferenceEngine::Precision> dynamicInputPrec = {
InferenceEngine::Precision::FP16,
};
INSTANTIATE_TEST_SUITE_P(smoke_Check, ShapeOfDynamicInputGPUTest,
testing::Combine(
testing::ValuesIn(dynamicInputShapes), // input shapes
testing::ValuesIn(dynamicInputPrec), // network precision
testing::Values<std::string>(ov::test::utils::DEVICE_GPU)), // device type
ShapeOfDynamicInputGPUTest::getTestCaseName);
} // namespace GPULayerTestsDefinitions

View File

@@ -264,4 +264,20 @@ INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplitsCheck6D, VariadicSplitLayerGPUDynam
::testing::ValuesIn(restInputTypes)), // input type of splitLength
VariadicSplitLayerGPUDynamicTest::getTestCaseName);
// Static 4D input shape. The suite name indicates the outputs are still
// dynamic (the splitLength vector below contains -1), covering the
// static-input / dynamic-output offset-calculation path in split creation.
const std::vector<InputShape> inputShapes4d_static = {
{
{5, 16, 10, 8}, {{5, 16, 10, 8}, }
}
};
INSTANTIATE_TEST_SUITE_P(smoke_VariadicSplitsCheck4D_static_input_dyn_output, VariadicSplitLayerGPUDynamicTest,
::testing::Combine(
::testing::Values(1), // axes
::testing::Values(std::vector<int32_t>{2, 1, -1}), // splitLength
::testing::Values(ElementType::f16), // netPrec
::testing::ValuesIn(inputShapes4d_static), // inShapes
::testing::ValuesIn(restInputTypes)), // input type of splitLength
VariadicSplitLayerGPUDynamicTest::getTestCaseName);
} // namespace GPULayerTestsDefinitions

View File

@@ -13,6 +13,8 @@
#include "common_test_utils/test_constants.hpp"
#include "shared_test_classes/base/utils/ranges.hpp"
#include <common_test_utils/ov_tensor_utils.hpp>
#include "shared_test_classes/base/utils/compare_results.hpp"
#include "openvino/pass/constant_folding.hpp"
using namespace InferenceEngine;
@@ -45,7 +47,11 @@ enum InnerBodyType {
/**
* Inner body with nested condition case
*/
Type05 = 5
Type05 = 5,
/**
* Inner body with single constant with zero dimensions
*/
Type06 = 6
};
public:
@@ -251,6 +257,24 @@ protected:
}
};
// Inner body consisting of a single constant whose every dimension is zero
// (a zero-element tensor). Exercises the GPU constant-creation path that must
// handle a zero-byte layout at memory-allocation time.
class InnerBodyType06 : public InnerBodyGenerator {
protected:
std::shared_ptr<ngraph::Function> generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override {
// Rank matches the input's rank, but each dimension is 0.
auto constant = ngraph::opset9::Constant::create(prc, ov::Shape(input_shape.rank().get_length(), 0), {2.0f});
constant->set_friendly_name("body1_constant");
// constant->get_rt_info().emplace(ov::pass::DisableConstantFolding::get_type_info_static(), ov::pass::DisableConstantFolding{});
// constant->get_rt_info().emplace("can_be_folded", false);
auto result = std::make_shared<ngraph::opset1::Result>(constant);
auto o_layout = result->get_layout();
result->set_friendly_name("body1_result");
// Parameter-less function: this body is a constant-only subgraph.
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {result},
ngraph::ParameterVector{},
"constant_only");
return body;
}
};
static std::shared_ptr<InnerBodyGenerator> get_inner_body_generator(InnerBodyGenerator::InnerBodyType type) {
std::shared_ptr<InnerBodyGenerator> generator_ptr;
switch (type) {
@@ -274,6 +298,10 @@ static std::shared_ptr<InnerBodyGenerator> get_inner_body_generator(InnerBodyGen
{
return std::make_shared<InnerBodyType05>();
}
case InnerBodyGenerator::InnerBodyType::Type06:
{
return std::make_shared<InnerBodyType06>();
}
default:
{
OPENVINO_ASSERT(false, "Not supported type");
@@ -314,9 +342,22 @@ public:
cond->set_then_body(body_then_generator->get_function());
cond->set_input(data, body_then_generator->get_input(), body_else_generator->get_input());
cond->set_output(body_then_generator->get_result(), body_else_generator->get_result());
auto result = std::make_shared<ngraph::opset1::Result>(cond);
result->set_friendly_name("outer_result");
function = std::make_shared<ngraph::Function>(ngraph::OutputVector {result}, params);
if (then_body_type == InnerBodyGenerator::InnerBodyType::Type06 || else_body_type == InnerBodyGenerator::InnerBodyType::Type06) {
auto constant = create_condition_input(params, prc, ngraph::Shape{1}, 0, true);
auto addition = std::make_shared<ngraph::opset9::Add>(cond, constant);
auto shapeof1 = std::make_shared<ngraph::opset9::ShapeOf>(addition);
auto convert = std::make_shared<ngraph::opset9::Convert>(shapeof1, prc);
auto mul = std::make_shared<ngraph::opset9::Multiply>(convert, constant);
auto shapePatternsNode = create_condition_input(params, ov::element::Type_t::i64, ngraph::Shape{1}, 0, true);
auto reshapeOp = std::make_shared<ngraph::opset1::Reshape>(mul, shapePatternsNode, true);
auto result = std::make_shared<ngraph::opset1::Result>(reshapeOp);
result->set_friendly_name("outer_result");
function = std::make_shared<ngraph::Function>(ngraph::OutputVector {result}, params);
} else {
auto result = std::make_shared<ngraph::opset1::Result>(cond);
result->set_friendly_name("outer_result");
function = std::make_shared<ngraph::Function>(ngraph::OutputVector {result}, params);
}
}
std::shared_ptr<ngraph::Function> get_function() { return function; }
@@ -394,6 +435,11 @@ static std::ostream& operator<<(std::ostream& os, const InnerBodyGenerator::Inne
os << "Type05";
break;
}
case InnerBodyGenerator::InnerBodyType::Type06:
{
os << "Type06";
break;
}
default:
{
os << "NONE";
@@ -596,7 +642,8 @@ protected:
void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
ov::Shape input_shape;
for (auto& shape : targetInputStaticShapes) {
if (shape.size() > 1) {
// Change condition to cover 1 dim input shape
if (shape.size() > 0) {
input_shape = shape;
break;
}
@@ -622,6 +669,7 @@ protected:
}
}
}
size_t niter = 0;
};
@@ -648,6 +696,10 @@ const std::vector<ov::test::InputShape> dynamicInputShapes_f16 = {
ov::test::InputShape(ov::PartialShape({-1, -1, -1}), {{2, 24, 16}, {2, 64, 32}, {2, 8, 4}})
};
const std::vector<ov::test::InputShape> dynamicInputShapes_zero_dims = {
ov::test::InputShape(ov::PartialShape({-1}), {{24}, {64}, {8}})
};
const std::vector<InnerBodyTypeParams> innerBodyTypes_f32 = {
{
InnerBodyGenerator::InnerBodyType::Type01,
@@ -670,11 +722,22 @@ const std::vector<InnerBodyTypeParams> innerBodyTypes_f16 = {
}
};
const std::vector<InnerBodyTypeParams> innerBodyTypes_zero_dims = {
{
InnerBodyGenerator::InnerBodyType::Type02,
InnerBodyGenerator::InnerBodyType::Type06
},
};
const std::vector<TestModelGenerator::PredicateTypes> condTypes = {
TestModelGenerator::PredicateTypes::PARAM,
TestModelGenerator::PredicateTypes::NODE
};
const std::vector<TestModelGenerator::PredicateTypes> condTypes_zero_dims = {
TestModelGenerator::PredicateTypes::PARAM
};
INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_dynamic_f32, DynamicConditionLayerGPUTest,
testing::Combine(
testing::ValuesIn(dynamicInputShapes_f32), // input shapes
@@ -693,4 +756,13 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_dynamic_f16, DynamicConditionLay
testing::ValuesIn(condTypes), // cond type
testing::Values<std::string>(ov::test::utils::DEVICE_GPU)), // device type
DynamicConditionLayerGPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_ConditionGPUTest_zero_dims, DynamicConditionLayerGPUTest,
testing::Combine(
testing::ValuesIn(dynamicInputShapes_zero_dims), // input shapes
testing::ValuesIn(innerBodyTypes_zero_dims), // inner body type
testing::ValuesIn(netPrecisions_f32), // network precision
testing::ValuesIn(condTypes_zero_dims), // cond type
testing::Values<std::string>(ov::test::utils::DEVICE_GPU)), // device type
DynamicConditionLayerGPUTest::getTestCaseName);
} // namespace GPULayerTestsDefinitions