From c70f0ca45d04f9a3b7a0a9ce4999afc2b43ecc06 Mon Sep 17 00:00:00 2001
From: Wilson Seok
Date: Wed, 25 Oct 2023 16:09:11 +0900
Subject: [PATCH] [GPU] skip excessive mem alloc request in build (#20399)

* skip excessive mem alloc request in build

* update mem check function

* fix os behavior

* update mem size check location

* only dynamic shape case takes check_allocatable

* update check condition
---
 .../include/intel_gpu/runtime/engine.hpp      |   2 +
 .../intel_gpu/src/graph/primitive_inst.cpp    |   7 +-
 .../intel_gpu/src/runtime/ocl/ocl_engine.cpp  |  34 +--
 .../intel_gpu/src/runtime/ocl/ocl_engine.hpp  |   2 +-
 .../gpu_dyn_huge_input_range.cpp              | 235 ++++++++++++++++++
 5 files changed, 263 insertions(+), 17 deletions(-)
 create mode 100644 src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_huge_input_range.cpp

diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp
index ee8d10bb580..6b9195097b3 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp
@@ -79,6 +79,8 @@ public:
     /// Checks whether two memory objects represents the same physical memory
     virtual bool is_the_same_buffer(const memory& mem1, const memory& mem2) = 0;
 
+    virtual bool check_allocatable(const layout& layout, allocation_type type) = 0;
+
     /// Returns basic allocation type which will be used as a fallback when allocation type is not specified or device doesn't support some features.
     virtual allocation_type get_default_allocation_type() const = 0;
 
diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index 92f9f60743b..a81d0bd10ad 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -991,7 +991,6 @@ primitive_inst::primitive_inst(network& network, program_node const& node, bool
     , _outputs({memory::ptr()})
     , _reordered_weights_cache(network.get_weights_cache_capacity())
     , _output_changed(false)
-    , _mem_allocated(allocate_memory)
     , _is_dynamic(node.is_dynamic() || node.generates_dynamic_output())
     , _type(node.type())
     , _id(node.id())
@@ -1006,6 +1005,12 @@
     , _can_share_buffer(node.can_share_buffer())
     , _is_constant(node.is_constant())
    , _needs_completion_event(is_any_user_cpu(node.get_users()) || node.is_output()) {
+    // When a dynamic-shape node's upper-bound memory size exceeds the system's maximum allocatable memory size, do not allocate its output at build time.
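+    // Such a node defers its output allocation until execution, when the actual shape is known;
+    // usm_host is used as the reference allocation type for this build-time size check.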
+    auto output_layout = node.get_output_layout();
+    if (allocate_memory && node.is_dynamic() && (!network.get_engine().check_allocatable(output_layout, allocation_type::usm_host))) {
+        allocate_memory = false;
+    }
+    _mem_allocated = allocate_memory;
     if (allocate_memory) {
         // In case when output is mutable_data primitive, and other users dependencies are only used for
         // synchronization, the output memory of such primitive will be fused with mutable_data
diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp
index 325bb3aa581..9e4bbd9aa6b 100644
--- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp
+++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp
@@ -125,29 +125,33 @@ allocation_type ocl_engine::detect_usm_allocation_type(const void* memory) const
                 : allocation_type::unknown;
 }
 
-bool ocl_engine::check_allocatable(const layout& layout, allocation_type type) const {
+bool ocl_engine::check_allocatable(const layout& layout, allocation_type type) {
     OPENVINO_ASSERT(supports_allocation(type) || type == allocation_type::cl_mem, "[GPU] Unsupported allocation type: ", type);
 
-    auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host);
-#ifdef __unix__
-    // Prevent from being killed by Ooo Killer of Linux
-    OPENVINO_ASSERT(layout.bytes_count() + used_mem <= get_max_memory_size(),
-                    "[GPU] Exceeded max size of memory allocation: ",
-                    "Required ", layout.bytes_count(), " bytes, already occupied : ", used_mem, " bytes, ",
-                    "but available memory size is ", get_max_memory_size(), " bytes");
-#else
-    if (layout.bytes_count() + used_mem > get_max_memory_size()) {
-        GPU_DEBUG_COUT << "[Warning] [GPU] Exceeded max size of memory allocation: " << "Required " << layout.bytes_count() << " bytes, already occupied : "
-                       << used_mem << " bytes, but available memory size is " << get_max_memory_size() << " bytes" << std::endl;
-        GPU_DEBUG_COUT << "Please note that performance might drop due to memory swap." << std::endl;
+    auto alloc_mem_size = layout.bytes_count();
+    auto max_mem_size = get_device_info().max_alloc_mem_size;
+    if (alloc_mem_size > max_mem_size) {
+        auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host);
+        GPU_DEBUG_LOG << "[GPU] Mem size info: Required " << alloc_mem_size << " bytes, already occupied: "
+                      << used_mem << " bytes, available memory size is " << get_max_memory_size() << " bytes, but max allocatable memory size is "
+                      << max_mem_size << " bytes." << std::endl;
+        return false;
     }
-#endif
+
     return true;
 }
 
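+// check_allocatable() only reports whether a single buffer of the given layout fits the device's
+// max allocation size; the policy for an oversized request is left to the caller (primitive_inst
+// defers the build-time allocation, while allocate_memory() below either errors out or warns).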
 memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type type, bool reset) {
     OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate memory for dynamic layout");
 
-    check_allocatable(layout, type);
+    bool allocatable = check_allocatable(layout, type);
+    if (!allocatable) {
+#ifdef __unix__
+        OPENVINO_ASSERT(allocatable, "[GPU] Exceeded max size of memory allocation, check the debug message for size info");
+#else
+        GPU_DEBUG_COUT << "[Warning][GPU] Performance might drop due to memory swap because the requested allocation exceeds the max allocatable memory size."
+                       << std::endl;
+#endif
+    }
+
     try {
         memory::ptr res = nullptr;
         if (layout.format.is_image_2d()) {
diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp
index 6d414139651..ee76fcca82a 100644
--- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp
+++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp
@@ -28,7 +28,7 @@ public:
     memory_ptr reinterpret_handle(const layout& new_layout, shared_mem_params params) override;
     memory_ptr reinterpret_buffer(const memory& memory, const layout& new_layout) override;
     bool is_the_same_buffer(const memory& mem1, const memory& mem2) override;
-    bool check_allocatable(const layout& layout, allocation_type type) const;
+    bool check_allocatable(const layout& layout, allocation_type type) override;
 
     void* get_user_context() const override;
 
diff --git a/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_huge_input_range.cpp b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_huge_input_range.cpp
new file mode 100644
index 00000000000..62eb867df97
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_huge_input_range.cpp
@@ -0,0 +1,235 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "shared_test_classes/single_layer/strided_slice.hpp"
+#include "shared_test_classes/single_layer/shape_of.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+#include "ov_models/builders.hpp"
+#include "common_test_utils/test_constants.hpp"
+#include "common_test_utils/ov_tensor_utils.hpp"
+
+using namespace InferenceEngine;
+using namespace ov::test;
+
+namespace GPULayerTestsDefinitions {
+
+struct StridedSliceParams {
+    std::vector<int64_t> begin;
+    std::vector<int64_t> end;
+    std::vector<int64_t> stride;
+    std::vector<int64_t> beginMask;
+    std::vector<int64_t> endMask;
+    std::vector<int64_t> newAxisMask;
+    std::vector<int64_t> shrinkAxisMask;
+    std::vector<int64_t> ellipsisAxisMask;
+};
+
+typedef std::tuple<
+        InputShape,                                   // Input shapes
+        StridedSliceParams,
+        ElementType,                                  // Element type
+        std::vector<ngraph::helpers::InputLayerType>, // begin/end/stride input type
+        std::map<std::string, std::string>            // Additional network configuration
+> StridedSliceLayerParamSet;
+
+class DynamicShapeHugeRangeGPUTest : public testing::WithParamInterface<StridedSliceLayerParamSet>,
+                                     virtual public SubgraphBaseTest {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<StridedSliceLayerParamSet>& obj) {
+        InputShape shapes;
+        StridedSliceParams params;
+        ElementType elementType;
+        std::vector<ngraph::helpers::InputLayerType> restInputType;
+        TargetDevice targetDevice;
+        std::map<std::string, std::string> additionalConfig;
+        std::tie(shapes, params, elementType, restInputType, additionalConfig) = obj.param;
+
+        std::ostringstream results;
+        results << "IS=" << ov::test::utils::partialShape2str({shapes.first}) << "_";
+        results << "TS=";
+        for (const auto& item : shapes.second) {
+            results << ov::test::utils::vec2str(item) << "_";
+        }
+        results << "netPRC=" << elementType << "_";
+        results << "begin=" << ov::test::utils::vec2str(params.begin) << "_";
+        results << "end=" << ov::test::utils::vec2str(params.end) << "_";
+        results << "stride=" << ov::test::utils::vec2str(params.stride) << "_";
+        results << "begin_m=" << ov::test::utils::vec2str(params.beginMask) << "_";
+        results << "end_m=" << ov::test::utils::vec2str(params.endMask) << "_";
+        results << "new_axis_m=" << (params.newAxisMask.empty() ? "def" : ov::test::utils::vec2str(params.newAxisMask)) << "_";
+        results << "shrink_m=" << (params.shrinkAxisMask.empty() ? "def" : ov::test::utils::vec2str(params.shrinkAxisMask)) << "_";
+        results << "ellipsis_m=" << (params.ellipsisAxisMask.empty() ? "def" : ov::test::utils::vec2str(params.ellipsisAxisMask)) << "_";
+        results << "beginType=" << restInputType[0] << "_";
+        results << "endType=" << restInputType[1] << "_";
+        results << "strideType=" << restInputType[2] << "_";
+        results << "config=(";
+        for (const auto& configEntry : additionalConfig) {
+            results << configEntry.first << ", " << configEntry.second << ":";
+        }
+        results << ")";
+
+        return results.str();
+    }
+
+    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
+        inputs.clear();
+        const auto& funcInputs = function->inputs();
+        ov::Tensor tensor;
+
+        // input0: data
+        int32_t idx = 0;
+        tensor = ov::test::utils::create_and_fill_tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]);
+        inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor});
+
+        // input1: begin
+        if (restInputType[0] == ngraph::helpers::InputLayerType::PARAMETER) {
+            idx += 1;
+            tensor = ov::Tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]);
+            auto *dataPtr = tensor.data<int64_t>();
+            for (size_t i = 0; i < begin.size(); i++) {
+                dataPtr[i] = static_cast<int64_t>(begin[i]);
+            }
+            inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor});
+        }
+
+        // input2: end
+        if (restInputType[1] == ngraph::helpers::InputLayerType::PARAMETER) {
+            idx += 1;
+            tensor = ov::Tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]);
+            auto *dataPtr = tensor.data<int64_t>();
+            for (size_t i = 0; i < end.size(); i++) {
+                dataPtr[i] = static_cast<int64_t>(end[i]);
+            }
+            inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor});
+        }
+
+        // input3: stride
+        if (restInputType[2] == ngraph::helpers::InputLayerType::PARAMETER) {
+            idx += 1;
+            tensor = ov::Tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]);
+            auto *dataPtr = tensor.data<int64_t>();
+            for (size_t i = 0; i < stride.size(); i++) {
+                dataPtr[i] = static_cast<int64_t>(stride[i]);
+            }
+            inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor});
+        }
+
+        inferRequestNum++;
+    }
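+
+    // Note: besides the data input, only begin/end/stride inputs declared as PARAMETER receive
+    // runtime tensors here; CONSTANT variants are baked into the graph as Constants in SetUp().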
"def" : ov::test::utils::vec2str(params.shrinkAxisMask)) << "_"; + results << "ellipsis_m=" << (params.ellipsisAxisMask.empty() ? "def" : ov::test::utils::vec2str(params.ellipsisAxisMask)) << "_"; + results << "beginType=" << restInputType[0] << "_"; + results << "endType=" << restInputType[1] << "_"; + results << "strideType=" << restInputType[2] << "_"; + results << "config=("; + for (const auto& configEntry : additionalConfig) { + results << configEntry.first << ", " << configEntry.second << ":"; + } + results << ")"; + + return results.str(); + } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + ov::Tensor tensor; + + // input0: data + int32_t idx = 0; + tensor = ov::test::utils::create_and_fill_tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]); + inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor}); + + // input1: begin + if (restInputType[0] == ngraph::helpers::InputLayerType::PARAMETER) { + idx += 1; + tensor = ov::Tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]); + auto *dataPtr = tensor.data(); + for (size_t i = 0; i < begin.size(); i++) { + dataPtr[i] = static_cast(begin[i]); + } + inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor}); + } + + // input2: end + if (restInputType[1] == ngraph::helpers::InputLayerType::PARAMETER) { + idx += 1; + tensor = ov::Tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]); + auto *dataPtr = tensor.data(); + for (size_t i = 0; i < end.size(); i++) { + dataPtr[i] = static_cast(end[i]); + } + inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor}); + } + + // input3: stride + if (restInputType[2] == ngraph::helpers::InputLayerType::PARAMETER) { + idx += 1; + tensor = ov::Tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]); + auto *dataPtr = tensor.data(); + for (size_t i = 0; i < stride.size(); i++) { + dataPtr[i] = static_cast(stride[i]); + } + inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor}); + } + + inferRequestNum++; + } + +protected: + std::vector begin; + std::vector end; + std::vector stride; + std::vector restInputType; + size_t inferRequestNum = 0; + + void SetUp() override { + InputShape shapes; + StridedSliceParams ssParams; + std::map additionalConfig; + std::tie(shapes, ssParams, inType, restInputType, additionalConfig) = this->GetParam(); + + begin = ssParams.begin; + end = ssParams.end; + stride = ssParams.stride; + + targetDevice = ov::test::utils::DEVICE_GPU; + + std::vector inputShapes; + inputShapes.push_back(shapes); + if (restInputType[0] == ngraph::helpers::InputLayerType::PARAMETER) + inputShapes.push_back(InputShape({static_cast(begin.size())}, std::vector(shapes.second.size(), {begin.size()}))); + if (restInputType[1] == ngraph::helpers::InputLayerType::PARAMETER) + inputShapes.push_back(InputShape({static_cast(end.size())}, std::vector(shapes.second.size(), {end.size()}))); + if (restInputType[2] == ngraph::helpers::InputLayerType::PARAMETER) + inputShapes.push_back(InputShape({static_cast(stride.size())}, std::vector(shapes.second.size(), {stride.size()}))); + + init_input_shapes(inputShapes); + + ov::ParameterVector params{std::make_shared(inType, inputDynamicShapes.front())}; + + std::shared_ptr beginInput, endInput, strideInput; + if (restInputType[0] == ngraph::helpers::InputLayerType::PARAMETER) { + auto beginNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{begin.size()}); + 
+
+const std::vector<StridedSliceParams> paramsPlain2D_excessive_upper_boundary = {
+    StridedSliceParams{ { 0, 1 }, { 0, 2147483647 }, { 1, 1 }, { 1, 0 }, { 1, 0 }, { }, { }, { } },
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Dynamic_2D_excessive_upper_boundary, DynamicShapeHugeRangeGPUTest,
+                         ::testing::Combine(
+                             ::testing::ValuesIn(inputShapesDynamic2D_excessive_upper_boundary),
+                             ::testing::ValuesIn(paramsPlain2D_excessive_upper_boundary),
+                             ::testing::ValuesIn(inputPrecisions),
+                             ::testing::Values(restInputTypes[0]),
+                             ::testing::Values(emptyAdditionalConfig)),
+                         DynamicShapeHugeRangeGPUTest::getTestCaseName);
+}  // namespace
+}  // namespace GPULayerTestsDefinitions