[GPU] skip excessive mem alloc request in build (#20399)

* skip excessive mem alloc request in build

* update mem check function

* fix os behavior

* update mem size check location

* only dynamic shape case takes check_allocatable

* update check condition
This commit is contained in:
Wilson Seok 2023-10-25 16:09:11 +09:00 committed by GitHub
parent 9d56c31581
commit c70f0ca45d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 263 additions and 17 deletions

View File

@ -79,6 +79,8 @@ public:
/// Checks whether two memory objects represents the same physical memory
virtual bool is_the_same_buffer(const memory& mem1, const memory& mem2) = 0;
virtual bool check_allocatable(const layout& layout, allocation_type type) = 0;
/// Returns basic allocation type which will be used as a fallback when allocation type is not specified or device doesn't support some features.
virtual allocation_type get_default_allocation_type() const = 0;

View File

@ -991,7 +991,6 @@ primitive_inst::primitive_inst(network& network, program_node const& node, bool
, _outputs({memory::ptr()})
, _reordered_weights_cache(network.get_weights_cache_capacity())
, _output_changed(false)
, _mem_allocated(allocate_memory)
, _is_dynamic(node.is_dynamic() || node.generates_dynamic_output())
, _type(node.type())
, _id(node.id())
@ -1006,6 +1005,12 @@ primitive_inst::primitive_inst(network& network, program_node const& node, bool
, _can_share_buffer(node.can_share_buffer())
, _is_constant(node.is_constant())
, _needs_completion_event(is_any_user_cpu(node.get_users()) || node.is_output()) {
// When dynamic shape node has huge upper boundary which causes bigger mem size than system max allocable mem size, do not allocate in build time.
auto output_layout = node.get_output_layout();
if (allocate_memory && node.is_dynamic() && (!network.get_engine().check_allocatable(output_layout, allocation_type::usm_host))) {
allocate_memory = false;
}
_mem_allocated = allocate_memory;
if (allocate_memory) {
// In case when output is mutable_data primitive, and other users dependencies are only used for
// synchronization, the output memory of such primitive will be fused with mutable_data

View File

@ -125,29 +125,33 @@ allocation_type ocl_engine::detect_usm_allocation_type(const void* memory) const
: allocation_type::unknown;
}
bool ocl_engine::check_allocatable(const layout& layout, allocation_type type) const {
bool ocl_engine::check_allocatable(const layout& layout, allocation_type type) {
OPENVINO_ASSERT(supports_allocation(type) || type == allocation_type::cl_mem, "[GPU] Unsupported allocation type: ", type);
auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host);
#ifdef __unix__
// Prevent the process from being killed by the Linux OOM (Out-Of-Memory) killer
OPENVINO_ASSERT(layout.bytes_count() + used_mem <= get_max_memory_size(),
"[GPU] Exceeded max size of memory allocation: ",
"Required ", layout.bytes_count(), " bytes, already occupied : ", used_mem, " bytes, ",
"but available memory size is ", get_max_memory_size(), " bytes");
#else
if (layout.bytes_count() + used_mem > get_max_memory_size()) {
GPU_DEBUG_COUT << "[Warning] [GPU] Exceeded max size of memory allocation: " << "Required " << layout.bytes_count() << " bytes, already occupied : "
<< used_mem << " bytes, but available memory size is " << get_max_memory_size() << " bytes" << std::endl;
GPU_DEBUG_COUT << "Please note that performance might drop due to memory swap." << std::endl;
auto alloc_mem_size = layout.bytes_count();
auto max_mem_size = get_device_info().max_alloc_mem_size;
if (alloc_mem_size > max_mem_size) {
auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host);
GPU_DEBUG_LOG << "[GPU] Mem size info: " << "Required " << alloc_mem_size << " bytes, already occupied : "
<< used_mem << " bytes, available memory size is " << get_max_memory_size() << " bytes, but max allocable memory size is "
<< max_mem_size << " bytes." << std::endl;
return false;
}
#endif
return true;
}
memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type type, bool reset) {
OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate memory for dynamic layout");
check_allocatable(layout, type);
bool allocatable = check_allocatable(layout, type);
if (!allocatable) {
#ifdef __unix__
OPENVINO_ASSERT(allocatable, "[GPU] Exceeded max size of memory allocation, check debug message for size info");
#else
GPU_DEBUG_COUT << "[Warning][GPU] Please note that performance might drop due to memory swap caused by exceeded mem size alloc." << std::endl;
#endif
}
try {
memory::ptr res = nullptr;
if (layout.format.is_image_2d()) {

View File

@ -28,7 +28,7 @@ public:
memory_ptr reinterpret_handle(const layout& new_layout, shared_mem_params params) override;
memory_ptr reinterpret_buffer(const memory& memory, const layout& new_layout) override;
bool is_the_same_buffer(const memory& mem1, const memory& mem2) override;
bool check_allocatable(const layout& layout, allocation_type type) const;
bool check_allocatable(const layout& layout, allocation_type type) override;
void* get_user_context() const override;

View File

@ -0,0 +1,235 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/single_layer/strided_slice.hpp"
#include "shared_test_classes/single_layer/shape_of.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ov_models/builders.hpp"
#include "common_test_utils/test_constants.hpp"
#include "common_test_utils/ov_tensor_utils.hpp"
using namespace InferenceEngine;
using namespace ov::test;
namespace GPULayerTestsDefinitions {
// Attributes of a single StridedSlice test case.
// NOTE: test instantiations initialize this struct positionally via aggregate
// initialization, so the member declaration order must not change.
struct StridedSliceParams {
std::vector<int64_t> begin;            // slice start index per axis
std::vector<int64_t> end;              // slice end index per axis
std::vector<int64_t> stride;           // step per axis
std::vector<int64_t> beginMask;        // mask semantics per op spec — see StridedSlice-1 docs
std::vector<int64_t> endMask;
std::vector<int64_t> newAxisMask;
std::vector<int64_t> shrinkAxisMask;
std::vector<int64_t> ellipsisAxisMask;
};
/// Full parameter tuple consumed by DynamicShapeHugeRangeGPUTest.
/// Modernized from `typedef` to a `using` alias (same type, same name, clearer syntax).
using StridedSliceLayerParamSet = std::tuple<
        InputShape,                                     // Input shapes
        StridedSliceParams,                             // StridedSlice attributes
        ElementType,                                    // Element type
        std::vector<ngraph::helpers::InputLayerType>,   // begin/end/stride input type
        std::map<std::string, std::string>              // Additional network configuration
>;
/// Regression test fixture: a StridedSlice->ShapeOf subgraph whose dynamic input
/// has a huge upper boundary, so allocating output memory for the upper bound at
/// build time would exceed the device's maximum allocable memory size.
class DynamicShapeHugeRangeGPUTest : public testing::WithParamInterface<StridedSliceLayerParamSet>,
                                     virtual public SubgraphBaseTest {
public:
    /// Builds a readable test name out of the parameter tuple so a failing
    /// instance can be traced back to the exact shape/mask/input-kind combination.
    static std::string getTestCaseName(const testing::TestParamInfo<StridedSliceLayerParamSet>& obj) {
        InputShape shapes;
        StridedSliceParams params;
        ElementType elementType;
        std::vector<ngraph::helpers::InputLayerType> restInputType;
        TargetDevice targetDevice;
        std::map<std::string, std::string> additionalConfig;
        std::tie(shapes, params, elementType, restInputType, additionalConfig) = obj.param;

        std::ostringstream results;
        results << "IS=" << ov::test::utils::partialShape2str({shapes.first}) << "_";
        results << "TS=";
        for (const auto& item : shapes.second) {
            results << ov::test::utils::vec2str(item) << "_";
        }
        results << "netPRC=" << elementType << "_";
        results << "begin=" << ov::test::utils::vec2str(params.begin) << "_";
        results << "end=" << ov::test::utils::vec2str(params.end) << "_";
        results << "stride=" << ov::test::utils::vec2str(params.stride) << "_";
        results << "begin_m=" << ov::test::utils::vec2str(params.beginMask) << "_";
        results << "end_m=" << ov::test::utils::vec2str(params.endMask) << "_";
        results << "new_axis_m=" << (params.newAxisMask.empty() ? "def" : ov::test::utils::vec2str(params.newAxisMask)) << "_";
        results << "shrink_m=" << (params.shrinkAxisMask.empty() ? "def" : ov::test::utils::vec2str(params.shrinkAxisMask)) << "_";
        results << "ellipsis_m=" << (params.ellipsisAxisMask.empty() ? "def" : ov::test::utils::vec2str(params.ellipsisAxisMask)) << "_";
        results << "beginType=" << restInputType[0] << "_";
        results << "endType=" << restInputType[1] << "_";
        results << "strideType=" << restInputType[2] << "_";
        results << "config=(";
        for (const auto& configEntry : additionalConfig) {
            results << configEntry.first << ", " << configEntry.second << ":";
        }
        results << ")";
        return results.str();
    }

    /// Fills input0 (data) with random values; when begin/end/stride are runtime
    /// PARAMETER inputs, copies the stored slice arguments into their tensors.
    void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override {
        inputs.clear();
        const auto& funcInputs = function->inputs();
        ov::Tensor tensor;

        // input0: data
        int32_t idx = 0;
        tensor = ov::test::utils::create_and_fill_tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]);
        inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor});

        // The begin/end/stride Parameters are created with i64 precision in SetUp(),
        // so their buffers must be accessed as int64_t: ov::Tensor::data<T>() throws
        // on an element-type mismatch, meaning the previous data<float>() casts would
        // fail at runtime for any PARAMETER-fed configuration.
        // input1: begin
        if (restInputType[0] == ngraph::helpers::InputLayerType::PARAMETER) {
            idx += 1;
            tensor = ov::Tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]);
            auto* dataPtr = tensor.data<int64_t>();
            for (size_t i = 0; i < begin.size(); i++) {
                dataPtr[i] = begin[i];
            }
            inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor});
        }
        // input2: end
        if (restInputType[1] == ngraph::helpers::InputLayerType::PARAMETER) {
            idx += 1;
            tensor = ov::Tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]);
            auto* dataPtr = tensor.data<int64_t>();
            for (size_t i = 0; i < end.size(); i++) {
                dataPtr[i] = end[i];
            }
            inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor});
        }
        // input3: stride
        if (restInputType[2] == ngraph::helpers::InputLayerType::PARAMETER) {
            idx += 1;
            tensor = ov::Tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]);
            auto* dataPtr = tensor.data<int64_t>();
            for (size_t i = 0; i < stride.size(); i++) {
                dataPtr[i] = stride[i];
            }
            inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor});
        }
        inferRequestNum++;
    }

protected:
    std::vector<int64_t> begin;     // slice begin values fed as constant or parameter
    std::vector<int64_t> end;       // slice end values fed as constant or parameter
    std::vector<int64_t> stride;    // slice stride values fed as constant or parameter
    std::vector<ngraph::helpers::InputLayerType> restInputType;  // CONSTANT vs PARAMETER per input
    size_t inferRequestNum = 0;     // counts generate_inputs() invocations

    /// Builds: Parameter(data)[, Parameter(begin/end/stride)] -> StridedSlice -> ShapeOf -> Result.
    void SetUp() override {
        InputShape shapes;
        StridedSliceParams ssParams;
        std::map<std::string, std::string> additionalConfig;
        std::tie(shapes, ssParams, inType, restInputType, additionalConfig) = this->GetParam();
        begin = ssParams.begin;
        end = ssParams.end;
        stride = ssParams.stride;
        targetDevice = ov::test::utils::DEVICE_GPU;

        std::vector<InputShape> inputShapes;
        inputShapes.push_back(shapes);
        // Each PARAMETER-typed slice argument adds one static 1D input shape.
        if (restInputType[0] == ngraph::helpers::InputLayerType::PARAMETER)
            inputShapes.push_back(InputShape({static_cast<int64_t>(begin.size())}, std::vector<ov::Shape>(shapes.second.size(), {begin.size()})));
        if (restInputType[1] == ngraph::helpers::InputLayerType::PARAMETER)
            inputShapes.push_back(InputShape({static_cast<int64_t>(end.size())}, std::vector<ov::Shape>(shapes.second.size(), {end.size()})));
        if (restInputType[2] == ngraph::helpers::InputLayerType::PARAMETER)
            inputShapes.push_back(InputShape({static_cast<int64_t>(stride.size())}, std::vector<ov::Shape>(shapes.second.size(), {stride.size()})));

        init_input_shapes(inputShapes);

        ov::ParameterVector params{std::make_shared<ov::op::v0::Parameter>(inType, inputDynamicShapes.front())};

        std::shared_ptr<ov::Node> beginInput, endInput, strideInput;
        if (restInputType[0] == ngraph::helpers::InputLayerType::PARAMETER) {
            auto beginNode = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::Type_t::i64, ov::Shape{begin.size()});
            params.push_back(beginNode);
            beginInput = beginNode;
        } else {
            beginInput = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ov::Shape{begin.size()}, begin);
        }
        if (restInputType[1] == ngraph::helpers::InputLayerType::PARAMETER) {
            auto endNode = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::Type_t::i64, ov::Shape{end.size()});
            params.push_back(endNode);
            endInput = endNode;
        } else {
            endInput = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ov::Shape{end.size()}, end);
        }
        if (restInputType[2] == ngraph::helpers::InputLayerType::PARAMETER) {
            auto strideNode = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::Type_t::i64, ov::Shape{stride.size()});
            params.push_back(strideNode);
            strideInput = strideNode;
        } else {
            strideInput = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ov::Shape{stride.size()}, stride);
        }

        auto stridedSliceOp = std::make_shared<ngraph::op::v1::StridedSlice>(params[0], beginInput, endInput, strideInput, ssParams.beginMask, ssParams.endMask,
                                                                             ssParams.newAxisMask, ssParams.shrinkAxisMask, ssParams.ellipsisAxisMask);
        // ShapeOf keeps the graph output tiny while still forcing the StridedSlice
        // output layout (with its huge dynamic upper bound) to be computed.
        auto shapeOfOp = std::make_shared<ngraph::opset3::ShapeOf>(stridedSliceOp, ov::element::Type_t::i32);

        ngraph::ResultVector results;
        for (size_t i = 0; i < shapeOfOp->get_output_size(); i++) {
            results.push_back(std::make_shared<ngraph::opset1::Result>(shapeOfOp->output(i)));
        }

        function = std::make_shared<ngraph::Function>(results, params, "result");
    }
};
// Runs the subgraph on GPU and compares against the reference implementation.
// The interesting behavior under test is that build-time memory allocation is
// skipped for the huge dynamic upper boundary instead of failing the build
// (see the check_allocatable changes in this commit).
TEST_P(DynamicShapeHugeRangeGPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
run();
}
namespace {

std::map<std::string, std::string> emptyAdditionalConfig;

const std::vector<ElementType> inputPrecisions = {
        ElementType::f32
};

// All CONSTANT/PARAMETER combinations for the begin/end/stride inputs.
// Only restInputTypes[0] (all CONSTANT) is instantiated below.
const std::vector<std::vector<ngraph::helpers::InputLayerType>> restInputTypes = {
{ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::CONSTANT},
{ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::PARAMETER},
{ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::CONSTANT},
{ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::CONSTANT},
{ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::PARAMETER},
{ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::PARAMETER},
{ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::PARAMETER},
{ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::CONSTANT},
};

// Dynamic 2D input whose second dimension has a huge upper bound (364000000):
// allocating for the upper bound at build time would exceed the device's max
// allocable memory size, which is exactly the situation the build-time skip handles.
// (Identifiers renamed from the original "uppper" misspelling.)
const std::vector<InputShape> inputShapesDynamic2D_excessive_upper_boundary = {
        {{{0, 1000}, {0, 364000000}, 4},
         {{640, 640, 4}}},
};

const std::vector<StridedSliceParams> paramsPlain2D_excessive_upper_boundary = {
        StridedSliceParams{ { 0, 1 }, { 0, 2147483647 }, { 1, 1 }, { 1, 0 }, { 1, 0 }, { }, { }, { } },
};

// NOTE: the suite name keeps the original "uppper" spelling on purpose —
// renaming it would change the registered gtest names and could break CI filters.
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Dynamic_2D_excessive_uppper_boundary, DynamicShapeHugeRangeGPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(inputShapesDynamic2D_excessive_upper_boundary),
                ::testing::ValuesIn(paramsPlain2D_excessive_upper_boundary),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::Values(restInputTypes[0]),
                ::testing::Values(emptyAdditionalConfig)),
        DynamicShapeHugeRangeGPUTest::getTestCaseName);

}  // namespace
} // namespace GPULayerTestsDefinitions