[GPU] Updated to get lockable memories for device inputs in prepare_input() (#14418)

* updated to get lockable memory for device inputs

* gpuFuncTest to set f64 scalar tensor

* aligned type names

* updated to get lockable memory for some specific precisions

* add a function to get a supported precision
This commit is contained in:
Eddy Kim 2022-12-16 14:12:47 +09:00 committed by GitHub
parent fa1cddf278
commit b46dd308d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 3 deletions

View File

@ -902,6 +902,19 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
const bool is_dev_input = remote_ptr != nullptr;
const bool can_use_usm = m_graph->GetEngine()->use_unified_shared_memory();
// Maps an input precision that the GPU plugin cannot consume directly onto
// the nearest supported precision; precisions that are already supported are
// returned unchanged. The caller compares the result against the original
// precision to decide whether an on-device conversion copy is required.
auto conv_to_supported_prec = [](Precision::ePrecision prec) {
switch (prec) {
// 16-bit integer and fp64 inputs are converted to fp32 on the device.
case Precision::I16:
case Precision::U16:
case Precision::FP64:
return Precision::FP32;
// Unsigned 32/64-bit integer inputs are narrowed to signed 32-bit.
case Precision::U64:
case Precision::U32:
return Precision::I32;
default: return prec;
}
};
if (input_layout.is_dynamic()) {
bool has_device_blob = _deviceInputs.find(inputName) != _deviceInputs.end();
bool should_allocate_device_blob = !has_device_blob;
@ -918,7 +931,7 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
_deviceInputs[inputName] = reinterpret_device_blob(_deviceInputs[inputName], inputBlob->getTensorDesc());
}
} else if (input_layout.is_static() && !is_dev_input && can_use_usm) {
allocate_dev_mem_if_needed(_deviceInputs, inputBlob, inputName, input_layout);
allocate_dev_mem_if_needed(_deviceInputs, inputBlob, inputName, input_layout, (conv_to_supported_prec(prec) != prec));
}
OPENVINO_ASSERT(_deviceInputs.find(inputName) != _deviceInputs.end(), "[GPU] Couldn't find device blob allocated for ", inputName, " input");
auto reqBlob = _deviceInputs.at(inputName)->as<gpu::ClBlob>();
@ -954,8 +967,9 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
}
if (!is_dev_input) {
Precision conv_prec = conv_to_supported_prec(prec);
// TODO: Remove these checks once 95363 issue is solved
if (prec == Precision::I16 || prec == Precision::U16 || prec == Precision::FP64) {
if (conv_prec != prec && conv_prec == Precision::FP32) {
// GPU plugin doesn't support I16, U16 and FP64 input precisions,
// so have to convert input data to fp32 precision
cldnn::mem_lock<float> ptr{ inputMem, stream };
@ -966,7 +980,7 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
} else {
convertAndCopy<double, float>(inputBlob.get(), ptr.data());
}
} else if (prec == Precision::U64 || prec == Precision::U32) {
} else if (conv_prec != prec && conv_prec == Precision::I32) {
cldnn::mem_lock<int32_t> ptr{ inputMem, stream };
if (prec == Precision::U64) {
convertAndCopy<uint64_t, int32_t>(inputBlob.get(), ptr.data());

View File

@ -137,3 +137,29 @@ TEST(TensorTest, smoke_canSetShapeForPreallocatedTensor) {
ASSERT_NO_THROW(output_tensor.set_shape({1, 10, 10, 10}));
ASSERT_NO_THROW(output_tensor.set_shape({2, 10, 20, 20}));
}
// Checks that a rank-0 (scalar) f64 tensor can be bound as a network input
// and inferred on the GPU device without throwing.
TEST(TensorTest, smoke_canSetScalarTensor) {
    // Network: f64 scalar Parameter -> Unsqueeze(axis 0) -> Result.
    std::vector<std::vector<size_t>> scalar_shape = {{}};
    auto params = ngraph::builder::makeParams(ngraph::element::f64, scalar_shape);
    params.front()->set_friendly_name("Scalar_1");
    params.front()->output(0).get_tensor().set_names({"scalar1"});

    // Unsqueeze axis constant with value {0} (axis 0). The original code
    // created the constant with value {1} and then immediately overwrote the
    // buffer with fill_data(ov::element::i64, 0); creating it with the final
    // value directly is equivalent and drops the redundant call.
    auto const1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{0});
    const1->set_friendly_name("Const_1");
    const1->output(0).get_tensor().set_names({"const1"});

    auto unsqueeze1 = std::make_shared<ngraph::opset1::Unsqueeze>(params.front(), const1);
    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(unsqueeze1)};
    std::shared_ptr<ngraph::Function> fnPtr = std::make_shared<ngraph::Function>(results, params);

    // Compile for GPU and bind a host double as a rank-0 f64 tensor.
    auto ie = ov::Core();
    auto compiled_model = ie.compile_model(fnPtr, CommonTestUtils::DEVICE_GPU);
    auto request = compiled_model.create_infer_request();
    double real_data = 1.0;
    ov::Tensor input_data(ngraph::element::f64, {}, &real_data);
    request.set_tensor("scalar1", input_data);
    ASSERT_NO_THROW(request.infer());
}