diff --git a/src/plugins/intel_gpu/src/plugin/infer_request.cpp b/src/plugins/intel_gpu/src/plugin/infer_request.cpp
index 24c3c314fa4..713b325d6fc 100644
--- a/src/plugins/intel_gpu/src/plugin/infer_request.cpp
+++ b/src/plugins/intel_gpu/src/plugin/infer_request.cpp
@@ -902,6 +902,19 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
     const bool is_dev_input = remote_ptr != nullptr;
     const bool can_use_usm = m_graph->GetEngine()->use_unified_shared_memory();
 
+    auto conv_to_supported_prec = [](Precision::ePrecision prec) {
+        switch (prec) {
+            case Precision::I16:
+            case Precision::U16:
+            case Precision::FP64:
+                return Precision::FP32;
+            case Precision::U64:
+            case Precision::U32:
+                return Precision::I32;
+            default: return prec;
+        }
+    };
+
     if (input_layout.is_dynamic()) {
         bool has_device_blob = _deviceInputs.find(inputName) != _deviceInputs.end();
         bool should_allocate_device_blob = !has_device_blob;
@@ -918,7 +931,7 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
             _deviceInputs[inputName] = reinterpret_device_blob(_deviceInputs[inputName], inputBlob->getTensorDesc());
         }
     } else if (input_layout.is_static() && !is_dev_input && can_use_usm) {
-        allocate_dev_mem_if_needed(_deviceInputs, inputBlob, inputName, input_layout);
+        allocate_dev_mem_if_needed(_deviceInputs, inputBlob, inputName, input_layout, (conv_to_supported_prec(prec) != prec));
     }
     OPENVINO_ASSERT(_deviceInputs.find(inputName) != _deviceInputs.end(), "[GPU] Couldn't find device blob allocated for ", inputName, " input");
     auto reqBlob = _deviceInputs.at(inputName)->as<gpu::ClBlob>();
@@ -954,8 +967,9 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
     }
 
     if (!is_dev_input) {
+        Precision conv_prec = conv_to_supported_prec(prec);
         // TODO: Remove this checks once 95363 issue is solved
-        if (prec == Precision::I16 || prec == Precision::U16 || prec == Precision::FP64) {
+        if (conv_prec != prec && conv_prec == Precision::FP32) {
            // GPU plugin doesn't support I16 input precision,
            // so have to convert input data to fp32 precision
            cldnn::mem_lock<float> ptr{ inputMem, stream };
@@ -966,7 +980,7 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
             } else {
                 convertAndCopy<double, float>(inputBlob.get(), ptr.data());
             }
-        } else if (prec == Precision::U64 || prec == Precision::U32) {
+        } else if (conv_prec != prec && conv_prec == Precision::I32) {
             cldnn::mem_lock<int32_t> ptr{ inputMem, stream };
             if (prec == Precision::U64) {
                 convertAndCopy<uint64_t, int32_t>(inputBlob.get(), ptr.data());
diff --git a/src/tests/functional/plugin/gpu/behavior/infer_request.cpp b/src/tests/functional/plugin/gpu/behavior/infer_request.cpp
index 22a4881d791..03e3bc92656 100644
--- a/src/tests/functional/plugin/gpu/behavior/infer_request.cpp
+++ b/src/tests/functional/plugin/gpu/behavior/infer_request.cpp
@@ -137,3 +137,29 @@ TEST(TensorTest, smoke_canSetShapeForPreallocatedTensor) {
     ASSERT_NO_THROW(output_tensor.set_shape({1, 10, 10, 10}));
     ASSERT_NO_THROW(output_tensor.set_shape({2, 10, 20, 20}));
 }
+
+TEST(TensorTest, smoke_canSetScalarTensor) {
+    std::vector<std::vector<size_t>> scalar_shape = {{}};
+    auto params = ngraph::builder::makeParams(ngraph::element::f64, scalar_shape);
+    params.front()->set_friendly_name("Scalar_1");
+    params.front()->output(0).get_tensor().set_names({"scalar1"});
+
+    std::vector<size_t> const_shape = {1};
+    auto const1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{1}, const_shape);
+    const1->set_friendly_name("Const_1");
+    const1->output(0).get_tensor().set_names({"const1"});
+    const1->fill_data(ov::element::i64, 0);
+
+    auto unsqueeze1 = std::make_shared<ngraph::opset1::Unsqueeze>(params.front(), const1);
+
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(unsqueeze1)};
+    std::shared_ptr<ngraph::Function> fnPtr = std::make_shared<ngraph::Function>(results, params);
+
+    auto ie = ov::Core();
+    auto compiled_model = ie.compile_model(fnPtr, CommonTestUtils::DEVICE_GPU);
+    auto request = compiled_model.create_infer_request();
+    double real_data = 1.0;
+    ov::Tensor input_data(ngraph::element::f64, {}, &real_data);
+    request.set_tensor("scalar1", input_data);
+    ASSERT_NO_THROW(request.infer());
+}
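
A note on the first hunk: `conv_to_supported_prec` centralizes the precision fallback that the old `if` conditions spelled out inline. I16, U16, and FP64 inputs fall back to FP32, while U64 and U32 fall back to I32, and the data is then converted element-wise on the host before being copied into the device blob. Below is a minimal, self-contained sketch of that conversion step; the helper name and signature are illustrative only and do not reproduce the plugin's actual `convertAndCopy` template.

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative stand-in for the plugin's host-side fallback: an element-wise
// static_cast from the user's precision to the nearest supported one.
template <typename SrcT, typename DstT>
void convert_and_copy(const SrcT* src, DstT* dst, std::size_t count) {
    std::transform(src, src + count, dst, [](SrcT v) { return static_cast<DstT>(v); });
}

int main() {
    // FP64 -> FP32, mirroring the Precision::FP64 branch in the patch.
    std::vector<double> f64_src = {1.0, 2.5, -3.0};
    std::vector<float> f32_dst(f64_src.size());
    convert_and_copy(f64_src.data(), f32_dst.data(), f64_src.size());
    assert(f32_dst[1] == 2.5f);

    // U64 -> I32, mirroring the Precision::U64 branch in the patch.
    std::vector<uint64_t> u64_src = {0, 7, 42};
    std::vector<int32_t> i32_dst(u64_src.size());
    convert_and_copy(u64_src.data(), i32_dst.data(), u64_src.size());
    assert(i32_dst[2] == 42);
    return 0;
}
```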
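The new trailing argument to `allocate_dev_mem_if_needed`, `(conv_to_supported_prec(prec) != prec)`, is true exactly when one of the conversion branches will run. Presumably it tells the allocator to pick a host-lockable USM buffer, since those branches write the converted values through a `cldnn::mem_lock` on the CPU; the parameter's name and the actual allocation policy are not visible in this diff. The `smoke_canSetScalarTensor` test then exercises the FP64-to-FP32 path end to end with a 0-D (scalar) input tensor.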