[GPU] Updated to get lockable memory for device inputs in prepare_input() (#14418)
* Updated to get lockable memory for device inputs
* Added gpuFuncTest to set f64 scalar tensor
* Aligned type names
* Updated to get lockable memory for some specific precisions
* Added a function to get a supported precision
commit b46dd308d6
parent fa1cddf278
@@ -902,6 +902,19 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
     const bool is_dev_input = remote_ptr != nullptr;
+    const bool can_use_usm = m_graph->GetEngine()->use_unified_shared_memory();
+
+    auto conv_to_supported_prec = [](Precision::ePrecision prec) {
+        switch (prec) {
+            case Precision::I16:
+            case Precision::U16:
+            case Precision::FP64:
+                return Precision::FP32;
+            case Precision::U64:
+            case Precision::U32:
+                return Precision::I32;
+            default: return prec;
+        }
+    };
+
     if (input_layout.is_dynamic()) {
         bool has_device_blob = _deviceInputs.find(inputName) != _deviceInputs.end();
         bool should_allocate_device_blob = !has_device_blob;
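Reviewer note: the new conv_to_supported_prec lambda centralizes the plugin's precision fallback: I16, U16, and FP64 inputs fall back to FP32, and U64/U32 fall back to I32. A minimal standalone sketch of that mapping (using a stand-in enum instead of InferenceEngine::Precision so it compiles in isolation):

#include <cassert>

// Simplified stand-in for InferenceEngine::Precision::ePrecision; the real
// plugin code applies the lambda to that enum directly.
enum class Prec { I16, U16, FP64, FP32, U64, U32, I32, U8 };

// Mirror of the new conv_to_supported_prec mapping: precisions the GPU
// plugin cannot consume natively are folded to a supported one.
constexpr Prec conv_to_supported_prec(Prec prec) {
    switch (prec) {
    case Prec::I16:
    case Prec::U16:
    case Prec::FP64:
        return Prec::FP32;   // 16-bit ints and fp64 are converted to fp32
    case Prec::U64:
    case Prec::U32:
        return Prec::I32;    // wide unsigned ints are narrowed to i32
    default:
        return prec;         // everything else is already supported as-is
    }
}

int main() {
    assert(conv_to_supported_prec(Prec::FP64) == Prec::FP32);
    assert(conv_to_supported_prec(Prec::U64) == Prec::I32);
    assert(conv_to_supported_prec(Prec::U8) == Prec::U8);  // unchanged
    // Callers test `conv_to_supported_prec(p) != p` to decide whether a
    // host-side conversion (and a lockable device blob) is needed.
    return 0;
}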
@@ -918,7 +931,7 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
             _deviceInputs[inputName] = reinterpret_device_blob(_deviceInputs[inputName], inputBlob->getTensorDesc());
         }
     } else if (input_layout.is_static() && !is_dev_input && can_use_usm) {
-        allocate_dev_mem_if_needed(_deviceInputs, inputBlob, inputName, input_layout);
+        allocate_dev_mem_if_needed(_deviceInputs, inputBlob, inputName, input_layout, (conv_to_supported_prec(prec) != prec));
     }
     OPENVINO_ASSERT(_deviceInputs.find(inputName) != _deviceInputs.end(), "[GPU] Couldn't find device blob allocated for ", inputName, " input");
     auto reqBlob = _deviceInputs.at(inputName)->as<gpu::ClBlob>();
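Reviewer note (an inference, not stated explicitly in the diff): the extra boolean argument appears to tell allocate_dev_mem_if_needed that the host data will be converted before the copy, i.e. that conv_to_supported_prec(prec) != prec, so the device blob should be allocated as lockable (USM) memory in the supported precision. This matches the commit message item "updated to get lockable memory for some specific precisions".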
@@ -954,8 +967,9 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
     }

     if (!is_dev_input) {
+        Precision conv_prec = conv_to_supported_prec(prec);
         // TODO: Remove this checks once 95363 issue is solved
-        if (prec == Precision::I16 || prec == Precision::U16 || prec == Precision::FP64) {
+        if (conv_prec != prec && conv_prec == Precision::FP32) {
             // GPU plugin doesn't support I16 input precision,
             // so have to convert input data to fp32 precision
             cldnn::mem_lock<float> ptr{ inputMem, stream };
@@ -966,7 +980,7 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
             } else {
                 convertAndCopy<double, float>(inputBlob.get(), ptr.data());
             }
-        } else if (prec == Precision::U64 || prec == Precision::U32) {
+        } else if (conv_prec != prec && conv_prec == Precision::I32) {
             cldnn::mem_lock<int32_t> ptr{ inputMem, stream };
             if (prec == Precision::U64) {
                 convertAndCopy<uint64_t, int32_t>(inputBlob.get(), ptr.data());
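Reviewer note: convertAndCopy<src_t, dst_t> is the plugin's existing element-wise conversion helper; its definition is not part of this diff, so the following is only a sketch of the general technique using a hypothetical free function (the real helper reads from an InferenceEngine::Blob* source rather than a raw pointer):

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for convertAndCopy<src_t, dst_t>: element-wise
// static_cast from the host buffer into the locked device-visible pointer.
template <typename src_t, typename dst_t>
void convert_and_copy(const src_t* src, dst_t* dst, std::size_t count) {
    std::transform(src, src + count, dst,
                   [](src_t v) { return static_cast<dst_t>(v); });
}

int main() {
    std::vector<double> host = {1.5, -2.25, 3.0};  // fp64 user input
    std::vector<float> locked(host.size());        // fp32 view, as obtained
                                                   // via cldnn::mem_lock
    convert_and_copy(host.data(), locked.data(), host.size());
    return 0;
}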
@@ -137,3 +137,29 @@ TEST(TensorTest, smoke_canSetShapeForPreallocatedTensor) {
     ASSERT_NO_THROW(output_tensor.set_shape({1, 10, 10, 10}));
     ASSERT_NO_THROW(output_tensor.set_shape({2, 10, 20, 20}));
 }
+
+TEST(TensorTest, smoke_canSetScalarTensor) {
+    std::vector<std::vector<size_t>> scalar_shape = {{}};
+    auto params = ngraph::builder::makeParams(ngraph::element::f64, scalar_shape);
+    params.front()->set_friendly_name("Scalar_1");
+    params.front()->output(0).get_tensor().set_names({"scalar1"});
+
+    std::vector<size_t> const_shape = {1};
+    auto const1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{1}, const_shape);
+    const1->set_friendly_name("Const_1");
+    const1->output(0).get_tensor().set_names({"const1"});
+    const1->fill_data(ov::element::i64, 0);
+
+    auto unsqueeze1 = std::make_shared<ngraph::opset1::Unsqueeze>(params.front(), const1);
+
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(unsqueeze1)};
+    std::shared_ptr<ngraph::Function> fnPtr = std::make_shared<ngraph::Function>(results, params);
+
+    auto ie = ov::Core();
+    auto compiled_model = ie.compile_model(fnPtr, CommonTestUtils::DEVICE_GPU);
+    auto request = compiled_model.create_infer_request();
+    double real_data = 1.0;
+    ov::Tensor input_data(ngraph::element::f64, {}, &real_data);
+    request.set_tensor("scalar1", input_data);
+    ASSERT_NO_THROW(request.infer());
+}
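Reviewer note: the core pattern the new test exercises, wrapping a host scalar in a rank-0 tensor, looks like this in the OpenVINO 2.0 API (a minimal sketch; the Core/compile_model setup is as in the test above):

#include <openvino/openvino.hpp>

int main() {
    double value = 1.0;
    // A rank-0 (scalar) tensor: empty shape, one fp64 element, wrapping
    // caller-owned memory (the tensor neither copies nor frees `value`).
    ov::Tensor scalar(ov::element::f64, ov::Shape{}, &value);
    // scalar.get_size() == 1; passing it via request.set_tensor("scalar1",
    // scalar) feeds it to the compiled model exactly as in the test above.
    return 0;
}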