[GPU] Get rid of input/output memcpy on USM host (#9048)

This commit is contained in:
Mikhail Letavin 2021-12-07 13:06:31 +03:00 committed by GitHub
parent 9a5b911856
commit 8e67d74932
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -163,6 +163,14 @@ void checkOutputBlob(const Blob::Ptr &blob,
checkAlloc(blob, str_output_not_allocated);
}
// Reports whether `hostPtr` is the very same address that backs `memPtr`.
//
// Only memory whose allocation type is `cldnn::allocation_type::usm_host` can
// match; for any other allocation type the function returns false. The
// comparison is done against the `mem` field of `get_internal_params()`,
// reinterpreted as a byte pointer.
//
// A null `hostPtr` never matches. Previously a null `hostPtr` combined with a
// non-usm_host allocation compared nullptr == nullptr and returned true, which
// made callers skip a copy they still had to perform.
//
// NOTE(review): `memPtr` is dereferenced unconditionally, so it is assumed to
// be non-null - confirm against callers.
//
// @param memPtr   memory object to inspect.
// @param hostPtr  host address to compare with the memory's backing pointer.
// @return true only if `hostPtr` is non-null, `memPtr` is usm_host allocated,
//         and both refer to the same address.
bool same_host_mem(cldnn::memory::ptr memPtr, uint8_t* hostPtr) {
    // Nothing can alias a null host pointer, and only usm_host allocations
    // are compared by address here.
    if (hostPtr == nullptr ||
        memPtr->get_allocation_type() != cldnn::allocation_type::usm_host) {
        return false;
    }
    return reinterpret_cast<uint8_t*>(memPtr->get_internal_params().mem) == hostPtr;
}
} // namespace
namespace CLDNNPlugin {
@ -562,9 +570,17 @@ void CLDNNInferRequest::wait() {
// mapping remote blobs not needed -
// let the user take care of them explicitly
if (!bptr->is<gpu::ClBlob>()) {
bool same_mem = false;
{
auto dst_lock = bptr->cbuffer();
auto dst_ptr = dst_lock.as<uint8_t*>();
same_mem = same_host_mem(outputMemory, dst_ptr);
}
if (!same_mem) {
copy_output_data(outputMemory, bptr);
}
}
}
// finally collect profiling info
if (m_useProfiling) {
@ -899,13 +915,14 @@ void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob
if (inputLayoutItr == m_graph->GetInputLayouts().end()) {
IE_THROW() << "Input name mismatch.";
}
Blob::Ptr reqBlob = _deviceInputs.at(inputName);
auto reqBlob = _deviceInputs.at(inputName)->as<gpu::ClBlob>();
auto _nw_ptr = m_graph->GetNetwork();
cldnn::primitive_id internalName = "parameter:" + inputName;
const auto& prec = inputBlob->getTensorDesc().getPrecision();
auto remote_ptr = inputBlob->as<gpu::ClBlob>();
auto& stream = m_graph->GetNetwork()->get_stream();
bool is_dev_input = remote_ptr != nullptr;
switch (prec) {
case Precision::FP32:
case Precision::FP16:
@ -918,7 +935,7 @@ void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob
case Precision::I64: {
auto impl = getBlobImpl(is_dev_input ?
remote_ptr :
reqBlob->as<gpu::ClBlob>());
reqBlob);
if (!impl->is_allocated()) {
IE_THROW() << str_input_not_allocated;
}
@ -936,10 +953,13 @@ void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob
}
} else {
auto src_lock = inputBlob->cbuffer();
auto ev = inputMem->copy_from(stream, src_lock.as<const uint8_t*>());
auto src_ptr = src_lock.as<uint8_t*>();
if (!same_host_mem(inputMem, src_ptr)) {
auto ev = inputMem->copy_from(stream, src_ptr);
dependencies.push_back(ev);
}
}
}
_nw_ptr->set_input_data(internalName, inputMem);
break;
}