[GPU] Force shared surface cache cleanup in RemoteContext (#9816)

This commit is contained in:
Mikhail Letavin 2022-01-26 19:19:29 +03:00 committed by GitHub
parent e0c10c1a6f
commit a90bb1ff01
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 120 additions and 11 deletions

View File

@ -476,6 +476,11 @@ public:
const Config& config = {})
: _impl(plugin, params, config) {}
// Explicitly empties shared_surf_reg and shared_obj_reg when the context is destroyed.
// NOTE(review): per the commit title this forces cleanup of the shared surface cache;
// the declarations of both registries are outside this view - confirm what they hold
// and whether the clearing order matters.
~TypedExecutionContext() {
shared_surf_reg.clear();
shared_obj_reg.clear();
}
// Forwards to the wrapped implementation object and returns its parameter map.
InferenceEngine::ParamMap getParams() const override {
    return _impl.getParams();
}
// Forwards to the wrapped implementation object and returns its device name.
std::string getDeviceName() const noexcept override {
    return _impl.getDeviceName();
}

View File

@ -13,6 +13,7 @@
#include <gpu/gpu_config.hpp>
#include <common_test_utils/test_common.hpp>
#include <common_test_utils/test_constants.hpp>
#include "ngraph_functions/subgraph_builders.hpp"
#ifdef _WIN32
#ifdef ENABLE_DX11
@ -46,7 +47,9 @@ using namespace ::testing;
using namespace InferenceEngine;
using namespace InferenceEngine::gpu;
class DX11RemoteCtx_Test : public CommonTestUtils::TestsCommon {
struct DX11RemoteCtx_Test : public CommonTestUtils::TestsCommon {
virtual ~DX11RemoteCtx_Test() = default;
protected:
CComPtr<IDXGIFactory> factory;
std::vector<CComPtr<IDXGIAdapter>> intel_adapters;
@ -103,20 +106,58 @@ protected:
}
};
struct DX11CachedTexture_Test : DX11RemoteCtx_Test {
D3D11_TEXTURE2D_DESC texture_description = { 0 };
std::vector<CComPtr<ID3D11Texture2D>> dx11_textures;
CComPtr<ID3D11Device> device_ptr;
CComPtr<ID3D11DeviceContext> ctx_ptr;
void SetUp() override {
DX11RemoteCtx_Test::SetUp();
ASSERT_FALSE(intel_adapters.empty());
ASSERT_NO_THROW(std::tie(device_ptr, ctx_ptr) =
create_device_with_ctx(*intel_adapters.begin()));
// create textures
const size_t textures_count = 4;
texture_description.Width = 1024;
texture_description.Height = 768;
texture_description.MipLevels = 1;
texture_description.ArraySize = 1;
texture_description.Format = DXGI_FORMAT_NV12;
texture_description.SampleDesc.Count = 1;
texture_description.Usage = D3D11_USAGE_DEFAULT;
texture_description.MiscFlags = 0;
texture_description.BindFlags = 0;
dx11_textures.reserve(textures_count);
HRESULT err = S_OK;
for (size_t i = 0; i < textures_count; i++) {
ID3D11Texture2D *pTexture2D = nullptr;
err = device_ptr->CreateTexture2D(&texture_description, nullptr, &pTexture2D);
ASSERT_FALSE(FAILED(err));
dx11_textures.emplace_back(pTexture2D);
}
}
};
TEST_F(DX11RemoteCtx_Test, smoke_make_shared_context) {
#if defined(ANDROID)
GTEST_SKIP();
#endif
auto ie = InferenceEngine::Core();
for (auto adapter : intel_adapters) {
CComPtr<ID3D11Device> device_ptr;
CComPtr<ID3D11DeviceContext> ctx_ptr;
CComPtr<ID3D11Device> device_ptr;
CComPtr<ID3D11DeviceContext> ctx_ptr;
ASSERT_NO_THROW(std::tie(device_ptr, ctx_ptr) =
create_device_with_ctx(adapter));
auto remote_context = make_shared_context(ie,
CommonTestUtils::DEVICE_GPU,
device_ptr);
ASSERT_TRUE(remote_context);
}
ASSERT_NO_THROW(std::tie(device_ptr, ctx_ptr) =
create_device_with_ctx(intel_adapters[0]));
auto remote_context = make_shared_context(ie,
CommonTestUtils::DEVICE_GPU,
device_ptr);
ASSERT_TRUE(remote_context);
for (auto adapter : other_adapters) {
CComPtr<ID3D11Device> device_ptr;
@ -130,5 +171,68 @@ TEST_F(DX11RemoteCtx_Test, smoke_make_shared_context) {
}
}
// Creates one shared remote context on the fixture device and then, over several passes,
// wraps every fixture texture into an NV12 remote blob and allocates it.
// NOTE(review): the test name suggests the repeated passes are meant to hit the context's
// blob cache - confirm against the plugin implementation.
TEST_F(DX11CachedTexture_Test, smoke_make_shared_nv12_blob_cached) {
#if defined(ANDROID)
    GTEST_SKIP();
#endif
    auto ie = InferenceEngine::Core();
    auto remote_context = make_shared_context(ie, CommonTestUtils::DEVICE_GPU, device_ptr);
    ASSERT_TRUE(remote_context);

    const size_t total_run_number = 4;
    for (size_t run = 0; run != total_run_number; ++run) {
        // same set of textures on every pass
        for (const auto& texture : dx11_textures) {
            auto nv12_blob = make_shared_blob_nv12(texture_description.Height,
                                                   texture_description.Width,
                                                   remote_context,
                                                   texture);
            ASSERT_TRUE(nv12_blob);
            ASSERT_NO_THROW(nv12_blob->allocate());
        }
    }
}
// Loads a small Conv/Pool/ReLU network into a shared DX11 remote context and runs several
// inferences, feeding the same NV12 remote blob (built on dx11_textures[0]) every time.
TEST_F(DX11CachedTexture_Test, _make_shared_nv12_blob_cached_inference) {
#if defined(ANDROID)
    GTEST_SKIP();
#endif
    // inference using remote blob
    auto fn_ptr_remote = ngraph::builder::subgraph::makeConvPoolRelu({1, 3, texture_description.Height, texture_description.Width});

    auto ie = InferenceEngine::Core();

    // U8 NCHW input that is fed NV12 data
    CNNNetwork net(fn_ptr_remote);
    net.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
    net.getInputsInfo().begin()->second->setPrecision(Precision::U8);
    net.getInputsInfo().begin()->second->getPreProcess().setColorFormat(ColorFormat::NV12);

    // Look the tensor names up once instead of re-querying the network on every iteration.
    const auto input_name = net.getInputsInfo().begin()->first;
    const auto output_name = net.getOutputsInfo().begin()->first;

    auto remote_context = make_shared_context(ie, CommonTestUtils::DEVICE_GPU, device_ptr);
    // Validate the context before it is handed to the blob factory below.
    ASSERT_TRUE(remote_context);

    Blob::Ptr nv12_blob = make_shared_blob_nv12(texture_description.Height,
                                                texture_description.Width,
                                                remote_context, dx11_textures[0]);
    ASSERT_TRUE(nv12_blob);

    // Inner scope kept from the original: the executable network and the request go out of
    // scope before nv12_blob and remote_context do.
    {
        auto exec_net = ie.LoadNetwork(net, remote_context,
            { { GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS, PluginConfigParams::YES} });

        // inference using shared nv12 blob
        auto inf_req_shared = exec_net.CreateInferRequest();

        const size_t iteration_count = 10;
        for (size_t i = 0; i < iteration_count; i++) {
            inf_req_shared.SetBlob(input_name, nv12_blob);
            inf_req_shared.Infer();
            auto outputBlob_shared = inf_req_shared.GetBlob(output_name);
            ASSERT_TRUE(outputBlob_shared);
        }
    }
}
#endif // ENABLE_DX11
#endif // WIN32