[GPU] Force shared surface cache cleanup in RemoteContext (#9816)
This commit is contained in:
parent
e0c10c1a6f
commit
a90bb1ff01
@ -476,6 +476,11 @@ public:
|
||||
const Config& config = {})
|
||||
: _impl(plugin, params, config) {}
|
||||
|
||||
// Destructor: explicitly empties shared_surf_reg and then shared_obj_reg when the
// context object is destroyed.
// NOTE(review): both registries are declared outside this view; presumably they are
// the shared surface/object caches named in the commit title - confirm.
~TypedExecutionContext() {
|
||||
shared_surf_reg.clear();
|
||||
shared_obj_reg.clear();
|
||||
}
|
||||
|
||||
InferenceEngine::ParamMap getParams() const override { return _impl.getParams(); }
|
||||
std::string getDeviceName() const noexcept override { return _impl.getDeviceName(); }
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <gpu/gpu_config.hpp>
|
||||
#include <common_test_utils/test_common.hpp>
|
||||
#include <common_test_utils/test_constants.hpp>
|
||||
#include "ngraph_functions/subgraph_builders.hpp"
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifdef ENABLE_DX11
|
||||
@ -46,7 +47,9 @@ using namespace ::testing;
|
||||
using namespace InferenceEngine;
|
||||
using namespace InferenceEngine::gpu;
|
||||
|
||||
class DX11RemoteCtx_Test : public CommonTestUtils::TestsCommon {
|
||||
struct DX11RemoteCtx_Test : public CommonTestUtils::TestsCommon {
|
||||
virtual ~DX11RemoteCtx_Test() = default;
|
||||
|
||||
protected:
|
||||
CComPtr<IDXGIFactory> factory;
|
||||
std::vector<CComPtr<IDXGIAdapter>> intel_adapters;
|
||||
@ -103,20 +106,58 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
struct DX11CachedTexture_Test : DX11RemoteCtx_Test {
|
||||
D3D11_TEXTURE2D_DESC texture_description = { 0 };
|
||||
std::vector<CComPtr<ID3D11Texture2D>> dx11_textures;
|
||||
CComPtr<ID3D11Device> device_ptr;
|
||||
CComPtr<ID3D11DeviceContext> ctx_ptr;
|
||||
|
||||
void SetUp() override {
|
||||
DX11RemoteCtx_Test::SetUp();
|
||||
ASSERT_FALSE(intel_adapters.empty());
|
||||
ASSERT_NO_THROW(std::tie(device_ptr, ctx_ptr) =
|
||||
create_device_with_ctx(*intel_adapters.begin()));
|
||||
|
||||
// create textures
|
||||
const size_t textures_count = 4;
|
||||
|
||||
texture_description.Width = 1024;
|
||||
texture_description.Height = 768;
|
||||
texture_description.MipLevels = 1;
|
||||
|
||||
texture_description.ArraySize = 1;
|
||||
texture_description.Format = DXGI_FORMAT_NV12;
|
||||
texture_description.SampleDesc.Count = 1;
|
||||
texture_description.Usage = D3D11_USAGE_DEFAULT;
|
||||
texture_description.MiscFlags = 0;
|
||||
texture_description.BindFlags = 0;
|
||||
|
||||
dx11_textures.reserve(textures_count);
|
||||
HRESULT err = S_OK;
|
||||
for (size_t i = 0; i < textures_count; i++) {
|
||||
ID3D11Texture2D *pTexture2D = nullptr;
|
||||
err = device_ptr->CreateTexture2D(&texture_description, nullptr, &pTexture2D);
|
||||
ASSERT_FALSE(FAILED(err));
|
||||
dx11_textures.emplace_back(pTexture2D);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(DX11RemoteCtx_Test, smoke_make_shared_context) {
|
||||
#if defined(ANDROID)
|
||||
GTEST_SKIP();
|
||||
#endif
|
||||
auto ie = InferenceEngine::Core();
|
||||
|
||||
for (auto adapter : intel_adapters) {
|
||||
CComPtr<ID3D11Device> device_ptr;
|
||||
CComPtr<ID3D11DeviceContext> ctx_ptr;
|
||||
CComPtr<ID3D11Device> device_ptr;
|
||||
CComPtr<ID3D11DeviceContext> ctx_ptr;
|
||||
|
||||
ASSERT_NO_THROW(std::tie(device_ptr, ctx_ptr) =
|
||||
create_device_with_ctx(adapter));
|
||||
auto remote_context = make_shared_context(ie,
|
||||
CommonTestUtils::DEVICE_GPU,
|
||||
device_ptr);
|
||||
ASSERT_TRUE(remote_context);
|
||||
}
|
||||
ASSERT_NO_THROW(std::tie(device_ptr, ctx_ptr) =
|
||||
create_device_with_ctx(intel_adapters[0]));
|
||||
auto remote_context = make_shared_context(ie,
|
||||
CommonTestUtils::DEVICE_GPU,
|
||||
device_ptr);
|
||||
ASSERT_TRUE(remote_context);
|
||||
|
||||
for (auto adapter : other_adapters) {
|
||||
CComPtr<ID3D11Device> device_ptr;
|
||||
@ -130,5 +171,68 @@ TEST_F(DX11RemoteCtx_Test, smoke_make_shared_context) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Wraps every fixture texture into an NV12 remote blob several times over, using a
// single remote context created from the fixture's D3D11 device, and checks that
// each blob is created and can be allocated without throwing.
TEST_F(DX11CachedTexture_Test, smoke_make_shared_nv12_blob_cached) {
#if defined(ANDROID)
    GTEST_SKIP();
#endif
    auto ie = InferenceEngine::Core();
    auto remote_context = make_shared_context(ie, CommonTestUtils::DEVICE_GPU, device_ptr);
    ASSERT_TRUE(remote_context);

    // Every texture is wrapped once per pass, so the same surfaces are requested
    // again on each subsequent pass.
    const size_t total_run_number = 4;
    size_t pass = 0;
    while (pass < total_run_number) {
        for (const auto& texture : dx11_textures) {
            auto shared_blob = make_shared_blob_nv12(texture_description.Height,
                                                     texture_description.Width,
                                                     remote_context,
                                                     texture);
            ASSERT_TRUE(shared_blob);
            ASSERT_NO_THROW(shared_blob->allocate());
        }
        ++pass;
    }
}
|
||||
|
||||
// Runs repeated inference with one shared NV12 remote blob backed by the first
// fixture texture, re-setting the same blob on the request before every Infer().
TEST_F(DX11CachedTexture_Test, _make_shared_nv12_blob_cached_inference) {
#if defined(ANDROID)
    GTEST_SKIP();
#endif

    // Network whose single input matches the fixture texture size (batch of 1).
    auto fn_ptr_remote = ngraph::builder::subgraph::makeConvPoolRelu({1, 3, texture_description.Height, texture_description.Width});
    auto ie = InferenceEngine::Core();

    CNNNetwork net(fn_ptr_remote);
    // getInputsInfo()/getOutputsInfo() are called once and the results kept in
    // locals, so the names used inside the loop do not have to be rebuilt from
    // temporaries on every iteration.
    const auto inputs_info = net.getInputsInfo();
    const auto& input_name = inputs_info.begin()->first;
    const auto& input_info = inputs_info.begin()->second;
    input_info->setLayout(Layout::NCHW);
    input_info->setPrecision(Precision::U8);
    input_info->getPreProcess().setColorFormat(ColorFormat::NV12);
    const std::string output_name = net.getOutputsInfo().begin()->first;

    // Validate the context before it is used to create the blob.
    auto remote_context = make_shared_context(ie, CommonTestUtils::DEVICE_GPU, device_ptr);
    ASSERT_TRUE(remote_context);

    Blob::Ptr nv12_blob = make_shared_blob_nv12(texture_description.Height,
                                                texture_description.Width,
                                                remote_context, dx11_textures[0]);
    ASSERT_TRUE(nv12_blob);

    // Inner scope: the executable network and the infer request are destroyed
    // before remote_context and nv12_blob go out of scope.
    {
        auto exec_net = ie.LoadNetwork(net, remote_context,
            { { GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS, PluginConfigParams::YES} });

        // inference using shared nv12 blob
        auto inf_req_shared = exec_net.CreateInferRequest();

        const size_t iteration_count = 10;
        for (size_t i = 0; i < iteration_count; i++) {
            inf_req_shared.SetBlob(input_name, nv12_blob);

            inf_req_shared.Infer();
            auto outputBlob_shared = inf_req_shared.GetBlob(output_name);
            ASSERT_TRUE(outputBlob_shared);
        }
    }
}
|
||||
|
||||
|
||||
#endif // ENABLE_DX11
|
||||
#endif // WIN32
|
||||
|
Loading…
Reference in New Issue
Block a user