[IE CLDNN] Batched blob support is added for NV12 (#5230)
This commit is contained in:
parent
887c8c46cc
commit
d4b071bd49
@ -852,6 +852,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
|
||||
|
||||
capabilities.push_back(METRIC_VALUE(FP32));
|
||||
capabilities.push_back(METRIC_VALUE(BIN));
|
||||
capabilities.push_back(METRIC_VALUE(BATCHED_BLOB));
|
||||
if (device_info.supports_fp16)
|
||||
capabilities.push_back(METRIC_VALUE(FP16));
|
||||
if (device_info.supports_imad || device_info.supports_immad)
|
||||
|
@ -21,6 +21,7 @@ const char CLDNNInferRequest::fp32_suffix[] = "_fp32";
|
||||
const char str_not_allocated[] = "Input data was not allocated.";
|
||||
const char cannot_set_compound[] = "cannot set compound blob: supported only for input pre-processing";
|
||||
const char wrong_nv12_blob[] = "NV12 input blob is expected for input with NV12 color format";
|
||||
const char unsupported_batched_blob[] = "Batched input blob is expected to contain nv12 blobs";
|
||||
|
||||
Blob::Ptr CLDNNInferRequest::createInputBlob(const TensorDesc& desc, uint8_t* mem_ptr) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::createInputBlob");
|
||||
@ -322,6 +323,27 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr<cldnn::network> network,
|
||||
}
|
||||
}
|
||||
|
||||
void checkInputBlobNV12(const NV12Blob *nv12_ptr) {
|
||||
auto y_ptr = nv12_ptr->y()->as<gpu::ClBlob>();
|
||||
|
||||
// if the blobs are not remote, check their size
|
||||
if (!y_ptr) {
|
||||
if (nv12_ptr->y()->buffer() == nullptr) IE_THROW(NotAllocated) << str_not_allocated;
|
||||
}
|
||||
|
||||
auto uv_ptr = nv12_ptr->uv()->as<gpu::ClBlob>();
|
||||
if (!uv_ptr) {
|
||||
if (nv12_ptr->uv()->buffer() == nullptr) IE_THROW(NotAllocated) << str_not_allocated;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns element `idx` of a batched blob viewed as an NV12 blob.
///
/// Throws NotImplemented (message: unsupported_batched_blob) when the element
/// cannot be viewed as an NV12Blob.
NV12Blob *getNV12BlobOrException(BatchedBlob *batched_ptr, int idx) {
    if (auto *element_as_nv12 = batched_ptr->getBlob(idx)->as<NV12Blob>())
        return element_as_nv12;

    IE_THROW(NotImplemented) << unsupported_batched_blob;
}
|
||||
|
||||
void checkInputBlob(const Blob::Ptr &blob,
|
||||
const std::string &name,
|
||||
const InputInfo::Ptr foundInput,
|
||||
@ -334,23 +356,17 @@ void checkInputBlob(const Blob::Ptr &blob,
|
||||
|
||||
if (ColorFormat::NV12 == foundInput->getPreProcess().getColorFormat() &&
|
||||
nv12_two_inputs) {
|
||||
auto nv12_ptr = blob->as<NV12Blob>();
|
||||
|
||||
if (nv12_ptr == nullptr) {
|
||||
if (auto nv12_ptr = blob->as<NV12Blob>()) {
|
||||
checkInputBlobNV12(nv12_ptr);
|
||||
} else if (auto batched_ptr = blob->as<BatchedBlob>()) {
|
||||
for (auto i = 0; i < batched_ptr->size(); i++) {
|
||||
auto nv12_ptr = getNV12BlobOrException(batched_ptr, i);
|
||||
checkInputBlobNV12(nv12_ptr);
|
||||
}
|
||||
} else {
|
||||
IE_THROW(ParameterMismatch) << wrong_nv12_blob;
|
||||
}
|
||||
|
||||
auto y_ptr = nv12_ptr->y()->as<gpu::ClBlob>();
|
||||
|
||||
// if the blobs are not remote, check their size
|
||||
if (!y_ptr) {
|
||||
if (nv12_ptr->y()->buffer() == nullptr) IE_THROW() << str_not_allocated;
|
||||
}
|
||||
|
||||
auto uv_ptr = nv12_ptr->uv()->as<gpu::ClBlob>();
|
||||
if (!uv_ptr) {
|
||||
if (nv12_ptr->uv()->buffer() == nullptr) IE_THROW() << str_not_allocated;
|
||||
}
|
||||
} else {
|
||||
SizeVector dims = foundInput->getTensorDesc().getDims();
|
||||
|
||||
@ -498,27 +514,35 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data)
|
||||
// and put them into appropriate network inputs
|
||||
// that should then go into biplanar NV12 reorder
|
||||
auto nv12_ptr = data->as<NV12Blob>();
|
||||
auto batched_ptr = data->as<BatchedBlob>();
|
||||
|
||||
if (nv12_ptr == nullptr) {
|
||||
if (nv12_ptr != nullptr || batched_ptr != nullptr) {
|
||||
int num_blobs = batched_ptr != nullptr ? batched_ptr->size() : 1;
|
||||
|
||||
for (auto i = 0; i < num_blobs; i++) {
|
||||
if (batched_ptr != nullptr)
|
||||
nv12_ptr = getNV12BlobOrException(batched_ptr, i);
|
||||
|
||||
auto y_ptr = nv12_ptr->y()->as<gpu::ClBlob>();
|
||||
if (y_ptr) {
|
||||
auto y_impl = getBlobImpl(y_ptr);
|
||||
y_impl->allocate_if_needed();
|
||||
input_attach(internalName + "_Y" + std::to_string(i), y_impl->getMemory());
|
||||
is_remote = true;
|
||||
}
|
||||
|
||||
auto uv_ptr = nv12_ptr->uv()->as<gpu::ClBlob>();
|
||||
if (uv_ptr) {
|
||||
auto uv_impl = getBlobImpl(uv_ptr);
|
||||
uv_impl->allocate_if_needed();
|
||||
input_attach(internalName + "_UV" + std::to_string(i), uv_impl->getMemory());
|
||||
is_remote = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
IE_THROW(ParameterMismatch) << wrong_nv12_blob;
|
||||
}
|
||||
|
||||
auto y_ptr = nv12_ptr->y()->as<gpu::ClBlob>();
|
||||
if (y_ptr) {
|
||||
auto y_impl = getBlobImpl(y_ptr);
|
||||
y_impl->allocate_if_needed();
|
||||
input_attach(internalName + "_Y", y_impl->getMemory());
|
||||
is_remote = true;
|
||||
}
|
||||
|
||||
auto uv_ptr = nv12_ptr->uv()->as<gpu::ClBlob>();
|
||||
if (uv_ptr) {
|
||||
auto uv_impl = getBlobImpl(uv_ptr);
|
||||
uv_impl->allocate_if_needed();
|
||||
input_attach(internalName + "_UV", uv_impl->getMemory());
|
||||
is_remote = true;
|
||||
}
|
||||
|
||||
if (is_remote) _inputs[name] = data;
|
||||
}
|
||||
}
|
||||
@ -582,28 +606,33 @@ void CLDNNInferRequest::AllocateInputs() {
|
||||
|
||||
if (ColorFormat::NV12 == ni.second->getPreProcess().getColorFormat() &&
|
||||
m_graph->getConfig().nv12_two_inputs) {
|
||||
cldnn::primitive_id YName(name + "_Y");
|
||||
cldnn::primitive_id UVName(name + "_UV");
|
||||
std::vector<Blob::Ptr> blobs;
|
||||
for (auto i = 0; i < desc.getDims()[0]; i++) {
|
||||
cldnn::primitive_id YName(name + "_Y" + std::to_string(i));
|
||||
cldnn::primitive_id UVName(name + "_UV" + std::to_string(i));
|
||||
|
||||
if (inputLayouts.find(YName) == inputLayouts.end()) {
|
||||
IE_THROW() << "Input layout for " << YName << " is not found";
|
||||
if (inputLayouts.find(YName) == inputLayouts.end()) {
|
||||
IE_THROW(ParameterMismatch) << "Input layout for " << YName << " is not found";
|
||||
}
|
||||
if (inputLayouts.find(UVName) == inputLayouts.end()) {
|
||||
IE_THROW(ParameterMismatch) << "Input layout for " << YName << " is not found";
|
||||
}
|
||||
input_alloc(YName, inputLayouts.at(YName));
|
||||
input_alloc(UVName, inputLayouts.at(UVName));
|
||||
|
||||
size_t height = desc.getDims()[2], width = desc.getDims()[3];
|
||||
cldnn::pointer<uint8_t> input_mem_ptr_Y = inputsMemory.at(YName).pointer<uint8_t>();
|
||||
TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
|
||||
auto blobY = createInputBlob(ydesc, input_mem_ptr_Y.data());
|
||||
|
||||
cldnn::pointer<uint8_t> input_mem_ptr_UV = inputsMemory.at(UVName).pointer<uint8_t>();
|
||||
TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC);
|
||||
auto blobUV = createInputBlob(uvdesc, input_mem_ptr_UV.data());
|
||||
|
||||
blobs.push_back(make_shared_blob<NV12Blob>(blobY, blobUV));
|
||||
}
|
||||
if (inputLayouts.find(UVName) == inputLayouts.end()) {
|
||||
IE_THROW() << "Input layout for " << UVName << " is not found";
|
||||
}
|
||||
input_alloc(YName, inputLayouts.at(YName));
|
||||
input_alloc(UVName, inputLayouts.at(UVName));
|
||||
_inputs[name] = desc.getDims()[0] == 1 ? blobs[0] : make_shared_blob<BatchedBlob>(blobs);
|
||||
|
||||
size_t height = desc.getDims()[2], width = desc.getDims()[3];
|
||||
cldnn::pointer<uint8_t> input_mem_ptr_Y = inputsMemory.at(YName).pointer<uint8_t>();
|
||||
TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
|
||||
auto blobY = createInputBlob(ydesc, input_mem_ptr_Y.data());
|
||||
|
||||
cldnn::pointer<uint8_t> input_mem_ptr_UV = inputsMemory.at(UVName).pointer<uint8_t>();
|
||||
TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC);
|
||||
auto blobUV = createInputBlob(uvdesc, input_mem_ptr_UV.data());
|
||||
|
||||
_inputs[name] = make_shared_blob<NV12Blob>(blobY, blobUV);
|
||||
} else {
|
||||
if (inputLayouts.find(name) == inputLayouts.end()) {
|
||||
IE_THROW() << "Input layout for " << name << " is not found";
|
||||
@ -868,14 +897,21 @@ void CLDNNInferRequest::InferImpl() {
|
||||
PrepareInputDyn(name, *inputBlob);
|
||||
} else {
|
||||
auto nv12_ptr = inputBlob->as<NV12Blob>();
|
||||
auto batched_ptr = inputBlob->as<BatchedBlob>();
|
||||
|
||||
if (nv12_ptr == nullptr) {
|
||||
if (nv12_ptr != nullptr || batched_ptr != nullptr) {
|
||||
// special case for NV12 input blob
|
||||
int num_blobs = batched_ptr != nullptr ? batched_ptr->size() : 1;
|
||||
for (auto i = 0; i < num_blobs; i++) {
|
||||
if (batched_ptr != nullptr)
|
||||
nv12_ptr = getNV12BlobOrException(batched_ptr, i);
|
||||
|
||||
PrepareInput(name + "_Y" + std::to_string(i), *nv12_ptr->y());
|
||||
PrepareInput(name + "_UV" + std::to_string(i), *nv12_ptr->uv());
|
||||
}
|
||||
} else {
|
||||
// regular blob
|
||||
PrepareInput(name, *inputBlob);
|
||||
} else {
|
||||
// special case for NV12 input blob
|
||||
PrepareInput(name + "_Y", *nv12_ptr->y());
|
||||
PrepareInput(name + "_UV", *nv12_ptr->uv());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "api/input_layout.hpp"
|
||||
#include "api/reorder.hpp"
|
||||
#include "api/data.hpp"
|
||||
#include "api/concatenation.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
|
||||
@ -97,7 +98,6 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
|
||||
networkInputLayout.format = inputFormat;
|
||||
networkInputLayout.size = networkInputLayout.size.transform(inputFormat, 1);
|
||||
networkInputLayout.data_type = DataTypeFromPrecision(op->get_output_element_type(0));
|
||||
auto preprocessPrimID = "reorder:" + inputName + Program::m_preProcessTag;
|
||||
cldnn::primitive_id meanBlobID = inputName + Program::m_meanValuesTag;
|
||||
std::vector<float> meanValues;
|
||||
|
||||
@ -184,41 +184,55 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
|
||||
}
|
||||
int height = inputDims[2];
|
||||
int width = inputDims[3];
|
||||
std::vector<cldnn::primitive_id> reorders;
|
||||
|
||||
std::string y_name = inputName + "_Y";
|
||||
std::string uv_name = inputName + "_UV";
|
||||
for (auto i = 0; i < inputDims[0]; i++) {
|
||||
auto preprocessPrimID = "reorder:" + inputName + std::to_string(i) + Program::m_preProcessTag;
|
||||
std::string y_name = inputName + "_Y" + std::to_string(i);
|
||||
std::string uv_name = inputName + "_UV" + std::to_string(i);
|
||||
|
||||
cldnn::layout y_layout(DataTypeFromPrecision(ip),
|
||||
cldnn::format::nv12, { 1, 1, width, height });
|
||||
cldnn::layout uv_layout(DataTypeFromPrecision(ip),
|
||||
cldnn::format::nv12, { 1, 2, width / 2, height / 2 });
|
||||
auto inputY = cldnn::input_layout(y_name, y_layout);
|
||||
auto inputUV = cldnn::input_layout(uv_name, uv_layout);
|
||||
cldnn::layout y_layout(DataTypeFromPrecision(ip),
|
||||
cldnn::format::nv12, { 1, 1, width, height });
|
||||
cldnn::layout uv_layout(DataTypeFromPrecision(ip),
|
||||
cldnn::format::nv12, { 1, 2, width / 2, height / 2 });
|
||||
auto inputY = cldnn::input_layout(y_name, y_layout);
|
||||
auto inputUV = cldnn::input_layout(uv_name, uv_layout);
|
||||
|
||||
p.AddPrimitive(inputY);
|
||||
p.inputLayouts.insert({ inputInfo->name() + "_Y", y_layout });
|
||||
p.AddPrimitive(inputUV);
|
||||
p.inputLayouts.insert({ inputInfo->name() + "_UV", uv_layout });
|
||||
switch (preProcess.getMeanVariant()) {
|
||||
case NONE:
|
||||
case MEAN_VALUE: {
|
||||
p.AddPrimitive(cldnn::reorder(preprocessPrimID, y_name, uv_name, networkInputLayout, meanValues));
|
||||
break;
|
||||
}
|
||||
case MEAN_IMAGE: {
|
||||
p.AddPrimitive(cldnn::reorder(preprocessPrimID, y_name, uv_name, networkInputLayout, meanBlobID));
|
||||
break;
|
||||
}
|
||||
default: IE_THROW() << "Invalid mean variant in input " + inputName;
|
||||
break;
|
||||
p.AddPrimitive(inputY);
|
||||
p.inputLayouts.insert({ inputInfo->name() + "_Y" + std::to_string(i), y_layout });
|
||||
p.AddPrimitive(inputUV);
|
||||
p.inputLayouts.insert({ inputInfo->name() + "_UV" + std::to_string(i), uv_layout });
|
||||
switch (preProcess.getMeanVariant()) {
|
||||
case NONE:
|
||||
case MEAN_VALUE: {
|
||||
p.AddPrimitive(cldnn::reorder(preprocessPrimID, y_name, uv_name, networkInputLayout, meanValues));
|
||||
break;
|
||||
}
|
||||
case MEAN_IMAGE: {
|
||||
p.AddPrimitive(cldnn::reorder(preprocessPrimID, y_name, uv_name, networkInputLayout, meanBlobID));
|
||||
break;
|
||||
}
|
||||
default: IE_THROW(Unexpected) << "Invalid mean variant in input " + inputName;
|
||||
break;
|
||||
}
|
||||
|
||||
p.primitivesToIRLayersMap[preprocessPrimID] = { inputInfo->name() };
|
||||
p.primitivesToIRLayersMap[y_name] = { inputInfo->name() };
|
||||
p.primitivesToIRLayersMap[uv_name] = { inputInfo->name() };
|
||||
p.profilingIDs.push_back(preprocessPrimID);
|
||||
p.InitProfileInfo(preprocessPrimID, "Reorder");
|
||||
p.primitiveIDs[inputName] = preprocessPrimID; // If it is batched blob, it will be overwritten afterwards.
|
||||
p.primitiveIDs[preprocessPrimID] = preprocessPrimID;
|
||||
reorders.push_back(preprocessPrimID);
|
||||
}
|
||||
|
||||
p.primitivesToIRLayersMap[preprocessPrimID] = { inputInfo->name() };
|
||||
p.primitivesToIRLayersMap[y_name] = { inputInfo->name() };
|
||||
p.primitivesToIRLayersMap[uv_name] = { inputInfo->name() };
|
||||
p.profilingIDs.push_back(preprocessPrimID);
|
||||
p.InitProfileInfo(preprocessPrimID, "Reorder");
|
||||
if (inputDims[0] > 1) {
|
||||
auto concatPrimID = "concat:" + inputName + Program::m_preProcessTag;
|
||||
p.AddPrimitive(cldnn::concatenation(concatPrimID, reorders, cldnn::concatenation::along_b));
|
||||
p.primitiveIDs[inputName] = concatPrimID;
|
||||
}
|
||||
} else {
|
||||
auto preprocessPrimID = "reorder:" + inputName + Program::m_preProcessTag;
|
||||
cldnn::layout inputLayout(networkInputLayout);
|
||||
inputLayout.data_type = DataTypeFromPrecision(ip);
|
||||
p.inputLayouts.insert({ inputInfo->name(), inputLayout });
|
||||
@ -244,11 +258,9 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
|
||||
}
|
||||
p.InitProfileInfo(preprocessPrimID, "reorder");
|
||||
p.primitiveIDs[preprocessPrimID] = preprocessPrimID;
|
||||
p.primitiveIDs[inputName] = preprocessPrimID;
|
||||
p.profilingIDs.push_back(preprocessPrimID);
|
||||
}
|
||||
|
||||
p.primitiveIDs[inputName] = preprocessPrimID;
|
||||
p.primitiveIDs[preprocessPrimID] = preprocessPrimID;
|
||||
}
|
||||
|
||||
REGISTER_FACTORY_IMPL(v0, Parameter);
|
||||
|
@ -127,6 +127,150 @@ TEST_F(RemoteBlob_Test, smoke_canInferOnUserContext) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Fixture for the batched NV12 blob tests, parameterized by the number of images in the batch.
class BatchedBlob_Test : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<size_t> {
    // Caches the test parameter so test bodies can read it as a plain member.
    void SetUp() override {
        num_batch = this->GetParam();
    }

public:
    /// Builds the suffix of the instantiated test name, e.g. "num_batch_2".
    static std::string getTestCaseName(const testing::TestParamInfo<std::size_t> &obj) {
        return "num_batch_" + std::to_string(obj.param);
    }

protected:
    // Overwritten in SetUp(); default-initialized so it never holds an indeterminate value.
    size_t num_batch = 0;
    std::vector<std::shared_ptr<ngraph::Function>> fn_ptrs;
};
|
||||
|
||||
// Feeds num_batch NV12 images to a ConvPoolRelu network through OpenCL image-backed blobs
// (a single NV12 blob, or a BatchedBlob of NV12 blobs when num_batch > 1), then checks every
// per-image slice of that output against a batch-1 run of the same image given as a host NV12 blob.
TEST_P(BatchedBlob_Test, canInputNV12) {
#if defined(_WIN32) || defined(ANDROID)
    GTEST_SKIP();
#endif
    const int height = 16;
    const int width = 16;

    // ------------------------------------------------------
    // Prepare input data
    const InferenceEngine::TensorDesc y_plane_desc(InferenceEngine::Precision::U8, {1, 1, height, width},
        InferenceEngine::Layout::NHWC);
    const InferenceEngine::TensorDesc uv_plane_desc(InferenceEngine::Precision::U8, {1, 2, height / 2, width / 2},
        InferenceEngine::Layout::NHWC);
    std::vector<InferenceEngine::Blob::Ptr> fake_image_data_y;
    std::vector<InferenceEngine::Blob::Ptr> fake_image_data_uv;

    // num_batch is size_t, so the index is size_t as well (avoids a signed/unsigned comparison).
    for (size_t i = 0; i < num_batch; i++) {
        fake_image_data_y.push_back(FuncTestUtils::createAndFillBlob(y_plane_desc, 50, 0, 1, static_cast<int>(i)));
        fake_image_data_uv.push_back(FuncTestUtils::createAndFillBlob(uv_plane_desc, 256, 0, 1, static_cast<int>(i)));
    }

    auto ie = InferenceEngine::Core();

    // ------------------------------------------------------
    // inference using remote blob with batch
    auto fn_ptr_remote = ngraph::builder::subgraph::makeConvPoolRelu({num_batch, 3, height, width});

    CNNNetwork net_remote(fn_ptr_remote);
    net_remote.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
    net_remote.getInputsInfo().begin()->second->setPrecision(Precision::U8);
    net_remote.getInputsInfo().begin()->second->getPreProcess().setColorFormat(ColorFormat::NV12);

    /* XXX: is it correct to set KEY_CLDNN_NV12_TWO_INPUTS in case of remote blob? */
    auto exec_net_b = ie.LoadNetwork(net_remote, CommonTestUtils::DEVICE_GPU,
                { { CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS, PluginConfigParams::YES} });
    auto inf_req_remote = exec_net_b.CreateInferRequest();
    auto cldnn_context = exec_net_b.GetContext();

    // Fail the test instead of dereferencing a null pointer if the context is not a ClContext.
    auto cl_ctx = std::dynamic_pointer_cast<ClContext>(cldnn_context);
    ASSERT_TRUE(cl_ctx != nullptr);
    cl_context ctx = cl_ctx->get();
    auto ocl_instance = std::make_shared<OpenCL>(ctx);
    cl_int err;

    std::vector<cl_mem> nv12_image_plane_y, nv12_image_plane_uv;
    std::vector<cl::Image2D> img_y, img_uv;
    std::vector<Blob::Ptr> blob_remote;

    for (size_t i = 0; i < num_batch; i++) {
        cl_image_format image_format;
        cl_image_desc image_desc = { 0 };
        image_format.image_channel_order = CL_R;
        image_format.image_channel_data_type = CL_UNORM_INT8;
        image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
        image_desc.image_width = width;
        image_desc.image_height = height;

        nv12_image_plane_y.push_back(clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, NULL, &err));
        ASSERT_EQ(err, 0);

        // The UV image reuses the same descriptors with two channels and half the extent.
        image_format.image_channel_order = CL_RG;
        image_desc.image_width = width / 2;
        image_desc.image_height = height / 2;

        nv12_image_plane_uv.push_back(clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, NULL, &err));
        ASSERT_EQ(err, 0);

        size_t origin[3] = { 0, 0, 0 };
        size_t y_region[3] = { static_cast<size_t>(width), static_cast<size_t>(height), 1 };
        size_t uv_region[3] = { static_cast<size_t>(width) / 2, static_cast<size_t>(height) / 2, 1 };

        err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_y[i],
            true, origin, y_region, 0, 0, fake_image_data_y[i]->buffer(), 0, NULL, NULL);
        ASSERT_EQ(err, 0);

        err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_uv[i],
            true, origin, uv_region, 0, 0, fake_image_data_uv[i]->buffer(), 0, NULL, NULL);
        ASSERT_EQ(err, 0);

        img_y.push_back(cl::Image2D(nv12_image_plane_y[i]));
        img_uv.push_back(cl::Image2D(nv12_image_plane_uv[i]));

        blob_remote.push_back(make_shared_blob_nv12(cldnn_context, img_y[i], img_uv[i]));
    }

    // A single image is passed as a plain NV12 blob; several images are wrapped in a BatchedBlob.
    if (num_batch == 1) {
        inf_req_remote.SetBlob(net_remote.getInputsInfo().begin()->first, blob_remote[0]);
    } else {
        auto batched_blob = make_shared_blob<BatchedBlob>(blob_remote);
        inf_req_remote.SetBlob(net_remote.getInputsInfo().begin()->first, batched_blob);
    }

    inf_req_remote.Infer();

    auto outputBlob_shared = inf_req_remote.GetBlob(net_remote.getOutputsInfo().begin()->first);

    // ------------------------------------------------------
    // Setup to inference using local blob with batch=1
    auto fn_ptr_local = ngraph::builder::subgraph::makeConvPoolRelu({1, 3, height, width});

    CNNNetwork net_local(fn_ptr_local);

    net_local.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
    net_local.getInputsInfo().begin()->second->setPrecision(Precision::U8);
    net_local.getInputsInfo().begin()->second->getPreProcess().setColorFormat(ColorFormat::NV12);

    auto exec_net_b1 = ie.LoadNetwork(net_local, CommonTestUtils::DEVICE_GPU);

    auto inf_req_local = exec_net_b1.CreateInferRequest();

    const float thr = 0.1f;

    // Run regular input for each image and compare against batched blob
    for (size_t i = 0; i < num_batch; i++) {
        auto y_blob = make_shared_blob<uint8_t>(y_plane_desc, fake_image_data_y[i]->buffer().as<uint8_t *>());
        auto uv_blob = make_shared_blob<uint8_t>(uv_plane_desc, fake_image_data_uv[i]->buffer().as<uint8_t *>());
        auto blob = make_shared_blob<NV12Blob>(y_blob, uv_blob);
        inf_req_local.SetBlob(net_local.getInputsInfo().begin()->first, blob);
        inf_req_local.Infer();
        auto output_blob_local = inf_req_local.GetBlob(net_local.getOutputsInfo().begin()->first);

        // The batched output is read as consecutive per-image slices, each the size of the
        // batch-1 output. Make sure slice i lies inside the batched buffer before wrapping it.
        ASSERT_GE(outputBlob_shared->size(), output_blob_local->size() * (i + 1));
        auto split_shared_blob = make_shared_blob<float_t>(output_blob_local->getTensorDesc(),
                                    outputBlob_shared->buffer().as<float_t *>() + output_blob_local->size() * i);
        ASSERT_EQ(output_blob_local->size(), split_shared_blob->size());

        FuncTestUtils::compareBlobs(output_blob_local, split_shared_blob, thr, "", false);
    }
}
|
||||
|
||||
const std::vector<size_t> num_batches{1, 2, 4};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_RemoteBlob, BatchedBlob_Test, ::testing::ValuesIn(num_batches), BatchedBlob_Test::getTestCaseName);
|
||||
|
||||
class TwoNets_Test : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<size_t> {
|
||||
void SetUp() override {
|
||||
num_streams = this->GetParam();
|
||||
@ -211,6 +355,6 @@ TEST_P(TwoNets_Test, canInferTwoExecNets) {
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<size_t> num_strems{1, 2};
|
||||
const std::vector<size_t> num_streams{1, 2};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_RemoteBlob, TwoNets_Test, ::testing::ValuesIn(num_strems), TwoNets_Test::getTestCaseName);
|
||||
INSTANTIATE_TEST_CASE_P(smoke_RemoteBlob, TwoNets_Test, ::testing::ValuesIn(num_streams), TwoNets_Test::getTestCaseName);
|
||||
|
@ -10,8 +10,9 @@ using namespace InferenceEngine;
|
||||
|
||||
const std::vector<FuncTestUtils::BlobKind> blobKinds = {
|
||||
FuncTestUtils::BlobKind::Simple,
|
||||
FuncTestUtils::BlobKind::Compound,
|
||||
FuncTestUtils::BlobKind::BatchOfSimple
|
||||
FuncTestUtils::BlobKind::Compound
|
||||
/* BatchOfSimple is not supported on GPU currently. Batch of remote is supported */
|
||||
/* , FuncTestUtils::BlobKind::BatchOfSimple */
|
||||
};
|
||||
|
||||
const SetBlobOfKindConfig gpuConfig{}; //nothing special
|
||||
|
Loading…
Reference in New Issue
Block a user