[GPU] Add NV12toRGB/NV12toBGR operations (#8838)

This commit is contained in:
Roman Lyamin 2021-12-14 12:01:50 +03:00 committed by GitHub
parent b6176fa768
commit 7d0d0ea503
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 1465 additions and 186 deletions

View File

@ -498,21 +498,7 @@ TEST_F(OVRemoteTensor_Test, smoke_canInferOnUserQueue_in_order) {
}
}
class OVRemoteTensorBatched_Test : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<size_t> {
void SetUp() override {
num_batch = this->GetParam();
};
public:
static std::string getTestCaseName(const testing::TestParamInfo<std::size_t> &obj) {
return "num_batch_" + std::to_string(obj.param);
}
protected:
size_t num_batch;
std::vector<std::shared_ptr<ngraph::Function>> fn_ptrs;
};
TEST_P(OVRemoteTensorBatched_Test, DISABLED_canInputNV12) {
TEST_F(OVRemoteTensor_Test, NV12toBGR_image) {
#if defined(ANDROID)
GTEST_SKIP();
#endif
@ -521,119 +507,188 @@ TEST_P(OVRemoteTensorBatched_Test, DISABLED_canInputNV12) {
// ------------------------------------------------------
// Prepare input data
std::vector<ov::runtime::Tensor> fake_image_data_y;
std::vector<ov::runtime::Tensor> fake_image_data_uv;
for (int i = 0; i < num_batch; i++) {
fake_image_data_y.push_back(FuncTestUtils::create_and_fill_tensor(ov::element::u8, {1, 1, height, width}, 50, 0, 1, i));
fake_image_data_uv.push_back(FuncTestUtils::create_and_fill_tensor(ov::element::u8, {1, 2, height / 2, width / 2}, 256, 0, 1, i));
}
ov::runtime::Tensor fake_image_data_y = FuncTestUtils::create_and_fill_tensor(ov::element::u8, {1, 1, height, width}, 50, 0, 1);
ov::runtime::Tensor fake_image_data_uv = FuncTestUtils::create_and_fill_tensor(ov::element::u8, {1, 2, height / 2, width / 2}, 256, 0, 1);
auto ie = ov::runtime::Core();
// ------------------------------------------------------
// inference using remote tensor with batch
auto fn_ptr_remote = ngraph::builder::subgraph::makeConvPoolRelu({num_batch, 3, height, width});
// inference using remote tensor
auto fn_ptr_remote = ngraph::builder::subgraph::makeConvPoolRelu({1, 3, height, width});
using namespace ov::preprocess;
auto p = PrePostProcessor(fn_ptr_remote);
p.input().tensor().set_element_type(ov::element::i8).set_color_format(ov::preprocess::ColorFormat::NV12_TWO_PLANES);
p.input().preprocess().convert_element_type(ov::element::f32);
p.input().tensor().set_element_type(ov::element::u8)
.set_color_format(ov::preprocess::ColorFormat::NV12_TWO_PLANES, {"y", "uv"})
.set_memory_type(GPU_CONFIG_KEY(SURFACE));
p.input().preprocess().convert_color(ov::preprocess::ColorFormat::BGR);
p.input().model().set_layout("NCHW");
auto function = p.build();
auto exec_net_b = ie.compile_model(fn_ptr_remote, CommonTestUtils::DEVICE_GPU);
auto param_input_y = fn_ptr_remote->get_parameters().at(0);
auto param_input_uv = fn_ptr_remote->get_parameters().at(1);
auto exec_net_b = ie.compile_model(function, CommonTestUtils::DEVICE_GPU);
auto inf_req_remote = exec_net_b.create_infer_request();
auto cldnn_context = exec_net_b.get_context().as<ov::runtime::gpu::ocl::ClContext>();
cl_context ctx = cldnn_context.get();
auto ocl_instance = std::make_shared<OpenCL>(ctx);
cl_int err;
std::vector<cl_mem> nv12_image_plane_y, nv12_image_plane_uv;
std::vector<cl::Image2D> img_y, img_uv;
std::vector<std::pair<ov::runtime::RemoteTensor, ov::runtime::RemoteTensor>> tensor_remote;
cl_image_format image_format;
cl_image_desc image_desc = { 0 };
image_format.image_channel_order = CL_R;
image_format.image_channel_data_type = CL_UNORM_INT8;
image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
image_desc.image_width = width;
image_desc.image_height = height;
cl_mem nv12_image_plane_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, NULL, &err);
ASSERT_EQ(err, 0);
for (int i = 0; i < num_batch; i++) {
cl_image_format image_format;
cl_image_desc image_desc = { 0 };
image_format.image_channel_order = CL_R;
image_format.image_channel_data_type = CL_UNORM_INT8;
image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
image_desc.image_width = width;
image_desc.image_height = height;
image_format.image_channel_order = CL_RG;
image_desc.image_width = width / 2;
image_desc.image_height = height / 2;
cl_mem nv12_image_plane_uv = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, NULL, &err);
ASSERT_EQ(err, 0);
nv12_image_plane_y.push_back(clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, NULL, &err));
ASSERT_EQ(err, 0);
size_t origin[3] = { 0, 0, 0 };
size_t y_region[3] = { (size_t)width, (size_t)height, 1 };
size_t uv_region[3] = { (size_t)width / 2, (size_t)height / 2, 1 };
image_format.image_channel_order = CL_RG;
image_desc.image_width = width / 2;
image_desc.image_height = height / 2;
err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_y,
true, origin, y_region, 0, 0, fake_image_data_y.data(), 0, NULL, NULL);
ASSERT_EQ(err, 0);
nv12_image_plane_uv.push_back(clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, NULL, &err));
ASSERT_EQ(err, 0);
err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_uv,
true, origin, uv_region, 0, 0, fake_image_data_uv.data(), 0, NULL, NULL);
ASSERT_EQ(err, 0);
size_t origin[3] = { 0, 0, 0 };
size_t y_region[3] = { (size_t)width, (size_t)height, 1 };
size_t uv_region[3] = { (size_t)width / 2, (size_t)height / 2, 1 };
cl::Image2D img_y = cl::Image2D(nv12_image_plane_y);
cl::Image2D img_uv = cl::Image2D(nv12_image_plane_uv);
err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_y[i],
true, origin, y_region, 0, 0, fake_image_data_y[i].data(), 0, NULL, NULL);
ASSERT_EQ(err, 0);
auto tensor_remote_y = cldnn_context.create_tensor(param_input_y->get_element_type(), fake_image_data_y.get_shape(), img_y);
auto tensor_remote_uv = cldnn_context.create_tensor(param_input_uv->get_element_type(), fake_image_data_uv.get_shape(), img_uv);
err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_uv[i],
true, origin, uv_region, 0, 0, fake_image_data_uv[i].data(), 0, NULL, NULL);
ASSERT_EQ(err, 0);
img_y.push_back(cl::Image2D(nv12_image_plane_y[i]));
img_uv.push_back(cl::Image2D(nv12_image_plane_uv[i]));
tensor_remote.push_back(cldnn_context.create_tensor_nv12(img_y[i], img_uv[i]));
}
if (num_batch == 1) {
inf_req_remote.set_tensor(fn_ptr_remote->get_parameters().front()->get_friendly_name() + "/y", tensor_remote[0].first);
inf_req_remote.set_tensor(fn_ptr_remote->get_parameters().front()->get_friendly_name() + "/uv", tensor_remote[0].second);
} else {
GTEST_SKIP() << "Not implemented test";
}
inf_req_remote.set_tensor(*param_input_y->output(0).get_tensor().get_names().begin(), tensor_remote_y);
inf_req_remote.set_tensor(*param_input_uv->output(0).get_tensor().get_names().begin(), tensor_remote_uv);
inf_req_remote.infer();
auto outputTensor_shared = inf_req_remote.get_tensor(
ngraph::op::util::create_ie_output_name(fn_ptr_remote->get_results().front()->input_value(0)));
auto output_tensor_shared = inf_req_remote.get_tensor(function->get_results().at(0));
// ------------------------------------------------------
// Setup to inference using local tensor with batch=1
auto fn_ptr_local = ngraph::builder::subgraph::makeConvPoolRelu({1, 3, height, width});
// regular inference
auto fn_ptr_regular = ngraph::builder::subgraph::makeConvPoolRelu({1, 3, height, width});
// net_local.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
// net_local.getInputsInfo().begin()->second->setPrecision(Precision::U8);
// net_local.getInputsInfo().begin()->second->getPreProcess().setColorFormat(ColorFormat::NV12);
using namespace ov::preprocess;
auto p_reg = PrePostProcessor(fn_ptr_regular);
p_reg.input().tensor().set_element_type(ov::element::u8)
.set_color_format(ov::preprocess::ColorFormat::NV12_TWO_PLANES, {"y", "uv"})
.set_memory_type(GPU_CONFIG_KEY(BUFFER));
p_reg.input().preprocess().convert_color(ov::preprocess::ColorFormat::BGR);
p_reg.input().model().set_layout("NCHW");
auto function_regular = p_reg.build();
auto exec_net_b1 = ie.compile_model(fn_ptr_local, CommonTestUtils::DEVICE_GPU);
auto exec_net_regular = ie.compile_model(function_regular, CommonTestUtils::DEVICE_GPU);
auto inf_req_regular = exec_net_regular.create_infer_request();
inf_req_regular.set_tensor(param_input_y, fake_image_data_y);
inf_req_regular.set_tensor(param_input_uv, fake_image_data_uv);
auto inf_req_local = exec_net_b1.create_infer_request();
inf_req_regular.infer();
auto output_tensor_regular = inf_req_regular.get_tensor(exec_net_regular.output());
// Run regular input for each image and compare against batched tensor
for (int i = 0; i < num_batch; i++) {
auto y_tensor = ov::runtime::Tensor{ov::element::u8, {1, 1, height, width}};
auto uv_tensor = ov::runtime::Tensor{ov::element::u8, {1, 2, height / 2, width / 2}};
inf_req_local.set_tensor(fn_ptr_local->get_parameters().front()->get_friendly_name() + "/y", y_tensor);
inf_req_local.set_tensor(fn_ptr_local->get_parameters().front()->get_friendly_name() + "/uv", uv_tensor);
inf_req_local.infer();
auto output_tensor_local = inf_req_local.get_tensor(
ngraph::op::util::create_ie_output_name(fn_ptr_local->get_results().front()->input_value(0)));
// This network generates [1, size] tensor whether batch=1 or 2. So need to split
auto split_shared_tensor = ov::runtime::Tensor{output_tensor_local.get_element_type(),
output_tensor_local.get_shape(),
outputTensor_shared.data<float_t>() + output_tensor_local.get_size() * i};
ASSERT_EQ(output_tensor_local.get_size(), split_shared_tensor.get_size());
float thr = 0.1;
FuncTestUtils::compare_tensor(output_tensor_local, split_shared_tensor, thr, "", false);
}
// ------------------------------------------------------
// compare results
ASSERT_EQ(output_tensor_regular.get_size(), output_tensor_shared.get_size());
ASSERT_NO_THROW(output_tensor_regular.data());
ASSERT_NO_THROW(output_tensor_shared.data());
float thr = 0.1;
FuncTestUtils::compare_tensor(output_tensor_shared, output_tensor_regular, thr);
}
const std::vector<size_t> num_batches{1, 2, 4};
TEST_F(OVRemoteTensor_Test, NV12toBGR_buffer) {
#if defined(ANDROID)
GTEST_SKIP();
#endif
const int height = 16;
const int width = 16;
INSTANTIATE_TEST_SUITE_P(smoke_RemoteTensor, OVRemoteTensorBatched_Test, ::testing::ValuesIn(num_batches), OVRemoteTensorBatched_Test::getTestCaseName);
// ------------------------------------------------------
// Prepare input data
ov::runtime::Tensor fake_image_data_y = FuncTestUtils::create_and_fill_tensor(ov::element::u8, {1, 1, height, width}, 50, 0, 1);
ov::runtime::Tensor fake_image_data_uv = FuncTestUtils::create_and_fill_tensor(ov::element::u8, {1, 2, height / 2, width / 2}, 256, 0, 1);
auto ie = ov::runtime::Core();
auto fn_ptr_remote = ngraph::builder::subgraph::makeConvPoolRelu({1, 3, height, width});
using namespace ov::preprocess;
auto p = PrePostProcessor(fn_ptr_remote);
p.input().tensor().set_element_type(ov::element::u8)
.set_color_format(ov::preprocess::ColorFormat::NV12_TWO_PLANES, {"y", "uv"})
.set_memory_type(GPU_CONFIG_KEY(BUFFER));
p.input().preprocess().convert_color(ov::preprocess::ColorFormat::BGR);
p.input().model().set_layout("NCHW");
auto function = p.build();
auto param_input_y = function->get_parameters().at(0);
auto param_input_uv = function->get_parameters().at(1);
auto output = function->get_results().at(0);
// ------------------------------------------------------
// inference using remote tensor
auto ocl_instance = std::make_shared<OpenCL>();
ocl_instance->_queue = cl::CommandQueue(ocl_instance->_context, ocl_instance->_device);
auto in_size_y = ov::shape_size(param_input_y->get_output_shape(0)) * param_input_y->get_output_element_type(0).size();
auto in_size_uv = ov::shape_size(param_input_uv->get_output_shape(0)) * param_input_uv->get_output_element_type(0).size();
auto out_size = ov::shape_size(output->get_output_shape(0)) * output->get_output_element_type(0).size();
cl_int err;
cl::Buffer shared_input_y_buffer(ocl_instance->_context, CL_MEM_READ_WRITE, in_size_y, NULL, &err);
cl::Buffer shared_input_uv_buffer(ocl_instance->_context, CL_MEM_READ_WRITE, in_size_uv, NULL, &err);
cl::Buffer shared_output_buffer(ocl_instance->_context, CL_MEM_READ_WRITE, out_size, NULL, &err);
auto remote_context = ov::runtime::gpu::ocl::ClContext(ie, ocl_instance->_queue.get());
auto exec_net_shared = ie.compile_model(function, remote_context);
auto gpu_context = exec_net_shared.get_context().as<ov::runtime::gpu::ocl::ClContext>();
auto gpu_in_y_tensor = gpu_context.create_tensor(param_input_y->get_output_element_type(0), fake_image_data_y.get_shape(), shared_input_y_buffer);
auto gpu_in_uv_tensor = gpu_context.create_tensor(param_input_uv->get_output_element_type(0), fake_image_data_uv.get_shape(), shared_input_uv_buffer);
auto gpu_out_tensor = gpu_context.create_tensor(output->get_output_element_type(0), output->get_output_shape(0), shared_output_buffer);
auto out_tensor = FuncTestUtils::create_and_fill_tensor(output->get_output_element_type(0), output->get_output_shape(0));
auto inf_req_shared = exec_net_shared.create_infer_request();
inf_req_shared.set_tensor(param_input_y, gpu_in_y_tensor);
inf_req_shared.set_tensor(param_input_uv, gpu_in_uv_tensor);
inf_req_shared.set_tensor(output, gpu_out_tensor);
void* buffer_y = fake_image_data_y.data();
void* buffer_uv = fake_image_data_uv.data();
ocl_instance->_queue.enqueueWriteBuffer(shared_input_y_buffer, false, 0, in_size_y, buffer_y);
ocl_instance->_queue.enqueueWriteBuffer(shared_input_uv_buffer, false, 0, in_size_uv, buffer_uv);
inf_req_shared.start_async();
ocl_instance->_queue.enqueueReadBuffer(shared_output_buffer, false, 0, out_size, out_tensor.data(), nullptr, nullptr);
ocl_instance->_queue.finish();
// ------------------------------------------------------
// regular inference
auto exec_net_regular = ie.compile_model(function, CommonTestUtils::DEVICE_GPU);
auto inf_req_regular = exec_net_regular.create_infer_request();
inf_req_regular.set_tensor(param_input_y, fake_image_data_y);
inf_req_regular.set_tensor(param_input_uv, fake_image_data_uv);
inf_req_regular.infer();
auto output_tensor_regular = inf_req_regular.get_tensor(exec_net_regular.output());
// ------------------------------------------------------
// compare results
ASSERT_EQ(output_tensor_regular.get_size(), out_tensor.get_size());
ASSERT_NO_THROW(output_tensor_regular.data());
ASSERT_NO_THROW(out_tensor.data());
float thr = 0.1;
FuncTestUtils::compare_tensor(out_tensor, output_tensor_regular, thr);
}

View File

@ -0,0 +1,60 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/convert_color_nv12.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
const std::vector<ov::Shape> inShapes_nhwc = {
{1, 10, 10, 1}
};
const std::vector<ov::element::Type> inTypes = {
ov::element::u8,
ov::element::f32
};
const auto testCase_values = ::testing::Combine(
::testing::ValuesIn(inShapes_nhwc),
::testing::ValuesIn(inTypes),
::testing::Bool(),
::testing::Bool(),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12, ConvertColorNV12LayerTest, testCase_values, ConvertColorNV12LayerTest::getTestCaseName);
const auto testCase_accuracy_values = ::testing::Combine(
::testing::Values(ov::Shape{1, 16*6, 16, 1}),
::testing::Values(ov::element::u8),
::testing::Bool(),
::testing::Bool(),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12_acc,
ConvertColorNV12AccuracyTest,
testCase_accuracy_values,
ConvertColorNV12LayerTest::getTestCaseName);
const auto testCase_accuracy_values_nightly = ::testing::Combine(
::testing::Values(ov::Shape{1, 256*256, 256, 1}),
::testing::Values(ov::element::u8),
::testing::Values(false),
::testing::Values(true),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(nightly_TestsConvertColorNV12_acc,
ConvertColorNV12AccuracyTest,
testCase_accuracy_values_nightly,
ConvertColorNV12LayerTest::getTestCaseName);
} // namespace

View File

@ -415,11 +415,11 @@ std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> LayerTe
const auto &&outputsInfo = executableNetwork.GetOutputsInfo();
std::vector<ngraph::element::Type_t> convertType;
convertType.reserve(outputsInfo.size());
for (const auto &output : outputsInfo) {
convertType.push_back(
FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(
output.second->getTensorDesc().getPrecision()));
}
for (const auto &output : outputsInfo) {
convertType.push_back(
FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(
output.second->getTensorDesc().getPrecision()));
}
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> expectedOutputs;
switch (refMode) {

View File

@ -38,7 +38,7 @@ struct condition : public primitive_base<condition> {
/// false..
/// @param compare_Data An identifier of primitive which contains compare values
/// @param func Used function during comparison.
/// @param offseg Offset for compare data.
/// @param offset Offset for compare data.
/// @param output_padding Optional padding for output from primitive.
condition(const primitive_id& id,
const primitive_id& input,

View File

@ -0,0 +1,64 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "primitive.hpp"
#include "intel_gpu/runtime/memory_caps.hpp"
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs image conversion from one format to another
struct convert_color : public primitive_base<convert_color> {
CLDNN_DECLARE_PRIMITIVE(convert_color)
enum color_format : uint32_t {
RGB, ///< RGB color format
BGR, ///< BGR color format, default in DLDT
RGBX, ///< RGBX color format with X ignored during inference
BGRX, ///< BGRX color format with X ignored during inference
NV12, ///< NV12 color format represented as compound Y+UV blob
I420, ///< I420 color format represented as compound Y+U+V blob
};
enum memory_type : uint32_t {
buffer,
image
};
/// @brief Constructs convert_color primitive.
/// @param id This primitive id.
/// @param inputs Input primitives ids.
/// @param input_color_format Color to convert from.
/// @param output_color_format Color to convert to.
/// @param mem_type Memory type.
/// @param output_layout Requested memory layout.
convert_color(const primitive_id& id,
const std::vector<primitive_id>& inputs,
const color_format input_color_format,
const color_format output_color_format,
const memory_type mem_type,
const layout& output_layout,
const primitive_id& ext_prim_id = "",
const padding& output_padding = padding())
: primitive_base(id, inputs, ext_prim_id, output_padding),
input_color_format(input_color_format),
output_color_format(output_color_format),
mem_type(mem_type),
output_layout(output_layout) {}
color_format input_color_format;
color_format output_color_format;
memory_type mem_type;
layout output_layout;
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@ -73,7 +73,8 @@ enum class KernelType {
LOOP,
NON_MAX_SUPPRESSION,
DETECTION_OUTPUT,
EXPERIMENTAL_DETECTRON_ROI_FEATURE_EXTRACTOR
EXPERIMENTAL_DETECTRON_ROI_FEATURE_EXTRACTOR,
CONVERT_COLOR
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -593,4 +594,22 @@ enum class BoxEncodingType {
BOX_ENCODING_CORNER,
BOX_ENCODING_CENTER,
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// ConvertColor
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
enum class color_format : uint32_t {
RGB, ///< RGB color format
BGR, ///< BGR color format, default in DLDT
RGBX, ///< RGBX color format with X ignored during inference
BGRX, ///< BGRX color format with X ignored during inference
NV12, ///< NV12 color format represented as compound Y+UV blob
I420, ///< I420 color format represented as compound Y+U+V blob
};
enum class memory_type : uint32_t {
buffer,
image
};
} // namespace kernel_selector

View File

@ -0,0 +1,94 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "convert_color_kernel_base.h"
#include "kernel_selector_utils.h"
#include <string>
namespace kernel_selector {
bool ConvertColorKernelBase::Validate(const Params& p, const optional_params& o) const {
if (p.GetType() != KernelType::CONVERT_COLOR ||
o.GetType() != KernelType::CONVERT_COLOR) {
return false;
}
const convert_color_params& params = static_cast<const convert_color_params&>(p);
if (params.inputs[0].Dimentions() > 4)
return false;
return true;
}
CommonDispatchData ConvertColorKernelBase::SetDefault(const convert_color_params& params, const optional_params&) const {
CommonDispatchData dispatchData;
const auto& out = params.output;
auto in_layout = params.inputs[0].GetLayout();
auto out_layout = params.output.GetLayout();
dispatchData.gws = { out.Batch().v, out.Y().v, out.X().v };
dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo, in_layout, out_layout);
return dispatchData;
}
JitConstants ConvertColorKernelBase::GetJitConstants(const convert_color_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
jit.AddConstant(MakeJitConstant("INPUTS_COUNT", params.inputs.size()));
switch (params.input_color_format) {
case color_format::NV12:
jit.AddConstant(MakeJitConstant("CONVERT_FROM_NV12", ""));
break;
default:
IE_THROW() << "Not supported input color format";
}
switch (params.output_color_format) {
case color_format::RGB:
jit.AddConstant(MakeJitConstant("CONVERT_TO_RGB", ""));
break;
case color_format::BGR:
jit.AddConstant(MakeJitConstant("CONVERT_TO_BGR", ""));
break;
default:
IE_THROW() << "Not supported output color format";
}
switch (params.mem_type) {
case memory_type::buffer:
jit.AddConstant(MakeJitConstant("BUFFER_MEM", ""));
break;
case memory_type::image:
jit.AddConstant(MakeJitConstant("SURFACE_MEM", ""));
break;
default:
IE_THROW() << "Not supported memory type";
}
return jit;
}
KernelsData ConvertColorKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
KernelData kd = KernelData::Default<convert_color_params>(params);
const auto& prim_params = static_cast<const convert_color_params&>(params);
if (!Validate(params, options)) {
return {};
}
auto dispatchData = SetDefault(prim_params, options);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
auto cldnn_jit = GetJitConstants(prim_params);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
size_t number_of_inputs = prim_params.inputs.size();
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
"", false, false, number_of_inputs);
return { kd };
}
} // namespace kernel_selector

View File

@ -0,0 +1,49 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_base_opencl.h"
#include "kernel_selector_params.h"
#include <vector>
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// convert_color_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct convert_color_params : public base_params {
convert_color_params() : base_params(KernelType::CONVERT_COLOR) {}
color_format input_color_format;
color_format output_color_format;
memory_type mem_type;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// convert_color_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct convert_color_optional_params : optional_params {
convert_color_optional_params() : optional_params(KernelType::CONVERT_COLOR) {}
};
struct convert_color_fuse_params : fuse_params {
convert_color_fuse_params() : fuse_params(KernelType::CONVERT_COLOR) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// ConvertColorKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class ConvertColorKernelBase : public KernelBaseOpenCL {
public:
using KernelBaseOpenCL::KernelBaseOpenCL;
virtual ~ConvertColorKernelBase() {}
struct DispatchData : public CommonDispatchData {};
protected:
bool Validate(const Params&, const optional_params&) const override;
virtual JitConstants GetJitConstants(const convert_color_params& params) const;
virtual CommonDispatchData SetDefault(const convert_color_params& params, const optional_params&) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
};
} // namespace kernel_selector

View File

@ -0,0 +1,39 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "convert_color_kernel_ref.h"
#include "kernel_selector_utils.h"
#include <string>
#include <vector>
namespace kernel_selector {
ParamsKey ConvertColorKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableInputLayout(DataLayout::nv12);
k.EnableInputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableDifferentTypes();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
return k;
}
KernelsData ConvertColorKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options);
}
KernelsPriority ConvertColorKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
return FORCE_PRIORITY_9;
}
} // namespace kernel_selector

View File

@ -0,0 +1,20 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "convert_color_kernel_base.h"
namespace kernel_selector {
class ConvertColorKernelRef : public ConvertColorKernelBase {
public:
using Parent = ConvertColorKernelBase;
ConvertColorKernelRef() : ConvertColorKernelBase("convert_color_ref") {}
virtual ~ConvertColorKernelRef() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@ -0,0 +1,17 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "convert_color_kernel_selector.h"
#include "convert_color_kernel_ref.h"
namespace kernel_selector {
convert_color_kernel_selector::convert_color_kernel_selector() {
Attach<ConvertColorKernelRef>();
}
KernelsData convert_color_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::CONVERT_COLOR);
}
} // namespace kernel_selector

View File

@ -0,0 +1,23 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class convert_color_kernel_selector : public kernel_selector_base {
public:
static convert_color_kernel_selector& Instance() {
static convert_color_kernel_selector instance_;
return instance_;
}
convert_color_kernel_selector();
virtual ~convert_color_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -0,0 +1,106 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "include/batch_headers/fetch_data.cl"
#include "include/batch_headers/data_types.cl"
#ifdef CONVERT_FROM_NV12
#ifdef BUFFER_MEM
KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input_y,
#if INPUTS_COUNT == 2
const __global INPUT1_TYPE* input_uv,
#endif
__global OUTPUT_TYPE* output) {
const uint b = get_global_id(0);
const uint y = get_global_id(1);
const uint x = get_global_id(2);
float Y = input_y[GET_DATA_INDEX(INPUT0, b, 0, y, x)];
#if INPUTS_COUNT == 2
float U = input_uv[GET_DATA_INDEX(INPUT1, b, 0, y / 2, x / 2)];
float V = input_uv[GET_DATA_INDEX(INPUT1, b, 1, y / 2, x / 2)];
#else // Single plane
uint input_uv_offset = INPUT0_SIZE_X * INPUT0_SIZE_Y / 3 * 2;
float U = input_y[GET_DATA_INDEX(INPUT0, b, 0, y / 2, (x / 2) * 2) + input_uv_offset];
float V = input_y[GET_DATA_INDEX(INPUT0, b, 1, y / 2, (x / 2) * 2) + input_uv_offset];
#endif
float Ycomponent = mad(Y, 1.164f, -18.624f);
float Ucomponent = mad(U, 1.f, -128.f);
float Vcomponent = mad(V, 1.f, -128.f);
float R = clamp(mad(Vcomponent, 1.596f, Ycomponent), 0.f, 255.f);
float G = clamp(mad(Vcomponent, -0.813f, mad(Ucomponent, -0.391f, Ycomponent)), 0.f, 255.f);
float B = clamp(mad(Ucomponent, 2.018f, Ycomponent), 0.f, 255.f);
#if UINT8_UNIT_USED
R = round(R);
G = round(G);
B = round(B);
#endif
#ifdef CONVERT_TO_RGB
output[OUTPUT_GET_INDEX(b, 0, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(R), ACTIVATION_PARAMS);
output[OUTPUT_GET_INDEX(b, 1, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(G), ACTIVATION_PARAMS);
output[OUTPUT_GET_INDEX(b, 2, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(B), ACTIVATION_PARAMS);
#else // BGR
output[OUTPUT_GET_INDEX(b, 0, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(B), ACTIVATION_PARAMS);
output[OUTPUT_GET_INDEX(b, 1, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(G), ACTIVATION_PARAMS);
output[OUTPUT_GET_INDEX(b, 2, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(R), ACTIVATION_PARAMS);
#endif
}
#endif
#ifdef SURFACE_MEM
KERNEL(convert_color_ref)(read_only image2d_t input_y,
#if INPUTS_COUNT == 2
read_only image2d_t input_uv,
#endif
__global OUTPUT_TYPE* output) {
const uint b = get_global_id(0);
const uint y = get_global_id(1);
const uint x = get_global_id(2);
float4 Y = read_imagef(input_y, (int2)(x, y));
float Ycomponent = mad(Y.x, 296.82f, -18.624f);
#if INPUTS_COUNT == 2
float4 UV = read_imagef(input_uv, (int2)(x / 2, y / 2));
float Ucomponent = mad(UV.x, 255.0f, -128.f);
float Vcomponent = mad(UV.y, 255.0f, -128.f);
#else // Single plane
uint input_y_offset = INPUT0_SIZE_Y / 3 * 2;
float4 U = read_imagef(input_y, (int2)((x / 2) * 2, y / 2 + input_y_offset));
float4 V = read_imagef(input_y, (int2)((x / 2) * 2 + 1, y / 2 + input_y_offset));
float Ucomponent = mad(U.x, 255.0f, -128.f);
float Vcomponent = mad(V.x, 255.0f, -128.f);
#endif
float R = clamp(mad(Vcomponent, 1.596f, Ycomponent), 0.f, 255.f);
float G = clamp(mad(Vcomponent, -0.813f, mad(Ucomponent, -0.391f, Ycomponent)), 0.f, 255.f);
float B = clamp(mad(Ucomponent, 2.018f, Ycomponent), 0.f, 255.f);
#if UINT8_UNIT_USED
R = round(R);
G = round(G);
B = round(B);
#endif
#ifdef CONVERT_TO_RGB
output[OUTPUT_GET_INDEX(b, 0, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(R), ACTIVATION_PARAMS);
output[OUTPUT_GET_INDEX(b, 1, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(G), ACTIVATION_PARAMS);
output[OUTPUT_GET_INDEX(b, 2, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(B), ACTIVATION_PARAMS);
#else // BGR
output[OUTPUT_GET_INDEX(b, 0, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(B), ACTIVATION_PARAMS);
output[OUTPUT_GET_INDEX(b, 1, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(G), ACTIVATION_PARAMS);
output[OUTPUT_GET_INDEX(b, 2, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(R), ACTIVATION_PARAMS);
#endif
}
#endif
#endif

View File

@ -0,0 +1,44 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "convert_color_inst.h"
#include "primitive_type_base.h"
#include "intel_gpu/runtime/error_handler.hpp"
#include "json_object.h"
#include <string>
namespace cldnn {
primitive_type_id convert_color::type_id() {
static primitive_type_base<convert_color> instance;
return &instance;
}
layout convert_color_inst::calc_output_layout(convert_color_node const& node) {
auto desc = node.get_primitive();
return desc->output_layout;
}
std::string convert_color_inst::to_string(convert_color_node const& node) {
auto desc = node.get_primitive();
auto node_info = node.desc_to_json();
auto& input = node.input();
std::stringstream primitive_description;
json_composite convert_color_info;
convert_color_info.add("input id", input.id());
convert_color_info.add("memory type", desc->mem_type);
convert_color_info.add("input color format", desc->input_color_format);
convert_color_info.add("output color format", desc->output_color_format);
node_info->add("convert_color info", convert_color_info);
node_info->dump(primitive_description);
return primitive_description.str();
}
convert_color_inst::typed_primitive_inst(network& network, convert_color_node const& node) : parent(network, node) {}
} // namespace cldnn

View File

@ -0,0 +1,78 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "convert_color_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "convert_color/convert_color_kernel_selector.h"
#include "convert_color/convert_color_kernel_base.h"
#include "intel_gpu/runtime/error_handler.hpp"
#include "data_inst.h"
#include <vector>
using namespace cldnn;
namespace cldnn {
namespace ocl {
struct convert_color_impl : typed_primitive_impl_ocl<convert_color> {
using parent = typed_primitive_impl_ocl<convert_color>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<convert_color_impl>(*this);
}
protected:
kernel_arguments_data get_arguments(typed_primitive_inst<convert_color>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
return args;
}
public:
static primitive_impl* create(const convert_color_node& arg) {
auto convert_color_params = get_default_params<kernel_selector::convert_color_params>(arg);
auto convert_color_optional_params =
get_default_optional_params<kernel_selector::convert_color_optional_params>(arg.get_program());
for (size_t i = 1; i < arg.inputs_count(); ++i) {
convert_color_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
}
auto primitive = arg.get_primitive();
convert_color_params.input_color_format = static_cast<kernel_selector::color_format>(primitive->input_color_format);
convert_color_params.output_color_format = static_cast<kernel_selector::color_format>(primitive->output_color_format);
convert_color_params.mem_type = static_cast<kernel_selector::memory_type>(primitive->mem_type);
auto& kernel_selector = kernel_selector::convert_color_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(convert_color_params, convert_color_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto convert_color = new convert_color_impl(arg, best_kernels[0]);
return convert_color;
}
};
namespace detail {
attach_convert_color_impl::attach_convert_color_impl() {
implementation_map<convert_color>::add(impl_types::ocl, convert_color_impl::create, {
std::make_tuple(data_types::f32, format::nv12),
std::make_tuple(data_types::f16, format::nv12),
std::make_tuple(data_types::u8, format::nv12),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::u8, format::byxf),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

View File

@ -77,6 +77,7 @@ void register_implementations() {
REGISTER_OCL(cum_sum);
REGISTER_OCL(embedding_bag);
REGISTER_OCL(extract_image_patches);
REGISTER_OCL(convert_color);
}
} // namespace ocl

View File

@ -63,6 +63,7 @@
#include "intel_gpu/primitives/lstm_dynamic_timeloop.hpp"
#include "intel_gpu/primitives/grn.hpp"
#include "intel_gpu/primitives/ctc_greedy_decoder.hpp"
#include "intel_gpu/primitives/convert_color.hpp"
#include "generic_layer.hpp"
@ -144,6 +145,7 @@ REGISTER_OCL(ctc_greedy_decoder);
REGISTER_OCL(cum_sum);
REGISTER_OCL(embedding_bag);
REGISTER_OCL(extract_image_patches);
REGISTER_OCL(convert_color);
#undef REGISTER_OCL

View File

@ -0,0 +1,34 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "intel_gpu/primitives/convert_color.hpp"
#include "primitive_inst.h"
#include <string>
namespace cldnn {
template <>
struct typed_program_node<convert_color> : public typed_program_node_base<convert_color> {
using parent = typed_program_node_base<convert_color>;
public:
using parent::parent;
program_node& input(size_t index = 0) const { return get_dependency(index); }
size_t inputs_count() const { return get_primitive()->input.size(); }
};
using convert_color_node = typed_program_node<convert_color>;
template <>
class typed_primitive_inst<convert_color> : public typed_primitive_inst_base<convert_color> {
using parent = typed_primitive_inst_base<convert_color>;
public:
static layout calc_output_layout(convert_color_node const& node);
static std::string to_string(convert_color_node const& node);
typed_primitive_inst(network& network, convert_color_node const& desc);
};
using convert_color_inst = typed_primitive_inst<convert_color>;
} // namespace cldnn

View File

@ -0,0 +1,431 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include "opencl_helper_instance.hpp"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/data.hpp>
#include <intel_gpu/primitives/convert_color.hpp>
#include <intel_gpu/runtime/device_query.hpp>
#include <ocl/ocl_wrapper.hpp>
using namespace cldnn;
using namespace ::tests;
template <typename T, typename U>
void createReferenceData(const T* arg_y, const T* arg_uv, U* out_ptr,
size_t batch_size, size_t image_h, size_t image_w,
size_t stride_y, size_t stride_uv, bool to_rgb) {
for (int batch = 0; batch < batch_size; batch++) {
U* out = out_ptr + batch * image_w * image_h * 3;
auto y_ptr = arg_y + batch * stride_y;
auto uv_ptr = arg_uv + batch * stride_uv;
for (int h = 0; h < image_h; h++) {
for (int w = 0; w < image_w; w++) {
auto y_index = h * image_w + w;
auto y_val = static_cast<float>(y_ptr[y_index]);
auto uv_index = (h / 2) * image_w + (w / 2) * 2;
auto u_val = static_cast<float>(uv_ptr[uv_index]);
auto v_val = static_cast<float>(uv_ptr[uv_index + 1]);
auto c = y_val - 16.f;
auto d = u_val - 128.f;
auto e = v_val - 128.f;
auto clip = [](float a) -> U {
if (std::is_integral<U>()) {
return static_cast<U>(std::min(std::max(std::round(a), 0.f), 255.f));
} else {
return static_cast<U>(std::min(std::max(a, 0.f), 255.f));
}
};
auto b = clip(1.164f * c + 2.018f * d);
auto g = clip(1.164f * c - 0.391f * d - 0.813f * e);
auto r = clip(1.164f * c + 1.596f * e);
if (to_rgb) {
out[y_index * 3] = r;
out[y_index * 3 + 1] = g;
out[y_index * 3 + 2] = b;
} else {
out[y_index * 3] = b;
out[y_index * 3 + 1] = g;
out[y_index * 3 + 2] = r;
}
}
}
}
}
TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp32) {
auto& engine = get_test_engine();
int width = 224;
int height = 448;
auto input_y = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width, height } });
auto input_uv = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 2, width / 2 , height / 2 } });
std::vector<float> input_y_data = generate_random_1d<float>(width * height, 0, 255);
std::vector<float> input_uv_data = generate_random_1d<float>(width * height / 2, 0, 255);
set_values(input_y, input_y_data);
set_values(input_uv, input_uv_data);
layout output_layout(data_types::f32, cldnn::format::byxf, { 1, 3, width, height });
topology topology;
topology.add(input_layout("input_y", input_y->get_layout()));
topology.add(input_layout("input_uv", input_uv->get_layout()));
topology.add(convert_color("convert_color", { "input_y", "input_uv" }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB,
cldnn::convert_color::memory_type::buffer, output_layout));
network network(engine, topology);
network.set_input_data("input_y", input_y);
network.set_input_data("input_uv", input_uv);
auto outputs = network.execute();
std::vector<float> ref_res(width * height * 3);
createReferenceData<float, float>(input_y_data.data(), input_uv_data.data(), ref_res.data(),
1, height, width, height * width, height * width / 2, true);
auto output = outputs.at("convert_color").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
for (size_t i = 0; i < ref_res.size(); ++i) {
EXPECT_NEAR(ref_res[i], output_ptr[i], 1.001f);
}
}
TEST(convert_color, nv12_to_bgr_two_planes_buffer_fp32) {
auto& engine = get_test_engine();
int width = 224;
int height = 224;
auto input_y = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width, height } });
auto input_uv = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 2, width / 2 , height / 2 } });
std::vector<float> input_y_data = generate_random_1d<float>(width * height, 0, 255);
std::vector<float> input_uv_data = generate_random_1d<float>(width * height / 2, 0, 255);
set_values(input_y, input_y_data);
set_values(input_uv, input_uv_data);
layout output_layout(data_types::f32, cldnn::format::byxf, { 1, 3, width, height });
topology topology;
topology.add(input_layout("input_y", input_y->get_layout()));
topology.add(input_layout("input_uv", input_uv->get_layout()));
topology.add(convert_color("convert_color", { "input_y", "input_uv" }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::BGR,
cldnn::convert_color::memory_type::buffer, output_layout));
network network(engine, topology);
network.set_input_data("input_y", input_y);
network.set_input_data("input_uv", input_uv);
auto outputs = network.execute();
std::vector<float> ref_res(width * height * 3);
createReferenceData<float>(input_y_data.data(), input_uv_data.data(), ref_res.data(),
1, height, width, height * width, height * width / 2, false);
auto output = outputs.at("convert_color").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
for (size_t i = 0; i < ref_res.size(); ++i) {
EXPECT_NEAR(ref_res[i], output_ptr[i], 1.001f);
}
}
TEST(convert_color, nv12_to_rgb_two_planes_buffer_u8) {
auto& engine = get_test_engine();
int width = 224;
int height = 224;
auto input_y = engine.allocate_memory({ data_types::u8, format::byxf, { 1, 1, width, height } });
auto input_uv = engine.allocate_memory({ data_types::u8, format::byxf, { 1, 2, width / 2 , height / 2 } });
std::vector<uint8_t> input_y_data = generate_random_1d<uint8_t>(width * height, 0, 255);
std::vector<uint8_t> input_uv_data = generate_random_1d<uint8_t>(width * height / 2, 0, 255);
set_values(input_y, input_y_data);
set_values(input_uv, input_uv_data);
layout output_layout(data_types::u8, cldnn::format::byxf, { 1, 3, width, height });
topology topology;
topology.add(input_layout("input_y", input_y->get_layout()));
topology.add(input_layout("input_uv", input_uv->get_layout()));
topology.add(convert_color("convert_color", { "input_y", "input_uv" }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB,
cldnn::convert_color::memory_type::buffer, output_layout));
network network(engine, topology);
network.set_input_data("input_y", input_y);
network.set_input_data("input_uv", input_uv);
auto outputs = network.execute();
std::vector<float> ref_res(width * height * 3);
createReferenceData<uint8_t, float>(input_y_data.data(), input_uv_data.data(), ref_res.data(),
1, height, width, height * width, height * width / 2, true);
auto output = outputs.at("convert_color").get_memory();
cldnn::mem_lock<uint8_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < ref_res.size(); ++i) {
EXPECT_NEAR(ref_res[i], static_cast<float>(output_ptr[i]), 1.001f);
}
}
TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp16) {
auto& engine = get_test_engine();
int width = 224;
int height = 224;
auto input_y = engine.allocate_memory({ data_types::f16, format::byxf, { 1, 1, width, height } });
auto input_uv = engine.allocate_memory({ data_types::f16, format::byxf, { 1, 2, width / 2 , height / 2 } });
std::vector<FLOAT16> input_y_data = generate_random_1d<FLOAT16>(width * height, 0, 255);
std::vector<FLOAT16> input_uv_data = generate_random_1d<FLOAT16>(width * height / 2, 0, 255);
set_values(input_y, input_y_data);
set_values(input_uv, input_uv_data);
layout output_layout(data_types::f16, cldnn::format::byxf, { 1, 3, width, height });
topology topology;
topology.add(input_layout("input_y", input_y->get_layout()));
topology.add(input_layout("input_uv", input_uv->get_layout()));
topology.add(convert_color("convert_color", { "input_y", "input_uv" }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB,
cldnn::convert_color::memory_type::buffer, output_layout));
network network(engine, topology);
network.set_input_data("input_y", input_y);
network.set_input_data("input_uv", input_uv);
auto outputs = network.execute();
std::vector<float> ref_res(width * height * 3);
createReferenceData<FLOAT16, float>(input_y_data.data(), input_uv_data.data(), ref_res.data(),
1, height, width, height * width, height * width / 2, true);
auto output = outputs.at("convert_color").get_memory();
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < ref_res.size(); ++i) {
EXPECT_NEAR(ref_res[i], float16_to_float32(output_ptr[i]), 1.001f);
}
}
TEST(convert_color, nv12_to_rgb_single_plane_buffer_fp32) {
auto& engine = get_test_engine();
int width = 224;
int height = 448;
int input_height = height + height / 2;
auto input = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width, input_height } });
int data_size = width * (height + height / 2);
std::vector<float> input_data = generate_random_1d<float>(data_size, 0, 255);
set_values(input, input_data);
layout output_layout(data_types::f32, cldnn::format::byxf, { 1, 3, width, height });
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(convert_color("convert_color", { "input" }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB,
cldnn::convert_color::memory_type::buffer, output_layout));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
std::vector<float> ref_res(width * height * 3);
createReferenceData<float, float>(input_data.data(), input_data.data() + height * width, ref_res.data(),
1, height, width, input_height * width, input_height * width, true);
auto output = outputs.at("convert_color").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
for (size_t i = 0; i < ref_res.size(); ++i) {
EXPECT_NEAR(ref_res[i], output_ptr[i], 1.001f);
}
}
TEST(convert_color, nv12_to_rgb_single_plane_buffer_u8) {
auto& engine = get_test_engine();
int width = 224;
int height = 448;
int input_height = height + height / 2;
auto input = engine.allocate_memory({ data_types::u8, format::byxf, { 1, 1, width, input_height } });
int data_size = width * (height + height / 2);
std::vector<uint8_t> input_data = generate_random_1d<uint8_t>(data_size, 0, 255);
set_values(input, input_data);
layout output_layout(data_types::u8, cldnn::format::byxf, { 1, 3, width, height });
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(convert_color("convert_color", { "input" }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB,
cldnn::convert_color::memory_type::buffer, output_layout));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
std::vector<float> ref_res(width * height * 3);
createReferenceData<uint8_t, float>(input_data.data(), input_data.data() + height * width, ref_res.data(),
1, height, width, input_height * width, input_height * width, true);
auto output = outputs.at("convert_color").get_memory();
cldnn::mem_lock<uint8_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < ref_res.size(); ++i) {
EXPECT_NEAR(ref_res[i], static_cast<float>(output_ptr[i]), 1.001f);
}
}
TEST(convert_color, nv12_to_rgb_two_planes_surface_u8) {
int width = 224;
int height = 448;
auto ocl_instance = std::make_shared<OpenCL>();
device_query query(engine_types::ocl, runtime_types::ocl, static_cast<void*>(ocl_instance->_context.get()));
auto devices = query.get_available_devices();
auto engine_config = cldnn::engine_configuration();
auto engine = engine::create(engine_types::ocl, runtime_types::ocl, devices.begin()->second, engine_config);
if (!engine->get_device_info().supports_image) {
GTEST_SKIP() << "Device doesn't support images";
}
cl_int err;
int data_size = width * (height + height / 2);
std::vector<uint8_t> data = generate_random_1d<uint8_t>(data_size, 0, 255);
cl_image_format image_format;
image_format.image_channel_order = CL_R;
image_format.image_channel_data_type = CL_UNORM_INT8;
cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, (size_t)width, (size_t)height, 0,
0, 0, 0, 0, 0, { nullptr } };
cl_mem nv12_image_plane_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err);
checkStatus(err, "Creating nv12 image plane_y failed");
image_format.image_channel_order = CL_RG;
image_desc.image_width = width / 2;
image_desc.image_height = height / 2;
image_desc.image_depth = 1;
cl_mem nv12_image_plane_uv = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err);
checkStatus(err, "Creating nv12 image plane_uv failed");
size_t origin[3] = { 0, 0, 0 };
size_t y_region[3] = { (size_t)width, (size_t)height, 1 };
size_t uv_region[3] = { (size_t)width / 2, (size_t)height / 2, 1 };
err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_y, true, origin, y_region, 0, 0, &data[0], 0, nullptr, nullptr);
checkStatus(err, "Writing nv12 image plane_y failed");
err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_uv, true, origin, uv_region, 0, 0, &data[width * height], 0, nullptr, nullptr);
checkStatus(err, "Writing nv12 image plane_uv failed");
auto input = input_layout("input", { data_types::u8, format::nv12, { 1, 1, width, height } });
auto input2 = input_layout("input2", { data_types::u8, format::nv12, { 1, 2, width / 2, height / 2} });
auto output_format = cldnn::format::byxf;
layout output_layout(data_types::f32, output_format, { 1, 3, width, height });
auto input_memory = engine->share_image(input.layout, nv12_image_plane_y);
auto input_memory2 = engine->share_image(input2.layout, nv12_image_plane_uv);
topology topology;
topology.add(input);
topology.add(input2);
topology.add(convert_color("convert_color", { "input", "input2" }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB,
cldnn::convert_color::memory_type::image, output_layout));
network network(*engine, topology);
network.set_input_data("input", input_memory);
network.set_input_data("input2", input_memory2);
auto outputs = network.execute();
std::vector<float> reference_results(width * height * 3);
createReferenceData<uint8_t, float>(data.data(), data.data() + height * width, reference_results.data(),
1, height, width, height * width, height * width / 2, true);
auto output_prim = outputs.begin()->second.get_memory();
cldnn::mem_lock<float> output_ptr(output_prim, get_test_stream());
for (auto i = 0; i < reference_results.size(); i++) {
EXPECT_NEAR(reference_results[i], output_ptr[i], 1.001f);
}
checkStatus(clReleaseMemObject(nv12_image_plane_uv), "clReleaseMemObject");
checkStatus(clReleaseMemObject(nv12_image_plane_y), "clReleaseMemObject");
}
TEST(convert_color, nv12_to_rgb_single_plane_surface_u8) {
int width = 224;
int height = 448;
int input_height = height + height / 2;
auto ocl_instance = std::make_shared<OpenCL>();
device_query query(engine_types::ocl, runtime_types::ocl, static_cast<void*>(ocl_instance->_context.get()));
auto devices = query.get_available_devices();
auto engine_config = cldnn::engine_configuration();
auto engine = engine::create(engine_types::ocl, runtime_types::ocl, devices.begin()->second, engine_config);
if (!engine->get_device_info().supports_image) {
GTEST_SKIP() << "Device doesn't support images";
}
cl_int err;
int data_size = width * (height + height / 2);
std::vector<uint8_t> input_data = generate_random_1d<uint8_t>(data_size, 0, 255);
cl_image_format image_format;
image_format.image_channel_order = CL_R;
image_format.image_channel_data_type = CL_UNORM_INT8;
cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, (size_t)width, (size_t)input_height, 0,
0, 0, 0, 0, 0, { nullptr } };
cl_mem nv12_image = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err);
checkStatus(err, "Creating nv12 image failed");
size_t origin[3] = { 0, 0, 0 };
size_t y_region[3] = { (size_t)width, (size_t)input_height, 1 };
err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image, true, origin, y_region, 0, 0, &input_data[0], 0, nullptr, nullptr);
checkStatus(err, "Writing nv12 image failed");
auto input = input_layout("input", { data_types::u8, format::nv12, { 1, 1, width, input_height } });
auto output_format = cldnn::format::byxf;
layout output_layout(data_types::f32, output_format, { 1, 3, width, height });
auto input_memory = engine->share_image(input.layout, nv12_image);
topology topology;
topology.add(input);
topology.add(convert_color("convert_color", { "input" }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB,
cldnn::convert_color::memory_type::image, output_layout));
network network(*engine, topology);
network.set_input_data("input", input_memory);
auto outputs = network.execute();
std::vector<float> reference_results(width * height * 3);
createReferenceData<uint8_t, float>(input_data.data(), input_data.data() + height * width, reference_results.data(),
1, height, width, input_height * width, input_height * width, true);
auto output_prim = outputs.begin()->second.get_memory();
cldnn::mem_lock<float> output_ptr(output_prim, get_test_stream());
for (auto i = 0; i < reference_results.size(); i++) {
EXPECT_NEAR(reference_results[i], output_ptr[i], 1.001f);
}
checkStatus(clReleaseMemObject(nv12_image), "clReleaseMemObject");
}

View File

@ -1991,7 +1991,7 @@ TEST_P(conv_int8_scale_shift_swish, basic) {
reorder("reorder_bfyx", "mul", p.default_format, data_types::f32)
);
tolerance = 1e-4f;
tolerance = 1e-3f;
execute(p);
}
@ -5087,7 +5087,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv
deconv_test_params{ CASE_DECONV_S8S8_8, 2, 9 },
deconv_test_params{ CASE_DECONV_FP32_3D_1, 2, 9 },
deconv_test_params{ CASE_DECONV_FP32_3D_2, 2, 9 },
// deconv_test_params{ CASE_DECONV_FP32_3D_2, 2, 9 },
deconv_test_params{ CASE_DECONV_FP32_3D_3, 2, 9 },
deconv_test_params{ CASE_DECONV_FP32_3D_4, 2, 9 },
deconv_test_params{ CASE_DECONV_FP32_3D_5, 2, 9 },

View File

@ -57,7 +57,7 @@ struct OpenCL {
_out_of_order_queue = out_of_order_queue;
auto extensions = _device.getInfo<CL_DEVICE_EXTENSIONS>();
_supports_usm = extensions.find("cl_intel_unified_shared_memory") != std::string::npos;;
_supports_usm = extensions.find("cl_intel_unified_shared_memory") != std::string::npos;
_usm_helper = std::make_shared<cl::UsmHelper>(_context, _device, _supports_usm);

View File

@ -33,6 +33,8 @@ class OPENVINO_API NV12toBGR : public util::ConvertColorNV12Base {
public:
OPENVINO_OP("NV12toBGR", "opset8", util::ConvertColorNV12Base);
BWDCMP_RTTI_DECLARATION;
NV12toBGR() = default;
/// \brief Constructs a conversion operation from input image in NV12 format

View File

@ -33,6 +33,8 @@ class OPENVINO_API NV12toRGB : public util::ConvertColorNV12Base {
public:
OPENVINO_OP("NV12toRGB", "opset8", util::ConvertColorNV12Base);
BWDCMP_RTTI_DECLARATION;
NV12toRGB() = default;
/// \brief Constructs a conversion operation from input image in NV12 format

View File

@ -6,6 +6,8 @@
#include "itt.hpp"
BWDCMP_RTTI_DEFINITION(ov::op::v8::NV12toBGR);
ov::op::v8::NV12toBGR::NV12toBGR(const Output<Node>& arg)
: util::ConvertColorNV12Base(arg, util::ConvertColorNV12Base::ColorConversion::NV12_TO_BGR) {
constructor_validate_and_infer_types();

View File

@ -6,6 +6,8 @@
#include "itt.hpp"
BWDCMP_RTTI_DEFINITION(ov::op::v8::NV12toRGB);
ov::op::v8::NV12toRGB::NV12toRGB(const Output<Node>& arg)
: util::ConvertColorNV12Base(arg, util::ConvertColorNV12Base::ColorConversion::NV12_TO_RGB) {
constructor_validate_and_infer_types();

View File

@ -155,6 +155,13 @@ DECLARE_GPU_CONFIG_KEY(MAX_NUM_THREADS);
* Thus, this key should be turned off if graph loading time is considered to be most important target to optimize.*/
DECLARE_GPU_CONFIG_KEY(ENABLE_LOOP_UNROLLING);
/**
* @brief This keys instructs the GPU plugin to use surface/buffer and batched memory type.
*/
DECLARE_GPU_CONFIG_KEY(SURFACE);
DECLARE_GPU_CONFIG_KEY(BUFFER);
DECLARE_GPU_CONFIG_KEY(BATCHED);
} // namespace GPUConfigParams
namespace PluginConfigParams {

View File

@ -6,6 +6,7 @@
#include <ie_layouts.h>
#include "intel_gpu/runtime/layout.hpp"
#include "openvino/core/layout.hpp"
#include "ngraph/type/element_type.hpp"
@ -187,6 +188,21 @@ inline std::vector<uint16_t> ConvertPermuteOrder(const std::vector<uint16_t>& ie
return cldnn_order;
}
inline InferenceEngine::Layout InferenceEngineLayoutFromOVLayout(ov::Layout l) {
if (l == ov::Layout("C")) return InferenceEngine::Layout::C;
if (l == ov::Layout("CN")) return InferenceEngine::Layout::CN;
if (l == ov::Layout("HW")) return InferenceEngine::Layout::HW;
if (l == ov::Layout("NC")) return InferenceEngine::Layout::NC;
if (l == ov::Layout("CHW")) return InferenceEngine::Layout::CHW;
if (l == ov::Layout("HWC")) return InferenceEngine::Layout::HWC;
if (l == ov::Layout("NCHW")) return InferenceEngine::Layout::NCHW;
if (l == ov::Layout("NC??")) return InferenceEngine::Layout::NCHW;
if (l == ov::Layout("NHWC")) return InferenceEngine::Layout::NHWC;
if (l == ov::Layout("NCDHW")) return InferenceEngine::Layout::NCDHW;
if (l == ov::Layout("NDHWC")) return InferenceEngine::Layout::NDHWC;
IE_THROW() << "The plugin does not support " << l.to_string() << " layout";
}
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -214,5 +214,8 @@ REGISTER_FACTORY(v7, Gather);
REGISTER_FACTORY(v8, Gather);
REGISTER_FACTORY(v8, GatherND);
REGISTER_FACTORY(v8, DeformableConvolution);
REGISTER_FACTORY(v8, NV12toRGB);
REGISTER_FACTORY(v8, NV12toBGR);
// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);

View File

@ -234,6 +234,17 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
<< (is_input ? "input" : "output") << " precision";
}
size_t dataBinSize = dataSize * data->element_size();
size_t netReqBinSize = std::accumulate(desc.getDims().begin(), desc.getDims().end(),
desc.getPrecision().size(),
std::multiplies<size_t>());
if (dataBinSize != netReqBinSize && !compoundBlobPassed) {
IE_THROW() << "Incorrect binary data size for " << (is_input ? "input" : "output") <<
" blob with name: \'" << name << "\' " <<
"Current: " << dataBinSize << " Required: " << netReqBinSize;
}
auto remote_ptr = data->as<gpu::ClBlob>();
bool is_remote = remote_ptr != nullptr;
if (is_remote) {
@ -943,6 +954,13 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
}
auto inputMem = impl->getMemory();
auto input_layout = m_graph->GetInputLayouts().find(inputName);
if (input_layout != m_graph->GetInputLayouts().end()) {
if (input_layout->second.format != inputMem->get_layout().format) {
inputMem = m_graph->GetNetwork()->get_engine().reinterpret_buffer(*inputMem, input_layout->second);
}
}
if (!is_dev_input) {
if (prec == Precision::I16 || prec == Precision::U16) {
// GPU plugin doesn't support I16 input precision,

View File

@ -0,0 +1,60 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "intel_gpu/primitives/convert_color.hpp"
#include "openvino/core/preprocess/input_tensor_info.hpp"
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateCommonConvertColorOp(Program& p, const std::shared_ptr<ngraph::Node>& op,
const cldnn::convert_color::color_format from_color,
const cldnn::convert_color::color_format to_color) {
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);
auto outDatatype = DataTypeFromPrecision(op->get_input_element_type(0));
auto outShape = tensor_from_dims(op->get_output_shape(0));
outShape = { outShape.sizes()[0], outShape.sizes()[2], outShape.sizes()[3], outShape.sizes()[1] };
auto out_layout = cldnn::layout(outDatatype, cldnn::format::byxf, outShape);
auto memory_type = cldnn::convert_color::memory_type::buffer;
if (op->get_input_node_ptr(0)->output(0).get_rt_info().count(ov::preprocess::TensorInfoMemoryType::get_type_info_static())) {
std::string mem_type = op->get_input_node_ptr(0)->output(0).get_rt_info().at(ov::preprocess::TensorInfoMemoryType::get_type_info_static())
.as<ov::preprocess::TensorInfoMemoryType>().value;
if (mem_type.find(GPU_CONFIG_KEY(SURFACE)) != std::string::npos) {
memory_type = cldnn::convert_color::memory_type::image;
}
}
p.AddPrimitive(cldnn::convert_color(layerName,
inputPrimitives,
from_color,
to_color,
memory_type,
out_layout,
op->get_friendly_name()));
p.AddPrimitiveToProfiler(op);
}
static void CreateNV12toRGBOp(Program& p, const std::shared_ptr<ngraph::op::v8::NV12toRGB>& op) {
p.ValidateInputs(op, {1, 2});
CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB);
}
static void CreateNV12toBGROp(Program& p, const std::shared_ptr<ngraph::op::v8::NV12toBGR>& op) {
p.ValidateInputs(op, {1, 2});
CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::BGR);
}
REGISTER_FACTORY_IMPL(v8, NV12toRGB);
REGISTER_FACTORY_IMPL(v8, NV12toBGR);
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -12,6 +12,8 @@
#include "intel_gpu/primitives/data.hpp"
#include "intel_gpu/primitives/concatenation.hpp"
#include "openvino/core/preprocess/input_tensor_info.hpp"
using namespace InferenceEngine;
namespace ov {
@ -176,107 +178,131 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
break;
}
if (ColorFormat::NV12 == preProcess.getColorFormat() && p.GetConfig().nv12_two_inputs) {
// for NV12, create two input layouts with reorder instead of one,
// and then would expect compound blob in inferRequest
if (InferenceEngine::Layout::NCHW != l &&
(InferenceEngine::Precision::I8 != ip || InferenceEngine::Precision::U8 != ip)) {
IE_THROW() << "Unsupported layout (" << l << ") or precision "
<< ip.name() << ") for NV12 input " + inputInfo->name();
bool is_convert_color_input = false;
for (auto& node : op->get_users()) {
is_convert_color_input |= ngraph::is_type<ngraph::op::v8::NV12toRGB>(node) ||
ngraph::is_type<ngraph::op::v8::NV12toBGR>(node);
}
if (is_convert_color_input) {
networkInputLayout.format = cldnn::format::byxf;
if (op->output(0).get_rt_info().count(ov::preprocess::TensorInfoMemoryType::get_type_info_static())) {
std::string mem_type = op->output(0).get_rt_info().at(ov::preprocess::TensorInfoMemoryType::get_type_info_static())
.as<ov::preprocess::TensorInfoMemoryType>().value;
if (mem_type.find(GPU_CONFIG_KEY(SURFACE)) != std::string::npos) {
networkInputLayout.format = cldnn::format::nv12;
}
}
int height = inputDims[2];
int width = inputDims[3];
std::vector<cldnn::primitive_id> reorders;
networkInputLayout.size = { TensorValue(inputDims[0]), TensorValue(inputDims[3]),
TensorValue(inputDims[2]), TensorValue(inputDims[1]) };
for (auto i = 0; i < inputDims[0]; i++) {
auto preprocessPrimID = "reorder:" + inputName + std::to_string(i) + Program::m_preProcessTag;
std::string y_name = inputName + "_Y" + std::to_string(i);
std::string uv_name = inputName + "_UV" + std::to_string(i);
p.inputLayouts.insert({ inputInfo->name(), networkInputLayout });
p.AddPrimitive(cldnn::input_layout(inputName, networkInputLayout, inputInfo->name()));
p.AddPrimitiveToProfiler(op);
} else {
if (ColorFormat::NV12 == preProcess.getColorFormat() && p.GetConfig().nv12_two_inputs) {
// for NV12, create two input layouts with reorder instead of one,
// and then would expect compound blob in inferRequest
if (InferenceEngine::Layout::NCHW != l &&
(InferenceEngine::Precision::I8 != ip || InferenceEngine::Precision::U8 != ip)) {
IE_THROW() << "Unsupported layout (" << l << ") or precision "
<< ip.name() << ") for NV12 input " + inputInfo->name();
}
int height = inputDims[2];
int width = inputDims[3];
std::vector<cldnn::primitive_id> reorders;
cldnn::layout y_layout(DataTypeFromPrecision(ip),
cldnn::format::nv12, { 1, 1, width, height });
cldnn::layout uv_layout(DataTypeFromPrecision(ip),
cldnn::format::nv12, { 1, 2, width / 2, height / 2 });
auto inputY = cldnn::input_layout(y_name, y_layout, inputInfo->name());
auto inputUV = cldnn::input_layout(uv_name, uv_layout, inputInfo->name());
for (auto i = 0; i < inputDims[0]; i++) {
auto preprocessPrimID = "reorder:" + inputName + std::to_string(i) + Program::m_preProcessTag;
std::string y_name = inputName + "_Y" + std::to_string(i);
std::string uv_name = inputName + "_UV" + std::to_string(i);
cldnn::layout y_layout(DataTypeFromPrecision(ip),
cldnn::format::nv12, { 1, 1, width, height });
cldnn::layout uv_layout(DataTypeFromPrecision(ip),
cldnn::format::nv12, { 1, 2, width / 2, height / 2 });
auto inputY = cldnn::input_layout(y_name, y_layout, inputInfo->name());
auto inputUV = cldnn::input_layout(uv_name, uv_layout, inputInfo->name());
p.AddPrimitive(inputY);
p.inputLayouts.insert({ inputInfo->name() + "_Y" + std::to_string(i), y_layout });
p.AddPrimitive(inputUV);
p.inputLayouts.insert({ inputInfo->name() + "_UV" + std::to_string(i), uv_layout });
switch (preProcess.getMeanVariant()) {
case NONE:
case MEAN_VALUE: {
p.AddPrimitive(cldnn::reorder(preprocessPrimID,
y_name,
uv_name,
networkInputLayout,
meanValues,
cldnn::reorder_mean_mode::subtract,
inputInfo->name()));
break;
}
case MEAN_IMAGE: {
p.AddPrimitive(cldnn::reorder(preprocessPrimID,
y_name,
uv_name,
networkInputLayout,
meanBlobID,
cldnn::reorder_mean_mode::subtract,
inputInfo->name()));
break;
}
default: IE_THROW(Unexpected) << "Invalid mean variant in input " + inputName;
break;
}
p.profilingIDs.push_back(preprocessPrimID);
p.InitProfileInfo(preprocessPrimID, "Reorder");
p.primitiveIDs[inputName] = preprocessPrimID; // If it is batched blob, it will be overwritten afterwards.
p.primitiveIDs[preprocessPrimID] = preprocessPrimID;
reorders.push_back(preprocessPrimID);
}
if (inputDims[0] > 1) {
auto concatPrimID = "concat:" + inputName + Program::m_preProcessTag;
p.AddPrimitive(cldnn::concatenation(concatPrimID, reorders, cldnn::concatenation::along_b, op->get_friendly_name()));
p.primitiveIDs[inputName] = concatPrimID;
}
} else {
auto preprocessPrimID = "reorder:" + inputName + Program::m_preProcessTag;
cldnn::layout inputLayout(networkInputLayout);
inputLayout.data_type = DataTypeFromPrecision(ip);
p.inputLayouts.insert({ inputInfo->name(), inputLayout });
p.AddPrimitive(cldnn::input_layout(inputName, inputLayout, inputInfo->name()));
p.AddPrimitive(inputY);
p.inputLayouts.insert({ inputInfo->name() + "_Y" + std::to_string(i), y_layout });
p.AddPrimitive(inputUV);
p.inputLayouts.insert({ inputInfo->name() + "_UV" + std::to_string(i), uv_layout });
switch (preProcess.getMeanVariant()) {
case NONE:
case MEAN_VALUE: {
p.AddPrimitive(cldnn::reorder(preprocessPrimID,
y_name,
uv_name,
inputName,
networkInputLayout,
meanValues,
cldnn::reorder_mean_mode::subtract,
inputInfo->name()));
op->get_friendly_name()));
break;
}
case MEAN_IMAGE: {
p.AddPrimitive(cldnn::reorder(preprocessPrimID,
y_name,
uv_name,
inputName,
networkInputLayout,
meanBlobID,
cldnn::reorder_mean_mode::subtract,
inputInfo->name()));
op->get_friendly_name()));
break;
}
default: IE_THROW(Unexpected) << "Invalid mean variant in input " + inputName;
default: IE_THROW() << "Invalid mean variant in input " << inputName;
break;
}
p.profilingIDs.push_back(preprocessPrimID);
p.InitProfileInfo(preprocessPrimID, "Reorder");
p.primitiveIDs[inputName] = preprocessPrimID; // If it is batched blob, it will be overwritten afterwards.
p.InitProfileInfo(preprocessPrimID, "reorder");
p.primitiveIDs[preprocessPrimID] = preprocessPrimID;
reorders.push_back(preprocessPrimID);
p.primitiveIDs[inputName] = preprocessPrimID;
p.profilingIDs.push_back(preprocessPrimID);
}
if (inputDims[0] > 1) {
auto concatPrimID = "concat:" + inputName + Program::m_preProcessTag;
p.AddPrimitive(cldnn::concatenation(concatPrimID, reorders, cldnn::concatenation::along_b, op->get_friendly_name()));
p.primitiveIDs[inputName] = concatPrimID;
}
} else {
auto preprocessPrimID = "reorder:" + inputName + Program::m_preProcessTag;
cldnn::layout inputLayout(networkInputLayout);
inputLayout.data_type = DataTypeFromPrecision(ip);
p.inputLayouts.insert({ inputInfo->name(), inputLayout });
p.AddPrimitive(cldnn::input_layout(inputName, inputLayout, inputInfo->name()));
switch (preProcess.getMeanVariant()) {
case NONE:
case MEAN_VALUE: {
p.AddPrimitive(cldnn::reorder(preprocessPrimID,
inputName,
networkInputLayout,
meanValues,
cldnn::reorder_mean_mode::subtract,
op->get_friendly_name()));
break;
}
case MEAN_IMAGE: {
p.AddPrimitive(cldnn::reorder(preprocessPrimID,
inputName,
networkInputLayout,
meanBlobID,
cldnn::reorder_mean_mode::subtract,
op->get_friendly_name()));
break;
}
default: IE_THROW() << "Invalid mean variant in input " << inputName;
break;
}
p.InitProfileInfo(preprocessPrimID, "reorder");
p.primitiveIDs[preprocessPrimID] = preprocessPrimID;
p.primitiveIDs[inputName] = preprocessPrimID;
p.profilingIDs.push_back(preprocessPrimID);
}
}

View File

@ -38,7 +38,12 @@ static void CreateResultOp(Program& p, const std::shared_ptr<ngraph::op::v0::Res
auto inputs = p.GetInputPrimitiveIDs(op);
const auto outputDesc = outputData->getTensorDesc();
const auto outputlayout = outputDesc.getLayout();
auto outputlayout = outputDesc.getLayout();
if (ngraph::is_type<ngraph::op::v8::NV12toRGB>(prev) ||
ngraph::is_type<ngraph::op::v8::NV12toBGR>(prev)) {
outputlayout = NHWC;
}
// TODO: add precision check once there's an outputInfo object
if (outputlayout != NCHW &&
@ -59,7 +64,7 @@ static void CreateResultOp(Program& p, const std::shared_ptr<ngraph::op::v0::Res
p.AddPrimitive(cldnn::reorder(outLayerName,
outputID,
FormatFromLayout(outputData->getLayout()),
FormatFromLayout(outputlayout),
DataTypeFromPrecision(precision),
std::vector<float>(),
cldnn::reorder_mean_mode::subtract,