[PP] Support for FP16 in Reorder cases (#4427)

- added basic support for FP16 (plain wrapper over int16_t)
- extended Split/Merge operations to support it
- added tests
This commit is contained in:
Anton Potapov
2021-04-07 14:57:52 +03:00
committed by GitHub
parent 2486c5b90a
commit 54b6c77202
5 changed files with 91 additions and 18 deletions

View File

@@ -81,7 +81,8 @@ inline int get_cv_depth(const TensorDesc &ie_desc) {
case Precision::U8: return CV_8U;
case Precision::FP32: return CV_32F;
case Precision::U16: return CV_16U;
case Precision::FP16: return CV_16U;
case Precision::I16: return CV_16S;
case Precision::FP16: return CV_16F;
default: IE_THROW() << "Unsupported data type";
}

View File

@@ -434,6 +434,11 @@ void splitRow(const uint8_t* in, std::array<uint8_t*, chs>& outs, int length) {
namespace {
struct fp_16_t {
int16_t v;
};
template<typename type>
struct cv_type_to_depth;
@@ -443,6 +448,7 @@ template<> struct cv_type_to_depth<std::uint16_t> { enum { depth = CV_16U }; }
template<> struct cv_type_to_depth<std::int16_t> { enum { depth = CV_16S }; };
template<> struct cv_type_to_depth<std::int32_t> { enum { depth = CV_32S }; };
template<> struct cv_type_to_depth<float> { enum { depth = CV_32F }; };
template<> struct cv_type_to_depth<fp_16_t> { enum { depth = CV_16F }; };
template<typename ... types>
struct typelist {};
@@ -500,7 +506,7 @@ bool is_cv_type_in_list(const int type_id) {
namespace {
using merge_supported_types = typelist<uint8_t, int8_t, uint16_t, int16_t, int32_t, float>;
using merge_supported_types = typelist<uint8_t, int8_t, uint16_t, int16_t, int32_t, float, fp_16_t>;
template<int chs>
struct typed_merge_row {
@@ -508,6 +514,12 @@ struct typed_merge_row {
template <typename type>
p_f operator()(type_to_type<type> ) { return mergeRow<type, chs>; }
p_f operator()(type_to_type<fp_16_t> ) {
static_assert(sizeof(fp_16_t) == sizeof(fp_16_t::v),
"fp_16_t should be a plain wrap over FP16 implementation type");
return mergeRow<decltype(fp_16_t::v), chs>;
}
};
} // namespace
@@ -562,8 +574,7 @@ GAPI_FLUID_KERNEL(FMerge4, Merge4, false) {
namespace {
using split_supported_types = typelist<uint8_t, int8_t, uint16_t, int16_t, int32_t, float>;
using split_supported_types = typelist<uint8_t, int8_t, uint16_t, int16_t, int32_t, float, fp_16_t>;
template<int chs>
struct typed_split_row {
@@ -571,6 +582,12 @@ struct typed_split_row {
template <typename type>
p_f operator()(type_to_type<type> ) { return splitRow<type, chs>; }
p_f operator()(type_to_type<fp_16_t> ) {
static_assert(sizeof(fp_16_t) == sizeof(fp_16_t::v),
"fp_16_t should be a plain wrap over FP16 implementation type");
return splitRow<decltype(fp_16_t::v), chs>;
}
};
} // namespace

View File

@@ -193,7 +193,7 @@ InferenceEngine::Blob::Ptr img2Blob(cv::Mat &img, InferenceEngine::Layout layout
const size_t height = img.size().height;
const size_t width = img.size().width;
CV_Assert(cv::DataType<data_t>::depth == img.depth());
CV_Assert(cv::DataType<data_t>::depth == img.depth() || (PRC == Precision::FP16 && img.depth() == CV_16F));
SizeVector dims = {1, channels, height, width};
Blob::Ptr resultBlob = make_shared_blob<data_t>(TensorDesc(PRC, dims, layout));;
@@ -237,7 +237,8 @@ void Blob2Img(const InferenceEngine::Blob::Ptr& blobP, cv::Mat& img, InferenceEn
const size_t height = img.size().height;
const size_t width = img.size().width;
CV_Assert(cv::DataType<data_t>::depth == img.depth());
//IE and OpenCV use different data types for FP16 representation, so need to check for it explicitly
CV_Assert(cv::DataType<data_t>::depth == img.depth() || ((img.depth() == CV_16F) && (PRC == Precision::FP16)));
data_t* blobData = blobP->buffer().as<data_t*>();
@@ -438,11 +439,20 @@ TEST_P(SplitTestGAPI, AccuracyTest)
cv::Size sz = std::get<2>(params);
double tolerance = std::get<3>(params);
int srcType = CV_MAKE_TYPE(depth, planes);
auto make_src_type = [planes](int d){
return CV_MAKE_TYPE(d, planes);
};
int srcType = make_src_type(depth);
int dstType = CV_MAKE_TYPE(depth, 1);
cv::Mat in_mat(sz, srcType);
cv::randn(in_mat, cv::Scalar::all(127), cv::Scalar::all(40.f));
bool const is_fp16 = (depth == CV_16F);
cv::Mat rnd_mat = is_fp16 ? cv::Mat(sz, make_src_type(CV_32F)) : in_mat;
cv::randn(rnd_mat, cv::Scalar::all(127), cv::Scalar::all(40.f));
if (is_fp16) {
rnd_mat.convertTo(in_mat, depth);
}
std::vector<cv::Mat> out_mats_gapi(planes, cv::Mat::zeros(sz, dstType));
std::vector<cv::Mat> out_mats_ocv (planes, cv::Mat::zeros(sz, dstType));
@@ -520,12 +530,21 @@ TEST_P(MergeTestGAPI, AccuracyTest)
cv::Size sz = std::get<2>(params);
double tolerance = std::get<3>(params);
int srcType = CV_MAKE_TYPE(depth, 1);
auto make_src_type = [](int d){
return CV_MAKE_TYPE(d, 1);
};
int srcType = make_src_type(depth);
int dstType = CV_MAKE_TYPE(depth, planes);
std::vector<cv::Mat> in_mats(planes, cv::Mat(sz, srcType));
for (int p = 0; p < planes; p++) {
cv::randn(in_mats[p], cv::Scalar::all(127), cv::Scalar::all(40.f));
bool const is_fp16 = (depth == CV_16F);
cv::Mat rnd_mat = is_fp16 ? cv::Mat(sz, make_src_type(CV_32F)) : in_mats[p];
cv::randn(rnd_mat, cv::Scalar::all(127), cv::Scalar::all(40.f));
if (is_fp16) {
rnd_mat.convertTo(in_mats[p], depth);
}
}
cv::Mat out_mat_ocv = cv::Mat::zeros(sz, dstType);
@@ -754,7 +773,8 @@ TEST_P(ColorConvertTestIE, AccuracyTest)
cv::Scalar mean = cv::Scalar::all(127);
cv::Scalar stddev = cv::Scalar::all(40.f);
cv::randn(in_mat1, mean, stddev);
if (depth != CV_16F)
cv::randn(in_mat1, mean, stddev);
cv::Mat out_mat(size, out_type);
cv::Mat out_mat_ocv(size, out_type);
@@ -771,7 +791,7 @@ TEST_P(ColorConvertTestIE, AccuracyTest)
size_t out_channels = out_mat.channels();
CV_Assert(3 == out_channels || 4 == out_channels);
CV_Assert(CV_8U == depth || CV_32F == depth);
CV_Assert(CV_8U == depth || CV_32F == depth || depth == CV_16S || depth == CV_16F);
ASSERT_TRUE(in_mat1.isContinuous() && out_mat.isContinuous());
@@ -780,8 +800,21 @@ TEST_P(ColorConvertTestIE, AccuracyTest)
InferenceEngine::SizeVector in_sv = { 1, in_channels, in_height, in_width };
InferenceEngine::SizeVector out_sv = { 1, out_channels, out_height, out_width };
auto depth_to_precision = [](int depth) -> Precision::ePrecision {
switch (depth)
{
case CV_8U: return Precision::U8;
case CV_16S: return Precision::I16;
case CV_16F: return Precision::FP16;
case CV_32F: return Precision::FP32;
default:
throw std::logic_error("Unsupported configuration");
}
return Precision::UNSPECIFIED;
};
// HWC blob: channels are interleaved
Precision precision = CV_8U == depth ? Precision::U8 : Precision::FP32;
Precision precision = depth_to_precision(depth);
Blob::Ptr in_blob, out_blob;
switch (precision)
@@ -796,6 +829,18 @@ TEST_P(ColorConvertTestIE, AccuracyTest)
out_blob = img2Blob<Precision::FP32>(out_mat, out_layout);
break;
case Precision::I16:
in_blob = img2Blob<Precision::I16>(in_mat1, in_layout);
out_blob = img2Blob<Precision::I16>(out_mat, out_layout);
break;
case Precision::FP16:
in_blob = img2Blob<Precision::FP16>(in_mat1, in_layout);
out_blob = img2Blob<Precision::FP16>(out_mat, out_layout);
break;
default:
FAIL() << "Unsupported configuration";
}
@@ -813,6 +858,8 @@ TEST_P(ColorConvertTestIE, AccuracyTest)
{
case Precision::U8: Blob2Img<Precision::U8> (out_blob, out_mat, out_layout); break;
case Precision::FP32: Blob2Img<Precision::FP32>(out_blob, out_mat, out_layout); break;
case Precision::I16: Blob2Img<Precision::I16> (out_blob, out_mat, out_layout); break;
case Precision::FP16: Blob2Img<Precision::FP16> (out_blob, out_mat, out_layout); break;
default: FAIL() << "Unsupported configuration";
}

View File

@@ -132,7 +132,7 @@ INSTANTIATE_TEST_CASE_P(ResizeTestFluid_F32, ResizeTestGAPI,
INSTANTIATE_TEST_CASE_P(SplitTestFluid, SplitTestGAPI,
Combine(Values(2, 3, 4),
Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32F, CV_32S),
Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_16F, CV_32F, CV_32S),
Values(TEST_SIZES),
Values(0)));
@@ -144,7 +144,7 @@ INSTANTIATE_TEST_CASE_P(ChanToPlaneTestFluid, ChanToPlaneTestGAPI,
INSTANTIATE_TEST_CASE_P(MergeTestFluid, MergeTestGAPI,
Combine(Values(2, 3, 4),
Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32F, CV_32S),
Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_16F, CV_32F, CV_32S),
Values(TEST_SIZES),
Values(0)));
@@ -269,7 +269,7 @@ INSTANTIATE_TEST_CASE_P(ColorConvertYUV420Fluid, ColorConvertYUV420TestIE,
Values(0)));
INSTANTIATE_TEST_CASE_P(Reorder_HWC2CHW, ColorConvertTestIE,
Combine(Values(CV_8U, CV_32F),
Combine(Values(CV_8U, CV_32F, CV_16S, CV_16F),
Values(InferenceEngine::ColorFormat::BGR),
Values(InferenceEngine::NHWC),
Values(InferenceEngine::NCHW),
@@ -277,7 +277,7 @@ INSTANTIATE_TEST_CASE_P(Reorder_HWC2CHW, ColorConvertTestIE,
Values(0)));
INSTANTIATE_TEST_CASE_P(Reorder_CHW2HWC, ColorConvertTestIE,
Combine(Values(CV_8U, CV_32F),
Combine(Values(CV_8U, CV_32F, CV_16S, CV_16F),
Values(InferenceEngine::ColorFormat::BGR),
Values(InferenceEngine::NCHW),
Values(InferenceEngine::NHWC),

View File

@@ -32,7 +32,8 @@ typedef unsigned short ushort;
#define CV_32S 4
#define CV_32F 5
#define CV_64F 6
#define CV_USRTYPE1 7
#define CV_16F 7
#define CV_USRTYPE1 8
#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1)
#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK)
@@ -70,6 +71,13 @@ typedef unsigned short ushort;
#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
#define CV_16FC1 CV_MAKETYPE(CV_16F,1)
#define CV_16FC2 CV_MAKETYPE(CV_16F,2)
#define CV_16FC3 CV_MAKETYPE(CV_16F,3)
#define CV_16FC4 CV_MAKETYPE(CV_16F,4)
#define CV_16FC(n) CV_MAKETYPE(CV_16F,(n))
#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
#define CV_32FC3 CV_MAKETYPE(CV_32F,3)