[LPT] Mixed precision GPU Plugin tests (#4498)

Vladimir Zinoviev 2021-04-05 18:04:34 +03:00 committed by GitHub
parent 7d0cc01c1b
commit e2ada66826
57 changed files with 189 additions and 96 deletions

View File

@ -72,7 +72,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
{ {-128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true },
{ {0.2f}, ngraph::element::f32, {}, false }
},
"output_original",
"Convolution",
"FP32"
},
@ -126,7 +126,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
{},
{ {0.2f}, ngraph::element::f32, {}, false }
},
"output_original",
"Convolution",
"U8"
},
@ -177,7 +177,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
{ {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true },
{ {0.2f}, ngraph::element::f32, {}, false }
},
"output_original",
"Convolution",
"FP32"
},
@ -228,7 +228,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
{},
{ {0.2f}, ngraph::element::f32, {}, false }
},
"output_original",
"Convolution",
"U8"
},
};
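Note: across the parameter files in this commit, the reference string in each test case changes from a concrete node name (for example "output_original" or "output") to an execution-graph layer type ("Convolution", "FullyConnected"), because the shared test code further down now resolves the runtime precision with getRuntimePrecisionByType instead of a name lookup. A condensed before/after sketch of one entry (the field layout is an illustration, not a verbatim copy of any single case):

// { fqOnData, ..., fqOnWeights, ..., layer reference, expectedKernelType }
// before: { ..., "output_original", "FP32" }   // matched a node by name
// after:  { ..., "Convolution",     "FP32" }   // matched by execution-graph layer type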

View File

@ -27,7 +27,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
false,
{},
false,
"output",
"Convolution",
"FP32"
},
{
@ -35,7 +35,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
false,
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
false,
"output",
"Convolution",
"FP32"
},
{
@ -43,7 +43,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
false,
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
false,
"output_original",
"Convolution",
"U8"
},
{
@ -51,7 +51,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
false,
{ 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
false,
"output",
"Convolution",
"FP32"
},
{
@ -59,7 +59,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
false,
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f } },
false,
"output",
"Convolution",
"FP32"
},
{
@ -67,7 +67,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
false,
{ 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
false,
"output",
"Convolution",
"FP32"
},
{
@ -75,7 +75,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
true,
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
false,
"output_original",
"Convolution",
"U8"
},
{
@ -83,7 +83,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
true,
{ 255ul, ngraph::Shape { 1 }, { 0.f }, { 254.f }, { -18.7f }, { 18.7f } },
false,
"output_original",
"Convolution",
"U8"
},
};

View File

@ -12,8 +12,9 @@ using namespace LayerTestsDefinitions;
using namespace ngraph::pass::low_precision;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
ngraph::element::f16
};
const std::vector<LayerTransformation::Params> trasformationParamValues = {
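Most of the GPU test configuration files below follow the same pattern as this hunk: the precision list migrates from InferenceEngine::Precision to ngraph::element::Type and gains (or un-comments) an f16 entry. A condensed before/after for reference:

// before
const std::vector<InferenceEngine::Precision> netPrecisions = { InferenceEngine::Precision::FP32 };
// after
const std::vector<ngraph::element::Type> netPrecisions = { ngraph::element::f32, ngraph::element::f16 };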

View File

@ -21,7 +21,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } },
{ 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} },
{ {}, {}, {} },
"matMul/FC",
"FullyConnected",
"U8"
},
// 3D with dequantize on weights
@ -31,7 +31,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 2, 2.f), ngraph::element::i8, ngraph::Shape{ 2, 4 } },
{},
{ ngraph::element::f32, {}, {0.1f} },
"matMul/FC",
"FullyConnected",
"U8"
},
// 3D with different values
@ -41,7 +41,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } },
{ 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} },
{ {}, {}, {} },
"matMul/FC",
"FullyConnected",
"U8"
},
// 4D with different values
@ -51,7 +51,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } },
{ 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} },
{ {}, {}, {} },
"matMul/FC",
"FullyConnected",
"U8"
},
// 4D with Dq on weights
@ -61,7 +61,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 2, 2.f), ngraph::element::i8, ngraph::Shape{ 2, 4 } },
{},
{ ngraph::element::f32, {}, {{0.1f, 0.01}, ngraph::element::f32, ngraph::Shape{ 2, 1 }} },
"matMul/FC",
"FullyConnected",
"U8"
},
// 3D with the same values
@ -71,7 +71,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 4, 2.f), ngraph::element::f32, ngraph::Shape{ 4, 4 } },
{ 256ul, {{1}, {1}, {1}, {1}}, {-128.f}, {127.f}, {-128.f}, {127.f} },
{ {}, {}, {} },
"matMul/FC",
"FullyConnected",
"U8"
},
// 2D with subtract on activations
@ -81,7 +81,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>{1, 2, 3, 4, 5, 6}, ngraph::element::f32, ngraph::Shape{ 2, 3 } },
{ 256ul, {{1}, {1}, {1}, {1}}, {-128.f}, {127.f}, {-12.8f}, {12.7f} },
{ {}, {}, {} },
"matMul/1",
"FullyConnected",
"U8"
},
// 2D with subtract on activations & Dq on weights
@ -91,7 +91,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>{1, 2, 3, 4, 5, 6}, ngraph::element::i8, ngraph::Shape{ 2, 3 } },
{},
{ ngraph::element::f32, {}, {0.1f} },
"matMul/1",
"FullyConnected",
"U8"
}
};

View File

@ -13,7 +13,7 @@ using namespace InferenceEngine::details;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
//ngraph::element::f16
ngraph::element::f16
};
const std::vector<LayerTestsDefinitions::AddTestValues> params = {

View File

@ -14,7 +14,7 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {

View File

@ -13,7 +13,7 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
@ -72,7 +72,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
{ {-128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true },
{ {0.2f}, ngraph::element::f32, {}, false }
},
"output_original",
"Convolution",
"U8"
},
@ -126,7 +126,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
{},
{ {0.2f}, ngraph::element::f32, {}, false }
},
"output_original",
"Convolution",
"U8"
},
@ -177,7 +177,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
{ {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true },
{ {0.2f}, ngraph::element::f32, {}, false }
},
"output_original",
"Convolution",
"U8"
},
@ -228,7 +228,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
{},
{ {0.2f}, ngraph::element::f32, {}, false }
},
"output_original",
"Convolution",
"U8"
},
};

View File

@ -13,7 +13,7 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
@ -27,30 +27,32 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
false,
{},
false,
"output",
""
"Convolution",
"FP32"
},
{
{},
false,
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
false,
"output",
""
"Convolution",
"FP32"
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
false,
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
false,
"output_original",
"Convolution",
"U8"
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { -12.75f }, { 6.375f } },
true,
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
false
false,
"Convolution",
"U8"
}
};

View File

@ -14,7 +14,7 @@ using namespace ngraph::opset1;
namespace {
const std::vector<ngraph::element::Type> precisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<DepthToSpace::DepthToSpaceMode> modes = {

View File

@ -13,7 +13,8 @@ using namespace InferenceEngine::details;
namespace {
const std::vector<ngraph::element::Type> precisions = {
ngraph::element::f32
ngraph::element::f32,
ngraph::element::f16
};
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {

View File

@ -13,7 +13,8 @@ using namespace InferenceEngine::details;
namespace {
const std::vector<ngraph::element::Type> precisions = {
ngraph::element::f32
ngraph::element::f32,
ngraph::element::f16
};
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {

View File

@ -13,7 +13,7 @@ using namespace ngraph::pass::low_precision;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<LayerTransformation::Params> trasformationParamValues = {

View File

@ -14,7 +14,7 @@ using namespace ngraph::pass::low_precision;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<LayerTransformation::Params> trasformationParamValues = {

View File

@ -14,7 +14,7 @@ using namespace ngraph::pass::low_precision;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<LayerTransformation::Params> trasformationParamValues = {

View File

@ -12,8 +12,9 @@ using namespace LayerTestsDefinitions;
using namespace ngraph::pass::low_precision;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
ngraph::element::f16,
};
const std::vector<LayerTransformation::Params> trasformationParamValues = {

View File

@ -13,7 +13,7 @@ using namespace InferenceEngine::details;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<MatMulShapes> shapes = {

View File

@ -9,7 +9,8 @@ using namespace InferenceEngine::details;
namespace {
const std::vector<element::Type> precisions = {
element::f32
element::f32,
// element::f16 // TODO: temporarily disabled; fails in the GPU Plugin at the constant folding stage
};
const std::vector< ngraph::Shape > inputAndQuantizationShapes = {

View File

@ -14,6 +14,7 @@ using namespace ngraph::pass::low_precision;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
ngraph::element::f16
};
const std::vector<LayerTransformation::Params> trasformationParamValues = {

View File

@ -13,6 +13,7 @@ using namespace ngraph::pass::low_precision;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
ngraph::element::f16
};
const std::vector<ngraph::Shape> dimensions = {

View File

@ -12,7 +12,7 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {

View File

@ -9,7 +9,8 @@ using namespace InferenceEngine::details;
namespace {
const std::vector<ngraph::element::Type> precisions = {
ngraph::element::f32
ngraph::element::f32,
ngraph::element::f16
};
const std::vector<std::pair<ngraph::Shape, ngraph::Shape>> shapes = {

View File

@ -12,7 +12,7 @@ using namespace InferenceEngine::details;
namespace {
const std::vector<ngraph::element::Type> precisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
std::vector<MatMulTransformationTestValues> testValues = {

View File

@ -10,7 +10,10 @@ using namespace LayerTestsDefinitions;
using namespace InferenceEngine::details;
namespace {
const std::vector<ngraph::element::Type> precisions = { ngraph::element::f32 };
const std::vector<ngraph::element::Type> precisions = {
ngraph::element::f32,
ngraph::element::f16
};
//transpose_a = false, transpose_b = true
std::vector<MatMulWithConstantTransformationTestValues> testValues = {
@ -20,7 +23,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } },
{ 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} },
{ {}, {}, {} },
"result_result",
"FullyConnected",
"FP32"
},
{
@ -29,7 +32,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 2, 2.f), ngraph::element::i8, ngraph::Shape{ 2, 4 } },
{},
{ ngraph::element::f32, {}, {0.1f} },
"result_result",
"FullyConnected",
"FP32"
},
{
@ -38,7 +41,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } },
{ 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} },
{ {}, {}, {} },
"result_result",
"FullyConnected",
"FP32"
},
{
@ -47,7 +50,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } },
{ 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} },
{ {}, {}, {} },
"matMul",
"FullyConnected",
"U8"
},
{
@ -56,7 +59,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 2, 2.f), ngraph::element::i8, ngraph::Shape{ 2, 4 } },
{},
{ ngraph::element::f32, {}, {{0.1f, 0.01}, ngraph::element::f32, ngraph::Shape{ 2, 1 }} },
"matMul",
"FullyConnected",
"U8"
},
{
@ -65,7 +68,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 4, 2.f), ngraph::element::f32, ngraph::Shape{ 4, 4 } },
{ 256ul, {{1}, {1}, {1}, {1}}, {-128.f}, {127.f}, {-128.f}, {127.f} },
{ {}, {}, {} },
"result_result",
"FullyConnected",
"FP32"
},
{
@ -74,7 +77,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>{1, 2, 3, 4, 5, 6}, ngraph::element::f32, ngraph::Shape{ 2, 3 } },
{ 256ul, {{1}, {1}, {1}, {1}}, {-128.f}, {127.f}, {-12.8f}, {12.7f} },
{ {}, {}, {} },
"matMul",
"FullyConnected",
"U8"
},
{
@ -83,6 +86,8 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>{1, 2, 3, 4, 5, 6}, ngraph::element::i8, ngraph::Shape{ 2, 3 } },
{},
{ ngraph::element::f32, {}, {0.1f} },
"FullyConnected",
"U8"
}
};

View File

@ -12,6 +12,7 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
ngraph::element::f16
};
const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues> params = {

View File

@ -9,7 +9,8 @@ using namespace InferenceEngine::details;
namespace {
const std::vector<element::Type> precisions = {
element::f32
element::f32,
element::f16
};
const std::vector< ngraph::Shape > inputShapes = {

View File

@ -12,7 +12,7 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
//ngraph::element::f16
ngraph::element::f16
};
const std::vector<LayerTestsDefinitions::MultiplyTestValues> params = {

View File

@ -12,7 +12,7 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<MultiplyWithOneParentTransformationValues> values = {

View File

@ -9,7 +9,8 @@ using namespace InferenceEngine::details;
namespace {
const std::vector<element::Type> precisions = {
element::f32
element::f32,
element::f16
};
const std::vector<ngraph::Shape> inputAndQuantizationShapes = {

View File

@ -13,7 +13,7 @@ using namespace InferenceEngine::details;
namespace {
const std::vector<ngraph::element::Type> precisions = {
ngraph::element::f32,
//ngraph::element::f16
ngraph::element::f16
};
const std::vector<std::pair<ngraph::Shape, ngraph::Shape> > inputAndQuantizationShapes = {

View File

@ -12,7 +12,8 @@ using namespace InferenceEngine::details;
namespace {
const std::vector<ngraph::element::Type> precisions = {
ngraph::element::f32
ngraph::element::f32,
ngraph::element::f16
};
std::vector<PReluTestValues> testValues = {

View File

@ -12,7 +12,7 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
ngraph::element::f16
// ngraph::element::f16 // TODO: enable f16 test inference (change ngraph function + fp32 to fp16 replacements)
};
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {

View File

@ -13,7 +13,7 @@ using namespace InferenceEngine::details;
namespace {
const std::vector<ngraph::element::Type> precisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
std::vector<ReluTestValues> testValues = {

View File

@ -11,8 +11,8 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32
// ngraph::element::f16
ngraph::element::f32,
ngraph::element::f16
};
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {

View File

@ -15,7 +15,7 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {

View File

@ -13,6 +13,7 @@ using namespace ngraph::pass::low_precision;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
ngraph::element::f16
};

View File

@ -14,7 +14,7 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {

View File

@ -13,6 +13,7 @@ using namespace ngraph::pass::low_precision;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
ngraph::element::f16
};
const std::vector<LayerTransformation::Params> trasformationParamValues = {

View File

@ -13,6 +13,7 @@ using namespace ngraph::pass::low_precision;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
ngraph::element::f16
};
const std::vector<LayerTransformation::Params> trasformationParamValues = {

View File

@ -12,7 +12,7 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> precisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<TransposeTransformationTestValues> testValues = {

View File

@ -13,6 +13,7 @@ using namespace ngraph::pass::low_precision;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
ngraph::element::f16
};

View File

@ -15,7 +15,7 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
// ngraph::element::f16
ngraph::element::f16
};
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {

View File

@ -49,8 +49,8 @@ inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeWithNotOpti
// ngraph::builder::subgraph::FakeQuantizeOnData
typedef std::tuple<
InferenceEngine::Precision,
InferenceEngine::SizeVector,
ngraph::element::Type,
ngraph::Shape,
std::string,
ngraph::pass::low_precision::LayerTransformation::Params,
FakeQuantizeWithNotOptimalTransformationTestValues> FakeQuantizeTransformationParams;

View File

@ -59,7 +59,7 @@ void ConvolutionQDqTransformation::Run() {
LayerTestsCommon::Run();
const auto params = std::get<4>(GetParam());
const auto actualType = getRuntimePrecision(params.layerName);
const auto actualType = getRuntimePrecisionByType(params.layerName);
EXPECT_EQ(actualType, params.expectedKernelType);
}

View File

@ -58,8 +58,12 @@ void ConvolutionTransformation::Run() {
LayerTestsCommon::Run();
const auto params = std::get<4>(GetParam());
const auto actualType = getRuntimePrecision(params.layerName);
EXPECT_EQ(actualType, params.expectedKernelType);
const auto actualPrecision = getRuntimePrecisionByType(params.layerName);
auto expectedPrecision = params.expectedKernelType;
if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ngraph::element::f16) {
expectedPrecision = "FP16";
}
EXPECT_EQ(actualPrecision, expectedPrecision);
}
void ConvolutionTransformation::validate() {

View File

@ -16,8 +16,8 @@
namespace LayerTestsDefinitions {
std::string FakeQuantizeWithNotOptimalTransformation::getTestCaseName(testing::TestParamInfo<FakeQuantizeTransformationParams> obj) {
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShapes;
ngraph::element::Type netPrecision;
ngraph::Shape inputShapes;
std::string targetDevice;
ngraph::pass::low_precision::LayerTransformation::Params params;
FakeQuantizeWithNotOptimalTransformationTestValues testValues;
@ -29,14 +29,14 @@ std::string FakeQuantizeWithNotOptimalTransformation::getTestCaseName(testing::T
}
void FakeQuantizeWithNotOptimalTransformation::SetUp() {
InferenceEngine::SizeVector inputShape;
InferenceEngine::Precision netPrecision;
ngraph::Shape inputShape;
ngraph::element::Type netPrecision;
ngraph::pass::low_precision::LayerTransformation::Params params;
FakeQuantizeWithNotOptimalTransformationTestValues testValues;
std::tie(netPrecision, inputShape, targetDevice, params, testValues) = this->GetParam();
function = ngraph::builder::subgraph::FakeQuantizeAndConvolutionFunction::get(
FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision),
netPrecision,
inputShape,
testValues.fqOnData,
testValues.convertOnData,
@ -52,7 +52,7 @@ void FakeQuantizeWithNotOptimalTransformation::Run() {
LayerTestsCommon::Run();
const auto params = std::get<4>(GetParam());
const auto actualType = getRuntimePrecision("output_original");
const auto actualType = getRuntimePrecisionByType("Convolution");
EXPECT_EQ(actualType, params.expectedPrecision);
}

View File

@ -96,9 +96,12 @@ void MatMulWithConstantTransformation::Run() {
LayerTestsCommon::Run();
const auto params = std::get<2>(GetParam());
const auto actualType = getRuntimePrecision(params.layerName);
EXPECT_EQ(actualType, params.expectedKernelType);
const auto actualPrecision = getRuntimePrecisionByType(params.layerName);
auto expectedPrecision = params.expectedKernelType;
if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ngraph::element::f16) {
expectedPrecision = "FP16";
}
EXPECT_EQ(actualPrecision, expectedPrecision);
}
TEST_P(MatMulWithConstantTransformation, CompareWithRefImpl) {

View File

@ -83,10 +83,10 @@ void MultiplyTransformation::validate() {
const auto mul = output->get_input_node_shared_ptr(0);
const std::string typeName = mul->get_type_name();
ASSERT_EQ("Eltwise", typeName);
const bool notTransformed = param.expectedPrecisions[0] == param.expectedPrecisions[1];
for (size_t i = 0; i < param.expectedPrecisions.size(); ++i) {
const auto curPrecision = mul->get_input_element_type(i);
const auto expectedPrecision = param.expectedPrecisions[i];
const auto expectedPrecision = notTransformed ? precision : param.expectedPrecisions[i];
ASSERT_EQ(curPrecision, expectedPrecision);
}
}

View File

@ -82,7 +82,7 @@ void NormalizeL2Transformation::validate() {
ASSERT_EQ("NormalizeIE", typeName);
const auto inputPrecision = normalize->get_input_element_type(0);
const auto expectedPrecision = shift ? ngraph::element::f32 : ngraph::element::u8;
const auto expectedPrecision = shift ? precision : ngraph::element::u8;
ASSERT_EQ(inputPrecision, expectedPrecision);
}

View File

@ -79,6 +79,7 @@ public:
std::map<std::string, std::string>& GetConfiguration();
std::string getRuntimePrecision(const std::string& layerName);
std::string getRuntimePrecisionByType(const std::string& layerType);
template<class T>
static void Compare(const T *expected, const T *actual, std::size_t size, T threshold) {

View File

@ -358,6 +358,30 @@ std::string LayerTestsCommon::getRuntimePrecision(const std::string& layerName)
return "";
}
std::string LayerTestsCommon::getRuntimePrecisionByType(const std::string& layerType) {
const auto execGraph = executableNetwork.GetExecGraphInfo();
const auto function = execGraph.getFunction();
for (const auto& op : function->get_ops()) {
const auto& rtInfo = op->get_rt_info();
const auto& typeIt = rtInfo.find("layerType");
IE_ASSERT(typeIt != rtInfo.end()) << "Layer is not found for type: " << layerType;
const auto type = ngraph::as_type_ptr<ngraph::VariantWrapper<std::string>>(typeIt->second)->get();
if (type == layerType) {
const auto& it = rtInfo.find("runtimePrecision");
IE_ASSERT(it != rtInfo.end()) << "Runtime precision is not found for node: " << type;
const auto rtPrecisionPtr = ngraph::as_type_ptr<ngraph::VariantWrapper<std::string>>(it->second);
return rtPrecisionPtr->get();
}
}
return "";
}
void LayerTestsCommon::SetRefMode(RefMode mode) {
refMode = mode;
}
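A hedged usage sketch of the new helper (the layer type and the expected precision string are illustrative values, not taken from a specific test):

// Returns the "runtimePrecision" rt_info attribute of the first execution-graph node
// whose "layerType" attribute equals the given type; an empty string means no node matched.
const std::string actualPrecision = getRuntimePrecisionByType("Convolution");
EXPECT_EQ(actualPrecision, "U8");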

View File

@ -109,6 +109,7 @@ public:
bool operator==(const DequantizationOperations& value) const noexcept {
return equal(value);
}
void setPrecision(const ngraph::element::Type& type) noexcept;
Convert convert;
Subtract subtract;

View File

@ -185,6 +185,14 @@ DequantizationOperations::DequantizationOperations(
multiply(multiply)
{}
void DequantizationOperations::setPrecision(const ngraph::element::Type& type) noexcept {
convert.outPrecision = type;
subtract.constantPrecision = type;
subtract.outPrecision = type;
multiply.constantPrecision = type;
multiply.outPrecision = type;
}
bool DequantizationOperations::empty() const noexcept {
return convert.empty() && subtract.empty() && multiply.empty();
}
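A short sketch of how the new setter is intended to be used (this mirrors the MatMulFunction change later in this commit; precision, deqOnWeights and weightsConst are assumed to be in scope as in that builder):

auto deqStructure = deqOnWeights;                     // copy the dequantization description from the test values
deqStructure.setPrecision(precision);                 // convert/subtract/multiply now emit the network precision (f32 or f16)
const auto dequantization = makeDequantization(weightsConst, deqStructure);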

View File

@ -147,7 +147,7 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginalWithIncorrectW
fakeQuantizeOnWeights.outputLowValues, fakeQuantizeOnWeights.outputHighValues);
const auto subtract = isCorrect ? nullptr : std::make_shared<DequantizationSubtract>(fqOnWeights,
std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, Shape{ 1, 1, 1, 1 }, 3.0f));
std::make_shared<ngraph::opset1::Constant>(precision, Shape{ 1, 1, 1, 1 }, 3.0f));
const auto convolution = std::make_shared<ngraph::opset1::Convolution>(
fakeQuantizeOnData.empty() ? input : fqOnData,

View File

@ -54,8 +54,9 @@ std::shared_ptr<ngraph::Function> FuseConvertFunction::getWithFQ(
ngraph::Shape(inputShape));
parent = input1;
}
const std::shared_ptr<Node> dequantizationOp = makeDequantization(parent, dequantization);
auto deqStructure = dequantization;
deqStructure.multiply.outPrecision = inputPrecision;
const std::shared_ptr<Node> dequantizationOp = makeDequantization(parent, deqStructure);
std::shared_ptr<op::Parameter> input2 = std::make_shared<ngraph::opset1::Parameter>(
inputPrecision,
@ -68,7 +69,7 @@ std::shared_ptr<ngraph::Function> FuseConvertFunction::getWithFQ(
// just some non-transparent layer
const auto power = std::make_shared<opset1::Power>(
fakeQuantizeOnActivations,
std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{2.f}));
std::make_shared<opset1::Constant>(inputPrecision, Shape{}, std::vector<float>{2.f}));
const auto add = std::make_shared<opset1::Add>(
dequantizationOp,

View File

@ -70,7 +70,7 @@ std::shared_ptr<Node> createWeightsOriginal(
ngraph::opset1::Constant::create(
element::i64,
Shape{ 5 },
std::vector<size_t>({ groupCount, outputChannelsCount / groupCount, inputChannelsPerGroup, 7, 7 })),
std::vector<size_t>({ groupCount, outputChannelsCount / groupCount, inputChannelsPerGroup, kernelSize, kernelSize })),
true);
}
@ -146,7 +146,7 @@ std::shared_ptr<ngraph::Function> GroupConvolutionFunction::getOriginal(
// TODO: pass as argument
//const size_t groupCount = 3ul;
const size_t outputChannelsCount = outputShape[1];
const size_t kernelSize = 7ul;
const size_t kernelSize = 5ul;
const size_t inputChannelsCount = inputShape[1];
std::vector<float> weightsValues = { 1.f };

View File

@ -313,12 +313,15 @@ std::shared_ptr<ngraph::Function> MatMulFunction::getOriginal(
const auto dequantizationOnData = makeFakeQuantize(input, precision, fqOnData);
const std::shared_ptr<ngraph::Node> weightsConst = std::make_shared<ngraph::opset1::Constant>(
weights.outPrecision,
weights.outPrecision.is_real() ? precision : weights.outPrecision,
weights.shape,
weights.values);
const std::shared_ptr<ngraph::Node> fakeQuantize = fqOnWeights.empty() ? nullptr : makeFakeQuantize(weightsConst, precision, fqOnWeights);
const auto dequantizationOnWeights = makeDequantization(fakeQuantize == nullptr ? weightsConst : fakeQuantize, deqOnWeights);
auto deqStructure = deqOnWeights;
deqStructure.setPrecision(precision);
const auto dequantizationOnWeights = makeDequantization(fakeQuantize == nullptr ? weightsConst : fakeQuantize, deqStructure);
const std::shared_ptr<ngraph::opset1::MatMul> matMul = std::make_shared<ngraph::opset1::MatMul>(
dequantizationOnData,

View File

@ -9,6 +9,7 @@
#include <ngraph_functions/utils/ngraph_helpers.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
namespace ngraph {
namespace pass {
@ -61,12 +62,36 @@ public:
}
};
template<ngraph::element::Type_t from, ngraph::element::Type_t to>
class ConvertConvertLayerOutputPrecision : public MatcherPass {
public:
ConvertConvertLayerOutputPrecision() {
auto convert = ngraph::pattern::wrap_type<opset1::Convert>();
ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) {
auto convert = std::dynamic_pointer_cast<ngraph::op::Convert>(m.get_match_root());
if (!convert) {
return false;
}
if (convert->get_convert_element_type() == ngraph::element::Type(from)) {
convert->set_convert_element_type(to);
return true;
}
return false;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(convert, "ConvertConvertLayerPrecision");
register_matcher(m, callback);
}
};
template<ngraph::element::Type_t from, ngraph::element::Type_t to>
class ConvertPrecision : public ngraph::pass::GraphRewrite {
public:
ConvertPrecision() {
add_matcher<ConvertConstantsPrecision<from, to>>();
add_matcher<ConvertParametersPrecision<from, to>>();
add_matcher<ConvertConvertLayerOutputPrecision<from, to>>();
}
};
} // namespace pass
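A hedged sketch of applying the extended conversion pass, assuming it is run through ngraph::pass::Manager as GraphRewrite passes usually are (the function variable stands for whatever ngraph::Function the test builds):

#include <ngraph/pass/manager.hpp>

ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::ConvertPrecision<ngraph::element::Type_t::f32, ngraph::element::Type_t::f16>>();
manager.run_passes(function);  // constants, parameters and Convert destination types are all rewritten to f16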